123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477 |
<template>
  <div class="smart-paragraph">
    <el-card>
      <!-- Card title plus a short tagline -->
      <template #header>
        <div class="card-header">
          <span>智能分段工具</span>
          <el-tag type="info">基于语义的段落优化</el-tag>
        </div>
      </template>

      <!-- Input area: every control below is bound to a field of `form` -->
      <el-form :model="form" label-width="120px">
        <el-form-item label="输入文本">
          <el-input
            v-model="form.inputText"
            type="textarea"
            :rows="10"
            placeholder="请输入需要智能分段的文本..."
          />
        </el-form-item>

        <!-- The selected label is stored in form.strategy -->
        <el-form-item label="分段策略">
          <el-radio-group v-model="form.strategy">
            <el-radio label="auto">自动优化</el-radio>
            <el-radio label="sentence">按句子分段</el-radio>
            <el-radio label="length">按长度分段</el-radio>
            <el-radio label="semantic">语义分段</el-radio>
          </el-radio-group>
        </el-form-item>

        <!-- Length limits, measured in characters -->
        <el-form-item label="分段参数">
          <el-row :gutter="20">
            <el-col :span="8">
              <el-form-item label="最小段落长度">
                <el-input-number
                  v-model="form.minLength"
                  :min="10"
                  :max="500"
                  :step="10"
                />
              </el-form-item>
            </el-col>
            <el-col :span="8">
              <el-form-item label="最大段落长度">
                <el-input-number
                  v-model="form.maxLength"
                  :min="50"
                  :max="1000"
                  :step="50"
                />
              </el-form-item>
            </el-col>
            <el-col :span="8">
              <el-form-item label="目标段落长度">
                <el-input-number
                  v-model="form.targetLength"
                  :min="50"
                  :max="500"
                  :step="25"
                />
              </el-form-item>
            </el-col>
          </el-row>
        </el-form-item>

        <el-form-item label="特殊处理">
          <el-checkbox v-model="form.keepDialogue">保持对话完整性</el-checkbox>
          <el-checkbox v-model="form.keepQuotes">保持引用完整性</el-checkbox>
          <el-checkbox v-model="form.mergeShort">合并过短段落</el-checkbox>
          <el-checkbox v-model="form.splitLong">分割过长段落</el-checkbox>
        </el-form-item>

        <!-- Actions; the last button stays disabled until a result exists -->
        <el-form-item>
          <el-button type="primary" @click="processSmartParagraph">智能分段</el-button>
          <el-button @click="resetForm">重置</el-button>
          <el-button :disabled="!result" @click="previewResult">预览结果</el-button>
        </el-form-item>
      </el-form>

      <!-- Rendered only once `result` is non-null -->
      <div v-if="result" class="result-section">
        <h4>分段结果:</h4>
        <div class="paragraphs-container">
          <!-- Paragraphs outside the current min/max limits get a highlight class and a tag -->
          <div
            v-for="(paragraph, index) in result.paragraphs"
            :key="index"
            class="paragraph-item"
            :class="{ short: paragraph.length < form.minLength, long: paragraph.length > form.maxLength }"
          >
            <div class="paragraph-header">
              <span class="paragraph-number">段落 {{ index + 1 }}</span>
              <span class="paragraph-length">{{ paragraph.length }} 字符</span>
              <el-tag
                v-if="paragraph.length < form.minLength"
                type="warning"
                size="small"
              >过短</el-tag>
              <el-tag
                v-if="paragraph.length > form.maxLength"
                type="danger"
                size="small"
              >过长</el-tag>
            </div>
            <div class="paragraph-content">{{ paragraph }}</div>
          </div>
        </div>

        <!-- Summary statistics taken from the result object -->
        <div class="statistics">
          <el-descriptions :column="4" border>
            <el-descriptions-item label="总段落数">{{ result.paragraphs.length }}</el-descriptions-item>
            <el-descriptions-item label="平均长度">{{ result.averageLength }} 字符</el-descriptions-item>
            <el-descriptions-item label="最短段落">{{ result.minLength }} 字符</el-descriptions-item>
            <el-descriptions-item label="最长段落">{{ result.maxLength }} 字符</el-descriptions-item>
          </el-descriptions>
        </div>
      </div>
    </el-card>
  </div>
</template>
<script setup>
import { ref } from 'vue';
import { ElMessage } from 'element-plus';
/**
 * Produces the initial value of every form control.
 * A new object is returned on each call so no state is shared between callers.
 */
function createDefaultForm() {
  return {
    inputText: '',
    strategy: 'auto',
    minLength: 50,
    maxLength: 300,
    targetLength: 150,
    keepDialogue: true,
    keepQuotes: true,
    mergeShort: true,
    splitLong: true,
  };
}

// Reactive model behind the form controls.
const form = ref(createDefaultForm());

// Outcome of the latest run; starts as null.
const result = ref(null);
/**
 * Core entry point: splits `text` into paragraphs and collects length statistics.
 *
 * @param {string} text - Raw input text.
 * @param {object} options - Settings object. `options.strategy` selects the
 *   splitter ('sentence', 'length' or 'semantic'; any other value uses the
 *   automatic mode). The object is forwarded unchanged to the chosen splitter.
 * @returns {{paragraphs: string[], averageLength: number, minLength: number, maxLength: number}}
 *   The paragraphs after post-processing, plus their rounded mean, shortest and
 *   longest length. All three numbers are 0 when no paragraph is produced.
 */
function smartParagraphSplit(text, options) {
  const emptyResult = () => ({ paragraphs: [], averageLength: 0, minLength: 0, maxLength: 0 });

  if (typeof text !== 'string' || !text.trim()) return emptyResult();

  let paragraphs;
  switch (options.strategy) {
    case 'sentence':
      paragraphs = splitBySentences(text, options);
      break;
    case 'length':
      paragraphs = splitByLength(text, options);
      break;
    case 'semantic':
      paragraphs = splitBySemantic(text, options);
      break;
    default:
      paragraphs = autoOptimize(text, options);
  }

  paragraphs = postProcess(paragraphs, options);

  // Text made only of separators leaves nothing behind. Without this guard the
  // statistics would be NaN (0 / 0), Infinity and -Infinity.
  if (paragraphs.length === 0) return emptyResult();

  // Single pass instead of Math.min(...lengths): spreading a very large array
  // into a call can exceed the engine's argument limit.
  let total = 0;
  let shortest = Infinity;
  let longest = 0;
  for (const { length } of paragraphs) {
    total += length;
    if (length < shortest) shortest = length;
    if (length > longest) longest = length;
  }

  return {
    paragraphs,
    averageLength: Math.round(total / paragraphs.length),
    minLength: shortest,
    maxLength: longest,
  };
}
/**
 * Splits text into sentences and packs consecutive sentences into paragraphs
 * of at most `options.maxLength` characters.
 *
 * A sentence ends at `。`, `!`, `?`, `;` (ASCII or full-width) or at a line
 * break. The terminators that closed a sentence are kept, so questions and
 * exclamations are not rewritten as statements. Two sentences that were only
 * separated by a line break are joined with `。`.
 *
 * A single sentence longer than `maxLength` becomes a paragraph of its own;
 * it is not cut here.
 *
 * @param {string} text - Text to split.
 * @param {{maxLength: number}} options - Only `maxLength` is read.
 * @returns {string[]} Paragraphs in input order.
 */
function splitBySentences(text, options) {
  // Group 1: sentence body. Group 2: the run of terminators that closes it.
  // Full-width forms are written as escapes so width normalization of this
  // file cannot silently change the character set.
  const sentencePattern = /([^。!?;\uFF01\uFF1F\uFF1B\n]+)([。!?;\uFF01\uFF1F\uFF1B]*)/g;
  const endsWithTerminator = /[。!?;\uFF01\uFF1F\uFF1B]$/;

  const paragraphs = [];
  let current = '';

  for (const [, body, terminators] of text.matchAll(sentencePattern)) {
    const trimmedBody = body.trim();
    if (!trimmedBody) continue;

    const sentence = trimmedBody + terminators;
    if (!current) {
      current = sentence;
      continue;
    }

    // The joiner is part of the paragraph, so it has to count against the limit.
    const joiner = endsWithTerminator.test(current) ? '' : '。';
    if (current.length + joiner.length + sentence.length > options.maxLength) {
      paragraphs.push(current);
      current = sentence;
    } else {
      current += joiner + sentence;
    }
  }

  if (current) {
    paragraphs.push(current);
  }

  return paragraphs;
}
/**
 * Cuts text into pieces of roughly `options.targetLength` characters.
 *
 * Characters are appended one at a time. As soon as the buffer reaches the
 * target length, `findBestSplitPoint` chooses where to cut it; the part before
 * the cut is emitted (trimmed) and the rest stays in the buffer.
 *
 * @param {string} text - Text to cut.
 * @param {object} options - `targetLength` is read here; the whole object is
 *   forwarded to `findBestSplitPoint`.
 * @returns {string[]} Non-empty, trimmed pieces in input order.
 */
function splitByLength(text, options) {
  const paragraphs = [];
  let current = '';

  // Iterating a string yields whole code points, so a character stored as a
  // surrogate pair (emoji, rare CJK) is never appended half-way.
  for (const char of text) {
    current += char;

    if (current.length >= options.targetLength) {
      let splitPoint = findBestSplitPoint(current, options);

      // A forced cut may still fall between the two halves of a surrogate
      // pair. Move it off the pair; go forward only when going back would
      // leave an empty piece.
      if (splitPoint > 0 && splitPoint < current.length) {
        const before = current.charCodeAt(splitPoint - 1);
        const after = current.charCodeAt(splitPoint);
        const insidePair =
          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (insidePair) {
          splitPoint = splitPoint > 1 ? splitPoint - 1 : splitPoint + 1;
        }
      }

      if (splitPoint > 0) {
        const piece = current.substring(0, splitPoint).trim();
        // Whitespace-only stretches would otherwise be emitted as ''.
        if (piece) paragraphs.push(piece);
        current = current.substring(splitPoint);
      }
    }
  }

  const tail = current.trim();
  if (tail) {
    paragraphs.push(tail);
  }

  return paragraphs;
}
/**
 * Splits text at blank lines, then breaks up blocks that are too long.
 *
 * Each block is trimmed before it is measured, so surrounding whitespace
 * cannot push a block over the limit. Blocks longer than `options.maxLength`
 * are handed to `splitLongParagraph`, unless the caller opted out with
 * `options.splitLong === false`. When the flag is absent, long blocks are
 * split, as before.
 *
 * @param {string} text - Text to split.
 * @param {object} options - `maxLength` and `splitLong` are read here; the
 *   whole object is forwarded to `splitLongParagraph`.
 * @returns {string[]} Trimmed, non-empty paragraphs in input order.
 */
function splitBySemantic(text, options) {
  // One line break, optional whitespace, then at least one more line break.
  const blankLine = /\n\s*\n+/;
  const maySplitLong = options.splitLong !== false;
  const paragraphs = [];

  for (const block of text.split(blankLine)) {
    const trimmed = block.trim();
    if (!trimmed) continue;

    if (maySplitLong && trimmed.length > options.maxLength) {
      paragraphs.push(...splitLongParagraph(trimmed, options));
    } else {
      paragraphs.push(trimmed);
    }
  }

  return paragraphs;
}
/**
 * Automatic mode: a semantic split followed by a length-optimization pass.
 *
 * @param {string} text - Text to split.
 * @param {object} options - Forwarded unchanged to both stages.
 * @returns {string[]} Whatever `optimizeLength` returns for the semantic split.
 */
function autoOptimize(text, options) {
  const semanticParagraphs = splitBySemantic(text, options);
  return optimizeLength(semanticParagraphs, options);
}
/**
 * Chooses the index at which `text` should be cut.
 *
 * Boundary kinds are tried from strongest to weakest: sentence terminators,
 * then commas, then colons, then whitespace. Within one kind the right-most
 * boundary whose end position lies in [`minLength`, `maxLength`] wins. The
 * returned index points just past the boundary, so the punctuation stays
 * with the left-hand part.
 *
 * If no boundary qualifies, the cut is forced at `min(maxLength, text.length)`.
 * That index is moved by one when it would fall inside a surrogate pair.
 *
 * @param {string} text - Text to inspect.
 * @param {{minLength: number, maxLength: number}} options - Allowed range.
 * @returns {number} Cut index (number of UTF-16 units in the left part).
 */
function findBestSplitPoint(text, options) {
  // ASCII and full-width forms are both accepted. Full-width ones are written
  // as escapes so width normalization of this file cannot change the set.
  const boundaryPatterns = [
    /[。!?;\uFF01\uFF1F\uFF1B]/g, // full stop, exclamation, question mark, semicolon
    /[,、\uFF0C]/g, // comma, enumeration comma
    /[:\uFF1A]/g, // colon
    /\s+/g, // whitespace
  ];

  for (const pattern of boundaryPatterns) {
    const matches = [...text.matchAll(pattern)];
    for (let i = matches.length - 1; i >= 0; i -= 1) {
      const position = matches[i].index + matches[i][0].length;
      if (position >= options.minLength && position <= options.maxLength) {
        return position;
      }
    }
  }

  // Nothing suitable: force the cut, but never between the halves of a pair.
  let forced = Math.min(options.maxLength, text.length);
  if (forced > 0 && forced < text.length) {
    const before = text.charCodeAt(forced - 1);
    const after = text.charCodeAt(forced);
    const insidePair =
      before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
    if (insidePair) {
      // Step back when possible; step forward only if that would return 0.
      forced = forced > 1 ? forced - 1 : forced + 1;
    }
  }
  return forced;
}
/**
 * Breaks one over-long paragraph into pieces of at most `options.maxLength`
 * characters, preferring sentence boundaries.
 *
 * Sentences end at `。`, `!`, `?` or `;` (ASCII or full-width). Each sentence
 * keeps its own terminators. Consecutive sentences are packed together while
 * they fit. A sentence that alone exceeds the limit is handed to
 * `splitByLength`, whether or not other text is already buffered.
 *
 * @param {string} paragraph - Paragraph to break up.
 * @param {object} options - `maxLength` is read here; the whole object is
 *   forwarded to `splitByLength`.
 * @returns {string[]} Pieces in input order.
 */
function splitLongParagraph(paragraph, options) {
  // Group 1: sentence body. Group 2: the terminators that close it.
  const sentencePattern = /([^。!?;\uFF01\uFF1F\uFF1B]+)([。!?;\uFF01\uFF1F\uFF1B]*)/g;
  const pieces = [];
  let current = '';

  // Emits the buffered text, if any, and starts a new buffer.
  const flush = () => {
    const trimmed = current.trim();
    if (trimmed) pieces.push(trimmed);
    current = '';
  };

  for (const [, body, terminators] of paragraph.matchAll(sentencePattern)) {
    if (!body.trim()) continue;
    const sentence = body + terminators;

    if (sentence.length > options.maxLength) {
      // Too long on its own: emit what is buffered, then cut by length.
      flush();
      pieces.push(...splitByLength(sentence, options));
      continue;
    }

    if (current && current.length + sentence.length > options.maxLength) {
      flush();
    }
    current += sentence;
  }

  flush();
  return pieces;
}
/**
 * Evens out paragraph lengths: merges short neighbours and splits long ones.
 *
 * With `options.mergeShort` set, a paragraph is appended to the one emitted
 * before it when either of the two is shorter than `options.minLength` and
 * the merged text, joiner included, fits into `options.maxLength`. Checking
 * both sides lets a short opening paragraph merge with its successor. A `。`
 * joiner is inserted only when the earlier paragraph does not already end
 * with a sentence terminator.
 *
 * With `options.splitLong` set, a paragraph longer than `options.maxLength`
 * is replaced by the pieces returned from `splitLongParagraph`.
 *
 * @param {string[]} paragraphs - Paragraphs in reading order.
 * @param {object} options - `minLength`, `maxLength`, `mergeShort` and
 *   `splitLong` are read here; the whole object is forwarded to
 *   `splitLongParagraph`.
 * @returns {string[]} A new array; the input array is not modified.
 */
function optimizeLength(paragraphs, options) {
  const endsWithTerminator = /[。!?;\uFF01\uFF1F\uFF1B]$/;
  const result = [];

  for (const paragraph of paragraphs) {
    if (options.mergeShort && result.length > 0) {
      const previous = result[result.length - 1];
      // Avoids doubled punctuation such as "……。。……".
      const joiner = endsWithTerminator.test(previous) ? '' : '。';
      const eitherIsShort =
        paragraph.length < options.minLength || previous.length < options.minLength;
      const mergedLength = previous.length + joiner.length + paragraph.length;

      if (eitherIsShort && mergedLength <= options.maxLength) {
        result[result.length - 1] = previous + joiner + paragraph;
        continue;
      }
    }

    if (paragraph.length > options.maxLength && options.splitLong) {
      result.push(...splitLongParagraph(paragraph, options));
    } else {
      result.push(paragraph);
    }
  }

  return result;
}
/**
 * Final clean-up: trims every paragraph, drops empty ones, and makes sure each
 * remaining paragraph ends with sentence-final punctuation.
 *
 * A paragraph counts as finished when it ends with `。`, `!`, `?`, `.` or an
 * ellipsis (ASCII or full-width where both exist), optionally followed by
 * closing quotes or brackets. Anything else gets `。` appended.
 *
 * @param {string[]} paragraphs - Paragraphs to clean.
 * @returns {string[]} A new array of cleaned paragraphs.
 */
function postProcess(paragraphs) {
  // Terminator, then any number of closing quotes or brackets, at the end.
  // Non-ASCII characters other than '。' are written as escapes so width
  // normalization of this file cannot change the set.
  const properlyEnded =
    /[。!?.\uFF01\uFF1F\u2026][\u201D\u2019\u300D\u300F\uFF09"')]*$/;

  return paragraphs
    .map((paragraph) => paragraph.trim())
    .filter((paragraph) => paragraph.length > 0)
    .map((paragraph) => (properlyEnded.test(paragraph) ? paragraph : `${paragraph}。`));
}
/**
 * Click handler for the primary button: runs the splitter on the current form.
 *
 * Shows a warning and stops when the input is blank. Otherwise stores the
 * splitter's return value in `result` and shows a success message. An
 * exception raised while splitting is shown as an error message.
 */
function processSmartParagraph() {
  const options = form.value;
  const { inputText } = options;

  if (inputText.trim() === '') {
    ElMessage.warning('请输入需要分段的文本');
    return;
  }

  try {
    result.value = smartParagraphSplit(inputText, options);
    ElMessage.success('智能分段完成');
  } catch (error) {
    ElMessage.error(`分段处理失败:${error.message}`);
  }
}
/**
 * Click handler for the reset button: replaces the form model with a fresh
 * object holding the default values and clears the stored result.
 */
function resetForm() {
  const defaults = {
    inputText: '',
    strategy: 'auto',
    minLength: 50,
    maxLength: 300,
    targetLength: 150,
    keepDialogue: true,
    keepQuotes: true,
    mergeShort: true,
    splitLong: true,
  };

  form.value = defaults;
  result.value = null;
}
/**
 * Click handler for the preview button: logs the paragraphs, joined by blank
 * lines, and tries to copy them to the clipboard.
 *
 * Does nothing while no result exists. When the Clipboard API is missing or
 * the write is rejected, the user is asked to copy manually.
 *
 * @returns {Promise<void>} Resolves once the user has been notified.
 */
async function previewResult() {
  if (!result.value) return;

  const previewText = result.value.paragraphs.join('\n\n');
  console.log('分段结果预览:', previewText);

  // navigator.clipboard is only defined in secure contexts. Calling
  // writeText on undefined would throw before any fallback could run.
  const clipboard = typeof navigator === 'undefined' ? undefined : navigator.clipboard;
  if (typeof clipboard?.writeText !== 'function') {
    ElMessage.info('请手动复制结果');
    return;
  }

  try {
    await clipboard.writeText(previewText);
    ElMessage.success('结果已复制到剪贴板');
  } catch (error) {
    // Typically a denied permission; keep the reason visible to developers.
    console.warn('Clipboard write failed:', error);
    ElMessage.info('请手动复制结果');
  }
}
</script>
<style scoped>
/* Outer spacing around the card */
.smart-paragraph {
  padding: 20px;
}

/* Title on the left, tagline on the right */
.card-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
}

.result-section {
  margin-top: 20px;
}

/* Scrollable list of result paragraphs */
.paragraphs-container {
  max-height: 400px;
  overflow-y: auto;
  border: 1px solid #e4e7ed;
  border-radius: 4px;
  padding: 10px;
}

.paragraph-item {
  margin-bottom: 15px;
  padding: 10px;
  border: 1px solid #f0f0f0;
  border-radius: 4px;
  background-color: #fafafa;
}

/* Orange accent for paragraphs flagged as too short */
.paragraph-item.short {
  border-left: 3px solid #e6a23c;
  background-color: #fdf6ec;
}

/* Red accent for paragraphs flagged as too long */
.paragraph-item.long {
  border-left: 3px solid #f56c6c;
  background-color: #fef0f0;
}

.paragraph-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-bottom: 8px;
  font-size: 12px;
  color: #606266;
}

.paragraph-number {
  font-weight: bold;
}

.paragraph-length {
  color: #909399;
}

/* pre-wrap keeps line breaks that are part of the paragraph text */
.paragraph-content {
  line-height: 1.6;
  color: #303133;
  white-space: pre-wrap;
}

.statistics {
  margin-top: 20px;
}
</style>
|