|
@@ -0,0 +1,477 @@
|
|
|
+<template>
|
|
|
+ <div class="smart-paragraph">
|
|
|
+ <el-card>
|
|
|
+ <template #header>
|
|
|
+ <div class="card-header">
|
|
|
+ <span>智能分段工具</span>
|
|
|
+ <el-tag type="info">基于语义的段落优化</el-tag>
|
|
|
+ </div>
|
|
|
+ </template>
|
|
|
+
|
|
|
+ <el-form :model="form" label-width="120px">
|
|
|
+ <el-form-item label="输入文本">
|
|
|
+ <el-input
|
|
|
+ v-model="form.inputText"
|
|
|
+ type="textarea"
|
|
|
+ :rows="10"
|
|
|
+ placeholder="请输入需要智能分段的文本..."
|
|
|
+ ></el-input>
|
|
|
+ </el-form-item>
|
|
|
+
|
|
|
+ <el-form-item label="分段策略">
|
|
|
+ <el-radio-group v-model="form.strategy">
|
|
|
+ <el-radio label="auto">自动优化</el-radio>
|
|
|
+ <el-radio label="sentence">按句子分段</el-radio>
|
|
|
+ <el-radio label="length">按长度分段</el-radio>
|
|
|
+ <el-radio label="semantic">语义分段</el-radio>
|
|
|
+ </el-radio-group>
|
|
|
+ </el-form-item>
|
|
|
+
|
|
|
+ <el-form-item label="分段参数">
|
|
|
+ <el-row :gutter="20">
|
|
|
+ <el-col :span="8">
|
|
|
+ <el-form-item label="最小段落长度">
|
|
|
+ <el-input-number
|
|
|
+ v-model="form.minLength"
|
|
|
+ :min="10"
|
|
|
+ :max="500"
|
|
|
+ :step="10"
|
|
|
+ ></el-input-number>
|
|
|
+ </el-form-item>
|
|
|
+ </el-col>
|
|
|
+ <el-col :span="8">
|
|
|
+ <el-form-item label="最大段落长度">
|
|
|
+ <el-input-number
|
|
|
+ v-model="form.maxLength"
|
|
|
+ :min="50"
|
|
|
+ :max="1000"
|
|
|
+ :step="50"
|
|
|
+ ></el-input-number>
|
|
|
+ </el-form-item>
|
|
|
+ </el-col>
|
|
|
+ <el-col :span="8">
|
|
|
+ <el-form-item label="目标段落长度">
|
|
|
+ <el-input-number
|
|
|
+ v-model="form.targetLength"
|
|
|
+ :min="50"
|
|
|
+ :max="500"
|
|
|
+ :step="25"
|
|
|
+ ></el-input-number>
|
|
|
+ </el-form-item>
|
|
|
+ </el-col>
|
|
|
+ </el-row>
|
|
|
+ </el-form-item>
|
|
|
+
|
|
|
+ <el-form-item label="特殊处理">
|
|
|
+ <el-checkbox v-model="form.keepDialogue">保持对话完整性</el-checkbox>
|
|
|
+ <el-checkbox v-model="form.keepQuotes">保持引用完整性</el-checkbox>
|
|
|
+ <el-checkbox v-model="form.mergeShort">合并过短段落</el-checkbox>
|
|
|
+ <el-checkbox v-model="form.splitLong">分割过长段落</el-checkbox>
|
|
|
+ </el-form-item>
|
|
|
+
|
|
|
+ <el-form-item>
|
|
|
+ <el-button type="primary" @click="processSmartParagraph">智能分段</el-button>
|
|
|
+ <el-button @click="resetForm">重置</el-button>
|
|
|
+ <el-button @click="previewResult" :disabled="!result">预览结果</el-button>
|
|
|
+ </el-form-item>
|
|
|
+ </el-form>
|
|
|
+
|
|
|
+ <div v-if="result" class="result-section">
|
|
|
+ <h4>分段结果:</h4>
|
|
|
+ <div class="paragraphs-container">
|
|
|
+ <div
|
|
|
+ v-for="(paragraph, index) in result.paragraphs"
|
|
|
+ :key="index"
|
|
|
+ class="paragraph-item"
|
|
|
+ :class="{ 'short': paragraph.length < form.minLength, 'long': paragraph.length > form.maxLength }"
|
|
|
+ >
|
|
|
+ <div class="paragraph-header">
|
|
|
+ <span class="paragraph-number">段落 {{ index + 1 }}</span>
|
|
|
+ <span class="paragraph-length">{{ paragraph.length }} 字符</span>
|
|
|
+ <el-tag
|
|
|
+ v-if="paragraph.length < form.minLength"
|
|
|
+ type="warning"
|
|
|
+ size="small"
|
|
|
+ >过短</el-tag>
|
|
|
+ <el-tag
|
|
|
+ v-if="paragraph.length > form.maxLength"
|
|
|
+ type="danger"
|
|
|
+ size="small"
|
|
|
+ >过长</el-tag>
|
|
|
+ </div>
|
|
|
+ <div class="paragraph-content">{{ paragraph }}</div>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+
|
|
|
+ <div class="statistics">
|
|
|
+ <el-descriptions :column="4" border>
|
|
|
+ <el-descriptions-item label="总段落数">{{ result.paragraphs.length }}</el-descriptions-item>
|
|
|
+ <el-descriptions-item label="平均长度">{{ result.averageLength }} 字符</el-descriptions-item>
|
|
|
+ <el-descriptions-item label="最短段落">{{ result.minLength }} 字符</el-descriptions-item>
|
|
|
+ <el-descriptions-item label="最长段落">{{ result.maxLength }} 字符</el-descriptions-item>
|
|
|
+ </el-descriptions>
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+ </el-card>
|
|
|
+ </div>
|
|
|
+</template>
|
|
|
+
|
|
|
+<script setup>
|
|
|
+import { ref } from 'vue';
|
|
|
+import { ElMessage } from 'element-plus';
|
|
|
+
|
|
|
// Initial values for every control bound in the template.
// keepDialogue / keepQuotes are bound to checkboxes but are not read by any
// of the splitting functions visible in this file.
const initialFormState = {
  inputText: '',
  strategy: 'auto',
  minLength: 50,
  maxLength: 300,
  targetLength: 150,
  keepDialogue: true,
  keepQuotes: true,
  mergeShort: true,
  splitLong: true
};

// Reactive form model; the whole object is also passed as `options` to the
// splitting functions.
const form = ref({ ...initialFormState });

// Latest splitting result ({ paragraphs, averageLength, minLength, maxLength }),
// or null until a split has been performed.
const result = ref(null);
|
|
|
+
|
|
|
/**
 * Core entry point: split `text` into paragraphs and compute length statistics.
 *
 * The strategy is chosen by `options.strategy`:
 *   'sentence' -> splitBySentences, 'length' -> splitByLength,
 *   'semantic' -> splitBySemantic, anything else -> autoOptimize.
 * The chosen splitter's output is then passed through postProcess.
 *
 * @param {string} text - Raw input text.
 * @param {object} options - Form values (strategy, minLength, maxLength, ...).
 * @returns {{paragraphs: string[], averageLength: number, minLength: number, maxLength: number}}
 *   Paragraphs plus rounded average / shortest / longest paragraph length.
 *   All statistics are 0 when there is nothing to report.
 */
function smartParagraphSplit(text, options) {
  const emptyResult = { paragraphs: [], averageLength: 0, minLength: 0, maxLength: 0 };

  // Non-string or blank input: nothing to split.
  if (typeof text !== 'string' || !text.trim()) return emptyResult;

  let paragraphs;
  switch (options.strategy) {
    case 'sentence':
      paragraphs = splitBySentences(text, options);
      break;
    case 'length':
      paragraphs = splitByLength(text, options);
      break;
    case 'semantic':
      paragraphs = splitBySemantic(text, options);
      break;
    default:
      paragraphs = autoOptimize(text, options);
  }

  paragraphs = postProcess(paragraphs, options);

  // Input made only of punctuation/whitespace can leave no paragraphs at all;
  // without this guard the statistics would be NaN / Infinity / -Infinity.
  if (paragraphs.length === 0) return emptyResult;

  // Single pass instead of Math.min(...)/Math.max(...) so very large results
  // cannot exceed the engine's argument-count limit.
  let total = 0;
  let shortest = Infinity;
  let longest = 0;
  for (const { length } of paragraphs) {
    total += length;
    if (length < shortest) shortest = length;
    if (length > longest) longest = length;
  }

  return {
    paragraphs,
    averageLength: Math.round(total / paragraphs.length),
    minLength: shortest,
    maxLength: longest
  };
}
|
|
|
+
|
|
|
/**
 * Group sentences into paragraphs no longer than `options.maxLength`.
 *
 * A sentence is a run of text up to a terminator (。 ! ? ; or their
 * full-width forms) or a line break. Each sentence keeps its own terminator;
 * a "。" is inserted only between two sentences that were separated by a
 * line break with no punctuation.
 *
 * @param {string} text - Text to split.
 * @param {{maxLength: number}} options - Upper bound for a paragraph's length.
 * @returns {string[]} Paragraphs in input order. A single sentence longer than
 *   maxLength is emitted as its own (over-long) paragraph.
 */
function splitBySentences(text, options) {
  // \uFF01 \uFF1F \uFF1B are the full-width forms of ! ? ;
  const sentencePattern = /([^。!?;\uFF01\uFF1F\uFF1B\n]+)([。!?;\uFF01\uFF1F\uFF1B]*)/g;
  const endsWithTerminator = /[。!?;\uFF01\uFF1F\uFF1B]$/;

  const paragraphs = [];
  let current = '';

  for (const [, rawBody, terminator] of text.matchAll(sentencePattern)) {
    const body = rawBody.trim();
    if (!body) continue;

    const sentence = body + terminator;
    if (!current) {
      current = sentence;
      continue;
    }

    // Only add a separator when the previous sentence ended at a line break.
    const joiner = endsWithTerminator.test(current) ? '' : '。';

    // Count the joiner too, so a merged paragraph never exceeds maxLength.
    if (current.length + joiner.length + sentence.length > options.maxLength) {
      paragraphs.push(current);
      current = sentence;
    } else {
      current += joiner + sentence;
    }
  }

  if (current) {
    paragraphs.push(current);
  }

  return paragraphs;
}
|
|
|
+
|
|
|
/**
 * Split text by length: characters are accumulated one UTF-16 unit at a time,
 * and once the buffer reaches `options.targetLength`, findBestSplitPoint is
 * asked where to cut it.
 *
 * @param {string} text - Text to split.
 * @param {object} options - Needs targetLength; also forwarded to findBestSplitPoint.
 * @returns {string[]} Trimmed chunks in input order (a chunk may be empty if
 *   the cut-off part was only whitespace).
 */
function splitByLength(text, options) {
  const chunks = [];
  let buffer = '';

  for (let i = 0; i < text.length; i += 1) {
    buffer += text[i];

    // Keep accumulating until the target length is reached.
    if (!(buffer.length >= options.targetLength)) continue;

    const cut = findBestSplitPoint(buffer, options);
    if (cut > 0) {
      chunks.push(buffer.slice(0, cut).trim());
      buffer = buffer.slice(cut);
    }
  }

  // Whatever is left over forms the final chunk.
  const tail = buffer.trim();
  if (tail) {
    chunks.push(tail);
  }

  return chunks;
}
|
|
|
+
|
|
|
/**
 * Split text on blank lines; any block longer than `options.maxLength`
 * is further broken up by splitLongParagraph.
 *
 * @param {string} text - Text to split.
 * @param {object} options - Needs maxLength; forwarded to splitLongParagraph.
 * @returns {string[]} Paragraphs in input order.
 */
function splitBySemantic(text, options) {
  // A newline, optional whitespace, then one or more newlines marks a paragraph break.
  const blocks = text.split(/\n\s*\n+/g);

  return blocks.flatMap((block) => {
    // Skip blocks that are only whitespace.
    if (!block.trim()) return [];

    // The untrimmed length decides whether the block counts as too long.
    return block.length > options.maxLength
      ? splitLongParagraph(block, options)
      : [block.trim()];
  });
}
|
|
|
+
|
|
|
/**
 * Automatic strategy: split on blank lines via splitBySemantic, then
 * adjust paragraph lengths via optimizeLength.
 *
 * @param {string} text - Text to split.
 * @param {object} options - Forwarded unchanged to both steps.
 * @returns {string[]} Whatever optimizeLength returns.
 */
function autoOptimize(text, options) {
  const semanticParagraphs = splitBySemantic(text, options);
  return optimizeLength(semanticParagraphs, options);
}
|
|
|
+
|
|
|
/**
 * Find the index at which `text` should be cut.
 *
 * Boundary kinds are tried in priority order: sentence terminators, then
 * commas, then colons, then whitespace. For the first kind that has a
 * boundary whose end position lies within [options.minLength,
 * options.maxLength], the last such position is returned. If no kind
 * qualifies, the cut is forced at min(options.maxLength, text.length).
 *
 * @param {string} text - Candidate paragraph.
 * @param {{minLength: number, maxLength: number}} options - Allowed cut range.
 * @returns {number} Index just after the chosen boundary (exclusive end of
 *   the first piece).
 */
function findBestSplitPoint(text, options) {
  // Each class also lists the full-width form of its ASCII punctuation
  // (\uFF01 ! \uFF1F ? \uFF1B ; \uFF0C , \uFF1A :).
  const boundaryPatterns = [
    /[。!?;\uFF01\uFF1F\uFF1B]/g, // sentence terminators
    /[,、\uFF0C]/g,               // commas
    /[:\uFF1A]/g,                 // colons
    /\s+/g                        // whitespace
  ];

  for (const pattern of boundaryPatterns) {
    let best = -1;
    for (const match of text.matchAll(pattern)) {
      const position = match.index + match[0].length;
      // Matches arrive in ascending order, so the last hit in range wins.
      if (position >= options.minLength && position <= options.maxLength) {
        best = position;
      }
    }
    if (best !== -1) return best;
  }

  // No usable boundary: force a cut at the maximum length.
  const cut = Math.min(options.maxLength, text.length);

  // Never cut between the two halves of a surrogate pair, which would leave
  // a broken character on each side.
  const isHighSurrogate = (code) => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code) => code >= 0xdc00 && code <= 0xdfff;
  const splitsPair =
    cut > 0 &&
    cut < text.length &&
    isHighSurrogate(text.charCodeAt(cut - 1)) &&
    isLowSurrogate(text.charCodeAt(cut));

  if (!splitsPair) return cut;

  // Prefer moving the cut before the pair; if that would give an empty first
  // piece, move it after the pair instead.
  return cut > 1 ? cut - 1 : cut + 1;
}
|
|
|
+
|
|
|
/**
 * Break an over-long paragraph into pieces of at most `options.maxLength`.
 *
 * Sentences (text up to 。 ! ? ; or their full-width forms) are packed
 * greedily and keep their own terminators. A single sentence that is itself
 * longer than maxLength is handed to splitByLength, regardless of whether
 * other sentences were already buffered.
 *
 * @param {string} paragraph - Paragraph to break up.
 * @param {object} options - Needs maxLength; forwarded to splitByLength.
 * @returns {string[]} Trimmed pieces in input order.
 */
function splitLongParagraph(paragraph, options) {
  // \uFF01 \uFF1F \uFF1B are the full-width forms of ! ? ;
  const sentencePattern = /([^。!?;\uFF01\uFF1F\uFF1B]+)([。!?;\uFF01\uFF1F\uFF1B]*)/g;

  const result = [];
  let current = '';

  // Emit the buffered sentences (if any) as one piece and start afresh.
  const flushCurrent = () => {
    const trimmed = current.trim();
    if (trimmed) result.push(trimmed);
    current = '';
  };

  for (const [, body, terminators] of paragraph.matchAll(sentencePattern)) {
    if (!body.trim()) continue;

    const sentence = body + terminators;

    if (sentence.length > options.maxLength) {
      // Too long on its own: emit what we have, then split by length.
      flushCurrent();
      result.push(...splitByLength(sentence, options));
    } else if (current.length + sentence.length > options.maxLength) {
      flushCurrent();
      current = sentence;
    } else {
      current += sentence;
    }
  }

  flushCurrent();
  return result;
}
|
|
|
+
|
|
|
/**
 * Adjust paragraph lengths.
 *
 * - With `options.mergeShort`, a paragraph shorter than `options.minLength`
 *   is appended to the previously emitted paragraph, provided the merged text
 *   (including any inserted separator) stays within `options.maxLength`.
 * - With `options.splitLong`, a paragraph longer than `options.maxLength`
 *   is replaced by the pieces returned from splitLongParagraph.
 *
 * @param {string[]} paragraphs - Paragraphs in document order.
 * @param {object} options - minLength, maxLength, mergeShort, splitLong.
 * @returns {string[]} New array; the input array is not modified.
 */
function optimizeLength(paragraphs, options) {
  // \uFF01 \uFF1F \uFF1B are the full-width forms of ! ? ;
  const endsWithTerminator = /[。!?;\uFF01\uFF1F\uFF1B]$/;
  const result = [];

  for (const paragraph of paragraphs) {
    const isShort = paragraph.length < options.minLength;

    if (isShort && options.mergeShort && result.length > 0) {
      const previous = result[result.length - 1];
      // Insert "。" only when the previous paragraph has no terminator of
      // its own; otherwise the merge would produce doubled punctuation.
      const joiner = endsWithTerminator.test(previous) ? '' : '。';

      if (previous.length + joiner.length + paragraph.length <= options.maxLength) {
        result[result.length - 1] = previous + joiner + paragraph;
        continue;
      }
    }

    if (paragraph.length > options.maxLength && options.splitLong) {
      result.push(...splitLongParagraph(paragraph, options));
    } else {
      result.push(paragraph);
    }
  }

  return result;
}
|
|
|
+
|
|
|
/**
 * Final clean-up: trim every paragraph, drop empty ones, and make sure each
 * remaining paragraph ends with sentence-final punctuation.
 *
 * A paragraph is left as is when it already ends with 。 ! ? . … (or
 * full-width ! ?), optionally followed by closing quotes or brackets.
 * Otherwise "。" is appended.
 *
 * @param {string[]} paragraphs - Paragraphs to normalise.
 * @returns {string[]} New array of cleaned paragraphs.
 */
function postProcess(paragraphs) {
  // Terminal punctuation, optionally followed by closing quotes/brackets:
  // \u2026 …  \uFF01 !  \uFF1F ?  \u201D ”  \u2019 ’  \u300D 」  \u300F 』  \uFF09 )
  const endsSentence = /[。!?.\u2026\uFF01\uFF1F][\u201D\u2019\u300D\u300F"')\]\uFF09]*$/;

  return paragraphs
    .map((p) => p.trim())
    .filter((p) => p.length > 0)
    .map((p) => (endsSentence.test(p) ? p : p + '。'));
}
|
|
|
+
|
|
|
/**
 * Click handler for the split button: validates the input, runs
 * smartParagraphSplit with the current form values, stores the outcome in
 * `result`, and reports success or failure through ElMessage.
 */
function processSmartParagraph() {
  const options = form.value;
  const text = options.inputText;

  // Blank input: warn and leave the previous result untouched.
  if (text.trim() === '') {
    ElMessage.warning('请输入需要分段的文本');
    return;
  }

  try {
    result.value = smartParagraphSplit(text, options);
    ElMessage.success('智能分段完成');
  } catch (error) {
    ElMessage.error(`分段处理失败:${error.message}`);
  }
}
|
|
|
+
|
|
|
/**
 * Click handler for the reset button: replaces the form model with a fresh
 * object holding the default values and clears the stored result.
 */
function resetForm() {
  const defaults = {
    inputText: '',
    strategy: 'auto',
    minLength: 50,
    maxLength: 300,
    targetLength: 150,
    keepDialogue: true,
    keepQuotes: true,
    mergeShort: true,
    splitLong: true
  };

  form.value = defaults;
  result.value = null;
}
|
|
|
+
|
|
|
/**
 * Click handler for the preview button: joins the current paragraphs with
 * blank lines, logs the text to the console, and tries to copy it to the
 * clipboard. Does nothing when there is no result yet.
 *
 * If the Clipboard API is unavailable or the write is rejected, the user
 * is asked to copy manually.
 */
function previewResult() {
  if (!result.value) return;

  const previewText = result.value.paragraphs.join('\n\n');
  console.log('分段结果预览:', previewText);

  // navigator.clipboard is undefined outside secure contexts; calling
  // writeText on it would throw synchronously and bypass the .catch below.
  const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
  if (!clipboard || typeof clipboard.writeText !== 'function') {
    ElMessage.info('请手动复制结果');
    return;
  }

  clipboard.writeText(previewText).then(() => {
    ElMessage.success('结果已复制到剪贴板');
  }).catch(() => {
    ElMessage.info('请手动复制结果');
  });
}
|
|
|
+</script>
|
|
|
+
|
|
|
+<style scoped>
|
|
|
+.smart-paragraph {
|
|
|
+ padding: 20px;
|
|
|
+}
|
|
|
+
|
|
|
+.card-header {
|
|
|
+ display: flex;
|
|
|
+ justify-content: space-between;
|
|
|
+ align-items: center;
|
|
|
+}
|
|
|
+
|
|
|
+.result-section {
|
|
|
+ margin-top: 20px;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraphs-container {
|
|
|
+ max-height: 400px;
|
|
|
+ overflow-y: auto;
|
|
|
+ border: 1px solid #e4e7ed;
|
|
|
+ border-radius: 4px;
|
|
|
+ padding: 10px;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-item {
|
|
|
+ margin-bottom: 15px;
|
|
|
+ padding: 10px;
|
|
|
+ border: 1px solid #f0f0f0;
|
|
|
+ border-radius: 4px;
|
|
|
+ background-color: #fafafa;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-item.short {
|
|
|
+ border-left: 3px solid #e6a23c;
|
|
|
+ background-color: #fdf6ec;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-item.long {
|
|
|
+ border-left: 3px solid #f56c6c;
|
|
|
+ background-color: #fef0f0;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-header {
|
|
|
+ display: flex;
|
|
|
+ justify-content: space-between;
|
|
|
+ align-items: center;
|
|
|
+ margin-bottom: 8px;
|
|
|
+ font-size: 12px;
|
|
|
+ color: #606266;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-number {
|
|
|
+ font-weight: bold;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-length {
|
|
|
+ color: #909399;
|
|
|
+}
|
|
|
+
|
|
|
+.paragraph-content {
|
|
|
+ line-height: 1.6;
|
|
|
+ color: #303133;
|
|
|
+ white-space: pre-wrap;
|
|
|
+}
|
|
|
+
|
|
|
+.statistics {
|
|
|
+ margin-top: 20px;
|
|
|
+}
|
|
|
+</style>
|