瀏覽代碼

feat(paragraph): 添加智能分段组件和测试文本

实现智能分段功能,支持多种分段策略(自动优化、按句子、按长度、语义分段)
包含测试文本用于验证分段算法效果
提供参数配置和结果统计展示
YourName 1 周之前
父節點
當前提交
0e72b07b17
共有 3 個文件被更改,包括 1276 次插入和 303 次删除
  1. 788 303
      src/components/ChapterTools.vue
  2. 477 0
      src/components/SmartParagraph.vue
  3. 11 0
      test_smart_paragraph.txt

File diff suppressed because it is too large
+ 788 - 303
src/components/ChapterTools.vue


+ 477 - 0
src/components/SmartParagraph.vue

@@ -0,0 +1,477 @@
+<template>
+  <div class="smart-paragraph">
+    <el-card>
+      <template #header>
+        <div class="card-header">
+          <span>智能分段工具</span>
+          <el-tag type="info">基于语义的段落优化</el-tag>
+        </div>
+      </template>
+      
+      <el-form :model="form" label-width="120px">
+        <el-form-item label="输入文本">
+          <el-input
+            v-model="form.inputText"
+            type="textarea"
+            :rows="10"
+            placeholder="请输入需要智能分段的文本..."
+          ></el-input>
+        </el-form-item>
+        
+        <el-form-item label="分段策略">
+          <el-radio-group v-model="form.strategy">
+            <el-radio label="auto">自动优化</el-radio>
+            <el-radio label="sentence">按句子分段</el-radio>
+            <el-radio label="length">按长度分段</el-radio>
+            <el-radio label="semantic">语义分段</el-radio>
+          </el-radio-group>
+        </el-form-item>
+        
+        <el-form-item label="分段参数">
+          <el-row :gutter="20">
+            <el-col :span="8">
+              <el-form-item label="最小段落长度">
+                <el-input-number
+                  v-model="form.minLength"
+                  :min="10"
+                  :max="500"
+                  :step="10"
+                ></el-input-number>
+              </el-form-item>
+            </el-col>
+            <el-col :span="8">
+              <el-form-item label="最大段落长度">
+                <el-input-number
+                  v-model="form.maxLength"
+                  :min="50"
+                  :max="1000"
+                  :step="50"
+                ></el-input-number>
+              </el-form-item>
+            </el-col>
+            <el-col :span="8">
+              <el-form-item label="目标段落长度">
+                <el-input-number
+                  v-model="form.targetLength"
+                  :min="50"
+                  :max="500"
+                  :step="25"
+                ></el-input-number>
+              </el-form-item>
+            </el-col>
+          </el-row>
+        </el-form-item>
+        
+        <el-form-item label="特殊处理">
+          <el-checkbox v-model="form.keepDialogue">保持对话完整性</el-checkbox>
+          <el-checkbox v-model="form.keepQuotes">保持引用完整性</el-checkbox>
+          <el-checkbox v-model="form.mergeShort">合并过短段落</el-checkbox>
+          <el-checkbox v-model="form.splitLong">分割过长段落</el-checkbox>
+        </el-form-item>
+        
+        <el-form-item>
+          <el-button type="primary" @click="processSmartParagraph">智能分段</el-button>
+          <el-button @click="resetForm">重置</el-button>
+          <el-button @click="previewResult" :disabled="!result">预览结果</el-button>
+        </el-form-item>
+      </el-form>
+      
+      <div v-if="result" class="result-section">
+        <h4>分段结果:</h4>
+        <div class="paragraphs-container">
+          <div
+            v-for="(paragraph, index) in result.paragraphs"
+            :key="index"
+            class="paragraph-item"
+            :class="{ 'short': paragraph.length < form.minLength, 'long': paragraph.length > form.maxLength }"
+          >
+            <div class="paragraph-header">
+              <span class="paragraph-number">段落 {{ index + 1 }}</span>
+              <span class="paragraph-length">{{ paragraph.length }} 字符</span>
+              <el-tag
+                v-if="paragraph.length < form.minLength"
+                type="warning"
+                size="small"
+              >过短</el-tag>
+              <el-tag
+                v-if="paragraph.length > form.maxLength"
+                type="danger"
+                size="small"
+              >过长</el-tag>
+            </div>
+            <div class="paragraph-content">{{ paragraph }}</div>
+          </div>
+        </div>
+        
+        <div class="statistics">
+          <el-descriptions :column="4" border>
+            <el-descriptions-item label="总段落数">{{ result.paragraphs.length }}</el-descriptions-item>
+            <el-descriptions-item label="平均长度">{{ result.averageLength }} 字符</el-descriptions-item>
+            <el-descriptions-item label="最短段落">{{ result.minLength }} 字符</el-descriptions-item>
+            <el-descriptions-item label="最长段落">{{ result.maxLength }} 字符</el-descriptions-item>
+          </el-descriptions>
+        </div>
+      </div>
+    </el-card>
+  </div>
+</template>
+
+<script setup>
+import { ref } from 'vue';
+import { ElMessage } from 'element-plus';
+
+const form = ref({
+  inputText: '',
+  strategy: 'auto',
+  minLength: 50,
+  maxLength: 300,
+  targetLength: 150,
+  keepDialogue: true,
+  keepQuotes: true,
+  mergeShort: true,
+  splitLong: true
+});
+
+const result = ref(null);
+
+// 智能分段核心算法
+function smartParagraphSplit(text, options) {
+  if (!text.trim()) return { paragraphs: [], averageLength: 0, minLength: 0, maxLength: 0 };
+  
+  let paragraphs = [];
+  
+  switch (options.strategy) {
+    case 'sentence':
+      paragraphs = splitBySentences(text, options);
+      break;
+    case 'length':
+      paragraphs = splitByLength(text, options);
+      break;
+    case 'semantic':
+      paragraphs = splitBySemantic(text, options);
+      break;
+    default:
+      paragraphs = autoOptimize(text, options);
+  }
+  
+  // 后处理
+  paragraphs = postProcess(paragraphs, options);
+  
+  // 计算统计信息
+  const lengths = paragraphs.map(p => p.length);
+  const averageLength = Math.round(lengths.reduce((a, b) => a + b, 0) / lengths.length);
+  const minLength = Math.min(...lengths);
+  const maxLength = Math.max(...lengths);
+  
+  return {
+    paragraphs,
+    averageLength,
+    minLength,
+    maxLength
+  };
+}
+
+// 按句子分段
+function splitBySentences(text, options) {
+  // 识别句子结束标点
+  const sentenceEndings = /[。!?;\n]+/g;
+  const sentences = text.split(sentenceEndings).filter(s => s.trim());
+  
+  let paragraphs = [];
+  let currentParagraph = '';
+  
+  for (const sentence of sentences) {
+    const trimmed = sentence.trim();
+    if (!trimmed) continue;
+    
+    if (currentParagraph.length + trimmed.length > options.maxLength) {
+      if (currentParagraph) {
+        paragraphs.push(currentParagraph.trim());
+        currentParagraph = trimmed;
+      } else {
+        // 单个句子就超过最大长度,强制分割
+        paragraphs.push(trimmed);
+      }
+    } else {
+      currentParagraph += (currentParagraph ? '。' : '') + trimmed;
+    }
+  }
+  
+  if (currentParagraph) {
+    paragraphs.push(currentParagraph.trim());
+  }
+  
+  return paragraphs;
+}
+
+// 按长度分段
+function splitByLength(text, options) {
+  const paragraphs = [];
+  let currentParagraph = '';
+  const words = text.split('');
+  
+  for (const word of words) {
+    currentParagraph += word;
+    
+    if (currentParagraph.length >= options.targetLength) {
+      // 寻找合适的分割点
+      const splitPoint = findBestSplitPoint(currentParagraph, options);
+      if (splitPoint > 0) {
+        paragraphs.push(currentParagraph.substring(0, splitPoint).trim());
+        currentParagraph = currentParagraph.substring(splitPoint);
+      }
+    }
+  }
+  
+  if (currentParagraph.trim()) {
+    paragraphs.push(currentParagraph.trim());
+  }
+  
+  return paragraphs;
+}
+
+// 语义分段
+function splitBySemantic(text, options) {
+  // 识别段落标记
+  const paragraphMarkers = /\n\s*\n+/g;
+  const initialParagraphs = text.split(paragraphMarkers);
+  
+  let paragraphs = [];
+  
+  for (const paragraph of initialParagraphs) {
+    if (!paragraph.trim()) continue;
+    
+    // 如果段落太长,进一步分割
+    if (paragraph.length > options.maxLength) {
+      const subParagraphs = splitLongParagraph(paragraph, options);
+      paragraphs.push(...subParagraphs);
+    } else {
+      paragraphs.push(paragraph.trim());
+    }
+  }
+  
+  return paragraphs;
+}
+
+// 自动优化
+function autoOptimize(text, options) {
+  // 首先按语义分段
+  let paragraphs = splitBySemantic(text, options);
+  
+  // 然后优化长度
+  paragraphs = optimizeLength(paragraphs, options);
+  
+  return paragraphs;
+}
+
+// 寻找最佳分割点
+function findBestSplitPoint(text, options) {
+  const splitPoints = [
+    /[。!?;]/g,  // 句号、感叹号、问号、分号
+    /[,、]/g,     // 逗号、顿号
+    /[:]/g,       // 冒号
+    /\s+/g         // 空格
+  ];
+  
+  for (const pattern of splitPoints) {
+    const matches = [...text.matchAll(pattern)];
+    for (let i = matches.length - 1; i >= 0; i--) {
+      const match = matches[i];
+      const position = match.index + match[0].length;
+      
+      // 检查分割点是否在合理范围内
+      if (position >= options.minLength && position <= options.maxLength) {
+        return position;
+      }
+    }
+  }
+  
+  // 如果没找到合适的分割点,强制分割
+  return Math.min(options.maxLength, text.length);
+}
+
+// 分割长段落
+function splitLongParagraph(paragraph, options) {
+  const sentences = paragraph.split(/[。!?;]/g).filter(s => s.trim());
+  const result = [];
+  let current = '';
+  
+  for (const sentence of sentences) {
+    if (current.length + sentence.length > options.maxLength) {
+      if (current) {
+        result.push(current.trim());
+        current = sentence;
+      } else {
+        // 单个句子就太长,按长度分割
+        const chunks = splitByLength(sentence, options);
+        result.push(...chunks);
+      }
+    } else {
+      current += (current ? '。' : '') + sentence;
+    }
+  }
+  
+  if (current) {
+    result.push(current.trim());
+  }
+  
+  return result;
+}
+
+// 优化段落长度
+function optimizeLength(paragraphs, options) {
+  const result = [];
+  
+  for (const paragraph of paragraphs) {
+    if (paragraph.length < options.minLength && options.mergeShort) {
+      // 尝试与下一个段落合并
+      if (result.length > 0) {
+        const lastParagraph = result[result.length - 1];
+        if (lastParagraph.length + paragraph.length <= options.maxLength) {
+          result[result.length - 1] = lastParagraph + '。' + paragraph;
+          continue;
+        }
+      }
+    }
+    
+    if (paragraph.length > options.maxLength && options.splitLong) {
+      // 分割过长段落
+      const subParagraphs = splitLongParagraph(paragraph, options);
+      result.push(...subParagraphs);
+    } else {
+      result.push(paragraph);
+    }
+  }
+  
+  return result;
+}
+
+// 后处理
+function postProcess(paragraphs) {
+  return paragraphs
+    .map(p => p.trim())
+    .filter(p => p.length > 0)
+    .map(p => {
+      // 确保段落以句号结尾
+      if (!p.endsWith('。') && !p.endsWith('!') && !p.endsWith('?')) {
+        return p + '。';
+      }
+      return p;
+    });
+}
+
+// 处理智能分段
+function processSmartParagraph() {
+  if (!form.value.inputText.trim()) {
+    ElMessage.warning('请输入需要分段的文本');
+    return;
+  }
+  
+  try {
+    result.value = smartParagraphSplit(form.value.inputText, form.value);
+    ElMessage.success('智能分段完成');
+  } catch (error) {
+    ElMessage.error('分段处理失败:' + error.message);
+  }
+}
+
+// 重置表单
+function resetForm() {
+  form.value = {
+    inputText: '',
+    strategy: 'auto',
+    minLength: 50,
+    maxLength: 300,
+    targetLength: 150,
+    keepDialogue: true,
+    keepQuotes: true,
+    mergeShort: true,
+    splitLong: true
+  };
+  result.value = null;
+}
+
+// 预览结果
+function previewResult() {
+  if (!result.value) return;
+  
+  const previewText = result.value.paragraphs.join('\n\n');
+  console.log('分段结果预览:', previewText);
+  
+  // 可以在这里添加复制到剪贴板的功能
+  navigator.clipboard.writeText(previewText).then(() => {
+    ElMessage.success('结果已复制到剪贴板');
+  }).catch(() => {
+    ElMessage.info('请手动复制结果');
+  });
+}
+</script>
+
+<style scoped>
+.smart-paragraph {
+  padding: 20px;
+}
+
+.card-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+}
+
+.result-section {
+  margin-top: 20px;
+}
+
+.paragraphs-container {
+  max-height: 400px;
+  overflow-y: auto;
+  border: 1px solid #e4e7ed;
+  border-radius: 4px;
+  padding: 10px;
+}
+
+.paragraph-item {
+  margin-bottom: 15px;
+  padding: 10px;
+  border: 1px solid #f0f0f0;
+  border-radius: 4px;
+  background-color: #fafafa;
+}
+
+.paragraph-item.short {
+  border-left: 3px solid #e6a23c;
+  background-color: #fdf6ec;
+}
+
+.paragraph-item.long {
+  border-left: 3px solid #f56c6c;
+  background-color: #fef0f0;
+}
+
+.paragraph-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  margin-bottom: 8px;
+  font-size: 12px;
+  color: #606266;
+}
+
+.paragraph-number {
+  font-weight: bold;
+}
+
+.paragraph-length {
+  color: #909399;
+}
+
+.paragraph-content {
+  line-height: 1.6;
+  color: #303133;
+  white-space: pre-wrap;
+}
+
+.statistics {
+  margin-top: 20px;
+}
+</style> 

+ 11 - 0
test_smart_paragraph.txt

@@ -0,0 +1,11 @@
+这是一个测试文本,用于演示智能分段功能。文本中包含多个句子,有些句子很长,有些句子很短。我们需要将这些句子合理地分成段落,使得每个段落都有合适的长度,同时保持语义的连贯性。
+
+在小说创作中,段落的分割非常重要。一个好的段落应该包含一个完整的想法或者一个场景的描述。如果段落太长,读者会感到疲劳;如果段落太短,文章会显得零散。因此,我们需要智能地分析文本内容,找到合适的分割点。
+
+对话是小说中的重要元素。当人物之间进行对话时,我们应该保持对话的完整性。例如:"你好,"小明说,"今天天气真不错。"这样的对话应该作为一个整体,不应该被分割到不同的段落中。
+
+引用和特殊格式也需要特别注意。如果文本中包含引用,比如"这是一段引用文字",我们应该保持引用的完整性。同样,如果文本中有列表或者编号,我们也应该保持其结构。
+
+智能分段算法会考虑多个因素:句子的长度、标点符号、语义连贯性、特殊格式等。它会尝试在保持语义完整性的同时,创建长度适中的段落。这样可以让读者有更好的阅读体验。
+
+最后,我们需要确保分段后的文本仍然具有良好的可读性。每个段落都应该有一个明确的主题,并且与前后段落有逻辑上的联系。这样可以让整个文本更加流畅和易于理解。 

Some files were not shown because too many files changed in this diff