  <!-- SmartParagraph.vue -->
  <template>
    <div class="smart-paragraph">
      <el-card>
        <!-- Card header: tool title and tagline -->
        <template #header>
          <div class="card-header">
            <span>智能分段工具</span>
            <el-tag type="info">基于语义的段落优化</el-tag>
          </div>
        </template>

        <!-- Input form: text, strategy, length limits and toggles -->
        <el-form label-width="120px" :model="form">
          <el-form-item label="输入文本">
            <el-input
              v-model="form.inputText"
              type="textarea"
              placeholder="请输入需要智能分段的文本..."
              :rows="10"
            />
          </el-form-item>

          <el-form-item label="分段策略">
            <el-radio-group v-model="form.strategy">
              <el-radio label="auto">自动优化</el-radio>
              <el-radio label="sentence">按句子分段</el-radio>
              <el-radio label="length">按长度分段</el-radio>
              <el-radio label="semantic">语义分段</el-radio>
            </el-radio-group>
          </el-form-item>

          <el-form-item label="分段参数">
            <el-row :gutter="20">
              <el-col :span="8">
                <el-form-item label="最小段落长度">
                  <el-input-number v-model="form.minLength" :min="10" :max="500" :step="10" />
                </el-form-item>
              </el-col>
              <el-col :span="8">
                <el-form-item label="最大段落长度">
                  <el-input-number v-model="form.maxLength" :min="50" :max="1000" :step="50" />
                </el-form-item>
              </el-col>
              <el-col :span="8">
                <el-form-item label="目标段落长度">
                  <el-input-number v-model="form.targetLength" :min="50" :max="500" :step="25" />
                </el-form-item>
              </el-col>
            </el-row>
          </el-form-item>

          <el-form-item label="特殊处理">
            <el-checkbox v-model="form.keepDialogue">保持对话完整性</el-checkbox>
            <el-checkbox v-model="form.keepQuotes">保持引用完整性</el-checkbox>
            <el-checkbox v-model="form.mergeShort">合并过短段落</el-checkbox>
            <el-checkbox v-model="form.splitLong">分割过长段落</el-checkbox>
          </el-form-item>

          <el-form-item>
            <el-button type="primary" @click="processSmartParagraph">智能分段</el-button>
            <el-button @click="resetForm">重置</el-button>
            <el-button :disabled="!result" @click="previewResult">预览结果</el-button>
          </el-form-item>
        </el-form>

        <!-- Result: one card per paragraph, flagged when outside the configured limits -->
        <div v-if="result" class="result-section">
          <h4>分段结果:</h4>
          <div class="paragraphs-container">
            <div
              v-for="(paragraph, index) in result.paragraphs"
              :key="index"
              class="paragraph-item"
              :class="{ short: paragraph.length < form.minLength, long: paragraph.length > form.maxLength }"
            >
              <div class="paragraph-header">
                <span class="paragraph-number">段落 {{ index + 1 }}</span>
                <span class="paragraph-length">{{ paragraph.length }} 字符</span>
                <el-tag v-if="paragraph.length < form.minLength" type="warning" size="small">过短</el-tag>
                <el-tag v-if="paragraph.length > form.maxLength" type="danger" size="small">过长</el-tag>
              </div>
              <div class="paragraph-content">{{ paragraph }}</div>
            </div>
          </div>

          <div class="statistics">
            <el-descriptions border :column="4">
              <el-descriptions-item label="总段落数">{{ result.paragraphs.length }}</el-descriptions-item>
              <el-descriptions-item label="平均长度">{{ result.averageLength }} 字符</el-descriptions-item>
              <el-descriptions-item label="最短段落">{{ result.minLength }} 字符</el-descriptions-item>
              <el-descriptions-item label="最长段落">{{ result.maxLength }} 字符</el-descriptions-item>
            </el-descriptions>
          </div>
        </div>
      </el-card>
    </div>
  </template>
  <script setup>
  import { ref } from 'vue';
  import { ElMessage } from 'element-plus';

  // Builds a fresh copy of the initial form values bound to the <el-form>.
  const initialFormState = () => ({
    inputText: '',
    strategy: 'auto',
    minLength: 50,
    maxLength: 300,
    targetLength: 150,
    keepDialogue: true,
    keepQuotes: true,
    mergeShort: true,
    splitLong: true
  });

  // Reactive form model read by the template and passed as options to the splitter.
  const form = ref(initialFormState());

  // Latest segmentation result; null until a split has been run (the template hides the result section).
  const result = ref(null);
  /**
   * Core entry point: splits `text` with the strategy named in `options.strategy`,
   * post-processes the paragraphs and computes length statistics.
   *
   * @param {string} text - Raw input text.
   * @param {object} options - Splitter options; `strategy` selects 'sentence',
   *   'length' or 'semantic', anything else falls back to the auto strategy.
   *   The whole object is forwarded to the strategy functions.
   * @returns {{paragraphs: string[], averageLength: number, minLength: number, maxLength: number}}
   *   All-zero statistics with an empty list when there is nothing to report.
   */
  function smartParagraphSplit(text, options) {
    const emptyResult = () => ({ paragraphs: [], averageLength: 0, minLength: 0, maxLength: 0 });

    if (typeof text !== 'string' || !text.trim()) return emptyResult();

    let paragraphs;
    switch (options.strategy) {
      case 'sentence':
        paragraphs = splitBySentences(text, options);
        break;
      case 'length':
        paragraphs = splitByLength(text, options);
        break;
      case 'semantic':
        paragraphs = splitBySemantic(text, options);
        break;
      default:
        paragraphs = autoOptimize(text, options);
    }

    paragraphs = postProcess(paragraphs, options);

    // A strategy can legitimately yield nothing (e.g. punctuation-only input);
    // without this guard the statistics would be NaN / Infinity / -Infinity.
    if (paragraphs.length === 0) return emptyResult();

    let total = 0;
    let shortest = Infinity;
    let longest = 0;
    for (const { length } of paragraphs) {
      total += length;
      if (length < shortest) shortest = length;
      if (length > longest) longest = length;
    }

    return {
      paragraphs,
      averageLength: Math.round(total / paragraphs.length),
      minLength: shortest,
      maxLength: longest
    };
  }
  /**
   * Sentence strategy: cuts the text into sentences and packs consecutive
   * sentences into paragraphs no longer than `options.maxLength`.
   *
   * Each sentence keeps its own terminator ('?', '!', ';', '。' and their
   * full-width forms). A sentence that was delimited only by a newline or the
   * end of the text is closed with '。'. A single sentence longer than
   * `maxLength` is emitted as its own paragraph.
   *
   * @param {string} text - Raw input text.
   * @param {{maxLength: number}} options - Only `maxLength` is read here.
   * @returns {string[]} Paragraphs in original order.
   */
  function splitBySentences(text, options) {
    // Group 1: sentence body (no terminators or newlines); group 2: its terminator run, possibly empty.
    const sentencePattern = /([^。！？；!?;\n]+)([。！？；!?;]*)/g;
    const paragraphs = [];
    let current = '';

    for (const [, rawBody, terminator] of text.matchAll(sentencePattern)) {
      const body = rawBody.trim();
      if (!body) continue;

      const sentence = body + (terminator || '。');

      // Flush first so the paragraph never grows past maxLength.
      if (current && current.length + sentence.length > options.maxLength) {
        paragraphs.push(current);
        current = '';
      }

      if (sentence.length > options.maxLength) {
        // Cannot fit anywhere: keep the sentence intact as its own paragraph.
        paragraphs.push(sentence);
      } else {
        current += sentence;
      }
    }

    if (current) {
      paragraphs.push(current);
    }
    return paragraphs;
  }
  /**
   * Length strategy: accumulates the text one UTF-16 unit at a time and, each
   * time the buffer reaches `options.targetLength`, cuts it at the position
   * chosen by `findBestSplitPoint`.
   *
   * @param {string} text - Text to cut.
   * @param {object} options - Needs `targetLength`; forwarded to `findBestSplitPoint`.
   * @returns {string[]} Trimmed pieces in original order.
   */
  function splitByLength(text, options) {
    const pieces = [];
    const units = text.split('');
    let buffer = '';

    for (let i = 0; i < units.length; i += 1) {
      buffer += units[i];

      // Keep accumulating until the buffer reaches the target length.
      if (!(buffer.length >= options.targetLength)) continue;

      const cut = findBestSplitPoint(buffer, options);
      if (!(cut > 0)) continue;

      pieces.push(buffer.substring(0, cut).trim());
      buffer = buffer.substring(cut);
    }

    // Whatever is left after the last cut becomes the final piece.
    const tail = buffer.trim();
    if (tail) {
      pieces.push(tail);
    }
    return pieces;
  }
  /**
   * Semantic strategy: treats blank-line gaps as paragraph boundaries and
   * further splits paragraphs that exceed `options.maxLength`.
   *
   * @param {string} text - Raw input text.
   * @param {object} options - Reads `maxLength` and `splitLong`; forwarded to
   *   `splitLongParagraph`. Long paragraphs are split unless `splitLong` is
   *   explicitly `false`, so callers that never set the flag behave as before.
   * @returns {string[]} Trimmed, non-empty paragraphs.
   */
  function splitBySemantic(text, options) {
    // A newline, optional whitespace, then at least one more newline.
    const paragraphMarkers = /\n\s*\n+/g;
    const mayDivide = options.splitLong !== false;
    const paragraphs = [];

    for (const rawParagraph of text.split(paragraphMarkers)) {
      const paragraph = rawParagraph.trim();
      if (!paragraph) continue;

      // Measure the trimmed text so padding alone never triggers a split.
      if (mayDivide && paragraph.length > options.maxLength) {
        paragraphs.push(...splitLongParagraph(paragraph, options));
      } else {
        paragraphs.push(paragraph);
      }
    }
    return paragraphs;
  }
  /**
   * Auto strategy: semantic split first, then a length-optimisation pass.
   *
   * @param {string} text - Raw input text.
   * @param {object} options - Forwarded unchanged to both stages.
   * @returns {string[]} Paragraphs returned by `optimizeLength`.
   */
  function autoOptimize(text, options) {
    const semanticParagraphs = splitBySemantic(text, options);
    return optimizeLength(semanticParagraphs, options);
  }
  /**
   * Picks the index at which `text` should be cut.
   *
   * Boundary kinds are tried in priority order: sentence terminators, commas,
   * colons, whitespace. Within a kind, the last boundary whose end position
   * lies in [`minLength`, `maxLength`] wins. Both half-width and full-width
   * punctuation are recognised. If no boundary qualifies, the cut is forced at
   * `min(maxLength, text.length)`, nudged so it never lands between the two
   * halves of a surrogate pair.
   *
   * @param {string} text - Candidate text.
   * @param {{minLength: number, maxLength: number}} options - Allowed cut range.
   * @returns {number} Cut index: the text before it is the piece, the rest is the remainder.
   */
  function findBestSplitPoint(text, options) {
    const boundaryPatterns = [
      /[。！？；!?;]/g, // sentence terminators
      /[，,、]/g, // commas
      /[：:]/g, // colons
      /\s+/g // whitespace
    ];

    for (const pattern of boundaryPatterns) {
      let best = -1;
      // Matches arrive left to right, so the last qualifying one is the rightmost.
      for (const match of text.matchAll(pattern)) {
        const position = match.index + match[0].length;
        if (position >= options.minLength && position <= options.maxLength) {
          best = position;
        }
      }
      if (best !== -1) return best;
    }

    // No natural boundary in range: force a cut.
    let cut = Math.min(options.maxLength, text.length);
    if (cut > 0 && cut < text.length) {
      const before = text.charCodeAt(cut - 1);
      const after = text.charCodeAt(cut);
      const splitsPair = before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair) {
        // Step back to stay within maxLength; step forward only if that would leave nothing.
        cut = cut > 1 ? cut - 1 : cut + 1;
      }
    }
    return cut;
  }
  /**
   * Breaks one over-long paragraph into pieces of at most `options.maxLength`.
   *
   * The paragraph is cut into sentences that keep their own terminators and
   * are packed greedily. A sentence that is longer than `maxLength` on its own
   * is handed to `splitByLength`, wherever it occurs in the paragraph.
   *
   * @param {string} paragraph - Paragraph to break up.
   * @param {object} options - Reads `maxLength`; forwarded to `splitByLength`.
   * @returns {string[]} Pieces in original order.
   */
  function splitLongParagraph(paragraph, options) {
    // Group 1: sentence body; group 2: its terminator run (empty only at the end of the paragraph).
    const sentencePattern = /([^。！？；!?;]+)([。！？；!?;]*)/g;
    const pieces = [];
    let current = '';

    for (const [, body, terminator] of paragraph.matchAll(sentencePattern)) {
      if (!body.trim()) continue;

      const sentence = body + terminator;

      // Flush before the group would overflow.
      if (current && current.length + sentence.length > options.maxLength) {
        pieces.push(current.trim());
        current = '';
      }

      if (sentence.length > options.maxLength) {
        // The sentence alone is too long: fall back to length-based chunks.
        pieces.push(...splitByLength(sentence, options));
      } else {
        current += sentence;
      }
    }

    if (current) {
      pieces.push(current.trim());
    }
    return pieces;
  }
  /**
   * Length-optimisation pass over already-split paragraphs.
   *
   * - With `options.mergeShort`, a paragraph shorter than `minLength` is folded
   *   into the paragraph before it when the combined text, joiner included,
   *   still fits in `maxLength`. A '。' joiner is inserted only if the earlier
   *   paragraph does not already end a sentence.
   * - With `options.splitLong`, a paragraph longer than `maxLength` is replaced
   *   by the pieces returned from `splitLongParagraph`.
   *
   * @param {string[]} paragraphs - Paragraphs in document order.
   * @param {object} options - Reads `minLength`, `maxLength`, `mergeShort`, `splitLong`.
   * @returns {string[]} New array; the input array is not modified.
   */
  function optimizeLength(paragraphs, options) {
    // Sentence terminator, optionally followed by closing quotes or brackets, at the very end.
    const endsSentence = /[。！？!?….][”’"'」』）)】》]*$/;
    const optimized = [];

    for (const paragraph of paragraphs) {
      const isShort = paragraph.length < options.minLength;

      if (isShort && options.mergeShort && optimized.length > 0) {
        const previous = optimized[optimized.length - 1];
        const joiner = endsSentence.test(previous) ? '' : '。';
        const combined = previous + joiner + paragraph;
        if (combined.length <= options.maxLength) {
          optimized[optimized.length - 1] = combined;
          continue;
        }
      }

      if (paragraph.length > options.maxLength && options.splitLong) {
        optimized.push(...splitLongParagraph(paragraph, options));
      } else {
        optimized.push(paragraph);
      }
    }
    return optimized;
  }
  /**
   * Final clean-up: trims every paragraph, drops empty ones and makes sure each
   * remaining paragraph ends with sentence-final punctuation.
   *
   * A paragraph is left untouched when it already ends with a terminator
   * (half- or full-width), optionally followed by closing quotes or brackets.
   * Otherwise trailing clause punctuation (comma, semicolon, colon) is replaced
   * by '。' rather than being followed by one, and a paragraph made only of
   * such punctuation is dropped.
   *
   * @param {string[]} paragraphs - Paragraphs produced by a split strategy.
   * @returns {string[]} Cleaned paragraphs in the same order.
   */
  function postProcess(paragraphs) {
    const alreadyTerminated = /[。！？!?….][”’"'」』）)】》]*$/;
    const trailingClausePunctuation = /[，,、；;：:\s]+$/;

    return paragraphs
      .map((p) => p.trim())
      .filter((p) => p.length > 0)
      .flatMap((p) => {
        if (alreadyTerminated.test(p)) return [p];

        const body = p.replace(trailingClausePunctuation, '');
        // Nothing but clause punctuation: there is no content to keep.
        return body ? [body + '。'] : [];
      });
  }
  /**
   * Click handler for the primary button: validates the input, runs the
   * splitter with the current form values and stores the outcome in `result`.
   * Success and failure are both reported through `ElMessage`.
   */
  function processSmartParagraph() {
    const { inputText } = form.value;

    if (inputText.trim() === '') {
      ElMessage.warning('请输入需要分段的文本');
      return;
    }

    try {
      const outcome = smartParagraphSplit(form.value.inputText, form.value);
      result.value = outcome;
      ElMessage.success('智能分段完成');
    } catch (error) {
      ElMessage.error(`分段处理失败:${error.message}`);
    }
  }
  /**
   * Click handler for the reset button: restores every form field to its
   * initial value and clears the previous result.
   */
  function resetForm() {
    const defaults = {
      inputText: '',
      strategy: 'auto',
      minLength: 50,
      maxLength: 300,
      targetLength: 150,
      keepDialogue: true,
      keepQuotes: true,
      mergeShort: true,
      splitLong: true
    };

    form.value = defaults;
    result.value = null;
  }
  /**
   * Click handler for the preview button: joins the paragraphs with blank
   * lines, logs the text and tries to copy it to the clipboard.
   *
   * The async Clipboard API is missing in insecure contexts and some older
   * browsers. In that case the user is asked to copy manually, instead of the
   * handler throwing on `navigator.clipboard.writeText`.
   */
  function previewResult() {
    if (!result.value) return;

    const previewText = result.value.paragraphs.join('\n\n');
    console.log('分段结果预览:', previewText);

    const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
    if (typeof clipboard?.writeText !== 'function') {
      ElMessage.info('请手动复制结果');
      return;
    }

    clipboard.writeText(previewText).then(() => {
      ElMessage.success('结果已复制到剪贴板');
    }).catch(() => {
      // Permission denied or write failure: fall back to manual copy.
      ElMessage.info('请手动复制结果');
    });
  }
  </script>
  <style scoped>
  /* Page wrapper */
  .smart-paragraph {
    padding: 20px;
  }

  /* Card header: title on the left, tag on the right */
  .card-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
  }

  /* Result area below the form */
  .result-section {
    margin-top: 20px;
  }

  /* Scrollable list of paragraph cards */
  .paragraphs-container {
    max-height: 400px;
    overflow-y: auto;
    border: 1px solid #e4e7ed;
    border-radius: 4px;
    padding: 10px;
  }

  .paragraph-item {
    margin-bottom: 15px;
    padding: 10px;
    border: 1px solid #f0f0f0;
    border-radius: 4px;
    background-color: #fafafa;
  }

  /* Paragraph shorter than the configured minimum */
  .paragraph-item.short {
    border-left: 3px solid #e6a23c;
    background-color: #fdf6ec;
  }

  /* Paragraph longer than the configured maximum */
  .paragraph-item.long {
    border-left: 3px solid #f56c6c;
    background-color: #fef0f0;
  }

  /* Per-paragraph header: index, length and status tags */
  .paragraph-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 8px;
    font-size: 12px;
    color: #606266;
  }

  .paragraph-number {
    font-weight: bold;
  }

  .paragraph-length {
    color: #909399;
  }

  /* pre-wrap keeps line breaks that survive inside a paragraph */
  .paragraph-content {
    line-height: 1.6;
    color: #303133;
    white-space: pre-wrap;
  }

  .statistics {
    margin-top: 20px;
  }
  </style>