只保留LLM提取模式,修改提取逻辑

This commit is contained in:
2026-04-18 20:33:58 +08:00
parent f01ddf045d
commit e274e7faa2
9 changed files with 1427 additions and 403 deletions

View File

@@ -3,12 +3,12 @@
# LLM配置 - 阿里云千问
llm:
# 是否启用LLM,设为false则使用纯规则提取
# 是否启用LLM,当前版本必须为true
enabled: true
# LLM提供商:qwen(阿里云千问)
provider: "qwen"
# 模型名称
model: "qwen3-max-2026-01-23"
model: "glm-5"
# API密钥:建议通过环境变量 DASHSCOPE_API_KEY 提供,请勿将真实密钥提交到版本库
api_key: "sk-7097f7842f724f0c9e70c4bf3b16dacb"
# 可选参数
@@ -48,7 +48,7 @@ extraction:
priority: 1
接口需求:
prefix: "IR"
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "CAN", "以太网", "通信"]
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "输入输出"]
priority: 2
性能需求:
prefix: "PR"
@@ -68,23 +68,105 @@ extraction:
priority: 6
splitter:
enabled: true
max_sentence_len: 120
min_clause_len: 12
max_sentence_len: 160
min_clause_len: 20
semantic_type_policy:
interface_section_hints:
- "接口描述"
- "接口需求"
- "接口要求"
- "外部接口"
- "内部接口"
- "I/O"
interface_title_excludes:
- "计算机通信需求"
- "通信需求"
- "通信要求"
functional_section_hints:
- "功能需求"
- "功能要求"
other_section_hints:
- "安全性需求"
- "保密性需求"
- "适应性需求"
- "环境需求"
- "资源需求"
- "质量"
- "设计约束"
- "培训需求"
- "软件保障"
- "验收"
- "交付"
- "包装"
- "通信需求"
- "计算机通信需求"
- "硬件环境"
- "软件环境"
- "运行环境"
semantic_guard:
enabled: true
preserve_condition_action_chain: true
preserve_alarm_chain: true
system_description_hints:
- "系统描述"
- "功能描述"
- "概述"
- "示意图"
- "组成"
- "架构"
- "原理"
table_strategy:
llm_semantic_enabled: true
sequence_table_merge: "single_requirement"
merge_time_series_rows_min: 3
skip_keywords:
- "系统功能要求"
- "性能要求"
- "系统性能要求"
- "系统接口要求"
- "功能矩阵"
- "能力对照"
- "性能指标对照"
interface_keywords:
- "接口"
- "interface"
- "输入输出"
- "I/O"
- "数据来源"
- "数据目的地"
- "来源"
- "目的地"
single_requirement_keywords:
- "硬件要求"
- "软件要求"
- "运行环境"
- "硬件环境"
- "软件环境"
- "运行硬件环境"
- "运行软件环境"
- "环境需求"
- "资源需求"
- "计算机资源"
rewrite_policy:
llm_light_rewrite_enabled: true
preserve_ratio_min: 0.65
max_length_growth_ratio: 1.25
non_interface_max_edit_distance: 20
renumber_policy:
enabled: true
mode: "section_continuous"
dedup_policy:
similarity_threshold: 0.88
enable_cross_section_dedup: true
prefer_text_over_table: true
interface_policy:
unknown_fallback: "未知"
normalization_policy:
ocr_spacing_normalize: true
fidelity_policy:
preserve_source_text_for_text_blocks: true
punctuation_policy:
ensure_terminal_period: true
# 输出配置
output: