只保留LLM提取模式,修改提取逻辑
This commit is contained in:
92
config.yaml
92
config.yaml
@@ -3,12 +3,12 @@
|
||||
|
||||
# LLM配置 - 阿里云千问
|
||||
llm:
|
||||
# 是否启用LLM(设为false则使用纯规则提取)
|
||||
# 是否启用LLM(当前版本必须为true)
|
||||
enabled: true
|
||||
# LLM提供商:qwen(阿里云千问)
|
||||
provider: "qwen"
|
||||
# 模型名称
|
||||
model: "qwen3-max-2026-01-23"
|
||||
model: "glm-5"
|
||||
# API密钥(建议使用环境变量 DASHSCOPE_API_KEY)
|
||||
api_key: ""  # redacted: never commit a real key; supply it via the DASHSCOPE_API_KEY environment variable (rotate the previously exposed key)
|
||||
# 可选参数
|
||||
@@ -48,7 +48,7 @@ extraction:
|
||||
priority: 1
|
||||
接口需求:
|
||||
prefix: "IR"
|
||||
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "CAN", "以太网", "通信"]
|
||||
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "输入输出"]
|
||||
priority: 2
|
||||
性能需求:
|
||||
prefix: "PR"
|
||||
@@ -68,23 +68,105 @@ extraction:
|
||||
priority: 6
|
||||
splitter:
|
||||
enabled: true
|
||||
max_sentence_len: 120
|
||||
min_clause_len: 12
|
||||
max_sentence_len: 160
|
||||
min_clause_len: 20
|
||||
semantic_type_policy:
|
||||
interface_section_hints:
|
||||
- "接口描述"
|
||||
- "接口需求"
|
||||
- "接口要求"
|
||||
- "外部接口"
|
||||
- "内部接口"
|
||||
- "I/O"
|
||||
interface_title_excludes:
|
||||
- "计算机通信需求"
|
||||
- "通信需求"
|
||||
- "通信要求"
|
||||
functional_section_hints:
|
||||
- "功能需求"
|
||||
- "功能要求"
|
||||
other_section_hints:
|
||||
- "安全性需求"
|
||||
- "保密性需求"
|
||||
- "适应性需求"
|
||||
- "环境需求"
|
||||
- "资源需求"
|
||||
- "质量"
|
||||
- "设计约束"
|
||||
- "培训需求"
|
||||
- "软件保障"
|
||||
- "验收"
|
||||
- "交付"
|
||||
- "包装"
|
||||
- "通信需求"
|
||||
- "计算机通信需求"
|
||||
- "硬件环境"
|
||||
- "软件环境"
|
||||
- "运行环境"
|
||||
semantic_guard:
|
||||
enabled: true
|
||||
preserve_condition_action_chain: true
|
||||
preserve_alarm_chain: true
|
||||
system_description_hints:
|
||||
- "系统描述"
|
||||
- "功能描述"
|
||||
- "概述"
|
||||
- "示意图"
|
||||
- "组成"
|
||||
- "架构"
|
||||
- "原理"
|
||||
table_strategy:
|
||||
llm_semantic_enabled: true
|
||||
sequence_table_merge: "single_requirement"
|
||||
merge_time_series_rows_min: 3
|
||||
skip_keywords:
|
||||
- "系统功能要求"
|
||||
- "性能要求"
|
||||
- "系统性能要求"
|
||||
- "系统接口要求"
|
||||
- "功能矩阵"
|
||||
- "能力对照"
|
||||
- "性能指标对照"
|
||||
interface_keywords:
|
||||
- "接口"
|
||||
- "interface"
|
||||
- "输入输出"
|
||||
- "I/O"
|
||||
- "数据来源"
|
||||
- "数据目的地"
|
||||
- "来源"
|
||||
- "目的地"
|
||||
single_requirement_keywords:
|
||||
- "硬件要求"
|
||||
- "软件要求"
|
||||
- "运行环境"
|
||||
- "硬件环境"
|
||||
- "软件环境"
|
||||
- "运行硬件环境"
|
||||
- "运行软件环境"
|
||||
- "环境需求"
|
||||
- "资源需求"
|
||||
- "计算机资源"
|
||||
rewrite_policy:
|
||||
llm_light_rewrite_enabled: true
|
||||
preserve_ratio_min: 0.65
|
||||
max_length_growth_ratio: 1.25
|
||||
non_interface_max_edit_distance: 20
|
||||
renumber_policy:
|
||||
enabled: true
|
||||
mode: "section_continuous"
|
||||
dedup_policy:
|
||||
similarity_threshold: 0.88
|
||||
enable_cross_section_dedup: true
|
||||
prefer_text_over_table: true
|
||||
interface_policy:
|
||||
unknown_fallback: "未知"
|
||||
normalization_policy:
|
||||
ocr_spacing_normalize: true
|
||||
fidelity_policy:
|
||||
preserve_source_text_for_text_blocks: true
|
||||
punctuation_policy:
|
||||
ensure_terminal_period: true
|
||||
|
||||
# 输出配置
|
||||
output:
|
||||
|
||||
Reference in New Issue
Block a user