---
# Configuration file for the SRS Requirement Document Parser (LLM-enhanced version)

# LLM settings - Alibaba Cloud Qwen
llm:
  # Whether the LLM is enabled (must be true in the current version)
  enabled: true
  # LLM provider: qwen (Alibaba Cloud Qwen)
  provider: "qwen"
  # Model name
  # NOTE(review): "glm-5" does not look like a Qwen-family model name while
  # provider is "qwen" - confirm the configured endpoint serves this model.
  model: "glm-5"
  # API key. Do not commit a real key to version control; supply it through
  # the DASHSCOPE_API_KEY environment variable instead.
  # NOTE(review): presumably the loader falls back to DASHSCOPE_API_KEY when
  # this value is empty - confirm against the config loader. Any key that was
  # ever committed in this field must be treated as leaked and rotated.
  api_key: ""
  # Optional generation parameters
  temperature: 0.3
  max_tokens: 1024

# Document parsing settings
document:
  # File extensions accepted as input
  supported_formats:
    - ".pdf"
    - ".docx"
  # Style names used to recognize headings
  heading_styles:
    - "Heading 1"
    - "Heading 2"
    - "Heading 3"
    - "Heading 4"
    - "Heading 5"
  # Non-requirement sections to filter out (GJB438B standard)
  non_requirement_sections:
    - "标识"
    - "系统概述"
    - "文档概述"
    - "引用文档"
    - "合格性规定"
    - "需求可追踪性"
    - "注释"
    - "附录"

# Requirement extraction settings
# NOTE(review): the nesting of the policy groups below (splitter through
# punctuation_policy) under `extraction` is reconstructed from key grouping;
# confirm against the keys the config loader actually reads.
extraction:
  # Keywords per requirement type (used to classify requirements automatically)
  requirement_types:
    功能需求:
      prefix: "FR"
      keywords: ["功能", "feature", "requirement", "CSCI组成", "控制", "处理", "监测", "显示"]
      priority: 1
    接口需求:
      prefix: "IR"
      keywords: ["接口", "interface", "api", "外部接口", "内部接口", "输入输出"]
      priority: 2
    性能需求:
      prefix: "PR"
      keywords: ["性能", "performance", "速度", "响应时间", "吞吐量"]
      priority: 3
    安全需求:
      prefix: "SR"
      keywords: ["安全", "security", "安全性", "报警"]
      priority: 4
    可靠性需求:
      prefix: "RR"
      keywords: ["可靠", "reliability", "容错", "恢复", "冗余"]
      priority: 5
    其他需求:
      prefix: "OR"
      keywords: ["约束", "资源", "适应性", "保密", "环境", "计算机", "质量", "设计", "人员", "培训", "保障", "验收", "交付"]
      priority: 6
  splitter:
    enabled: true
    max_sentence_len: 160
    min_clause_len: 20
  semantic_type_policy:
    interface_section_hints:
      - "接口描述"
      - "接口需求"
      - "接口要求"
      - "外部接口"
      - "内部接口"
      - "I/O"
    interface_title_excludes:
      - "计算机通信需求"
      - "通信需求"
      - "通信要求"
    functional_section_hints:
      - "功能需求"
      - "功能要求"
    other_section_hints:
      - "安全性需求"
      - "保密性需求"
      - "适应性需求"
      - "环境需求"
      - "资源需求"
      - "质量"
      - "设计约束"
      - "培训需求"
      - "软件保障"
      - "验收"
      - "交付"
      - "包装"
      - "通信需求"
      - "计算机通信需求"
      - "硬件环境"
      - "软件环境"
      - "运行环境"
  semantic_guard:
    enabled: true
    preserve_condition_action_chain: true
    preserve_alarm_chain: true
    system_description_hints:
      - "系统描述"
      - "功能描述"
      - "概述"
      - "示意图"
      - "组成"
      - "架构"
      - "原理"
  table_strategy:
    llm_semantic_enabled: true
    sequence_table_merge: "single_requirement"
    merge_time_series_rows_min: 3
    skip_keywords:
      - "系统功能要求"
      - "性能要求"
      - "系统性能要求"
      - "系统接口要求"
      - "功能矩阵"
      - "能力对照"
      - "性能指标对照"
    interface_keywords:
      - "接口"
      - "interface"
      - "输入输出"
      - "I/O"
      - "数据来源"
      - "数据目的地"
      - "来源"
      - "目的地"
    single_requirement_keywords:
      - "硬件要求"
      - "软件要求"
      - "运行环境"
      - "硬件环境"
      - "软件环境"
      - "运行硬件环境"
      - "运行软件环境"
      - "环境需求"
      - "资源需求"
      - "计算机资源"
  rewrite_policy:
    llm_light_rewrite_enabled: true
    preserve_ratio_min: 0.65
    max_length_growth_ratio: 1.25
    non_interface_max_edit_distance: 20
  renumber_policy:
    enabled: true
    mode: "section_continuous"
  dedup_policy:
    similarity_threshold: 0.88
    enable_cross_section_dedup: true
    prefer_text_over_table: true
  interface_policy:
    unknown_fallback: "未知"
  normalization_policy:
    ocr_spacing_normalize: true
  fidelity_policy:
    preserve_source_text_for_text_blocks: true
  punctuation_policy:
    ensure_terminal_period: true

# Output settings
output:
  format: "json"
  indent: 2
  # Whether to pretty-print (format) the output
  pretty_print: true
  # Whether to include metadata in the output
  include_metadata: true

# Logging settings
logging:
  # One of: DEBUG, INFO, WARNING, ERROR
  level: "INFO"
  # Log record layout, written with %(name)s-style placeholders
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  # Log file name
  file: "srs_parser.log"