Files
Extract_reqs/config.yaml

185 lines
4.7 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# 配置文件 - SRS 需求文档解析工具 (LLM增强版)
# Configuration file for SRS Requirement Document Parser (LLM Enhanced Version)
# LLM settings. The original header names Alibaba Cloud Qwen as the backend.
llm:
  # Whether the LLM is enabled; the original note states that the current
  # version requires this to be true.
  enabled: true
  # LLM provider ("qwen" = Alibaba Cloud Qwen, per the original note).
  provider: "qwen"
  # Model name.
  # NOTE(review): "glm-5" does not look like a Qwen-family model name;
  # confirm that the configured provider endpoint actually serves it.
  model: "glm-5"
  # API key. Do not commit a real key to version control: supply it through
  # the DASHSCOPE_API_KEY environment variable, as originally recommended.
  # The key that used to be hard-coded here has been exposed and should be
  # revoked and rotated.
  # NOTE(review): assumes the loader falls back to DASHSCOPE_API_KEY when
  # this value is empty - TODO confirm against the config-loading code.
  api_key: ""
  # Optional generation parameters.
  temperature: 0.3
  max_tokens: 1024
# Document parsing settings.
document:
  # File extensions listed as supported input formats.
  supported_formats:
    - ".pdf"
    - ".docx"
  # Style names used to recognise headings.
  heading_styles:
    - "Heading 1"
    - "Heading 2"
    - "Heading 3"
    - "Heading 4"
    - "Heading 5"
  # Non-requirement sections to filter out (GJB438B standard, per the
  # original note).
  non_requirement_sections:
    - "标识"
    - "系统概述"
    - "文档概述"
    - "引用文档"
    - "合格性规定"
    - "需求可追踪性"
    - "注释"
    - "附录"
# Requirement extraction settings.
extraction:
  # Requirement-type keywords (used to determine the requirement type
  # automatically). Each type carries an ID prefix, a keyword list and a
  # priority number.
  # NOTE(review): whether a lower priority number wins on a keyword tie is
  # not visible here - confirm in the classifier code.
  requirement_types:
    功能需求:
      prefix: "FR"
      keywords: ["功能", "feature", "requirement", "CSCI组成", "控制", "处理", "监测", "显示"]
      priority: 1
    接口需求:
      prefix: "IR"
      keywords: ["接口", "interface", "api", "外部接口", "内部接口", "输入输出"]
      priority: 2
    性能需求:
      prefix: "PR"
      keywords: ["性能", "performance", "速度", "响应时间", "吞吐量"]
      priority: 3
    安全需求:
      prefix: "SR"
      keywords: ["安全", "security", "安全性", "报警"]
      priority: 4
    可靠性需求:
      prefix: "RR"
      keywords: ["可靠", "reliability", "容错", "恢复", "冗余"]
      priority: 5
    其他需求:
      prefix: "OR"
      keywords: ["约束", "资源", "适应性", "保密", "环境", "计算机", "质量", "设计", "人员", "培训", "保障", "验收", "交付"]
      priority: 6
  # NOTE(review): the policy keys that follow in this file (splitter,
  # semantic_type_policy, ...) may also belong under `extraction`; the
  # flattened source does not show their nesting - confirm with the loader.
# Splitter settings: an on/off switch plus two length limits.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
# NOTE(review): the unit of the limits (characters vs. tokens) is not visible
# here - confirm in the splitter code.
splitter:
  enabled: true
  max_sentence_len: 160
  min_clause_len: 20
# Section-title hint lists, grouped by requirement category (interface,
# functional, other), plus titles excluded from the interface category.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
# NOTE(review): "通信需求" and "计算机通信需求" appear both in
# interface_title_excludes and in other_section_hints; presumably intentional
# (excluded from interface, routed to other) - confirm.
semantic_type_policy:
  interface_section_hints:
    - "接口描述"
    - "接口需求"
    - "接口要求"
    - "外部接口"
    - "内部接口"
    - "I/O"
  interface_title_excludes:
    - "计算机通信需求"
    - "通信需求"
    - "通信要求"
  functional_section_hints:
    - "功能需求"
    - "功能要求"
  other_section_hints:
    - "安全性需求"
    - "保密性需求"
    - "适应性需求"
    - "环境需求"
    - "资源需求"
    - "质量"
    - "设计约束"
    - "培训需求"
    - "软件保障"
    - "验收"
    - "交付"
    - "包装"
    - "通信需求"
    - "计算机通信需求"
    - "硬件环境"
    - "软件环境"
    - "运行环境"
# Semantic guard switches: a master flag and two "preserve ... chain" flags.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
semantic_guard:
  enabled: true
  preserve_condition_action_chain: true
  preserve_alarm_chain: true
# Hint strings for system-description content.
# NOTE(review): the flattened source does not show this key's parent; it may
# belong under `semantic_guard:` or `extraction:` - confirm with the loader.
system_description_hints:
  - "系统描述"
  - "功能描述"
  - "概述"
  - "示意图"
  - "组成"
  - "架构"
  - "原理"
# Table handling strategy: three scalar options followed by three keyword
# lists (skip, interface, single-requirement).
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
table_strategy:
  llm_semantic_enabled: true
  sequence_table_merge: "single_requirement"
  merge_time_series_rows_min: 3
  skip_keywords:
    - "系统功能要求"
    - "性能要求"
    - "系统性能要求"
    - "系统接口要求"
    - "功能矩阵"
    - "能力对照"
    - "性能指标对照"
  interface_keywords:
    - "接口"
    - "interface"
    - "输入输出"
    - "I/O"
    - "数据来源"
    - "数据目的地"
    - "来源"
    - "目的地"
  single_requirement_keywords:
    - "硬件要求"
    - "软件要求"
    - "运行环境"
    - "硬件环境"
    - "软件环境"
    - "运行硬件环境"
    - "运行软件环境"
    - "环境需求"
    - "资源需求"
    - "计算机资源"
# Rewrite policy: a switch for LLM light rewriting plus three numeric bounds.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
# NOTE(review): the unit of non_interface_max_edit_distance is not visible
# here - confirm in the rewrite code.
rewrite_policy:
  llm_light_rewrite_enabled: true
  preserve_ratio_min: 0.65
  max_length_growth_ratio: 1.25
  non_interface_max_edit_distance: 20
# Renumbering policy: on/off switch and numbering mode.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
renumber_policy:
  enabled: true
  mode: "section_continuous"
# De-duplication policy: similarity threshold and two boolean preferences.
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
dedup_policy:
  similarity_threshold: 0.88
  enable_cross_section_dedup: true
  prefer_text_over_table: true
# Four single-option policies (interface, normalization, fidelity,
# punctuation).
# NOTE(review): may need to nest under `extraction:` - confirm with the loader.
interface_policy:
  # Fallback string for unknown values ("未知" means "unknown").
  unknown_fallback: "未知"
normalization_policy:
  ocr_spacing_normalize: true
fidelity_policy:
  preserve_source_text_for_text_blocks: true
punctuation_policy:
  ensure_terminal_period: true
# Output settings.
output:
  format: "json"
  indent: 2
  # Whether to pretty-print (format) the output.
  pretty_print: true
  # Whether to include metadata.
  include_metadata: true
# Logging settings.
logging:
  # Log level; the original note lists DEBUG, INFO, WARNING, ERROR.
  level: "INFO"
  # Log record format string.
  format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
  # Log file name.
  file: "srs_parser.log"