针对测试用例生成添加了常用测试方法;更新了需求提取工具

This commit is contained in:
2026-04-18 21:13:33 +08:00
parent c7c0659a85
commit 0c2ed67e2a
21 changed files with 2029 additions and 481 deletions

View File

@@ -3,14 +3,14 @@
# LLM配置 - 阿里云千问
llm:
# 是否启用LLM设为false则使用纯规则提取
# 是否启用LLM当前版本必须为true
enabled: true
# LLM提供商qwen阿里云千问
provider: "qwen"
# 模型名称
model: "qwen3-max"
# API密钥统一由 rag-web-ui 的环境变量提供
api_key: ""
model: "glm-5"
# API key: keep this value empty in version control and supply the real key
# through the DASHSCOPE_API_KEY environment variable.
# NOTE(review): a live key was committed on this line; it must be revoked and
# rotated. Presumably the loader falls back to the environment when this is
# empty -- verify against the config-loading code.
api_key: ""
# 可选参数
temperature: 0.3
max_tokens: 1024
@@ -48,7 +48,7 @@ extraction:
priority: 1
接口需求:
prefix: "IR"
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "CAN", "以太网", "通信"]
keywords: ["接口", "interface", "api", "外部接口", "内部接口", "输入输出"]
priority: 2
性能需求:
prefix: "PR"
@@ -68,23 +68,105 @@ extraction:
priority: 6
splitter:
enabled: true
max_sentence_len: 120
min_clause_len: 12
max_sentence_len: 160
min_clause_len: 20
semantic_type_policy:
interface_section_hints:
- "接口描述"
- "接口需求"
- "接口要求"
- "外部接口"
- "内部接口"
- "I/O"
interface_title_excludes:
- "计算机通信需求"
- "通信需求"
- "通信要求"
functional_section_hints:
- "功能需求"
- "功能要求"
other_section_hints:
- "安全性需求"
- "保密性需求"
- "适应性需求"
- "环境需求"
- "资源需求"
- "质量"
- "设计约束"
- "培训需求"
- "软件保障"
- "验收"
- "交付"
- "包装"
- "通信需求"
- "计算机通信需求"
- "硬件环境"
- "软件环境"
- "运行环境"
semantic_guard:
enabled: true
preserve_condition_action_chain: true
preserve_alarm_chain: true
system_description_hints:
- "系统描述"
- "功能描述"
- "概述"
- "示意图"
- "组成"
- "架构"
- "原理"
table_strategy:
llm_semantic_enabled: true
sequence_table_merge: "single_requirement"
merge_time_series_rows_min: 3
skip_keywords:
- "系统功能要求"
- "性能要求"
- "系统性能要求"
- "系统接口要求"
- "功能矩阵"
- "能力对照"
- "性能指标对照"
interface_keywords:
- "接口"
- "interface"
- "输入输出"
- "I/O"
- "数据来源"
- "数据目的地"
- "来源"
- "目的地"
single_requirement_keywords:
- "硬件要求"
- "软件要求"
- "运行环境"
- "硬件环境"
- "软件环境"
- "运行硬件环境"
- "运行软件环境"
- "环境需求"
- "资源需求"
- "计算机资源"
rewrite_policy:
llm_light_rewrite_enabled: true
preserve_ratio_min: 0.65
max_length_growth_ratio: 1.25
non_interface_max_edit_distance: 20
renumber_policy:
enabled: true
mode: "section_continuous"
dedup_policy:
similarity_threshold: 0.88
enable_cross_section_dedup: true
prefer_text_over_table: true
interface_policy:
unknown_fallback: "未知"
normalization_policy:
ocr_spacing_normalize: true
fidelity_policy:
preserve_source_text_for_text_blocks: true
punctuation_policy:
ensure_terminal_period: true
# 输出配置
output:

View File

@@ -4,7 +4,6 @@
支持PDF和Docx格式针对GJB438B标准SRS文档优化
"""
import os
import re
import logging
import importlib
@@ -119,43 +118,19 @@ class DocumentParser(ABC):
sections: 章节列表
parent_number: 父章节编号
"""
# 仅在顶级章节重编号
if not parent_number:
# 前置章节关键词(需要跳过的)
skip_keywords = ['目录', '封面', '扉页', '未命名', '', '']
# 正文章节关键词(遇到这些说明正文开始)
content_keywords = ['外部接口', '接口', '软件需求', '需求', '功能', '性能', '设计', '概述', '标识', '引言']
start_index = 0
for idx, section in enumerate(sections):
# 优先检查是否是正文章节
is_content = any(kw in section.title for kw in content_keywords)
if is_content and section.level == 1:
start_index = idx
break
# 重新编号所有章节
counter = 1
for i, section in enumerate(sections):
if i < start_index:
# 前置章节不编号
section.number = ""
else:
# 正文章节顶级章节从1开始编号
if section.level == 1:
section.number = str(counter)
counter += 1
# 递归处理子章节
if section.children:
self._auto_number_sections(section.children, section.number)
else:
# 子章节编号
for i, section in enumerate(sections, 1):
if not section.number or self._is_chinese_number(section.number):
section.generate_auto_number(parent_number, i)
if section.children:
self._auto_number_sections(section.children, section.number)
if not sections:
return
# 仅为缺失编号的章节补号;已存在的文档原始编号必须保留。
sibling_index = 0
for section in sections:
has_number = bool((section.number or "").strip()) and not self._is_chinese_number(section.number)
if not has_number:
sibling_index += 1
section.generate_auto_number(parent_number, sibling_index)
if section.children:
self._auto_number_sections(section.children, section.number)
def _is_chinese_number(self, text: str) -> bool:
"""检查是否是中文数字编号"""
@@ -327,8 +302,13 @@ class PDFParser(DocumentParser):
'优先', '关键', '合格', '追踪', '注释',
'CSCI', '计算机', '软件', '硬件', '通信', '通讯',
'数据', '适应', '可靠', '内部', '外部',
'描述', '要求', '规定', '说明', '定义',
'电场', '防护', '装置', '控制', '监控', '显控'
'描述', '要求', '规定', '说明', '定义'
]
TOP_LEVEL_TITLE_KEYWORDS = [
'范围', '标识', '概述', '引用', '文档', '需求', '接口', '性能',
'安全', '保密', '环境', '资源', '质量', '设计', '约束', '验收',
'交付', '包装', '注释'
]
# 明显无效的章节标题模式(噪声)
@@ -411,21 +391,41 @@ class PDFParser(DocumentParser):
if page_idx < len(self._page_texts):
page_text = self._page_texts[page_idx]
extracted_tables = page.extract_tables() or []
for table_idx, table in enumerate(extracted_tables):
table_objs = page.find_tables() or []
if table_objs:
extracted_tables = [(idx, t.extract(), t.bbox) for idx, t in enumerate(table_objs)]
else:
raw_tables = page.extract_tables() or []
extracted_tables = [(idx, t, None) for idx, t in enumerate(raw_tables)]
for table_idx, table, bbox in extracted_tables:
cleaned_table: List[List[str]] = []
for row in table or []:
cells = [re.sub(r'\s+', ' ', str(cell or '')).strip() for cell in row]
# 只要存在非空单元格就保留,避免有效行被误丢弃。
if any(cells):
cleaned_table.append(cells)
if cleaned_table:
section_hint = ""
if bbox:
try:
top = float(bbox[1])
text_above = page.crop((0, 0, page.width, top)).extract_text() or ""
section_hint = self._find_last_section_number(text_above)
except Exception:
section_hint = ""
table_ref = self._extract_table_reference(cleaned_table)
tables.append(
{
"page_idx": page_idx,
"table_idx": table_idx,
"page_text": page_text,
"data": cleaned_table,
"section_hint": section_hint,
"table_ref": table_ref,
}
)
except Exception as e:
@@ -435,16 +435,86 @@ class PDFParser(DocumentParser):
logger.info(f"PDF表格提取完成{len(tables)}个表格")
return tables
def _extract_table_reference(self, table: List[List[str]]) -> str:
"""从表格前几行中提取表号引用如“表3-5”。"""
if not table:
return ""
head_rows = table[:2]
merged = " ".join(" ".join(str(c or "") for c in row) for row in head_rows)
merged = re.sub(r"\s+", "", merged)
m = re.search(r"\s*(\d+(?:[-]\d+){1,3})", merged)
if not m:
return ""
return m.group(1).replace("", "-")
def _build_table_reference_index(self, sections: List[Section]) -> Dict[str, List[Section]]:
"""构建“表号 -> 章节”索引,用于优先精确挂接表格。"""
index: Dict[str, List[Section]] = {}
for section in sections:
content = re.sub(r"\s+", "", section.content or "")
for m in re.finditer(r"\s*(\d+(?:[-]\d+){1,3})", content):
ref = m.group(1).replace("", "-")
index.setdefault(ref, []).append(section)
return index
def _find_last_section_number(self, text: str) -> str:
"""从文本中提取最后出现的章节号。"""
if not text:
return ""
found = ""
for line in text.split("\n"):
line = line.strip()
if not line:
continue
section_info = self._match_section_header(line, set())
if section_info:
found = section_info[0]
return found
def _attach_pdf_tables_to_sections(self, tables: List[Dict[str, Any]]) -> None:
"""将提取出的PDF表格挂接到最匹配的章节。"""
flat_sections = self._flatten_sections(self.sections)
if not flat_sections:
return
section_by_number = {
(s.number or "").strip(): s
for s in flat_sections
if (s.number or "").strip()
}
table_ref_index = self._build_table_reference_index(flat_sections)
last_section: Optional[Section] = None
for table in tables:
matched = self._match_table_section(table.get("page_text", ""), flat_sections)
target = matched or last_section or flat_sections[0]
target = None
table_ref = (table.get("table_ref") or "").strip()
if table_ref and table_ref in table_ref_index:
candidates = table_ref_index[table_ref]
# 同表号命中多个章节时,优先更深层章节,避免父级“汇总章节”抢占。
target = max(candidates, key=lambda s: (s.level, len(s.content or "")))
section_hint = (table.get("section_hint") or "").strip()
if not target and section_hint and section_hint in section_by_number:
target = section_by_number[section_hint]
if not target:
target = self._match_table_section(table.get("page_text", ""), flat_sections)
# 兜底优先使用上一个命中章节,避免错误挂到首章节造成跨章污染。
if not target:
target = last_section
if not target:
logger.warning(
"未定位到表格归属章节,跳过: page=%s table=%s",
table.get("page_idx", -1),
table.get("table_idx", -1),
)
continue
target.add_table(table["data"])
last_section = target
@@ -464,7 +534,7 @@ class PDFParser(DocumentParser):
return None
matched: Optional[Section] = None
matched_score = -1
matched_score = (-1, -1)
for section in sections:
title = (section.title or "").strip()
if not title:
@@ -479,7 +549,7 @@ class PDFParser(DocumentParser):
for candidate in candidates:
normalized_candidate = re.sub(r"\s+", "", candidate).lower()
if normalized_candidate and normalized_candidate in normalized_page:
score = len(normalized_candidate)
score = (len(normalized_candidate), section.level)
if score > matched_score:
matched = section
matched_score = score
@@ -514,6 +584,7 @@ class PDFParser(DocumentParser):
current_section = None
content_buffer = []
found_sections = set()
last_top_level_number = 0
for line in lines:
line = line.strip()
@@ -526,6 +597,22 @@ class PDFParser(DocumentParser):
if section_info:
number, title = section_info
level = len(number.split('.'))
top_level_number = int(number.split('.')[0])
# 顶级章节序号大幅跳跃通常是误识别如正文中的“8 表...”)。
if level == 1 and last_top_level_number and top_level_number > last_top_level_number + 1:
if line and not self._is_noise(line):
content_buffer.append(line)
continue
# 顶级章节编号倒退通常是正文枚举项被误识别如“1 综合监控...”)。
if level == 1 and last_top_level_number and top_level_number < last_top_level_number:
if line and not self._is_noise(line):
content_buffer.append(line)
continue
if level > 6:
continue
# 保存之前章节的内容
if current_section and content_buffer:
@@ -540,6 +627,7 @@ class PDFParser(DocumentParser):
if level == 1:
sections.append(section)
section_stack = {1: section}
last_top_level_number = top_level_number
else:
parent_level = level - 1
while parent_level >= 1 and parent_level not in section_stack:
@@ -557,6 +645,10 @@ class PDFParser(DocumentParser):
for l in list(section_stack.keys()):
if l > level:
del section_stack[l]
# 若出现层级跳跃如1->3自动回退到父级+1。
if level > 1 and (level - 1) not in section_stack:
section.level = max(section_stack.keys()) if section_stack else 1
current_section = section
else:
@@ -577,13 +669,14 @@ class PDFParser(DocumentParser):
Returns:
(章节编号, 章节标题) 或 None
"""
# 模式: "3.1功能需求" "3.1 功能需求"
match = re.match(r'^(\d+(?:\.\d+)*)\s*(.+)$', line)
# 模式: "3.1 功能需求" / "3.1.2 电场..."
match = re.match(r'^(\d+(?:\.\d+)*)[\s、.)]*(.+)$', line)
if not match:
return None
number = match.group(1)
title = match.group(2).strip()
level = len(number.split('.'))
# 排除目录行
if '...' in title or title.count('.') > 5:
@@ -609,6 +702,18 @@ class PDFParser(DocumentParser):
# 标题长度检查
if len(title) > 60 or len(title) < 2:
return None
# 过滤更像正文描述的句式。
if self._looks_like_statement(title):
return None
# 过滤疑似正文句子(含句号/分号且过长)。
if len(title) > 24 and re.search(r'[。;;]', title):
return None
# 过滤指令拼接噪声标题(逗号过多通常是正文残片)。
if title.count('') >= 2 and len(title) > 20:
return None
# 放宽标题字符要求兼容部分PDF字体导致中文抽取异常的情况
if not re.search(r'[\u4e00-\u9fa5A-Za-z]', title):
@@ -631,8 +736,30 @@ class PDFParser(DocumentParser):
# 检查标题是否包含反斜杠(通常是表格噪声)
if '\\' in title and '需求' not in title:
return None
# 常见有效标题关键词兜底,降低正文被识别为标题的概率。
if not any(k in title for k in self.VALID_TITLE_KEYWORDS):
return None
# 顶级章节标题需符合SRS结构性关键词避免“综合监控”“电场”等正文短语被识别。
if level == 1 and not any(k in title for k in self.TOP_LEVEL_TITLE_KEYWORDS):
return None
return (number, title)
def _looks_like_statement(self, title: str) -> bool:
"""判断标题是否更像正文语句而非章节名。"""
if not title:
return False
statement_hints = ["", "能够", "可以", "进行", "通过", "", "同时", "", "如果", ""]
if any(h in title for h in statement_hints):
return True
if len(title) > 24 and re.search(r'[,。;;:]', title):
return True
return False
def _is_noise(self, line: str) -> bool:
"""检查是否是噪声行"""

View File

@@ -146,8 +146,8 @@ class JSONGenerator:
if req.type == 'interface':
req_dict["接口名称"] = req.interface_name
req_dict["接口类型"] = req.interface_type
req_dict["来源"] = req.source
req_dict["目的地"] = req.destination
req_dict["数据来源"] = req.source
req_dict["数据目的地"] = req.destination
result["需求列表"].append(req_dict)
# 如果有子章节,添加子章节

View File

@@ -7,6 +7,7 @@ import logging
import json
from abc import ABC, abstractmethod
from typing import Dict, List, Optional, Any
import requests
from .utils import get_env_or_config
@@ -86,6 +87,34 @@ class QwenLLM(LLMInterface):
except ImportError:
logger.error("dashscope库未安装请运行: pip install dashscope")
raise
def _call_compatible_mode(self, prompt: str) -> str:
    """Call the model through the OpenAI-compatible chat-completions API.

    The request is a single-turn chat with *prompt* as the user message,
    posted to ``<api_endpoint>/chat/completions`` with a 120 second timeout.

    Args:
        prompt: Text sent as the only user message.

    Returns:
        The ``content`` string of the first returned choice.

    Raises:
        Exception: If the HTTP status is not 200, the response carries no
            ``choices``, or the first choice has empty content.
    """
    url = f'{self.api_endpoint.rstrip("/")}/chat/completions'
    request_headers = dict(
        [
            ("Authorization", f"Bearer {self.api_key}"),
            ("Content-Type", "application/json"),
        ]
    )
    body = dict(
        model=self.model,
        messages=[{"role": "user", "content": prompt}],
        temperature=self.extra_params.get("temperature", 0.3),
        max_tokens=self.extra_params.get("max_tokens", 1024),
    )

    response = requests.post(url, headers=request_headers, json=body, timeout=120)
    if response.status_code != 200:
        raise Exception(f"兼容模式API调用失败: {response.status_code} {response.text[:300]}")

    parsed = response.json()
    choice_list = parsed.get("choices", [])
    if not choice_list:
        raise Exception(f"兼容模式API返回无choices: {str(parsed)[:300]}")

    # Only the first choice is used.
    text = choice_list[0].get("message", {}).get("content", "")
    if not text:
        raise Exception(f"兼容模式API返回空内容: {str(parsed)[:300]}")
    return text
def call(self, prompt: str) -> str:
"""
@@ -153,6 +182,11 @@ class QwenLLM(LLMInterface):
return str(response)
except Exception as e:
err_msg = str(e)
# DashScope旧路径报url error时回退到兼容模式接口。
if "url error" in err_msg.lower() or "status: 400" in err_msg.lower():
logger.warning("DashScope调用失败回退兼容模式接口: %s", err_msg)
return self._call_compatible_mode(prompt)
logger.error(f"调用千问LLM失败: {e}")
raise

View File

@@ -33,8 +33,10 @@ class RequirementSplitter:
CONNECTOR_HINTS = ["", "并且", "同时", "然后", "", "以及", ""]
CONDITIONAL_HINTS = ["如果", "", "", "", "其中", "此时", "满足"]
CONTEXT_PRONOUN_HINTS = ["", "", "上述", "", "这些", "那些"]
CHAIN_HINTS = ["从而", "以便", "用于", "以实现", "并据此", "进而", "从而实现"]
ENUMERATION_HINTS = ["具体包括", "包括但不限于", "主要包括", "其中包括", "如下"]
def __init__(self, max_sentence_len: int = 120, min_clause_len: int = 12):
def __init__(self, max_sentence_len: int = 160, min_clause_len: int = 20):
self.max_sentence_len = max_sentence_len
self.min_clause_len = min_clause_len
@@ -107,6 +109,14 @@ class RequirementSplitter:
if len(current) < self.min_clause_len:
return False
# “具体包括/其中包括”后的列举项通常是上一句延伸,不应拆分为独立需求。
if any(h in current for h in self.ENUMERATION_HINTS):
return False
# 承接链条短语一般不是独立需求动作,避免切断语义链。
if any(fragment.startswith(h) for h in self.CHAIN_HINTS):
return False
# 指代承接片段通常是语义延续,不应切断。
if any(fragment.startswith(h) for h in self.CONTEXT_PRONOUN_HINTS):
return False
@@ -123,6 +133,12 @@ class RequirementSplitter:
has_action = any(h in fragment for h in self.ACTION_HINTS)
current_has_action = any(h in current for h in self.ACTION_HINTS)
# 并列连接词后接“控制/处理/显示”等限定短语时,优先视为同一需求。
if has_connector and len(fragment) < self.max_sentence_len // 3 and not any(
kw in fragment for kw in ["并输出", "并上传", "并记录", "并触发"]
):
return False
# 连接词 + 动作词,且当前片段已经包含动作,优先拆分。
if has_connector and has_action and current_has_action:
return True
@@ -147,6 +163,9 @@ class RequirementSplitter:
return merged
def _should_merge(self, prev: str, current: str) -> bool:
if any(h in prev for h in self.ENUMERATION_HINTS):
return True
# 指代开头:如“该报警信号...”。
if any(current.startswith(h) for h in self.CONTEXT_PRONOUN_HINTS):
return True

View File

@@ -60,6 +60,46 @@ class AppSettings:
"other": "OR",
}
DEFAULT_INTERFACE_SECTION_HINTS = [
"接口描述",
"接口需求",
"接口要求",
"外部接口",
"内部接口",
"i/o",
]
DEFAULT_INTERFACE_TITLE_EXCLUDES = [
"计算机通信需求",
"通信需求",
"通信要求",
]
DEFAULT_FUNCTIONAL_SECTION_HINTS = [
"功能需求",
"功能要求",
]
DEFAULT_OTHER_SECTION_HINTS = [
"安全性需求",
"保密性需求",
"适应性需求",
"环境需求",
"资源需求",
"质量",
"设计约束",
"培训需求",
"软件保障",
"验收",
"交付",
"包装",
"通信需求",
"计算机通信需求",
"硬件环境",
"软件环境",
"运行环境",
]
def __init__(self, config: Dict[str, Any] = None):
self.config = config or {}
@@ -75,6 +115,20 @@ class AppSettings:
self.type_prefix = self._build_type_prefix(req_types_cfg)
self.type_chinese = self._build_type_chinese(req_types_cfg)
semantic_type_cfg = extraction_cfg.get("semantic_type_policy", {})
self.interface_section_hints = [
str(x).lower() for x in semantic_type_cfg.get("interface_section_hints", self.DEFAULT_INTERFACE_SECTION_HINTS)
]
self.interface_title_excludes = [
str(x).lower() for x in semantic_type_cfg.get("interface_title_excludes", self.DEFAULT_INTERFACE_TITLE_EXCLUDES)
]
self.functional_section_hints = [
str(x).lower() for x in semantic_type_cfg.get("functional_section_hints", self.DEFAULT_FUNCTIONAL_SECTION_HINTS)
]
self.other_section_hints = [
str(x).lower() for x in semantic_type_cfg.get("other_section_hints", self.DEFAULT_OTHER_SECTION_HINTS)
]
splitter_cfg = extraction_cfg.get("splitter", {})
self.splitter_max_sentence_len = int(splitter_cfg.get("max_sentence_len", 120))
self.splitter_min_clause_len = int(splitter_cfg.get("min_clause_len", 12))
@@ -91,16 +145,61 @@ class AppSettings:
self.table_llm_semantic_enabled = bool(table_cfg.get("llm_semantic_enabled", True))
self.sequence_table_merge = table_cfg.get("sequence_table_merge", "single_requirement")
self.merge_time_series_rows_min = int(table_cfg.get("merge_time_series_rows_min", 3))
self.table_skip_keywords = list(
table_cfg.get(
"skip_keywords",
["系统功能要求", "性能要求", "功能矩阵", "能力对照", "性能指标对照"],
)
)
self.table_interface_keywords = list(
table_cfg.get(
"interface_keywords",
["接口", "interface", "输入输出", "I/O", "数据来源", "数据目的地", "来源", "目的地"],
)
)
self.table_single_requirement_keywords = list(
table_cfg.get(
"single_requirement_keywords",
["硬件要求", "软件要求", "运行环境", "环境需求", "资源需求", "计算机资源"],
)
)
rewrite_cfg = extraction_cfg.get("rewrite_policy", {})
self.llm_light_rewrite_enabled = bool(rewrite_cfg.get("llm_light_rewrite_enabled", True))
self.preserve_ratio_min = float(rewrite_cfg.get("preserve_ratio_min", 0.65))
self.max_length_growth_ratio = float(rewrite_cfg.get("max_length_growth_ratio", 1.25))
self.non_interface_max_edit_distance = int(rewrite_cfg.get("non_interface_max_edit_distance", 20))
self.system_description_hints = list(
extraction_cfg.get(
"system_description_hints",
["系统描述", "功能描述", "概述", "示意图", "组成", "架构", "原理"],
)
)
renumber_cfg = extraction_cfg.get("renumber_policy", {})
self.renumber_enabled = bool(renumber_cfg.get("enabled", True))
self.renumber_mode = renumber_cfg.get("mode", "section_continuous")
dedup_cfg = extraction_cfg.get("dedup_policy", {})
self.dedup_similarity_threshold = float(dedup_cfg.get("similarity_threshold", 0.88))
self.enable_cross_section_dedup = bool(dedup_cfg.get("enable_cross_section_dedup", True))
self.prefer_text_over_table = bool(dedup_cfg.get("prefer_text_over_table", True))
interface_cfg = extraction_cfg.get("interface_policy", {})
self.interface_unknown_fallback = str(interface_cfg.get("unknown_fallback", "未知"))
normalization_cfg = extraction_cfg.get("normalization_policy", {})
self.ocr_spacing_normalize = bool(normalization_cfg.get("ocr_spacing_normalize", True))
fidelity_cfg = extraction_cfg.get("fidelity_policy", {})
self.preserve_source_text_for_text_blocks = bool(
fidelity_cfg.get("preserve_source_text_for_text_blocks", True)
)
punctuation_cfg = extraction_cfg.get("punctuation_policy", {})
self.ensure_terminal_period = bool(punctuation_cfg.get("ensure_terminal_period", True))
def _build_rules(self, req_types_cfg: Dict[str, Dict[str, Any]]) -> List[RequirementTypeRule]:
rules: List[RequirementTypeRule] = []
if not req_types_cfg:
@@ -153,10 +252,45 @@ class AppSettings:
def is_non_requirement_section(self, title: str) -> bool:
    """Return True when *title* contains any configured non-requirement keyword."""
    for marker in self.non_requirement_sections:
        if marker in title:
            return True
    return False
def is_interface_semantic_title(self, title: str) -> bool:
    """Tell whether a section title semantically denotes an interface section.

    The title is stripped and lower-cased before matching. A title that
    hits an exclusion phrase is rejected unless it also contains "接口";
    otherwise it qualifies when it contains any interface section hint.

    Args:
        title: Section title; None or blank yields False.

    Returns:
        True when the title is an interface-type section title.
    """
    normalized = (title or "").strip().lower()
    if not normalized:
        return False

    hits_exclusion = False
    for phrase in self.interface_title_excludes:
        if phrase in normalized:
            hits_exclusion = True
            break

    # An excluded phrase only vetoes titles that never mention "接口".
    if hits_exclusion and "接口" not in normalized:
        return False

    for hint in self.interface_section_hints:
        if hint in normalized:
            return True
    return False
def is_functional_semantic_title(self, title: str) -> bool:
    """Tell whether a section title denotes a functional-requirement section.

    Args:
        title: Section title; None or blank yields False.

    Returns:
        True when the stripped, lower-cased title contains any of the
        configured functional section hints.
    """
    normalized = (title or "").strip().lower()
    if normalized:
        for hint in self.functional_section_hints:
            if hint in normalized:
                return True
    return False
def is_other_semantic_title(self, title: str) -> bool:
    """Tell whether a section title belongs to the "other requirements" group.

    Args:
        title: Section title; None or blank yields False.

    Returns:
        True when the stripped, lower-cased title contains any of the
        configured "other" section hints.
    """
    normalized = (title or "").strip().lower()
    if normalized:
        for hint in self.other_section_hints:
            if hint in normalized:
                return True
    return False
def detect_requirement_type(self, title: str, content: str) -> str:
    """Classify a section as "interface", "functional" or "other".

    Section-title semantics take precedence: the interface, functional and
    other title checks are tried in that order. Only when none applies are
    the keyword rules matched against the lower-cased title plus the first
    500 characters of the content.

    Args:
        title: Section title.
        content: Section body text; None is treated as empty.

    Returns:
        "interface", "functional" or "other". Keyword hits on the
        performance, security, reliability and other rules all map to
        "other"; "functional" is the default when nothing matches.
    """
    title_checks = (
        (self.is_interface_semantic_title, "interface"),
        (self.is_functional_semantic_title, "functional"),
        (self.is_other_semantic_title, "other"),
    )
    for predicate, type_key in title_checks:
        if predicate(title):
            return type_key

    haystack = f"{title} {(content or '')[:500]}".lower()
    folded_into_other = {"performance", "security", "reliability", "other"}

    for rule in self.requirement_rules:
        # Interface may only be triggered by an interface-type title.
        if rule.key == "interface" and not self.is_interface_semantic_title(title):
            continue
        if any(keyword.lower() in haystack for keyword in rule.keywords):
            return "other" if rule.key in folded_into_other else rule.key

    return "functional"

View File

@@ -55,6 +55,9 @@ class SRSTool:
ToolRegistry.register(self.DEFINITION)
def run(self, file_path: str, enable_llm: bool = True) -> Dict[str, Any]:
if not enable_llm:
raise ValueError("当前版本仅支持LLM模式请将 enable_llm 设为 true")
config = self._load_config()
llm = self._build_llm(config, enable_llm=enable_llm)
@@ -122,12 +125,12 @@ class SRSTool:
def _build_llm(self, config: Dict[str, Any], enable_llm: bool) -> QwenLLM | None:
if not enable_llm:
return None
raise ValueError("当前版本仅支持LLM模式")
llm_cfg = config.get("llm", {})
api_key = llm_cfg.get("api_key")
if not api_key:
return None
raise ValueError("未配置API密钥请设置 DASH_SCOPE_API_KEY 或 DASHSCOPE_API_KEY")
return QwenLLM(
api_key=api_key,