增加代码知识库;修复文档处理内容;增加API设置
This commit is contained in:
4
rag-web-ui/backend/app/services/consistency/__init__.py
Normal file
4
rag-web-ui/backend/app/services/consistency/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
||||
from app.services.consistency.comparator import ConsistencyComparator
|
||||
|
||||
__all__ = ["ConsistencyComparator"]
|
||||
|
||||
258
rag-web-ui/backend/app/services/consistency/comparator.py
Normal file
258
rag-web-ui/backend/app/services/consistency/comparator.py
Normal file
@@ -0,0 +1,258 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List, Optional
|
||||
|
||||
from app.services.code_kb.adapter import CodeKnowledgeBaseAdapter
|
||||
from app.services.code_kb.formatter import format_evidence_context
|
||||
from app.services.code_kb.schema import CodeGraphContext, CodeSearchHit
|
||||
from app.services.consistency.prompt import build_judgment_prompt, build_requirement_query
|
||||
from app.services.consistency.schema import ConsistencyResultItem, RequirementSnapshot, VERDICTS
|
||||
from app.services.consistency.scorer import coverage_score
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _clip(value: str, limit: int) -> str:
    """Return ``value`` shortened to at most ``limit`` characters.

    Falsy input (``None`` or ``""``) is treated as an empty string. When the
    text is longer than ``limit`` it is cut at ``limit`` characters, trailing
    whitespace is removed from the kept part, and a ``...[truncated]`` marker
    is appended on a new line.
    """
    content = value if value else ""
    if len(content) > limit:
        head = content[:limit].rstrip()
        return f"{head}\n...[truncated]"
    return content
|
||||
|
||||
|
||||
def _as_list(value: Any) -> List[str]:
    """Coerce a loosely-typed value into a list of non-blank strings.

    Accepted inputs:

    * ``None`` -> ``[]``.
    * ``list`` / ``tuple`` -> each item stringified; ``None`` items and items
      whose text is blank are dropped.
    * ``str`` -> blank text gives ``[]``. Otherwise the text is tried as JSON:
      a JSON array or JSON string is processed recursively and JSON ``null``
      gives ``[]``. Anything else (not JSON at all, or a JSON number, boolean
      or object) keeps the original text, split into stripped non-empty lines.
    * any other value -> ``[str(value)]``.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        # Skip None explicitly: str(None) is the non-blank text "None", which
        # would otherwise show up as a bogus entry.
        return [str(item) for item in value if item is not None and str(item).strip()]
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return []
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = text
        else:
            if parsed is None:
                return []
            if isinstance(parsed, (list, str)):
                return _as_list(parsed)
            # JSON scalars/objects: keep the author's text instead of the
            # Python rendering of the parsed value ("true" must not become
            # "True", "1e3" must not become "1000.0").
        return [line.strip() for line in text.splitlines() if line.strip()]
    return [str(value)]
|
||||
|
||||
|
||||
def requirement_to_snapshot(requirement: Any) -> RequirementSnapshot:
    """Build a ``RequirementSnapshot`` from a dict or an attribute-style object.

    Dicts are read with ``dict.get``; any other object is read with
    ``getattr`` (missing attributes yield ``None``). Most fields are looked up
    under a snake_case name first and fall back to a second name (``id`` for
    the uid, the camelCase spelling otherwise) when the first value is falsy.
    """
    if isinstance(requirement, dict):
        read = requirement.get
    else:

        def read(key, default=None):
            return getattr(requirement, key, default)

    def pick(primary: str, alias: str) -> Any:
        # Same semantics as ``read(primary) or read(alias)``.
        found = read(primary)
        return found if found else read(alias)

    snapshot_fields = {
        "requirement_uid": pick("requirement_uid", "id") or "",
        "title": read("title") or "",
        "description": read("description") or "",
        "acceptance_criteria": _as_list(pick("acceptance_criteria", "acceptanceCriteria")),
        "requirement_type": pick("requirement_type", "requirementType"),
        "section_title": pick("section_title", "sectionTitle"),
        "interface_name": pick("interface_name", "interfaceName"),
        "interface_type": pick("interface_type", "interfaceType"),
        "data_source": pick("data_source", "dataSource"),
        "data_destination": pick("data_destination", "dataDestination"),
    }
    return RequirementSnapshot(**snapshot_fields)
|
||||
|
||||
|
||||
class ConsistencyComparator:
    """Compare requirements against evidence from a code knowledge base.

    For each requirement the comparator builds a search query, retrieves
    matching functions from the adapter, expands the call context of every
    hit, obtains a judgment (LLM-based, heuristic, or "missing" when nothing
    was retrieved), normalises it, scores it and packages everything into a
    ``ConsistencyResultItem``.
    """

    def __init__(
        self,
        code_kb_adapter: CodeKnowledgeBaseAdapter,
        llm: Any = None,
        use_llm: bool = True,
    ) -> None:
        """Store collaborators.

        Args:
            code_kb_adapter: Provides ``search_functions`` and
                ``expand_call_context``.
            llm: Optional model object; either exposes ``invoke(prompt)`` or is
                itself callable. When ``None`` a model is created through
                ``LLMFactory`` on first use.
            use_llm: When ``False`` a conservative heuristic judgment is used
                instead of calling a model.
        """
        self.code_kb_adapter = code_kb_adapter
        self.llm = llm
        self.use_llm = use_llm

    def compare_requirements(
        self,
        requirements: Iterable[Any],
        top_k: int = 8,
        max_call_hops: int = 2,
        min_similarity: float = 0.55,
    ) -> List[ConsistencyResultItem]:
        """Run ``compare_requirement`` for every requirement, preserving order."""
        return [
            self.compare_requirement(
                requirement,
                top_k=top_k,
                max_call_hops=max_call_hops,
                min_similarity=min_similarity,
            )
            for requirement in requirements
        ]

    def compare_requirement(
        self,
        requirement: Any,
        top_k: int = 8,
        max_call_hops: int = 2,
        min_similarity: float = 0.55,
    ) -> ConsistencyResultItem:
        """Compare one requirement (dict or object) against the code knowledge base.

        Args:
            requirement: Source requirement; converted with
                ``requirement_to_snapshot``.
            top_k: Forwarded to ``search_functions``.
            max_call_hops: Forwarded to ``expand_call_context`` as ``max_hops``.
            min_similarity: Forwarded to ``search_functions``.

        Returns:
            The populated ``ConsistencyResultItem``; ``raw_judgment`` holds the
            normalised judgment plus the requirement snapshot.
        """
        snapshot = requirement_to_snapshot(requirement)
        query = build_requirement_query(snapshot)
        hits = self.code_kb_adapter.search_functions(
            query=query,
            top_k=top_k,
            min_similarity=min_similarity,
        )
        contexts = [
            self.code_kb_adapter.expand_call_context(hit.evidence.node_id, max_hops=max_call_hops)
            for hit in hits
        ]

        if not hits:
            judgment = self._missing_judgment("未找到满足相似度阈值的函数证据。")
        elif not self.use_llm:
            judgment = self._heuristic_judgment(hits, contexts)
        else:
            judgment = self._llm_judgment(snapshot, hits, contexts)

        judgment = self._normalize_judgment(judgment)
        judgment["requirement_snapshot"] = snapshot.to_dict()
        score = coverage_score(snapshot, hits, contexts, judgment)
        matched_functions = [self._matched_function_payload(hit) for hit in hits]
        call_chains = self._collect_call_chains(contexts)

        return ConsistencyResultItem(
            requirement_uid=snapshot.requirement_uid,
            requirement_title=snapshot.title,
            requirement_type=snapshot.requirement_type,
            requirement_text=snapshot.description,
            verdict=judgment["verdict"],
            coverage_score=score,
            confidence=float(judgment.get("confidence") or 0.0),
            matched_functions=matched_functions,
            covered_points=_as_list(judgment.get("covered_points")),
            missing_points=_as_list(judgment.get("missing_points")),
            conflict_points=_as_list(judgment.get("conflict_points")),
            call_chain_evidence=call_chains,
            suggestion=str(judgment.get("suggestion") or ""),
            raw_judgment=judgment,
        )

    def _llm_judgment(
        self,
        requirement: RequirementSnapshot,
        hits: List[CodeSearchHit],
        contexts: List[CodeGraphContext],
    ) -> Dict[str, Any]:
        """Ask the model for a judgment; never raises.

        Any failure (prompt building, model creation, invocation, unparsable
        or non-object reply) is logged and converted into an ``uncertain``
        fallback judgment flagged with ``"fallback": True``.
        """
        try:
            evidence_context = format_evidence_context(hits, contexts)
            prompt = build_judgment_prompt(requirement, evidence_context)
            # Imported lazily so the factory is only needed when a model is used.
            from app.services.llm.llm_factory import LLMFactory

            llm = self.llm or LLMFactory.create(temperature=0, streaming=False)
            response = llm.invoke(prompt) if hasattr(llm, "invoke") else llm(prompt)
            # Use the reply's ``content`` attribute when present, else the reply itself.
            text = getattr(response, "content", response)
            return self.parse_json_judgment(str(text))
        except Exception as exc:
            logger.exception("LLM consistency judgment failed: %s", exc)
            return {
                "verdict": "uncertain",
                "confidence": 0.2,
                "covered_points": [],
                "missing_points": ["模型判定失败,无法可靠确认覆盖情况。"],
                "conflict_points": [],
                "primary_evidence": [hit.evidence.node_id for hit in hits[:3]],
                "reasoning": f"LLM judgment failed: {exc}",
                "suggestion": "请检查模型配置,或人工复核匹配函数证据。",
                "fallback": True,
            }

    @staticmethod
    def parse_json_judgment(raw_text: str) -> Dict[str, Any]:
        """Parse a model reply into a judgment dict.

        A leading/trailing Markdown code fence is removed first. If the
        remaining text is not valid JSON, the span from the first ``{`` to the
        last ``}`` is tried instead.

        Raises:
            json.JSONDecodeError: No JSON could be decoded from the reply.
            ValueError: The decoded JSON is not an object. Callers rely on a
                dict (``.get`` is used downstream), so a list or scalar reply
                is rejected here rather than crashing later.
        """
        text = raw_text.strip()
        if text.startswith("```"):
            text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
            text = re.sub(r"```$", "", text).strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            match = re.search(r"\{.*\}", text, flags=re.DOTALL)
            if not match:
                raise
            parsed = json.loads(match.group(0))
        if not isinstance(parsed, dict):
            raise ValueError(
                f"Expected a JSON object for the judgment, got {type(parsed).__name__}"
            )
        return parsed

    def _heuristic_judgment(
        self,
        hits: List[CodeSearchHit],
        contexts: List[CodeGraphContext],
    ) -> Dict[str, Any]:
        """Produce a conservative judgment from retrieval similarity alone.

        The best similarity among the hits decides between ``partial``
        (>= 0.78, confidence capped at 0.68) and ``uncertain`` (confidence
        capped at 0.5).
        """
        # Use the true maximum rather than assuming the adapter returns hits
        # sorted by similarity; float() keeps the value JSON-serialisable.
        best = max((float(hit.similarity) for hit in hits), default=0.0)
        if best >= 0.78:
            verdict = "partial"
            confidence = min(0.68, best)
        else:
            verdict = "uncertain"
            confidence = min(0.5, best)
        return {
            "verdict": verdict,
            "confidence": confidence,
            "covered_points": [],
            "missing_points": ["未启用 LLM 判定,无法细分验收准则覆盖点。"],
            "conflict_points": [],
            "primary_evidence": [hit.evidence.node_id for hit in hits[:3]],
            "reasoning": "仅基于向量召回和调用图生成保守判定。",
            "suggestion": "启用模型判定或人工复核主要匹配函数。",
            "call_context_count": len(contexts),
        }

    def _missing_judgment(self, reason: str) -> Dict[str, Any]:
        """Return a ``missing`` judgment whose reasoning and missing point are ``reason``."""
        return {
            "verdict": "missing",
            "confidence": 0.75,
            "covered_points": [],
            "missing_points": [reason],
            "conflict_points": [],
            "primary_evidence": [],
            "reasoning": reason,
            "suggestion": "补充代码实现或降低阈值后重新召回,并人工确认是否存在命名差异。",
        }

    def _normalize_judgment(self, judgment: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``judgment`` with a valid verdict, a confidence in [0, 1] and all standard keys present.

        Unknown verdicts become ``uncertain``; unparsable or NaN confidences
        become ``0.0``. Extra keys in ``judgment`` are kept.
        """
        verdict = str(judgment.get("verdict") or "uncertain").strip().lower()
        if verdict not in VERDICTS:
            verdict = "uncertain"
        try:
            confidence = float(judgment.get("confidence", 0.0))
        except (TypeError, ValueError):
            confidence = 0.0
        if confidence != confidence:
            # NaN compares unequal to itself; without this guard min(1.0, nan)
            # would silently turn it into full confidence.
            confidence = 0.0
        confidence = max(0.0, min(1.0, confidence))
        normalized = dict(judgment)
        normalized["verdict"] = verdict
        normalized["confidence"] = confidence
        normalized.setdefault("covered_points", [])
        normalized.setdefault("missing_points", [])
        normalized.setdefault("conflict_points", [])
        normalized.setdefault("primary_evidence", [])
        normalized.setdefault("reasoning", "")
        normalized.setdefault("suggestion", "")
        return normalized

    def _matched_function_payload(self, hit: CodeSearchHit) -> Dict[str, Any]:
        """Flatten one search hit into the dict stored in ``matched_functions``.

        Long text fields are clipped and the call lists are limited to 20
        entries each.
        """
        item = hit.evidence
        return {
            "node_id": item.node_id,
            "name": item.name,
            "file": item.file,
            "start_line": item.start_line,
            "end_line": item.end_line,
            # float() so a non-builtin numeric type cannot break JSON export.
            "similarity": round(float(hit.similarity), 4),
            "role": item.summary[:120] if item.summary else "",
            "evidence_summary": item.summary,
            "logic_flow": _clip(item.logic_flow, 1200),
            "code_snippet": _clip(item.code_snippet, 2000),
            # Tolerate evidence records whose call lists are None.
            "calls": (item.calls or [])[:20],
            "called_by": (item.called_by or [])[:20],
            "signature": item.signature,
        }

    def _collect_call_chains(self, contexts: List[CodeGraphContext]) -> List[str]:
        """Merge the call chains of all contexts, de-duplicated in first-seen order, capped at 30."""
        chains: List[str] = []
        for context in contexts:
            chains.extend(context.call_chains or [])
        return list(dict.fromkeys(chains))[:30]
|
||||
134
rag-web-ui/backend/app/services/consistency/exporter.py
Normal file
134
rag-web-ui/backend/app/services/consistency/exporter.py
Normal file
@@ -0,0 +1,134 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
from typing import Any, Dict, Iterable, List
|
||||
|
||||
|
||||
def normalize_result_dicts(results: Iterable[Any]) -> List[Dict[str, Any]]:
    """Convert result items of mixed kinds into plain dicts.

    Each item is handled by the first rule that applies:

    1. it has a ``to_dict`` attribute -> ``item.to_dict()`` is used;
    2. it is a ``dict`` -> the same dict object is passed through;
    3. otherwise -> a dict is built from the item's attributes, with an
       empty default for every attribute that is missing.
    """
    # (attribute name, factory for the default used when the attribute is absent)
    fallback_fields = (
        ("requirement_uid", str),
        ("verdict", str),
        ("coverage_score", float),
        ("confidence", float),
        ("matched_functions", list),
        ("covered_points", list),
        ("missing_points", list),
        ("conflict_points", list),
        ("call_chain_evidence", list),
        ("suggestion", str),
        ("raw_judgment", dict),
    )

    def as_dict(entry: Any) -> Dict[str, Any]:
        if hasattr(entry, "to_dict"):
            return entry.to_dict()
        if isinstance(entry, dict):
            return entry
        return {name: getattr(entry, name, make_default()) for name, make_default in fallback_fields}

    return [as_dict(entry) for entry in results]
|
||||
|
||||
|
||||
def export_json(results: Iterable[Any]) -> bytes:
    """Serialise the results as UTF-8 encoded JSON.

    The document has a single top-level key, ``results``, holding the
    normalised result dicts. Non-ASCII characters are written as-is and the
    output is indented by two spaces.
    """
    document = {"results": normalize_result_dicts(results)}
    rendered = json.dumps(document, ensure_ascii=False, indent=2)
    return rendered.encode("utf-8")
|
||||
|
||||
|
||||
def export_markdown(results: Iterable[Any]) -> str:
    """Render the results as a Markdown report.

    The report starts with a summary table (one row per requirement) followed
    by one detail section per requirement listing the verdict, scores,
    suggestion, matched functions, missing points and, when present, conflict
    points.

    Free-text values placed in the summary table are sanitised so that a
    ``|`` or a line break inside them cannot break the table row.
    """

    def table_cell(value: Any) -> str:
        # A table row must stay on one physical line and "|" separates cells,
        # so pipes are replaced and line breaks are collapsed to spaces.
        text = str(value if value is not None else "").replace("|", "/")
        return " ".join(part.strip() for part in text.splitlines() if part.strip())

    rows = normalize_result_dicts(results)
    lines = [
        "# 需求代码一致性比对报告",
        "",
        "| 需求 ID | 判定 | 覆盖分 | 置信度 | 匹配函数 | 缺失点 | 建议 |",
        "| --- | --- | ---: | ---: | ---: | ---: | --- |",
    ]
    for item in rows:
        lines.append(
            "| {uid} | {verdict} | {score:.2f} | {confidence:.2f} | {functions} | {missing} | {suggestion} |".format(
                uid=table_cell(item.get("requirement_uid", "")),
                verdict=table_cell(item.get("verdict", "")),
                score=float(item.get("coverage_score") or 0),
                confidence=float(item.get("confidence") or 0),
                functions=len(item.get("matched_functions") or []),
                missing=len(item.get("missing_points") or []),
                suggestion=table_cell(item.get("suggestion") or ""),
            )
        )

    for item in rows:
        lines.extend(
            [
                "",
                f"## {item.get('requirement_uid', '')} {item.get('requirement_title', '')}",
                "",
                f"- 判定: `{item.get('verdict', '')}`",
                f"- 覆盖分: {float(item.get('coverage_score') or 0):.2f}",
                f"- 置信度: {float(item.get('confidence') or 0):.2f}",
                f"- 建议: {item.get('suggestion') or '-'}",
                "",
                "### 匹配函数",
            ]
        )
        for function in item.get("matched_functions") or []:
            lines.append(
                f"- `{function.get('name')}` {function.get('file')}:{function.get('start_line')} "
                f"(similarity={float(function.get('similarity') or 0):.2f})"
            )
        lines.extend(["", "### 缺失点"])
        # A lone "-" bullet is emitted when there are no missing points.
        for point in item.get("missing_points") or ["-"]:
            lines.append(f"- {point}")
        if item.get("conflict_points"):
            lines.extend(["", "### 冲突点"])
            for point in item.get("conflict_points") or []:
                lines.append(f"- {point}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def export_excel(results: Iterable[Any]) -> bytes:
    """Render the results as an ``.xlsx`` workbook and return its bytes.

    The workbook has one sheet, ``Consistency``, with a header row and one
    row per result. The "main file" column holds the ``file`` of the first
    matched function, or an empty string when there is none.

    Raises:
        RuntimeError: ``openpyxl`` is not installed.
    """
    try:
        from openpyxl import Workbook
    except ImportError as exc:
        raise RuntimeError("openpyxl is required to export Excel reports.") from exc

    records = normalize_result_dicts(results)
    book = Workbook()
    worksheet = book.active
    worksheet.title = "Consistency"
    worksheet.append(
        [
            "需求ID",
            "需求标题",
            "需求类型",
            "判定",
            "覆盖分",
            "置信度",
            "匹配函数数量",
            "主要文件",
            "缺失点数量",
            "建议",
        ]
    )
    for record in records:
        matched = record.get("matched_functions") or []
        primary_file = matched[0].get("file", "") if matched else ""
        missing_total = len(record.get("missing_points") or [])
        worksheet.append(
            [
                record.get("requirement_uid", ""),
                record.get("requirement_title", ""),
                record.get("requirement_type", ""),
                record.get("verdict", ""),
                record.get("coverage_score", 0),
                record.get("confidence", 0),
                len(matched),
                primary_file,
                missing_total,
                record.get("suggestion", ""),
            ]
        )
    # Save into memory so the caller decides where the bytes go.
    buffer = io.BytesIO()
    book.save(buffer)
    return buffer.getvalue()
|
||||
|
||||
58
rag-web-ui/backend/app/services/consistency/prompt.py
Normal file
58
rag-web-ui/backend/app/services/consistency/prompt.py
Normal file
@@ -0,0 +1,58 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
from app.services.consistency.schema import RequirementSnapshot
|
||||
|
||||
|
||||
SYSTEM_INSTRUCTION = """你是需求代码一致性审查助手。
|
||||
只能基于输入的需求、验收准则、函数摘要、代码片段、调用链证据判断。
|
||||
不得补充未给出的代码事实。
|
||||
证据不足时输出 uncertain。
|
||||
输出严格 JSON,不要 Markdown。"""
|
||||
|
||||
|
||||
def build_requirement_query(requirement: RequirementSnapshot) -> str:
    """Build the retrieval query text for a requirement.

    For requirements whose type is ``interface`` (case-insensitive) the query
    lists the interface name, interface type, data source, data destination
    and description. For every other type it lists the description, the
    acceptance criteria (one per line), section title, interface name, data
    source and data destination. Empty parts are skipped and the remaining
    parts are joined with newlines.
    """
    is_interface = (requirement.requirement_type or "").lower() == "interface"
    if is_interface:
        candidates = (
            requirement.interface_name,
            requirement.interface_type,
            requirement.data_source,
            requirement.data_destination,
            requirement.description,
        )
    else:
        candidates = (
            requirement.description,
            "\n".join(requirement.acceptance_criteria),
            requirement.section_title,
            requirement.interface_name,
            requirement.data_source,
            requirement.data_destination,
        )
    # filter(None, ...) drops None and empty strings alike.
    return "\n".join(filter(None, candidates)).strip()
|
||||
|
||||
|
||||
def build_judgment_prompt(requirement: RequirementSnapshot, evidence_context: str) -> str:
    """Assemble the full prompt sent to the model for one requirement.

    The prompt is ``SYSTEM_INSTRUCTION``, a blank line, and a pretty-printed
    JSON document with three keys: the requirement as a dict, the evidence
    text, and a template describing the expected output fields.
    """
    output_schema = {
        "verdict": "implemented | partial | missing | conflict | uncertain",
        "confidence": 0.0,
        "covered_points": [],
        "missing_points": [],
        "conflict_points": [],
        "primary_evidence": [],
        "reasoning": "brief reason based only on evidence",
        "suggestion": "next action",
    }
    payload = {
        "requirement": requirement.to_dict(),
        "evidence": evidence_context,
        "output_schema": output_schema,
    }
    body = json.dumps(payload, ensure_ascii=False, indent=2)
    return "\n\n".join((SYSTEM_INSTRUCTION, body))
|
||||
|
||||
61
rag-web-ui/backend/app/services/consistency/run_compare.py
Normal file
61
rag-web-ui/backend/app/services/consistency/run_compare.py
Normal file
@@ -0,0 +1,61 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the comparison script.

    Required: ``--srs-extraction-id`` (int), ``--vector-path``,
    ``--metadata-path``, ``--graph-path`` and ``--output``. Optional:
    ``--output-excel``, ``--output-markdown``, ``--top-k`` (default 8),
    ``--max-call-hops`` (default 2), ``--min-similarity`` (default 0.55) and
    the ``--no-llm`` flag.
    """
    cli = argparse.ArgumentParser(description="Run requirement-code consistency comparison.")
    # (flag, keyword arguments for add_argument), registered in this order.
    option_specs = (
        ("--srs-extraction-id", {"type": int, "required": True}),
        ("--vector-path", {"required": True}),
        ("--metadata-path", {"required": True}),
        ("--graph-path", {"required": True}),
        ("--output", {"required": True}),
        ("--output-excel", {"default": None}),
        ("--output-markdown", {"default": None}),
        ("--top-k", {"type": int, "default": 8}),
        ("--max-call-hops", {"type": int, "default": 2}),
        ("--min-similarity", {"type": float, "default": 0.55}),
        ("--no-llm", {"action": "store_true"}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
|
||||
|
||||
|
||||
def main() -> int:
    """Run the comparison from the command line and write the reports.

    Loads the code knowledge base from the given paths, reads the
    requirements of the selected SRS extraction from the database (ordered by
    ``sort_order``), compares them, and writes the JSON report plus the
    optional Markdown and Excel reports.

    Returns:
        ``0`` on success.
    """
    options = parse_args()
    # Project imports are deferred until after argument parsing.
    from app.db.session import SessionLocal
    from app.models.tooling import SRSRequirement
    from app.services.code_kb.adapter import CodeKnowledgeBaseAdapter
    from app.services.consistency.comparator import ConsistencyComparator
    from app.services.consistency.exporter import export_excel, export_json, export_markdown

    knowledge_base = CodeKnowledgeBaseAdapter()
    knowledge_base.load(options.vector_path, options.metadata_path, options.graph_path)
    engine = ConsistencyComparator(knowledge_base, use_llm=not options.no_llm)

    session = SessionLocal()
    try:
        selection = session.query(SRSRequirement)
        selection = selection.filter(SRSRequirement.extraction_id == options.srs_extraction_id)
        requirement_rows = selection.order_by(SRSRequirement.sort_order).all()
        findings = engine.compare_requirements(
            requirement_rows,
            top_k=options.top_k,
            max_call_hops=options.max_call_hops,
            min_similarity=options.min_similarity,
        )
    finally:
        session.close()

    json_target = Path(options.output)
    json_target.write_bytes(export_json(findings))
    if options.output_markdown:
        markdown_target = Path(options.output_markdown)
        markdown_target.write_text(export_markdown(findings), encoding="utf-8")
    if options.output_excel:
        excel_target = Path(options.output_excel)
        excel_target.write_bytes(export_excel(findings))
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Exit the process with main()'s return value as the status code.
    raise SystemExit(main())
|
||||
46
rag-web-ui/backend/app/services/consistency/schema.py
Normal file
46
rag-web-ui/backend/app/services/consistency/schema.py
Normal file
@@ -0,0 +1,46 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
# The set of verdict labels recognised for a consistency judgment.
VERDICTS = {"implemented", "partial", "missing", "conflict", "uncertain"}
|
||||
|
||||
|
||||
@dataclass
class RequirementSnapshot:
    """Plain-data view of one requirement, as used for retrieval, prompting and scoring."""

    # Required identification and text fields.
    requirement_uid: str
    title: str
    description: str
    # Each instance gets its own empty list by default.
    acceptance_criteria: List[str] = field(default_factory=list)
    # Optional attributes; all default to None.
    requirement_type: Optional[str] = None
    section_title: Optional[str] = None
    interface_name: Optional[str] = None
    interface_type: Optional[str] = None
    data_source: Optional[str] = None
    data_destination: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the snapshot as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)
|
||||
|
||||
|
||||
@dataclass
class ConsistencyResultItem:
    """Result of comparing one requirement against the code knowledge base."""

    # Requirement identification and text.
    requirement_uid: str
    requirement_title: str
    requirement_type: Optional[str]
    requirement_text: str
    # Judgment outcome and scores.
    verdict: str
    coverage_score: float
    confidence: float
    # One dict per matched function.
    matched_functions: List[Dict[str, Any]]
    # Detail lists; each instance gets its own empty list by default.
    covered_points: List[str] = field(default_factory=list)
    missing_points: List[str] = field(default_factory=list)
    conflict_points: List[str] = field(default_factory=list)
    call_chain_evidence: List[str] = field(default_factory=list)
    suggestion: str = ""
    # The judgment dict this result was built from.
    raw_judgment: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)
|
||||
|
||||
120
rag-web-ui/backend/app/services/consistency/scorer.py
Normal file
120
rag-web-ui/backend/app/services/consistency/scorer.py
Normal file
@@ -0,0 +1,120 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, Iterable, List
|
||||
|
||||
from app.services.code_kb.schema import CodeGraphContext, CodeSearchHit
|
||||
from app.services.consistency.schema import RequirementSnapshot
|
||||
|
||||
|
||||
def _clamp(value: float) -> float:
    """Limit ``value`` to the closed interval [0.0, 1.0]."""
    upper_bounded = min(1.0, value)
    return max(0.0, upper_bounded)
|
||||
|
||||
|
||||
def _tokens(*values: str) -> List[str]:
    """Split the given texts into lowercase tokens of at least two characters.

    The texts are joined with spaces (falsy values count as empty) and
    lowercased. Any run of characters other than ``a-z``, ``0-9``, ``_`` and
    the CJK range U+4E00-U+9FFF acts as a separator, so an unbroken run of
    CJK characters stays together as a single token.
    """
    lowered = " ".join((value if value else "") for value in values).lower()
    kept: List[str] = []
    for fragment in re.split(r"[^a-z0-9_\u4e00-\u9fff]+", lowered):
        if len(fragment) < 2:
            continue
        kept.append(fragment)
    return kept
|
||||
|
||||
|
||||
def semantic_score(hits: List[CodeSearchHit]) -> float:
    """Score retrieval similarity in [0, 1].

    Combines the highest similarity among all hits (weight 0.7) with the mean
    similarity of the first three hits (weight 0.3). Returns 0.0 when there
    are no hits.
    """
    if not hits:
        return 0.0
    best = max(hit.similarity for hit in hits)
    leading_total = sum(hit.similarity for hit in hits[:3])
    leading_mean = leading_total / min(3, len(hits))
    return _clamp(best * 0.7 + leading_mean * 0.3)
|
||||
|
||||
|
||||
def acceptance_coverage_score(requirement: RequirementSnapshot, judgment: Dict[str, Any]) -> float:
    """Score how much of the acceptance criteria the judgment reports as covered.

    Without acceptance criteria the score depends on the verdict alone
    (implemented 1.0, partial 0.55, conflict 0.25, missing 0.0, anything else
    0.35). With criteria:

    * if missing points are reported, the share of criteria not counted as
      missing (missing count capped at the number of criteria);
    * else if covered points are reported, covered count / criteria count;
    * else 1.0 for ``implemented``, 0.4 for ``partial``, 0.0 otherwise.
    """
    verdict = judgment.get("verdict")
    criteria = requirement.acceptance_criteria or []
    if not criteria:
        verdict_only_scores = {"implemented": 1.0, "partial": 0.55, "conflict": 0.25, "missing": 0.0}
        return verdict_only_scores.get(verdict, 0.35)

    total = len(criteria)
    missing = judgment.get("missing_points") or []
    if missing:
        unmet = min(len(missing), total)
        return _clamp((total - unmet) / total)

    covered = judgment.get("covered_points") or []
    if covered:
        return _clamp(len(covered) / total)

    if verdict == "implemented":
        return 1.0
    if verdict == "partial":
        return 0.4
    return 0.0
|
||||
|
||||
|
||||
def evidence_strength_score(hits: List[CodeSearchHit]) -> float:
    """Score how complete the evidence records of the first five hits are.

    For each hit, six properties are checked (file, start line, end line,
    summary, logic flow, code snippet); the hit's score is the fraction that
    are present. The result is the mean over the inspected hits, or 0.0 when
    there are no hits.
    """
    if not hits:
        return 0.0

    def completeness(item: Any) -> float:
        present = (
            bool(item.file),
            item.start_line is not None,
            item.end_line is not None,
            bool(item.summary),
            bool(item.logic_flow),
            bool(item.code_snippet),
        )
        # Booleans add up as 0/1.
        return sum(present) / len(present)

    ratios = [completeness(hit.evidence) for hit in hits[:5]]
    return _clamp(sum(ratios) / len(ratios))
|
||||
|
||||
|
||||
def call_graph_score(contexts: Iterable[CodeGraphContext]) -> float:
    """Score how much call-graph information the first five contexts carry.

    Each context earns 0.35 for having callers, 0.35 for having callees and
    0.30 for having call chains. The result is the mean over the inspected
    contexts, or 0.0 when there are none.
    """
    materialized = list(contexts)
    if not materialized:
        return 0.0

    per_context: List[float] = []
    for context in materialized[:5]:
        signals = (
            (context.callers, 0.35),
            (context.callees, 0.35),
            (context.call_chains, 0.30),
        )
        earned = 0.0
        for present, weight in signals:
            if present:
                earned += weight
        per_context.append(earned)
    return _clamp(sum(per_context) / len(per_context))
|
||||
|
||||
|
||||
def exact_match_score(requirement: RequirementSnapshot, hits: List[CodeSearchHit]) -> float:
    """Score literal keyword overlap between the requirement and the evidence.

    Keywords are the tokens of the interface name, interface type, data
    source, data destination and title; if those give no tokens, the first 12
    tokens of the description are used. The score is the fraction of keywords
    that occur as substrings in the lowercased name, qualified name, summary
    and logic flow of the first five hits. Returns 0.0 when there are no hits
    or no keywords.
    """
    if not hits:
        return 0.0

    keywords = _tokens(
        requirement.interface_name or "",
        requirement.interface_type or "",
        requirement.data_source or "",
        requirement.data_destination or "",
        requirement.title or "",
    )
    if not keywords:
        # Fall back to the description, limited to its first 12 tokens.
        keywords = _tokens(requirement.description)[:12]
        if not keywords:
            return 0.0

    fragments: List[str] = []
    for hit in hits[:5]:
        fragments.append(
            f"{hit.evidence.name} {hit.evidence.qualified_name} {hit.evidence.summary} {hit.evidence.logic_flow}"
        )
    haystack = " ".join(fragments).lower()

    found = [keyword for keyword in keywords if keyword.lower() in haystack]
    return _clamp(len(found) / len(keywords))
|
||||
|
||||
|
||||
def coverage_score(
    requirement: RequirementSnapshot,
    hits: List[CodeSearchHit],
    contexts: List[CodeGraphContext],
    judgment: Dict[str, Any],
) -> float:
    """Combine the partial scores into one coverage score in [0, 1].

    Weights: semantic similarity 0.25, acceptance coverage 0.30, evidence
    strength 0.20, call graph 0.15, exact keyword match 0.10. The weighted sum
    is then capped by verdict (``missing`` 0.25, ``uncertain`` 0.55,
    ``conflict`` 0.45), clamped to [0, 1] and rounded to four decimals.
    """
    weighted_components = (
        (semantic_score(hits), 0.25),
        (acceptance_coverage_score(requirement, judgment), 0.30),
        (evidence_strength_score(hits), 0.20),
        (call_graph_score(contexts), 0.15),
        (exact_match_score(requirement, hits), 0.10),
    )
    # Accumulate left to right so the floating-point result does not depend on
    # the summation strategy of the built-in sum().
    total = 0.0
    for component, weight in weighted_components:
        total += component * weight

    verdict = judgment.get("verdict")
    verdict_ceilings = (("missing", 0.25), ("uncertain", 0.55), ("conflict", 0.45))
    for label, ceiling in verdict_ceilings:
        if verdict == label:
            total = min(total, ceiling)
            break
    return round(_clamp(total), 4)
|
||||
|
||||
Reference in New Issue
Block a user