rag-web-ui/backend/app/services/consistency/comparator.py

from __future__ import annotations

import json
import logging
import re
from typing import Any, Dict, Iterable, List, Optional

from app.services.code_kb.adapter import CodeKnowledgeBaseAdapter
from app.services.code_kb.formatter import format_evidence_context
from app.services.code_kb.schema import CodeGraphContext, CodeSearchHit
from app.services.consistency.prompt import build_judgment_prompt, build_requirement_query
from app.services.consistency.schema import ConsistencyResultItem, RequirementSnapshot, VERDICTS
from app.services.consistency.scorer import coverage_score

logger = logging.getLogger(__name__)


def _clip(value: str, limit: int) -> str:
    text = value or ""
    if len(text) <= limit:
        return text
    return text[:limit].rstrip() + "\n...[truncated]"


def _as_list(value: Any) -> List[str]:
    if value is None:
        return []
    if isinstance(value, list):
        return [str(item) for item in value if str(item).strip()]
    if isinstance(value, tuple):
        return [str(item) for item in value if str(item).strip()]
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return []
        try:
            parsed = json.loads(text)
            return _as_list(parsed)
        except json.JSONDecodeError:
            return [line.strip() for line in text.splitlines() if line.strip()]
    return [str(value)]


def requirement_to_snapshot(requirement: Any) -> RequirementSnapshot:
    """Build a ``RequirementSnapshot`` from a dict or an attribute-style object.

    Fields are read with ``dict.get`` when ``requirement`` is a dict and with
    ``getattr(..., None)`` otherwise. Most fields accept either a snake_case
    or a camelCase name; the snake_case value wins when it is truthy.
    ``requirement_uid`` falls back to ``id`` and then to ``""``; ``title`` and
    ``description`` fall back to ``""``; acceptance criteria are coerced to a
    list of strings via ``_as_list``.
    """
    if isinstance(requirement, dict):

        def read(key: str) -> Any:
            return requirement.get(key)

    else:

        def read(key: str) -> Any:
            return getattr(requirement, key, None)

    def either(snake_key: str, camel_key: str) -> Any:
        # Same short-circuit as `a or b`: the camelCase name is only consulted
        # when the snake_case value is falsy.
        return read(snake_key) or read(camel_key)

    uid = read("requirement_uid") or read("id") or ""
    title = read("title") or ""
    description = read("description") or ""
    criteria = _as_list(either("acceptance_criteria", "acceptanceCriteria"))

    return RequirementSnapshot(
        requirement_uid=uid,
        title=title,
        description=description,
        acceptance_criteria=criteria,
        requirement_type=either("requirement_type", "requirementType"),
        section_title=either("section_title", "sectionTitle"),
        interface_name=either("interface_name", "interfaceName"),
        interface_type=either("interface_type", "interfaceType"),
        data_source=either("data_source", "dataSource"),
        data_destination=either("data_destination", "dataDestination"),
    )


class ConsistencyComparator:
    """Compare requirements against function evidence from the code knowledge base.

    For each requirement the comparator searches the knowledge base for
    candidate functions, expands the call context of every hit, obtains a
    judgment (fixed "missing" judgment when nothing was retrieved, heuristic
    judgment when the LLM is disabled, LLM judgment otherwise) and packs the
    outcome into a ``ConsistencyResultItem``.
    """

    def __init__(
        self,
        code_kb_adapter: CodeKnowledgeBaseAdapter,
        llm: Any = None,
        use_llm: bool = True,
    ) -> None:
        """Store the collaborators.

        Args:
            code_kb_adapter: Adapter used for function search and call-context
                expansion.
            llm: Optional model. It is used via ``invoke(prompt)`` when it has
                that attribute, otherwise it is called directly. When ``None``
                a model is created through ``LLMFactory`` on demand.
            use_llm: When ``False`` the heuristic judgment replaces the LLM.
        """
        self.code_kb_adapter = code_kb_adapter
        self.llm = llm
        self.use_llm = use_llm

    def compare_requirements(
        self,
        requirements: Iterable[Any],
        top_k: int = 8,
        max_call_hops: int = 2,
        min_similarity: float = 0.55,
    ) -> List[ConsistencyResultItem]:
        """Run :meth:`compare_requirement` on every requirement, preserving order."""
        return [
            self.compare_requirement(
                requirement,
                top_k=top_k,
                max_call_hops=max_call_hops,
                min_similarity=min_similarity,
            )
            for requirement in requirements
        ]

    def compare_requirement(
        self,
        requirement: Any,
        top_k: int = 8,
        max_call_hops: int = 2,
        min_similarity: float = 0.55,
    ) -> ConsistencyResultItem:
        """Evaluate a single requirement against the code knowledge base.

        Args:
            requirement: Dict or attribute-style object describing the
                requirement (converted with ``requirement_to_snapshot``).
            top_k: Forwarded to the adapter's ``search_functions``.
            max_call_hops: Forwarded as ``max_hops`` to ``expand_call_context``.
            min_similarity: Forwarded to the adapter's ``search_functions``.

        Returns:
            The result item carrying verdict, score, matched functions,
            call-chain evidence and the normalized raw judgment.
        """
        snapshot = requirement_to_snapshot(requirement)
        query = build_requirement_query(snapshot)
        hits = self.code_kb_adapter.search_functions(
            query=query,
            top_k=top_k,
            min_similarity=min_similarity,
        )
        # One call-graph context per hit, index-aligned with `hits`.
        contexts = [
            self.code_kb_adapter.expand_call_context(hit.evidence.node_id, max_hops=max_call_hops)
            for hit in hits
        ]

        if not hits:
            judgment = self._missing_judgment("未找到满足相似度阈值的函数证据。")
        elif not self.use_llm:
            judgment = self._heuristic_judgment(hits, contexts)
        else:
            judgment = self._llm_judgment(snapshot, hits, contexts)

        judgment = self._normalize_judgment(judgment)
        judgment["requirement_snapshot"] = snapshot.to_dict()
        score = coverage_score(snapshot, hits, contexts, judgment)
        matched_functions = [self._matched_function_payload(hit) for hit in hits]
        call_chains = self._collect_call_chains(contexts)

        return ConsistencyResultItem(
            requirement_uid=snapshot.requirement_uid,
            requirement_title=snapshot.title,
            requirement_type=snapshot.requirement_type,
            requirement_text=snapshot.description,
            verdict=judgment["verdict"],
            coverage_score=score,
            confidence=float(judgment.get("confidence") or 0.0),
            matched_functions=matched_functions,
            covered_points=_as_list(judgment.get("covered_points")),
            missing_points=_as_list(judgment.get("missing_points")),
            conflict_points=_as_list(judgment.get("conflict_points")),
            call_chain_evidence=call_chains,
            suggestion=str(judgment.get("suggestion") or ""),
            raw_judgment=judgment,
        )

    def _llm_judgment(
        self,
        requirement: RequirementSnapshot,
        hits: List[CodeSearchHit],
        contexts: List[CodeGraphContext],
    ) -> Dict[str, Any]:
        """Ask the model for a judgment and parse its JSON reply.

        Any exception raised while building the prompt, creating or calling
        the model, or parsing the reply is logged and converted into a
        low-confidence ``"uncertain"`` judgment flagged with
        ``"fallback": True``.
        """
        try:
            evidence_context = format_evidence_context(hits, contexts)
            prompt = build_judgment_prompt(requirement, evidence_context)

            llm = self.llm
            if llm is None:
                # Imported lazily and only when needed, so an injected model
                # never depends on the factory module being importable.
                from app.services.llm.llm_factory import LLMFactory

                llm = LLMFactory.create(temperature=0, streaming=False)
            response = llm.invoke(prompt) if hasattr(llm, "invoke") else llm(prompt)
            # Message-like responses expose `.content`; plain values are used as-is.
            text = getattr(response, "content", response)
            return self.parse_json_judgment(str(text))
        except Exception as exc:
            logger.exception("LLM consistency judgment failed: %s", exc)
            return {
                "verdict": "uncertain",
                "confidence": 0.2,
                "covered_points": [],
                "missing_points": ["模型判定失败，无法可靠确认覆盖情况。"],
                "conflict_points": [],
                "primary_evidence": [hit.evidence.node_id for hit in hits[:3]],
                "reasoning": f"LLM judgment failed: {exc}",
                "suggestion": "请检查模型配置，或人工复核匹配函数证据。",
                "fallback": True,
            }

    @staticmethod
    def parse_json_judgment(raw_text: str) -> Dict[str, Any]:
        """Extract the JSON object carrying the judgment from a model reply.

        A leading Markdown code fence (optionally tagged ``json``) and a
        trailing fence are removed first. If the remaining text is not valid
        JSON, or is valid JSON but not an object (for example an array
        wrapping the object), the span from the first ``{`` to the last ``}``
        is parsed instead.

        Args:
            raw_text: Raw text returned by the model.

        Returns:
            The decoded JSON object.

        Raises:
            json.JSONDecodeError: If no JSON object can be extracted.
        """
        text = raw_text.strip()
        if text.startswith("```"):
            text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
            text = re.sub(r"```$", "", text).strip()
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            match = re.search(r"\{.*\}", text, flags=re.DOTALL)
            if not match:
                raise
            parsed = json.loads(match.group(0))
        else:
            if not isinstance(parsed, dict):
                # Valid JSON but not an object; look for an embedded object.
                match = re.search(r"\{.*\}", text, flags=re.DOTALL)
                if match:
                    parsed = json.loads(match.group(0))
        if not isinstance(parsed, dict):
            # Callers rely on dict access; report a non-object payload as a
            # decode failure so it takes the same error path as invalid JSON.
            raise json.JSONDecodeError("judgment is not a JSON object", text, 0)
        return parsed

    def _heuristic_judgment(
        self,
        hits: List[CodeSearchHit],
        contexts: List[CodeGraphContext],
    ) -> Dict[str, Any]:
        """Build a conservative judgment from retrieval similarity alone.

        The similarity of the first hit decides the verdict: ``"partial"``
        (confidence capped at 0.68) when it is at least 0.78, otherwise
        ``"uncertain"`` (confidence capped at 0.5).
        """
        # NOTE(review): relies on hits[0] being the best match — presumably the
        # adapter returns hits sorted by similarity; confirm against the adapter.
        best = hits[0].similarity if hits else 0.0
        if best >= 0.78:
            verdict = "partial"
            confidence = min(0.68, best)
        else:
            verdict = "uncertain"
            confidence = min(0.5, best)
        return {
            "verdict": verdict,
            "confidence": confidence,
            "covered_points": [],
            "missing_points": ["未启用 LLM 判定，无法细分验收准则覆盖点。"],
            "conflict_points": [],
            "primary_evidence": [hit.evidence.node_id for hit in hits[:3]],
            "reasoning": "仅基于向量召回和调用图生成保守判定。",
            "suggestion": "启用模型判定或人工复核主要匹配函数。",
            "call_context_count": len(contexts),
        }

    def _missing_judgment(self, reason: str) -> Dict[str, Any]:
        """Return the fixed ``"missing"`` judgment, using ``reason`` as explanation."""
        return {
            "verdict": "missing",
            "confidence": 0.75,
            "covered_points": [],
            "missing_points": [reason],
            "conflict_points": [],
            "primary_evidence": [],
            "reasoning": reason,
            "suggestion": "补充代码实现或降低阈值后重新召回，并人工确认是否存在命名差异。",
        }

    def _normalize_judgment(self, judgment: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``judgment`` with a valid verdict, confidence and defaults.

        * ``verdict`` is lower-cased and stripped; values outside ``VERDICTS``
          become ``"uncertain"``.
        * ``confidence`` is converted to ``float`` and clamped to ``[0.0, 1.0]``;
          unparsable or NaN values become ``0.0``.
        * Missing or ``None`` list fields become ``[]`` and missing or ``None``
          text fields become ``""``.

        The input dict is not modified; extra keys are carried over.
        """
        verdict = str(judgment.get("verdict") or "uncertain").strip().lower()
        if verdict not in VERDICTS:
            verdict = "uncertain"

        try:
            confidence = float(judgment.get("confidence", 0.0))
        except (TypeError, ValueError, OverflowError):
            confidence = 0.0
        if confidence != confidence:
            # NaN (accepted by json.loads) would survive min/max clamping as
            # 1.0, i.e. be reported as full confidence.
            confidence = 0.0
        confidence = max(0.0, min(1.0, confidence))

        normalized = dict(judgment)
        normalized["verdict"] = verdict
        normalized["confidence"] = confidence
        # An explicit null is treated like a missing key.
        for key in ("covered_points", "missing_points", "conflict_points", "primary_evidence"):
            if normalized.get(key) is None:
                normalized[key] = []
        for key in ("reasoning", "suggestion"):
            if normalized.get(key) is None:
                normalized[key] = ""
        return normalized

    def _matched_function_payload(self, hit: CodeSearchHit) -> Dict[str, Any]:
        """Flatten one search hit into a plain dict for the result item.

        Long fields are bounded: ``role`` is the first 120 characters of the
        summary, ``logic_flow`` and ``code_snippet`` are clipped to 1200 and
        2000 characters, and ``calls`` / ``called_by`` keep their first 20
        entries.
        """
        item = hit.evidence
        return {
            "node_id": item.node_id,
            "name": item.name,
            "file": item.file,
            "start_line": item.start_line,
            "end_line": item.end_line,
            "similarity": round(hit.similarity, 4),
            "role": item.summary[:120] if item.summary else "",
            "evidence_summary": item.summary,
            "logic_flow": _clip(item.logic_flow, 1200),
            "code_snippet": _clip(item.code_snippet, 2000),
            "calls": item.calls[:20],
            "called_by": item.called_by[:20],
            "signature": item.signature,
        }

    def _collect_call_chains(self, contexts: List[CodeGraphContext]) -> List[str]:
        """Merge call chains from all contexts: first-seen order, de-duplicated, max 30."""
        chains: List[str] = []
        for context in contexts:
            chains.extend(context.call_chains)
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(chains))[:30]
增加代码知识库；修复文档处理内容；增加API设置 2026-05-16 20:20:10 +08:00			`from __future__ import annotations`

			`import json`
			`import logging`
			`import re`
			`from typing import Any, Dict, Iterable, List, Optional`

			`from app.services.code_kb.adapter import CodeKnowledgeBaseAdapter`
			`from app.services.code_kb.formatter import format_evidence_context`
			`from app.services.code_kb.schema import CodeGraphContext, CodeSearchHit`
			`from app.services.consistency.prompt import build_judgment_prompt, build_requirement_query`
			`from app.services.consistency.schema import ConsistencyResultItem, RequirementSnapshot, VERDICTS`
			`from app.services.consistency.scorer import coverage_score`

			`logger = logging.getLogger(__name__)`


			`def _clip(value: str, limit: int) -> str:`
			`text = value or ""`
			`if len(text) <= limit:`
			`return text`
			`return text[:limit].rstrip() + "\n...[truncated]"`


			`def _as_list(value: Any) -> List[str]:`
			`if value is None:`
			`return []`
			`if isinstance(value, list):`
			`return [str(item) for item in value if str(item).strip()]`
			`if isinstance(value, tuple):`
			`return [str(item) for item in value if str(item).strip()]`
			`if isinstance(value, str):`
			`text = value.strip()`
			`if not text:`
			`return []`
			`try:`
			`parsed = json.loads(text)`
			`return _as_list(parsed)`
			`except json.JSONDecodeError:`
			`return [line.strip() for line in text.splitlines() if line.strip()]`
			`return [str(value)]`


			`def requirement_to_snapshot(requirement: Any) -> RequirementSnapshot:`
			`getter = requirement.get if isinstance(requirement, dict) else lambda key, default=None: getattr(requirement, key, default)`
			`return RequirementSnapshot(`
			`requirement_uid=getter("requirement_uid") or getter("id") or "",`
			`title=getter("title") or "",`
			`description=getter("description") or "",`
			`acceptance_criteria=_as_list(getter("acceptance_criteria") or getter("acceptanceCriteria")),`
			`requirement_type=getter("requirement_type") or getter("requirementType"),`
			`section_title=getter("section_title") or getter("sectionTitle"),`
			`interface_name=getter("interface_name") or getter("interfaceName"),`
			`interface_type=getter("interface_type") or getter("interfaceType"),`
			`data_source=getter("data_source") or getter("dataSource"),`
			`data_destination=getter("data_destination") or getter("dataDestination"),`
			`)`


			`class ConsistencyComparator:`
			`def __init__(`
			`self,`
			`code_kb_adapter: CodeKnowledgeBaseAdapter,`
			`llm: Any = None,`
			`use_llm: bool = True,`
			`) -> None:`
			`self.code_kb_adapter = code_kb_adapter`
			`self.llm = llm`
			`self.use_llm = use_llm`

			`def compare_requirements(`
			`self,`
			`requirements: Iterable[Any],`
			`top_k: int = 8,`
			`max_call_hops: int = 2,`
			`min_similarity: float = 0.55,`
			`) -> List[ConsistencyResultItem]:`
			`return [`
			`self.compare_requirement(`
			`requirement,`
			`top_k=top_k,`
			`max_call_hops=max_call_hops,`
			`min_similarity=min_similarity,`
			`)`
			`for requirement in requirements`
			`]`

			`def compare_requirement(`
			`self,`
			`requirement: Any,`
			`top_k: int = 8,`
			`max_call_hops: int = 2,`
			`min_similarity: float = 0.55,`
			`) -> ConsistencyResultItem:`
			`snapshot = requirement_to_snapshot(requirement)`
			`query = build_requirement_query(snapshot)`
			`hits = self.code_kb_adapter.search_functions(`
			`query=query,`
			`top_k=top_k,`
			`min_similarity=min_similarity,`
			`)`
			`contexts = [`
			`self.code_kb_adapter.expand_call_context(hit.evidence.node_id, max_hops=max_call_hops)`
			`for hit in hits`
			`]`

			`if not hits:`
			`judgment = self._missing_judgment("未找到满足相似度阈值的函数证据。")`
			`elif not self.use_llm:`
			`judgment = self._heuristic_judgment(hits, contexts)`
			`else:`
			`judgment = self._llm_judgment(snapshot, hits, contexts)`

			`judgment = self._normalize_judgment(judgment)`
			`judgment["requirement_snapshot"] = snapshot.to_dict()`
			`score = coverage_score(snapshot, hits, contexts, judgment)`
			`matched_functions = [self._matched_function_payload(hit) for hit in hits]`
			`call_chains = self._collect_call_chains(contexts)`

			`return ConsistencyResultItem(`
			`requirement_uid=snapshot.requirement_uid,`
			`requirement_title=snapshot.title,`
			`requirement_type=snapshot.requirement_type,`
			`requirement_text=snapshot.description,`
			`verdict=judgment["verdict"],`
			`coverage_score=score,`
			`confidence=float(judgment.get("confidence") or 0.0),`
			`matched_functions=matched_functions,`
			`covered_points=_as_list(judgment.get("covered_points")),`
			`missing_points=_as_list(judgment.get("missing_points")),`
			`conflict_points=_as_list(judgment.get("conflict_points")),`
			`call_chain_evidence=call_chains,`
			`suggestion=str(judgment.get("suggestion") or ""),`
			`raw_judgment=judgment,`
			`)`

			`def _llm_judgment(`
			`self,`
			`requirement: RequirementSnapshot,`
			`hits: List[CodeSearchHit],`
			`contexts: List[CodeGraphContext],`
			`) -> Dict[str, Any]:`
			`try:`
			`evidence_context = format_evidence_context(hits, contexts)`
			`prompt = build_judgment_prompt(requirement, evidence_context)`
			`from app.services.llm.llm_factory import LLMFactory`

			`llm = self.llm or LLMFactory.create(temperature=0, streaming=False)`
			`response = llm.invoke(prompt) if hasattr(llm, "invoke") else llm(prompt)`
			`text = getattr(response, "content", response)`
			`return self.parse_json_judgment(str(text))`
			`except Exception as exc:`
			`logger.exception("LLM consistency judgment failed: %s", exc)`
			`return {`
			`"verdict": "uncertain",`
			`"confidence": 0.2,`
			`"covered_points": [],`
			`"missing_points": ["模型判定失败，无法可靠确认覆盖情况。"],`
			`"conflict_points": [],`
			`"primary_evidence": [hit.evidence.node_id for hit in hits[:3]],`
			`"reasoning": f"LLM judgment failed: {exc}",`
			`"suggestion": "请检查模型配置，或人工复核匹配函数证据。",`
			`"fallback": True,`
			`}`

			`@staticmethod`
			`def parse_json_judgment(raw_text: str) -> Dict[str, Any]:`
			`text = raw_text.strip()`
			if text.startswith("```"):
			text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
			text = re.sub(r"```$", "", text).strip()
			`try:`
			`return json.loads(text)`
			`except json.JSONDecodeError:`
			`match = re.search(r"\{.*\}", text, flags=re.DOTALL)`
			`if match:`
			`return json.loads(match.group(0))`
			`raise`

			`def _heuristic_judgment(`
			`self,`
			`hits: List[CodeSearchHit],`
			`contexts: List[CodeGraphContext],`
			`) -> Dict[str, Any]:`
			`best = hits[0].similarity if hits else 0.0`
			`if best >= 0.78:`
			`verdict = "partial"`
			`confidence = min(0.68, best)`
			`else:`
			`verdict = "uncertain"`
			`confidence = min(0.5, best)`
			`return {`
			`"verdict": verdict,`
			`"confidence": confidence,`
			`"covered_points": [],`
			`"missing_points": ["未启用 LLM 判定，无法细分验收准则覆盖点。"],`
			`"conflict_points": [],`
			`"primary_evidence": [hit.evidence.node_id for hit in hits[:3]],`
			`"reasoning": "仅基于向量召回和调用图生成保守判定。",`
			`"suggestion": "启用模型判定或人工复核主要匹配函数。",`
			`"call_context_count": len(contexts),`
			`}`

			`def _missing_judgment(self, reason: str) -> Dict[str, Any]:`
			`return {`
			`"verdict": "missing",`
			`"confidence": 0.75,`
			`"covered_points": [],`
			`"missing_points": [reason],`
			`"conflict_points": [],`
			`"primary_evidence": [],`
			`"reasoning": reason,`
			`"suggestion": "补充代码实现或降低阈值后重新召回，并人工确认是否存在命名差异。",`
			`}`

			`def _normalize_judgment(self, judgment: Dict[str, Any]) -> Dict[str, Any]:`
			`verdict = str(judgment.get("verdict") or "uncertain").strip().lower()`
			`if verdict not in VERDICTS:`
			`verdict = "uncertain"`
			`confidence = judgment.get("confidence", 0.0)`
			`try:`
			`confidence = max(0.0, min(1.0, float(confidence)))`
			`except (TypeError, ValueError):`
			`confidence = 0.0`
			`normalized = dict(judgment)`
			`normalized["verdict"] = verdict`
			`normalized["confidence"] = confidence`
			`normalized.setdefault("covered_points", [])`
			`normalized.setdefault("missing_points", [])`
			`normalized.setdefault("conflict_points", [])`
			`normalized.setdefault("primary_evidence", [])`
			`normalized.setdefault("reasoning", "")`
			`normalized.setdefault("suggestion", "")`
			`return normalized`

			`def _matched_function_payload(self, hit: CodeSearchHit) -> Dict[str, Any]:`
			`item = hit.evidence`
			`return {`
			`"node_id": item.node_id,`
			`"name": item.name,`
			`"file": item.file,`
			`"start_line": item.start_line,`
			`"end_line": item.end_line,`
			`"similarity": round(hit.similarity, 4),`
			`"role": item.summary[:120] if item.summary else "",`
			`"evidence_summary": item.summary,`
			`"logic_flow": _clip(item.logic_flow, 1200),`
			`"code_snippet": _clip(item.code_snippet, 2000),`
			`"calls": item.calls[:20],`
			`"called_by": item.called_by[:20],`
			`"signature": item.signature,`
			`}`

			`def _collect_call_chains(self, contexts: List[CodeGraphContext]) -> List[str]:`
			`chains: List[str] = []`
			`for context in contexts:`
			`chains.extend(context.call_chains)`
			`return list(dict.fromkeys(chains))[:30]`