init. project

This commit is contained in:
2026-04-13 11:34:23 +08:00
commit c7c0659a85
202 changed files with 31196 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
from app.services.testing_pipeline.pipeline import run_testing_pipeline
__all__ = ["run_testing_pipeline"]

View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict
@dataclass
class ToolExecutionResult:
    """Result of running a single pipeline tool step."""

    # The (possibly mutated/extended) pipeline context to pass to the next tool.
    context: Dict[str, Any]
    # Short human-readable summary of what this step produced (for step logs).
    output_summary: str
    # True when the step had to fall back instead of using its preferred path
    # (e.g. no usable LLM output) — surfaced in per-step logs.
    fallback_used: bool = False
class TestingTool(ABC):
    """Abstract base class for one step of the testing pipeline."""

    # Stable step identifier, reported in per-step logs.
    name: str

    @abstractmethod
    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Run this step against *context* and return the execution result."""
        raise NotImplementedError

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
from time import perf_counter
from typing import Any, Dict, List, Optional
from uuid import uuid4
from app.services.llm.llm_factory import LLMFactory
from app.services.testing_pipeline.tools import build_default_tool_chain
def _build_input_summary(context: Dict[str, Any]) -> str:
req_text = str(context.get("user_requirement_text", "")).strip()
req_type = str(context.get("requirement_type_input", "")).strip() or "auto"
short_text = req_text if len(req_text) <= 60 else f"{req_text[:60]}..."
return f"requirement_type_input={req_type}; requirement_text={short_text}"
def _build_output_summary(context: Dict[str, Any]) -> str:
req_type_result = context.get("requirement_type_result", {})
req_type = req_type_result.get("requirement_type", "")
test_items = context.get("test_items", {})
test_cases = context.get("test_cases", {})
return (
f"requirement_type={req_type}; "
f"items={len(test_items.get('normal', [])) + len(test_items.get('abnormal', []))}; "
f"cases={len(test_cases.get('normal', [])) + len(test_cases.get('abnormal', []))}"
)
def run_testing_pipeline(
    user_requirement_text: str,
    requirement_type_input: Optional[str] = None,
    debug: bool = False,
    knowledge_context: Optional[str] = None,
    use_model_generation: bool = False,
    max_items_per_group: int = 12,
    cases_per_item: int = 2,
    max_focus_points: int = 6,
    max_llm_calls: int = 10,
) -> Dict[str, Any]:
    """Run the full testing pipeline over a requirement text.

    Executes every tool from build_default_tool_chain() in order, threading a
    shared context dict through the steps, and returns a result dict with the
    detected requirement type, test items/cases/expected results, the
    formatted text output and (when *debug* is true) per-step logs.
    """
    model = None
    if use_model_generation:
        try:
            model = LLMFactory.create(streaming=False)
        except Exception:
            # Model creation is best-effort; tools fall back to rule-based output.
            model = None
    knowledge = (knowledge_context or "").strip()

    def _clamp(value: int, low: int, high: int) -> int:
        # Keep caller-supplied limits inside their sane ranges.
        return max(low, min(int(value), high))

    context: Dict[str, Any] = {
        "trace_id": str(uuid4()),
        "user_requirement_text": user_requirement_text,
        "requirement_type_input": requirement_type_input,
        "debug": bool(debug),
        "knowledge_context": knowledge,
        "knowledge_used": bool(knowledge),
        "use_model_generation": bool(use_model_generation),
        "llm_model": model,
        "max_items_per_group": _clamp(max_items_per_group, 4, 30),
        "cases_per_item": _clamp(cases_per_item, 1, 5),
        "max_focus_points": _clamp(max_focus_points, 3, 12),
        "llm_call_budget": _clamp(max_llm_calls, 0, 100),
    }

    step_logs: List[Dict[str, Any]] = []
    for tool in build_default_tool_chain():
        started_at = perf_counter()
        summary_before = _build_input_summary(context)
        result = tool.execute(context)
        context = result.context
        elapsed_ms = (perf_counter() - started_at) * 1000
        step_logs.append(
            {
                "step_name": tool.name,
                "input_summary": summary_before,
                "output_summary": result.output_summary,
                "success": True,
                "fallback_used": result.fallback_used,
                "duration_ms": round(elapsed_ms, 3),
            }
        )

    type_result = context.get("requirement_type_result", {})
    return {
        "trace_id": context.get("trace_id"),
        "requirement_type": type_result.get("requirement_type", "未知类型"),
        "reason": type_result.get("reason", ""),
        "candidates": type_result.get("candidates", []),
        "test_items": context.get("test_items", {"normal": [], "abnormal": []}),
        "test_cases": context.get("test_cases", {"normal": [], "abnormal": []}),
        "expected_results": context.get("expected_results", {"normal": [], "abnormal": []}),
        "formatted_output": context.get("formatted_output", ""),
        "pipeline_summary": _build_output_summary(context),
        "knowledge_used": bool(context.get("knowledge_used", False)),
        "step_logs": step_logs if debug else [],
    }

View File

@@ -0,0 +1,203 @@
from __future__ import annotations
from typing import Dict, List
# Canonical test-category names recognised by the pipeline; user-provided
# type input is matched verbatim against this list.
REQUIREMENT_TYPES: List[str] = [
    "功能测试",
    "性能测试",
    "外部接口测试",
    "人机交互界面测试",
    "强度测试",
    "余量测试",
    "可靠性测试",
    "安全性测试",
    "恢复性测试",
    "边界测试",
    "安装性测试",
    "互操作性测试",
    "敏感性测试",
    "测试充分性要求",
]
# Per-type "recognition signal" descriptions, appended to the classification
# reason when a type is matched by keyword scoring.
TYPE_SIGNAL_RULES: Dict[str, str] = {
    "功能测试": "关注功能需求逐项验证、业务流程正确性、输入输出行为、状态转换与边界值处理。",
    "性能测试": "关注处理精度、响应时间、处理数据量、系统协调性、负载潜力与运行占用空间。",
    "外部接口测试": "关注外部输入输出接口的格式、内容、协议与正常/异常交互表现。",
    "人机交互界面测试": "关注界面一致性、界面风格、操作流程、误操作健壮性与错误提示能力。",
    "强度测试": "关注系统在极限、超负荷、饱和和降级条件下的稳定性与承受能力。",
    "余量测试": "关注存储余量、输入输出通道余量、功能处理时间余量等资源裕度。",
    "可靠性测试": "关注真实或仿真环境下的失效等级、运行剖面、输入覆盖和长期稳定运行能力。",
    "安全性测试": "关注危险状态响应、安全关键部件、异常输入防护、非法访问阻断和数据完整性保护。",
    "恢复性测试": "关注故障探测、备用切换、系统状态保护与从无错误状态继续执行能力。",
    "边界测试": "关注输入输出域边界、状态转换端点、功能界限、性能界限与容量界限。",
    "安装性测试": "关注不同配置下安装卸载流程和安装规程执行正确性。",
    "互操作性测试": "关注多个软件并行运行时的互操作能力与协同正确性。",
    "敏感性测试": "关注有效输入类中可能引发不稳定或不正常处理的数据组合。",
    "测试充分性要求": "关注需求覆盖率、配置项覆盖、语句覆盖、分支覆盖及未覆盖分析确认。",
}
# Mandatory constraints applied when decomposing a requirement into test
# items; exposed to downstream consumers via context["decompose_force_rules"].
DECOMPOSE_FORCE_RULES: List[str] = [
    "每个软件功能至少应被正常测试与被认可的异常场景覆盖;复杂功能需继续细分。",
    "每个测试项必须语义完整、可直接执行。",
    "覆盖必须包含:正常流程、边界条件(适用时)、异常条件。",
    "粒度需适中,避免过粗或过细。",
    "对未知类型必须执行通用分解,并保持正常/异常分组。",
    "对需求说明未显式给出但在用户手册或操作手册体现的功能,也应补充测试项覆盖。",
]
# Per-type decomposition hints:
#   "keywords" — trigger words used by the classifier's keyword scoring;
#   "normal" / "abnormal" — seed test-item templates for each coverage group.
REQUIREMENT_RULES: Dict[str, Dict[str, List[str]]] = {
    "功能测试": {
        "keywords": ["功能", "业务流程", "输入输出", "状态转换", "边界值"],
        "normal": [
            "正常覆盖功能主路径、基本数据类型、合法边界值与状态转换。",
        ],
        "abnormal": [
            "异常覆盖非法输入、不规则输入、非法边界值与最坏情况。",
        ],
    },
    "性能测试": {
        "keywords": ["性能", "处理精度", "响应时间", "处理数据量", "负载", "占用空间"],
        "normal": [
            "正常覆盖处理精度、响应时间、处理数据量与模块协调性。",
        ],
        "abnormal": [
            "异常覆盖超负荷、软硬件限制、负载潜力上限与资源占用异常。",
        ],
    },
    "外部接口测试": {
        "keywords": ["外部接口", "输入接口", "输出接口", "格式", "内容", "协议", "异常交互"],
        "normal": [
            "正常覆盖全部外部接口格式与内容正确性。",
        ],
        "abnormal": [
            "异常覆盖每个输入输出接口的错误格式、错误内容与异常交互。",
        ],
    },
    "人机交互界面测试": {
        "keywords": ["界面", "风格", "交互", "误操作", "错误提示", "操作流程"],
        "normal": [
            "正常覆盖界面风格一致性与标准操作流程。",
        ],
        "abnormal": [
            "异常覆盖误操作、快速操作、非法输入、错误命令与错误流程提示。",
        ],
    },
    "强度测试": {
        "keywords": ["强度", "极限", "超负荷", "饱和", "降级", "健壮性"],
        "normal": [
            "正常覆盖设计极限下系统功能和性能表现。",
        ],
        "abnormal": [
            "异常覆盖超出极限时的降级行为、健壮性与饱和表现。",
        ],
    },
    "余量测试": {
        "keywords": ["余量", "存储余量", "通道余量", "处理时间余量", "资源裕度"],
        "normal": [
            "正常覆盖存储、通道、处理时间余量是否满足要求。",
        ],
        "abnormal": [
            "异常覆盖余量不足或耗尽时系统告警与受控行为。",
        ],
    },
    "可靠性测试": {
        "keywords": ["可靠性", "运行剖面", "失效等级", "输入覆盖", "长期稳定"],
        "normal": [
            "正常覆盖典型环境、运行剖面与输入变量组合。",
        ],
        "abnormal": [
            "异常覆盖失效等级场景、边界环境变化、不合法输入域及失效记录。",
        ],
    },
    "安全性测试": {
        "keywords": ["安全", "危险状态", "安全关键部件", "非法进入", "完整性", "防护"],
        "normal": [
            "正常覆盖安全关键部件、安全结构与合法操作路径。",
        ],
        "abnormal": [
            "异常覆盖危险状态、故障模式、边界接合部、非法进入与数据完整性保护。",
        ],
    },
    "恢复性测试": {
        "keywords": ["恢复", "故障探测", "备用切换", "状态保护", "继续执行", "reset"],
        "normal": [
            "正常覆盖故障探测、备用切换、恢复后继续执行。",
        ],
        "abnormal": [
            "异常覆盖故障中作业保护、状态保护与恢复失败路径。",
        ],
    },
    "边界测试": {
        "keywords": ["边界", "端点", "输入输出域", "状态转换", "性能界限", "容量界限"],
        "normal": [
            "正常覆盖输入输出域边界、状态转换端点与功能界限。",
        ],
        "abnormal": [
            "异常覆盖性能界限、容量界限和越界端点。",
        ],
    },
    "安装性测试": {
        "keywords": ["安装", "卸载", "配置", "安装规程", "部署", "中断"],
        "normal": [
            "正常覆盖标准及不同配置下安装卸载流程。",
        ],
        "abnormal": [
            "异常覆盖安装规程错误、依赖异常与中断后的处理。",
        ],
    },
    "互操作性测试": {
        "keywords": ["互操作", "并行运行", "协同", "兼容", "冲突", "互操作失败"],
        "normal": [
            "正常覆盖两个或多个软件同时运行与互操作过程。",
        ],
        "abnormal": [
            "异常覆盖互操作失败、并行冲突与协同异常。",
        ],
    },
    "敏感性测试": {
        "keywords": ["敏感性", "输入类", "数据组合", "不稳定", "不正常处理"],
        "normal": [
            "正常覆盖有效输入类中典型数据组合。",
        ],
        "abnormal": [
            "异常覆盖引发不稳定或不正常处理的特殊数据组合。",
        ],
    },
    "测试充分性要求": {
        "keywords": ["测试充分性", "需求覆盖率", "配置项覆盖", "语句覆盖", "分支覆盖", "未覆盖分析"],
        "normal": [
            "正常覆盖需求覆盖率、配置项覆盖与代码覆盖达标。",
        ],
        "abnormal": [
            "异常覆盖未覆盖部分逐项分析、确认与报告输出。",
        ],
    },
}
# Fallback seed templates used when the requirement type is not present in
# REQUIREMENT_RULES (i.e. "未知类型").
GENERIC_DECOMPOSITION_RULES: Dict[str, List[str]] = {
    "normal": [
        "主流程正确性。",
        "合法边界值。",
        "标准输入输出。",
    ],
    "abnormal": [
        "非法输入。",
        "越界输入。",
        "资源异常或状态冲突。",
    ],
}
# Semantics of expected-result placeholders attached to generated test cases;
# keys are the literal placeholder tokens stored on each case.
EXPECTED_RESULT_PLACEHOLDER_MAP: Dict[str, str] = {
    "{{return_value}}": "接口或函数返回值验证。",
    "{{state_change}}": "系统状态变化验证。",
    "{{error_message}}": "异常场景错误信息验证。",
    "{{data_persistence}}": "数据库或存储落库结果验证。",
    "{{ui_display}}": "界面显示反馈验证。",
}

View File

@@ -0,0 +1,867 @@
from __future__ import annotations
import json
import re
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple
from app.services.testing_pipeline.base import TestingTool, ToolExecutionResult
from app.services.testing_pipeline.rules import (
DECOMPOSE_FORCE_RULES,
EXPECTED_RESULT_PLACEHOLDER_MAP,
GENERIC_DECOMPOSITION_RULES,
REQUIREMENT_RULES,
REQUIREMENT_TYPES,
TYPE_SIGNAL_RULES,
)
def _clean_text(value: str) -> str:
return " ".join((value or "").replace("\n", " ").split())
def _truncate_text(value: str, max_len: int = 2000) -> str:
text = _clean_text(value)
if len(text) <= max_len:
return text
return f"{text[:max_len]}..."
def _safe_int(value: Any, default: int, low: int, high: int) -> int:
try:
parsed = int(value)
except Exception:
parsed = default
return max(low, min(parsed, high))
def _strip_instruction_prefix(value: str) -> str:
text = _clean_text(value)
if not text:
return text
lowered = text.lower()
if lowered.startswith("/testing"):
text = _clean_text(text[len("/testing") :])
prefixes = [
"为以下需求生成测试用例",
"根据以下需求生成测试用例",
"请根据以下需求生成测试用例",
"请根据需求生成测试用例",
"请生成测试用例",
"生成测试用例",
]
for prefix in prefixes:
if text.startswith(prefix):
for sep in ("", ":"):
idx = text.find(sep)
if idx != -1:
text = _clean_text(text[idx + 1 :])
break
else:
text = _clean_text(text[len(prefix) :])
break
pattern = re.compile(r"^(请)?(根据|按|基于).{0,40}(需求|场景).{0,30}(生成|输出).{0,20}(测试项|测试用例)[:]")
matched = pattern.match(text)
if matched:
text = _clean_text(text[matched.end() :])
return text
def _extract_focus_points(value: str, max_points: int = 6) -> List[str]:
    """Extract up to *max_points* distinct focus phrases from a requirement text.

    Splits on Chinese/ASCII punctuation, drops very short or instruction-like
    fragments, and prefers segments mentioning control/fault keywords.
    """
    text = _strip_instruction_prefix(value)
    if not text:
        return []
    segments: List[str] = []
    for raw_part in re.split(r"[,。;;]", text):
        cleaned = _clean_text(raw_part)
        if cleaned:
            segments.append(cleaned)
    # Filter out noise that is about the instruction rather than the requirement.
    ignored_tokens = ["生成测试用例", "测试项分解", "测试用例生成", "以下需求"]
    meaningful = [
        segment
        for segment in segments
        if len(segment) >= 4 and all(token not in segment for token in ignored_tokens)
    ]
    if not meaningful:
        meaningful = segments
    priority_keywords = [
        "启停",
        "开启",
        "关闭",
        "远程控制",
        "保护",
        "联动",
        "状态",
        "故障",
        "恢复",
        "切换",
        "告警",
        "模式",
        "边界",
        "时序",
    ]
    prioritized = [
        segment
        for segment in meaningful
        if any(keyword in segment for keyword in priority_keywords)
    ]
    pool = prioritized if prioritized else meaningful
    # Preserve first-seen order while removing duplicates.
    unique: List[str] = []
    for segment in pool:
        if segment not in unique:
            unique.append(segment)
    return unique[:max_points]
def _build_type_scores(text: str) -> Dict[str, int]:
    """Score every known requirement type against *text* (name hit = 5, keyword hit = 2)."""
    lowered = text.lower()
    scores: Dict[str, int] = {}
    for req_type, rule in REQUIREMENT_RULES.items():
        # An exact mention of the type name is the strongest signal.
        hit_score = 5 if req_type in text else 0
        hit_score += sum(
            2 for keyword in rule.get("keywords", []) if keyword.lower() in lowered
        )
        scores[req_type] = hit_score
    return scores
def _top_candidates(scores: Dict[str, int], top_n: int = 3) -> List[str]:
sorted_pairs = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
non_zero = [name for name, score in sorted_pairs if score > 0]
if non_zero:
return non_zero[:top_n]
return ["功能测试", "边界测试", "性能测试"][:top_n]
def _message_to_text(value: Any) -> str:
content = getattr(value, "content", value)
if isinstance(content, str):
return content
if isinstance(content, list):
chunks: List[str] = []
for item in content:
if isinstance(item, str):
chunks.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
chunks.append(text)
else:
chunks.append(str(item))
return "".join(chunks)
return str(content)
def _extract_json_object(value: str) -> Optional[Dict[str, Any]]:
text = (value or "").strip()
if not text:
return None
if text.startswith("```"):
text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
if text.endswith("```"):
text = text[:-3].strip()
try:
data = json.loads(text)
if isinstance(data, dict):
return data
except Exception:
pass
start = text.find("{")
if start == -1:
return None
depth = 0
for idx in range(start, len(text)):
ch = text[idx]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
fragment = text[start : idx + 1]
try:
data = json.loads(fragment)
if isinstance(data, dict):
return data
except Exception:
return None
return None
def _invoke_llm_json(context: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
    """Call the configured LLM and parse its reply as a JSON object.

    Returns None when model generation is disabled, the call budget is
    exhausted, or the call/parsing fails. The budget counter in *context*
    is decremented for every attempted call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return None
    remaining = context.get("llm_call_budget")
    if isinstance(remaining, int):
        if remaining <= 0:
            return None
        context["llm_call_budget"] = remaining - 1
    try:
        reply = model.invoke(prompt)
        return _extract_json_object(_message_to_text(reply))
    except Exception:
        # LLM usage is best-effort; callers fall back to rule-based output.
        return None
def _invoke_llm_text(context: Dict[str, Any], prompt: str) -> str:
    """Call the configured LLM and return its reply as cleaned plain text.

    Returns "" when model generation is disabled, the call budget is
    exhausted, or the call fails. The budget counter in *context* is
    decremented for every attempted call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return ""
    remaining = context.get("llm_call_budget")
    if isinstance(remaining, int):
        if remaining <= 0:
            return ""
        context["llm_call_budget"] = remaining - 1
    try:
        reply = model.invoke(prompt)
        return _clean_text(_message_to_text(reply))
    except Exception:
        # LLM usage is best-effort; callers fall back to rule-based output.
        return ""
def _normalize_item_entry(item: Any) -> Optional[Dict[str, Any]]:
if isinstance(item, str):
content = _clean_text(item)
if not content:
return None
return {"content": content, "coverage_tags": []}
if isinstance(item, dict):
content = _clean_text(str(item.get("content", "")))
if not content:
return None
tags = item.get("coverage_tags") or item.get("covered_points") or []
if not isinstance(tags, list):
tags = [str(tags)]
tags = [_clean_text(str(tag)) for tag in tags if _clean_text(str(tag))]
return {"content": content, "coverage_tags": tags}
return None
def _dedupe_items(items: List[Dict[str, Any]], max_items: int) -> List[Dict[str, Any]]:
merged: Dict[str, Dict[str, Any]] = {}
for item in items:
content = _clean_text(item.get("content", ""))
if not content:
continue
existing = merged.get(content)
if existing is None:
merged[content] = {
"content": content,
"coverage_tags": list(item.get("coverage_tags") or []),
}
else:
existing_tags = set(existing.get("coverage_tags") or [])
for tag in item.get("coverage_tags") or []:
if tag and tag not in existing_tags:
existing_tags.add(tag)
existing["coverage_tags"] = list(existing_tags)
deduped = list(merged.values())
return deduped[:max_items]
def _pick_expected_result_placeholder(content: str, abnormal: bool) -> str:
text = content or ""
if abnormal or any(token in text for token in ["非法", "异常", "错误", "拒绝", "越界", "失败"]):
return "{{error_message}}"
if any(token in text for token in ["状态", "切换", "转换", "恢复"]):
return "{{state_change}}"
if any(token in text for token in ["数据库", "存储", "落库", "持久化"]):
return "{{data_persistence}}"
if any(token in text for token in ["界面", "UI", "页面", "按钮", "提示"]):
return "{{ui_display}}"
return "{{return_value}}"
class IdentifyRequirementTypeTool(TestingTool):
    """Pipeline step 1: decide which test type the requirement belongs to.

    An explicitly provided valid type wins outright; otherwise the text is
    scored against keyword rules, falling back to "未知类型" (plus the
    closest candidates) when nothing scores above zero.
    """

    name = "identify-requirement-type"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        raw_text = _clean_text(context.get("user_requirement_text", ""))
        text = _strip_instruction_prefix(raw_text)
        if not text:
            # Prefix stripping consumed everything; keep the raw text.
            text = raw_text
        max_focus_points = _safe_int(context.get("max_focus_points"), 6, 3, 12)
        provided_type = _clean_text(context.get("requirement_type_input", ""))
        focus_points = _extract_focus_points(text, max_points=max_focus_points)
        fallback_used = False
        if provided_type in REQUIREMENT_TYPES:
            # Explicit, valid user choice: skip scoring entirely.
            result = {
                "requirement_type": provided_type,
                "reason": "用户已显式指定需求类型,系统按指定类型执行。",
                "candidates": [],
                "scores": {},
                "secondary_types": [],
            }
        else:
            scores = _build_type_scores(text)
            sorted_pairs = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
            best_type, best_score = sorted_pairs[0]
            # Up to three runner-up types that still scored above zero.
            secondary = [name for name, score in sorted_pairs[1:4] if score > 0]
            if best_score <= 0:
                # No rule matched at all: report "unknown" with nearest candidates.
                fallback_used = True
                candidates = _top_candidates(scores)
                result = {
                    "requirement_type": "未知类型",
                    "reason": "未命中明确分类规则,已回退到未知类型并提供最接近候选。",
                    "candidates": candidates,
                    "scores": scores,
                    "secondary_types": [],
                }
            else:
                signal = TYPE_SIGNAL_RULES.get(best_type, "")
                result = {
                    "requirement_type": best_type,
                    "reason": f"命中{best_type}识别信号。{signal}",
                    "candidates": [],
                    "scores": scores,
                    "secondary_types": secondary,
                }
        # Publish classification results for the downstream tools.
        context["requirement_type_result"] = result
        context["normalized_requirement_text"] = text
        context["requirement_focus_points"] = focus_points
        context["knowledge_used"] = bool(context.get("knowledge_context"))
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"type={result['requirement_type']}; candidates={len(result['candidates'])}; "
                f"secondary_types={len(result.get('secondary_types', []))}; focus_points={len(focus_points)}"
            ),
            fallback_used=fallback_used,
        )
class DecomposeTestItemsTool(TestingTool):
    """Pipeline step 2: expand the requirement into normal/abnormal test items.

    Merges LLM-generated items (when available) with rule-based seed items;
    the seeds guarantee coverage even when the model is disabled or fails.
    """

    name = "decompose-test-items"

    @staticmethod
    def _seed_items(
        req_type: str,
        req_text: str,
        focus_points: List[str],
        max_items: int,
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Build rule-based (normal, abnormal) item lists for *req_type* and *focus_points*."""
        if req_type in REQUIREMENT_RULES:
            source_rules = REQUIREMENT_RULES[req_type]
            normal_templates = list(source_rules.get("normal", []))
            abnormal_templates = list(source_rules.get("abnormal", []))
        else:
            # Unknown type: fall back to the generic decomposition templates.
            normal_templates = list(GENERIC_DECOMPOSITION_RULES["normal"])
            abnormal_templates = list(GENERIC_DECOMPOSITION_RULES["abnormal"])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        for template in normal_templates:
            normal.append({"content": template, "coverage_tags": [req_type]})
        for template in abnormal_templates:
            abnormal.append({"content": template, "coverage_tags": [req_type]})
        # Two normal + two abnormal items per extracted focus point.
        for point in focus_points:
            normal.extend(
                [
                    {
                        "content": f"验证{point}在标准作业流程下稳定执行且结果符合业务约束。",
                        "coverage_tags": [point, "正常流程"],
                    },
                    {
                        "content": f"验证{point}与相关联动控制、状态同步和回执反馈的一致性。",
                        "coverage_tags": [point, "联动一致性"],
                    },
                ]
            )
            abnormal.extend(
                [
                    {
                        "content": f"验证{point}在非法输入、错误指令或权限异常时的保护与拒绝机制。",
                        "coverage_tags": [point, "异常输入"],
                    },
                    {
                        "content": f"验证{point}在边界条件、时序冲突或设备故障下的告警和恢复行为。",
                        "coverage_tags": [point, "边界异常"],
                    },
                ]
            )
        # Requirements mentioning manuals get an extra manual-coverage item.
        if any(token in req_text for token in ["手册", "操作手册", "用户手册", "作业指导"]):
            normal.append(
                {
                    "content": "验证需求说明未显式给出但在用户手册或操作手册体现的功能流程。",
                    "coverage_tags": ["手册功能"],
                }
            )
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    @staticmethod
    def _generate_by_llm(context: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Ask the LLM for (normal, abnormal) item lists; returns ([], []) when unavailable."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text", "")
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=2500)
        prompt = f"""
你是资深测试分析师。请根据需求、分解规则和知识库片段,生成尽可能覆盖要点的测试项。
需求文本:{req_text}
需求类型:{req_type}
需求要点:{focus_points}
知识库片段:{knowledge_context or ''}
分解约束:
1. 正常测试与异常测试必须分组输出。
2. 每条测试项必须可执行、可验证,避免模板化空话。
3. 尽可能覆盖全部需求要点每组建议输出6-{max_items}条。
4. 优先生成与需求对象/控制逻辑/异常处理/边界条件强相关的测试项。
请仅输出 JSON 对象,结构如下:
{{
"normal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
],
"abnormal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return [], []
        normal_raw = data.get("normal_test_items", [])
        abnormal_raw = data.get("abnormal_test_items", [])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        # Silently drop malformed entries; only well-formed items survive.
        for item in normal_raw if isinstance(normal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                normal.append(normalized)
        for item in abnormal_raw if isinstance(abnormal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                abnormal.append(normalized)
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Produce context["test_items"] with id-tagged normal (N*) and abnormal (E*) items."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text") or _strip_instruction_prefix(
            context.get("user_requirement_text", "")
        )
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        seeded_normal, seeded_abnormal = self._seed_items(req_type, req_text, focus_points, max_items)
        llm_normal, llm_abnormal = self._generate_by_llm(context)
        # LLM items go first so they win the content-level dedupe over seeds.
        merged_normal = _dedupe_items(llm_normal + seeded_normal, max_items)
        merged_abnormal = _dedupe_items(llm_abnormal + seeded_abnormal, max_items)
        fallback_used = not bool(llm_normal or llm_abnormal)
        normal_items: List[Dict[str, Any]] = []
        abnormal_items: List[Dict[str, Any]] = []
        for idx, item in enumerate(merged_normal, start=1):
            normal_items.append(
                {
                    "id": f"N{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        for idx, item in enumerate(merged_abnormal, start=1):
            abnormal_items.append(
                {
                    "id": f"E{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        context["test_items"] = {
            "normal": normal_items,
            "abnormal": abnormal_items,
        }
        context["decompose_force_rules"] = DECOMPOSE_FORCE_RULES
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_items={len(normal_items)}; abnormal_items={len(abnormal_items)}; "
                f"llm_items={len(llm_normal) + len(llm_abnormal)}"
            ),
            fallback_used=fallback_used,
        )
class GenerateTestCasesTool(TestingTool):
    """Pipeline step 3: turn each test item into concrete test cases.

    Tries the LLM per item first and falls back to two templated cases
    (from _build_fallback_steps) when the model is unavailable or its
    answer fails validation.
    """

    name = "generate-test-cases"

    @staticmethod
    def _build_fallback_steps(item_content: str, abnormal: bool, variant: str) -> List[str]:
        """Return a fixed six-step operation script for the given item/variant."""
        if abnormal:
            return [
                "确认测试前置环境、设备状态与日志采集开关已准备就绪。",
                f"准备异常场景“{variant}”所需的输入数据、操作账号和触发条件。",
                f"在目标对象执行异常触发操作,重点验证:{item_content}",
                "持续观察系统返回码、错误文案、告警信息与日志链路完整性。",
                "检查保护机制是否生效,包括拒绝策略、回滚行为和状态一致性。",
                "记录证据并复位环境,确认异常处理后系统可恢复到稳定状态。",
            ]
        return [
            "确认测试环境、设备连接状态和前置业务数据均已初始化。",
            f"准备“{variant}”所需输入参数、操作路径和判定阈值。",
            f"在目标对象执行业务控制流程,重点验证:{item_content}",
            "校验关键返回值、状态变化、控制回执及界面或接口反馈结果。",
            "检查联动模块、日志记录和数据落库是否满足一致性要求。",
            "沉淀测试证据并恢复环境,确保后续用例可重复执行。",
        ]

    def _generate_cases_by_llm(
        self,
        context: Dict[str, Any],
        item: Dict[str, Any],
        abnormal: bool,
        cases_per_item: int,
    ) -> List[Dict[str, Any]]:
        """Ask the LLM for cases for one item; returns [] when unusable."""
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1800)
        # NOTE(review): max(cases_per_item + 1, cases_per_item) is always
        # cases_per_item + 1 — the max() call looks redundant.
        prompt = f"""
你是资深测试工程师。请围绕给定测试项生成详细测试用例。
需求:{req_text}
测试项:{item.get('content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
知识库片段:{knowledge_context or ''}
要求:
1. 生成 {cases_per_item}-{max(cases_per_item + 1, cases_per_item)} 条测试用例。
2. 每条用例包含 test_content 与 operation_steps。
3. operation_steps 必须详细至少5步包含前置、执行、观察、校验与证据留存。
4. 内容必须围绕当前测试项,不要输出空洞模板。
仅输出 JSON
{{
"test_cases": [
{{
"title": "...",
"test_content": "...",
"operation_steps": ["...", "..."]
}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return []
        raw_cases = data.get("test_cases", [])
        if not isinstance(raw_cases, list):
            return []
        normalized_cases: List[Dict[str, Any]] = []
        for case in raw_cases:
            if not isinstance(case, dict):
                continue
            test_content = _clean_text(str(case.get("test_content", "")))
            if not test_content:
                continue
            steps = case.get("operation_steps", [])
            if not isinstance(steps, list):
                continue
            cleaned_steps = [_clean_text(str(step)) for step in steps if _clean_text(str(step))]
            # Reject under-specified cases; the prompt demands >= 5 steps.
            if len(cleaned_steps) < 5:
                continue
            normalized_cases.append(
                {
                    "title": _clean_text(str(case.get("title", ""))),
                    "test_content": test_content,
                    "operation_steps": cleaned_steps,
                }
            )
        return normalized_cases[: max(1, cases_per_item)]

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Build context["test_cases"] with per-item cases plus expected-result placeholders."""
        test_items = context.get("test_items", {})
        cases_per_item = _safe_int(context.get("cases_per_item"), 2, 1, 5)
        normal_cases: List[Dict[str, Any]] = []
        abnormal_cases: List[Dict[str, Any]] = []
        llm_case_count = 0
        for item in test_items.get("normal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=False, cases_per_item=cases_per_item)
            if not generated:
                # Fallback: two templated cases, trimmed to the per-item cap.
                generated = [
                    {
                        "title": "标准流程验证",
                        "test_content": f"验证{item['content']}",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "标准流程"),
                    },
                    {
                        "title": "边界与联动验证",
                        "test_content": f"验证{item['content']}在边界条件和联动场景下的稳定性",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "边界与联动"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=False)
                normal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        for item in test_items.get("abnormal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=True, cases_per_item=cases_per_item)
            if not generated:
                # Same fallback strategy for the abnormal group.
                generated = [
                    {
                        "title": "非法输入与权限异常验证",
                        "test_content": f"验证{item['content']}在非法输入与权限异常下的处理表现",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "非法输入与权限异常"),
                    },
                    {
                        "title": "故障与时序冲突验证",
                        "test_content": f"验证{item['content']}在故障和时序冲突场景下的保护行为",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "故障与时序冲突"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=True)
                abnormal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        context["test_cases"] = {
            "normal": normal_cases,
            "abnormal": abnormal_cases,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_cases={len(normal_cases)}; abnormal_cases={len(abnormal_cases)}; llm_cases={llm_case_count}"
            ),
            fallback_used=llm_case_count == 0,
        )
class BuildExpectedResultsTool(TestingTool):
    """Pipeline step 4: attach one verifiable expected result per test case.

    NOTE(review): this step's name uses underscores while earlier steps use
    hyphens ("identify-requirement-type") — possibly an inconsistency worth
    aligning with the step-log consumers.
    """

    name = "build_expected_results"

    def _expected_for_case(self, context: Dict[str, Any], case: Dict[str, Any], abnormal: bool) -> str:
        """Return an expected-result sentence: LLM first, placeholder-based templates as fallback."""
        placeholder = case.get("expected_result_placeholder", "{{return_value}}")
        if placeholder not in EXPECTED_RESULT_PLACEHOLDER_MAP:
            placeholder = "{{return_value}}"
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1200)
        prompt = f"""
请基于以下信息生成一条可验证、可度量的测试预期结果,避免模板化空话。
需求:{req_text}
测试内容:{case.get('test_content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
占位符语义:{placeholder} -> {EXPECTED_RESULT_PLACEHOLDER_MAP.get(placeholder, '')}
知识库片段:{knowledge_context or ''}
输出要求:
1. 仅输出一句中文预期结果。
2. 结果必须可判定成功/失败。
3. 包含关键观测项(返回值、状态、告警、日志、数据一致性中的相关项)。
""".strip()
        llm_text = _invoke_llm_text(context, prompt)
        if llm_text:
            # Keep expected results to a single short sentence.
            return _truncate_text(llm_text, max_len=220)
        # Rule-based fallbacks keyed off the placeholder semantics.
        test_content = _clean_text(case.get("test_content", ""))
        if placeholder == "{{error_message}}":
            return f"触发{test_content}后,系统应返回明确错误码与错误文案,拒绝非法请求且核心状态保持一致。"
        if placeholder == "{{state_change}}":
            return f"执行{test_content}后,系统状态转换应符合需求定义,状态变化可被日志与回执共同验证。"
        if placeholder == "{{data_persistence}}":
            return f"执行{test_content}后,数据库或存储层应产生符合约束的持久化结果且无脏数据。"
        if placeholder == "{{ui_display}}":
            return f"执行{test_content}后,界面应展示与控制结果一致的反馈信息且提示可被用户执行。"
        if abnormal:
            return f"执行异常场景“{test_content}”后,系统应触发保护策略并输出可追溯日志,业务状态保持可恢复。"
        return f"执行“{test_content}”后,返回值与状态变化应满足需求约束,关键结果可通过日志或回执验证。"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Build context["expected_results"] mirroring the normal/abnormal case lists."""
        test_cases = context.get("test_cases", {})
        normal_expected: List[Dict[str, str]] = []
        abnormal_expected: List[Dict[str, str]] = []
        for case in test_cases.get("normal", []):
            normal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=False),
                }
            )
        for case in test_cases.get("abnormal", []):
            abnormal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=True),
                }
            )
        context["expected_results"] = {
            "normal": normal_expected,
            "abnormal": abnormal_expected,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_expected={len(normal_expected)}; abnormal_expected={len(abnormal_expected)}"
            ),
        )
class FormatOutputTool(TestingTool):
    """Pipeline final step: render items, cases and expected results as text.

    Fix: the "(对应测试项 …" / "(对应用例 …" cross-reference markers were
    opened but never closed in three format strings, so the reference ran
    straight into the following text; the closing ")" is now emitted.

    NOTE(review): this step's name uses underscores while earlier steps use
    hyphens — possibly worth aligning.
    """

    name = "format_output"

    @staticmethod
    def _format_case_block(case: Dict[str, Any], index: int) -> List[str]:
        """Render one test case as a numbered block of output lines."""
        item_id = case.get("item_id", case.get("id", ""))
        title = _clean_text(case.get("title", ""))
        block: List[str] = []
        block.append(
            f"{index}. [用例 {case['id']}](对应测试项 {item_id}){case.get('test_content', '')}"
        )
        if title:
            block.append(f" 场景标题:{title}")
        block.append(" 操作步骤:")
        for step_idx, step in enumerate(case.get("operation_steps", []), start=1):
            block.append(f" {step_idx}) {step}")
        return block

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Assemble context["formatted_output"] (text) and context["structured_output"] (dict)."""
        test_items = context.get("test_items", {"normal": [], "abnormal": []})
        test_cases = context.get("test_cases", {"normal": [], "abnormal": []})
        expected_results = context.get("expected_results", {"normal": [], "abnormal": []})
        lines: List[str] = []
        # Section 1: test items.
        lines.append("**测试项**")
        lines.append("")
        lines.append("**正常测试**")
        for index, item in enumerate(test_items.get("normal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        lines.append("**异常测试**")
        for index, item in enumerate(test_items.get("abnormal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        # Section 2: test cases.
        lines.append("**测试用例**")
        lines.append("")
        lines.append("**正常测试**")
        for index, case in enumerate(test_cases.get("normal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        lines.append("**异常测试**")
        for index, case in enumerate(test_cases.get("abnormal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        # Section 3: expected results.
        lines.append("**预期成果**")
        lines.append("")
        lines.append("**正常测试**")
        for index, expected in enumerate(expected_results.get("normal", []), start=1):
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        lines.append("")
        lines.append("**异常测试**")
        for index, expected in enumerate(expected_results.get("abnormal", []), start=1):
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        context["formatted_output"] = "\n".join(lines)
        context["structured_output"] = {
            "test_items": test_items,
            "test_cases": test_cases,
            "expected_results": expected_results,
        }
        return ToolExecutionResult(
            context=context,
            output_summary="formatted_sections=3",
        )
def build_default_tool_chain() -> List[TestingTool]:
    """Return the pipeline steps in execution order:
    classify -> decompose -> generate cases -> expected results -> format."""
    return [
        IdentifyRequirementTypeTool(),
        DecomposeTestItemsTool(),
        GenerateTestCasesTool(),
        BuildExpectedResultsTool(),
        FormatOutputTool(),
    ]