Files
rag_agent/rag-web-ui/backend/app/services/testing_pipeline/tools.py
2026-04-13 11:34:23 +08:00

868 lines
32 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from __future__ import annotations
import json
import re
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple
from app.services.testing_pipeline.base import TestingTool, ToolExecutionResult
from app.services.testing_pipeline.rules import (
DECOMPOSE_FORCE_RULES,
EXPECTED_RESULT_PLACEHOLDER_MAP,
GENERIC_DECOMPOSITION_RULES,
REQUIREMENT_RULES,
REQUIREMENT_TYPES,
TYPE_SIGNAL_RULES,
)
def _clean_text(value: str) -> str:
return " ".join((value or "").replace("\n", " ").split())
def _truncate_text(value: str, max_len: int = 2000) -> str:
    """Clean *value* and cap it to *max_len* characters, appending "..." when cut."""
    cleaned = _clean_text(value)
    if len(cleaned) > max_len:
        return f"{cleaned[:max_len]}..."
    return cleaned
def _safe_int(value: Any, default: int, low: int, high: int) -> int:
try:
parsed = int(value)
except Exception:
parsed = default
return max(low, min(parsed, high))
def _strip_instruction_prefix(value: str) -> str:
    """Strip the leading /testing command and boilerplate "generate test cases" phrasing.

    Returns the cleaned requirement text; instruction wording is removed so only
    the actual requirement remains for downstream classification.
    """
    text = _clean_text(value)
    if not text:
        return text
    lowered = text.lower()
    if lowered.startswith("/testing"):
        text = _clean_text(text[len("/testing") :])
    prefixes = [
        "为以下需求生成测试用例",
        "根据以下需求生成测试用例",
        "请根据以下需求生成测试用例",
        "请根据需求生成测试用例",
        "请生成测试用例",
        "生成测试用例",
    ]
    for prefix in prefixes:
        if text.startswith(prefix):
            # BUG FIX: the separator tuple was ("", ":"); find("") always returns 0,
            # which unconditionally chopped the first character. Use the full-width
            # and ASCII colons that actually terminate the boilerplate.
            for sep in ("：", ":"):
                idx = text.find(sep)
                if idx != -1:
                    text = _clean_text(text[idx + 1 :])
                    break
            else:
                # No colon anywhere: drop just the matched prefix.
                text = _clean_text(text[len(prefix) :])
            break
    # BUG FIX: the trailing character class was [:] only; Chinese instructions
    # normally end with the full-width colon "：".
    pattern = re.compile(r"^(请)?(根据|按|基于).{0,40}(需求|场景).{0,30}(生成|输出).{0,20}(测试项|测试用例)[：:]")
    matched = pattern.match(text)
    if matched:
        text = _clean_text(text[matched.end() :])
    return text
def _extract_focus_points(value: str, max_points: int = 6) -> List[str]:
    """Split the requirement text into up to *max_points* short focus phrases.

    Clauses are split on Chinese/ASCII punctuation, boilerplate fragments are
    filtered out, and clauses containing domain keywords are prioritized.
    """
    text = _strip_instruction_prefix(value)
    if not text:
        return []
    # BUG FIX: the split class omitted the full-width comma "，", so
    # comma-separated Chinese clauses were never split into focus points.
    parts = [_clean_text(part) for part in re.split(r"[,，。;；]", text)]
    parts = [part for part in parts if part]
    ignored_tokens = ["生成测试用例", "测试项分解", "测试用例生成", "以下需求"]
    filtered = [
        part
        for part in parts
        if len(part) >= 4 and not any(token in part for token in ignored_tokens)
    ]
    if not filtered:
        # Everything looked like boilerplate; fall back to the raw clauses.
        filtered = parts
    priority_keywords = [
        "启停",
        "开启",
        "关闭",
        "远程控制",
        "保护",
        "联动",
        "状态",
        "故障",
        "恢复",
        "切换",
        "告警",
        "模式",
        "边界",
        "时序",
    ]
    priority = [part for part in filtered if any(keyword in part for keyword in priority_keywords)]
    candidates = priority if priority else filtered
    # Dedupe while preserving first-seen order.
    unique: List[str] = []
    for part in candidates:
        if part not in unique:
            unique.append(part)
    return unique[:max_points]
def _build_type_scores(text: str) -> Dict[str, int]:
    """Score every known requirement type against *text*.

    An exact type-name occurrence scores 5; each configured keyword hit
    (case-insensitive) adds 2.
    """
    lowered = text.lower()
    scores: Dict[str, int] = {}
    for req_type, rule in REQUIREMENT_RULES.items():
        hits = 5 if req_type in text else 0
        hits += sum(2 for keyword in rule.get("keywords", []) if keyword.lower() in lowered)
        scores[req_type] = hits
    return scores
def _top_candidates(scores: Dict[str, int], top_n: int = 3) -> List[str]:
sorted_pairs = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
non_zero = [name for name, score in sorted_pairs if score > 0]
if non_zero:
return non_zero[:top_n]
return ["功能测试", "边界测试", "性能测试"][:top_n]
def _message_to_text(value: Any) -> str:
content = getattr(value, "content", value)
if isinstance(content, str):
return content
if isinstance(content, list):
chunks: List[str] = []
for item in content:
if isinstance(item, str):
chunks.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
chunks.append(text)
else:
chunks.append(str(item))
return "".join(chunks)
return str(content)
def _extract_json_object(value: str) -> Optional[Dict[str, Any]]:
text = (value or "").strip()
if not text:
return None
if text.startswith("```"):
text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
if text.endswith("```"):
text = text[:-3].strip()
try:
data = json.loads(text)
if isinstance(data, dict):
return data
except Exception:
pass
start = text.find("{")
if start == -1:
return None
depth = 0
for idx in range(start, len(text)):
ch = text[idx]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
fragment = text[start : idx + 1]
try:
data = json.loads(fragment)
if isinstance(data, dict):
return data
except Exception:
return None
return None
def _invoke_llm_json(context: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
    """Call the context's LLM with *prompt* and parse a JSON-object reply.

    Returns None when model generation is disabled, the call budget is spent,
    or the reply cannot be parsed. Decrements ``llm_call_budget`` on each call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return None
    budget = context.get("llm_call_budget")
    if isinstance(budget, int):
        if budget <= 0:
            return None
        context["llm_call_budget"] = budget - 1
    try:
        reply = model.invoke(prompt)
        return _extract_json_object(_message_to_text(reply))
    except Exception:
        # Any model or parsing failure degrades to "no LLM result".
        return None
def _invoke_llm_text(context: Dict[str, Any], prompt: str) -> str:
    """Call the context's LLM with *prompt* and return its reply as cleaned text.

    Returns "" when model generation is disabled, the call budget is spent, or
    the call fails. Decrements ``llm_call_budget`` on each call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return ""
    budget = context.get("llm_call_budget")
    if isinstance(budget, int):
        if budget <= 0:
            return ""
        context["llm_call_budget"] = budget - 1
    try:
        reply = model.invoke(prompt)
        return _clean_text(_message_to_text(reply))
    except Exception:
        return ""
def _normalize_item_entry(item: Any) -> Optional[Dict[str, Any]]:
    """Coerce a raw LLM item (str or dict) into {"content", "coverage_tags"} or None."""
    if isinstance(item, str):
        content = _clean_text(item)
        return {"content": content, "coverage_tags": []} if content else None
    if not isinstance(item, dict):
        return None
    content = _clean_text(str(item.get("content", "")))
    if not content:
        return None
    # Accept either key the model might use for tags; coerce scalars to a list.
    raw_tags = item.get("coverage_tags") or item.get("covered_points") or []
    if not isinstance(raw_tags, list):
        raw_tags = [str(raw_tags)]
    cleaned_tags: List[str] = []
    for tag in raw_tags:
        tag_text = _clean_text(str(tag))
        if tag_text:
            cleaned_tags.append(tag_text)
    return {"content": content, "coverage_tags": cleaned_tags}
def _dedupe_items(items: List[Dict[str, Any]], max_items: int) -> List[Dict[str, Any]]:
    """Merge items with identical content, unioning their coverage tags.

    Keeps first-seen item order and caps the result at *max_items*.
    """
    merged: Dict[str, Dict[str, Any]] = {}
    for item in items:
        content = _clean_text(item.get("content", ""))
        if not content:
            continue
        entry = merged.setdefault(content, {"content": content, "coverage_tags": []})
        for tag in item.get("coverage_tags") or []:
            # BUG FIX: the set()-based union produced a nondeterministic tag
            # order; append-if-absent preserves first-seen order instead.
            if tag and tag not in entry["coverage_tags"]:
                entry["coverage_tags"].append(tag)
    return list(merged.values())[:max_items]
def _pick_expected_result_placeholder(content: str, abnormal: bool) -> str:
text = content or ""
if abnormal or any(token in text for token in ["非法", "异常", "错误", "拒绝", "越界", "失败"]):
return "{{error_message}}"
if any(token in text for token in ["状态", "切换", "转换", "恢复"]):
return "{{state_change}}"
if any(token in text for token in ["数据库", "存储", "落库", "持久化"]):
return "{{data_persistence}}"
if any(token in text for token in ["界面", "UI", "页面", "按钮", "提示"]):
return "{{ui_display}}"
return "{{return_value}}"
class IdentifyRequirementTypeTool(TestingTool):
    """Classify the requirement text into one of the known requirement types."""

    name = "identify-requirement-type"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Detect the requirement type and seed the context for downstream tools.

        Writes ``requirement_type_result``, ``normalized_requirement_text``,
        ``requirement_focus_points`` and ``knowledge_used`` into the context.
        """
        raw_text = _clean_text(context.get("user_requirement_text", ""))
        # Fall back to the raw text when stripping removed everything.
        text = _strip_instruction_prefix(raw_text) or raw_text
        max_focus_points = _safe_int(context.get("max_focus_points"), 6, 3, 12)
        provided_type = _clean_text(context.get("requirement_type_input", ""))
        focus_points = _extract_focus_points(text, max_points=max_focus_points)

        fallback_used = False
        if provided_type in REQUIREMENT_TYPES:
            # The caller pinned the type explicitly; skip rule matching.
            result = {
                "requirement_type": provided_type,
                "reason": "用户已显式指定需求类型,系统按指定类型执行。",
                "candidates": [],
                "scores": {},
                "secondary_types": [],
            }
        else:
            scores = _build_type_scores(text)
            ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
            best_type, best_score = ranked[0]
            if best_score > 0:
                signal = TYPE_SIGNAL_RULES.get(best_type, "")
                result = {
                    "requirement_type": best_type,
                    "reason": f"命中{best_type}识别信号。{signal}",
                    "candidates": [],
                    "scores": scores,
                    "secondary_types": [name for name, score in ranked[1:4] if score > 0],
                }
            else:
                # No rule fired: report "unknown" plus the closest candidates.
                fallback_used = True
                result = {
                    "requirement_type": "未知类型",
                    "reason": "未命中明确分类规则,已回退到未知类型并提供最接近候选。",
                    "candidates": _top_candidates(scores),
                    "scores": scores,
                    "secondary_types": [],
                }

        context["requirement_type_result"] = result
        context["normalized_requirement_text"] = text
        context["requirement_focus_points"] = focus_points
        context["knowledge_used"] = bool(context.get("knowledge_context"))
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"type={result['requirement_type']}; candidates={len(result['candidates'])}; "
                f"secondary_types={len(result.get('secondary_types', []))}; focus_points={len(focus_points)}"
            ),
            fallback_used=fallback_used,
        )
class DecomposeTestItemsTool(TestingTool):
    """Decompose the requirement into 'normal' and 'abnormal' test items.

    Rule-seeded items are always produced; LLM-generated items are merged in
    ahead of them when model generation is enabled. Results are stored under
    ``context["test_items"]`` with N*/E* ids.
    """

    name = "decompose-test-items"

    @staticmethod
    def _seed_items(
        req_type: str,
        req_text: str,
        focus_points: List[str],
        max_items: int,
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Build rule-based (normal, abnormal) item lists, deduped to *max_items* each."""
        # Type-specific templates when the type is known; generic rules otherwise.
        if req_type in REQUIREMENT_RULES:
            source_rules = REQUIREMENT_RULES[req_type]
            normal_templates = list(source_rules.get("normal", []))
            abnormal_templates = list(source_rules.get("abnormal", []))
        else:
            normal_templates = list(GENERIC_DECOMPOSITION_RULES["normal"])
            abnormal_templates = list(GENERIC_DECOMPOSITION_RULES["abnormal"])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        for template in normal_templates:
            normal.append({"content": template, "coverage_tags": [req_type]})
        for template in abnormal_templates:
            abnormal.append({"content": template, "coverage_tags": [req_type]})
        # Derive two normal and two abnormal items from every extracted focus point.
        for point in focus_points:
            normal.extend(
                [
                    {
                        "content": f"验证{point}在标准作业流程下稳定执行且结果符合业务约束。",
                        "coverage_tags": [point, "正常流程"],
                    },
                    {
                        "content": f"验证{point}与相关联动控制、状态同步和回执反馈的一致性。",
                        "coverage_tags": [point, "联动一致性"],
                    },
                ]
            )
            abnormal.extend(
                [
                    {
                        "content": f"验证{point}在非法输入、错误指令或权限异常时的保护与拒绝机制。",
                        "coverage_tags": [point, "异常输入"],
                    },
                    {
                        "content": f"验证{point}在边界条件、时序冲突或设备故障下的告警和恢复行为。",
                        "coverage_tags": [point, "边界异常"],
                    },
                ]
            )
        # Requirements that mention manuals get one extra catch-all coverage item.
        if any(token in req_text for token in ["手册", "操作手册", "用户手册", "作业指导"]):
            normal.append(
                {
                    "content": "验证需求说明未显式给出但在用户手册或操作手册体现的功能流程。",
                    "coverage_tags": ["手册功能"],
                }
            )
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    @staticmethod
    def _generate_by_llm(context: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Ask the LLM for extra (normal, abnormal) items; ([], []) when unavailable."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text", "")
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=2500)
        prompt = f"""
你是资深测试分析师。请根据需求、分解规则和知识库片段,生成尽可能覆盖要点的测试项。
需求文本:{req_text}
需求类型:{req_type}
需求要点:{focus_points}
知识库片段:{knowledge_context or ''}
分解约束:
1. 正常测试与异常测试必须分组输出。
2. 每条测试项必须可执行、可验证,避免模板化空话。
3. 尽可能覆盖全部需求要点每组建议输出6-{max_items}条。
4. 优先生成与需求对象/控制逻辑/异常处理/边界条件强相关的测试项。
请仅输出 JSON 对象,结构如下:
{{
"normal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
],
"abnormal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return [], []
        normal_raw = data.get("normal_test_items", [])
        abnormal_raw = data.get("abnormal_test_items", [])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        # Non-list payloads are ignored; individual entries are normalized or dropped.
        for item in normal_raw if isinstance(normal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                normal.append(normalized)
        for item in abnormal_raw if isinstance(abnormal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                abnormal.append(normalized)
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Merge LLM and seeded items, assign N*/E* ids, and store them in the context."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text") or _strip_instruction_prefix(
            context.get("user_requirement_text", "")
        )
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        seeded_normal, seeded_abnormal = self._seed_items(req_type, req_text, focus_points, max_items)
        llm_normal, llm_abnormal = self._generate_by_llm(context)
        # LLM items are listed first so they win the content-keyed dedupe.
        merged_normal = _dedupe_items(llm_normal + seeded_normal, max_items)
        merged_abnormal = _dedupe_items(llm_abnormal + seeded_abnormal, max_items)
        fallback_used = not bool(llm_normal or llm_abnormal)
        normal_items: List[Dict[str, Any]] = []
        abnormal_items: List[Dict[str, Any]] = []
        for idx, item in enumerate(merged_normal, start=1):
            normal_items.append(
                {
                    "id": f"N{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        for idx, item in enumerate(merged_abnormal, start=1):
            abnormal_items.append(
                {
                    "id": f"E{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        context["test_items"] = {
            "normal": normal_items,
            "abnormal": abnormal_items,
        }
        context["decompose_force_rules"] = DECOMPOSE_FORCE_RULES
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_items={len(normal_items)}; abnormal_items={len(abnormal_items)}; "
                f"llm_items={len(llm_normal) + len(llm_abnormal)}"
            ),
            fallback_used=fallback_used,
        )
class GenerateTestCasesTool(TestingTool):
    """Expand each test item into detailed executable test cases.

    LLM generation is attempted per item; when it yields nothing usable, two
    deterministic fallback cases are emitted instead. Results are stored under
    ``context["test_cases"]``.
    """

    name = "generate-test-cases"

    @staticmethod
    def _build_fallback_steps(item_content: str, abnormal: bool, variant: str) -> List[str]:
        """Return a canned six-step procedure tailored to normal or abnormal testing."""
        if abnormal:
            return [
                "确认测试前置环境、设备状态与日志采集开关已准备就绪。",
                f"准备异常场景“{variant}”所需的输入数据、操作账号和触发条件。",
                f"在目标对象执行异常触发操作,重点验证:{item_content}",
                "持续观察系统返回码、错误文案、告警信息与日志链路完整性。",
                "检查保护机制是否生效,包括拒绝策略、回滚行为和状态一致性。",
                "记录证据并复位环境,确认异常处理后系统可恢复到稳定状态。",
            ]
        return [
            "确认测试环境、设备连接状态和前置业务数据均已初始化。",
            f"准备“{variant}”所需输入参数、操作路径和判定阈值。",
            f"在目标对象执行业务控制流程,重点验证:{item_content}",
            "校验关键返回值、状态变化、控制回执及界面或接口反馈结果。",
            "检查联动模块、日志记录和数据落库是否满足一致性要求。",
            "沉淀测试证据并恢复环境,确保后续用例可重复执行。",
        ]

    def _generate_cases_by_llm(
        self,
        context: Dict[str, Any],
        item: Dict[str, Any],
        abnormal: bool,
        cases_per_item: int,
    ) -> List[Dict[str, Any]]:
        """Ask the LLM for cases covering one item; returns [] when unusable.

        Cases without test_content or with fewer than five operation steps are
        dropped; at most max(1, cases_per_item) cases are returned.
        """
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1800)
        # NOTE(review): max(cases_per_item + 1, cases_per_item) below is always
        # cases_per_item + 1; left as-is since it only affects prompt wording.
        prompt = f"""
你是资深测试工程师。请围绕给定测试项生成详细测试用例。
需求:{req_text}
测试项:{item.get('content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
知识库片段:{knowledge_context or ''}
要求:
1. 生成 {cases_per_item}-{max(cases_per_item + 1, cases_per_item)} 条测试用例。
2. 每条用例包含 test_content 与 operation_steps。
3. operation_steps 必须详细至少5步包含前置、执行、观察、校验与证据留存。
4. 内容必须围绕当前测试项,不要输出空洞模板。
仅输出 JSON
{{
"test_cases": [
{{
"title": "...",
"test_content": "...",
"operation_steps": ["...", "..."]
}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return []
        raw_cases = data.get("test_cases", [])
        if not isinstance(raw_cases, list):
            return []
        normalized_cases: List[Dict[str, Any]] = []
        for case in raw_cases:
            if not isinstance(case, dict):
                continue
            test_content = _clean_text(str(case.get("test_content", "")))
            if not test_content:
                continue
            steps = case.get("operation_steps", [])
            if not isinstance(steps, list):
                continue
            cleaned_steps = [_clean_text(str(step)) for step in steps if _clean_text(str(step))]
            # Enforce the "at least five detailed steps" contract from the prompt.
            if len(cleaned_steps) < 5:
                continue
            normalized_cases.append(
                {
                    "title": _clean_text(str(case.get("title", ""))),
                    "test_content": test_content,
                    "operation_steps": cleaned_steps,
                }
            )
        return normalized_cases[: max(1, cases_per_item)]

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Generate cases for every normal and abnormal item and store them in the context."""
        test_items = context.get("test_items", {})
        cases_per_item = _safe_int(context.get("cases_per_item"), 2, 1, 5)
        normal_cases: List[Dict[str, Any]] = []
        abnormal_cases: List[Dict[str, Any]] = []
        llm_case_count = 0
        for item in test_items.get("normal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=False, cases_per_item=cases_per_item)
            if not generated:
                # Deterministic fallback: two templated cases, trimmed to the quota.
                generated = [
                    {
                        "title": "标准流程验证",
                        "test_content": f"验证{item['content']}",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "标准流程"),
                    },
                    {
                        "title": "边界与联动验证",
                        "test_content": f"验证{item['content']}在边界条件和联动场景下的稳定性",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "边界与联动"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=False)
                normal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        for item in test_items.get("abnormal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=True, cases_per_item=cases_per_item)
            if not generated:
                generated = [
                    {
                        "title": "非法输入与权限异常验证",
                        "test_content": f"验证{item['content']}在非法输入与权限异常下的处理表现",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "非法输入与权限异常"),
                    },
                    {
                        "title": "故障与时序冲突验证",
                        "test_content": f"验证{item['content']}在故障和时序冲突场景下的保护行为",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "故障与时序冲突"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=True)
                abnormal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        context["test_cases"] = {
            "normal": normal_cases,
            "abnormal": abnormal_cases,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_cases={len(normal_cases)}; abnormal_cases={len(abnormal_cases)}; llm_cases={llm_case_count}"
            ),
            fallback_used=llm_case_count == 0,
        )
class BuildExpectedResultsTool(TestingTool):
    """Produce one verifiable expected-result sentence per generated test case.

    Stores the results under ``context["expected_results"]``.
    NOTE(review): ``name`` uses underscores while earlier tools in this module
    are hyphenated — confirm the registry expects this exact key.
    """

    name = "build_expected_results"

    def _expected_for_case(self, context: Dict[str, Any], case: Dict[str, Any], abnormal: bool) -> str:
        """Return the expected-result text for *case*: LLM first, template fallback."""
        placeholder = case.get("expected_result_placeholder", "{{return_value}}")
        # Unknown placeholders degrade to the generic return-value semantics.
        if placeholder not in EXPECTED_RESULT_PLACEHOLDER_MAP:
            placeholder = "{{return_value}}"
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1200)
        prompt = f"""
请基于以下信息生成一条可验证、可度量的测试预期结果,避免模板化空话。
需求:{req_text}
测试内容:{case.get('test_content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
占位符语义:{placeholder} -> {EXPECTED_RESULT_PLACEHOLDER_MAP.get(placeholder, '')}
知识库片段:{knowledge_context or ''}
输出要求:
1. 仅输出一句中文预期结果。
2. 结果必须可判定成功/失败。
3. 包含关键观测项(返回值、状态、告警、日志、数据一致性中的相关项)。
""".strip()
        llm_text = _invoke_llm_text(context, prompt)
        if llm_text:
            return _truncate_text(llm_text, max_len=220)
        # LLM unavailable or empty: fall back to a placeholder-specific template.
        test_content = _clean_text(case.get("test_content", ""))
        if placeholder == "{{error_message}}":
            return f"触发{test_content}后,系统应返回明确错误码与错误文案,拒绝非法请求且核心状态保持一致。"
        if placeholder == "{{state_change}}":
            return f"执行{test_content}后,系统状态转换应符合需求定义,状态变化可被日志与回执共同验证。"
        if placeholder == "{{data_persistence}}":
            return f"执行{test_content}后,数据库或存储层应产生符合约束的持久化结果且无脏数据。"
        if placeholder == "{{ui_display}}":
            return f"执行{test_content}后,界面应展示与控制结果一致的反馈信息且提示可被用户执行。"
        if abnormal:
            return f"执行异常场景“{test_content}”后,系统应触发保护策略并输出可追溯日志,业务状态保持可恢复。"
        return f"执行“{test_content}”后,返回值与状态变化应满足需求约束,关键结果可通过日志或回执验证。"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Build expected results for every normal and abnormal test case."""
        test_cases = context.get("test_cases", {})
        normal_expected: List[Dict[str, str]] = []
        abnormal_expected: List[Dict[str, str]] = []
        for case in test_cases.get("normal", []):
            normal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=False),
                }
            )
        for case in test_cases.get("abnormal", []):
            abnormal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=True),
                }
            )
        context["expected_results"] = {
            "normal": normal_expected,
            "abnormal": abnormal_expected,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_expected={len(normal_expected)}; abnormal_expected={len(abnormal_expected)}"
            ),
        )
class FormatOutputTool(TestingTool):
    """Render test items, cases and expected results as a Markdown report.

    Writes the report to ``context["formatted_output"]`` and the raw payload to
    ``context["structured_output"]``.
    """

    name = "format_output"

    @staticmethod
    def _format_case_block(case: Dict[str, Any], index: int) -> List[str]:
        """Format one test case as a numbered Markdown entry with its steps."""
        item_id = case.get("item_id", case.get("id", ""))
        title = _clean_text(case.get("title", ""))
        block: List[str] = []
        # BUG FIX: the parenthesis after the item id was never closed.
        block.append(f"{index}. [用例 {case['id']}](对应测试项 {item_id}){case.get('test_content', '')}")
        if title:
            block.append(f" 场景标题:{title}")
        block.append(" 操作步骤:")
        for step_idx, step in enumerate(case.get("operation_steps", []), start=1):
            block.append(f" {step_idx}) {step}")
        return block

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Assemble the three report sections and store both renderings in the context."""
        test_items = context.get("test_items", {"normal": [], "abnormal": []})
        test_cases = context.get("test_cases", {"normal": [], "abnormal": []})
        expected_results = context.get("expected_results", {"normal": [], "abnormal": []})
        lines: List[str] = []
        lines.append("**测试项**")
        lines.append("")
        lines.append("**正常测试**")
        for index, item in enumerate(test_items.get("normal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        lines.append("**异常测试**")
        for index, item in enumerate(test_items.get("abnormal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        lines.append("**测试用例**")
        lines.append("")
        lines.append("**正常测试**")
        for index, case in enumerate(test_cases.get("normal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        lines.append("**异常测试**")
        for index, case in enumerate(test_cases.get("abnormal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        lines.append("**预期成果**")
        lines.append("")
        lines.append("**正常测试**")
        for index, expected in enumerate(expected_results.get("normal", []), start=1):
            # BUG FIX: closing parenthesis after the case id was missing.
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        lines.append("")
        lines.append("**异常测试**")
        for index, expected in enumerate(expected_results.get("abnormal", []), start=1):
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        context["formatted_output"] = "\n".join(lines)
        context["structured_output"] = {
            "test_items": test_items,
            "test_cases": test_cases,
            "expected_results": expected_results,
        }
        return ToolExecutionResult(
            context=context,
            output_summary="formatted_sections=3",
        )
def build_default_tool_chain() -> List[TestingTool]:
    """Assemble the default five-stage testing pipeline, in execution order."""
    stages = (
        IdentifyRequirementTypeTool,
        DecomposeTestItemsTool,
        GenerateTestCasesTool,
        BuildExpectedResultsTool,
        FormatOutputTool,
    )
    return [stage() for stage in stages]