"""Testing pipeline runner.

Runs a requirement through the default tool chain (classification, test-item
and test-case generation, expected results, formatting) and returns a
structured result plus optional per-step debug logs.
"""

from __future__ import annotations

from time import perf_counter
from typing import Any, Dict, List, Optional
from uuid import uuid4

from app.services.llm.llm_factory import LLMFactory
from app.services.testing_pipeline.tools import build_default_tool_chain


def _build_input_summary(context: Dict[str, Any]) -> str:
    """Summarize pipeline inputs for step logs, truncating long requirement text."""
    req_text = str(context.get("user_requirement_text", "")).strip()
    req_type = str(context.get("requirement_type_input", "")).strip() or "auto"
    short_text = req_text if len(req_text) <= 60 else f"{req_text[:60]}..."
    return f"requirement_type_input={req_type}; requirement_text={short_text}"


def _build_output_summary(context: Dict[str, Any]) -> str:
    """Summarize the classified requirement type and generated item/case counts."""
    req_type_result = context.get("requirement_type_result", {})
    req_type = req_type_result.get("requirement_type", "")
    test_items = context.get("test_items", {})
    test_cases = context.get("test_cases", {})
    return (
        f"requirement_type={req_type}; "
        f"items={len(test_items.get('normal', [])) + len(test_items.get('abnormal', []))}; "
        f"cases={len(test_cases.get('normal', [])) + len(test_cases.get('abnormal', []))}"
    )


def run_testing_pipeline(
    user_requirement_text: str,
    requirement_type_input: Optional[str] = None,
    debug: bool = False,
    knowledge_context: Optional[str] = None,
    use_model_generation: bool = False,
    max_items_per_group: int = 12,
    cases_per_item: int = 2,
    max_focus_points: int = 6,
    max_llm_calls: int = 10,
) -> Dict[str, Any]:
    """Run the full testing pipeline and return its structured output.

    When ``use_model_generation`` is set, an LLM is created for the tools to
    use; if creation fails, the pipeline proceeds without a model.
    """
    llm_model = None
    if use_model_generation:
        try:
            llm_model = LLMFactory.create(streaming=False)
        except Exception:
            # Model creation is best-effort: on failure the tools run
            # without an LLM (llm_model stays None).
            llm_model = None

    # Shared context threaded through every tool in the chain.
    # Tunables are clamped to safe ranges before the run starts.
    context: Dict[str, Any] = {
        "trace_id": str(uuid4()),
        "user_requirement_text": user_requirement_text,
        "requirement_type_input": requirement_type_input,
        "debug": bool(debug),
        "knowledge_context": (knowledge_context or "").strip(),
        "knowledge_used": bool((knowledge_context or "").strip()),
        "use_model_generation": bool(use_model_generation),
        "llm_model": llm_model,
        "max_items_per_group": max(4, min(int(max_items_per_group), 30)),
        "cases_per_item": max(1, min(int(cases_per_item), 5)),
        "max_focus_points": max(3, min(int(max_focus_points), 12)),
        "llm_call_budget": max(0, min(int(max_llm_calls), 100)),
    }

    # Execute each tool in order, timing it and recording a step log entry.
    step_logs: List[Dict[str, Any]] = []
    for tool in build_default_tool_chain():
        start = perf_counter()
        input_summary = _build_input_summary(context)
        execution = tool.execute(context)
        context = execution.context
        duration_ms = (perf_counter() - start) * 1000
        step_logs.append(
            {
                "step_name": tool.name,
                "input_summary": input_summary,
                "output_summary": execution.output_summary,
                "success": True,
                "fallback_used": execution.fallback_used,
                "duration_ms": round(duration_ms, 3),
            }
        )

    req_result = context.get("requirement_type_result", {})
    return {
        "trace_id": context.get("trace_id"),
        "requirement_type": req_result.get("requirement_type", "unknown type"),
        "reason": req_result.get("reason", ""),
        "candidates": req_result.get("candidates", []),
        "test_items": context.get("test_items", {"normal": [], "abnormal": []}),
        "test_cases": context.get("test_cases", {"normal": [], "abnormal": []}),
        "expected_results": context.get("expected_results", {"normal": [], "abnormal": []}),
        "formatted_output": context.get("formatted_output", ""),
        "pipeline_summary": _build_output_summary(context),
        "knowledge_used": bool(context.get("knowledge_used", False)),
        "step_logs": step_logs if debug else [],
    }
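

# --- Usage sketch -----------------------------------------------------------
# A minimal, hypothetical example of invoking the runner; the requirement text
# below is made up, and this assumes the default tool chain can complete
# without a model (use_model_generation=False). Not part of the module's
# public surface.
if __name__ == "__main__":
    result = run_testing_pipeline(
        user_requirement_text="Users can reset their password via an emailed link.",
        debug=True,
    )
    print(result["requirement_type"])
    print(result["pipeline_summary"])
    # With debug=True, step_logs carries one timed entry per tool in the chain.
    for log in result["step_logs"]:
        print(f"{log['step_name']}: {log['duration_ms']} ms ({log['output_summary']})")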