init. project

This commit is contained in:
2026-04-13 11:34:23 +08:00
commit c7c0659a85
202 changed files with 31196 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
from app.services.testing_pipeline.pipeline import run_testing_pipeline
__all__ = ["run_testing_pipeline"]

View File

@@ -0,0 +1,20 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Dict
@dataclass
class ToolExecutionResult:
    """Result of running a single pipeline tool step."""

    # The (possibly mutated/extended) pipeline context to pass to the next tool.
    context: Dict[str, Any]
    # Short human-readable summary of what this step produced (for step logs).
    output_summary: str
    # True when the step had to fall back instead of using its preferred path
    # (e.g. no usable LLM output) — surfaced in per-step logs.
    fallback_used: bool = False
class TestingTool(ABC):
    """Abstract base class for one step of the testing pipeline."""

    # Stable step identifier, reported in per-step logs.
    name: str

    @abstractmethod
    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Run this step against *context* and return the execution result."""
        raise NotImplementedError

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
from time import perf_counter
from typing import Any, Dict, List, Optional
from uuid import uuid4
from app.services.llm.llm_factory import LLMFactory
from app.services.testing_pipeline.tools import build_default_tool_chain
def _build_input_summary(context: Dict[str, Any]) -> str:
req_text = str(context.get("user_requirement_text", "")).strip()
req_type = str(context.get("requirement_type_input", "")).strip() or "auto"
short_text = req_text if len(req_text) <= 60 else f"{req_text[:60]}..."
return f"requirement_type_input={req_type}; requirement_text={short_text}"
def _build_output_summary(context: Dict[str, Any]) -> str:
req_type_result = context.get("requirement_type_result", {})
req_type = req_type_result.get("requirement_type", "")
test_items = context.get("test_items", {})
test_cases = context.get("test_cases", {})
return (
f"requirement_type={req_type}; "
f"items={len(test_items.get('normal', [])) + len(test_items.get('abnormal', []))}; "
f"cases={len(test_cases.get('normal', [])) + len(test_cases.get('abnormal', []))}"
)
def run_testing_pipeline(
    user_requirement_text: str,
    requirement_type_input: Optional[str] = None,
    debug: bool = False,
    knowledge_context: Optional[str] = None,
    use_model_generation: bool = False,
    max_items_per_group: int = 12,
    cases_per_item: int = 2,
    max_focus_points: int = 6,
    max_llm_calls: int = 10,
) -> Dict[str, Any]:
    """Run the full testing pipeline over a requirement text.

    Executes every tool from build_default_tool_chain() in order, threading a
    shared context dict through the steps, and returns a result dict with the
    detected requirement type, test items/cases/expected results, the
    formatted text output and (when *debug* is true) per-step logs.
    """
    model = None
    if use_model_generation:
        try:
            model = LLMFactory.create(streaming=False)
        except Exception:
            # Model creation is best-effort; tools fall back to rule-based output.
            model = None
    knowledge = (knowledge_context or "").strip()

    def _clamp(value: int, low: int, high: int) -> int:
        # Keep caller-supplied limits inside their sane ranges.
        return max(low, min(int(value), high))

    context: Dict[str, Any] = {
        "trace_id": str(uuid4()),
        "user_requirement_text": user_requirement_text,
        "requirement_type_input": requirement_type_input,
        "debug": bool(debug),
        "knowledge_context": knowledge,
        "knowledge_used": bool(knowledge),
        "use_model_generation": bool(use_model_generation),
        "llm_model": model,
        "max_items_per_group": _clamp(max_items_per_group, 4, 30),
        "cases_per_item": _clamp(cases_per_item, 1, 5),
        "max_focus_points": _clamp(max_focus_points, 3, 12),
        "llm_call_budget": _clamp(max_llm_calls, 0, 100),
    }

    step_logs: List[Dict[str, Any]] = []
    for tool in build_default_tool_chain():
        started_at = perf_counter()
        summary_before = _build_input_summary(context)
        result = tool.execute(context)
        context = result.context
        elapsed_ms = (perf_counter() - started_at) * 1000
        step_logs.append(
            {
                "step_name": tool.name,
                "input_summary": summary_before,
                "output_summary": result.output_summary,
                "success": True,
                "fallback_used": result.fallback_used,
                "duration_ms": round(elapsed_ms, 3),
            }
        )

    type_result = context.get("requirement_type_result", {})
    return {
        "trace_id": context.get("trace_id"),
        "requirement_type": type_result.get("requirement_type", "未知类型"),
        "reason": type_result.get("reason", ""),
        "candidates": type_result.get("candidates", []),
        "test_items": context.get("test_items", {"normal": [], "abnormal": []}),
        "test_cases": context.get("test_cases", {"normal": [], "abnormal": []}),
        "expected_results": context.get("expected_results", {"normal": [], "abnormal": []}),
        "formatted_output": context.get("formatted_output", ""),
        "pipeline_summary": _build_output_summary(context),
        "knowledge_used": bool(context.get("knowledge_used", False)),
        "step_logs": step_logs if debug else [],
    }

View File

@@ -0,0 +1,203 @@
from __future__ import annotations
from typing import Dict, List
# Canonical test-category names recognised by the pipeline; user-provided
# type input is matched verbatim against this list.
REQUIREMENT_TYPES: List[str] = [
    "功能测试",
    "性能测试",
    "外部接口测试",
    "人机交互界面测试",
    "强度测试",
    "余量测试",
    "可靠性测试",
    "安全性测试",
    "恢复性测试",
    "边界测试",
    "安装性测试",
    "互操作性测试",
    "敏感性测试",
    "测试充分性要求",
]
# Per-type "recognition signal" descriptions, appended to the classification
# reason when a type is matched by keyword scoring.
TYPE_SIGNAL_RULES: Dict[str, str] = {
    "功能测试": "关注功能需求逐项验证、业务流程正确性、输入输出行为、状态转换与边界值处理。",
    "性能测试": "关注处理精度、响应时间、处理数据量、系统协调性、负载潜力与运行占用空间。",
    "外部接口测试": "关注外部输入输出接口的格式、内容、协议与正常/异常交互表现。",
    "人机交互界面测试": "关注界面一致性、界面风格、操作流程、误操作健壮性与错误提示能力。",
    "强度测试": "关注系统在极限、超负荷、饱和和降级条件下的稳定性与承受能力。",
    "余量测试": "关注存储余量、输入输出通道余量、功能处理时间余量等资源裕度。",
    "可靠性测试": "关注真实或仿真环境下的失效等级、运行剖面、输入覆盖和长期稳定运行能力。",
    "安全性测试": "关注危险状态响应、安全关键部件、异常输入防护、非法访问阻断和数据完整性保护。",
    "恢复性测试": "关注故障探测、备用切换、系统状态保护与从无错误状态继续执行能力。",
    "边界测试": "关注输入输出域边界、状态转换端点、功能界限、性能界限与容量界限。",
    "安装性测试": "关注不同配置下安装卸载流程和安装规程执行正确性。",
    "互操作性测试": "关注多个软件并行运行时的互操作能力与协同正确性。",
    "敏感性测试": "关注有效输入类中可能引发不稳定或不正常处理的数据组合。",
    "测试充分性要求": "关注需求覆盖率、配置项覆盖、语句覆盖、分支覆盖及未覆盖分析确认。",
}
# Mandatory constraints applied when decomposing a requirement into test
# items; exposed to downstream consumers via context["decompose_force_rules"].
DECOMPOSE_FORCE_RULES: List[str] = [
    "每个软件功能至少应被正常测试与被认可的异常场景覆盖;复杂功能需继续细分。",
    "每个测试项必须语义完整、可直接执行。",
    "覆盖必须包含:正常流程、边界条件(适用时)、异常条件。",
    "粒度需适中,避免过粗或过细。",
    "对未知类型必须执行通用分解,并保持正常/异常分组。",
    "对需求说明未显式给出但在用户手册或操作手册体现的功能,也应补充测试项覆盖。",
]
# Per-type decomposition hints:
#   "keywords" — trigger words used by the classifier's keyword scoring;
#   "normal" / "abnormal" — seed test-item templates for each coverage group.
REQUIREMENT_RULES: Dict[str, Dict[str, List[str]]] = {
    "功能测试": {
        "keywords": ["功能", "业务流程", "输入输出", "状态转换", "边界值"],
        "normal": [
            "正常覆盖功能主路径、基本数据类型、合法边界值与状态转换。",
        ],
        "abnormal": [
            "异常覆盖非法输入、不规则输入、非法边界值与最坏情况。",
        ],
    },
    "性能测试": {
        "keywords": ["性能", "处理精度", "响应时间", "处理数据量", "负载", "占用空间"],
        "normal": [
            "正常覆盖处理精度、响应时间、处理数据量与模块协调性。",
        ],
        "abnormal": [
            "异常覆盖超负荷、软硬件限制、负载潜力上限与资源占用异常。",
        ],
    },
    "外部接口测试": {
        "keywords": ["外部接口", "输入接口", "输出接口", "格式", "内容", "协议", "异常交互"],
        "normal": [
            "正常覆盖全部外部接口格式与内容正确性。",
        ],
        "abnormal": [
            "异常覆盖每个输入输出接口的错误格式、错误内容与异常交互。",
        ],
    },
    "人机交互界面测试": {
        "keywords": ["界面", "风格", "交互", "误操作", "错误提示", "操作流程"],
        "normal": [
            "正常覆盖界面风格一致性与标准操作流程。",
        ],
        "abnormal": [
            "异常覆盖误操作、快速操作、非法输入、错误命令与错误流程提示。",
        ],
    },
    "强度测试": {
        "keywords": ["强度", "极限", "超负荷", "饱和", "降级", "健壮性"],
        "normal": [
            "正常覆盖设计极限下系统功能和性能表现。",
        ],
        "abnormal": [
            "异常覆盖超出极限时的降级行为、健壮性与饱和表现。",
        ],
    },
    "余量测试": {
        "keywords": ["余量", "存储余量", "通道余量", "处理时间余量", "资源裕度"],
        "normal": [
            "正常覆盖存储、通道、处理时间余量是否满足要求。",
        ],
        "abnormal": [
            "异常覆盖余量不足或耗尽时系统告警与受控行为。",
        ],
    },
    "可靠性测试": {
        "keywords": ["可靠性", "运行剖面", "失效等级", "输入覆盖", "长期稳定"],
        "normal": [
            "正常覆盖典型环境、运行剖面与输入变量组合。",
        ],
        "abnormal": [
            "异常覆盖失效等级场景、边界环境变化、不合法输入域及失效记录。",
        ],
    },
    "安全性测试": {
        "keywords": ["安全", "危险状态", "安全关键部件", "非法进入", "完整性", "防护"],
        "normal": [
            "正常覆盖安全关键部件、安全结构与合法操作路径。",
        ],
        "abnormal": [
            "异常覆盖危险状态、故障模式、边界接合部、非法进入与数据完整性保护。",
        ],
    },
    "恢复性测试": {
        "keywords": ["恢复", "故障探测", "备用切换", "状态保护", "继续执行", "reset"],
        "normal": [
            "正常覆盖故障探测、备用切换、恢复后继续执行。",
        ],
        "abnormal": [
            "异常覆盖故障中作业保护、状态保护与恢复失败路径。",
        ],
    },
    "边界测试": {
        "keywords": ["边界", "端点", "输入输出域", "状态转换", "性能界限", "容量界限"],
        "normal": [
            "正常覆盖输入输出域边界、状态转换端点与功能界限。",
        ],
        "abnormal": [
            "异常覆盖性能界限、容量界限和越界端点。",
        ],
    },
    "安装性测试": {
        "keywords": ["安装", "卸载", "配置", "安装规程", "部署", "中断"],
        "normal": [
            "正常覆盖标准及不同配置下安装卸载流程。",
        ],
        "abnormal": [
            "异常覆盖安装规程错误、依赖异常与中断后的处理。",
        ],
    },
    "互操作性测试": {
        "keywords": ["互操作", "并行运行", "协同", "兼容", "冲突", "互操作失败"],
        "normal": [
            "正常覆盖两个或多个软件同时运行与互操作过程。",
        ],
        "abnormal": [
            "异常覆盖互操作失败、并行冲突与协同异常。",
        ],
    },
    "敏感性测试": {
        "keywords": ["敏感性", "输入类", "数据组合", "不稳定", "不正常处理"],
        "normal": [
            "正常覆盖有效输入类中典型数据组合。",
        ],
        "abnormal": [
            "异常覆盖引发不稳定或不正常处理的特殊数据组合。",
        ],
    },
    "测试充分性要求": {
        "keywords": ["测试充分性", "需求覆盖率", "配置项覆盖", "语句覆盖", "分支覆盖", "未覆盖分析"],
        "normal": [
            "正常覆盖需求覆盖率、配置项覆盖与代码覆盖达标。",
        ],
        "abnormal": [
            "异常覆盖未覆盖部分逐项分析、确认与报告输出。",
        ],
    },
}
# Fallback seed templates used when the requirement type is not present in
# REQUIREMENT_RULES (i.e. "未知类型").
GENERIC_DECOMPOSITION_RULES: Dict[str, List[str]] = {
    "normal": [
        "主流程正确性。",
        "合法边界值。",
        "标准输入输出。",
    ],
    "abnormal": [
        "非法输入。",
        "越界输入。",
        "资源异常或状态冲突。",
    ],
}
# Semantics of expected-result placeholders attached to generated test cases;
# keys are the literal placeholder tokens stored on each case.
EXPECTED_RESULT_PLACEHOLDER_MAP: Dict[str, str] = {
    "{{return_value}}": "接口或函数返回值验证。",
    "{{state_change}}": "系统状态变化验证。",
    "{{error_message}}": "异常场景错误信息验证。",
    "{{data_persistence}}": "数据库或存储落库结果验证。",
    "{{ui_display}}": "界面显示反馈验证。",
}

View File

@@ -0,0 +1,867 @@
from __future__ import annotations
import json
import re
from collections import defaultdict
from typing import Any, Dict, List, Optional, Tuple
from app.services.testing_pipeline.base import TestingTool, ToolExecutionResult
from app.services.testing_pipeline.rules import (
DECOMPOSE_FORCE_RULES,
EXPECTED_RESULT_PLACEHOLDER_MAP,
GENERIC_DECOMPOSITION_RULES,
REQUIREMENT_RULES,
REQUIREMENT_TYPES,
TYPE_SIGNAL_RULES,
)
def _clean_text(value: str) -> str:
return " ".join((value or "").replace("\n", " ").split())
def _truncate_text(value: str, max_len: int = 2000) -> str:
text = _clean_text(value)
if len(text) <= max_len:
return text
return f"{text[:max_len]}..."
def _safe_int(value: Any, default: int, low: int, high: int) -> int:
try:
parsed = int(value)
except Exception:
parsed = default
return max(low, min(parsed, high))
def _strip_instruction_prefix(value: str) -> str:
text = _clean_text(value)
if not text:
return text
lowered = text.lower()
if lowered.startswith("/testing"):
text = _clean_text(text[len("/testing") :])
prefixes = [
"为以下需求生成测试用例",
"根据以下需求生成测试用例",
"请根据以下需求生成测试用例",
"请根据需求生成测试用例",
"请生成测试用例",
"生成测试用例",
]
for prefix in prefixes:
if text.startswith(prefix):
for sep in ("", ":"):
idx = text.find(sep)
if idx != -1:
text = _clean_text(text[idx + 1 :])
break
else:
text = _clean_text(text[len(prefix) :])
break
pattern = re.compile(r"^(请)?(根据|按|基于).{0,40}(需求|场景).{0,30}(生成|输出).{0,20}(测试项|测试用例)[:]")
matched = pattern.match(text)
if matched:
text = _clean_text(text[matched.end() :])
return text
def _extract_focus_points(value: str, max_points: int = 6) -> List[str]:
    """Extract up to *max_points* distinct focus phrases from a requirement text.

    Splits on Chinese/ASCII punctuation, drops very short or instruction-like
    fragments, and prefers segments mentioning control/fault keywords.
    """
    text = _strip_instruction_prefix(value)
    if not text:
        return []
    segments: List[str] = []
    for raw_part in re.split(r"[,。;;]", text):
        cleaned = _clean_text(raw_part)
        if cleaned:
            segments.append(cleaned)
    # Filter out noise that is about the instruction rather than the requirement.
    ignored_tokens = ["生成测试用例", "测试项分解", "测试用例生成", "以下需求"]
    meaningful = [
        segment
        for segment in segments
        if len(segment) >= 4 and all(token not in segment for token in ignored_tokens)
    ]
    if not meaningful:
        meaningful = segments
    priority_keywords = [
        "启停",
        "开启",
        "关闭",
        "远程控制",
        "保护",
        "联动",
        "状态",
        "故障",
        "恢复",
        "切换",
        "告警",
        "模式",
        "边界",
        "时序",
    ]
    prioritized = [
        segment
        for segment in meaningful
        if any(keyword in segment for keyword in priority_keywords)
    ]
    pool = prioritized if prioritized else meaningful
    # Preserve first-seen order while removing duplicates.
    unique: List[str] = []
    for segment in pool:
        if segment not in unique:
            unique.append(segment)
    return unique[:max_points]
def _build_type_scores(text: str) -> Dict[str, int]:
    """Score every known requirement type against *text* (name hit = 5, keyword hit = 2)."""
    lowered = text.lower()
    scores: Dict[str, int] = {}
    for req_type, rule in REQUIREMENT_RULES.items():
        # An exact mention of the type name is the strongest signal.
        hit_score = 5 if req_type in text else 0
        hit_score += sum(
            2 for keyword in rule.get("keywords", []) if keyword.lower() in lowered
        )
        scores[req_type] = hit_score
    return scores
def _top_candidates(scores: Dict[str, int], top_n: int = 3) -> List[str]:
sorted_pairs = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
non_zero = [name for name, score in sorted_pairs if score > 0]
if non_zero:
return non_zero[:top_n]
return ["功能测试", "边界测试", "性能测试"][:top_n]
def _message_to_text(value: Any) -> str:
content = getattr(value, "content", value)
if isinstance(content, str):
return content
if isinstance(content, list):
chunks: List[str] = []
for item in content:
if isinstance(item, str):
chunks.append(item)
elif isinstance(item, dict):
text = item.get("text")
if isinstance(text, str):
chunks.append(text)
else:
chunks.append(str(item))
return "".join(chunks)
return str(content)
def _extract_json_object(value: str) -> Optional[Dict[str, Any]]:
text = (value or "").strip()
if not text:
return None
if text.startswith("```"):
text = re.sub(r"^```(?:json)?", "", text, flags=re.IGNORECASE).strip()
if text.endswith("```"):
text = text[:-3].strip()
try:
data = json.loads(text)
if isinstance(data, dict):
return data
except Exception:
pass
start = text.find("{")
if start == -1:
return None
depth = 0
for idx in range(start, len(text)):
ch = text[idx]
if ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
fragment = text[start : idx + 1]
try:
data = json.loads(fragment)
if isinstance(data, dict):
return data
except Exception:
return None
return None
def _invoke_llm_json(context: Dict[str, Any], prompt: str) -> Optional[Dict[str, Any]]:
    """Call the configured LLM and parse its reply as a JSON object.

    Returns None when model generation is disabled, the call budget is
    exhausted, or the call/parsing fails. The budget counter in *context*
    is decremented for every attempted call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return None
    remaining = context.get("llm_call_budget")
    if isinstance(remaining, int):
        if remaining <= 0:
            return None
        context["llm_call_budget"] = remaining - 1
    try:
        reply = model.invoke(prompt)
        return _extract_json_object(_message_to_text(reply))
    except Exception:
        # LLM usage is best-effort; callers fall back to rule-based output.
        return None
def _invoke_llm_text(context: Dict[str, Any], prompt: str) -> str:
    """Call the configured LLM and return its reply as cleaned plain text.

    Returns "" when model generation is disabled, the call budget is
    exhausted, or the call fails. The budget counter in *context* is
    decremented for every attempted call.
    """
    model = context.get("llm_model")
    if model is None or not context.get("use_model_generation"):
        return ""
    remaining = context.get("llm_call_budget")
    if isinstance(remaining, int):
        if remaining <= 0:
            return ""
        context["llm_call_budget"] = remaining - 1
    try:
        reply = model.invoke(prompt)
        return _clean_text(_message_to_text(reply))
    except Exception:
        # LLM usage is best-effort; callers fall back to rule-based output.
        return ""
def _normalize_item_entry(item: Any) -> Optional[Dict[str, Any]]:
if isinstance(item, str):
content = _clean_text(item)
if not content:
return None
return {"content": content, "coverage_tags": []}
if isinstance(item, dict):
content = _clean_text(str(item.get("content", "")))
if not content:
return None
tags = item.get("coverage_tags") or item.get("covered_points") or []
if not isinstance(tags, list):
tags = [str(tags)]
tags = [_clean_text(str(tag)) for tag in tags if _clean_text(str(tag))]
return {"content": content, "coverage_tags": tags}
return None
def _dedupe_items(items: List[Dict[str, Any]], max_items: int) -> List[Dict[str, Any]]:
merged: Dict[str, Dict[str, Any]] = {}
for item in items:
content = _clean_text(item.get("content", ""))
if not content:
continue
existing = merged.get(content)
if existing is None:
merged[content] = {
"content": content,
"coverage_tags": list(item.get("coverage_tags") or []),
}
else:
existing_tags = set(existing.get("coverage_tags") or [])
for tag in item.get("coverage_tags") or []:
if tag and tag not in existing_tags:
existing_tags.add(tag)
existing["coverage_tags"] = list(existing_tags)
deduped = list(merged.values())
return deduped[:max_items]
def _pick_expected_result_placeholder(content: str, abnormal: bool) -> str:
text = content or ""
if abnormal or any(token in text for token in ["非法", "异常", "错误", "拒绝", "越界", "失败"]):
return "{{error_message}}"
if any(token in text for token in ["状态", "切换", "转换", "恢复"]):
return "{{state_change}}"
if any(token in text for token in ["数据库", "存储", "落库", "持久化"]):
return "{{data_persistence}}"
if any(token in text for token in ["界面", "UI", "页面", "按钮", "提示"]):
return "{{ui_display}}"
return "{{return_value}}"
class IdentifyRequirementTypeTool(TestingTool):
    """Pipeline step 1: decide which test type the requirement belongs to.

    An explicitly provided valid type wins outright; otherwise the text is
    scored against keyword rules, falling back to "未知类型" (plus the
    closest candidates) when nothing scores above zero.
    """

    name = "identify-requirement-type"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        raw_text = _clean_text(context.get("user_requirement_text", ""))
        text = _strip_instruction_prefix(raw_text)
        if not text:
            # Prefix stripping consumed everything; keep the raw text.
            text = raw_text
        max_focus_points = _safe_int(context.get("max_focus_points"), 6, 3, 12)
        provided_type = _clean_text(context.get("requirement_type_input", ""))
        focus_points = _extract_focus_points(text, max_points=max_focus_points)
        fallback_used = False
        if provided_type in REQUIREMENT_TYPES:
            # Explicit, valid user choice: skip scoring entirely.
            result = {
                "requirement_type": provided_type,
                "reason": "用户已显式指定需求类型,系统按指定类型执行。",
                "candidates": [],
                "scores": {},
                "secondary_types": [],
            }
        else:
            scores = _build_type_scores(text)
            sorted_pairs = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
            best_type, best_score = sorted_pairs[0]
            # Up to three runner-up types that still scored above zero.
            secondary = [name for name, score in sorted_pairs[1:4] if score > 0]
            if best_score <= 0:
                # No rule matched at all: report "unknown" with nearest candidates.
                fallback_used = True
                candidates = _top_candidates(scores)
                result = {
                    "requirement_type": "未知类型",
                    "reason": "未命中明确分类规则,已回退到未知类型并提供最接近候选。",
                    "candidates": candidates,
                    "scores": scores,
                    "secondary_types": [],
                }
            else:
                signal = TYPE_SIGNAL_RULES.get(best_type, "")
                result = {
                    "requirement_type": best_type,
                    "reason": f"命中{best_type}识别信号。{signal}",
                    "candidates": [],
                    "scores": scores,
                    "secondary_types": secondary,
                }
        # Publish classification results for the downstream tools.
        context["requirement_type_result"] = result
        context["normalized_requirement_text"] = text
        context["requirement_focus_points"] = focus_points
        context["knowledge_used"] = bool(context.get("knowledge_context"))
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"type={result['requirement_type']}; candidates={len(result['candidates'])}; "
                f"secondary_types={len(result.get('secondary_types', []))}; focus_points={len(focus_points)}"
            ),
            fallback_used=fallback_used,
        )
class DecomposeTestItemsTool(TestingTool):
    """Pipeline step 2: expand the requirement into normal/abnormal test items.

    Merges LLM-generated items (when available) with rule-based seed items;
    the seeds guarantee coverage even when the model is disabled or fails.
    """

    name = "decompose-test-items"

    @staticmethod
    def _seed_items(
        req_type: str,
        req_text: str,
        focus_points: List[str],
        max_items: int,
    ) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Build rule-based (normal, abnormal) item lists for *req_type* and *focus_points*."""
        if req_type in REQUIREMENT_RULES:
            source_rules = REQUIREMENT_RULES[req_type]
            normal_templates = list(source_rules.get("normal", []))
            abnormal_templates = list(source_rules.get("abnormal", []))
        else:
            # Unknown type: fall back to the generic decomposition templates.
            normal_templates = list(GENERIC_DECOMPOSITION_RULES["normal"])
            abnormal_templates = list(GENERIC_DECOMPOSITION_RULES["abnormal"])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        for template in normal_templates:
            normal.append({"content": template, "coverage_tags": [req_type]})
        for template in abnormal_templates:
            abnormal.append({"content": template, "coverage_tags": [req_type]})
        # Two normal + two abnormal items per extracted focus point.
        for point in focus_points:
            normal.extend(
                [
                    {
                        "content": f"验证{point}在标准作业流程下稳定执行且结果符合业务约束。",
                        "coverage_tags": [point, "正常流程"],
                    },
                    {
                        "content": f"验证{point}与相关联动控制、状态同步和回执反馈的一致性。",
                        "coverage_tags": [point, "联动一致性"],
                    },
                ]
            )
            abnormal.extend(
                [
                    {
                        "content": f"验证{point}在非法输入、错误指令或权限异常时的保护与拒绝机制。",
                        "coverage_tags": [point, "异常输入"],
                    },
                    {
                        "content": f"验证{point}在边界条件、时序冲突或设备故障下的告警和恢复行为。",
                        "coverage_tags": [point, "边界异常"],
                    },
                ]
            )
        # Requirements mentioning manuals get an extra manual-coverage item.
        if any(token in req_text for token in ["手册", "操作手册", "用户手册", "作业指导"]):
            normal.append(
                {
                    "content": "验证需求说明未显式给出但在用户手册或操作手册体现的功能流程。",
                    "coverage_tags": ["手册功能"],
                }
            )
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    @staticmethod
    def _generate_by_llm(context: Dict[str, Any]) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """Ask the LLM for (normal, abnormal) item lists; returns ([], []) when unavailable."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text", "")
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=2500)
        prompt = f"""
你是资深测试分析师。请根据需求、分解规则和知识库片段,生成尽可能覆盖要点的测试项。
需求文本:{req_text}
需求类型:{req_type}
需求要点:{focus_points}
知识库片段:{knowledge_context or ''}
分解约束:
1. 正常测试与异常测试必须分组输出。
2. 每条测试项必须可执行、可验证,避免模板化空话。
3. 尽可能覆盖全部需求要点每组建议输出6-{max_items}条。
4. 优先生成与需求对象/控制逻辑/异常处理/边界条件强相关的测试项。
请仅输出 JSON 对象,结构如下:
{{
"normal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
],
"abnormal_test_items": [
{{"content": "...", "coverage_tags": ["..."]}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return [], []
        normal_raw = data.get("normal_test_items", [])
        abnormal_raw = data.get("abnormal_test_items", [])
        normal: List[Dict[str, Any]] = []
        abnormal: List[Dict[str, Any]] = []
        # Silently drop malformed entries; only well-formed items survive.
        for item in normal_raw if isinstance(normal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                normal.append(normalized)
        for item in abnormal_raw if isinstance(abnormal_raw, list) else []:
            normalized = _normalize_item_entry(item)
            if normalized:
                abnormal.append(normalized)
        return _dedupe_items(normal, max_items), _dedupe_items(abnormal, max_items)

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Produce context["test_items"] with id-tagged normal (N*) and abnormal (E*) items."""
        req_result = context.get("requirement_type_result", {})
        req_type = req_result.get("requirement_type", "未知类型")
        req_text = context.get("normalized_requirement_text") or _strip_instruction_prefix(
            context.get("user_requirement_text", "")
        )
        focus_points = context.get("requirement_focus_points", [])
        max_items = _safe_int(context.get("max_items_per_group"), 12, 4, 30)
        seeded_normal, seeded_abnormal = self._seed_items(req_type, req_text, focus_points, max_items)
        llm_normal, llm_abnormal = self._generate_by_llm(context)
        # LLM items go first so they win the content-level dedupe over seeds.
        merged_normal = _dedupe_items(llm_normal + seeded_normal, max_items)
        merged_abnormal = _dedupe_items(llm_abnormal + seeded_abnormal, max_items)
        fallback_used = not bool(llm_normal or llm_abnormal)
        normal_items: List[Dict[str, Any]] = []
        abnormal_items: List[Dict[str, Any]] = []
        for idx, item in enumerate(merged_normal, start=1):
            normal_items.append(
                {
                    "id": f"N{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        for idx, item in enumerate(merged_abnormal, start=1):
            abnormal_items.append(
                {
                    "id": f"E{idx}",
                    "content": item["content"],
                    "coverage_tags": item.get("coverage_tags", []),
                }
            )
        context["test_items"] = {
            "normal": normal_items,
            "abnormal": abnormal_items,
        }
        context["decompose_force_rules"] = DECOMPOSE_FORCE_RULES
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_items={len(normal_items)}; abnormal_items={len(abnormal_items)}; "
                f"llm_items={len(llm_normal) + len(llm_abnormal)}"
            ),
            fallback_used=fallback_used,
        )
class GenerateTestCasesTool(TestingTool):
    """Pipeline step 3: turn each test item into concrete test cases.

    Tries the LLM per item first and falls back to two templated cases
    (from _build_fallback_steps) when the model is unavailable or its
    answer fails validation.
    """

    name = "generate-test-cases"

    @staticmethod
    def _build_fallback_steps(item_content: str, abnormal: bool, variant: str) -> List[str]:
        """Return a fixed six-step operation script for the given item/variant."""
        if abnormal:
            return [
                "确认测试前置环境、设备状态与日志采集开关已准备就绪。",
                f"准备异常场景“{variant}”所需的输入数据、操作账号和触发条件。",
                f"在目标对象执行异常触发操作,重点验证:{item_content}",
                "持续观察系统返回码、错误文案、告警信息与日志链路完整性。",
                "检查保护机制是否生效,包括拒绝策略、回滚行为和状态一致性。",
                "记录证据并复位环境,确认异常处理后系统可恢复到稳定状态。",
            ]
        return [
            "确认测试环境、设备连接状态和前置业务数据均已初始化。",
            f"准备“{variant}”所需输入参数、操作路径和判定阈值。",
            f"在目标对象执行业务控制流程,重点验证:{item_content}",
            "校验关键返回值、状态变化、控制回执及界面或接口反馈结果。",
            "检查联动模块、日志记录和数据落库是否满足一致性要求。",
            "沉淀测试证据并恢复环境,确保后续用例可重复执行。",
        ]

    def _generate_cases_by_llm(
        self,
        context: Dict[str, Any],
        item: Dict[str, Any],
        abnormal: bool,
        cases_per_item: int,
    ) -> List[Dict[str, Any]]:
        """Ask the LLM for cases for one item; returns [] when unusable."""
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1800)
        # NOTE(review): max(cases_per_item + 1, cases_per_item) is always
        # cases_per_item + 1 — the max() call looks redundant.
        prompt = f"""
你是资深测试工程师。请围绕给定测试项生成详细测试用例。
需求:{req_text}
测试项:{item.get('content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
知识库片段:{knowledge_context or ''}
要求:
1. 生成 {cases_per_item}-{max(cases_per_item + 1, cases_per_item)} 条测试用例。
2. 每条用例包含 test_content 与 operation_steps。
3. operation_steps 必须详细至少5步包含前置、执行、观察、校验与证据留存。
4. 内容必须围绕当前测试项,不要输出空洞模板。
仅输出 JSON
{{
"test_cases": [
{{
"title": "...",
"test_content": "...",
"operation_steps": ["...", "..."]
}}
]
}}
""".strip()
        data = _invoke_llm_json(context, prompt)
        if not data:
            return []
        raw_cases = data.get("test_cases", [])
        if not isinstance(raw_cases, list):
            return []
        normalized_cases: List[Dict[str, Any]] = []
        for case in raw_cases:
            if not isinstance(case, dict):
                continue
            test_content = _clean_text(str(case.get("test_content", "")))
            if not test_content:
                continue
            steps = case.get("operation_steps", [])
            if not isinstance(steps, list):
                continue
            cleaned_steps = [_clean_text(str(step)) for step in steps if _clean_text(str(step))]
            # Reject under-specified cases; the prompt demands >= 5 steps.
            if len(cleaned_steps) < 5:
                continue
            normalized_cases.append(
                {
                    "title": _clean_text(str(case.get("title", ""))),
                    "test_content": test_content,
                    "operation_steps": cleaned_steps,
                }
            )
        return normalized_cases[: max(1, cases_per_item)]

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Build context["test_cases"] with per-item cases plus expected-result placeholders."""
        test_items = context.get("test_items", {})
        cases_per_item = _safe_int(context.get("cases_per_item"), 2, 1, 5)
        normal_cases: List[Dict[str, Any]] = []
        abnormal_cases: List[Dict[str, Any]] = []
        llm_case_count = 0
        for item in test_items.get("normal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=False, cases_per_item=cases_per_item)
            if not generated:
                # Fallback: two templated cases, trimmed to the per-item cap.
                generated = [
                    {
                        "title": "标准流程验证",
                        "test_content": f"验证{item['content']}",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "标准流程"),
                    },
                    {
                        "title": "边界与联动验证",
                        "test_content": f"验证{item['content']}在边界条件和联动场景下的稳定性",
                        "operation_steps": self._build_fallback_steps(item["content"], False, "边界与联动"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=False)
                normal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        for item in test_items.get("abnormal", []):
            generated = self._generate_cases_by_llm(context, item, abnormal=True, cases_per_item=cases_per_item)
            if not generated:
                # Same fallback strategy for the abnormal group.
                generated = [
                    {
                        "title": "非法输入与权限异常验证",
                        "test_content": f"验证{item['content']}在非法输入与权限异常下的处理表现",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "非法输入与权限异常"),
                    },
                    {
                        "title": "故障与时序冲突验证",
                        "test_content": f"验证{item['content']}在故障和时序冲突场景下的保护行为",
                        "operation_steps": self._build_fallback_steps(item["content"], True, "故障与时序冲突"),
                    },
                ][:cases_per_item]
            else:
                llm_case_count += len(generated)
            for idx, case in enumerate(generated, start=1):
                merged_content = _clean_text(case.get("test_content", item["content"]))
                placeholder = _pick_expected_result_placeholder(merged_content, abnormal=True)
                abnormal_cases.append(
                    {
                        "id": f"{item['id']}-C{idx}",
                        "item_id": item["id"],
                        "title": _clean_text(case.get("title", "")),
                        "operation_steps": case.get("operation_steps", []),
                        "test_content": merged_content,
                        "expected_result_placeholder": placeholder,
                    }
                )
        context["test_cases"] = {
            "normal": normal_cases,
            "abnormal": abnormal_cases,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_cases={len(normal_cases)}; abnormal_cases={len(abnormal_cases)}; llm_cases={llm_case_count}"
            ),
            fallback_used=llm_case_count == 0,
        )
class BuildExpectedResultsTool(TestingTool):
    """Pipeline step 4: attach one verifiable expected result per test case.

    NOTE(review): this step's name uses underscores while earlier steps use
    hyphens ("identify-requirement-type") — possibly an inconsistency worth
    aligning with the step-log consumers.
    """

    name = "build_expected_results"

    def _expected_for_case(self, context: Dict[str, Any], case: Dict[str, Any], abnormal: bool) -> str:
        """Return an expected-result sentence: LLM first, placeholder-based templates as fallback."""
        placeholder = case.get("expected_result_placeholder", "{{return_value}}")
        if placeholder not in EXPECTED_RESULT_PLACEHOLDER_MAP:
            placeholder = "{{return_value}}"
        req_text = context.get("normalized_requirement_text", "")
        knowledge_context = _truncate_text(context.get("knowledge_context", ""), max_len=1200)
        prompt = f"""
请基于以下信息生成一条可验证、可度量的测试预期结果,避免模板化空话。
需求:{req_text}
测试内容:{case.get('test_content', '')}
测试类型:{'异常测试' if abnormal else '正常测试'}
占位符语义:{placeholder} -> {EXPECTED_RESULT_PLACEHOLDER_MAP.get(placeholder, '')}
知识库片段:{knowledge_context or ''}
输出要求:
1. 仅输出一句中文预期结果。
2. 结果必须可判定成功/失败。
3. 包含关键观测项(返回值、状态、告警、日志、数据一致性中的相关项)。
""".strip()
        llm_text = _invoke_llm_text(context, prompt)
        if llm_text:
            # Keep expected results to a single short sentence.
            return _truncate_text(llm_text, max_len=220)
        # Rule-based fallbacks keyed off the placeholder semantics.
        test_content = _clean_text(case.get("test_content", ""))
        if placeholder == "{{error_message}}":
            return f"触发{test_content}后,系统应返回明确错误码与错误文案,拒绝非法请求且核心状态保持一致。"
        if placeholder == "{{state_change}}":
            return f"执行{test_content}后,系统状态转换应符合需求定义,状态变化可被日志与回执共同验证。"
        if placeholder == "{{data_persistence}}":
            return f"执行{test_content}后,数据库或存储层应产生符合约束的持久化结果且无脏数据。"
        if placeholder == "{{ui_display}}":
            return f"执行{test_content}后,界面应展示与控制结果一致的反馈信息且提示可被用户执行。"
        if abnormal:
            return f"执行异常场景“{test_content}”后,系统应触发保护策略并输出可追溯日志,业务状态保持可恢复。"
        return f"执行“{test_content}”后,返回值与状态变化应满足需求约束,关键结果可通过日志或回执验证。"

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Build context["expected_results"] mirroring the normal/abnormal case lists."""
        test_cases = context.get("test_cases", {})
        normal_expected: List[Dict[str, str]] = []
        abnormal_expected: List[Dict[str, str]] = []
        for case in test_cases.get("normal", []):
            normal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=False),
                }
            )
        for case in test_cases.get("abnormal", []):
            abnormal_expected.append(
                {
                    "id": case["id"],
                    "case_id": case["id"],
                    "result": self._expected_for_case(context, case, abnormal=True),
                }
            )
        context["expected_results"] = {
            "normal": normal_expected,
            "abnormal": abnormal_expected,
        }
        return ToolExecutionResult(
            context=context,
            output_summary=(
                f"normal_expected={len(normal_expected)}; abnormal_expected={len(abnormal_expected)}"
            ),
        )
class FormatOutputTool(TestingTool):
    """Pipeline final step: render items, cases and expected results as text.

    Fix: the "(对应测试项 …" / "(对应用例 …" cross-reference markers were
    opened but never closed in three format strings, so the reference ran
    straight into the following text; the closing ")" is now emitted.

    NOTE(review): this step's name uses underscores while earlier steps use
    hyphens — possibly worth aligning.
    """

    name = "format_output"

    @staticmethod
    def _format_case_block(case: Dict[str, Any], index: int) -> List[str]:
        """Render one test case as a numbered block of output lines."""
        item_id = case.get("item_id", case.get("id", ""))
        title = _clean_text(case.get("title", ""))
        block: List[str] = []
        block.append(
            f"{index}. [用例 {case['id']}](对应测试项 {item_id}){case.get('test_content', '')}"
        )
        if title:
            block.append(f" 场景标题:{title}")
        block.append(" 操作步骤:")
        for step_idx, step in enumerate(case.get("operation_steps", []), start=1):
            block.append(f" {step_idx}) {step}")
        return block

    def execute(self, context: Dict[str, Any]) -> ToolExecutionResult:
        """Assemble context["formatted_output"] (text) and context["structured_output"] (dict)."""
        test_items = context.get("test_items", {"normal": [], "abnormal": []})
        test_cases = context.get("test_cases", {"normal": [], "abnormal": []})
        expected_results = context.get("expected_results", {"normal": [], "abnormal": []})
        lines: List[str] = []
        # Section 1: test items.
        lines.append("**测试项**")
        lines.append("")
        lines.append("**正常测试**")
        for index, item in enumerate(test_items.get("normal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        lines.append("**异常测试**")
        for index, item in enumerate(test_items.get("abnormal", []), start=1):
            lines.append(f"{index}. [测试项 {item['id']}]{item['content']}")
        lines.append("")
        # Section 2: test cases.
        lines.append("**测试用例**")
        lines.append("")
        lines.append("**正常测试**")
        for index, case in enumerate(test_cases.get("normal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        lines.append("**异常测试**")
        for index, case in enumerate(test_cases.get("abnormal", []), start=1):
            lines.extend(self._format_case_block(case, index))
        lines.append("")
        # Section 3: expected results.
        lines.append("**预期成果**")
        lines.append("")
        lines.append("**正常测试**")
        for index, expected in enumerate(expected_results.get("normal", []), start=1):
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        lines.append("")
        lines.append("**异常测试**")
        for index, expected in enumerate(expected_results.get("abnormal", []), start=1):
            lines.append(
                f"{index}. [预期 {expected['id']}](对应用例 {expected['case_id']}){expected['result']}"
            )
        context["formatted_output"] = "\n".join(lines)
        context["structured_output"] = {
            "test_items": test_items,
            "test_cases": test_cases,
            "expected_results": expected_results,
        }
        return ToolExecutionResult(
            context=context,
            output_summary="formatted_sections=3",
        )
def build_default_tool_chain() -> List[TestingTool]:
    """Return the pipeline steps in execution order:
    classify -> decompose -> generate cases -> expected results -> format."""
    return [
        IdentifyRequirementTypeTool(),
        DecomposeTestItemsTool(),
        GenerateTestCasesTool(),
        BuildExpectedResultsTool(),
        FormatOutputTool(),
    ]