Files
test_item_gen/modules/parser_adapters/section_array_adapter.py
2026-02-04 14:38:52 +08:00

165 lines
6.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# @line_count 150
"""Legacy-format adapter (``sections`` array format)."""
from typing import List, Dict, Any
from .base_adapter import BaseParserAdapter
class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    Every method reads ``self.data`` and accesses it as a dict shaped like::

        {
            "document_title": ..., "version": ..., "date": ...,
            "sections": [
                {"title": ..., "content": [{"type": "text", "content": "..."}]},
            ],
        }

    ``self.data`` is not assigned in this class; presumably the base class
    sets it -- confirm against ``BaseParserAdapter``.
    """

    # A text longer than this is taken to be the module-level description.
    _DESCRIPTION_MIN_LENGTH = 50
    # A text must be shorter than this to qualify as a function-point title.
    _TITLE_MAX_LENGTH = 20
    # Trailing punctuation that marks descriptive prose rather than a title.
    # Written as escapes so the characters survive tools that strip
    # "ambiguous" Unicode.
    # NOTE(review): the reviewed copy of this file had empty-string literals
    # here, and ``str.endswith('')`` is always True, which made the title test
    # fail for every text. The characters below are reconstructed from the
    # surrounding comments -- confirm against the upstream file.
    _PROSE_ENDINGS = (
        '\u3002',  # ideographic full stop
        '\uff1a',  # full-width colon
        ':',
    )
    # Fragments of "as shown below"-style captions; never part of a title.
    _CAPTION_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description part as an operation step.
    _ACTION_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Caption phrases stripped from steps. Longest variants come first so the
    # bare phrase cannot consume the prefix and leave a stray colon behind.
    _CAPTION_PHRASES = ('如下图所示\uff1a', '如下图所示:', '如下图所示')

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section.

        Within a section, each text that passes ``_is_function_title`` starts
        a new function point; the texts that follow it, up to the next title,
        form its description. A section with no title but with a module
        description yields a single function point named after the section.

        Returns:
            A list of dicts with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``.
        """
        function_points: List[Dict[str, Any]] = []
        for section in self._section_dicts():
            module_name = section.get('title', '')
            texts = self._text_items(section.get('content'))
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts: List[str] = []
            for text in texts:
                if self._is_function_title(text):
                    # A new title closes the function point collected so far.
                    if current_function:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts,
                        ))
                    current_function = text
                    description_parts = []
                elif current_function:
                    description_parts.append(text)
                # Texts that precede the first title are intentionally ignored.

            if current_function:
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts,
                ))
            elif module_description:
                # No title found: treat the whole module as one function point.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': [],
                })
        return function_points

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Pick the operation steps out of a function point's description.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The parts containing at least one action keyword, with caption
            phrases removed and surrounding whitespace stripped. Parts that
            become empty after cleaning are dropped.
        """
        steps = []
        for part in description_parts:
            if not any(keyword in part for keyword in self._ACTION_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._CAPTION_PHRASES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return the document's title, version, date and section count.

        Missing metadata fields default to an empty string. A missing or
        ``None`` ``sections`` entry counts as zero sections.
        """
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self.data.get('sections') or []),
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the raw ``sections`` value, or an empty list if absent."""
        return self.data.get('sections', [])

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise each section as a module.

        Returns:
            One dict per section with the keys ``name``, ``function_count``
            and ``description``. ``function_count`` is the number of distinct
            texts that pass ``_is_function_title``, so a repeated title is
            counted once and a section without titles reports ``0``.
        """
        modules = []
        for section in self._section_dicts():
            texts = self._text_items(section.get('content'))
            distinct_titles = {
                text for text in texts if self._is_function_title(text)
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(distinct_titles),
                'description': self._find_module_description(texts),
            })
        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Return True when ``data`` is a dict whose ``sections`` is a list."""
        return isinstance(data, dict) and isinstance(data.get('sections'), list)

    def _section_dicts(self) -> List[Dict[str, Any]]:
        """Return the dict entries of ``sections``; anything else is skipped."""
        sections = self.data.get('sections')
        if not isinstance(sections, list):
            return []
        return [section for section in sections if isinstance(section, dict)]

    @staticmethod
    def _text_items(content: Any) -> List[str]:
        """Return the stripped, non-empty strings of the ``text`` items.

        Items that are not dicts, are not of type ``text``, or whose
        ``content`` is not a string are skipped rather than raising.
        """
        texts: List[str] = []
        if not isinstance(content, list):
            return texts
        for item in content:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw = item.get('content')
            if not isinstance(raw, str):
                continue
            text = raw.strip()
            if text:
                texts.append(text)
        return texts

    @classmethod
    def _find_module_description(cls, texts: List[str]) -> str:
        """Return the first text longer than the description threshold, or ''."""
        for text in texts:
            if len(text) > cls._DESCRIPTION_MIN_LENGTH:
                return text
        return ''

    @classmethod
    def _is_function_title(cls, text: str) -> bool:
        """Return True when ``text`` looks like a function-point title.

        A title is short, does not end with sentence or lead-in punctuation,
        and contains no caption marker.
        """
        return (
            len(text) < cls._TITLE_MAX_LENGTH
            and not text.endswith(cls._PROSE_ENDINGS)
            and not any(marker in text for marker in cls._CAPTION_MARKERS)
        )

    def _build_function_point(
        self,
        module_name: Any,
        module_description: str,
        function_name: str,
        description_parts: List[str],
    ) -> Dict[str, Any]:
        """Assemble the dict for one titled function point."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts),
        }