This commit is contained in:
2026-02-04 14:38:52 +08:00
commit a5147b1429
29 changed files with 4489 additions and 0 deletions

View File

@@ -0,0 +1,164 @@
# @line_count 150
"""旧格式适配器sections数组格式"""
from typing import List, Dict, Any
from .base_adapter import BaseParserAdapter
class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    Every entry of ``sections`` is read as one module: its ``title`` is the
    module name and its ``content`` is a list of items, of which only those
    with ``type == 'text'`` are inspected. Function points are recovered
    heuristically from those text items: a short line is treated as a
    function title and the text lines that follow it as its description.

    The parsed document is read from ``self.data``.
    NOTE(review): ``self.data`` is presumably populated by
    ``BaseParserAdapter``; confirm against the base class.
    """

    # A text longer than this is taken to be the module's overall description.
    _MODULE_DESCRIPTION_MIN_LENGTH = 50
    # A text shorter than this is a candidate function-point title.
    _FUNCTION_TITLE_MAX_LENGTH = 20
    # Trailing punctuation that marks a sentence or a lead-in, not a title.
    # NOTE(review): the suffix literals in the previous revision were empty
    # strings, which made ``str.endswith`` always true and disabled title
    # detection entirely. Full stop and colon are the restored values;
    # confirm they match the intended rule.
    _SENTENCE_ENDINGS = ('。', ':', ':')
    # Fragments of figure references ("as shown below"); never part of a title.
    _FIGURE_REFERENCE_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description line as an operation step.
    _ACTION_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Figure-reference phrases stripped from steps. Longer variants come first
    # so the trailing colon is removed together with the phrase.
    _FIGURE_PHRASES = ('如下图所示:', '如下图所示:', '如下图所示')

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section of the document.

        Returns:
            One dict per function point with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``. A section in which no title is recognised
            but which has a module description yields a single entry named
            after the module itself.
        """
        function_points = []
        for section in self.get_sections():
            if not isinstance(section, dict):
                continue
            module_name = section.get('title', '')
            texts = self._section_texts(section)
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts = []
            for text in texts:
                if self._is_function_title(text):
                    # A new title closes the function point collected so far.
                    if current_function:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts))
                    current_function = text
                    description_parts = []
                elif current_function:
                    description_parts.append(text)
                # Text that precedes the first title belongs to no function
                # point and is intentionally dropped.

            if current_function:
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts))
            elif module_description:
                # No title recognised: report the whole module as one point.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': []
                })
        return function_points

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Pick the operation steps out of a function point's description.

        A part counts as a step when it contains one of the action verbs in
        ``_ACTION_KEYWORDS``. Figure-reference phrases are removed from it and
        parts that end up empty are skipped.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The cleaned step texts, in their original order.
        """
        steps = []
        for part in description_parts:
            if not any(keyword in part for keyword in self._ACTION_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._FIGURE_PHRASES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return the document title, version, date and number of sections."""
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self.get_sections())
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the ``sections`` list, or an empty list when it is absent
        or is not a list."""
        sections = self.data.get('sections')
        return sections if isinstance(sections, list) else []

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise each section as a module.

        Returns:
            One dict per section with ``name``, ``function_count`` and
            ``description``. ``function_count`` is an approximate figure: the
            number of distinct short texts that do not end in sentence
            punctuation.
        """
        modules = []
        for section in self.get_sections():
            if not isinstance(section, dict):
                continue
            texts = self._section_texts(section)
            candidate_names = {
                text for text in texts
                if len(text) < self._FUNCTION_TITLE_MAX_LENGTH
                and not text.endswith(self._SENTENCE_ENDINGS)
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(candidate_names),
                'description': self._find_module_description(texts)
            })
        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Return True when ``data`` is a dict whose ``sections`` is a list."""
        return isinstance(data, dict) and isinstance(data.get('sections'), list)

    @staticmethod
    def _section_texts(section: Dict[str, Any]) -> List[str]:
        """Return the stripped, non-empty strings of a section's text items.

        Items that are not dicts, are not of type ``'text'`` or whose
        ``content`` is not a string are skipped instead of raising.
        """
        texts = []
        for item in section.get('content') or []:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw = item.get('content', '')
            if not isinstance(raw, str):
                continue
            text = raw.strip()
            if text:
                texts.append(text)
        return texts

    def _find_module_description(self, texts: List[str]) -> str:
        """Return the first text long enough to be a module description,
        or an empty string when there is none."""
        for text in texts:
            if len(text) > self._MODULE_DESCRIPTION_MIN_LENGTH:
                return text
        return ""

    def _is_function_title(self, text: str) -> bool:
        """Tell whether ``text`` looks like a function-point title.

        A title is short, does not end in sentence punctuation and does not
        contain a figure-reference fragment.
        """
        return (
            len(text) < self._FUNCTION_TITLE_MAX_LENGTH
            and not text.endswith(self._SENTENCE_ENDINGS)
            and not any(marker in text
                        for marker in self._FIGURE_REFERENCE_MARKERS)
        )

    def _build_function_point(self, module_name: str, module_description: str,
                              function_name: str,
                              description_parts: List[str]) -> Dict[str, Any]:
        """Assemble the result dict for one recognised function point."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts)
        }