165 lines
6.5 KiB
Python
165 lines
6.5 KiB
Python
# @line_count 150
|
||
"""旧格式适配器(sections数组格式)"""
|
||
from typing import List, Dict, Any
|
||
from .base_adapter import BaseParserAdapter
|
||
|
||
|
||
class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    The methods read ``self.data`` as a dict whose ``sections`` entry is a
    list of section dicts.  Each section has a ``title`` and a ``content``
    list; only content items with ``type == 'text'`` are inspected, and their
    text is read from the item's own ``content`` key.

    Function points are recognised heuristically: a short text item is taken
    as a function title, and the text items that follow it (up to the next
    title) form its description.
    """

    # A text item longer than this is taken as the module description.
    _MODULE_DESCRIPTION_MIN_LENGTH = 50
    # A text item must be shorter than this to qualify as a function title.
    _FUNCTION_TITLE_MAX_LENGTH = 20
    # Sentence-like endings that disqualify a text from being a title
    # (both ASCII and full-width comma are accepted).
    _NON_TITLE_SUFFIXES = ('。', ',', '，')
    # Figure-reference fragments that disqualify a text from being a title.
    _NON_TITLE_FRAGMENTS = ('如下', '所示')
    # Verbs that mark a description sentence as an operation step.
    _STEP_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Figure references stripped from steps.  The variants carrying a colon
    # must come first, otherwise the bare phrase is removed before they can
    # match and the colon is left behind.
    _FIGURE_REFERENCES = ('如下图所示:', '如下图所示：', '如下图所示')

    def _dict_sections(self) -> List[Dict[str, Any]]:
        """Return the entries of ``self.data['sections']`` that are dicts.

        A missing or non-list ``sections`` value yields an empty list.
        """
        sections = self.data.get('sections')
        if not isinstance(sections, list):
            return []
        return [section for section in sections if isinstance(section, dict)]

    @staticmethod
    def _text_items(content: Any) -> List[str]:
        """Return the stripped, non-empty texts of the ``text`` items in *content*.

        Items that are not dicts, are not of type ``'text'``, or whose
        ``content`` value is not a string are skipped instead of raising.
        """
        texts: List[str] = []
        if not isinstance(content, (list, tuple)):
            return texts
        for item in content:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw = item.get('content', '')
            if not isinstance(raw, str):
                continue
            stripped = raw.strip()
            if stripped:
                texts.append(stripped)
        return texts

    def _find_module_description(self, texts: List[str]) -> str:
        """Return the first text longer than the description threshold, or ``''``."""
        for text in texts:
            if len(text) > self._MODULE_DESCRIPTION_MIN_LENGTH:
                return text
        return ''

    def _is_function_title(self, text: str) -> bool:
        """Tell whether *text* looks like a function-point title.

        A title is a non-empty short text that neither ends like a sentence
        nor contains a figure-reference fragment.
        """
        return (
            bool(text)
            and len(text) < self._FUNCTION_TITLE_MAX_LENGTH
            and not text.endswith(self._NON_TITLE_SUFFIXES)
            and not any(fragment in text for fragment in self._NON_TITLE_FRAGMENTS)
        )

    def _make_function_point(
        self,
        module_name: Any,
        module_description: str,
        function_name: str,
        description_parts: List[str],
    ) -> Dict[str, Any]:
        """Build one function-point record from a title and its description parts."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts),
        }

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section.

        Returns:
            A list of dicts with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``.  A section in which no title is recognised
            but which has a module description contributes a single record
            named after the section itself, with no operation steps.
        """
        function_points: List[Dict[str, Any]] = []

        for section in self._dict_sections():
            module_name = section.get('title', '')
            texts = self._text_items(section.get('content', []))
            module_description = self._find_module_description(texts)

            current_title = None
            description_parts: List[str] = []

            for text in texts:
                if self._is_function_title(text):
                    # A new title closes the function point collected so far.
                    if current_title is not None:
                        function_points.append(self._make_function_point(
                            module_name, module_description,
                            current_title, description_parts,
                        ))
                    current_title = text
                    description_parts = []
                elif current_title is not None:
                    # Text before the first title belongs to no function point.
                    description_parts.append(text)

            if current_title is not None:
                # Flush the function point that was still open at section end.
                function_points.append(self._make_function_point(
                    module_name, module_description,
                    current_title, description_parts,
                ))
            elif module_description:
                # No title recognised: report the whole module as one point.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': [],
                })

        return function_points

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Extract operation steps from description texts.

        A part counts as a step when it contains one of the operation verbs
        in ``_STEP_KEYWORDS``.  Figure references are removed from the step
        together with their trailing colon.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The cleaned, non-empty step texts in their original order.
        """
        steps: List[str] = []
        for part in description_parts:
            if not isinstance(part, str):
                continue
            if not any(keyword in part for keyword in self._STEP_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._FIGURE_REFERENCES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return the document title, version, date and number of sections."""
        sections = self.data.get('sections')
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            # A missing or null ``sections`` entry counts as zero sections.
            'section_count': len(sections) if sections else 0,
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the raw ``sections`` list, or an empty list when absent."""
        return self.data.get('sections', [])

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise each section as a module.

        Returns:
            One dict per section with the keys ``name``, ``function_count``
            and ``description``.  ``function_count`` is the number of
            distinct texts recognised as function titles, using the same
            rule as ``extract_function_points``.
        """
        modules: List[Dict[str, Any]] = []

        for section in self._dict_sections():
            texts = self._text_items(section.get('content', []))
            distinct_titles = {
                text for text in texts if self._is_function_title(text)
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(distinct_titles),
                'description': self._find_module_description(texts),
            })

        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Tell whether *data* is in the legacy format.

        Returns ``True`` only when *data* is a dict whose ``sections`` value
        is a list; any other input, including ``None``, yields ``False``.
        """
        return isinstance(data, dict) and isinstance(data.get('sections'), list)
|