165 lines
6.5 KiB
Python
165 lines
6.5 KiB
Python
|
|
# @line_count 150
|
|||
|
|
"""旧格式适配器(sections数组格式)"""
|
|||
|
|
from typing import List, Dict, Any
|
|||
|
|
from .base_adapter import BaseParserAdapter
|
|||
|
|
|
|||
|
|
|
|||
|
|
class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    The adapter reads ``self.data`` (presumably the parsed document dict set
    up by ``BaseParserAdapter`` -- confirm against the base class). Each entry
    of ``self.data['sections']`` is read as a dict with a ``title`` and a
    ``content`` list; only content items whose ``type`` is ``'text'`` are
    considered, and their ``content`` value is used as the text.
    """

    # A text item strictly longer than this is taken as the module description.
    _MODULE_DESCRIPTION_MIN_LENGTH = 50
    # A text item strictly shorter than this may be a function-point title.
    _FUNCTION_TITLE_MAX_LENGTH = 20
    # Trailing punctuation that marks a sentence fragment rather than a title.
    # Both the ASCII and the full-width comma are listed because Chinese text
    # normally uses the full-width form.
    _NON_TITLE_SUFFIXES = ('。', ',', '，')
    # Substrings that mark a text as descriptive ("as follows" / "as shown").
    _NON_TITLE_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description part as an operation step.
    _STEP_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Figure-reference phrases stripped from steps. Longest variants come
    # first so the trailing colon is removed together with the phrase.
    _FIGURE_REFERENCES = ('如下图所示：', '如下图所示:', '如下图所示')

    @staticmethod
    def _iter_texts(content):
        """Yield the stripped text of every ``'text'`` item in ``content``.

        ``None`` content, non-dict items and non-string text values are
        tolerated: the first two are skipped, the last yields ``''``.
        """
        for item in content or []:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw = item.get('content', '')
            yield raw.strip() if isinstance(raw, str) else ''

    @classmethod
    def _find_module_description(cls, texts: List[str]) -> str:
        """Return the first text longer than the description threshold, or ``''``."""
        return next(
            (text for text in texts
             if len(text) > cls._MODULE_DESCRIPTION_MIN_LENGTH),
            '',
        )

    @classmethod
    def _is_function_title(cls, text: str) -> bool:
        """Tell whether ``text`` looks like a function-point heading.

        A heading is short, does not end like a sentence fragment and does
        not contain a "see below / as shown" marker.
        """
        return (
            len(text) < cls._FUNCTION_TITLE_MAX_LENGTH
            and not text.endswith(cls._NON_TITLE_SUFFIXES)
            and not any(marker in text for marker in cls._NON_TITLE_MARKERS)
        )

    def _build_function_point(self, module_name: str, module_description: str,
                              function_name: str,
                              description_parts: List[str]) -> Dict[str, Any]:
        """Assemble one function-point record from its collected pieces."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts),
        }

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section.

        Within a section, a short title-like text starts a new function point
        and the following non-title texts become its description. If a
        section yields no function point but has a module description, the
        whole module is reported as a single function point.

        Returns:
            A list of dicts with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``.
        """
        function_points = []

        for section in self.data.get('sections') or []:
            module_name = section.get('title', '')
            texts = list(self._iter_texts(section.get('content')))
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts = []

            for text in texts:
                if not text:
                    continue

                if self._is_function_title(text):
                    # A new title closes the function point collected so far.
                    if current_function:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts))
                    current_function = text
                    description_parts = []
                elif current_function:
                    description_parts.append(text)
                # Descriptive text that precedes the first title is not
                # attached to any function point.

            if current_function:
                # Flush the last function point of the section.
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts))
            elif module_description:
                # No title recognised: fall back to one point for the module.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': [],
                })

        return function_points

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Pick the operation steps out of a function point's description.

        A part counts as a step when it contains one of the action verbs in
        ``_STEP_KEYWORDS``. Figure references such as "如下图所示:" are
        removed from the step text.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The cleaned, non-empty step texts in their original order.
        """
        steps = []
        for part in description_parts:
            if not any(keyword in part for keyword in self._STEP_KEYWORDS):
                continue
            cleaned = part
            # Longest phrase first; removing the bare phrase first would
            # leave its trailing colon behind.
            for phrase in self._FIGURE_REFERENCES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return the document title, version, date and number of sections."""
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self.data.get('sections') or []),
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the raw ``sections`` list (empty list when the key is absent)."""
        return self.data.get('sections', [])

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise each section as a module.

        Returns:
            One dict per section with ``name``, ``function_count`` and
            ``description``. ``function_count`` is a rough figure: the number
            of distinct short texts that do not end with '。'. This rule is
            intentionally looser than the one used by
            ``extract_function_points``.
        """
        modules = []

        for section in self.data.get('sections') or []:
            texts = list(self._iter_texts(section.get('content')))
            candidate_titles = {
                text for text in texts
                if text
                and len(text) < self._FUNCTION_TITLE_MAX_LENGTH
                and not text.endswith('。')
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(candidate_titles),
                'description': self._find_module_description(texts),
            })

        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Tell whether ``data`` is in the legacy format.

        Returns:
            True when ``data`` is a dict whose ``sections`` value is a list.
        """
        return isinstance(data, dict) and isinstance(data.get('sections'), list)
|