# @line_count 150
"""Adapter for the legacy document format (a top-level ``sections`` array)."""

from collections.abc import Mapping
from typing import Any, Dict, Iterator, List

from .base_adapter import BaseParserAdapter


class SectionArrayAdapter(BaseParserAdapter):
    """Handle the legacy format, where the document is a ``sections`` array.

    Each section is read as a mapping with a ``title`` and a ``content`` list;
    only content items whose ``type`` is ``'text'`` are considered.
    """

    # A text item strictly longer than this is taken as the module description.
    _MODULE_DESCRIPTION_MIN_LEN = 50
    # A text item strictly shorter than this may be a function-point title.
    _TITLE_MAX_LEN = 20
    # Substrings that mark a short text as descriptive rather than a title.
    _NON_TITLE_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description part as an operation step.
    _ACTION_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Figure-reference filler removed from steps. The longer variants MUST come
    # first: removing the bare phrase first would leave a dangling colon.
    _FIGURE_PHRASES = ('如下图所示:', '如下图所示:', '如下图所示')

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from the ``sections`` array.

        Within a section, a short text that passes ``_is_function_title``
        starts a new function point, and the following non-title texts form
        its description. If a section yields no function point but has a
        module description, the whole module is emitted as one function point.

        Returns:
            A list of dicts with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``.
        """
        function_points: List[Dict[str, Any]] = []

        for section in self._iter_sections():
            module_name = section.get('title', '')
            texts = list(self._iter_texts(section.get('content')))
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts: List[str] = []

            for text in texts:
                if self._is_function_title(text):
                    # Flush the function point collected so far.
                    if current_function:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts,
                        ))
                    current_function = text
                    description_parts = []
                elif current_function:
                    description_parts.append(text)
                # Text seen before the first title is not attached to anything.

            if current_function:
                # Flush the last function point of the section.
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts,
                ))
            elif module_description:
                # No title recognised: treat the whole module as one point.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': [],
                })

        return function_points

    def _iter_sections(self) -> Iterator[Mapping]:
        """Yield the mapping entries of ``self.data['sections']``.

        A missing or null ``sections`` value yields nothing, and entries that
        are not mappings are skipped.
        """
        sections = self.data.get('sections')
        if not isinstance(sections, (list, tuple)):
            return
        for section in sections:
            if isinstance(section, Mapping):
                yield section

    @staticmethod
    def _iter_texts(content: Any) -> Iterator[str]:
        """Yield the stripped, non-empty text of every ``'text'`` item.

        Args:
            content: The ``content`` value of a section. Anything other than a
                list/tuple yields nothing; items that are not mappings, are
                not of type ``'text'``, or whose ``content`` is not a string
                are skipped.
        """
        if not isinstance(content, (list, tuple)):
            return
        for item in content:
            if not isinstance(item, Mapping) or item.get('type') != 'text':
                continue
            raw = item.get('content')
            if not isinstance(raw, str):
                continue
            text = raw.strip()
            if text:
                yield text

    def _find_module_description(self, texts: List[str]) -> str:
        """Return the first text longer than the description threshold, or ''."""
        return next(
            (t for t in texts if len(t) > self._MODULE_DESCRIPTION_MIN_LEN),
            '',
        )

    def _is_function_title(self, text: str) -> bool:
        """Return True if *text* looks like a function-point title.

        A title is short, does not end with '。' or ',', and contains none of
        the descriptive markers ('如下', '所示').
        """
        return (
            len(text) < self._TITLE_MAX_LEN
            and not text.endswith(('。', ','))
            and not any(marker in text for marker in self._NON_TITLE_MARKERS)
        )

    def _build_function_point(
        self,
        module_name: str,
        module_description: str,
        function_name: str,
        description_parts: List[str],
    ) -> Dict[str, Any]:
        """Assemble the dict for one function point."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts),
        }

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Extract operation steps from description texts.

        A part counts as a step when it contains at least one action keyword.
        Figure-reference filler ("如下图所示", with or without a trailing
        colon) is removed from it, and parts that end up empty are dropped.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The cleaned step texts, in their original order.
        """
        steps = []
        for part in description_parts:
            if not any(keyword in part for keyword in self._ACTION_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._FIGURE_PHRASES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return document metadata read from ``self.data``.

        Returns:
            A dict with ``title`` (from ``document_title``), ``version``,
            ``date`` and ``section_count`` (length of ``sections``, 0 when
            it is missing or null).
        """
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self.data.get('sections') or []),
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the raw ``sections`` list (empty when missing or null)."""
        return self.data.get('sections') or []

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Return one summary dict per section.

        Returns:
            A list of dicts with ``name`` (section title), ``function_count``
            and ``description`` (first text longer than the description
            threshold, or '').
        """
        modules = []
        for section in self._iter_sections():
            texts = list(self._iter_texts(section.get('content')))
            # Simple count: distinct short texts not ending with '。'. This is
            # looser than the title rule used by extract_function_points, so
            # the two numbers may disagree.
            candidate_names = {
                t for t in texts
                if len(t) < self._TITLE_MAX_LEN and not t.endswith('。')
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(candidate_names),
                'description': self._find_module_description(texts),
            })
        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Return True if *data* is in the legacy format.

        The legacy format is a mapping whose ``sections`` value is a list.
        Non-mapping input returns False instead of raising.
        """
        return isinstance(data, Mapping) and isinstance(data.get('sections'), list)