init rep

2026-02-04 14:38:52 +08:00
commit a5147b1429
29 changed files with 4489 additions and 0 deletions
--- a/modules/parser_adapters/section_array_adapter.py
+++ b/modules/parser_adapters/section_array_adapter.py
@@ -0,0 +1,164 @@
+#  @line_count 150
+"""旧格式适配器（sections数组格式）"""
+from typing import List, Dict, Any
+from .base_adapter import BaseParserAdapter
+
+
+class SectionArrayAdapter(BaseParserAdapter):
+    """处理旧格式：sections数组"""
+    
+    def extract_function_points(self) -> List[Dict[str, Any]]:
+        """从sections数组中提取功能点"""
+        function_points = []
+        sections = self.data.get('sections', [])
+        
+        for section in sections:
+            module_name = section.get('title', '')
+            content = section.get('content', [])
+            
+            # 提取模块总体描述（第一个较长的文本内容）
+            module_description = ""
+            for item in content:
+                if item.get('type') == 'text':
+                    text = item.get('content', '').strip()
+                    if len(text) > 50:  # 较长的文本通常是模块描述
+                        module_description = text
+                        break
+            
+            # 识别功能点
+            # 功能点通常是较短的文本（标题），后面跟着描述
+            current_function = None
+            function_description_parts = []
+            
+            for i, item in enumerate(content):
+                if item.get('type') != 'text':
+                    continue
+                
+                text = item.get('content', '').strip()
+                if not text:
+                    continue
+                
+                # 判断是否是功能点标题
+                # 规则：短文本（通常<20字符），且不是描述性文本
+                is_function_title = (
+                    len(text) < 20 and
+                    not text.endswith('。') and
+                    not text.endswith('，') and
+                    not ('如下' in text or '所示' in text)
+                )
+                
+                if is_function_title:
+                    # 保存之前的功能点
+                    if current_function:
+                        function_points.append({
+                            'module_name': module_name,
+                            'module_description': module_description,
+                            'function_name': current_function,
+                            'description': ' '.join(function_description_parts),
+                            'operation_steps': self._extract_steps(function_description_parts)
+                        })
+                    
+                    # 开始新功能点
+                    current_function = text
+                    function_description_parts = []
+                else:
+                    # 添加到当前功能点的描述
+                    if current_function:
+                        function_description_parts.append(text)
+                    elif not module_description:
+                        # 如果还没有模块描述，这可能是模块描述的一部分
+                        pass
+            
+            # 保存最后一个功能点
+            if current_function:
+                function_points.append({
+                    'module_name': module_name,
+                    'module_description': module_description,
+                    'function_name': current_function,
+                    'description': ' '.join(function_description_parts),
+                    'operation_steps': self._extract_steps(function_description_parts)
+                })
+            
+            # 如果没有识别到功能点，将整个模块作为一个功能点
+            if not current_function and module_description:
+                function_points.append({
+                    'module_name': module_name,
+                    'module_description': module_description,
+                    'function_name': module_name,
+                    'description': module_description,
+                    'operation_steps': []
+                })
+        
+        return function_points
+    
+    def _extract_steps(self, description_parts: List[str]) -> List[str]:
+        """
+        从描述中提取操作步骤
+        
+        Args:
+            description_parts: 描述文本列表
+        
+        Returns:
+            操作步骤列表
+        """
+        steps = []
+        for part in description_parts:
+            # 查找包含操作动词的句子
+            if any(keyword in part for keyword in ['点击', '选择', '输入', '打开', '关闭', '设置', '查看']):
+                # 移除"如下图所示"等描述性文字
+                cleaned = part.replace('如下图所示', '').replace('如下图所示：', '').strip()
+                if cleaned:
+                    steps.append(cleaned)
+        return steps
+    
+    def get_document_info(self) -> Dict[str, Any]:
+        """获取文档信息"""
+        return {
+            'title': self.data.get('document_title', ''),
+            'version': self.data.get('version', ''),
+            'date': self.data.get('date', ''),
+            'section_count': len(self.data.get('sections', []))
+        }
+    
+    def get_sections(self) -> List[Dict[str, Any]]:
+        """获取章节列表"""
+        return self.data.get('sections', [])
+    
+    def get_module_summary(self) -> List[Dict[str, Any]]:
+        """获取模块摘要"""
+        modules = []
+        sections = self.data.get('sections', [])
+        
+        for section in sections:
+            module_info = {
+                'name': section.get('title', ''),
+                'function_count': 0,
+                'description': ''
+            }
+            
+            # 查找模块描述
+            content = section.get('content', [])
+            for item in content:
+                if item.get('type') == 'text':
+                    text = item.get('content', '').strip()
+                    if len(text) > 50:
+                        module_info['description'] = text
+                        break
+            
+            # 统计功能点数量（简单统计）
+            function_names = []
+            for item in content:
+                if item.get('type') == 'text':
+                    text = item.get('content', '').strip()
+                    if len(text) < 20 and text and not text.endswith('。'):
+                        function_names.append(text)
+            
+            module_info['function_count'] = len(set(function_names))
+            modules.append(module_info)
+        
+        return modules
+    
+    @staticmethod
+    def can_parse(data: Dict[str, Any]) -> bool:
+        """检测是否为旧格式"""
+        return 'sections' in data and isinstance(data['sections'], list)