init rep

2026-02-04 14:38:52 +08:00
commit a5147b1429
29 changed files with 4489 additions and 0 deletions
--- a/modules/parser_adapters/init.py
+++ b/modules/parser_adapters/init.py
@@ -0,0 +1,13 @@
+#  @line_count 50
+"""JSON解析器适配器包"""
+from .base_adapter import BaseParserAdapter
+from .parser_factory import ParserFactory
+from .section_array_adapter import SectionArrayAdapter
+from .requirement_tree_adapter import RequirementTreeAdapter
+
+# Public API of the parser_adapters package (what `import *` exposes).
+__all__ = [
+    "BaseParserAdapter",
+    "ParserFactory",
+    "SectionArrayAdapter",
+    "RequirementTreeAdapter",
+]
--- a/modules/parser_adapters/base_adapter.py
+++ b/modules/parser_adapters/base_adapter.py
@@ -0,0 +1,81 @@
+#  @line_count 100
+"""JSON解析器适配器基类"""
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any
+
+
+class BaseParserAdapter(ABC):
+    """Abstract base class of all parser adapters.
+
+    An adapter holds one parsed JSON document in ``self.data``; concrete
+    subclasses implement every abstract method below for one JSON layout.
+    """
+
+    def __init__(self, data: Dict[str, Any]) -> None:
+        """Initialise the adapter.
+
+        Args:
+            data: The parsed JSON data.
+        """
+        self.data = data
+
+    @abstractmethod
+    def extract_function_points(self) -> List[Dict[str, Any]]:
+        """Extract the list of function points.
+
+        Returns:
+            A list of function points in the unified format:
+            - module_name: owning module
+            - function_name: name of the function
+            - description: description of the function
+            - operation_steps: operation steps (optional)
+            - requirement_id: requirement number (optional)
+            - requirement_type: requirement type (optional)
+        """
+
+    @abstractmethod
+    def get_document_info(self) -> Dict[str, Any]:
+        """Return basic information about the document.
+
+        Returns:
+            A document-info dict in the unified format:
+            - title: document title
+            - version: version (optional)
+            - date: date (optional)
+            - section_count: number of sections
+        """
+
+    @abstractmethod
+    def get_sections(self) -> List[Dict[str, Any]]:
+        """Return information about all sections.
+
+        Returns:
+            The list of sections.
+        """
+
+    @abstractmethod
+    def get_module_summary(self) -> List[Dict[str, Any]]:
+        """Return the module summary.
+
+        Returns:
+            The list of module summaries.
+        """
+
+    @staticmethod
+    @abstractmethod
+    def can_parse(data: Dict[str, Any]) -> bool:
+        """Report whether this adapter can parse the given format.
+
+        Args:
+            data: The JSON data.
+
+        Returns:
+            Whether the data can be parsed.
+        """
--- a/modules/parser_adapters/parser_factory.py
+++ b/modules/parser_adapters/parser_factory.py
@@ -0,0 +1,53 @@
+#  @line_count 100
+"""解析器工厂"""
+from typing import Dict, Any, Type, List
+from .base_adapter import BaseParserAdapter
+from .section_array_adapter import SectionArrayAdapter
+from .requirement_tree_adapter import RequirementTreeAdapter
+
+
+class ParserFactory:
+    """Factory that detects the JSON format and builds the matching adapter."""
+
+    # All registered adapters, ordered by priority (the first match wins).
+    _adapters: List[Type[BaseParserAdapter]] = [
+        RequirementTreeAdapter,  # new format is probed first
+        SectionArrayAdapter,     # legacy format
+        # more adapters can be added here in the future
+    ]
+
+    @classmethod
+    def create_adapter(cls, data: Dict[str, Any]) -> BaseParserAdapter:
+        """Create the first registered adapter that accepts ``data``.
+
+        Args:
+            data: The JSON data.
+
+        Returns:
+            An adapter instance wrapping ``data``.
+
+        Raises:
+            ValueError: If ``data`` is not a dict or no adapter accepts it.
+        """
+        # Only dicts are probed: calling can_parse with None/str input may
+        # raise TypeError instead of the documented ValueError.
+        if isinstance(data, dict):
+            for adapter_class in cls._adapters:
+                if adapter_class.can_parse(data):
+                    return adapter_class(data)
+
+        raise ValueError(
+            "无法识别JSON格式。支持的格式：\n"
+            "- 需求树格式（包含'需求内容'和'文档元数据'）\n"
+            "- 章节数组格式（包含'sections'数组）"
+        )
+
+    @classmethod
+    def register_adapter(cls, adapter_class: Type[BaseParserAdapter],
+                        priority: int = 0):
+        """Register an adapter class, moving it if it is already registered.
+
+        Args:
+            adapter_class: The adapter class.
+            priority: Insert position (smaller wins, 0 is the highest).
+        """
+        # Drop an earlier registration so the class is listed only once and
+        # re-registering really moves it to the requested position.
+        if adapter_class in cls._adapters:
+            cls._adapters.remove(adapter_class)
+        cls._adapters.insert(priority, adapter_class)
--- a/modules/parser_adapters/requirement_tree_adapter.py
+++ b/modules/parser_adapters/requirement_tree_adapter.py
@@ -0,0 +1,139 @@
+#  @line_count 200
+"""新格式适配器（需求树格式）"""
+from typing import List, Dict, Any
+from .base_adapter import BaseParserAdapter
+
+
+class RequirementTreeAdapter(BaseParserAdapter):
+    """Adapter for the new format: a nested requirement tree.
+
+    The tree is read from ``'需求内容'``; each section may carry ``'章节信息'``,
+    ``'需求列表'`` and nested ``'子章节'``.  Optional keys whose value is JSON
+    ``null`` (or has the wrong type) are treated like missing keys, because
+    ``dict.get(key, default)`` only falls back when the key is absent.
+    """
+
+    # Requirement-number prefix -> requirement-type label.
+    _TYPE_BY_PREFIX = (
+        ('FR-', '功能需求'),
+        ('IR-', '接口需求'),
+        ('OR-', '其他需求'),
+    )
+    # Requirement keys copied into ``interface_info``.
+    _INTERFACE_KEYS = ('接口名称', '接口类型', '来源', '目的地')
+
+    @staticmethod
+    def _as_dict(value: Any) -> Dict[str, Any]:
+        """Return ``value`` when it is a dict, otherwise an empty dict."""
+        return value if isinstance(value, dict) else {}
+
+    @staticmethod
+    def _requirements(section: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Return the dict entries of a section's ``'需求列表'`` (or ``[]``)."""
+        requirements = section.get('需求列表')
+        if not isinstance(requirements, list):
+            return []
+        return [req for req in requirements if isinstance(req, dict)]
+
+    def _walk(self, sections: Any, path: List[str]):
+        """Yield ``(path, info, section)`` for every section, parents first.
+
+        ``path`` is the list of section titles from the root down to and
+        including the yielded section; ``info`` is its ``'章节信息'`` dict.
+        """
+        for section in self._as_dict(sections).values():
+            if not isinstance(section, dict):
+                continue  # malformed entry: nothing can be read from it
+            info = self._as_dict(section.get('章节信息'))
+            current_path = path + [info.get('章节标题') or '']
+            yield current_path, info, section
+            yield from self._walk(section.get('子章节'), current_path)
+
+    def extract_function_points(self) -> List[Dict[str, Any]]:
+        """Extract one function point per requirement found in the tree."""
+        function_points: List[Dict[str, Any]] = []
+        self._traverse_requirements(
+            self.data.get('需求内容'), [], function_points
+        )
+        return function_points
+
+    def _traverse_requirements(self, sections: Dict, path: List[str],
+                               function_points: List[Dict]):
+        """Append a function point for each requirement below ``sections``.
+
+        Args:
+            sections: Mapping of section key to section data.
+            path: Titles of the sections enclosing ``sections``.
+            function_points: Output list, extended in place.
+        """
+        for current_path, _info, section in self._walk(sections, path):
+            # A nested section is filed under its ancestors' titles; a
+            # top-level section is its own module.
+            if len(current_path) > 1:
+                module_name = ' > '.join(current_path[:-1])
+            else:
+                module_name = current_path[-1]
+            for req in self._requirements(section):
+                req_id = req.get('需求编号') or ''
+                # Interface details are attached only to requirements that
+                # name an interface or an interface type.
+                has_interface = '接口名称' in req or '接口类型' in req
+                function_points.append({
+                    'module_name': module_name,
+                    'function_name': req_id,
+                    'description': req.get('需求描述', ''),
+                    'requirement_id': req_id,
+                    'requirement_type': self._parse_requirement_type(req_id),
+                    'interface_info': {
+                        key: value for key, value in req.items()
+                        if key in self._INTERFACE_KEYS
+                    } if has_interface else None,
+                })
+
+    def _parse_requirement_type(self, req_id: str) -> str:
+        """Map a requirement number to its type label via its prefix.
+
+        Unknown prefixes and non-string numbers yield ``'未知'``.
+        """
+        if isinstance(req_id, str):
+            for prefix, label in self._TYPE_BY_PREFIX:
+                if req_id.startswith(prefix):
+                    return label
+        return '未知'
+
+    def get_document_info(self) -> Dict[str, Any]:
+        """Return title, version, date and section count of the document."""
+        metadata = self._as_dict(self.data.get('文档元数据'))
+        return {
+            'title': metadata.get('标题', ''),
+            'version': '',  # the new format may not carry a version
+            'date': metadata.get('生成时间', ''),
+            'section_count': self._count_sections(self.data.get('需求内容')),
+        }
+
+    def _count_sections(self, sections: Dict) -> int:
+        """Count the sections in ``sections``, nested ones included."""
+        return sum(1 for _ in self._walk(sections, []))
+
+    def get_sections(self) -> List[Dict[str, Any]]:
+        """Return all sections as a flat list, parents before children."""
+        sections: List[Dict[str, Any]] = []
+        self._flatten_sections(self.data.get('需求内容'), sections)
+        return sections
+
+    def _flatten_sections(self, sections: Dict, result: List[Dict]):
+        """Append a flat summary of every section to ``result``."""
+        for _path, info, section in self._walk(sections, []):
+            result.append({
+                'title': info.get('章节标题', ''),
+                'number': info.get('章节编号', ''),
+                'level': info.get('章节级别', 0),
+                'requirement_count': len(self._requirements(section)),
+            })
+
+    def get_module_summary(self) -> List[Dict[str, Any]]:
+        """Return name, requirement count and description per module."""
+        per_module: Dict[str, Dict[str, Any]] = {}
+        self._build_module_summary(self.data.get('需求内容'), per_module)
+        return [
+            {
+                'name': name,
+                'function_count': info['requirement_count'],
+                'description': info.get('description', ''),
+            }
+            for name, info in per_module.items()
+        ]
+
+    def _build_module_summary(self, sections: Dict, result: Dict):
+        """Accumulate requirement counts per section title into ``result``.
+
+        Only sections that have a ``'需求列表'`` key get an entry; sections
+        sharing a title are merged into one entry.
+        """
+        for _path, info, section in self._walk(sections, []):
+            if '需求列表' not in section:
+                continue
+            entry = result.setdefault(
+                info.get('章节标题', ''),
+                {'requirement_count': 0, 'description': ''},
+            )
+            entry['requirement_count'] += len(self._requirements(section))
+
+    @staticmethod
+    def can_parse(data: Dict[str, Any]) -> bool:
+        """Report whether ``data`` is a dict in the new format."""
+        return (
+            isinstance(data, dict)
+            and '需求内容' in data
+            and '文档元数据' in data
+        )
--- a/modules/parser_adapters/section_array_adapter.py
+++ b/modules/parser_adapters/section_array_adapter.py
@@ -0,0 +1,164 @@
+#  @line_count 150
+"""旧格式适配器（sections数组格式）"""
+from typing import List, Dict, Any
+from .base_adapter import BaseParserAdapter
+
+
+class SectionArrayAdapter(BaseParserAdapter):
+    """Adapter for the legacy format: a flat ``sections`` array.
+
+    Function points are recognised heuristically: a short text item is
+    taken as a function title and the text items following it form its
+    description.  ``null`` or malformed sections, content lists and text
+    values are skipped instead of raising.
+    """
+
+    # A text longer than this is taken to be the module description.
+    _DESCRIPTION_MIN_LENGTH = 50
+    # A text must be shorter than this to count as a function title.
+    _TITLE_MAX_LENGTH = 20
+    # Action verbs that mark a description part as an operation step.
+    _STEP_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
+
+    def _iter_sections(self):
+        """Yield the dict entries of ``sections`` (none if absent/null)."""
+        for section in self.data.get('sections') or []:
+            if isinstance(section, dict):
+                yield section
+
+    @staticmethod
+    def _texts(section: Dict[str, Any]) -> List[str]:
+        """Return the stripped, non-empty ``text`` items of a section."""
+        content = section.get('content')
+        texts = []
+        for item in content if isinstance(content, list) else []:
+            if not isinstance(item, dict) or item.get('type') != 'text':
+                continue
+            raw = item.get('content', '')
+            if isinstance(raw, str) and raw.strip():
+                texts.append(raw.strip())
+        return texts
+
+    @classmethod
+    def _module_description(cls, texts: List[str]) -> str:
+        """Return the first sufficiently long text, or ``''`` if none."""
+        for text in texts:
+            if len(text) > cls._DESCRIPTION_MIN_LENGTH:
+                return text
+        return ''
+
+    @classmethod
+    def _is_function_title(cls, text: str) -> bool:
+        """Report whether a text looks like a function-point title.
+
+        A title is short, does not end like a sentence or clause, and
+        does not refer to a figure ("如下" / "所示").
+        """
+        return (
+            len(text) < cls._TITLE_MAX_LENGTH
+            and not text.endswith(('。', '，'))
+            and '如下' not in text
+            and '所示' not in text
+        )
+
+    @classmethod
+    def _group_by_title(cls, texts: List[str]) -> List[tuple]:
+        """Group texts into ``(title, description_parts)`` pairs.
+
+        Texts that appear before the first title belong to no function
+        point and are dropped.
+        """
+        groups: List[tuple] = []
+        for text in texts:
+            if cls._is_function_title(text):
+                groups.append((text, []))
+            elif groups:
+                groups[-1][1].append(text)
+        return groups
+
+    def extract_function_points(self) -> List[Dict[str, Any]]:
+        """Extract function points from the ``sections`` array.
+
+        Returns:
+            One dict per recognised function point.  A section without any
+            recognised title but with a module description contributes a
+            single function point named after the module.
+        """
+        function_points = []
+        for section in self._iter_sections():
+            module_name = section.get('title', '')
+            texts = self._texts(section)
+            module_description = self._module_description(texts)
+            groups = self._group_by_title(texts)
+
+            for function_name, parts in groups:
+                function_points.append({
+                    'module_name': module_name,
+                    'module_description': module_description,
+                    'function_name': function_name,
+                    'description': ' '.join(parts),
+                    'operation_steps': self._extract_steps(parts),
+                })
+
+            # No title recognised: treat the whole module as one function.
+            if not groups and module_description:
+                function_points.append({
+                    'module_name': module_name,
+                    'module_description': module_description,
+                    'function_name': module_name,
+                    'description': module_description,
+                    'operation_steps': [],
+                })
+
+        return function_points
+
+    def _extract_steps(self, description_parts: List[str]) -> List[str]:
+        """Pick the description parts that describe an operation step.
+
+        A part counts as a step when it contains one of the action verbs
+        in ``_STEP_KEYWORDS``.  The filler phrase "如下图所示" is removed
+        together with a directly following full-width colon.
+
+        Args:
+            description_parts: List of description texts.
+
+        Returns:
+            The list of operation steps.
+        """
+        steps = []
+        for part in description_parts:
+            if not any(keyword in part for keyword in self._STEP_KEYWORDS):
+                continue
+            # Strip the colon variant first: removing the bare phrase first
+            # would leave a dangling "：" behind.
+            cleaned = (
+                part.replace('如下图所示：', '').replace('如下图所示', '').strip()
+            )
+            if cleaned:
+                steps.append(cleaned)
+        return steps
+
+    def get_document_info(self) -> Dict[str, Any]:
+        """Return title, version, date and section count of the document."""
+        return {
+            'title': self.data.get('document_title', ''),
+            'version': self.data.get('version', ''),
+            'date': self.data.get('date', ''),
+            'section_count': len(self.get_sections()),
+        }
+
+    def get_sections(self) -> List[Dict[str, Any]]:
+        """Return the ``sections`` list (empty when absent or null)."""
+        return self.data.get('sections') or []
+
+    def get_module_summary(self) -> List[Dict[str, Any]]:
+        """Return name, function count and description of every section.
+
+        ``function_count`` is the number of distinct titles, recognised
+        with the same rule that ``extract_function_points`` uses.
+        """
+        modules = []
+        for section in self._iter_sections():
+            texts = self._texts(section)
+            titles = {text for text in texts if self._is_function_title(text)}
+            modules.append({
+                'name': section.get('title', ''),
+                'function_count': len(titles),
+                'description': self._module_description(texts),
+            })
+        return modules
+
+    @staticmethod
+    def can_parse(data: Dict[str, Any]) -> bool:
+        """Report whether ``data`` is a dict holding a ``sections`` list."""
+        return isinstance(data, dict) and isinstance(data.get('sections'), list)