test_item_gen/modules/parser_adapters/section_array_adapter.py

#  @line_count 150
"""旧格式适配器（sections数组格式）"""
from typing import List, Dict, Any
from .base_adapter import BaseParserAdapter


class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    The methods read ``self.data`` as a dict with an optional ``sections``
    list. Each section is read as a dict with a ``title`` and a ``content``
    list; only content items whose ``type`` is ``'text'`` are considered.
    Function points are recovered heuristically: a short text line that does
    not look like a sentence is treated as a function-point title, and the
    text lines that follow it form that function point's description.
    """

    # A text strictly longer than this is taken to be the module description.
    _MODULE_DESCRIPTION_MIN_LENGTH = 50
    # A text strictly shorter than this may be a function-point title.
    _FUNCTION_TITLE_MAX_LENGTH = 20
    # Trailing punctuation that marks a sentence rather than a title.
    _NON_TITLE_SUFFIXES = ('。', '，')
    # Phrases that refer to a figure/table; such lines are never titles.
    _NON_TITLE_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description line as an operation step.
    _STEP_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Figure references stripped from steps. The longer variant must come
    # first, otherwise removing the shorter one leaves a dangling '：'.
    _FIGURE_REFERENCES = ('如下图所示：', '如下图所示')

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section of the document.

        Returns:
            A list of dicts with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description``
            (description lines joined with single spaces) and
            ``operation_steps``. A section in which no title is recognised
            but which has a module description yields one entry named after
            the section itself, with no operation steps.
        """
        function_points = []

        for section in self._sections():
            if not isinstance(section, dict):
                continue

            module_name = section.get('title', '')
            texts = self._text_items(section)
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts = []

            for text in texts:
                if self._is_function_title(text):
                    # A new title closes the function point collected so far.
                    if current_function is not None:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts))
                    current_function = text
                    description_parts = []
                elif current_function is not None:
                    # Text before the first title belongs to no function
                    # point and is intentionally ignored here.
                    description_parts.append(text)

            if current_function is not None:
                # Flush the last function point of the section.
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts))
            elif module_description:
                # No title recognised: report the module as a single
                # function point described by the module description.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': []
                })

        return function_points

    def _build_function_point(self, module_name: Any, module_description: str,
                              function_name: str,
                              description_parts: List[str]) -> Dict[str, Any]:
        """Assemble the result dict for one recognised function point."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts)
        }

    def _sections(self) -> List[Dict[str, Any]]:
        """Return the ``sections`` list, or an empty list if it is not a list."""
        sections = self.data.get('sections', [])
        return sections if isinstance(sections, list) else []

    @staticmethod
    def _text_items(section: Dict[str, Any]) -> List[str]:
        """Return the stripped, non-empty texts of a section's text items.

        Malformed entries (non-dict items, items whose ``content`` is not a
        string, or a ``content`` container that is not a list/tuple) are
        skipped instead of raising.
        """
        content = section.get('content', [])
        if not isinstance(content, (list, tuple)):
            return []

        texts = []
        for item in content:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw_text = item.get('content', '')
            if not isinstance(raw_text, str):
                continue
            text = raw_text.strip()
            if text:
                texts.append(text)
        return texts

    @classmethod
    def _find_module_description(cls, texts: List[str]) -> str:
        """Return the first text long enough to be a module description, or ''."""
        return next(
            (text for text in texts
             if len(text) > cls._MODULE_DESCRIPTION_MIN_LENGTH),
            ''
        )

    @classmethod
    def _is_function_title(cls, text: str) -> bool:
        """Tell whether a stripped, non-empty text looks like a function title.

        A title is short, does not end with sentence punctuation and does
        not refer to a figure.
        """
        return (
            len(text) < cls._FUNCTION_TITLE_MAX_LENGTH
            and not text.endswith(cls._NON_TITLE_SUFFIXES)
            and not any(marker in text for marker in cls._NON_TITLE_MARKERS)
        )

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Extract operation steps from a function point's description lines.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The lines that contain at least one operation verb, with figure
            references removed and surrounding whitespace stripped. Lines
            that become empty after cleaning are dropped.
        """
        steps = []
        for part in description_parts:
            if not any(keyword in part for keyword in self._STEP_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._FIGURE_REFERENCES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return the document title, version, date and number of sections."""
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self._sections())
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the list of sections (empty when absent or malformed)."""
        return self._sections()

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise every section as a module.

        Returns:
            One dict per section with the keys ``name`` (section title),
            ``function_count`` (number of distinct texts recognised as
            function titles, using the same rule as
            ``extract_function_points``) and ``description`` (the module
            description, or '' when none is found).
        """
        modules = []

        for section in self._sections():
            if not isinstance(section, dict):
                continue

            texts = self._text_items(section)
            distinct_titles = {
                text for text in texts if self._is_function_title(text)
            }
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(distinct_titles),
                'description': self._find_module_description(texts)
            })

        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Tell whether ``data`` is in the legacy format.

        Returns True only when ``data`` is a dict whose ``sections`` value is
        a list; any other input (including ``None``) yields False rather
        than raising.
        """
        return isinstance(data, dict) and isinstance(data.get('sections'), list)