init rep
This commit is contained in:
164
modules/parser_adapters/section_array_adapter.py
Normal file
164
modules/parser_adapters/section_array_adapter.py
Normal file
@@ -0,0 +1,164 @@
|
||||
# @line_count 150
|
||||
"""旧格式适配器(sections数组格式)"""
|
||||
from typing import List, Dict, Any
|
||||
from .base_adapter import BaseParserAdapter
|
||||
|
||||
|
||||
class SectionArrayAdapter(BaseParserAdapter):
    """Adapter for the legacy document format built around a ``sections`` array.

    The parsed document is read from ``self.data`` (presumably populated by
    ``BaseParserAdapter``; the base class is not visible here -- confirm).
    Each section is read as a dict with a ``title`` and a ``content`` list.
    Only content items whose ``type`` is ``'text'`` are inspected, and their
    ``content`` value is used as the text.

    Function points are recognised heuristically: a short text that does not
    look like a sentence is treated as a function title, and the texts that
    follow it (until the next title) form that function's description.
    """

    # A text strictly longer than this is taken as the module description.
    MODULE_DESCRIPTION_MIN_LENGTH = 50
    # A text strictly shorter than this may be a function-point title.
    FUNCTION_TITLE_MAX_LENGTH = 20

    # Trailing punctuation that marks a short text as prose, not a title.
    # Both the ASCII and the full-width comma are listed because the
    # documents are Chinese text and may use either form.
    _SENTENCE_ENDINGS = ('。', ',', '\uff0c')
    # Fragments of "as shown below" phrases; a text containing one is a
    # figure reference rather than a title.
    _FIGURE_REFERENCE_MARKERS = ('如下', '所示')
    # Verbs whose presence marks a description part as an operation step.
    _OPERATION_KEYWORDS = ('点击', '选择', '输入', '打开', '关闭', '设置', '查看')
    # Filler phrases stripped from steps. The colon-suffixed forms come
    # first: removing the bare phrase first would leave the colon behind.
    _FIGURE_PHRASES = ('如下图所示:', '如下图所示\uff1a', '如下图所示')

    @staticmethod
    def _iter_texts(content):
        """Yield the stripped, non-empty text of each text item in *content*.

        Items that are not dicts, are not of type ``'text'``, or whose
        ``content`` is not a string are skipped instead of raising.
        """
        for item in content or []:
            if not isinstance(item, dict) or item.get('type') != 'text':
                continue
            raw = item.get('content', '')
            if not isinstance(raw, str):
                continue
            text = raw.strip()
            if text:
                yield text

    @classmethod
    def _find_module_description(cls, texts: List[str]) -> str:
        """Return the first text long enough to be a module description, or ''."""
        return next(
            (t for t in texts if len(t) > cls.MODULE_DESCRIPTION_MIN_LENGTH),
            '',
        )

    @classmethod
    def _is_function_title(cls, text: str) -> bool:
        """Return True when *text* looks like a function-point title.

        A title is short, does not end with sentence punctuation, and does
        not contain an "as shown below" style figure reference.
        """
        return (
            len(text) < cls.FUNCTION_TITLE_MAX_LENGTH
            and not text.endswith(cls._SENTENCE_ENDINGS)
            and not any(m in text for m in cls._FIGURE_REFERENCE_MARKERS)
        )

    def _build_function_point(
        self,
        module_name: str,
        module_description: str,
        function_name: str,
        description_parts: List[str],
    ) -> Dict[str, Any]:
        """Assemble the result dict for one function point."""
        return {
            'module_name': module_name,
            'module_description': module_description,
            'function_name': function_name,
            'description': ' '.join(description_parts),
            'operation_steps': self._extract_steps(description_parts),
        }

    def extract_function_points(self) -> List[Dict[str, Any]]:
        """Extract function points from every section of the document.

        Returns:
            One dict per function point with the keys ``module_name``,
            ``module_description``, ``function_name``, ``description`` and
            ``operation_steps``. A section in which no title is recognised
            but which has a module description yields a single entry that
            uses the section title as the function name.
        """
        function_points = []

        for section in self.data.get('sections') or []:
            module_name = section.get('title', '')
            texts = list(self._iter_texts(section.get('content')))
            module_description = self._find_module_description(texts)

            current_function = None
            description_parts = []

            for text in texts:
                if self._is_function_title(text):
                    # A new title closes the function point being collected.
                    if current_function:
                        function_points.append(self._build_function_point(
                            module_name, module_description,
                            current_function, description_parts,
                        ))
                    current_function = text
                    description_parts = []
                elif current_function:
                    # Texts seen before the first title belong to no function
                    # point and are intentionally dropped.
                    description_parts.append(text)

            if current_function:
                # Flush the last function point of the section.
                function_points.append(self._build_function_point(
                    module_name, module_description,
                    current_function, description_parts,
                ))
            elif module_description:
                # No title recognised: report the whole module as one point.
                function_points.append({
                    'module_name': module_name,
                    'module_description': module_description,
                    'function_name': module_name,
                    'description': module_description,
                    'operation_steps': [],
                })

        return function_points

    def _extract_steps(self, description_parts: List[str]) -> List[str]:
        """Extract operation steps from a function point's description.

        Args:
            description_parts: Description texts of one function point.

        Returns:
            The parts that contain an operation verb, with "as shown in the
            figure below" filler (including a trailing colon) removed and
            surrounding whitespace stripped. Parts left empty are omitted.
        """
        steps = []
        for part in description_parts:
            if not any(kw in part for kw in self._OPERATION_KEYWORDS):
                continue
            cleaned = part
            for phrase in self._FIGURE_PHRASES:
                cleaned = cleaned.replace(phrase, '')
            cleaned = cleaned.strip()
            if cleaned:
                steps.append(cleaned)
        return steps

    def get_document_info(self) -> Dict[str, Any]:
        """Return document-level metadata.

        Returns:
            A dict with ``title`` (read from ``document_title``), ``version``,
            ``date`` and ``section_count``. Missing string fields default to
            ``''``; a missing ``sections`` value counts as zero sections.
        """
        return {
            'title': self.data.get('document_title', ''),
            'version': self.data.get('version', ''),
            'date': self.data.get('date', ''),
            'section_count': len(self.data.get('sections') or []),
        }

    def get_sections(self) -> List[Dict[str, Any]]:
        """Return the raw ``sections`` list, or an empty list when absent."""
        return self.data.get('sections', [])

    def get_module_summary(self) -> List[Dict[str, Any]]:
        """Summarise each section as a module.

        Returns:
            One dict per section with ``name`` (the section title),
            ``function_count`` and ``description``. ``function_count`` is the
            number of distinct texts that pass the same title rule used by
            ``extract_function_points``, so the two views stay consistent.
        """
        modules = []
        for section in self.data.get('sections') or []:
            texts = list(self._iter_texts(section.get('content')))
            distinct_titles = {t for t in texts if self._is_function_title(t)}
            modules.append({
                'name': section.get('title', ''),
                'function_count': len(distinct_titles),
                'description': self._find_module_description(texts),
            })
        return modules

    @staticmethod
    def can_parse(data: Dict[str, Any]) -> bool:
        """Return True when *data* has a ``sections`` key holding a list."""
        return 'sections' in data and isinstance(data['sections'], list)
|
||||
Reference in New Issue
Block a user