#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""SRS parsing tool - main program entry point.

Pipeline driven by :func:`main`: parse an SRS document (.docx / .pdf) into a
section tree, optionally keep only the chapters selected on the command line,
run the requirement extractor over the sections, and save the generated JSON.
"""

import argparse
import os
import sys
import logging
from pathlib import Path
from typing import List, Optional

# Put this script's directory on the import path so the ``src`` package
# resolves regardless of the current working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from src.utils import load_config, setup_logging, validate_file_path, ensure_directory_exists, get_env_or_config
from src.document_parser import create_parser
from src.document_parser import Section
from src.requirement_extractor import RequirementExtractor
from src.json_generator import JSONGenerator

logger = logging.getLogger(__name__)


def create_llm(config: dict):
    """Create the LLM client described by the ``llm`` section of *config*.

    Args:
        config: Configuration dictionary; only its ``llm`` entry is read.

    Returns:
        A ``QwenLLM`` instance. This function never returns ``None``: every
        failure path raises.

    Raises:
        ValueError: If ``llm.enabled`` is false, or no API key is available
            from the ``DASHSCOPE_API_KEY`` lookup / ``llm.api_key``.
        RuntimeError: If the LLM module cannot be imported or constructing
            the client fails.
    """
    # ``or {}`` also covers an ``llm`` key that is present but null.
    llm_config = config.get('llm') or {}

    # The current version only supports LLM mode.
    if not llm_config.get('enabled', True):
        raise ValueError("当前版本仅支持LLM模式，请将配置 llm.enabled 设为 true")

    # Only used in the log message below; QwenLLM is always the class built.
    provider = llm_config.get('provider', 'qwen')

    # Resolve the API key from DASHSCOPE_API_KEY / llm.api_key.
    api_key = get_env_or_config('DASHSCOPE_API_KEY', llm_config.get('api_key'))
    if not api_key:
        raise ValueError("未配置API密钥：请设置环境变量 DASHSCOPE_API_KEY 或在 config.yaml 中配置 llm.api_key")

    try:
        # Imported lazily so a missing dependency is reported as a
        # RuntimeError with an install hint instead of failing at startup.
        from src.llm_interface import QwenLLM

        model = llm_config.get('model', 'qwen-plus')
        temperature = llm_config.get('temperature', 0.3)
        max_tokens = llm_config.get('max_tokens', 1024)

        llm = QwenLLM(
            api_key=api_key,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens
        )
        logger.info(f"成功创建LLM实例: {provider} ({model})")
        return llm
    except ImportError as e:
        raise RuntimeError(f"无法导入LLM模块: {e}。请安装依赖：pip install dashscope") from e
    except Exception as e:
        raise RuntimeError(f"创建LLM实例失败: {e}") from e


def parse_chapter_selector(selector: str) -> List[str]:
    """Parse the comma-separated chapter filter given on the command line.

    Args:
        selector: Text such as ``"3"`` or ``"3,4.1"``. Empty items (e.g. from
            a trailing comma) are ignored.

    Returns:
        The chapter numbers in input order, stripped of surrounding
        whitespace; an empty list when *selector* is empty or ``None``.

    Raises:
        ValueError: If an item is not made of dot-separated digit groups.
    """
    if not selector:
        return []

    chapters = [item.strip() for item in selector.split(',') if item.strip()]
    for chapter in chapters:
        # "3." or "3..1" produce an empty group, which isdigit() rejects.
        if not all(part.isdigit() for part in chapter.split('.')):
            raise ValueError(f"无效章节编号: {chapter}，仅支持如 3 或 3.1 的格式")
    return chapters


def _copy_section_without_children(section: Section) -> Section:
    """Return a new Section carrying *section*'s own fields but no children."""
    copied = Section(
        level=section.level,
        title=section.title,
        number=section.number,
        content=section.content,
        uid=section.uid,
    )
    # New list objects, so appending to the copy's lists leaves the
    # original section's lists untouched (the items are shared).
    copied.tables = list(section.tables)
    copied.blocks = list(section.blocks)
    return copied


def _clone_section_with_children(section: Section) -> Section:
    """Return a copy of *section* together with its whole subtree."""
    copied = _copy_section_without_children(section)
    for child in section.children:
        copied.add_child(_clone_section_with_children(child))
    return copied


def filter_sections_by_chapters(sections: list, chapters: list) -> list:
    """Filter a section tree by chapter-number prefix.

    A chapter ``"3"`` matches sections numbered ``3`` and ``3.x``. A matched
    section is kept with its entire subtree. An unmatched section is kept
    (with its own content, tables and blocks) only when at least one of its
    descendants is kept.

    Args:
        sections: Top-level sections of the document.
        chapters: Chapter numbers to keep, e.g. ``["3", "4.1"]``.

    Returns:
        *sections* itself when *chapters* is empty; otherwise a new list of
        copied sections (possibly empty).
    """
    if not chapters:
        return sections

    def matched(number) -> bool:
        number = (number or "").strip()
        if not number:
            return False
        # The "." suffix prevents chapter "3" from matching section "31".
        return any(
            number == chapter or number.startswith(f"{chapter}.")
            for chapter in chapters
        )

    def prune(section: Section) -> Optional[Section]:
        if matched(section.number):
            return _clone_section_with_children(section)

        copied = _copy_section_without_children(section)
        for child in section.children:
            kept_child = prune(child)
            if kept_child is not None:
                copied.add_child(kept_child)
        # An unmatched section survives only as an ancestor of a match.
        return copied if copied.children else None

    pruned = (prune(section) for section in sections)
    return [section for section in pruned if section is not None]


def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser."""
    parser = argparse.ArgumentParser(
        description='SRS需求文档解析工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例用法:
  python main.py --input sample.pdf --output output.json
  python main.py -i requirements.docx -o output.json --verbose
  python main.py -i DC-SRS.pdf -o output.json
        """
    )

    parser.add_argument(
        '--input', '-i',
        type=str,
        required=True,
        help='输入的SRS文档路径（支持.docx和.pdf）'
    )
    parser.add_argument(
        '--output', '-o',
        type=str,
        default='output.json',
        help='输出JSON文件路径（默认：output.json）'
    )
    parser.add_argument(
        '--config', '-c',
        type=str,
        default=None,
        help='配置文件路径（默认：./config.yaml）'
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='输出详细日志'
    )
    parser.add_argument(
        '--chapters',
        type=str,
        default=None,
        help='按章节提取（如: 3 或 3,4.1）；输入3表示提取第3章及其子章节'
    )
    return parser


def _log_banner(title: str, leading_newline: bool = True) -> None:
    """Log *title* framed by two 60-character ``=`` rules."""
    rule = "=" * 60
    logger.info(("\n" + rule) if leading_newline else rule)
    logger.info(title)
    logger.info(rule)


def _log_section_tree(sections, indent: int = 0) -> None:
    """Log the section tree, one line per section, indented by depth."""
    for section in sections:
        logger.info(" " * indent + f"- {section.number} {section.title}")
        if section.children:
            _log_section_tree(section.children, indent + 1)


def main() -> bool:
    """Run the command-line tool.

    Returns:
        ``True`` when the JSON file was produced; ``False`` when input
        validation fails or any step raises (the error is logged with its
        traceback).
    """
    args = _build_arg_parser().parse_args()

    # Load configuration.
    config = load_config(args.config)

    # --verbose overrides the configured log level.
    if args.verbose:
        config.setdefault('logging', {})['level'] = 'DEBUG'
    setup_logging(config)

    _log_banner("SRS需求文档解析工具启动（LLM增强版）", leading_newline=False)

    try:
        # Validate the input file.
        if not validate_file_path(args.input, ['.pdf', '.docx']):
            logger.error(f"输入文件验证失败: {args.input}")
            return False

        logger.info(f"输入文件: {args.input}")

        # Validate --chapters up front so a malformed selector is reported
        # before the LLM client is created and the document is parsed.
        selected_chapters = parse_chapter_selector(args.chapters) if args.chapters else []

        # Create the output directory.
        output_dir = os.path.dirname(args.output) or '.'
        if output_dir != '.' and not ensure_directory_exists(output_dir):
            logger.error(f"无法创建输出目录: {output_dir}")
            return False

        logger.info(f"输出文件: {args.output}")

        # Create the LLM instance (required; create_llm raises on failure).
        llm = create_llm(config)
        logger.info("LLM增强模式已启用")

        # Step 1: parse the document.
        _log_banner("步骤1：解析文档")

        doc_parser = create_parser(args.input)
        if llm:
            doc_parser.set_llm(llm)
        sections = doc_parser.parse()
        document_title = doc_parser.get_document_title()

        if selected_chapters:
            sections = filter_sections_by_chapters(sections, selected_chapters)
            if not sections:
                raise ValueError(f"未匹配到指定章节: {', '.join(selected_chapters)}")
            logger.info(f"章节筛选已启用: {', '.join(selected_chapters)}")

        logger.info(f"成功解析文档，提取{len(sections)}个顶级章节")

        # Print the section structure.
        if args.verbose:
            logger.info("章节结构:")
            _log_section_tree(sections)

        # Step 2: extract requirements.
        _log_banner("步骤2：提取需求（LLM增强模式）")

        extractor = RequirementExtractor(config, llm=llm)
        requirements = extractor.extract_from_sections(sections)

        # Requirement statistics.
        stats = extractor.get_statistics()
        logger.info("\n需求统计:")
        for req_type, count in stats['by_type'].items():
            logger.info(f" {req_type}: {count}项")
        logger.info(f" 总计: {stats['total']}项")

        # Step 3: generate the JSON structure.
        _log_banner("步骤3：生成JSON")

        generator = JSONGenerator(config)
        json_output = generator.generate(
            sections,
            requirements,
            document_title
        )

        logger.info("JSON结构生成完成")

        # Step 4: save the result.
        _log_banner("步骤4：保存结果")

        generator.save_to_file(json_output, args.output)
        logger.info(f"成功保存JSON文件到: {args.output}")

        # Report the output file size.
        if os.path.exists(args.output):
            file_size = os.path.getsize(args.output)
            logger.info(f"文件大小: {file_size} 字节")

        _log_banner("SRS需求文档解析完成！")

        return True

    except Exception as e:
        # Top-level boundary: log with traceback and turn into exit code 1.
        logger.error(f"处理过程中出现错误: {e}", exc_info=True)
        return False


if __name__ == '__main__':
    success = main()
    sys.exit(0 if success else 1)