"""Tests for ``parse_docx`` and the ``DocxPackage`` extraction/editing helpers."""

from pathlib import Path

from docx import Document

from app.docx_parser import parse_docx
from scripts.docx_full_parser import DocxPackage


def test_parse_docx_extracts_headings_paragraphs_and_tables(tmp_path: Path) -> None:
    """Parse a generated file and check its filename, text, heading and table."""
    heading_text = "软件需求规格说明"
    header_row = ["需求编号", "REQ-001"]
    source = tmp_path / "sample.docx"

    # Build a minimal document: one heading, one paragraph, one 1x2 table.
    doc = Document()
    doc.add_heading(heading_text, level=1)
    doc.add_paragraph("本文档描述 CSCI 的能力需求和接口需求。")
    grid = doc.add_table(rows=1, cols=2)
    for column, value in enumerate(header_row):
        grid.cell(0, column).text = value
    doc.save(source)

    result = parse_docx(source)

    assert result.filename == "sample.docx"
    assert heading_text in result.text
    assert "REQ-001" in result.text
    assert result.headings[0].text == heading_text
    assert result.tables[0][0] == header_row


def test_docx_package_extracts_elements_across_parts_and_replaces_text(
    tmp_path: Path,
) -> None:
    """Extract text from body, header, footer and table, then replace it everywhere."""
    source = tmp_path / "full.docx"
    target = tmp_path / "modified.docx"

    # Put the marker word "原始" into the body, the header, the footer and a table cell.
    doc = Document()
    doc.add_heading("原始标题", level=1)
    doc.add_paragraph("正文原始内容")
    first_section = doc.sections[0]
    first_section.header.paragraphs[0].text = "页眉原始内容"
    first_section.footer.paragraphs[0].text = "页脚原始内容"
    grid = doc.add_table(rows=1, cols=1)
    grid.cell(0, 0).text = "表格原始内容"
    doc.save(source)

    pkg = DocxPackage(source)
    extracted = pkg.extract()
    combined = "\n".join(item.text for item in extracted.elements)

    assert any(part.name == "word/document.xml" for part in extracted.parts)
    assert "原始标题" in combined
    assert "页眉原始内容" in combined
    assert "页脚原始内容" in combined
    assert any(item.kind == "table" for item in extracted.elements)

    count = pkg.replace_text("原始", "修改后")
    pkg.save(target)

    # Heading, header, footer and table cell each contain the marker.
    assert count >= 4

    reopened = Document(target)
    body_text = "\n".join(para.text for para in reopened.paragraphs)
    reopened_section = reopened.sections[0]

    assert "修改后标题" in body_text
    assert reopened_section.header.paragraphs[0].text == "页眉修改后内容"
    assert reopened_section.footer.paragraphs[0].text == "页脚修改后内容"
    assert reopened.tables[0].cell(0, 0).text == "表格修改后内容"


def test_docx_package_replaces_text_split_across_runs(tmp_path: Path) -> None:
    """Replace a phrase whose characters are spread over three separate runs."""
    source = tmp_path / "split.docx"
    target = tmp_path / "split-modified.docx"

    doc = Document()
    para = doc.add_paragraph()
    # The search phrase only exists once the three runs are concatenated.
    for fragment in ("附录", "A ", "文档审查单"):
        para.add_run(fragment)
    doc.save(source)

    pkg = DocxPackage(source)
    count = pkg.replace_text("附录A 文档审查单", "附录A 文档检查单")
    pkg.save(target)

    reopened = Document(target)

    assert count == 1
    assert reopened.paragraphs[0].text == "附录A 文档检查单"


def test_docx_package_fills_review_result_columns(tmp_path: Path) -> None:
    """Fill the result columns of the review table that follows the "A.3" heading."""
    source = tmp_path / "review.docx"
    target = tmp_path / "review-modified.docx"
    first_content = "标识描述本文档所适用系统和软件的完整标识。"

    # Sparse layout of the 5x7 table: row index -> {column index -> cell text}.
    # Cells that are not listed stay empty.
    layout = {
        0: {0: "文档名称"},
        1: {0: "序号", 1: "审查项", 2: "审查内容", 3: "审查结果(填√)", 6: "备注"},
        2: {
            0: "序号",
            1: "审查项",
            2: "审查内容",
            3: "通过",
            4: "未通过",
            5: "不适用",
            6: "备注",
        },
        # Row 3 starts with a stale value in the "未通过" column.
        3: {0: "1", 1: "完整性", 2: first_content, 4: "旧值"},
        4: {0: "2", 1: "完整性", 2: "系统概述本文档适用的系统和软件的用途。"},
    }

    doc = Document()
    doc.add_paragraph("A.3软件设计文档审查单")
    grid = doc.add_table(rows=5, cols=7)
    for row_index, columns in layout.items():
        for column_index, value in columns.items():
            grid.rows[row_index].cells[column_index].text = value
    doc.save(source)

    pkg = DocxPackage(source)
    changes = pkg.fill_review_results(heading_contains="A.3", result="通过")
    pkg.save(target)

    assert [change.sequence for change in changes] == ["1", "2"]
    assert changes[0].review_content == first_content

    reopened = Document(target)
    result_table = reopened.tables[0]
    first_item_cells = result_table.rows[3].cells
    second_item_cells = result_table.rows[4].cells

    # The "通过" column holds the check mark; the stale value in row 3 is gone.
    assert first_item_cells[3].text == "✔"
    assert first_item_cells[4].text == ""
    assert first_item_cells[5].text == ""
    assert second_item_cells[3].text == "✔"