2026-05-18 15:50:43 +08:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
from docx import Document
|
|
|
|
|
|
|
|
|
|
from app.docx_parser import parse_docx
|
2026-05-26 15:08:34 +08:00
|
|
|
from scripts.docx_full_parser import DocxPackage
|
2026-05-18 15:50:43 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_docx_extracts_headings_paragraphs_and_tables(
    tmp_path: Path,
) -> None:
    """Check that ``parse_docx`` reports the content of a small generated file.

    The fixture document holds one level-1 heading, one body paragraph and
    a 1x2 table. The assertions cover the parsed filename, the flattened
    text, the first heading and the first table row.
    """
    heading_text = "软件需求规格说明"
    label_cell, value_cell = "需求编号", "REQ-001"
    source = tmp_path / "sample.docx"

    # Arrange: write the fixture document into the pytest temp directory.
    fixture = Document()
    fixture.add_heading(heading_text, level=1)
    fixture.add_paragraph("本文档描述 CSCI 的能力需求和接口需求。")
    grid = fixture.add_table(rows=1, cols=2)
    grid.cell(0, 0).text = label_cell
    grid.cell(0, 1).text = value_cell
    fixture.save(source)

    # Act: parse the file that was just written.
    result = parse_docx(source)

    # Assert: filename, flattened text, headings and tables all reflect the fixture.
    assert result.filename == "sample.docx"
    assert heading_text in result.text
    assert value_cell in result.text
    assert result.headings[0].text == heading_text
    assert result.tables[0][0] == [label_cell, value_cell]
|
2026-05-26 15:08:34 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_docx_package_extracts_elements_across_parts_and_replaces_text(
    tmp_path: Path,
) -> None:
    """Exercise ``DocxPackage.extract`` and ``replace_text`` on one document.

    The fixture places text in a heading, a body paragraph, the header and
    footer of the first section, and a table cell. The test first checks
    that extraction lists ``word/document.xml`` among its parts and surfaces
    the heading, header and footer text plus a table element. It then
    replaces "原始" with "修改后", saves, and re-reads the result with
    python-docx to confirm the replacement in each location.
    """
    source = tmp_path / "full.docx"
    target = tmp_path / "modified.docx"

    # Arrange: one document with text spread over body, header, footer and table.
    fixture = Document()
    fixture.add_heading("原始标题", level=1)
    fixture.add_paragraph("正文原始内容")
    first_section = fixture.sections[0]
    first_section.header.paragraphs[0].text = "页眉原始内容"
    first_section.footer.paragraphs[0].text = "页脚原始内容"
    grid = fixture.add_table(rows=1, cols=1)
    grid.cell(0, 0).text = "表格原始内容"
    fixture.save(source)

    # Act (1): extract and flatten every element's text into one string.
    package = DocxPackage(source)
    extraction = package.extract()
    combined = "\n".join(item.text for item in extraction.elements)

    # Assert (1): the main document part and all text locations are present.
    assert any(entry.name == "word/document.xml" for entry in extraction.parts)
    assert "原始标题" in combined
    assert "页眉原始内容" in combined
    assert "页脚原始内容" in combined
    assert any(item.kind == "table" for item in extraction.elements)

    # Act (2): replace the marker text everywhere and write the package out.
    replaced_count = package.replace_text("原始", "修改后")
    package.save(target)

    # Assert (2): at least four replacements, and the saved file shows them.
    assert replaced_count >= 4
    reopened = Document(target)
    body_text = "\n".join(para.text for para in reopened.paragraphs)
    assert "修改后标题" in body_text
    assert reopened.sections[0].header.paragraphs[0].text == "页眉修改后内容"
    assert reopened.sections[0].footer.paragraphs[0].text == "页脚修改后内容"
    assert reopened.tables[0].cell(0, 0).text == "表格修改后内容"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_docx_package_replaces_text_split_across_runs(tmp_path: Path) -> None:
    """Check that ``replace_text`` matches a phrase spread over several runs.

    The fixture paragraph is built from three separate runs whose
    concatenation is "附录A 文档审查单". The test expects exactly one
    replacement and the saved paragraph text to read "附录A 文档检查单".
    """
    source = tmp_path / "split.docx"
    target = tmp_path / "split-modified.docx"

    # Arrange: one paragraph whose text is split across three runs.
    fixture = Document()
    split_paragraph = fixture.add_paragraph()
    for fragment in ("附录", "A ", "文档审查单"):
        split_paragraph.add_run(fragment)
    fixture.save(source)

    # Act: replace the full phrase, which no single run contains.
    package = DocxPackage(source)
    replaced_count = package.replace_text("附录A 文档审查单", "附录A 文档检查单")
    package.save(target)

    # Assert: re-read the output, then check the count and the resulting text.
    reopened = Document(target)
    assert replaced_count == 1
    assert reopened.paragraphs[0].text == "附录A 文档检查单"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_docx_package_fills_review_result_columns(tmp_path: Path) -> None:
    """Check ``fill_review_results`` on a generated review-sheet table.

    The fixture is a paragraph "A.3软件设计文档审查单" followed by a 5x7
    table: a title row, two header rows and two item rows. The first item
    row starts with "旧值" in column 4. After calling
    ``fill_review_results(heading_contains="A.3", result="通过")`` and
    saving, the test expects updates for sequences "1" and "2", a "✔" in
    column 3 of both item rows, and empty columns 4 and 5 in the first
    item row.
    """
    source = tmp_path / "review.docx"
    target = tmp_path / "review-modified.docx"
    first_item_content = "标识描述本文档所适用系统和软件的完整标识。"

    # Cell text per table row, keyed by column index. Columns that are not
    # listed are never assigned and keep whatever add_table created.
    layout = (
        {0: "文档名称"},
        {0: "序号", 1: "审查项", 2: "审查内容", 3: "审查结果(填√)", 6: "备注"},
        {
            0: "序号",
            1: "审查项",
            2: "审查内容",
            3: "通过",
            4: "未通过",
            5: "不适用",
            6: "备注",
        },
        {0: "1", 1: "完整性", 2: first_item_content, 4: "旧值"},
        {0: "2", 1: "完整性", 2: "系统概述本文档适用的系统和软件的用途。"},
    )

    # Arrange: heading paragraph plus the review table described above.
    fixture = Document()
    fixture.add_paragraph("A.3软件设计文档审查单")
    grid = fixture.add_table(rows=5, cols=7)
    for row_index, row_cells in enumerate(layout):
        for col_index, cell_text in row_cells.items():
            grid.rows[row_index].cells[col_index].text = cell_text
    fixture.save(source)

    # Act: fill the result columns for the section whose heading contains "A.3".
    package = DocxPackage(source)
    updates = package.fill_review_results(heading_contains="A.3", result="通过")
    package.save(target)

    # Assert: the returned updates describe both item rows in order.
    assert [entry.sequence for entry in updates] == ["1", "2"]
    assert updates[0].review_content == first_item_content

    # Assert: the saved table has the mark in column 3 and the stale value cleared.
    reopened_table = Document(target).tables[0]
    assert reopened_table.rows[3].cells[3].text == "✔"
    assert reopened_table.rows[3].cells[4].text == ""
    assert reopened_table.rows[3].cells[5].text == ""
    assert reopened_table.rows[4].cells[3].text == "✔"
|