Files
linux_format_docs_check/tests/test_docx_parser.py

25 lines
847 B
Python
Raw Permalink Normal View History

2026-05-18 15:50:43 +08:00
from pathlib import Path
from docx import Document
from app.docx_parser import parse_docx
def test_parse_docx_extracts_headings_paragraphs_and_tables(tmp_path: Path) -> None:
    """Round-trip a small generated .docx file through ``parse_docx``.

    The fixture document holds one level-1 heading, one paragraph and a
    single-row, two-column table. It is saved under ``tmp_path`` and the
    parsed result is checked for its filename, combined text, first heading
    and first table row.
    """
    heading_text = "软件需求规格说明"
    row_cells = ["需求编号", "REQ-001"]

    # Build the fixture document in memory.
    source = Document()
    source.add_heading(heading_text, level=1)
    source.add_paragraph("本文档描述 CSCI 的能力需求和接口需求。")
    grid = source.add_table(rows=1, cols=2)
    for column, value in enumerate(row_cells):
        grid.cell(0, column).text = value

    # Persist it so the parser reads a real file from disk.
    target = tmp_path / "sample.docx"
    source.save(target)

    result = parse_docx(target)

    assert result.filename == "sample.docx"
    assert heading_text in result.text
    assert "REQ-001" in result.text
    assert result.headings[0].text == heading_text
    assert result.tables[0][0] == row_cells