from pathlib import Path

from docx import Document

from app.docx_parser import parse_docx


def test_parse_docx_extracts_headings_paragraphs_and_tables(tmp_path: Path) -> None:
    """Round-trip a small generated .docx file through ``parse_docx``.

    The fixture document holds one level-1 heading, one body paragraph and a
    1x2 table. The test then checks the parsed result's ``filename``, ``text``,
    ``headings`` and ``tables`` attributes against the values written.
    """
    heading_text = "软件需求规格说明"
    expected_row = ["需求编号", "REQ-001"]
    target = tmp_path / "sample.docx"

    # Build the fixture document: heading, paragraph, then a single-row table.
    fixture = Document()
    fixture.add_heading(heading_text, level=1)
    fixture.add_paragraph("本文档描述 CSCI 的能力需求和接口需求。")
    grid = fixture.add_table(rows=1, cols=2)
    for column, value in enumerate(expected_row):
        grid.cell(0, column).text = value
    fixture.save(target)

    result = parse_docx(target)

    assert result.filename == "sample.docx"
    assert heading_text in result.text
    assert "REQ-001" in result.text
    assert result.headings[0].text == heading_text
    assert result.tables[0][0] == expected_row