25 lines
847 B
Python
25 lines
847 B
Python
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
from docx import Document
|
||
|
|
|
||
|
|
from app.docx_parser import parse_docx
|
||
|
|
|
||
|
|
|
||
|
|
def test_parse_docx_extracts_headings_paragraphs_and_tables(tmp_path: Path) -> None:
    """Check that ``parse_docx`` surfaces headings, body text and table cells.

    A small document is generated with python-docx inside ``tmp_path``: one
    level-1 heading, one paragraph, and a single-row, two-column table. The
    saved file is then handed to ``parse_docx`` and the returned object is
    checked for its filename, combined text, first heading and first table
    row.
    """
    heading_text = "软件需求规格说明"
    table_row = ["需求编号", "REQ-001"]
    target = tmp_path / "sample.docx"

    # Build the fixture document and write it to disk.
    doc = Document()
    doc.add_heading(heading_text, level=1)
    doc.add_paragraph("本文档描述 CSCI 的能力需求和接口需求。")
    grid = doc.add_table(rows=1, cols=len(table_row))
    for col, value in enumerate(table_row):
        grid.cell(0, col).text = value
    doc.save(target)

    result = parse_docx(target)

    # The parsed object should report the file's name.
    assert result.filename == "sample.docx"

    # Both the heading text and the table cell text should appear in the text.
    assert heading_text in result.text
    assert table_row[1] in result.text

    # Structured views: first heading and first row of the first table.
    first_heading = result.headings[0]
    assert first_heading.text == heading_text
    first_table = result.tables[0]
    assert first_table[0] == table_row
|