Files
cdtestplant_v1/apps/createSeiTaiDocument/docXmlUtils.py

353 lines
19 KiB
Python
Raw Normal View History

"""该文件是:替换文档片段然后生成辅助生成最终文档"""
from io import BytesIO
from typing import List, Dict
from pathlib import Path
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.oxml.text.run import CT_R
from docx.oxml.shape import CT_Picture
from docx.parts.image import ImagePart
from docx.text.run import Run
from docx.shared import Mm
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from lxml.etree import _Element
# 路径工具
from utils.path_utils import project_path
### 模块变量:定义常用图片所在区域的宽高
Demand_table_xqms = Mm(134) # 1.测评大纲-测试项里面-需求描述单元格
Timing_diagram_width = Mm(242) # 2.测试记录-时序图
Test_result_width = Mm(78) # 3.测试记录-测试结果
Horizatal_width = Mm(130) # 4.所有文档-页面图片的横向距离(图片宽度预设置)
def getParentRunNode(node):
"""传入oxml节点对象获取其祖先节点的CT_R"""
if isinstance(node, CT_R):
return node
return getParentRunNode(node.getparent())
def generate_temp_doc(doc_type: str, project_id: int, round_num=None, frag_list=None):
""" 该函数参数:
:param frag_list: 储存用户不覆盖的片段列表
:param round_num: 只有回归说明和回归记录有
:param project_id: 项目id
:param doc_type:大纲 sm:说明 jl:记录 bg:报告 hsm:回归测试说明 hjl:回归测试记录,默认路径为dg -> 所以如果传错就生成生成大纲了
:return (to_tpl_file路径, seitai_final_file路径)
"""
if frag_list is None:
frag_list = []
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
project_path_str = project_path(project_id)
# 根据传入需要处理的文档类型,自动获路径
prefix = Path.cwd() / 'media' / project_path_str
template_file: Path = prefix / 'form_template' / 'products' / '测评大纲.docx'
to_tpl_file: Path = prefix / 'temp' / '测评大纲.docx'
seitai_final_file: Path = prefix / 'final_seitai' / '测评大纲.docx'
if doc_type == 'sm':
template_file = prefix / 'form_template' / 'products' / '测试说明.docx'
to_tpl_file = prefix / 'temp' / '测试说明.docx'
seitai_final_file: Path = prefix / 'final_seitai' / '测试说明.docx'
elif doc_type == 'jl':
template_file = prefix / 'form_template' / 'products' / '测试记录.docx'
to_tpl_file = prefix / 'temp' / '测试记录.docx'
seitai_final_file: Path = prefix / 'final_seitai' / '测试记录.docx'
elif doc_type == 'bg':
template_file = prefix / 'form_template' / 'products' / '测评报告.docx'
to_tpl_file = prefix / 'temp' / '测评报告.docx'
seitai_final_file: Path = prefix / 'final_seitai' / '测评报告.docx'
elif doc_type == 'hsm':
# 如果products里面存在“用户上传的第n轮回归测试说明.docx则使用它作为模版”
template_file = prefix / 'form_template' / 'products' / f'{round_num}轮回归测试说明.docx'
if not template_file.exists():
template_file = prefix / 'form_template' / 'products' / '回归测试说明.docx'
to_tpl_file = prefix / 'temp' / f'{round_num}轮回归测试说明.docx'
seitai_final_file: Path = prefix / 'final_seitai' / f'{round_num}轮回归测试说明.docx'
elif doc_type == 'hjl':
# 如果products里面存在“用户上传的第n轮回归测试记录.docx则使用它作为模版”
template_file = prefix / 'form_template' / 'products' / f'{round_num}轮回归测试记录.docx'
if not template_file.exists():
template_file = prefix / 'form_template' / 'products' / '回归测试记录.docx'
to_tpl_file = prefix / 'temp' / f'{round_num}轮回归测试记录.docx'
seitai_final_file: Path = prefix / 'final_seitai' / f'{round_num}轮回归测试记录.docx'
elif doc_type == 'wtd':
template_file = prefix / 'form_template' / 'products' / '问题单.docx'
to_tpl_file = prefix / 'temp' / '问题单.docx'
seitai_final_file: Path = prefix / 'final_seitai' / '问题单.docx'
# 定义找寻被复制文件根路径 - 后续会根据type找子路径
output_files_path = prefix / 'output_dir'
# 这里可能修改,储存大纲里面的文档片段
dg_copied_files = []
# 储存sm/jl/hsm/hjl/bg/wtd的文档片段
exclusive_copied_files = []
# 新储存reuse的文档片段
reuse_files = []
# 将被拷贝文件分别放入不同两个数组
for file in output_files_path.iterdir():
if file.is_file():
if file.suffix == '.docx':
dg_copied_files.append(file)
elif file.is_dir():
# 如果文件夹名称为sm/jl/hsm/hjl/bg/wtd则进入该判断
# 所以要求文件系统文件夹名称必须是sm/jl/hsm/hjl/bg/wtd不然无法生成
if file.stem == doc_type:
for f in file.iterdir():
if f.suffix == '.docx':
exclusive_copied_files.append(f)
for file in (prefix / 'reuse').iterdir():
if file.is_file():
if file.suffix == '.docx':
reuse_files.append(file)
# 找到基础模版的所有std域
doc = Document(template_file.as_posix())
body = doc.element.body
sdt_element_list = body.xpath('./w:sdt')
# 找到sdt域的名称 -> 为了对应output_dir文件 / 储存所有output_dir图片
area_name_list = []
image_part_list = [] # 修改为字典两个字段{ 'name':'测评对象', 'img':ImagePart }
# 筛选片段【二】:用户前端要求不要覆盖的文档片段
frag_is_cover_dict = {item.name: item.isCover for item in frag_list}
# 遍历所有控件 -> 放入area_name_list【这里准备提取公共代码】
for sdt_ele in sdt_element_list:
isLock = False
for elem in sdt_ele.iterchildren():
# 【一】用户设置lock - 下面2个if将需要被替换的(控件名称)存入area_name_list
if elem.tag.endswith('sdtPr'):
for el in elem.getchildren():
if el.tag.endswith('lock'):
isLock = True
if elem.tag.endswith('sdtPr'):
for el in elem.getchildren():
if el.tag.endswith('alias'):
# 筛序【一】取出用户设置lock的文档片段
if len(el.attrib.values()) > 0 and (isLock == False):
area_name = el.attrib.values()[0]
# 筛选【二】:前端用户选择要覆盖的片段
if frag_is_cover_dict.get(area_name):
area_name_list.append(area_name)
# 下面开始替换area_name_list的“域”这时已经被筛选-因为sdtPr和sdtContent是成对出现
if elem.tag.endswith('sdtContent'):
if len(area_name_list) > 0:
# 从第一个片段名称开始取,取到模版的“域”名称
area_pop_name = area_name_list.pop(0)
# 这里先去找media/output_dir/xx下文件然后找media/output下文件
copied_file_path = ""
# 下面if...else是找output_dir下面文件与“域”名称匹配匹配到存入copied_file_path
if doc_type == 'dg':
for file in dg_copied_files:
if file.stem == area_pop_name:
copied_file_path = file
else:
# 如果不是大纲
if round_num is None:
# 如果非回归说明、记录
for file in exclusive_copied_files:
if file.stem == area_pop_name:
copied_file_path = file
# 这里判断是否copied_file_path没取到文件然后遍历reuse下文件
if not copied_file_path:
for file in reuse_files:
if file.stem == area_pop_name:
copied_file_path = file
# 如果上面被复制文件还没找到然后遍历output_dir下文件
if not copied_file_path:
for file in dg_copied_files:
if file.stem == area_pop_name:
copied_file_path = file
else:
# 因为回归的轮次,前面会加 -> 第{round_num}轮
for file in exclusive_copied_files: # 这里多了第{round_num}轮
if file.stem == f"{round_num}{area_pop_name}":
copied_file_path = file
if not copied_file_path:
for file in reuse_files:
if file.stem == area_pop_name:
copied_file_path = file
if not copied_file_path:
for file in dg_copied_files:
if file.stem == area_pop_name:
copied_file_path = file
# 找到文档片段.docx将其数据复制到对应area_name的“域”
if copied_file_path:
doc_copied = Document(copied_file_path)
copied_element_list = []
element_list = doc_copied.element.body.inner_content_elements
for elet in element_list:
if isinstance(elet, CT_P):
copied_element_list.append(Paragraph(elet, doc_copied))
if isinstance(elet, CT_Tbl):
copied_element_list.append(Table(elet, doc_copied))
elem.clear()
for para_copied in copied_element_list:
elem.append(para_copied._element)
# 下面代码就是将图片全部提取到image_part_list以便后续插入注意这时候已经是筛选后的
doc_copied = Document(copied_file_path) # 需要重新获取否则namespace错误
copied_body = doc_copied.element.body
img_node_list = copied_body.xpath('.//pic:pic')
if not img_node_list:
pass
else:
for img_node in img_node_list:
img: CT_Picture = img_node
# 根据节点找到图片的关联id
embed = img.xpath('.//a:blip/@r:embed')[0]
# 这里得到ImagePart -> 马上要给新文档添加
related_part: ImagePart = doc_copied.part.related_parts.get(embed)
if related_part is None:
# 可选:记录警告日志,便于排查哪些文档片段有问题
print(f"警告: 文档片段 '{area_pop_name}' 中的图片引用 {embed} 未找到,已跳过!!!!")
continue
# doc_copied.part.related_parts是一个字典
image_part_list.append({'name': area_pop_name, 'img': related_part})
# 现在是替换后找到替换后文档所有pic:pic并对“域”名称进行识别
graph_node_list = body.xpath('.//pic:pic')
graph_node_list_transform = []
for picNode in graph_node_list:
# 遍历替换后模版的所有pic去找祖先
sdt_node = picNode.xpath('ancestor::w:sdt[1]')[0]
for sdt_node_child in sdt_node.iterchildren():
# 找到sdt下一级的stdPr
if sdt_node_child.tag.endswith('sdtPr'):
for sdtPr_node_child in sdt_node_child.getchildren():
if sdtPr_node_child.tag.endswith('alias'):
yu_name = sdtPr_node_child.attrib.values()[0]
graph_node_list_transform.append({'yu_name': yu_name, 'yu_node': picNode})
for graph_node in graph_node_list_transform:
image_run_node = getParentRunNode(graph_node['yu_node'])
image_run_node.clear()
# 循环去image_part_list找name和yu_name相等的图片
for img_part in image_part_list:
# 1.如果找到相等
if img_part['name'] == graph_node['yu_name']:
# 2.找到即可添加图片到“域”
image_run_node.clear()
# 辅助:去找其父节点是否为段落,是段落则存起来,后面好居中
image_run_parent_paragraph = image_run_node.getparent()
father_paragraph = None
if isinstance(image_run_parent_paragraph, CT_P):
father_paragraph = Paragraph(image_run_parent_paragraph, doc)
copied_bytes_io = BytesIO(img_part['img'].image.blob)
r_element = Run(image_run_node, doc)
inline_shape = r_element.add_picture(copied_bytes_io)
## 2.1.统一:这里设置文档片段里面的图片大小和位置
source_width = inline_shape.width
source_height = inline_shape.height
if source_width >= source_height:
inline_shape.width = Mm(120)
inline_shape.height = int(inline_shape.height * (inline_shape.width / source_width))
else:
inline_shape.height = Mm(60)
inline_shape.width = int(inline_shape.width * (inline_shape.height / source_height))
## 2.2.设置图片所在段落居中对齐
if father_paragraph:
father_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
r_element.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
# 3.因为按顺序的所以移除image_part_list中已经替换的图片
image_part_list.remove(img_part)
break
try:
# 这里直接生成产品文档
doc.save(str(to_tpl_file))
return to_tpl_file, seitai_final_file
except PermissionError as e:
return {'code': 'error', 'msg': '生成的temp文件已打开请关闭后重试...'}
def get_frag_from_document(doc_path: Path) -> List[Dict]:
"""传入products的文件路径识别出所有文档片段名称数组返回要求docx里面文档名称不能更变"""
doc = Document(doc_path.as_posix())
sdt_element_list = doc.element.body.xpath('./w:sdt')
# 整个for循环识别文档片段名称
area_name_list = []
for sdt_ele in sdt_element_list:
isLock = False
alias_value = None
for elem in sdt_ele.iterchildren():
if elem.tag.endswith('sdtPr'):
for el in elem.getchildren():
if el.tag.endswith('alias'):
alias_value = el.attrib.values()
# 查找是否被用户在模版上标记了Lock
if el.tag.endswith('lock'):
isLock = True
if alias_value and len(alias_value):
area_name_list.append({'frag_name': alias_value[0], 'isLock': isLock})
return area_name_list
# 辅助函数-传入temp文件路径已替换文档片段的temp文档输出stdContent
def get_jinja_stdContent_element(temp_docx_path: Path):
doc_docx = Document(temp_docx_path.as_posix())
body = doc_docx.element.body
# 储存文本片段
text_frag_name_list = []
sdt_element_list = body.xpath('//w:sdt')
# 注意python-docx的页头的文本片段不在body里面而在section.header里面
# 所以定义辅助函数,统一处理
def deel_sdt_content(*args):
"""传入sdt_element列表将其sdtContent加入外部的文本片段列表"""
for sdt_ele in args:
# 找出每个sdt下面的3个标签
tag_value = None
alias_value = None
sdtContent_ele = None
for sdt_ele_child in sdt_ele.iterchildren():
if sdt_ele_child.tag.endswith('sdtPr'):
for sdtPr_ele_child in sdt_ele_child.getchildren():
if sdtPr_ele_child.tag.endswith('tag'):
if len(sdtPr_ele_child.attrib.values()) > 0:
tag_value = sdtPr_ele_child.attrib.values()[0]
if sdtPr_ele_child.tag.endswith('alias'):
if len(sdtPr_ele_child.attrib.values()) > 0:
alias_value = sdtPr_ele_child.attrib.values()[0]
if sdt_ele_child.tag.endswith('sdtContent'):
sdtContent_ele = sdt_ele_child
# 找出所有tag_value为jinja的文本片段
if tag_value == 'jinja' and alias_value is not None and sdtContent_ele is not None:
text_frag_name_list.append({'alias': alias_value, 'sdtContent': sdtContent_ele})
deel_sdt_content(*sdt_element_list)
for section in doc_docx.sections:
header = section.header
header_sdt_list = header.part.element.xpath('//w:sdt')
deel_sdt_content(*header_sdt_list)
return text_frag_name_list, doc_docx
# 封装一个根据alias名称修改stdContent的函数 -> 在接口处理函数中取数据放入函数修改文档
def stdContent_modify(modify_str: str | bool, doc_docx: Document, sdtContent: _Element):
# 正常处理
for ele in sdtContent:
if isinstance(ele, CT_R):
run_ele = Run(ele, doc_docx)
if isinstance(modify_str, bool):
# 如果是True则不修改原来
if modify_str:
break
else:
modify_str = ""
# 有时候会int类型转换一下防止报错
if isinstance(modify_str, int):
modify_str = str(modify_str)
run_ele.text = modify_str
sdtContent.clear()
sdtContent.append(run_ele._element)
break
if isinstance(ele, CT_P):
para_ele = Paragraph(ele, doc_docx)
if isinstance(modify_str, bool):
if modify_str:
break
else:
modify_str = ""
para_ele.clear()
para_ele.text = modify_str
sdtContent.clear()
sdtContent.append(para_ele._element)
break