"""Replace document fragments inside a template and help build the final document."""
from io import BytesIO
from typing import List, Dict
from pathlib import Path
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from docx.oxml.text.run import CT_R
from docx.oxml.shape import CT_Picture
from docx.parts.image import ImagePart
from docx.text.run import Run
from docx.shared import Mm
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from lxml.etree import _Element
# Path helpers
from utils.path_utils import project_path

### Module variables: widths of the areas that commonly hold pictures
Demand_table_xqms = Mm(134)  # 1. Outline - test item - requirement description cell
Timing_diagram_width = Mm(242)  # 2. Test record - timing diagram
Test_result_width = Mm(78)  # 3. Test record - test result
Horizatal_width = Mm(130)  # 4. All documents - horizontal page width preset for pictures

# Output file name per doc_type; any other doc_type falls back to the outline.
_DEFAULT_DOC_NAME = '测评大纲.docx'
_FIXED_DOC_NAMES = {
    'sm': '测试说明.docx',
    'jl': '测试记录.docx',
    'bg': '测评报告.docx',
    'wtd': '问题单.docx',
}
# Regression documents are prefixed with the round number at runtime.
_REGRESSION_DOC_NAMES = {
    'hsm': '回归测试说明.docx',
    'hjl': '回归测试记录.docx',
}


def getParentRunNode(node):
    """Return the nearest ``CT_R`` at or above *node*.

    :param node: an oxml element (or ``None``)
    :return: *node* itself when it is a ``CT_R``, otherwise its closest
        ``CT_R`` ancestor; ``None`` when the chain ends without one
        (previously this case raised ``AttributeError``).
    """
    while node is not None and not isinstance(node, CT_R):
        node = node.getparent()
    return node


def _resolve_doc_paths(prefix: Path, doc_type: str, round_num):
    """Return ``(template_file, to_tpl_file, seitai_final_file)`` for *doc_type*."""
    products = prefix / 'form_template' / 'products'
    if doc_type in _REGRESSION_DOC_NAMES:
        base_name = _REGRESSION_DOC_NAMES[doc_type]
        out_name = f'第{round_num}轮{base_name}'
        # Prefer a round-specific template in products/ when one exists,
        # otherwise fall back to the generic regression template.
        template_file = products / out_name
        if not template_file.exists():
            template_file = products / base_name
    else:
        out_name = _FIXED_DOC_NAMES.get(doc_type, _DEFAULT_DOC_NAME)
        template_file = products / out_name
    return template_file, prefix / 'temp' / out_name, prefix / 'final_seitai' / out_name


def _first_with_stem(files, stem):
    """Return the first path in *files* whose stem equals *stem*, else ``None``."""
    return next((f for f in files if f.stem == stem), None)


def _find_fragment_file(area_name, doc_type, round_num, dg_files, exclusive_files, reuse_files):
    """Locate the fragment .docx whose stem matches the content-control name.

    For ``'dg'`` only the output_dir files are searched.  Otherwise the search
    order is: the doc_type folder (with a ``第{round_num}轮`` prefix when
    *round_num* is given), then ``reuse``, then output_dir.
    """
    if doc_type == 'dg':
        return _first_with_stem(dg_files, area_name)
    exclusive_stem = area_name if round_num is None else f"第{round_num}轮{area_name}"
    return (
        _first_with_stem(exclusive_files, exclusive_stem)
        or _first_with_stem(reuse_files, area_name)
        or _first_with_stem(dg_files, area_name)
    )


def _collect_fragment_images(content_elements, source_doc, area_name):
    """Pair every picture inside *content_elements* with its run and image part.

    Must be called while the elements still belong to *source_doc*; the
    relationship ids are only meaningful in that document.

    :return: list of ``(run_node, image_part_or_None)`` in document order
    """
    related_parts = source_doc.part.related_parts
    pending = []
    for element in content_elements:
        for pic_node in element.xpath('.//pic:pic'):
            embed_ids = pic_node.xpath('.//a:blip/@r:embed')
            embed = embed_ids[0] if embed_ids else None
            image_part = related_parts.get(embed) if embed is not None else None
            if image_part is None:
                # Logged so broken fragments can be tracked down.
                print(f"警告: 文档片段 '{area_name}' 中的图片引用 {embed} 未找到,已跳过!!!!")
            pending.append((getParentRunNode(pic_node), image_part))
    return pending


def _reembed_pictures(doc, pending):
    """Re-insert copied pictures into *doc* so their relationships are valid.

    Each run that held a copied picture is cleared (its relationship id
    belongs to the fragment document) and the picture is added again from the
    image bytes, resized and centred.
    """
    cleared_runs = set()
    for run_node, image_part in pending:
        if run_node is None:
            continue
        # ``pending`` keeps the proxies alive, so id() is stable here.
        if id(run_node) not in cleared_runs:
            run_node.clear()
            cleared_runs.add(id(run_node))
        if image_part is None:
            continue
        inline_shape = Run(run_node, doc).add_picture(BytesIO(image_part.image.blob))
        # Unified sizing: landscape pictures are 120mm wide, portrait ones
        # 60mm high; the other dimension is scaled proportionally.
        source_width = inline_shape.width
        source_height = inline_shape.height
        if source_width >= source_height:
            inline_shape.width = Mm(120)
            inline_shape.height = int(inline_shape.height * (inline_shape.width / source_width))
        else:
            inline_shape.height = Mm(60)
            inline_shape.width = int(inline_shape.width * (inline_shape.height / source_height))
        # Centre the paragraph when the run sits directly inside one.
        run_parent = run_node.getparent()
        if isinstance(run_parent, CT_P):
            Paragraph(run_parent, doc).alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def generate_temp_doc(doc_type: str, project_id: int, round_num=None, frag_list=None):
    """Fill the template's content controls with fragment documents and save it.

    Only top-level content controls (``./w:sdt`` under the body) that are not
    locked and whose alias is marked as "cover" in *frag_list* are replaced.
    Pictures in content controls that are not replaced are left untouched.

    :param doc_type: 'dg' outline, 'sm' description, 'jl' record, 'bg' report,
        'hsm' regression description, 'hjl' regression record, 'wtd' problem
        sheet; any other value uses the outline paths
    :param project_id: project id, passed to ``project_path``
    :param round_num: regression round number (used by 'hsm' / 'hjl')
    :param frag_list: items exposing ``.name`` and ``.isCover``; fragments whose
        ``isCover`` is falsy (or that are absent) are not overwritten
    :return: ``(to_tpl_file, seitai_final_file)`` on success, or an error dict
        ``{'code': 'error', 'msg': ...}`` when the temp file cannot be written
    """
    if frag_list is None:
        frag_list = []
    prefix = Path.cwd() / 'media' / project_path(project_id)
    template_file, to_tpl_file, seitai_final_file = _resolve_doc_paths(prefix, doc_type, round_num)

    # Fragment sources: output_dir/*.docx (outline), output_dir/<doc_type>/*.docx
    # (document specific) and reuse/*.docx.
    dg_copied_files = []
    exclusive_copied_files = []
    for entry in (prefix / 'output_dir').iterdir():
        if entry.is_file():
            if entry.suffix == '.docx':
                dg_copied_files.append(entry)
        elif entry.is_dir() and entry.stem == doc_type:
            # The folder name must equal doc_type, otherwise nothing is picked up.
            exclusive_copied_files.extend(f for f in entry.iterdir() if f.suffix == '.docx')
    reuse_files = [
        f for f in (prefix / 'reuse').iterdir()
        if f.is_file() and f.suffix == '.docx'
    ]

    doc = Document(template_file.as_posix())
    body = doc.element.body
    frag_is_cover_dict = {item.name: item.isCover for item in frag_list}
    pending_pictures = []

    for sdt_ele in body.xpath('./w:sdt'):
        # Kept local to this sdt so a control without sdtContent cannot leak
        # its name into the next control.
        area_name = None
        is_lock = False
        for elem in sdt_ele.iterchildren():
            if elem.tag.endswith('sdtPr'):
                props = list(elem)
                if any(prop.tag.endswith('lock') for prop in props):
                    is_lock = True
                if is_lock:
                    continue
                for prop in props:
                    if not prop.tag.endswith('alias'):
                        continue
                    values = prop.attrib.values()
                    if len(values) > 0 and frag_is_cover_dict.get(values[0]):
                        area_name = values[0]
            elif elem.tag.endswith('sdtContent') and area_name is not None:
                current_name, area_name = area_name, None
                fragment_path = _find_fragment_file(
                    current_name, doc_type, round_num,
                    dg_copied_files, exclusive_copied_files, reuse_files,
                )
                if fragment_path is None:
                    continue
                doc_copied = Document(fragment_path.as_posix())
                content_elements = [
                    el for el in doc_copied.element.body.inner_content_elements
                    if isinstance(el, (CT_P, CT_Tbl))
                ]
                # Resolve image parts before the elements leave the fragment
                # document; afterwards they are no longer reachable from it.
                pending_pictures.extend(
                    _collect_fragment_images(content_elements, doc_copied, current_name)
                )
                elem.clear()
                for content in content_elements:
                    elem.append(content)

    _reembed_pictures(doc, pending_pictures)

    try:
        # Write the product document directly.
        doc.save(str(to_tpl_file))
        return to_tpl_file, seitai_final_file
    except PermissionError:
        return {'code': 'error', 'msg': '生成的temp文件已打开,请关闭后重试...'}


def get_frag_from_document(doc_path: Path) -> List[Dict]:
    """List the fragment (content-control) names found in a products document.

    :param doc_path: path of the .docx to inspect
    :return: one ``{'frag_name': <alias>, 'isLock': <bool>}`` per top-level
        content control that has an alias with at least one attribute
    """
    doc = Document(doc_path.as_posix())
    fragments = []
    for sdt_ele in doc.element.body.xpath('./w:sdt'):
        is_lock = False
        alias_values = None
        for child in sdt_ele.iterchildren():
            if not child.tag.endswith('sdtPr'):
                continue
            for prop in child.iterchildren():
                if prop.tag.endswith('alias'):
                    alias_values = prop.attrib.values()
                # A lock element means the user marked the control as locked.
                if prop.tag.endswith('lock'):
                    is_lock = True
        if alias_values:
            fragments.append({'frag_name': alias_values[0], 'isLock': is_lock})
    return fragments


def _collect_jinja_sdt(sdt_elements, collected):
    """Append ``{'alias', 'sdtContent'}`` to *collected* for each sdt tagged 'jinja'."""
    for sdt_ele in sdt_elements:
        tag_value = None
        alias_value = None
        content_ele = None
        for child in sdt_ele.iterchildren():
            if child.tag.endswith('sdtPr'):
                for prop in child.iterchildren():
                    values = prop.attrib.values()
                    if len(values) == 0:
                        continue
                    if prop.tag.endswith('tag'):
                        tag_value = values[0]
                    if prop.tag.endswith('alias'):
                        alias_value = values[0]
            if child.tag.endswith('sdtContent'):
                content_ele = child
        if tag_value == 'jinja' and alias_value is not None and content_ele is not None:
            collected.append({'alias': alias_value, 'sdtContent': content_ele})


# Helper: takes the temp document (fragments already replaced) and returns its sdtContent nodes
def get_jinja_stdContent_element(temp_docx_path: Path):
    """Collect the 'jinja'-tagged content controls of a document and its headers.

    :param temp_docx_path: path of the temp .docx
    :return: ``(text_frag_name_list, doc_docx)`` where the list holds
        ``{'alias': str, 'sdtContent': element}`` dicts and ``doc_docx`` is the
        opened document that owns those elements
    """
    doc_docx = Document(temp_docx_path.as_posix())
    text_frag_name_list = []
    _collect_jinja_sdt(doc_docx.element.body.xpath('//w:sdt'), text_frag_name_list)
    # Header content controls live in the header parts, not in the body.
    # Sections can resolve to the same header part; scan each part once so the
    # same sdtContent is not returned repeatedly.
    seen_parts = set()
    for section in doc_docx.sections:
        header_part = section.header.part
        if id(header_part) in seen_parts:
            continue
        seen_parts.add(id(header_part))
        _collect_jinja_sdt(header_part.element.xpath('//w:sdt'), text_frag_name_list)
    return text_frag_name_list, doc_docx


# Modify a sdtContent by value -> API handlers fetch data and call this to edit the document
def stdContent_modify(modify_str: str | bool | int | float, doc_docx: Document, sdtContent: _Element):
    """Replace the content of *sdtContent* with *modify_str*.

    The first direct child that is a run or a paragraph is reused as the
    carrier of the new text and becomes the only child of *sdtContent*.
    Nothing happens when there is no such child.

    :param modify_str: new text; ``True`` leaves the content unchanged,
        ``False`` empties it, numbers are converted with ``str``
    :param doc_docx: document owning *sdtContent*
    :param sdtContent: the ``w:sdtContent`` element to modify
    """
    # Normalise once so the run and paragraph branches treat values alike
    # (the int conversion used to exist in the run branch only).
    if isinstance(modify_str, bool):
        if modify_str:
            return
        modify_str = ""
    elif isinstance(modify_str, (int, float)):
        modify_str = str(modify_str)

    for ele in sdtContent:
        if isinstance(ele, CT_R):
            run_ele = Run(ele, doc_docx)
            run_ele.text = modify_str
            sdtContent.clear()
            sdtContent.append(run_ele._element)
            break
        if isinstance(ele, CT_P):
            para_ele = Paragraph(ele, doc_docx)
            para_ele.clear()
            para_ele.text = modify_str
            sdtContent.clear()
            sdtContent.append(para_ele._element)
            break