353 lines
19 KiB
Python
353 lines
19 KiB
Python
"""该文件是:替换文档片段然后生成辅助生成最终文档"""
|
||
from io import BytesIO
|
||
from typing import List, Dict
|
||
from pathlib import Path
|
||
from docx import Document
|
||
from docx.text.paragraph import Paragraph
|
||
from docx.table import Table
|
||
from docx.oxml.table import CT_Tbl
|
||
from docx.oxml.text.paragraph import CT_P
|
||
from docx.oxml.text.run import CT_R
|
||
from docx.oxml.shape import CT_Picture
|
||
from docx.parts.image import ImagePart
|
||
from docx.text.run import Run
|
||
from docx.shared import Mm
|
||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||
from lxml.etree import _Element
|
||
|
||
# 路径工具
|
||
from utils.path_utils import project_path
|
||
|
||
# Module-level constants: preset sizes of the areas that commonly hold pictures.

# 1. Test outline - the "requirement description" cell inside a test item
Demand_table_xqms = Mm(134)

# 2. Test record - timing diagram
Timing_diagram_width = Mm(242)

# 3. Test record - test result
Test_result_width = Mm(78)

# 4. All documents - horizontal extent of an in-page picture (preset picture width)
Horizatal_width = Mm(130)
|
||
|
||
def getParentRunNode(node):
    """Return the nearest ``CT_R`` (run) element at or above an oxml node.

    The node itself is returned when it already is a ``CT_R``; otherwise the
    parent chain is followed via ``getparent()`` until a ``CT_R`` is reached.
    There is no guard for running out of ancestors: the caller is expected to
    pass a node that actually lives inside a run.
    """
    current = node
    while not isinstance(current, CT_R):
        current = current.getparent()
    return current
|
||
|
||
# Product documents whose file name does not depend on a regression round.
_PLAIN_DOC_NAMES = {
    'sm': '测试说明.docx',
    'jl': '测试记录.docx',
    'bg': '测评报告.docx',
    'wtd': '问题单.docx',
}
# Regression documents: the "第{round_num}轮" prefix is added at runtime.
_REGRESSION_DOC_NAMES = {
    'hsm': '回归测试说明.docx',
    'hjl': '回归测试记录.docx',
}
# Used for 'dg' and for every unrecognised doc_type.
_DEFAULT_DOC_NAME = '测评大纲.docx'


def _resolve_doc_paths(prefix, doc_type, round_num):
    """Return (template_file, to_tpl_file, seitai_final_file) for a document type."""
    products = prefix / 'form_template' / 'products'
    if doc_type in _REGRESSION_DOC_NAMES:
        base_name = _REGRESSION_DOC_NAMES[doc_type]
        output_name = f'第{round_num}轮{base_name}'
        # Prefer a round-specific template in products; fall back to the generic one.
        template_file = products / output_name
        if not template_file.exists():
            template_file = products / base_name
    else:
        output_name = _PLAIN_DOC_NAMES.get(doc_type, _DEFAULT_DOC_NAME)
        template_file = products / output_name
    return template_file, prefix / 'temp' / output_name, prefix / 'final_seitai' / output_name


def _scan_fragment_files(prefix, doc_type):
    """Return (dg_files, exclusive_files, reuse_files) found under the project folder."""
    dg_files = []
    exclusive_files = []
    for entry in (prefix / 'output_dir').iterdir():
        if entry.is_file():
            if entry.suffix == '.docx':
                dg_files.append(entry)
        elif entry.is_dir() and entry.stem == doc_type:
            # Only the sub-folder named after the doc_type (sm/jl/hsm/hjl/bg/wtd) is used.
            exclusive_files.extend(f for f in entry.iterdir() if f.suffix == '.docx')
    reuse_files = [
        f for f in (prefix / 'reuse').iterdir()
        if f.is_file() and f.suffix == '.docx'
    ]
    return dg_files, exclusive_files, reuse_files


def _match_by_stem(files, stem):
    """Return the last file in ``files`` whose stem equals ``stem``, or None."""
    matched = None
    for candidate in files:
        if candidate.stem == stem:
            matched = candidate
    return matched


def _find_fragment_file(area_name, doc_type, round_num, dg_files, exclusive_files, reuse_files):
    """Locate the fragment .docx that should fill the control named ``area_name``."""
    if doc_type == 'dg':
        return _match_by_stem(dg_files, area_name)
    # Fragments of regression documents carry a "第{round_num}轮" prefix in their own folder.
    exclusive_stem = area_name if round_num is None else f"第{round_num}轮{area_name}"
    search_order = (
        (exclusive_files, exclusive_stem),
        (reuse_files, area_name),
        (dg_files, area_name),
    )
    for files, stem in search_order:
        found = _match_by_stem(files, stem)
        if found is not None:
            return found
    return None


def _read_sdt_properties(sdt_ele):
    """Return (alias, is_locked) read from the ``sdtPr`` child of a ``w:sdt`` element.

    ``alias`` is the first attribute value of the first ``alias`` element that has
    attributes, or None when there is none.
    """
    alias = None
    is_locked = False
    for child in sdt_ele.iterchildren():
        if not child.tag.endswith('sdtPr'):
            continue
        for prop in child.iterchildren():
            if prop.tag.endswith('lock'):
                is_locked = True
            elif prop.tag.endswith('alias') and alias is None:
                values = prop.attrib.values()
                if values:
                    alias = values[0]
    return alias, is_locked


def _replace_sdt_content(sdt_content, fragment_file):
    """Replace the children of ``sdt_content`` with the body paragraphs/tables of a fragment."""
    doc_copied = Document(fragment_file)
    blocks = [
        element for element in doc_copied.element.body.inner_content_elements
        if isinstance(element, (CT_P, CT_Tbl))
    ]
    sdt_content.clear()
    for block in blocks:
        # Appending moves the element out of the fragment document into the template.
        sdt_content.append(block)


def _collect_fragment_images(fragment_file, area_name):
    """Return one ``{'name', 'img'}`` entry per picture of the fragment, in document order.

    ``img`` is None when the picture's image part cannot be resolved. The entry is
    still emitted so that later pictures of the same fragment keep their position.
    """
    # The fragment is loaded again because the previous load had its body elements
    # moved into the template.
    doc_copied = Document(fragment_file)
    related_parts = doc_copied.part.related_parts
    entries = []
    for pic_node in doc_copied.element.body.xpath('.//pic:pic'):
        embed_ids = pic_node.xpath('.//a:blip/@r:embed')
        embed = embed_ids[0] if embed_ids else None
        related_part = related_parts.get(embed) if embed is not None else None
        if related_part is None:
            print(f"警告: 文档片段 '{area_name}' 中的图片引用 {embed} 未找到,已跳过!!!!")
        entries.append({'name': area_name, 'img': related_part})
    return entries


def _insert_fragment_picture(doc, run_node, image_part):
    """Add ``image_part`` to an (already emptied) run, resize it and centre its paragraph."""
    parent = run_node.getparent()
    father_paragraph = Paragraph(parent, doc) if isinstance(parent, CT_P) else None
    inline_shape = Run(run_node, doc).add_picture(BytesIO(image_part.image.blob))
    source_width = inline_shape.width
    source_height = inline_shape.height
    # Landscape/square pictures are fixed to 120 mm wide, portrait ones to 60 mm high;
    # the other dimension is scaled by the same factor.
    if source_width >= source_height:
        inline_shape.width = Mm(120)
        inline_shape.height = int(source_height * (inline_shape.width / source_width))
    else:
        inline_shape.height = Mm(60)
        inline_shape.width = int(source_width * (inline_shape.height / source_height))
    if father_paragraph is not None:
        father_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER


def generate_temp_doc(doc_type: str, project_id: int, round_num=None, frag_list=None):
    """Fill the content controls of a product template with generated fragments.

    The template's top-level ``w:sdt`` controls are matched by alias against the
    fragment ``.docx`` files of the project; the content of each selected control is
    replaced by the fragment's paragraphs and tables, the fragment's pictures are
    re-inserted, and the result is saved into the project's ``temp`` folder.

    :param doc_type: ``sm`` / ``jl`` / ``bg`` / ``hsm`` / ``hjl`` / ``wtd``; any other
        value (including ``dg``) selects the test-outline template.
    :param project_id: project id, turned into a folder name by ``project_path``.
    :param round_num: regression round; used in the file names of ``hsm`` / ``hjl``
        and, when not None, as a prefix when looking up exclusive fragments.
    :param frag_list: items exposing ``name`` and ``isCover``; only controls whose
        alias maps to a truthy ``isCover`` are replaced. None means an empty list,
        so nothing is replaced.
    :return: ``(to_tpl_file, seitai_final_file)`` on success, or the dict
        ``{'code': 'error', 'msg': ...}`` when saving raises ``PermissionError``.
    """
    if frag_list is None:
        frag_list = []
    prefix = Path.cwd() / 'media' / project_path(project_id)
    template_file, to_tpl_file, seitai_final_file = _resolve_doc_paths(prefix, doc_type, round_num)
    dg_files, exclusive_files, reuse_files = _scan_fragment_files(prefix, doc_type)

    doc = Document(template_file.as_posix())
    body = doc.element.body
    frag_is_cover_dict = {item.name: item.isCover for item in frag_list}
    # One {'name': alias, 'img': ImagePart | None} entry per picture of every copied fragment.
    image_part_list = []

    # Step 1: replace the content of every unlocked, user-selected control.
    for sdt_ele in body.xpath('./w:sdt'):
        area_name, is_locked = _read_sdt_properties(sdt_ele)
        if area_name is None or is_locked or not frag_is_cover_dict.get(area_name):
            continue
        # The name is resolved per control, so a control without sdtContent can no
        # longer hand its name over to a following control.
        sdt_content = next(
            (child for child in sdt_ele.iterchildren() if child.tag.endswith('sdtContent')),
            None,
        )
        if sdt_content is None:
            continue
        fragment_file = _find_fragment_file(
            area_name, doc_type, round_num, dg_files, exclusive_files, reuse_files
        )
        if fragment_file is None:
            continue
        _replace_sdt_content(sdt_content, fragment_file)
        image_part_list.extend(_collect_fragment_images(fragment_file, area_name))

    # Step 2: pair every picture of the (now replaced) template with its control alias.
    named_pictures = []
    for pic_node in body.xpath('.//pic:pic'):
        enclosing_sdt = pic_node.xpath('ancestor::w:sdt[1]')
        if not enclosing_sdt:
            # A picture outside every content control is left untouched.
            continue
        yu_name, _ = _read_sdt_properties(enclosing_sdt[0])
        if yu_name is not None:
            named_pictures.append((yu_name, pic_node))

    # Step 3: re-insert the fragment pictures, consuming image_part_list in order.
    # NOTE(review): the run of every picture inside an aliased control is emptied,
    # including controls that were not replaced above (locked / not selected), so
    # their pictures are dropped - confirm this is intended.
    for yu_name, pic_node in named_pictures:
        image_run_node = getParentRunNode(pic_node)
        image_run_node.clear()
        for index, img_part in enumerate(image_part_list):
            if img_part['name'] != yu_name:
                continue
            if img_part['img'] is not None:
                _insert_fragment_picture(doc, image_run_node, img_part['img'])
            # Entries are consumed in order, so drop the one just used (or skipped).
            del image_part_list[index]
            break

    try:
        doc.save(str(to_tpl_file))
    except PermissionError:
        return {'code': 'error', 'msg': '生成的temp文件已打开,请关闭后重试...'}
    return to_tpl_file, seitai_final_file
|
||
|
||
def get_frag_from_document(doc_path: Path) -> List[Dict]:
    """List the fragment controls found at the top level of a products document.

    Every direct ``w:sdt`` child of the body that carries an ``alias`` with at
    least one attribute yields ``{'frag_name': <first attribute value>,
    'isLock': <True when its sdtPr contains a lock element>}``. The names are
    taken from the document as-is, so they must not be renamed in the docx.
    """
    document = Document(doc_path.as_posix())
    fragments = []
    for sdt in document.element.body.xpath('./w:sdt'):
        locked = False
        alias_attrs = None
        for child in sdt.iterchildren():
            if not child.tag.endswith('sdtPr'):
                continue
            for prop in child:
                prop_tag = prop.tag
                if prop_tag.endswith('alias'):
                    alias_attrs = prop.attrib.values()
                # A lock element marks a fragment the user locked in the template.
                if prop_tag.endswith('lock'):
                    locked = True
        if alias_attrs:
            fragments.append({'frag_name': alias_attrs[0], 'isLock': locked})
    return fragments
|
||
|
||
# Helper: takes the path of the temp document (fragments already replaced) and returns its jinja sdtContent elements
|
||
def get_jinja_stdContent_element(temp_docx_path: Path):
    """Collect the ``sdtContent`` of every content control tagged ``jinja``.

    Controls are searched in the main document part and in the header part of
    each section (headers are separate parts and are not reachable from the
    body).

    :param temp_docx_path: path of the temp document whose fragments were
        already replaced.
    :return: ``(text_frag_name_list, doc_docx)`` where the list holds
        ``{'alias': <alias value>, 'sdtContent': <sdtContent element>}`` dicts
        and ``doc_docx`` is the opened document.
    """
    doc_docx = Document(temp_docx_path.as_posix())
    text_frag_name_list = []

    def collect_jinja_controls(sdt_elements):
        """Append the sdtContent of each jinja-tagged control to the outer list."""
        for sdt_ele in sdt_elements:
            tag_value = None
            alias_value = None
            sdt_content = None
            for child in sdt_ele.iterchildren():
                if child.tag.endswith('sdtPr'):
                    for prop in child.iterchildren():
                        values = prop.attrib.values()
                        if not values:
                            continue
                        if prop.tag.endswith('tag'):
                            tag_value = values[0]
                        if prop.tag.endswith('alias'):
                            alias_value = values[0]
                if child.tag.endswith('sdtContent'):
                    sdt_content = child
            if tag_value == 'jinja' and alias_value is not None and sdt_content is not None:
                text_frag_name_list.append({'alias': alias_value, 'sdtContent': sdt_content})

    collect_jinja_controls(doc_docx.element.body.xpath('//w:sdt'))

    # Several sections can resolve to the same header part (presumably when a header
    # is linked to the previous section); scan each part once so its controls are
    # not reported repeatedly.
    seen_header_parts = set()
    for section in doc_docx.sections:
        header_part = section.header.part
        if id(header_part) in seen_header_parts:
            continue
        seen_header_parts.add(id(header_part))
        collect_jinja_controls(header_part.element.xpath('//w:sdt'))

    return text_frag_name_list, doc_docx
|
||
|
||
# Wrapper that rewrites an sdtContent selected by alias name -> the API handlers fetch the data and pass it to this function to modify the document
|
||
def stdContent_modify(modify_str: str | bool, doc_docx: Document, sdtContent: _Element):
    """Replace the content of an ``sdtContent`` element with a single text value.

    The first child that is a run (``CT_R``) or a paragraph (``CT_P``) receives
    the new text and becomes the only child of ``sdtContent``; if there is no
    such child nothing is changed.

    :param modify_str: text to write. ``True`` leaves the content untouched,
        ``False`` writes an empty string, and numbers are converted with
        ``str()`` (previously only the run branch converted ints).
    :param doc_docx: document passed as parent to the ``Run`` / ``Paragraph``
        proxies.
    :param sdtContent: the ``sdtContent`` element to rewrite in place.
    """
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(modify_str, bool):
        if modify_str:
            return
        modify_str = ""
    elif isinstance(modify_str, (int, float)):
        modify_str = str(modify_str)

    for ele in sdtContent:
        if isinstance(ele, CT_R):
            Run(ele, doc_docx).text = modify_str
        elif isinstance(ele, CT_P):
            para_ele = Paragraph(ele, doc_docx)
            para_ele.clear()
            para_ele.text = modify_str
        else:
            continue
        # Keep only the element that now carries the text.
        sdtContent.clear()
        sdtContent.append(ele)
        break
|