initial commit
This commit is contained in:
BIN
apps/project/tool/__pycache__/source_counter.cpython-313.pyc
Normal file
BIN
apps/project/tool/__pycache__/source_counter.cpython-313.pyc
Normal file
Binary file not shown.
BIN
apps/project/tool/__pycache__/timeList.cpython-313.pyc
Normal file
BIN
apps/project/tool/__pycache__/timeList.cpython-313.pyc
Normal file
Binary file not shown.
BIN
apps/project/tool/__pycache__/timeList.cpython-38.pyc
Normal file
BIN
apps/project/tool/__pycache__/timeList.cpython-38.pyc
Normal file
Binary file not shown.
BIN
apps/project/tool/__pycache__/xq_parse.cpython-313.pyc
Normal file
BIN
apps/project/tool/__pycache__/xq_parse.cpython-313.pyc
Normal file
Binary file not shown.
100
apps/project/tool/source_counter.py
Normal file
100
apps/project/tool/source_counter.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import lizard
|
||||
import os
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
def analyze_code_directory(file_path):
    """Analyze C/C++ sources under *file_path* and return code-quality metrics.

    Walks the directory tree, feeds every C/C++ source file to lizard for
    structural metrics (functions, NLOC, cyclomatic complexity, fan-out),
    and counts comment/blank lines with a manual heuristic.

    Args:
        file_path: root directory (str or Path) to scan.

    Returns:
        dict with metric keys: 'comment_rate' (percent), 'total_lines'
        (sum of function lengths), 'effective_lines' (lizard NLOC),
        'avg_function_lines', 'avg_cyclomatic', 'avg_fan_out',
        'function_count', 'total_blanks', 'code_ratio', plus
        'max_cyclomatic', 'high_cyclomatic_ratio' and 'comment_lines'
        when at least one function was found.
    """
    results = {
        'comment_rate': 0.0,      # comment rate (manual count), percent
        'total_lines': 0,         # total lines across all functions
        'effective_lines': 0,     # effective (non-comment/non-blank) lines
        'avg_function_lines': 0,  # average function length
        'avg_cyclomatic': 0,      # average cyclomatic complexity
        'avg_fan_out': 0,         # average fan-out
        'function_count': 0       # number of functions
    }
    total_comments = 0
    total_blanks = 0
    total_lines = 0
    total_effective = 0
    functions = []

    source_suffixes = ('.c', '.cpp', '.h', '.hpp', '.cc', '.cxx')
    for root, _, files in os.walk(file_path):
        for file in files:
            if file.endswith(source_suffixes):
                filepath = os.path.join(root, file)
                # Structural metrics via lizard.
                analysis = lizard.analyze_file(filepath)
                functions.extend(analysis.function_list)
                # lizard's NLOC == effective lines of code for the file.
                total_effective += analysis.nloc
                # Manual comment/blank counting; heuristic recognises
                # C-style comment markers only.
                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.readlines()
                total_comments += sum(
                    1 for line in content
                    if line.strip().startswith(('//', '/*', '*')) or '*/' in line)
                total_blanks += sum(1 for line in content if not line.strip())

    # Function-level metrics (only meaningful if lizard found any function).
    if functions:
        cyclomatic_list = [f.cyclomatic_complexity for f in functions]
        high_cyclo = sum(1 for c in cyclomatic_list if c >= 20)
        results['function_count'] = len(functions)
        results['avg_function_lines'] = sum(f.length for f in functions) / len(functions)
        results['avg_cyclomatic'] = sum(cyclomatic_list) / len(functions)
        results['avg_fan_out'] = sum(f.fan_out for f in functions) / len(functions)
        results['max_cyclomatic'] = max(cyclomatic_list)
        # Share (percent) of functions whose cyclomatic complexity >= 20.
        results['high_cyclomatic_ratio'] = high_cyclo / len(functions) * 100
        total_lines = sum(f.length for f in functions)

    # Global metrics. 'comment_lines'/'comment_rate' are only added when at
    # least one function was found (total_lines > 0), as before.
    if total_lines > 0:
        results['comment_lines'] = total_comments
        results['comment_rate'] = total_comments / total_lines * 100
    results['total_lines'] = total_lines
    results['effective_lines'] = total_effective
    results['total_blanks'] = total_blanks
    results['code_ratio'] = total_effective / total_lines if total_lines > 0 else 0
    return results
|
||||
|
||||
# 解压zip文件方法
|
||||
def extract_and_get_paths(zip_path: str,
                          extract_to: str = 'unzipped_files') -> str:
    """Extract a ZIP archive and return the absolute path of the output directory.

    Args:
        zip_path: path of the ZIP file to extract.
        extract_to: extraction directory (default 'unzipped_files').
            If None, a randomly named temporary directory is created
            under the current working directory.

    Returns:
        Absolute path of the directory the archive was extracted into.
        (The previous docstring wrongly claimed a list of matched file
        paths and a nonexistent ``target_extensions`` parameter.)

    Raises:
        ValueError: if *zip_path* is not a valid ZIP file.
        RuntimeError: for any other extraction failure.
    """
    # Create the extraction directory if it does not exist yet.
    if extract_to is None:
        extract_to = os.path.join(os.getcwd(), f"unzip_temp_{os.urandom(4).hex()}")
    os.makedirs(extract_to, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
        return os.path.abspath(extract_to)
    except zipfile.BadZipFile:
        raise ValueError(f"无效的ZIP文件: {zip_path}")
    except Exception as e:
        raise RuntimeError(f"解压失败: {str(e)}")
|
||||
|
||||
# 使用示例
|
||||
# Usage example
if __name__ == "__main__":
    path = Path("../Cpro/")
    if not path.is_dir():
        print("错误: 路径不存在或不是目录")
    else:
        stats = analyze_code_directory(path)

        print("\n代码分析结果:")
        print(f"1. 注释率: {stats['comment_rate']:.2f}%")
        # BUG FIX: the label reads effective/total (有效代码行数/总行数) and
        # code_ratio is effective/total, but the original printed the values
        # as total/effective — order now matches the label.
        print(
            f"2. 有效代码行数/总行数: {stats['effective_lines']}/{stats['total_lines']} (比例: {stats['code_ratio']:.2f})")
        print(f"3. 函数数量: {stats['function_count']}")
        print(f"4. 函数平均行数: {stats['avg_function_lines']:.1f}")
        print(f"5. 函数平均圈复杂度: {stats['avg_cyclomatic']:.1f}")
        print(f"6. 函数平均扇出数: {stats['avg_fan_out']:.1f}")
|
||||
104
apps/project/tool/temp.py
Normal file
104
apps/project/tool/temp.py
Normal file
File diff suppressed because one or more lines are too long
104
apps/project/tool/timeList.py
Normal file
104
apps/project/tool/timeList.py
Normal file
@@ -0,0 +1,104 @@
|
||||
from apps.createDocument.extensions.documentTime import DocTime
|
||||
from django.shortcuts import get_object_or_404
|
||||
from apps.project.models import Project
|
||||
|
||||
def _doc_time_entry(temp_dict, title):
    """Build one document-time dict from a ``*_final_time()`` result."""
    return {
        'title': title,
        '封面时间': temp_dict['cover_time'],
        '拟制时间': temp_dict['preparation_time'],
        '校对时间': temp_dict['inspect_time'],
        '审核时间': temp_dict['auditing_time'],
        '批准时间': temp_dict['ratify_time'],
        '创建时间': temp_dict['create_doc_time'],
        'v1版本时间': temp_dict['doc_v1_time']
    }


def time_return_to(id):
    """Collect the time metadata of every evaluation document of a project.

    Args:
        id: primary key of the Project (404 when not found).

    Returns:
        list of dicts, one per document (outline, description, record,
        per-round regression description/record, report), each mapping a
        Chinese label to a time value or a [start, end] pair.
    """
    project_obj = get_object_or_404(Project, id=id)
    time = []
    time_parser = DocTime(id)

    # 1. Outline: evaluation place/time fragment.
    temp_dict = time_parser.dg_address_time()
    dg_otime = {'title': '测评大纲'}
    # NOTE(review): both entries use beginTime_strf — an end time may have
    # been intended; confirm against DocTime.dg_address_time().
    dg_otime['被测件接收'] = [temp_dict['beginTime_strf'], temp_dict['beginTime_strf']]
    dg_otime['大纲编制时间'] = [temp_dict['dgCompileStart'], temp_dict['dgCompileEnd']]
    dg_otime['设计与实现时间'] = [temp_dict['designStart'], temp_dict['designEnd']]
    # 2. Outline: document times.
    dg_otime.update(_doc_time_entry(time_parser.dg_final_time(), '测评大纲'))
    time.append(dg_otime)

    # 3. Description document times.
    time.append(_doc_time_entry(time_parser.sm_final_time(), '测试说明'))

    # 4. Record document times.
    time.append(_doc_time_entry(time_parser.jl_final_time(), '测试记录'))

    # 5. Regression description/record times, one pair per extra round
    #    (round key '0' is the first round and is skipped).
    for round_field in project_obj.pField.all():
        if round_field.key == '0':
            continue
        round_no = int(round_field.key) + 1
        time.append(_doc_time_entry(time_parser.hsm_final_time(round_field.key),
                                    f'第{round_no}轮测试说明'))
        time.append(_doc_time_entry(time_parser.hjl_final_time(round_field.key),
                                    f'第{round_no}轮测试记录'))

    # 6. Report.
    # 6.1 Report fragment: evaluation time and place.
    temp_dict = time_parser.bg_address_time()
    bg_otime = {
        'title': '测评报告',
        '被测件接收时间': temp_dict['begin_time'],
        '大纲编制时间': [temp_dict['dg_weave_start_date'], temp_dict['dg_weave_end_date']],
        '测评设计与实现': [temp_dict['sj_weave_start_date'], temp_dict['sj_weave_end_date']],
        '测评总结': [temp_dict['summary_start_date'], temp_dict['summary_end_date']]
    }
    for r in temp_dict['round_time_list']:
        bg_otime[r['name']] = [r['start'], r['end']]
    # 6.2 Report document times.
    # BUG FIX: the original had stray trailing commas on several of these
    # assignments (封面/校对/审核/批准/创建时间), wrapping the values in
    # 1-tuples unlike every other document; the helper assigns plain values.
    bg_otime.update(_doc_time_entry(time_parser.bg_final_time(), '测评报告'))
    time.append(bg_otime)
    return time
|
||||
238
apps/project/tool/xq_parse.py
Normal file
238
apps/project/tool/xq_parse.py
Normal file
@@ -0,0 +1,238 @@
|
||||
import re
|
||||
import docx
|
||||
from docx.document import Document
|
||||
from docx.text.paragraph import Paragraph
|
||||
from docx.parts.image import ImagePart
|
||||
from docx.table import _Cell, Table
|
||||
from docx.oxml.table import CT_Tbl
|
||||
from docx.oxml.text.paragraph import CT_P
|
||||
from collections import OrderedDict
|
||||
|
||||
class DocxChapterExtractor(object):
    """Extract a chapter tree (numbers, titles, bodies) from a .docx file.

    The extractor reads the document's table of contents to learn the chapter
    layout, walks the body paragraphs/tables to collect each chapter's text,
    images and tables, and finally builds a nested JSON-like tree.
    """

    def __init__(self, docx_path):
        self.doc = docx.Document(docx_path)  # parse the document once

    def extract_chapter_info(self, text):
        """Split a TOC line into (chapter_number, title).

        Returns (None, None) and prints a notice when *text* does not look
        like '<number> <title> [page-number]'.
        """
        pattern = r'^(\d+(?:\.\d+)*)\s+(.*?)(?:\s*\d+)?\s*$'
        match = re.match(pattern, text)
        chapter_num = None
        content = None
        if match:
            chapter_num = match.group(1)      # e.g. '4.1' or '4'
            content = match.group(2).strip()  # e.g. '外部接口需求'
        else:
            print(f"'{text}' no match")
        return chapter_num, content

    def if_valid_match(self, chaptera_name, text):
        """Return True when *text* is the TOC line of chapter *chaptera_name*."""
        # BUG FIX: escape the chapter name so regex metacharacters in a title
        # cannot corrupt (or silently widen) the pattern.
        pattern = r'^(\d+(?:\.\d+)*)\s+' + re.escape(chaptera_name) + r'(?:\s*\d+)?\s*$'
        return re.match(pattern, text) is not None

    def get_chapter_number(self, chapter_name):
        """Return the directory [(number, title), ...] of *chapter_name* and
        all of its sub-chapters, read from the document's TOC paragraphs."""
        directory = []
        chapter_num = ''
        flag = False  # True once the target chapter's TOC line was seen
        for paragraph in self.doc.paragraphs:
            if self.if_valid_match(chapter_name, paragraph.text) and 'toc' in paragraph.style.name:
                chapter_num, content = self.extract_chapter_info(paragraph.text)
                directory.append((chapter_num, content))
                flag = True
                continue
            # Sub-chapters share the parent's number prefix (e.g. '4' -> '4.1').
            if flag and paragraph.text.startswith(chapter_num) and 'toc' in paragraph.style.name:
                num, content = self.extract_chapter_info(paragraph.text)
                directory.append((num, content))
        return directory

    def build_hierarchy(self, chapter_body_list):
        """Convert the flat chapter list into a nested dict structure."""
        hierarchy = {}
        path = []  # current path stack of number parts, e.g. ["4", "2"]
        for item in chapter_body_list:
            # Accept (num, title, body) or (num, title) tuples.
            if len(item) == 3:
                num, content, _ = item  # third element (body) ignored here
            elif len(item) == 2:
                num, content = item
            else:
                continue  # skip malformed entries
            # Split the chapter number, e.g. '4.2.1' -> ["4", "2", "1"].
            parts = num.split('.')
            # 1. Pop the path stack back to this entry's depth.
            while len(path) >= len(parts):
                path.pop()
            # 2. Walk (or create placeholders for) parent nodes level by level.
            current_level = hierarchy
            for i in range(len(path)):
                part = path[i]
                # Auto-create a placeholder when a parent was never listed.
                if part not in current_level:
                    current_level[part] = {
                        "number": ".".join(parts[:i + 1]),
                        "title": "[未命名章节]",  # placeholder title
                        "children": {}
                    }
                current_level = current_level[part]["children"]
            # 3. Insert the current node.
            current_part = parts[len(path)]
            if current_part not in current_level:
                current_level[current_part] = {
                    "number": num,
                    "title": content,
                    "children": {}
                }
            # 4. Remember the path for the next iteration.
            path = parts.copy()
        return hierarchy

    def extract_title_ordinal(self, s):
        """Split 'title (ordinal)' into (title, ordinal).

        Accepts CJK or ASCII parentheses; *ordinal* is None when the string
        has no trailing parenthesised part.
        """
        pattern = r'^(.*?)\s*[((](.*?)[))]$'
        match = re.match(pattern, s)
        if match:
            title = match.group(1).strip()
            ordinal = match.group(2).strip()
        else:
            title = s
            ordinal = None
        return title, ordinal

    def build_json_tree(self, chapter_body_list):
        """Build a tree of {"number","title","ordinal","content","children"}
        dicts from the flat chapter list; returns the first top-level chapter
        (or {} when the list is empty)."""
        root = {"number": "", "title": "ROOT", "content": "", "children": []}
        node_map = OrderedDict()
        node_map[""] = root  # root lookup entry
        for item in chapter_body_list:
            # Accept (num, name, content) or (num, name) tuples.
            if len(item) == 3:
                num, chapter_name, chapter_content = item
                title, ordinal = self.extract_title_ordinal(chapter_name)
            elif len(item) == 2:
                num, chapter_name = item
                title, ordinal = self.extract_title_ordinal(chapter_name)
                chapter_content = ""
            else:
                continue  # skip malformed entries
            parts = num.split('.')
            parent_node = root  # resolve parents starting from the root
            for depth in range(len(parts)):
                current_num = ".".join(parts[:depth + 1])
                if current_num not in node_map:
                    new_node = {
                        "number": current_num,
                        "title": title if (depth == len(parts) - 1) else "[未命名章节]",
                        "ordinal": ordinal if (depth == len(parts) - 1) else "",
                        "content": chapter_content if (depth == len(parts) - 1) else "",
                        "children": []
                    }
                    parent_num = ".".join(parts[:depth])
                    parent_node = node_map[parent_num]
                    parent_node["children"].append(new_node)
                    node_map[current_num] = new_node
                parent_node = node_map[current_num]
            # Overwrite placeholder data in case the node pre-existed.
            node_map[num]["title"] = title
            node_map[num]["ordinal"] = ordinal
            node_map[num]["content"] = chapter_content
        return root["children"][0] if root["children"] else {}

    def is_image(self, graph: Paragraph, doc: Document):
        """Return True when the paragraph embeds at least one image."""
        images = graph._element.xpath('.//pic:pic')  # all pictures in the run
        for image in images:
            for img_id in image.xpath('.//a:blip/@r:embed'):  # relationship id
                part = doc.part.related_parts[img_id]  # resolve the image part
                if isinstance(part, ImagePart):
                    return True
        return False

    def get_ImagePart(self, graph: Paragraph, doc: Document):
        """Return the first embedded image of the paragraph as bytes, else None.

        NOTE: at most one image per paragraph is returned.
        """
        images = graph._element.xpath('.//pic:pic')  # all pictures in the run
        for image in images:
            for img_id in image.xpath('.//a:blip/@r:embed'):  # relationship id
                part = doc.part.related_parts[img_id]
                if isinstance(part, ImagePart):
                    return part.blob
        return None

    def iter_block_items(self, parent, directory):
        """Collect each directory entry's body by walking the document.

        Args:
            parent: the parsed document (pass self.doc) or a table cell.
            directory: chapter layout, e.g. [('4', '工程需求'),
                ('4.1', '外部接口需求'), ('4.2', '功能需求'),
                ('4.2.1', '知识库大模型检索问答功能')].

        Returns:
            list of tuples: each directory entry extended with repr(body),
            where body mixes paragraph texts, image bytes and table rows.
        """
        if isinstance(parent, Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("something's not right")
        i = 0
        body_list = []
        body = []
        flag = False  # True while iterating inside the target chapter
        for child in parent_elm.iterchildren():
            if isinstance(child, CT_P):
                paragraph = Paragraph(child, parent)
                if i < len(directory) - 1:
                    # Heading of the current entry: start collecting.
                    if paragraph.text == directory[i][1] and 'Heading' in paragraph.style.name:
                        flag = True
                        continue
                    # Heading of the next entry: flush the collected body.
                    if paragraph.text == directory[i + 1][1] and 'Heading' in paragraph.style.name:
                        new_tuple = directory[i] + (repr(body),)
                        body_list.append(new_tuple)
                        body = []
                        i += 1
                        continue
                    if flag:
                        if self.is_image(paragraph, parent):
                            body.append(self.get_ImagePart(paragraph, parent))
                        elif paragraph.text != '':
                            body.append(paragraph.text)
                elif i == len(directory) - 1:
                    # Last entry: any further heading ends the chapter.
                    if 'Heading' in paragraph.style.name:
                        new_tuple = directory[i] + (repr(body),)
                        body_list.append(new_tuple)
                        break
                    if self.is_image(paragraph, parent):
                        body.append(self.get_ImagePart(paragraph, parent))
                    elif paragraph.text != '':
                        body.append(paragraph.text)
                else:
                    # Empty directory: nothing to collect.
                    flag = False
            elif isinstance(child, CT_Tbl):
                if flag:
                    table = []
                    for row in Table(child, parent).rows:
                        row_text = [cell.text for cell in row.cells]
                        # Join cells with tabs to keep each row on one line.
                        table.append("\t".join(row_text))
                    body.append(table)
        return body_list

    def main(self, chapter_name):
        """Extract *chapter_name*: print the directory, the flat body list
        and the resulting JSON tree."""
        directory = self.get_chapter_number(chapter_name)
        print(directory)
        chapter_body_list = self.iter_block_items(self.doc, directory)
        print(chapter_body_list)
        json_tree = self.build_json_tree(chapter_body_list)
        print(json_tree)
||||
|
||||
if __name__ == '__main__':
    # Demo run against a local sample document.
    extractor = DocxChapterExtractor('test - 副本.docx')
    extractor.main('工程需求')
|
||||
Reference in New Issue
Block a user