initial commit

This commit is contained in:
2025-04-29 18:09:00 +08:00
commit 4faed52de5
690 changed files with 13481 additions and 0 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,100 @@
import lizard
import os
import zipfile
from pathlib import Path
def analyze_code_directory(file_path):
    """Analyze C/C++ sources under *file_path* with lizard plus manual counts.

    Walks the directory tree, feeds every C/C++ source file to lizard for
    structural metrics (function list, NLOC, cyclomatic complexity, fan-out)
    and counts comment/blank lines manually.

    Args:
        file_path: Root directory to scan (str or Path).

    Returns:
        dict with: comment_rate (%), total_lines (sum of function lengths),
        effective_lines (NLOC), avg_function_lines, avg_cyclomatic,
        avg_fan_out, function_count, max_cyclomatic, high_cyclomatic_ratio,
        comment_lines, total_blanks, code_ratio.  Every key is always present,
        so callers never hit KeyError on an empty directory.
    """
    # BUG FIX: pre-declare every output key.  The original only added
    # 'max_cyclomatic', 'high_cyclomatic_ratio', 'comment_lines',
    # 'total_blanks' and 'code_ratio' when functions were found, which made
    # downstream readers (e.g. the __main__ printer) crash on empty input.
    results = {
        'comment_rate': 0.0,          # comment rate (%) from manual counting
        'total_lines': 0,             # total lines across all functions
        'effective_lines': 0,         # effective (NLOC) code lines
        'avg_function_lines': 0,      # average function length
        'avg_cyclomatic': 0,          # average cyclomatic complexity
        'avg_fan_out': 0,             # average fan-out
        'function_count': 0,          # number of functions
        'max_cyclomatic': 0,          # highest per-function complexity
        'high_cyclomatic_ratio': 0,   # % of functions with complexity >= 20
        'comment_lines': 0,           # manually counted comment lines
        'total_blanks': 0,            # blank lines
        'code_ratio': 0,              # effective_lines / total_lines
    }
    total_comments = 0
    total_blanks = 0
    total_lines = 0
    total_effective = 0
    functions = []
    for root, _, files in os.walk(file_path):
        for file in files:
            if file.endswith(('.c', '.cpp', '.h', '.hpp', '.cc', '.cxx')):
                filepath = os.path.join(root, file)
                # Structural metrics via lizard.
                analysis = lizard.analyze_file(filepath)
                functions.extend(analysis.function_list)
                # lizard's effective-line (NLOC) count.
                total_effective += analysis.nloc
                # Manual comment/blank counting.  NOTE(review): a line merely
                # containing '*/' also counts as a comment line, which can
                # over-count lines ending a block comment mid-code.
                with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                    content = f.readlines()
                total_comments += sum(
                    1 for line in content
                    if line.strip().startswith(('//', '/*', '*')) or '*/' in line)
                total_blanks += sum(1 for line in content if not line.strip())
    # Function-level metrics.
    if functions:
        cyclomatic_list = [f.cyclomatic_complexity for f in functions]
        high_cyclo = sum(1 for c in cyclomatic_list if c >= 20)
        results['function_count'] = len(functions)
        results['avg_function_lines'] = sum(f.length for f in functions) / len(functions)
        results['avg_cyclomatic'] = sum(cyclomatic_list) / len(functions)
        results['avg_fan_out'] = sum(f.fan_out for f in functions) / len(functions)
        results['max_cyclomatic'] = max(cyclomatic_list)
        results['high_cyclomatic_ratio'] = high_cyclo / len(functions) * 100
        total_lines = sum(f.length for f in functions)
    # Global metrics -- only meaningful once some function code was seen.
    if total_lines > 0:
        results['comment_lines'] = total_comments
        results['comment_rate'] = total_comments / total_lines * 100
        results['total_lines'] = total_lines
        results['effective_lines'] = total_effective
        results['total_blanks'] = total_blanks
        results['code_ratio'] = total_effective / total_lines
    return results
# ZIP extraction helper
def extract_and_get_paths(zip_path: str,
                          extract_to: str = 'unzipped_files') -> str:
    """Extract *zip_path* and return the absolute path of the output directory.

    (BUG FIX: the previous docstring documented a nonexistent
    ``target_extensions`` parameter and claimed a list of file paths was
    returned; the function has always returned the extraction directory.)

    Args:
        zip_path: Path to the ZIP archive.
        extract_to: Target directory (default 'unzipped_files'); created if
            missing.  Passing None explicitly selects a fresh randomly-named
            directory under the current working directory.

    Returns:
        Absolute path of the directory the archive was extracted into.

    Raises:
        ValueError: if *zip_path* is not a valid ZIP archive.
        RuntimeError: for any other extraction failure.
    """
    if extract_to is None:
        # Unique scratch directory to avoid collisions between callers.
        extract_to = os.path.join(os.getcwd(), f"unzip_temp_{os.urandom(4).hex()}")
    os.makedirs(extract_to, exist_ok=True)
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
            return os.path.abspath(extract_to)
    except zipfile.BadZipFile as e:
        raise ValueError(f"无效的ZIP文件: {zip_path}") from e
    except Exception as e:
        raise RuntimeError(f"解压失败: {str(e)}") from e
# Usage example
if __name__ == "__main__":
    path = Path("../Cpro/")
    if not path.is_dir():
        print("错误: 路径不存在或不是目录")
    else:
        stats = analyze_code_directory(path)
        print("\n代码分析结果:")
        print(f"1. 注释率: {stats['comment_rate']:.2f}%")
        # BUG FIX: the label reads "effective lines / total lines" but the
        # original printed total_lines first; print the values in the order
        # the label promises.  Use .get for 'code_ratio', which the analyzer
        # may omit when the directory holds no functions.
        print(
            f"2. 有效代码行数/总行数: {stats['effective_lines']}/{stats['total_lines']} (比例: {stats.get('code_ratio', 0):.2f})")
        print(f"3. 函数数量: {stats['function_count']}")
        print(f"4. 函数平均行数: {stats['avg_function_lines']:.1f}")
        print(f"5. 函数平均圈复杂度: {stats['avg_cyclomatic']:.1f}")
        print(f"6. 函数平均扇出数: {stats['avg_fan_out']:.1f}")

104
apps/project/tool/temp.py Normal file

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,104 @@
from apps.createDocument.extensions.documentTime import DocTime
from django.shortcuts import get_object_or_404
from apps.project.models import Project
def time_return_to(id):
    """Collect document-related timestamps for every deliverable of a project.

    Builds one dict per document (outline, test specification, test record,
    per-round regression spec/record, final report), each mapping a Chinese
    label to a timestamp or a [start, end] pair, and returns them in order.

    Args:
        id: Primary key of the Project; raises Http404 when not found.

    Returns:
        list[dict]: one entry per document.
    """
    project_obj = get_object_or_404(Project, id=id)
    time = []
    time_parser = DocTime(id)

    def _doc_times(title, d):
        # The 8-field sign-off block shared by every document type.
        return {
            'title': title,
            '封面时间': d['cover_time'],
            '拟制时间': d['preparation_time'],
            '校对时间': d['inspect_time'],
            '审核时间': d['auditing_time'],
            '批准时间': d['ratify_time'],
            '创建时间': d['create_doc_time'],
            'v1版本时间': d['doc_v1_time'],
        }

    # 1. Outline: evaluation place/time related dates.
    temp_dict = time_parser.dg_address_time()
    dg_otime = {'title': '测评大纲'}
    # NOTE(review): both entries use beginTime_strf -- was an end time
    # intended for the second element?  Confirm against DocTime.
    dg_otime['被测件接收'] = [temp_dict['beginTime_strf'], temp_dict['beginTime_strf']]
    dg_otime['大纲编制时间'] = [temp_dict['dgCompileStart'], temp_dict['dgCompileEnd']]
    dg_otime['设计与实现时间'] = [temp_dict['designStart'], temp_dict['designEnd']]
    # 2. Outline: document sign-off times.
    dg_otime.update(_doc_times('测评大纲', time_parser.dg_final_time()))
    time.append(dg_otime)
    # 3. Test specification times.
    time.append(_doc_times('测试说明', time_parser.sm_final_time()))
    # 4. Test record times.
    time.append(_doc_times('测试记录', time_parser.jl_final_time()))
    # 5. Regression specification + record times, one pair per extra round.
    # (renamed loop var: 'round' shadowed the builtin)
    for test_round in project_obj.pField.all():
        if test_round.key == '0':
            continue  # round 0 is the initial test, covered above
        round_no = int(test_round.key) + 1
        time.append(_doc_times(f'{round_no}轮测试说明',
                               time_parser.hsm_final_time(test_round.key)))
        time.append(_doc_times(f'{round_no}轮测试记录',
                               time_parser.hjl_final_time(test_round.key)))
    # 6. Report times.
    # 6.1 Report fragment: evaluation time and place.
    temp_dict = time_parser.bg_address_time()
    bg_otime = {
        'title': '测评报告',
        '被测件接收时间': temp_dict['begin_time'],
        '大纲编制时间': [temp_dict['dg_weave_start_date'], temp_dict['dg_weave_end_date']],
        '测评设计与实现': [temp_dict['sj_weave_start_date'], temp_dict['sj_weave_end_date']],
        '测评总结': [temp_dict['summary_start_date'], temp_dict['summary_end_date']],
    }
    for r in temp_dict['round_time_list']:
        bg_otime[r['name']] = [r['start'], r['end']]
    # 6.2 Report sign-off times.
    # BUG FIX: the original left trailing commas on five of these
    # assignments, silently wrapping the values in 1-tuples and making the
    # report entry inconsistent with every other document's entry.
    bg_otime.update(_doc_times('测评报告', time_parser.bg_final_time()))
    time.append(bg_otime)
    return time

View File

@@ -0,0 +1,238 @@
import re
import docx
from docx.document import Document
from docx.text.paragraph import Paragraph
from docx.parts.image import ImagePart
from docx.table import _Cell, Table
from docx.oxml.table import CT_Tbl
from docx.oxml.text.paragraph import CT_P
from collections import OrderedDict
class DocxChapterExtractor(object):
    """Extract a chapter's subtree (headings plus body text, images and
    tables) from a .docx file, using the document's table of contents to
    locate the chapter and its sub-chapters."""

    def __init__(self, docx_path):
        self.doc = docx.Document(docx_path)  # parse the document once

    def extract_chapter_info(self, text):
        """Split a TOC line into (chapter number, title).

        E.g. '4.1 外部接口需求 12' -> ('4.1', '外部接口需求'); a trailing
        page number, when present, is discarded.  Returns (None, None) and
        prints a notice when the line is not a numbered chapter entry.
        """
        pattern = r'^(\d+(?:\.\d+)*)\s+(.*?)(?:\s*\d+)?\s*$'
        match = re.match(pattern, text)
        chapter_num = None
        content = None
        if match:
            chapter_num = match.group(1)  # '4.1' or '4'
            content = match.group(2).strip()  # e.g. '外部接口需求'
        else:
            print(f"'{text}' no match")
        return chapter_num, content

    def if_valid_match(self, chaptera_name, text):
        # True when *text* is a TOC-style line whose title equals
        # chaptera_name.  NOTE(review): chaptera_name is interpolated into
        # the regex unescaped; titles with regex metacharacters would
        # misbehave -- consider re.escape.
        pattern = r'^(\d+(?:\.\d+)*)\s+' + chaptera_name + r'(?:\s*\d+)?\s*$'
        return re.match(pattern, text) is not None

    def get_chapter_number(self, chapter_name):
        """Return [(number, title), ...] for *chapter_name* and everything
        under it, read from paragraphs whose style name contains 'toc'."""
        directory = []
        chapter_num = ''
        flag = False  # set once the requested chapter's TOC line is found
        for paragraph in self.doc.paragraphs:
            if self.if_valid_match(chapter_name, paragraph.text) and 'toc' in paragraph.style.name:
                chapter_num, content = self.extract_chapter_info(paragraph.text)
                directory.append((chapter_num, content))
                flag = True
                continue
            # Sub-chapters share the parent's numeric prefix.
            # NOTE(review): plain startswith -- '4' would also match '40 ...'.
            if flag and paragraph.text.startswith(chapter_num) and 'toc' in paragraph.style.name:
                num, content = self.extract_chapter_info(paragraph.text)
                directory.append((num, content))
        return directory

    def build_hierarchy(self, chapter_body_list):
        """Convert the flat chapter list into a nested dict keyed by each
        level's number component."""
        hierarchy = {}
        path = []  # current path stack of number parts, e.g. ["4", "2"]
        for item in chapter_body_list:
            # Accept both (num, title) and (num, title, body) tuples.
            if len(item) == 3:
                num, content, _ = item  # third element (body) ignored here
            elif len(item) == 2:
                num, content = item
            else:
                continue  # skip malformed entries
            # Split the chapter number, e.g. '4.2.1' -> ["4", "2", "1"].
            parts = num.split('.')
            # 1. Pop back up until the stack is strictly above the new level.
            while len(path) >= len(parts):
                path.pop()
            # 2. Walk (creating placeholders as needed) down to the parent.
            current_level = hierarchy
            for i in range(len(path)):
                part = path[i]
                # Parent never listed itself: create a placeholder node.
                if part not in current_level:
                    current_level[part] = {
                        "number": ".".join(parts[:i + 1]),
                        "title": "[未命名章节]",  # placeholder title
                        "children": {}
                    }
                current_level = current_level[part]["children"]
            # 3. Insert the node itself.
            current_part = parts[len(path)]  # this level's component
            if current_part not in current_level:
                current_level[current_part] = {
                    "number": num,
                    "title": content,
                    "children": {}
                }
            # 4. The new node's components become the current path.
            path = parts.copy()
        return hierarchy

    def extract_title_ordinal(self, s):
        # Match titles ending with a parenthesised ordinal:
        # 'title(a)' -> ('title', 'a'); otherwise (s, None).
        pattern = r'^(.*?)\s*[(](.*?)[)]$'
        match = re.match(pattern, s)
        if match:
            title = match.group(1).strip()
            ordinal = match.group(2).strip()
        else:
            title = s
            ordinal = None
        return title, ordinal

    def build_json_tree(self, chapter_body_list):
        """Build a tree of {"number","title","ordinal","content","children"}
        nodes directly from the flat chapter list; returns the first
        top-level chapter, or {} when the list is empty."""
        root = {"number": "", "title": "ROOT", "content": "", "children": []}
        node_map = OrderedDict()
        node_map[""] = root  # root reachable under the empty number
        for item in chapter_body_list:
            # Accept both (num, name, body) and (num, name) tuples.
            if len(item) == 3:
                num, chapter_name, chapter_content = item
                title, ordinal = self.extract_title_ordinal(chapter_name)
            elif len(item) == 2:
                num, chapter_name = item
                title, ordinal = self.extract_title_ordinal(chapter_name)
                chapter_content = ""
            else:
                continue  # skip malformed entries
            parts = num.split('.')
            parent_node = root  # resolve parents starting from the root
            for depth in range(len(parts)):
                current_num = ".".join(parts[:depth + 1])
                if current_num not in node_map:
                    # Intermediate levels get placeholders; the real
                    # title/content is written when their own row arrives.
                    new_node = {
                        "number": current_num,
                        "title": title if (depth == len(parts) - 1) else "[未命名章节]",
                        "ordinal": ordinal if (depth == len(parts) - 1) else "",
                        "content": chapter_content if (depth == len(parts) - 1) else "",
                        "children": []
                    }
                    parent_num = ".".join(parts[:depth])
                    parent_node = node_map[parent_num]
                    parent_node["children"].append(new_node)
                    node_map[current_num] = new_node
                parent_node = node_map[current_num]
            # Overwrite so the final node carries the row's actual data even
            # when it was created earlier as a placeholder.
            node_map[num]["title"] = title
            node_map[num]["ordinal"] = ordinal
            node_map[num]["content"] = chapter_content
        return root["children"][0] if root["children"] else {}

    def is_image(self, graph: Paragraph, doc: Document):
        """Return True when the paragraph embeds at least one image."""
        images = graph._element.xpath('.//pic:pic')  # all pictures
        for image in images:
            for img_id in image.xpath('.//a:blip/@r:embed'):  # image rel id
                part = doc.part.related_parts[img_id]  # resolve id -> part
                if isinstance(part, ImagePart):
                    return True
        return False

    def get_ImagePart(self, graph: Paragraph, doc: Document):  # one image per paragraph
        """Return the first embedded image's raw bytes (blob), or None."""
        images = graph._element.xpath('.//pic:pic')  # all pictures
        for image in images:
            for img_id in image.xpath('.//a:blip/@r:embed'):  # image rel id
                part = doc.part.related_parts[img_id]  # resolve id -> part
                if isinstance(part, ImagePart):
                    return part.blob
        return None

    def iter_block_items(self, parent, directory):
        """Collect each chapter's body by walking the document in order.

        parent: parsed docx content -- pass self.doc (or a table cell).
        directory: chapter listing as produced by get_chapter_number, e.g.
            [('4', '工程需求'), ('4.1', '外部接口需求'),
             ('4.2', '功能需求'), ('4.2.1', '知识库大模型检索问答功能')]

        Returns a list of tuples: each directory entry extended with repr()
        of the body items (strings, image blobs, flattened tables) found
        between that heading and the next.
        """
        if isinstance(parent, Document):
            parent_elm = parent.element.body
        elif isinstance(parent, _Cell):
            parent_elm = parent._tc
        else:
            raise ValueError("something's not right")
        i = 0  # index of the chapter currently being collected
        body_list = []
        body = []
        flag = False  # True once we are inside the requested chapter range
        for child in parent_elm.iterchildren():
            if isinstance(child, CT_P):
                paragraph = Paragraph(child, parent)
                if i < len(directory) - 1:
                    # Heading of the current chapter: start collecting.
                    if paragraph.text == directory[i][1] and 'Heading' in paragraph.style.name:
                        flag = True
                        continue
                    # Heading of the next chapter: flush the current body.
                    if paragraph.text == directory[i + 1][1] and 'Heading' in paragraph.style.name:
                        new_tuple = directory[i] + (repr(body),)
                        body_list.append(new_tuple)
                        body = []
                        i += 1
                        continue
                    if flag:
                        if self.is_image(paragraph, parent):
                            body.append(self.get_ImagePart(paragraph, parent))
                        elif paragraph.text != '':
                            body.append(paragraph.text)
                elif i == len(directory) - 1:
                    # Last chapter: the next heading of any kind ends it.
                    # NOTE(review): if the document ends without a further
                    # heading, the last chapter's body is never appended.
                    if 'Heading' in paragraph.style.name:
                        new_tuple = directory[i] + (repr(body),)
                        body_list.append(new_tuple)
                        break
                    if self.is_image(paragraph, parent):
                        body.append(self.get_ImagePart(paragraph, parent))
                    elif paragraph.text != '':
                        body.append(paragraph.text)
                else:
                    flag = False
            elif isinstance(child, CT_Tbl):
                if flag:
                    # Flatten the table: one tab-joined string per row.
                    table = []
                    for row in Table(child, parent).rows:
                        row_text = [cell.text for cell in row.cells]
                        table.append("\t".join(row_text))
                    body.append(table)
        return body_list

    def main(self, chapter_name):
        """Run the full pipeline for *chapter_name*, printing each stage."""
        directory = self.get_chapter_number(chapter_name)
        print(directory)
        chapter_body_list = self.iter_block_items(self.doc, directory)
        print(chapter_body_list)
        # Alternative nested-dict output:
        # hierarchy = self.build_hierarchy(chapter_body_list)
        # print(hierarchy)
        json_tree = self.build_json_tree(chapter_body_list)
        print(json_tree)
if __name__ == '__main__':
    # Demo: extract the chapter tree for '工程需求' from a sample document.
    source_doc = 'test - 副本.docx'
    DocxChapterExtractor(source_doc).main('工程需求')