def _literal_end(text, start):
    """Return the index just past the quoted literal opening at *start*.

    Backslash escapes are skipped as two-character units, so an escaped
    backslash directly before the closing quote does not hide that quote.
    Returns -1 when the literal is not closed on this line.
    """
    quote = text[start]
    pos = start + 1
    size = len(text)
    while pos < size:
        if text[pos] == '\\':
            pos += 2
            continue
        if text[pos] == quote:
            return pos + 1
        pos += 1
    return -1


def normalize_code_line(line):
    """Reduce one source line to a comparable, whitespace-free form.

    The line is scanned once from left to right:

    * quoted literals (``"..."`` and ``'...'``) are copied untouched, so
      comment markers inside them are ignored;
    * a ``//`` outside a literal ends the line;
    * every ``/* ... */`` pair that closes on the same line is dropped.
      An unclosed ``/*`` is kept as ordinary text.

    After comment removal the text is stripped, a line consisting only of
    ``{``, ``}`` or ``;`` is discarded, one trailing ``;`` is removed and
    all remaining whitespace is deleted.

    Args:
        line: Raw source line, with or without its trailing newline.

    Returns:
        The normalized text, or ``""`` for blank, comment-only and
        single-brace/semicolon lines.
    """
    if not line or not line.strip():
        return ""

    text = line.rstrip('\n')
    size = len(text)
    kept = []
    pos = 0
    while pos < size:
        ch = text[pos]

        if ch == '"' or ch == "'":
            end = _literal_end(text, pos)
            if end != -1:
                kept.append(text[pos:end])
                pos = end
                continue
            if ch == '"':
                # Unterminated string: the rest of the line is string text,
                # so no comment stripping applies to it.
                kept.append(text[pos:])
                break
            # A lone apostrophe (e.g. a sized literal such as 8'hFF) is
            # ordinary text, not the start of a literal.
            kept.append(ch)
            pos += 1
            continue

        marker = text[pos:pos + 2]
        if marker == '//':
            break
        if marker == '/*':
            # Search after the opener so "/*/" is not taken as a full comment.
            close = text.find('*/', pos + 2)
            if close != -1:
                pos = close + 2
                continue

        kept.append(ch)
        pos += 1

    code = ''.join(kept).strip()
    if not code:
        return ""

    # Lone braces / semicolons carry no information for duplicate detection.
    if len(code) == 1 and code in '{;}':
        return ""

    if code.endswith(';'):
        code = code[:-1]

    return re.sub(r'\s+', '', code)
'\\'): - in_string = not in_string - elif not in_string and line[i:i+2] == '//': - line = line[:i].strip() # 移除注释部分 - break - - # 处理 "/* */" 注释(单行内的情况) - if '/*' in line and '*/' in line: - # 找到第一个不在字符串中的 "/*" - in_string = False - comment_start = -1 - for i in range(len(line) - 1): - if line[i] == '"' and (i == 0 or line[i-1] != '\\'): - in_string = not in_string - elif not in_string and line[i:i+2] == '/*': - comment_start = i - break - - if comment_start != -1: - # 查找对应的 "*/" - comment_end = line.find('*/', comment_start) - if comment_end != -1: - # 移除注释部分 - before_comment = line[:comment_start] - after_comment = line[comment_end+2:] - line = (before_comment + after_comment).strip() - - # 支持的赋值运算符(按长度排序,优先匹配长的) - assignment_ops = ['>>>=', '>>=', '<<=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '='] - - # 记录字符位置 - i = 0 - n = len(line) - - while i < n: - char = line[i] - - # 跳过字符串 - if char == '"' or char == "'": - quote_char = char - i += 1 - while i < n and not (line[i] == quote_char and line[i-1] != '\\'): - i += 1 - if i < n: - i += 1 - continue - - # 检查逻辑运算符并跳过 - if i < n - 1: - two_chars = line[i:i+2] - if two_chars in ('==', '!=', '<=', '>='): - i += 2 - continue - - # 检查JavaScript严格相等运算符 - if i < n - 2: - three_chars = line[i:i+3] - if three_chars in ('===', '!=='): - i += 3 - continue - - # 检查赋值运算符 - for op in assignment_ops: - op_len = len(op) - if i + op_len <= n and line[i:i+op_len] == op: - # 确保这不是其他运算符的一部分 - is_valid_assignment = True - - # 检查前面字符 - if i > 0: - prev_char = line[i-1] - # 赋值运算符前面不能是=, !, <, >等 - if prev_char in '=!<>': - is_valid_assignment = False - - # 检查后面字符 - if i + op_len < n: - next_char = line[i+op_len] - # 赋值运算符后面不能是=(避免误判==为=) - if next_char == '=': - is_valid_assignment = False - - if is_valid_assignment: - left = line[:i].strip() - right = line[i+op_len:].strip() - - # 关键修复:对右侧表达式进一步处理 - if right: - # 移除右侧可能的分号 - if right.endswith(';'): - right = right[:-1].strip() - # 移除右侧的所有空格 - right = re.sub(r'\s+', '', right) - - 
def normalize_assignment_side(side):
    """Return *side* with every whitespace character removed.

    Args:
        side: Left- or right-hand text of an assignment; may be empty or None.

    Returns:
        The compacted text, or ``""`` when *side* is falsy.
    """
    if not side:
        return ""
    return re.sub(r'\s+', '', side)


# Decimal number: digits with optional fraction and exponent, or ".5" form.
_NUMBER = r'(?:\d+\.?\d*|\.\d+)(?:e[+-]?\d+)?'
# One complete quoted literal; the body may not contain an unescaped quote.
_QUOTED = r'''(?:'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*")'''

# Literals that are matched against the expression as written.
_LITERAL_RE = re.compile(
    '|'.join((
        r'0x[0-9a-f]+',                      # hexadecimal
        r'0b[01]+',                          # binary
        r'true|false|null|none',             # keyword constants
        rf'{_QUOTED}(?:\+?{_QUOTED})*',      # "a", 'c', "a"+"b", "a""b"
    )),
    re.IGNORECASE,
)

# Signed numbers joined by arithmetic operators, matched after parentheses
# have been removed.
_ARITHMETIC_RE = re.compile(
    rf'[+-]*{_NUMBER}(?:[+\-*/%]+{_NUMBER})*',
    re.IGNORECASE,
)


def is_constant_expression(expression):
    """Tell whether *expression* consists only of literal constants.

    Recognized as constant (case-insensitively, whitespace ignored):
    hexadecimal and binary literals, ``true``/``false``/``null``/``None``,
    quoted character/string literals (optionally concatenated), and
    arithmetic over decimal numbers such as ``-3.14``, ``1e5`` or
    ``(1+2)*3``.

    Unlike a plain character-set test, identifiers such as ``e`` or ``E``
    are not treated as numbers, and ``'a'+name+'b'`` is not mistaken for a
    single quoted literal.

    Args:
        expression: Right-hand side text of an assignment; may be None.

    Returns:
        True when the expression is a constant, False for empty input and
        for anything that references a name.
    """
    if not expression:
        return False

    expr = re.sub(r'\s+', '', expression)
    if not expr:
        return False

    if _LITERAL_RE.fullmatch(expr):
        return True

    # Parentheses only group; they do not affect whether operands are literal.
    flattened = re.sub(r'[()]', '', expr)
    return _ARITHMETIC_RE.fullmatch(flattened) is not None
{start_line}-{end_line}\n") - return None - - status_callback("开始检测重复代码行...") - - # 数据结构 - duplicates = defaultdict(list) - line_details = [] - duplicate_groups = [] - - # 处理指定范围内的每一行 - status_callback(f"正在处理文件: {os.path.basename(file_path)}") - status_callback(f"检查行数范围: {start_line} - {end_line}") - - for i in range(1, len(lines) + 1): - if start_line <= i <= end_line: - original_line = lines[i-1].rstrip('\n') - normalized_line = normalize_code_line(original_line) - - # 优化:跳过单字符行 - if normalized_line and len(normalized_line) > 1: # 只处理长度大于1的行 - duplicates[normalized_line].append(i) - line_details.append({ - "line_number": i, - "original": original_line, - "normalized": normalized_line - }) - - status_callback(f"已完成代码行标准化,正在分析重复行...") - - # 找出所有重复行 - for normalized_line, line_numbers in duplicates.items(): - if len(line_numbers) > 1: - # 收集重复行的原始内容 - original_lines = [] - for line_num in line_numbers: - for detail in line_details: - if detail["line_number"] == line_num: - original_lines.append(detail["original"]) - break - - duplicate_groups.append({ - "normalized_content": normalized_line, - "occurrences": len(line_numbers), - "line_numbers": line_numbers, - "original_lines": original_lines - }) - - # 计算统计信息 - actual_lines_in_range = end_line - start_line + 1 - if start_line > len(lines) or end_line < 1: - actual_lines_in_range = 0 - - total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups) - - duplicate_rate = 0.0 - if actual_lines_in_range > 0: - duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100 - - result = { - "success": True, - "file_path": file_path, - "file_name": os.path.basename(file_path), - "total_lines": len(lines), - "start_line": start_line, - "end_line": end_line, - "actual_lines_in_range": actual_lines_in_range, - "unique_code_lines": len([v for v in duplicates.values() if v]), - "duplicate_groups": duplicate_groups, - "duplicate_groups_count": len(duplicate_groups), - "total_duplicate_lines": 
def detect_adjacent_duplicate_lines_gui(file_path, start_line, end_line, output_callback, status_callback):
    """Find runs of consecutive lines whose normalized code is identical.

    Each line in the requested range is normalized with
    ``normalize_code_line``; lines that normalize to nothing or to a single
    character are ignored and therefore break adjacency. Every maximal run
    of two or more consecutive, identical lines becomes exactly one group,
    so no line is counted more than once.

    Args:
        file_path: Path of the source file to inspect.
        start_line: First line (1-based). When both bounds are <= 0 the
            whole file is checked.
        end_line: Last line (1-based, inclusive); clamped to the file length.
        output_callback: Called with an error message string on failure.
        status_callback: Called with progress message strings.

    Returns:
        A result dict (``detection_mode`` is ``"adjacent"``), or None when
        the file is missing, unreadable, or the range is invalid. Each group
        carries ``duplicate_type``: ``"double_adjacent"`` for a run of two,
        ``"triple_adjacent"`` for a run of three and ``"multi_adjacent"``
        for longer runs.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None

    # No explicit range means "check everything".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        end_line = len(lines)

    if end_line > len(lines):
        end_line = len(lines)

    if start_line < 1 or end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None

    status_callback("开始检测相邻重复行...")
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")

    # line number -> normalized / original text, for significant lines only
    normalized_lines = {}
    original_lines = {}
    for number in range(start_line, end_line + 1):
        original = lines[number - 1].rstrip('\n')
        normalized = normalize_code_line(original)
        if normalized and len(normalized) > 1:
            normalized_lines[number] = normalized
            original_lines[number] = original

    status_callback("正在分析相邻行重复情况...")

    def build_group(run):
        """Describe one maximal run of identical consecutive lines."""
        if len(run) == 2:
            duplicate_type = "double_adjacent"
        elif len(run) == 3:
            duplicate_type = "triple_adjacent"
        else:
            duplicate_type = "multi_adjacent"
        return {
            "normalized_content": normalized_lines[run[0]],
            "occurrences": len(run),
            "line_numbers": list(run),
            "original_lines": [original_lines[n] for n in run],
            "duplicate_type": duplicate_type,
        }

    # Line numbers were inserted in ascending order, so groups come out
    # already sorted by their first line.
    adjacent_duplicates = []
    run = []
    for number, normalized in normalized_lines.items():
        if run and number == run[-1] + 1 and normalized == normalized_lines[run[-1]]:
            run.append(number)
            continue
        if len(run) > 1:
            adjacent_duplicates.append(build_group(run))
        run = [number]
    if len(run) > 1:
        adjacent_duplicates.append(build_group(run))

    # The range was validated above, so it always holds at least one line.
    actual_lines_in_range = end_line - start_line + 1
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in adjacent_duplicates)
    duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100

    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": len(lines),
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "unique_code_lines": len(set(normalized_lines.values())),
        "duplicate_groups": adjacent_duplicates,
        "duplicate_groups_count": len(adjacent_duplicates),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "adjacent"
    }

    status_callback("相邻行检测完成,正在生成结果...")
    return result
os.path.exists(file_path): - output_callback(f"错误: 文件不存在: {file_path}\n") - return None - - try: - with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: - lines = f.readlines() - except Exception as e: - output_callback(f"读取文件失败: {e}\n") - return None - - # 处理行数范围 - if start_line <= 0 and end_line <= 0: - start_line = 1 - end_line = len(lines) - - if end_line > len(lines): - end_line = len(lines) - - if start_line < 1 or end_line < start_line: - output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n") - return None - - status_callback("开始检测等号左侧相等...") - - # 数据结构 - left_side_map = defaultdict(list) # 左侧内容 -> 行号列表 - line_details = [] # 行详细信息 - duplicate_groups = [] # 重复组 - - status_callback(f"正在处理文件: {os.path.basename(file_path)}") - status_callback(f"检查行数范围: {start_line} - {end_line}") - - # 遍历指定范围的行 - for i in range(1, len(lines) + 1): - if start_line <= i <= end_line: - original_line = lines[i-1].rstrip('\n') - - # 检查是否为赋值语句 - is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line) - - if is_assignment and left_side: - normalized_left = normalize_assignment_side(left_side) - - if normalized_left: # 跳过空的左侧 - left_side_map[normalized_left].append(i) - line_details.append({ - "line_number": i, - "original_line": original_line, - "left_side": left_side, - "normalized_left": normalized_left, - "right_side": right_side, - "operator": operator - }) - - status_callback(f"已分析{len(line_details)}个赋值语句,正在检测左侧相等...") - - # 找出左侧相等的组 - for normalized_left, line_numbers in left_side_map.items(): - if len(line_numbers) > 1: - # 收集详细信息 - original_lines = [] - right_sides = [] - operators = [] - - for line_num in line_numbers: - for detail in line_details: - if detail["line_number"] == line_num: - original_lines.append(detail["original_line"]) - right_sides.append(detail["right_side"]) - operators.append(detail["operator"]) - break - - duplicate_groups.append({ - "normalized_content": normalized_left, - "occurrences": len(line_numbers), 
def detect_right_side_equality_gui(file_path, start_line, end_line, output_callback, status_callback):
    """Find assignments in a line range that share the same right-hand side.

    Each line is split with ``extract_assignment_sides``. Right-hand sides
    that ``is_constant_expression`` classifies as constants are counted but
    excluded from grouping; the others are compacted with
    ``normalize_assignment_side`` and grouped by that text.

    Args:
        file_path: Path of the source file to inspect.
        start_line: First line (1-based). When both bounds are <= 0 the
            whole file is checked.
        end_line: Last line (1-based, inclusive); clamped to the file length.
        output_callback: Called with an error message string on failure.
        status_callback: Called with progress message strings.

    Returns:
        A result dict (``detection_mode`` is ``"right_side"``), or None when
        the file is missing, unreadable, or the range is invalid.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None

    # No explicit range means "check everything".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        end_line = len(lines)

    if end_line > len(lines):
        end_line = len(lines)

    if start_line < 1 or end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None

    status_callback("开始检测等号右侧相等(排除常数)...")

    right_side_map = defaultdict(list)   # normalized right side -> line numbers
    details_by_line = {}                 # line number -> assignment details

    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")

    for number in range(start_line, end_line + 1):
        original_line = lines[number - 1].rstrip('\n')
        is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line)
        if not (is_assignment and right_side):
            continue

        if is_constant_expression(right_side):
            # Constants are recorded for the statistics only.
            details_by_line[number] = {
                "line_number": number,
                "original_line": original_line,
                "left_side": left_side,
                "right_side": right_side,
                "normalized_right": "",
                "operator": operator,
                "is_constant": True
            }
            continue

        normalized_right = normalize_assignment_side(right_side)
        if normalized_right:
            right_side_map[normalized_right].append(number)
            details_by_line[number] = {
                "line_number": number,
                "original_line": original_line,
                "left_side": left_side,
                "right_side": right_side,
                "normalized_right": normalized_right,
                "operator": operator
            }

    status_callback(f"已分析{len(details_by_line)}个赋值语句,正在检测右侧相等(排除常数)...")

    duplicate_groups = []
    for normalized_right, line_numbers in right_side_map.items():
        if len(line_numbers) < 2:
            continue
        # Direct lookup by line number instead of rescanning every record.
        members = [details_by_line[n] for n in line_numbers]
        duplicate_groups.append({
            "normalized_content": normalized_right,
            "occurrences": len(line_numbers),
            "line_numbers": line_numbers,
            "original_lines": [m["original_line"] for m in members],
            "left_sides": [m["left_side"] for m in members],
            "operators": [m["operator"] for m in members],
            "side": "right"
        })

    # The range was validated above, so it always holds at least one line.
    actual_lines_in_range = end_line - start_line + 1

    constant_expressions = sum(
        1 for detail in details_by_line.values() if detail.get("is_constant", False)
    )
    variable_expressions = len(details_by_line) - constant_expressions

    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100

    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": len(lines),
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "assignment_statements": len(details_by_line),
        "variable_expressions": variable_expressions,
        "constant_expressions": constant_expressions,
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "right_side"
    }

    status_callback(f"等号右侧重复检测完成(已排除{constant_expressions}个常数表达式)")
    return result
output_callback(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n") - output_callback(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n") - - output_callback(f"范围内总行数: {result['actual_lines_in_range']}\n") - output_callback("-" * 60 + "\n") - - # 输出重复行组 - if not duplicate_groups: - if detection_mode == "adjacent": - output_callback("在指定范围内未找到相邻重复的代码行\n") - elif detection_mode == "left_side": - output_callback("在指定范围内未找到左侧相等的赋值语句\n") - elif detection_mode == "right_side": - output_callback("在指定范围内未找到右侧相等的变量表达式\n") - else: - output_callback("在指定范围内未找到重复的代码行\n") - else: - for i, group in enumerate(duplicate_groups, 1): - side = group.get("side", "") - - if detection_mode == "adjacent": - # 现有相邻检测输出 - dup_type = group.get("duplicate_type", "") - occurrences = group.get("occurrences", 2) - - if dup_type == "triple_adjacent": - desc = f"第{i}组相邻重复行号 (三行连续重复): " - elif dup_type == "double_adjacent": - desc = f"第{i}组相邻重复行号 (两行相邻重复): " - else: - desc = f"第{i}组相邻重复行号: " - elif detection_mode == "left_side": - desc = f"第{i}组左侧相同赋值 (行号): " - elif detection_mode == "right_side": - desc = f"第{i}组右侧相同变量表达式 (行号): " - else: - desc = f"第{i}组重复行号: " - - output_callback(f"{desc}{', '.join(map(str, group['line_numbers']))}\n") - - if detection_mode == "left_side": - output_callback(f" 相同左侧: {group['normalized_content']}\n") - for idx, (line_num, original_line, right_side, operator) in enumerate(zip( - group['line_numbers'], group['original_lines'], - group.get('right_sides', []), group.get('operators', []) - )): - output_callback(f" 行 {line_num}: {original_line}\n") - if right_side: - output_callback(f" 右侧表达式: {right_side}\n") - elif detection_mode == "right_side": - output_callback(f" 相同右侧变量表达式: {group['normalized_content']}\n") - for idx, (line_num, original_line, left_side, operator) in enumerate(zip( - group['line_numbers'], group['original_lines'], - group.get('left_sides', []), group.get('operators', []) - )): - output_callback(f" 行 {line_num}: {original_line}\n") - 
def save_results_to_log(result, log_file_path):
    """Append a formatted detection report to the log file.

    The whole report is built in memory first and written with a single
    call, so a malformed *result* can no longer leave a half-written report
    in the log.

    Args:
        result: Result dict produced by one of the detection functions.
        log_file_path: Path of the log file; it is opened in append mode.

    Returns:
        True when the report was written. False when *result* is empty or
        unsuccessful, when formatting fails, or when the file cannot be
        written.
    """
    if not result or not result.get("success", False):
        return False

    try:
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        detection_mode = result.get("detection_mode", "full")

        if detection_mode == "adjacent":
            mode_desc = "相邻行重复检测"
        elif detection_mode == "left_side":
            mode_desc = "等号左侧重复检测"
        elif detection_mode == "right_side":
            mode_desc = "等号右侧重复检测(排除常数)"
        else:
            mode_desc = "全文重复检测"

        rule = '=' * 60
        report = [
            f"\n\n{rule}\n",
            f"重复代码检测报告 - {mode_desc} - {timestamp}\n",
            f"{rule}\n\n",
            f"文件: {result['file_path']}\n",
            f"检查行数范围: {result['start_line']} - {result['end_line']}\n",
            f"范围内总行数: {result['actual_lines_in_range']}\n",
        ]

        if detection_mode in ["left_side", "right_side"]:
            report.append(f"范围内赋值语句数: {result.get('assignment_statements', 0)}\n")
            if detection_mode == "right_side":
                report.append(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n")
                report.append(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n")

        report.append(f"检测模式: {mode_desc}\n\n")

        for i, group in enumerate(result.get('duplicate_groups', []), 1):
            if detection_mode == "adjacent":
                dup_type = group.get("duplicate_type", "")
                if dup_type == "triple_adjacent":
                    desc = f"第{i}组相邻重复行号 (三行连续重复): "
                elif dup_type == "double_adjacent":
                    desc = f"第{i}组相邻重复行号 (两行相邻重复): "
                else:
                    desc = f"第{i}组相邻重复行号: "
            elif detection_mode == "left_side":
                desc = f"第{i}组左侧相同赋值 (行号): "
            elif detection_mode == "right_side":
                desc = f"第{i}组右侧相同变量表达式 (行号): "
            else:
                desc = f"第{i}组重复行号: "

            report.append(f"{desc}{', '.join(map(str, group['line_numbers']))}\n")

            if detection_mode == "left_side":
                report.append(f" 相同左侧: {group['normalized_content']}\n")
                # Zipping with 'operators' keeps the original truncation
                # behaviour when that list is shorter or missing.
                for line_num, original_line, right_side, _ in zip(
                    group['line_numbers'], group['original_lines'],
                    group.get('right_sides', []), group.get('operators', [])
                ):
                    report.append(f" 行 {line_num}: {original_line}\n")
                    if right_side:
                        report.append(f" 右侧表达式: {right_side}\n")
            elif detection_mode == "right_side":
                report.append(f" 相同右侧变量表达式: {group['normalized_content']}\n")
                for line_num, original_line, left_side, _ in zip(
                    group['line_numbers'], group['original_lines'],
                    group.get('left_sides', []), group.get('operators', [])
                ):
                    report.append(f" 行 {line_num}: {original_line}\n")
                    if left_side:
                        report.append(f" 左侧变量: {left_side}\n")
            else:
                report.append(f" 重复代码: {group['normalized_content']}\n")
                for line_num, original_line in zip(group['line_numbers'], group['original_lines']):
                    report.append(f" 行 {line_num}: {original_line}\n")

            report.append("\n")

        report.append(f"重复代码总组数: {result['duplicate_groups_count']}\n")
        report.append(f"总重复行数: {result['total_duplicate_lines']}\n")
        report.append(f"重复率: {result['duplicate_rate']:.2f}%\n")
        report.append(f"{rule}\n")

        with open(log_file_path, 'a', encoding='utf-8') as f:
            f.write(''.join(report))
    except Exception:
        # Logging is best-effort: any failure is reported through the
        # return value instead of being raised to the caller.
        return False

    return True
self.main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # 配置网格权重 - root.columnconfigure(0, weight=1) - root.rowconfigure(0, weight=1) - - # 配置主框架的行列权重 - self.main_frame.columnconfigure(0, weight=1) - self.main_frame.rowconfigure(0, weight=0) # 标题 - self.main_frame.rowconfigure(1, weight=0) # 文件选择 - self.main_frame.rowconfigure(2, weight=0) # 行数范围 - self.main_frame.rowconfigure(3, weight=0) # 提醒信息 - self.main_frame.rowconfigure(4, weight=0) # 按钮 - self.main_frame.rowconfigure(5, weight=0) # 状态 - self.main_frame.rowconfigure(6, weight=1) # 结果面板 - self.main_frame.rowconfigure(7, weight=0) # 底部信息 - - # 创建标题 - self.create_title() - - # 创建控制面板 - self.create_control_panel() - - # 创建底部结果面板 - self.create_result_panel() - - # 创建底部信息 - self.create_footer() - - # 日志文件路径 - self.log_file = "duplicate_code_log.txt" - - # 消息队列 - self.message_queue = Queue() - - # 窗口大小变化相关 - self.last_width = 1100 - self.last_height = 750 - self.root.bind('', self.on_window_resize) - - # 初始化引用 - self.process_text_frame = None - self.result_text_frame = None - self.paned_window = None - - # 当前检测模式 - self.current_mode = "full" # 默认全文检测 - - # 启动消息队列处理 - self.process_queue() - - def setup_styles(self): - """设置样式 - 浅蓝色扁平风格""" - style = ttk.Style() - style.theme_use('clam') - - # 配置颜色方案 - bg_color = '#e8f4f8' # 浅蓝色背景 - fg_color = '#2c3e50' # 深蓝色文字 - btn_bg = '#3498db' # 蓝色按钮背景 - btn_fg = '#ffffff' # 白色按钮文字 - hover_bg = '#2980b9' # 悬停时的深蓝色 - active_bg = '#1c5c8a' # 按下时的更深蓝色 - frame_bg = '#d4eaf7' # 框架背景色 - label_frame_bg = '#d4eaf7' # 标签框架背景色 - - # 基本窗口样式 - style.configure('.', background=bg_color, foreground=fg_color, font=('微软雅黑', 10)) - - # 标签框架样式 - style.configure('TLabelframe', background=frame_bg, relief='flat', borderwidth=2) - style.configure('TLabelframe.Label', background=label_frame_bg, foreground=fg_color, font=('微软雅黑', 10, 'bold')) - - # 主按钮样式 - style.configure('TButton', - background=btn_bg, - foreground=btn_fg, - font=('微软雅黑', 10, 'bold'), - borderwidth=1, - 
relief='flat', - padding=6) - - # 按钮悬停效果 - style.map('TButton', - background=[('active', hover_bg), ('pressed', active_bg)], - relief=[('pressed', 'sunken')]) - - # 特殊按钮样式 - style.configure('Highlight.TButton', - background='#e74c3c', # 红色 - foreground=btn_fg, - font=('微软雅黑', 10, 'bold'), - borderwidth=1, - relief='flat', - padding=6) - - style.configure('Adjacent.TButton', - background='#2ecc71', # 绿色 - foreground=btn_fg, - font=('微软雅黑', 10, 'bold'), - borderwidth=1, - relief='flat', - padding=6) - - style.configure('LeftSide.TButton', - background='#9b59b6', # 紫色 - foreground=btn_fg, - font=('微软雅黑', 10, 'bold'), - borderwidth=1, - relief='flat', - padding=6) - - style.configure('RightSide.TButton', - background='#e67e22', # 橙色 - foreground=btn_fg, - font=('微软雅黑', 10, 'bold'), - borderwidth=1, - relief='flat', - padding=6) - - # 标签样式 - style.configure('Title.TLabel', - font=('微软雅黑', 24, 'bold'), - foreground='#e74c3c', # 红色 - background=bg_color) - - style.configure('Subtitle.TLabel', - font=('微软雅黑', 12, 'bold'), - foreground=fg_color, - background=bg_color) - - style.configure('Status.TLabel', - font=('微软雅黑', 10), - foreground='#3498db', # 蓝色 - background=bg_color) - - style.configure('Footer.TLabel', - font=('微软雅黑', 10), - foreground='#7f8c8d', # 中灰色 - background=bg_color) - - # 文本框样式 - style.configure('TEntry', - fieldbackground='#ffffff', - foreground='#2c3e50', - font=('微软雅黑', 10), - borderwidth=1, - relief='flat') - - # 滚动条样式 - style.configure('Vertical.TScrollbar', - background='#bdc3c7', - troughcolor=bg_color, - borderwidth=0, - relief='flat') - - style.configure('Horizontal.TScrollbar', - background='#bdc3c7', - troughcolor=bg_color, - borderwidth=0, - relief='flat') - - def create_title(self): - """创建标题区域 - 红色字体""" - # 创建一个单独的框架来包含标题 - title_frame = ttk.Frame(self.main_frame, style='TFrame') - title_frame.grid(row=0, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(0, 20)) - title_frame.columnconfigure(0, weight=1) - - # 使用普通Label而不是ttk.Label,以便设置字体颜色 
- # 标题改为红色字体 - title_label = tk.Label( - title_frame, - text="源代码重复行统计软件V1.00", - font=('微软雅黑', 24, 'bold'), - fg='#e74c3c', # 红色字体 - bg='#e8f4f8' # 浅蓝色背景 - ) - title_label.grid(row=0, column=0, sticky=tk.N) - - def create_control_panel(self): - """创建控制面板""" - # 文件选择区域 - file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding="10") - file_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(0, 10)) - file_frame.columnconfigure(1, weight=1) - - ttk.Label(file_frame, text="源代码文件:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) - - self.file_path_var = tk.StringVar() - self.file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=60) - self.file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 5)) - - self.browse_button = ttk.Button( - file_frame, - text="浏览...", - command=self.browse_file, - style='Highlight.TButton' - ) - self.browse_button.grid(row=0, column=2) - - # 为浏览按钮添加提示 - self.create_tooltip(self.browse_button, "选择要检测的源代码文件") - - # 行数范围区域 - range_frame = ttk.LabelFrame(self.main_frame, text="行数范围 (可选)", padding="10") - range_frame.grid(row=2, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10)) - - # 配置行数范围框的内部列权重 - range_frame.columnconfigure(0, weight=0) # 标签 - range_frame.columnconfigure(1, weight=0) # 输入框 - range_frame.columnconfigure(2, weight=1) # 说明文字 - - ttk.Label(range_frame, text="起始行号:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5)) - self.start_entry = ttk.Entry(range_frame, width=15) - self.start_entry.grid(row=0, column=1, sticky=tk.W, padx=(0, 20)) - ttk.Label(range_frame, text="(如不填写,默认检查整个文件)").grid(row=0, column=2, sticky=tk.W) - - ttk.Label(range_frame, text="结束行号:").grid(row=1, column=0, sticky=tk.W, padx=(0, 5)) - self.end_entry = ttk.Entry(range_frame, width=15) - self.end_entry.grid(row=1, column=1, sticky=tk.W, padx=(0, 20)) - - # 为行数输入框添加提示 - self.create_tooltip(self.start_entry, "指定检测的起始行号(可选),不填写则从文件开头开始") - self.create_tooltip(self.end_entry, "指定检测的结束行号(可选),不填写则检测到文件结尾") - - # 添加提醒信息 - 
reminder_frame = ttk.Frame(self.main_frame) - reminder_frame.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=(5, 10)) - reminder_frame.columnconfigure(0, weight=1) - - reminder_label = tk.Label( - reminder_frame, - text="⚠️注意:1.重复行检测会忽略注释上的不同以及语句中空格等非可执行内容,单字符重复行(如{};)也将被忽略。2.右侧相等检测会排除常数表达式。 ", - font=('微软雅黑', 9), - fg='#e74c3c', # 蓝色 - bg='#e8f4f8', # 浅蓝色背景 - wraplength=800 - ) - reminder_label.grid(row=0, column=0, sticky=tk.W) - - # 按钮区域 - button_frame = ttk.Frame(self.main_frame) - button_frame.grid(row=4, column=0, pady=(0, 10)) - - # 全文检测按钮 - self.full_detect_button = ttk.Button( - button_frame, - text="检查全文重复行", - command=lambda: self.start_detection("full"), - style='TButton', - width=20 - ) - self.full_detect_button.pack(side=tk.LEFT, padx=(0, 10)) - - # 相邻行检测按钮 - self.adjacent_detect_button = ttk.Button( - button_frame, - text="检查相邻重复行", - command=lambda: self.start_detection("adjacent"), - style='Adjacent.TButton', - width=20 - ) - self.adjacent_detect_button.pack(side=tk.LEFT, padx=(0, 10)) - - # 等号左侧重复检测按钮 - self.left_side_button = ttk.Button( - button_frame, - text="等号左侧重复检测", - command=lambda: self.start_detection("left_side"), - style='LeftSide.TButton', - width=20 - ) - self.left_side_button.pack(side=tk.LEFT, padx=(0, 10)) - - # 等号右侧重复检测按钮 - self.right_side_button = ttk.Button( - button_frame, - text="等号右侧重复检测", - command=lambda: self.start_detection("right_side"), - style='RightSide.TButton', - width=20 - ) - self.right_side_button.pack(side=tk.LEFT, padx=(0, 10)) - - # 清空结果按钮 - self.clear_button = ttk.Button( - button_frame, - text="清空结果", - command=self.clear_results, - style='TButton', - width=15 - ) - self.clear_button.pack(side=tk.LEFT, padx=(0, 10)) - - # 查看日志按钮 - self.view_log_button = ttk.Button( - button_frame, - text="查看日志", - command=self.view_log, - style='TButton', - width=15 - ) - self.view_log_button.pack(side=tk.LEFT) - - # 为所有按钮添加提示 - self.create_tooltip(self.full_detect_button, "在源代码的指定行范围内查找所有行中的重复行(排除单字符重复)") - 
self.create_tooltip(self.adjacent_detect_button, "在源代码的指定行范围内仅查找相邻行中的重复行(排除单字符重复)") - self.create_tooltip(self.left_side_button, "在源代码的指定行范围内查找不同行之间存在的等号左侧表达式相等的行") - self.create_tooltip(self.right_side_button, "在源代码的指定行范围内查找不同行之间存在的等号右侧表达式相等的行(排除常数)") - self.create_tooltip(self.clear_button, "清空所有检测结果和处理过程信息") - self.create_tooltip(self.view_log_button, "查看历史检测日志文件") - - # 状态标签 - self.status_var = tk.StringVar(value="就绪") - self.status_label = ttk.Label( - self.main_frame, - textvariable=self.status_var, - style='Status.TLabel' - ) - self.status_label.grid(row=5, column=0, sticky=tk.W, pady=(0, 10)) - - def create_tooltip(self, widget, text): - """创建工具提示""" - def enter(event): - x, y, cx, cy = widget.bbox("insert") - x += widget.winfo_rootx() + 25 - y += widget.winfo_rooty() + 20 - - # 创建提示框 - self.tooltip = tk.Toplevel(widget) - self.tooltip.wm_overrideredirect(True) - self.tooltip.wm_geometry(f"+{x}+{y}") - - # 设置提示框样式 - label = tk.Label(self.tooltip, text=text, justify='left', - background="#f1c40f", # 亮黄色背景 - foreground="#2c3e50", # 深蓝色文字 - relief='solid', - borderwidth=1, - font=("微软雅黑", "9", "normal"), - padx=8, - pady=5) - label.pack() - - def leave(event): - if hasattr(self, 'tooltip'): - self.tooltip.destroy() - - widget.bind("", enter) - widget.bind("", leave) - - def create_result_panel(self): - """创建结果面板""" - result_frame = ttk.LabelFrame(self.main_frame, text="检测结果", padding="5") - result_frame.grid(row=6, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10)) - - # 正确配置结果框架的权重 - result_frame.columnconfigure(0, weight=1) - result_frame.rowconfigure(0, weight=1) - - # 创建分隔面板 - self.paned_window = ttk.PanedWindow(result_frame, orient=tk.HORIZONTAL) - self.paned_window.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # 左侧:处理过程 - process_frame = ttk.LabelFrame(self.paned_window, text="处理过程", padding="5") - self.paned_window.add(process_frame, weight=1) - - # 配置处理过程框架的内部权重 - process_frame.columnconfigure(0, weight=1) - 
process_frame.rowconfigure(0, weight=1) - - self.process_text = scrolledtext.ScrolledText( - process_frame, - wrap=tk.WORD, - font=('微软雅黑', 10), - bg='#ffffff', # 白色背景 - fg='#2c3e50', # 深蓝色文字 - insertbackground='#2c3e50' # 光标颜色 - ) - # 使用 grid 布局并设置 sticky - self.process_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # 右侧:检测结果 - result_display_frame = ttk.LabelFrame(self.paned_window, text="检测结果", padding="5") - self.paned_window.add(result_display_frame, weight=2) - - # 配置检测结果框架的内部权重 - result_display_frame.columnconfigure(0, weight=1) - result_display_frame.rowconfigure(0, weight=1) - - self.result_text = scrolledtext.ScrolledText( - result_display_frame, - wrap=tk.WORD, - font=('微软雅黑', 10), - bg='#ffffff', # 白色背景 - fg='#2c3e50', # 深蓝色文字 - insertbackground='#2c3e50' # 光标颜色 - ) - # 使用 grid 布局并设置 sticky - self.result_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S)) - - # 保存文本区域引用,用于窗口大小变化时调整 - self.process_text_frame = process_frame - self.result_text_frame = result_display_frame - - def create_footer(self): - """创建底部信息""" - footer_frame = ttk.Frame(self.main_frame) - footer_frame.grid(row=7, column=0, sticky=(tk.W, tk.E), pady=(20, 0)) - footer_frame.columnconfigure(0, weight=1) - - # 创建底部信息标签 - footer_label = ttk.Label( - footer_frame, - text="中国科学院卫星软件评测中心预研组,联系人:王明亮,联系电话:13761306729", - style='Footer.TLabel' - ) - footer_label.grid(row=0, column=0, sticky=tk.N) - - def on_window_resize(self, event): - """窗口大小变化事件处理""" - if event.widget == self.root: # 只处理主窗口变化 - # 计算尺寸变化 - width_change = event.width - self.last_width - height_change = event.height - self.last_height - - if abs(width_change) > 10 or abs(height_change) > 10: # 有显著变化 - # 动态调整文件输入框宽度 - entry_width = min(80, max(40, event.width // 15)) - self.file_entry.configure(width=entry_width) - - # 动态调整行数输入框宽度 - range_entry_width = min(20, max(10, event.width // 60)) - self.start_entry.configure(width=range_entry_width) - self.end_entry.configure(width=range_entry_width) - - # 
动态调整分隔面板的比例 - self.update_paned_window_ratio(width_change) - - # 动态调整按钮宽度 - self.update_button_sizes(event.width) - - # 保存当前尺寸 - self.last_width = event.width - self.last_height = event.height - - def update_paned_window_ratio(self, width_change): - """更新分隔面板比例""" - if width_change != 0 and hasattr(self, 'paned_window'): - try: - # 获取当前窗口宽度 - current_width = self.root.winfo_width() - - # 动态调整分隔比例 - if current_width > 0: - # 计算新的sash位置(左侧1/3,右侧2/3) - new_sash_position = int(current_width * 0.35) # 调整为35% - self.paned_window.sash_place(0, new_sash_position, 0) - except: - pass # 忽略可能的错误 - - def update_button_sizes(self, window_width): - """更新按钮尺寸""" - # 动态调整按钮宽度 - button_width = min(20, max(15, window_width // 60)) - - # 更新按钮宽度 - for button in [self.full_detect_button, self.adjacent_detect_button, - self.left_side_button, self.right_side_button, - self.clear_button, self.view_log_button]: - button.configure(width=button_width) - - def browse_file(self): - """浏览文件""" - file_path = filedialog.askopenfilename( - title="选择源代码文件", - filetypes=[ - ("源代码文件", "*.c *.cpp *.h *.hpp *.java *.py *.js *.ts"), - ("C/C++文件", "*.c *.cpp *.h *.hpp"), - ("Java文件", "*.java"), - ("Python文件", "*.py"), - ("所有文件", "*.*") - ] - ) - if file_path: - self.file_path_var.set(file_path) - self.update_status(f"已选择文件: {os.path.basename(file_path)}") - - def get_line_range(self): - """获取行数范围""" - start_text = self.start_entry.get().strip() - end_text = self.end_entry.get().strip() - - start_line = 0 - end_line = 0 - - if start_text: - try: - start_line = int(start_text) - except ValueError: - messagebox.showerror("错误", "起始行号必须是有效的整数") - return None, None - - if end_text: - try: - end_line = int(end_text) - except ValueError: - messagebox.showerror("错误", "结束行号必须是有效的整数") - return None, None - - return start_line, end_line - - def start_detection(self, mode="full"): - """开始检测""" - self.current_mode = mode # 设置当前检测模式 - - file_path = self.file_path_var.get().strip() - if not file_path or not 
os.path.exists(file_path): - messagebox.showerror("错误", "请选择有效的源代码文件") - return - - start_line, end_line = self.get_line_range() - if start_line is None or end_line is None: - return - - # 禁用所有按钮 - self.disable_all_buttons() - - # 清空结果区域 - self.result_text.delete(1.0, tk.END) - - # 显示开始信息 - mode_desc = { - "full": "全文重复检测", - "adjacent": "相邻行重复检测", - "left_side": "等号左侧重复检测", - "right_side": "等号右侧重复检测(排除常数)" - }.get(mode, "未知检测") - - self.add_process_message("=" * 50) - self.add_process_message(f"开始{mode_desc}: {os.path.basename(file_path)}") - if start_line > 0 and end_line > 0: - self.add_process_message(f"检查行数范围: {start_line} - {end_line}") - else: - self.add_process_message("检查整个文件") - - # 在线程中执行检测 - detection_thread = threading.Thread( - target=self.run_detection, - args=(file_path, start_line, end_line, mode) - ) - detection_thread.daemon = True - detection_thread.start() - - def disable_all_buttons(self): - """禁用所有按钮""" - buttons = [ - self.full_detect_button, - self.adjacent_detect_button, - self.left_side_button, - self.right_side_button, - self.clear_button, - self.browse_button, - self.view_log_button - ] - - for button in buttons: - if button: - button.config(state='disabled') - - def enable_all_buttons(self): - """启用所有按钮""" - buttons = [ - self.full_detect_button, - self.adjacent_detect_button, - self.left_side_button, - self.right_side_button, - self.clear_button, - self.browse_button, - self.view_log_button - ] - - for button in buttons: - if button: - button.config(state='normal') - - def run_detection(self, file_path, start_line, end_line, mode): - """在线程中运行检测""" - try: - # 根据模式调用不同的检测函数 - if mode == "full": - result = detect_duplicate_lines_gui( - file_path, - start_line, - end_line, - self.add_result_message, - self.update_status_gui - ) - elif mode == "adjacent": - result = detect_adjacent_duplicate_lines_gui( - file_path, - start_line, - end_line, - self.add_result_message, - self.update_status_gui - ) - elif mode == "left_side": - result = 
detect_left_side_equality_gui( - file_path, - start_line, - end_line, - self.add_result_message, - self.update_status_gui - ) - elif mode == "right_side": - result = detect_right_side_equality_gui( - file_path, - start_line, - end_line, - self.add_result_message, - self.update_status_gui - ) - else: - self.add_process_message(f"错误: 未知的检测模式: {mode}") - self.root.after(0, self.enable_all_buttons) - return - - # 显示结果 - if result: - display_results(result, self.add_result_message, self.update_status_gui) - - # 保存到日志文件 - if save_results_to_log(result, self.log_file): - self.add_process_message(f"结果已保存到日志文件: {self.log_file}") - else: - self.add_process_message("警告: 保存日志文件失败") - - mode_desc = { - "full": "全文检测", - "adjacent": "相邻行检测", - "left_side": "等号左侧重复检测", - "right_side": "等号右侧重复检测" - }.get(mode, "检测") - - self.add_process_message(f"{mode_desc}完成") - - except Exception as e: - self.add_process_message(f"检测过程中发生错误: {str(e)}") - finally: - # 启用所有按钮 - self.root.after(0, self.enable_all_buttons) - - def clear_results(self): - """清空结果""" - self.process_text.delete(1.0, tk.END) - self.result_text.delete(1.0, tk.END) - self.update_status("就绪") - - def view_log(self): - """查看日志文件""" - if os.path.exists(self.log_file): - try: - with open(self.log_file, 'r', encoding='utf-8') as f: - log_content = f.read() - - # 创建日志查看窗口 - log_window = tk.Toplevel(self.root) - log_window.title("日志文件") - log_window.geometry("900x700") - log_window.minsize(700, 500) - - # 设置窗口背景 - log_window.configure(bg='#e8f4f8') - - # 创建标题 - log_title_label = tk.Label( - log_window, - text="重复代码检测日志", - font=('微软雅黑', 16, 'bold'), - fg='#3498db', # 蓝色 - bg='#e8f4f8', - pady=10 - ) - log_title_label.pack() - - # 创建文本区域 - log_text = scrolledtext.ScrolledText( - log_window, - wrap=tk.WORD, - font=('微软雅黑', 10), - bg='#ffffff', # 白色背景 - fg='#2c3e50', # 深蓝色文字 - ) - log_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=(0, 10)) - - # 插入日志内容 - log_text.insert(1.0, log_content) - log_text.config(state='disabled') - - 
# 添加关闭按钮 - close_button = ttk.Button( - log_window, - text="关闭", - command=log_window.destroy, - style='TButton' - ) - close_button.pack(pady=(0, 10)) - - except Exception as e: - messagebox.showerror("错误", f"无法读取日志文件: {str(e)}") - else: - messagebox.showinfo("信息", "日志文件不存在") - - def add_process_message(self, message): - """添加处理过程消息""" - timestamp = datetime.now().strftime("%H:%M:%S") - formatted_message = f"[{timestamp}] {message}\n" - self.message_queue.put(("process", formatted_message)) - - def add_result_message(self, message): - """添加结果消息""" - self.message_queue.put(("result", message)) - - def update_status_gui(self, message): - """更新状态(从线程调用)""" - self.message_queue.put(("status", message)) - - def update_status(self, message): - """更新状态(从主线程调用)""" - self.status_var.set(message) - - def process_queue(self): - """处理消息队列""" - try: - while not self.message_queue.empty(): - msg_type, message = self.message_queue.get_nowait() - - if msg_type == "process": - self.process_text.insert(tk.END, message) - self.process_text.see(tk.END) - elif msg_type == "result": - self.result_text.insert(tk.END, message) - self.result_text.see(tk.END) - elif msg_type == "status": - self.status_var.set(message) - - self.message_queue.task_done() - except: - pass - - # 100ms后再次检查队列 - self.root.after(100, self.process_queue) - -def main(): - root = tk.Tk() - app = DuplicateCodeDetectorGUI(root) - - # 初始化后强制布局一次 - root.update() - root.after(100, lambda: app.on_window_resize(None)) - - root.mainloop() - -if __name__ == "__main__": +import os +import re +from collections import defaultdict +import sys +from datetime import datetime +import json +import tkinter as tk +from tkinter import ttk, filedialog, messagebox, scrolledtext +import threading +from queue import Queue + +def normalize_code_line(line): + """ + 标准化代码行:移除注释、多余空格、制表符等 + 优化:排除单字符行(如单个{、}等) + """ + if not line or not line.strip(): + return "" + + line = line.rstrip('\n') + + + # 处理 "//" 注释 + if '//' in line: + # 找到第一个不在字符串中的 
"//" + in_string = False + for i in range(len(line) - 1): + if line[i] == '"' and (i == 0 or line[i-1] != '\\'): + in_string = not in_string + elif not in_string and line[i:i+2] == '//': + line = line[:i] + break + + # 处理 "/* */" 注释(单行内的情况) + if '/*' in line and '*/' in line: + # 找到第一个不在字符串中的 "/*" + in_string = False + comment_start = -1 + for i in range(len(line) - 1): + if line[i] == '"' and (i == 0 or line[i-1] != '\\'): + in_string = not in_string + elif not in_string and line[i:i+2] == '/*': + comment_start = i + break + + if comment_start != -1: + # 查找对应的 "*/" + comment_end = line.find('*/', comment_start) + if comment_end != -1: + line = line[:comment_start] + line[comment_end+2:] + + # 移除行尾空白字符 + line = line.strip() + + # 如果是空行,直接返回 + if not line: + return "" + + # 优化:排除单字符行(如{、}、;等) + if len(line) == 1 and line in '{;}': + return "" + + # 移除语句末尾的分号 + if line.endswith(';'): + line = line[:-1] + + # 移除代码中所有空格 + line = re.sub(r'\s+', '', line) + + return line + +def extract_assignment_sides(line): + """ + 提取赋值语句的左侧和右侧内容 + 改进版:更准确地识别赋值语句,避免将逻辑判断误判 + 修复:正确处理行尾注释和分号 + 返回: (is_assignment, left_side, right_side, operator) + """ + line = line.strip() + if not line: + return False, None, None, None + + # 先处理注释 + # 处理 "//" 注释 + if '//' in line: + # 找到第一个不在字符串中的 "//" + in_string = False + for i in range(len(line) - 1): + if line[i] == '"' and (i == 0 or line[i-1] != '\\'): + in_string = not in_string + elif not in_string and line[i:i+2] == '//': + line = line[:i].strip() # 移除注释部分 + break + + # 处理 "/* */" 注释(单行内的情况) + if '/*' in line and '*/' in line: + # 找到第一个不在字符串中的 "/*" + in_string = False + comment_start = -1 + for i in range(len(line) - 1): + if line[i] == '"' and (i == 0 or line[i-1] != '\\'): + in_string = not in_string + elif not in_string and line[i:i+2] == '/*': + comment_start = i + break + + if comment_start != -1: + # 查找对应的 "*/" + comment_end = line.find('*/', comment_start) + if comment_end != -1: + # 移除注释部分 + before_comment = line[:comment_start] + 
def _drop_inline_comments(line):
    """Remove ``//`` and single-line ``/* */`` comments outside string literals."""
    kept = []
    quote = None
    i, n = 0, len(line)
    while i < n:
        ch = line[i]
        if quote is not None:
            kept.append(ch)
            if ch == '\\' and i + 1 < n:
                kept.append(line[i + 1])  # escaped character, never a terminator
                i += 2
                continue
            if ch == quote:
                quote = None
            i += 1
            continue
        if ch in ('"', "'"):
            quote = ch
        elif line.startswith('//', i):
            break
        elif line.startswith('/*', i):
            end = line.find('*/', i + 2)
            if end != -1:
                i = end + 2
                continue
        kept.append(ch)
        i += 1
    return ''.join(kept)


def _skip_quoted(line, start):
    """Return the index just past the literal opening at ``line[start]``.

    Backslash escapes are honoured; an unterminated literal runs to the end
    of the line.
    """
    quote = line[start]
    i, n = start + 1, len(line)
    while i < n:
        if line[i] == '\\':
            i += 2
            continue
        if line[i] == quote:
            return i + 1
        i += 1
    return n


def extract_assignment_sides(line):
    """Split an assignment statement into its left and right sides.

    The first assignment operator found outside string literals is used.
    Comparison operators (``==``, ``!=``, ``<=``, ``>=``, ``===``, ``!==``)
    and the arrow ``=>`` are never treated as assignments.

    Args:
        line: One source line; trailing comments and a trailing semicolon
            are tolerated.

    Returns:
        ``(is_assignment, left_side, right_side, operator)``. ``left_side``
        is stripped but keeps inner spaces; ``right_side`` has its trailing
        semicolon and all whitespace removed. For non-assignments the tuple
        is ``(False, None, None, None)``.
    """
    line = line.strip()
    if not line:
        return False, None, None, None

    line = _drop_inline_comments(line).strip()

    # Longest operators first so '>>=' is not read as '>' followed by '>='.
    assignment_ops = ('>>>=', '>>=', '<<=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '=')
    three_char_skips = ('===', '!==')
    two_char_skips = ('==', '!=', '<=', '>=', '=>')

    i, n = 0, len(line)
    while i < n:
        ch = line[i]

        if ch in ('"', "'"):
            i = _skip_quoted(line, i)
            continue

        if line[i:i + 3] in three_char_skips:
            i += 3
            continue
        if line[i:i + 2] in two_char_skips:
            i += 2
            continue

        for op in assignment_ops:
            if not line.startswith(op, i):
                continue
            end = i + len(op)
            # Not an assignment when glued to another '=', '!', '<' or '>'
            # on the left, or followed by '=' on the right.
            if i > 0 and line[i - 1] in '=!<>':
                continue
            if end < n and line[end] == '=':
                continue

            left = line[:i].strip()
            right = line[end:].strip()
            if right:
                if right.endswith(';'):
                    right = right[:-1].strip()
                right = re.sub(r'\s+', '', right)
            return True, left, right, op

        i += 1

    return False, None, None, None


def normalize_assignment_side(side):
    """Return *side* with all whitespace removed (``""`` for empty/None)."""
    if not side:
        return ""
    return re.sub(r'\s+', '', side)
# One numeric literal: hex, binary, or decimal/float with optional exponent,
# followed by optional C-style suffix letters (u, l, f).
_NUMERIC_LITERAL = (
    r'(?:0[xX][0-9A-Fa-f]+|0[bB][01]+|(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)'
    r'[uUlLfF]*'
)
_UNARY_PREFIX = r'[+\-~!]*'
_BINARY_OPERATOR = r'(?:<<|>>|\*\*|[+\-*/%&|^])'

# literal (operator literal)* with optional unary prefixes; parentheses are
# removed before matching.
_CONSTANT_ARITHMETIC_RE = re.compile(
    _UNARY_PREFIX + _NUMERIC_LITERAL
    + r'(?:' + _BINARY_OPERATOR + _UNARY_PREFIX + _NUMERIC_LITERAL + r')*'
)

# Exactly one quoted literal (escapes allowed) or a keyword constant.
_SIMPLE_CONSTANT_RE = re.compile(
    r"'(?:[^'\\]|\\.)*'"
    r'|"(?:[^"\\]|\\.)*"'
    r'|true|false|null|none',
    re.IGNORECASE,
)


def is_constant_expression(expression):
    """Tell whether *expression* is a constant (no variables involved).

    Recognised constants:
      * a single string or character literal,
      * ``true`` / ``false`` / ``null`` / ``None`` in any letter case,
      * numeric literals (decimal, float, exponent, hex, binary, with
        optional ``u``/``l``/``f`` suffixes),
      * arithmetic or bitwise combinations of numeric literals, with
        optional parentheses.

    Args:
        expression: Right-hand side text; whitespace is ignored.

    Returns:
        ``True`` for a constant; ``False`` for empty input or anything
        containing an identifier.
    """
    if not expression:
        return False

    expr = re.sub(r'\s+', '', expression)
    if not expr:
        return False

    if _SIMPLE_CONSTANT_RE.fullmatch(expr):
        return True

    # Parentheses only group; they never turn a constant into a variable.
    flattened = re.sub(r'[()]', '', expr)
    if not flattened:
        # Nothing but parentheses (e.g. "()"): still counted as constant.
        return True

    # Each operand must be a real numeric literal, so bare letters such as
    # "e" are not mistaken for exponents.
    return _CONSTANT_ARITHMETIC_RE.fullmatch(flattened) is not None
def detect_duplicate_lines_gui(file_path, start_line, end_line, output_callback, status_callback):
    """Find lines that occur more than once within a line range of a file.

    Lines are compared after ``normalize_code_line``; lines whose normalized
    form has fewer than two characters are ignored.

    Args:
        file_path: Path of the source file.
        start_line: First line to check (1-based); ``<= 0`` means the first
            line of the file.
        end_line: Last line to check (1-based, inclusive); ``<= 0`` or a
            value past the end means the last line of the file.
        output_callback: Called with error text for the result pane.
        status_callback: Called with short progress messages.

    Returns:
        A result dict (``success``, range information, ``duplicate_groups``,
        counts, ``duplicate_rate``, ``detection_mode="full"``), or ``None``
        when the file is missing or unreadable or the range is empty.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except OSError as e:
        output_callback(f"读取文件失败: {e}\n")
        return None

    # Each bound defaults on its own, so "from line N to the end" and "from
    # the start to line N" both work.
    total_lines = len(lines)
    if start_line <= 0:
        start_line = 1
    if end_line <= 0 or end_line > total_lines:
        end_line = total_lines

    if end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None

    status_callback("开始检测重复代码行...")
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")

    occurrences = defaultdict(list)  # normalized text -> line numbers
    originals = {}                   # line number -> original text

    for line_number in range(start_line, end_line + 1):
        original_line = lines[line_number - 1].rstrip('\n')
        normalized_line = normalize_code_line(original_line)
        if len(normalized_line) > 1:
            occurrences[normalized_line].append(line_number)
            originals[line_number] = original_line

    status_callback(f"已完成代码行标准化,正在分析重复行...")

    duplicate_groups = [
        {
            "normalized_content": normalized_line,
            "occurrences": len(line_numbers),
            "line_numbers": line_numbers,
            "original_lines": [originals[number] for number in line_numbers],
        }
        for normalized_line, line_numbers in occurrences.items()
        if len(line_numbers) > 1
    ]

    # The validation above guarantees 1 <= start_line <= end_line <= total_lines.
    actual_lines_in_range = end_line - start_line + 1
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100

    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": total_lines,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "unique_code_lines": len(occurrences),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "full",
    }

    status_callback("全文检测完成,正在生成结果...")
    return result
def detect_adjacent_duplicate_lines_gui(file_path, start_line, end_line, output_callback, status_callback):
    """Find runs of physically adjacent lines with identical normalized text.

    Every maximal run of two or more consecutive identical lines becomes
    exactly one group, so no line is counted twice. Lines whose normalized
    form has fewer than two characters are ignored and break a run.

    Args:
        file_path: Path of the source file.
        start_line: First line to check (1-based); ``<= 0`` means the first
            line of the file.
        end_line: Last line to check (1-based, inclusive); ``<= 0`` or a
            value past the end means the last line of the file.
        output_callback: Called with error text for the result pane.
        status_callback: Called with short progress messages.

    Returns:
        A result dict with ``detection_mode="adjacent"``; each group carries
        ``duplicate_type`` ``"double_adjacent"`` (2 lines),
        ``"triple_adjacent"`` (3 lines) or ``"multi_adjacent"`` (longer
        runs). ``None`` on a missing or unreadable file or an empty range.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except OSError as e:
        output_callback(f"读取文件失败: {e}\n")
        return None

    # Each bound defaults independently (open-ended ranges are allowed).
    total_lines = len(lines)
    if start_line <= 0:
        start_line = 1
    if end_line <= 0 or end_line > total_lines:
        end_line = total_lines

    if end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None

    status_callback("开始检测相邻重复行...")
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")

    normalized_lines = {}  # line number -> normalized text
    original_lines = {}    # line number -> original text

    for line_number in range(start_line, end_line + 1):
        original_line = lines[line_number - 1].rstrip('\n')
        normalized_line = normalize_code_line(original_line)
        if len(normalized_line) > 1:
            normalized_lines[line_number] = normalized_line
            original_lines[line_number] = original_line

    status_callback("正在分析相邻行重复情况...")

    run_types = {2: "double_adjacent", 3: "triple_adjacent"}
    adjacent_duplicates = []

    line_number = start_line
    while line_number <= end_line:
        content = normalized_lines.get(line_number)
        if content is None:
            line_number += 1
            continue

        # Extend the run while the next physical line has the same text.
        run_end = line_number
        while normalized_lines.get(run_end + 1) == content:
            run_end += 1

        run_length = run_end - line_number + 1
        if run_length >= 2:
            numbers = list(range(line_number, run_end + 1))
            adjacent_duplicates.append({
                "normalized_content": content,
                "occurrences": run_length,
                "line_numbers": numbers,
                "original_lines": [original_lines[number] for number in numbers],
                "duplicate_type": run_types.get(run_length, "multi_adjacent"),
            })

        # Continue after the run, so groups come out ordered and disjoint.
        line_number = run_end + 1

    # The validation above guarantees 1 <= start_line <= end_line <= total_lines.
    actual_lines_in_range = end_line - start_line + 1
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in adjacent_duplicates)
    duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100

    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": total_lines,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "unique_code_lines": len(set(normalized_lines.values())),
        "duplicate_groups": adjacent_duplicates,
        "duplicate_groups_count": len(adjacent_duplicates),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "adjacent",
    }

    status_callback("相邻行检测完成,正在生成结果...")
    return result
def detect_left_side_equality_gui(file_path, start_line, end_line, output_callback, status_callback):
    """Find assignment statements that share the same left-hand side.

    Assignments are recognised with ``extract_assignment_sides``; left sides
    are compared after ``normalize_assignment_side``.

    Args:
        file_path: Path of the source file.
        start_line: First line to check (1-based); ``<= 0`` means the first
            line of the file.
        end_line: Last line to check (1-based, inclusive); ``<= 0`` or a
            value past the end means the last line of the file.
        output_callback: Called with error text for the result pane.
        status_callback: Called with short progress messages.

    Returns:
        A result dict with ``detection_mode="left_side"``; each group lists
        the line numbers, original lines, right sides and operators. ``None``
        on a missing or unreadable file or an empty range.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None

    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except OSError as e:
        output_callback(f"读取文件失败: {e}\n")
        return None

    # Each bound defaults independently (open-ended ranges are allowed).
    total_lines = len(lines)
    if start_line <= 0:
        start_line = 1
    if end_line <= 0 or end_line > total_lines:
        end_line = total_lines

    if end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None

    status_callback("开始检测等号左侧相等...")

    left_side_map = defaultdict(list)  # normalized left side -> line numbers
    details_by_line = {}               # line number -> assignment details

    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")

    for line_number in range(start_line, end_line + 1):
        original_line = lines[line_number - 1].rstrip('\n')

        is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line)
        if not (is_assignment and left_side):
            continue

        normalized_left = normalize_assignment_side(left_side)
        if not normalized_left:
            continue

        left_side_map[normalized_left].append(line_number)
        details_by_line[line_number] = {
            "line_number": line_number,
            "original_line": original_line,
            "left_side": left_side,
            "normalized_left": normalized_left,
            "right_side": right_side,
            "operator": operator,
        }

    status_callback(f"已分析{len(details_by_line)}个赋值语句,正在检测左侧相等...")

    duplicate_groups = []
    for normalized_left, line_numbers in left_side_map.items():
        if len(line_numbers) < 2:
            continue
        members = [details_by_line[number] for number in line_numbers]
        duplicate_groups.append({
            "normalized_content": normalized_left,
            "occurrences": len(line_numbers),
            "line_numbers": line_numbers,
            "original_lines": [member["original_line"] for member in members],
            "right_sides": [member["right_side"] for member in members],
            "operators": [member["operator"] for member in members],
            "side": "left",
        })

    # The validation above guarantees 1 <= start_line <= end_line <= total_lines.
    actual_lines_in_range = end_line - start_line + 1
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100

    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": total_lines,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "assignment_statements": len(details_by_line),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "left_side",
    }

    status_callback("等号左侧重复检测完成")
    return result
"original_line": original_line, + "left_side": left_side, + "right_side": right_side, + "normalized_right": normalized_right, + "operator": operator + }) + else: + # 记录常数表达式但不参与检测 + line_details.append({ + "line_number": i, + "original_line": original_line, + "left_side": left_side, + "right_side": right_side, + "normalized_right": "", + "operator": operator, + "is_constant": True + }) + + status_callback(f"已分析{len(line_details)}个赋值语句,正在检测右侧相等(排除常数)...") + + # 找出右侧相等的组 + for normalized_right, line_numbers in right_side_map.items(): + if len(line_numbers) > 1: + # 收集详细信息 + original_lines = [] + left_sides = [] + operators = [] + + for line_num in line_numbers: + for detail in line_details: + if detail["line_number"] == line_num: + original_lines.append(detail["original_line"]) + left_sides.append(detail["left_side"]) + operators.append(detail["operator"]) + break + + duplicate_groups.append({ + "normalized_content": normalized_right, + "occurrences": len(line_numbers), + "line_numbers": line_numbers, + "original_lines": original_lines, + "left_sides": left_sides, + "operators": operators, + "side": "right" # 标记是右侧检测 + }) + + # 计算统计信息 + actual_lines_in_range = end_line - start_line + 1 + if start_line > len(lines) or end_line < 1: + actual_lines_in_range = 0 + + # 统计常数表达式数量 + constant_expressions = len([d for d in line_details if d.get("is_constant", False)]) + variable_expressions = len(line_details) - constant_expressions + + total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups) + + duplicate_rate = 0.0 + if actual_lines_in_range > 0: + duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100 + + result = { + "success": True, + "file_path": file_path, + "file_name": os.path.basename(file_path), + "total_lines": len(lines), + "start_line": start_line, + "end_line": end_line, + "actual_lines_in_range": actual_lines_in_range, + "assignment_statements": len(line_details), + "variable_expressions": variable_expressions, + 
"constant_expressions": constant_expressions, + "duplicate_groups": duplicate_groups, + "duplicate_groups_count": len(duplicate_groups), + "total_duplicate_lines": total_duplicate_lines, + "duplicate_rate": duplicate_rate, + "detection_mode": "right_side" # 标记检测模式为右侧相等检测 + } + + status_callback(f"等号右侧重复检测完成(已排除{constant_expressions}个常数表达式)") + return result + +def display_results(result, output_callback, status_callback): + """显示检测结果""" + if not result or not result.get("success", False): + output_callback("检测失败或无重复代码行\n") + return + + file_path = result["file_path"] + start_line = result["start_line"] + end_line = result["end_line"] + duplicate_groups = result["duplicate_groups"] + detection_mode = result.get("detection_mode", "full") + + # 输出检测模式信息 + mode_info = "全文重复检测" + if detection_mode == "adjacent": + mode_info = "相邻行重复检测" + elif detection_mode == "left_side": + mode_info = "等号左侧重复检测" + elif detection_mode == "right_side": + mode_info = "等号右侧重复检测(排除常数)" + + # 输出基本信息 + output_callback("=" * 60 + "\n") + output_callback(f"重复代码检测结果 ({mode_info})\n") + output_callback("=" * 60 + "\n") + output_callback(f"文件: {file_path}\n") + output_callback(f"检查行数范围: {start_line} - {end_line}\n") + + if detection_mode in ["left_side", "right_side"]: + output_callback(f"范围内赋值语句数: {result.get('assignment_statements', 0)}\n") + + if detection_mode == "right_side": + output_callback(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n") + output_callback(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n") + + output_callback(f"范围内总行数: {result['actual_lines_in_range']}\n") + output_callback("-" * 60 + "\n") + + # 输出重复行组 + if not duplicate_groups: + if detection_mode == "adjacent": + output_callback("在指定范围内未找到相邻重复的代码行\n") + elif detection_mode == "left_side": + output_callback("在指定范围内未找到左侧相等的赋值语句\n") + elif detection_mode == "right_side": + output_callback("在指定范围内未找到右侧相等的变量表达式\n") + else: + output_callback("在指定范围内未找到重复的代码行\n") + else: + for i, group in 
enumerate(duplicate_groups, 1): + side = group.get("side", "") + + if detection_mode == "adjacent": + # 现有相邻检测输出 + dup_type = group.get("duplicate_type", "") + occurrences = group.get("occurrences", 2) + + if dup_type == "triple_adjacent": + desc = f"第{i}组相邻重复行号 (三行连续重复): " + elif dup_type == "double_adjacent": + desc = f"第{i}组相邻重复行号 (两行相邻重复): " + else: + desc = f"第{i}组相邻重复行号: " + elif detection_mode == "left_side": + desc = f"第{i}组左侧相同赋值 (行号): " + elif detection_mode == "right_side": + desc = f"第{i}组右侧相同变量表达式 (行号): " + else: + desc = f"第{i}组重复行号: " + + output_callback(f"{desc}{', '.join(map(str, group['line_numbers']))}\n") + + if detection_mode == "left_side": + output_callback(f" 相同左侧: {group['normalized_content']}\n") + for idx, (line_num, original_line, right_side, operator) in enumerate(zip( + group['line_numbers'], group['original_lines'], + group.get('right_sides', []), group.get('operators', []) + )): + output_callback(f" 行 {line_num}: {original_line}\n") + if right_side: + output_callback(f" 右侧表达式: {right_side}\n") + elif detection_mode == "right_side": + output_callback(f" 相同右侧变量表达式: {group['normalized_content']}\n") + for idx, (line_num, original_line, left_side, operator) in enumerate(zip( + group['line_numbers'], group['original_lines'], + group.get('left_sides', []), group.get('operators', []) + )): + output_callback(f" 行 {line_num}: {original_line}\n") + if left_side: + output_callback(f" 左侧变量: {left_side}\n") + else: + output_callback(f" 重复代码: {group['normalized_content']}\n") + for line_num, original_line in zip(group['line_numbers'], group['original_lines']): + output_callback(f" 行 {line_num}: {original_line}\n") + + output_callback("\n") + + # 输出统计信息 + output_callback("-" * 60 + "\n") + output_callback(f"重复代码总组数: {result['duplicate_groups_count']}\n") + output_callback(f"总重复行数: {result['total_duplicate_lines']}\n") + output_callback(f"重复率: {result['duplicate_rate']:.2f}%\n") + output_callback("=" * 60 + "\n\n") + + status_callback("结果显示完成") + 
def _log_mode_description(detection_mode):
    """Return the report label for a detection mode.

    Any mode other than the three named ones is labelled as full-text
    detection.
    """
    return {
        "adjacent": "相邻行重复检测",
        "left_side": "等号左侧重复检测",
        "right_side": "等号右侧重复检测(排除常数)",
    }.get(detection_mode, "全文重复检测")


def _log_group_heading(index, group, detection_mode):
    """Return the heading prefix written in front of a group's line numbers."""
    if detection_mode == "adjacent":
        dup_type = group.get("duplicate_type", "")
        if dup_type == "triple_adjacent":
            return f"第{index}组相邻重复行号 (三行连续重复): "
        if dup_type == "double_adjacent":
            return f"第{index}组相邻重复行号 (两行相邻重复): "
        return f"第{index}组相邻重复行号: "
    if detection_mode == "left_side":
        return f"第{index}组左侧相同赋值 (行号): "
    if detection_mode == "right_side":
        return f"第{index}组右侧相同变量表达式 (行号): "
    return f"第{index}组重复行号: "


def _write_log_group(f, index, group, detection_mode):
    """Write one duplicate group (heading, shared content, member lines) to *f*."""
    heading = _log_group_heading(index, group, detection_mode)
    f.write(f"{heading}{', '.join(map(str, group['line_numbers']))}\n")

    if detection_mode == "left_side":
        f.write(f" 相同左侧: {group['normalized_content']}\n")
        # 'operators' stays in the zip so the shortest list still bounds the
        # number of rows written.
        rows = zip(
            group['line_numbers'], group['original_lines'],
            group.get('right_sides', []), group.get('operators', [])
        )
        for line_num, original_line, right_side, _operator in rows:
            f.write(f" 行 {line_num}: {original_line}\n")
            if right_side:
                f.write(f" 右侧表达式: {right_side}\n")
    elif detection_mode == "right_side":
        f.write(f" 相同右侧变量表达式: {group['normalized_content']}\n")
        rows = zip(
            group['line_numbers'], group['original_lines'],
            group.get('left_sides', []), group.get('operators', [])
        )
        for line_num, original_line, left_side, _operator in rows:
            f.write(f" 行 {line_num}: {original_line}\n")
            if left_side:
                f.write(f" 左侧变量: {left_side}\n")
    else:
        f.write(f" 重复代码: {group['normalized_content']}\n")
        for line_num, original_line in zip(group['line_numbers'], group['original_lines']):
            f.write(f" 行 {line_num}: {original_line}\n")

    f.write("\n")


def save_results_to_log(result, log_file_path):
    """Append a detection report to the log file.

    Args:
        result: Result dict produced by one of the detection functions. It is
            ignored unless it is truthy and its "success" entry is truthy.
        log_file_path: Path of the UTF-8 log file; the report is appended.

    Returns:
        True when the report was written, False when *result* is unusable or
        writing failed for any reason.
    """
    if not result or not result.get("success", False):
        return False

    try:
        with open(log_file_path, 'a', encoding='utf-8') as f:
            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            detection_mode = result.get("detection_mode", "full")
            mode_desc = _log_mode_description(detection_mode)

            f.write(f"\n\n{'='*60}\n")
            f.write(f"重复代码检测报告 - {mode_desc} - {timestamp}\n")
            f.write(f"{'='*60}\n\n")

            f.write(f"文件: {result['file_path']}\n")
            f.write(f"检查行数范围: {result['start_line']} - {result['end_line']}\n")
            f.write(f"范围内总行数: {result['actual_lines_in_range']}\n")

            if detection_mode in ("left_side", "right_side"):
                f.write(f"范围内赋值语句数: {result.get('assignment_statements', 0)}\n")

            if detection_mode == "right_side":
                f.write(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n")
                f.write(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n")

            f.write(f"检测模式: {mode_desc}\n\n")

            for index, group in enumerate(result.get('duplicate_groups', []), 1):
                _write_log_group(f, index, group, detection_mode)

            f.write(f"重复代码总组数: {result['duplicate_groups_count']}\n")
            f.write(f"总重复行数: {result['total_duplicate_lines']}\n")
            f.write(f"重复率: {result['duplicate_rate']:.2f}%\n")
            f.write(f"{'='*60}\n")
    except Exception:
        # Logging is best effort: the caller only needs to know it failed.
        return False
    return True


class DuplicateCodeDetectorGUI:
    """Tkinter front end for the duplicate-line detectors.

    Detection runs in a worker thread. The worker never touches widgets
    directly: it posts (kind, text) tuples to ``message_queue`` and the
    periodic ``process_queue`` callback applies them to the widgets.
    """

    def __init__(self, root):
        """Build the whole window inside *root* and start the queue poller."""
        self.root = root
        self.root.title("源代码重复行统计软件V1.00")
        self.root.geometry("1100x750")
        self.root.minsize(900, 600)
        self.root.configure(bg='#e8f4f8')  # light-blue window background

        # Plain state is set up before any widget exists so that the
        # create_* methods and event handlers can rely on it.
        self.log_file = "duplicate_code_log.txt"
        self.message_queue = Queue()
        self.last_width = 1100
        self.last_height = 750
        self.current_mode = "full"
        self.tooltip = None
        # Filled in by create_result_panel(); declared here (not afterwards)
        # so the real widgets are not overwritten with None.
        self.process_text_frame = None
        self.result_text_frame = None
        self.paned_window = None

        self.setup_styles()

        self.main_frame = ttk.Frame(root, padding="10")
        self.main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        root.columnconfigure(0, weight=1)
        root.rowconfigure(0, weight=1)

        # Rows: 0 title, 1 file chooser, 2 line range, 3 reminder, 4 buttons,
        # 5 status, 6 result panel (the only stretching row), 7 footer.
        self.main_frame.columnconfigure(0, weight=1)
        for row in range(8):
            self.main_frame.rowconfigure(row, weight=1 if row == 6 else 0)

        self.create_title()
        self.create_control_panel()
        self.create_result_panel()
        self.create_footer()

        # Resize handling needs every widget to exist, so bind last.
        self.root.bind('<Configure>', self.on_window_resize)

        self.process_queue()

    def setup_styles(self):
        """Configure the ttk styles (flat, light-blue look)."""
        style = ttk.Style()
        style.theme_use('clam')

        bg_color = '#e8f4f8'        # light-blue background
        fg_color = '#2c3e50'        # dark-blue text
        btn_bg = '#3498db'          # blue button background
        btn_fg = '#ffffff'          # white button text
        hover_bg = '#2980b9'        # darker blue while hovered
        active_bg = '#1c5c8a'       # darkest blue while pressed
        frame_bg = '#d4eaf7'        # frame background
        label_frame_bg = '#d4eaf7'  # label-frame caption background

        style.configure('.', background=bg_color, foreground=fg_color, font=('微软雅黑', 10))

        style.configure('TLabelframe', background=frame_bg, relief='flat', borderwidth=2)
        style.configure('TLabelframe.Label', background=label_frame_bg,
                        foreground=fg_color, font=('微软雅黑', 10, 'bold'))

        # The default button and the four accent buttons differ only in
        # their background colour.
        button_backgrounds = {
            'TButton': btn_bg,
            'Highlight.TButton': '#e74c3c',  # red
            'Adjacent.TButton': '#2ecc71',   # green
            'LeftSide.TButton': '#9b59b6',   # purple
            'RightSide.TButton': '#e67e22',  # orange
        }
        for style_name, background in button_backgrounds.items():
            style.configure(style_name,
                            background=background,
                            foreground=btn_fg,
                            font=('微软雅黑', 10, 'bold'),
                            borderwidth=1,
                            relief='flat',
                            padding=6)

        style.map('TButton',
                  background=[('active', hover_bg), ('pressed', active_bg)],
                  relief=[('pressed', 'sunken')])

        style.configure('Title.TLabel',
                        font=('微软雅黑', 24, 'bold'),
                        foreground='#e74c3c',  # red
                        background=bg_color)
        style.configure('Subtitle.TLabel',
                        font=('微软雅黑', 12, 'bold'),
                        foreground=fg_color,
                        background=bg_color)
        style.configure('Status.TLabel',
                        font=('微软雅黑', 10),
                        foreground='#3498db',  # blue
                        background=bg_color)
        style.configure('Footer.TLabel',
                        font=('微软雅黑', 10),
                        foreground='#7f8c8d',  # mid grey
                        background=bg_color)

        style.configure('TEntry',
                        fieldbackground='#ffffff',
                        foreground='#2c3e50',
                        font=('微软雅黑', 10),
                        borderwidth=1,
                        relief='flat')

        for scrollbar_style in ('Vertical.TScrollbar', 'Horizontal.TScrollbar'):
            style.configure(scrollbar_style,
                            background='#bdc3c7',
                            troughcolor=bg_color,
                            borderwidth=0,
                            relief='flat')

    def create_title(self):
        """Create the red application title in row 0."""
        title_frame = ttk.Frame(self.main_frame, style='TFrame')
        title_frame.grid(row=0, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(0, 20))
        title_frame.columnconfigure(0, weight=1)

        # A plain tk.Label (not ttk.Label) so the colours can be set directly.
        title_label = tk.Label(
            title_frame,
            text="源代码重复行统计软件V1.00",
            font=('微软雅黑', 24, 'bold'),
            fg='#e74c3c',  # red text
            bg='#e8f4f8'   # light-blue background
        )
        title_label.grid(row=0, column=0, sticky=tk.N)

    def create_control_panel(self):
        """Create the file chooser, line-range inputs, reminder, buttons and status label."""
        # --- file chooser -------------------------------------------------
        file_frame = ttk.LabelFrame(self.main_frame, text="文件选择", padding="10")
        file_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
        file_frame.columnconfigure(1, weight=1)

        ttk.Label(file_frame, text="源代码文件:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))

        self.file_path_var = tk.StringVar()
        self.file_entry = ttk.Entry(file_frame, textvariable=self.file_path_var, width=60)
        self.file_entry.grid(row=0, column=1, sticky=(tk.W, tk.E), padx=(0, 5))

        self.browse_button = ttk.Button(
            file_frame,
            text="浏览...",
            command=self.browse_file,
            style='Highlight.TButton'
        )
        self.browse_button.grid(row=0, column=2)
        self.create_tooltip(self.browse_button, "选择要检测的源代码文件")

        # --- optional line range -----------------------------------------
        range_frame = ttk.LabelFrame(self.main_frame, text="行数范围 (可选)", padding="10")
        range_frame.grid(row=2, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
        range_frame.columnconfigure(0, weight=0)  # caption
        range_frame.columnconfigure(1, weight=0)  # entry
        range_frame.columnconfigure(2, weight=1)  # explanatory text

        ttk.Label(range_frame, text="起始行号:").grid(row=0, column=0, sticky=tk.W, padx=(0, 5))
        self.start_entry = ttk.Entry(range_frame, width=15)
        self.start_entry.grid(row=0, column=1, sticky=tk.W, padx=(0, 20))
        ttk.Label(range_frame, text="(如不填写,默认检查整个文件)").grid(row=0, column=2, sticky=tk.W)

        ttk.Label(range_frame, text="结束行号:").grid(row=1, column=0, sticky=tk.W, padx=(0, 5))
        self.end_entry = ttk.Entry(range_frame, width=15)
        self.end_entry.grid(row=1, column=1, sticky=tk.W, padx=(0, 20))

        self.create_tooltip(self.start_entry, "指定检测的起始行号(可选),不填写则从文件开头开始")
        self.create_tooltip(self.end_entry, "指定检测的结束行号(可选),不填写则检测到文件结尾")

        # --- reminder -----------------------------------------------------
        reminder_frame = ttk.Frame(self.main_frame)
        reminder_frame.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=(5, 10))
        reminder_frame.columnconfigure(0, weight=1)

        reminder_label = tk.Label(
            reminder_frame,
            text="⚠️注意:1.重复行检测会忽略注释上的不同以及语句中空格等非可执行内容,单字符重复行(如{};)也将被忽略。2.右侧相等检测会排除常数表达式。 ",
            font=('微软雅黑', 9),
            fg='#e74c3c',  # red text
            bg='#e8f4f8',  # light-blue background
            wraplength=800
        )
        reminder_label.grid(row=0, column=0, sticky=tk.W)

        # --- action buttons -----------------------------------------------
        button_frame = ttk.Frame(self.main_frame)
        button_frame.grid(row=4, column=0, pady=(0, 10))

        def add_button(text, command, style_name, width, padx):
            button = ttk.Button(button_frame, text=text, command=command,
                                style=style_name, width=width)
            button.pack(side=tk.LEFT, padx=padx)
            return button

        self.full_detect_button = add_button(
            "检查全文重复行", lambda: self.start_detection("full"),
            'TButton', 20, (0, 10))
        self.adjacent_detect_button = add_button(
            "检查相邻重复行", lambda: self.start_detection("adjacent"),
            'Adjacent.TButton', 20, (0, 10))
        self.left_side_button = add_button(
            "等号左侧重复检测", lambda: self.start_detection("left_side"),
            'LeftSide.TButton', 20, (0, 10))
        self.right_side_button = add_button(
            "等号右侧重复检测", lambda: self.start_detection("right_side"),
            'RightSide.TButton', 20, (0, 10))
        self.clear_button = add_button(
            "清空结果", self.clear_results, 'TButton', 15, (0, 10))
        self.view_log_button = add_button(
            "查看日志", self.view_log, 'TButton', 15, 0)

        button_tooltips = [
            (self.full_detect_button, "在源代码的指定行范围内查找所有行中的重复行(排除单字符重复)"),
            (self.adjacent_detect_button, "在源代码的指定行范围内仅查找相邻行中的重复行(排除单字符重复)"),
            (self.left_side_button, "在源代码的指定行范围内查找不同行之间存在的等号左侧表达式相等的行"),
            (self.right_side_button, "在源代码的指定行范围内查找不同行之间存在的等号右侧表达式相等的行(排除常数)"),
            (self.clear_button, "清空所有检测结果和处理过程信息"),
            (self.view_log_button, "查看历史检测日志文件"),
        ]
        for button, tip in button_tooltips:
            self.create_tooltip(button, tip)

        # --- status line --------------------------------------------------
        self.status_var = tk.StringVar(value="就绪")
        self.status_label = ttk.Label(
            self.main_frame,
            textvariable=self.status_var,
            style='Status.TLabel'
        )
        self.status_label.grid(row=5, column=0, sticky=tk.W, pady=(0, 10))

    def create_tooltip(self, widget, text):
        """Show *text* in a small borderless window while the pointer is over *widget*."""
        def enter(event):
            x, y, _, _ = widget.bbox("insert")
            x += widget.winfo_rootx() + 25
            y += widget.winfo_rooty() + 20

            # Never leave a previous tooltip window behind.
            leave(event)

            self.tooltip = tk.Toplevel(widget)
            self.tooltip.wm_overrideredirect(True)
            self.tooltip.wm_geometry(f"+{x}+{y}")

            label = tk.Label(self.tooltip, text=text, justify='left',
                             background="#f1c40f",  # bright yellow background
                             foreground="#2c3e50",  # dark-blue text
                             relief='solid',
                             borderwidth=1,
                             font=("微软雅黑", "9", "normal"),
                             padx=8,
                             pady=5)
            label.pack()

        def leave(event):
            tooltip = getattr(self, 'tooltip', None)
            if tooltip is not None:
                tooltip.destroy()
                self.tooltip = None

        widget.bind("<Enter>", enter)
        widget.bind("<Leave>", leave)

    def create_result_panel(self):
        """Create the split result area: process log on the left, results on the right."""
        result_frame = ttk.LabelFrame(self.main_frame, text="检测结果", padding="5")
        result_frame.grid(row=6, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
        result_frame.columnconfigure(0, weight=1)
        result_frame.rowconfigure(0, weight=1)

        self.paned_window = ttk.PanedWindow(result_frame, orient=tk.HORIZONTAL)
        self.paned_window.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Left pane: processing messages.
        process_frame = ttk.LabelFrame(self.paned_window, text="处理过程", padding="5")
        self.paned_window.add(process_frame, weight=1)
        process_frame.columnconfigure(0, weight=1)
        process_frame.rowconfigure(0, weight=1)

        self.process_text = scrolledtext.ScrolledText(
            process_frame,
            wrap=tk.WORD,
            font=('微软雅黑', 10),
            bg='#ffffff',               # white background
            fg='#2c3e50',               # dark-blue text
            insertbackground='#2c3e50'  # cursor colour
        )
        self.process_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Right pane: detection results (twice the weight of the left pane).
        result_display_frame = ttk.LabelFrame(self.paned_window, text="检测结果", padding="5")
        self.paned_window.add(result_display_frame, weight=2)
        result_display_frame.columnconfigure(0, weight=1)
        result_display_frame.rowconfigure(0, weight=1)

        self.result_text = scrolledtext.ScrolledText(
            result_display_frame,
            wrap=tk.WORD,
            font=('微软雅黑', 10),
            bg='#ffffff',               # white background
            fg='#2c3e50',               # dark-blue text
            insertbackground='#2c3e50'  # cursor colour
        )
        self.result_text.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Kept so other code can reach the two pane frames.
        self.process_text_frame = process_frame
        self.result_text_frame = result_display_frame

    def create_footer(self):
        """Create the footer label in row 7."""
        footer_frame = ttk.Frame(self.main_frame)
        footer_frame.grid(row=7, column=0, sticky=(tk.W, tk.E), pady=(20, 0))
        footer_frame.columnconfigure(0, weight=1)

        footer_label = ttk.Label(
            footer_frame,
            text="中国科学院卫星软件评测中心预研组,联系人:王明亮,联系电话:13761306729",
            style='Footer.TLabel'
        )
        footer_label.grid(row=0, column=0, sticky=tk.N)

    def on_window_resize(self, event):
        """Adapt entry widths, the sash position and button widths to the window size.

        Args:
            event: A Configure event, or None to force a refresh from the
                root window's current size (used once after start-up).
                Events that belong to widgets other than the root window are
                ignored.
        """
        if event is None:
            width = self.root.winfo_width()
            height = self.root.winfo_height()
            forced = True
        elif event.widget != self.root:
            return
        else:
            width, height, forced = event.width, event.height, False

        width_change = width - self.last_width
        height_change = height - self.last_height

        # Ignore jitter: only react once the size moved by more than 10 px.
        if not forced and abs(width_change) <= 10 and abs(height_change) <= 10:
            return

        entry_width = min(80, max(40, width // 15))
        self.file_entry.configure(width=entry_width)

        range_entry_width = min(20, max(10, width // 60))
        self.start_entry.configure(width=range_entry_width)
        self.end_entry.configure(width=range_entry_width)

        self.update_paned_window_ratio(width_change)
        self.update_button_sizes(width)

        self.last_width = width
        self.last_height = height

    def update_paned_window_ratio(self, width_change):
        """Move the sash to 35 % of the window width after a width change."""
        paned_window = getattr(self, 'paned_window', None)
        if width_change == 0 or paned_window is None:
            return

        current_width = self.root.winfo_width()
        if current_width <= 0:
            return

        try:
            # ttk.PanedWindow positions sashes with sashpos(); sash_place()
            # belongs to the classic tk.PanedWindow.
            paned_window.sashpos(0, int(current_width * 0.35))
        except tk.TclError:
            # The pane may not be laid out yet; the next resize retries.
            pass

    def _all_buttons(self):
        """Return every button that is locked while a detection is running."""
        return [
            self.full_detect_button,
            self.adjacent_detect_button,
            self.left_side_button,
            self.right_side_button,
            self.clear_button,
            self.browse_button,
            self.view_log_button,
        ]

    def update_button_sizes(self, window_width):
        """Scale the width of the action-row buttons with the window width."""
        button_width = min(20, max(15, window_width // 60))

        # The browse button sits in the file row and keeps its natural width.
        for button in [self.full_detect_button, self.adjacent_detect_button,
                       self.left_side_button, self.right_side_button,
                       self.clear_button, self.view_log_button]:
            button.configure(width=button_width)

    def browse_file(self):
        """Let the user pick a source file and store its path."""
        file_path = filedialog.askopenfilename(
            title="选择源代码文件",
            filetypes=[
                ("源代码文件", "*.c *.cpp *.h *.hpp *.java *.py *.js *.ts"),
                ("C/C++文件", "*.c *.cpp *.h *.hpp"),
                ("Java文件", "*.java"),
                ("Python文件", "*.py"),
                ("所有文件", "*.*")
            ]
        )
        if file_path:
            self.file_path_var.set(file_path)
            self.update_status(f"已选择文件: {os.path.basename(file_path)}")

    def get_line_range(self):
        """Read the line-range entries.

        Returns:
            (start_line, end_line) as ints, with 0 standing for an empty
            field, or (None, None) after showing an error dialog when a
            field does not hold an integer.
        """
        start_text = self.start_entry.get().strip()
        end_text = self.end_entry.get().strip()

        start_line = 0
        end_line = 0

        if start_text:
            try:
                start_line = int(start_text)
            except ValueError:
                messagebox.showerror("错误", "起始行号必须是有效的整数")
                return None, None

        if end_text:
            try:
                end_line = int(end_text)
            except ValueError:
                messagebox.showerror("错误", "结束行号必须是有效的整数")
                return None, None

        return start_line, end_line

    def start_detection(self, mode="full"):
        """Validate the inputs and run the detection for *mode* in a worker thread."""
        self.current_mode = mode

        file_path = self.file_path_var.get().strip()
        if not file_path or not os.path.exists(file_path):
            messagebox.showerror("错误", "请选择有效的源代码文件")
            return

        start_line, end_line = self.get_line_range()
        if start_line is None or end_line is None:
            return

        # Locked until run_detection() finishes.
        self.disable_all_buttons()

        self.result_text.delete("1.0", tk.END)

        mode_desc = {
            "full": "全文重复检测",
            "adjacent": "相邻行重复检测",
            "left_side": "等号左侧重复检测",
            "right_side": "等号右侧重复检测(排除常数)"
        }.get(mode, "未知检测")

        self.add_process_message("=" * 50)
        self.add_process_message(f"开始{mode_desc}: {os.path.basename(file_path)}")
        if start_line > 0 and end_line > 0:
            self.add_process_message(f"检查行数范围: {start_line} - {end_line}")
        else:
            self.add_process_message("检查整个文件")

        detection_thread = threading.Thread(
            target=self.run_detection,
            args=(file_path, start_line, end_line, mode)
        )
        detection_thread.daemon = True
        detection_thread.start()

    def disable_all_buttons(self):
        """Disable every button while a detection is running."""
        for button in self._all_buttons():
            if button is not None:
                button.config(state='disabled')

    def enable_all_buttons(self):
        """Re-enable every button once a detection has finished."""
        for button in self._all_buttons():
            if button is not None:
                button.config(state='normal')

    def run_detection(self, file_path, start_line, end_line, mode):
        """Worker-thread body: run the detector for *mode*, show and log its result.

        All output goes through the message queue; the buttons are re-enabled
        on the Tk thread when the run ends, whatever the outcome.
        """
        try:
            if mode == "full":
                detector = detect_duplicate_lines_gui
            elif mode == "adjacent":
                detector = detect_adjacent_duplicate_lines_gui
            elif mode == "left_side":
                detector = detect_left_side_equality_gui
            elif mode == "right_side":
                detector = detect_right_side_equality_gui
            else:
                self.add_process_message(f"错误: 未知的检测模式: {mode}")
                return  # the finally clause re-enables the buttons

            result = detector(
                file_path,
                start_line,
                end_line,
                self.add_result_message,
                self.update_status_gui
            )

            if result:
                display_results(result, self.add_result_message, self.update_status_gui)

                if save_results_to_log(result, self.log_file):
                    self.add_process_message(f"结果已保存到日志文件: {self.log_file}")
                else:
                    self.add_process_message("警告: 保存日志文件失败")

            mode_desc = {
                "full": "全文检测",
                "adjacent": "相邻行检测",
                "left_side": "等号左侧重复检测",
                "right_side": "等号右侧重复检测"
            }.get(mode, "检测")

            self.add_process_message(f"{mode_desc}完成")

        except Exception as e:
            # Thread boundary: report the failure instead of dying silently.
            self.add_process_message(f"检测过程中发生错误: {str(e)}")
        finally:
            self.root.after(0, self.enable_all_buttons)

    def clear_results(self):
        """Empty both text panes and reset the status line."""
        self.process_text.delete("1.0", tk.END)
        self.result_text.delete("1.0", tk.END)
        self.update_status("就绪")

    def view_log(self):
        """Show the log file in a read-only window of its own."""
        if not os.path.exists(self.log_file):
            messagebox.showinfo("信息", "日志文件不存在")
            return

        try:
            with open(self.log_file, 'r', encoding='utf-8') as f:
                log_content = f.read()

            log_window = tk.Toplevel(self.root)
            log_window.title("日志文件")
            log_window.geometry("900x700")
            log_window.minsize(700, 500)
            log_window.configure(bg='#e8f4f8')

            log_title_label = tk.Label(
                log_window,
                text="重复代码检测日志",
                font=('微软雅黑', 16, 'bold'),
                fg='#3498db',  # blue
                bg='#e8f4f8',
                pady=10
            )
            log_title_label.pack()

            log_text = scrolledtext.ScrolledText(
                log_window,
                wrap=tk.WORD,
                font=('微软雅黑', 10),
                bg='#ffffff',  # white background
                fg='#2c3e50',  # dark-blue text
            )
            log_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=(0, 10))

            log_text.insert("1.0", log_content)
            log_text.config(state='disabled')  # read-only

            close_button = ttk.Button(
                log_window,
                text="关闭",
                command=log_window.destroy,
                style='TButton'
            )
            close_button.pack(pady=(0, 10))

        except Exception as e:
            messagebox.showerror("错误", f"无法读取日志文件: {str(e)}")

    def add_process_message(self, message):
        """Queue a time-stamped line for the process pane."""
        timestamp = datetime.now().strftime("%H:%M:%S")
        formatted_message = f"[{timestamp}] {message}\n"
        self.message_queue.put(("process", formatted_message))

    def add_result_message(self, message):
        """Queue text for the result pane."""
        self.message_queue.put(("result", message))

    def update_status_gui(self, message):
        """Queue a status-line update (safe to call from the worker thread)."""
        self.message_queue.put(("status", message))

    def update_status(self, message):
        """Set the status line directly (main thread only)."""
        self.status_var.set(message)

    def process_queue(self):
        """Apply all queued messages to the widgets, then re-arm the 100 ms poll."""
        try:
            while not self.message_queue.empty():
                msg_type, message = self.message_queue.get_nowait()

                if msg_type == "process":
                    self.process_text.insert(tk.END, message)
                    self.process_text.see(tk.END)
                elif msg_type == "result":
                    self.result_text.insert(tk.END, message)
                    self.result_text.see(tk.END)
                elif msg_type == "status":
                    self.status_var.set(message)

                self.message_queue.task_done()
        except tk.TclError:
            # A target widget is gone (e.g. during shutdown); drop the message.
            pass

        self.root.after(100, self.process_queue)


def main():
    """Create the main window and run the Tk event loop."""
    root = tk.Tk()
    app = DuplicateCodeDetectorGUI(root)

    # Force one layout pass, then size the widgets for the real window size.
    root.update()
    root.after(100, lambda: app.on_window_resize(None))

    root.mainloop()


if __name__ == "__main__":
    main()