# --- Repository web-page metadata (scraping residue; commented out so the file parses) ---
# Files
# code_check/find-repeat-9.py
# wangmingliang 1aa2a67126 上传文件至「/」
# 增加了两个文件之间的重复行比较,以及一个目录所有文件比较等两种比较方式。
# 2026-04-17 09:32:19 +08:00
#
# 3478 lines
# 154 KiB
# Python
# Raw Permalink Blame History
#
# This file contains invisible Unicode characters
# (Gitea file-view warnings about invisible/confusable Unicode characters omitted.)
"""
源代码重复行统计软件V1.00
功能:检测源代码中的重复行,支持全文检测、相邻行检测、等号左侧/右侧检测
支持:单文件内比较、双文件间比较、目录下多文件比较
输出TXT日志文件和Excel结果文件
"""
import os
import re
from collections import defaultdict
import sys
from datetime import datetime
import json
import tkinter as tk
from tkinter import ttk, filedialog, messagebox, scrolledtext
import threading
from queue import Queue
from tkinter import IntVar, StringVar, BooleanVar
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
def normalize_code_line(line, in_multiline_comment=False, prev_line_escaped=False):
    """
    Normalize one source line for duplicate comparison.

    Strips C-style comments ("//" and "/* ... */", including some unusual
    forms such as banner lines beginning with a backslash, e.g. "\\*...*/"),
    a trailing semicolon, and then all whitespace.

    Args:
        line: raw source line (may still carry its trailing '\n').
        in_multiline_comment: True when an earlier line opened a /* ... */
            comment that has not yet closed.
        prev_line_escaped: accepted for interface compatibility; not read here.

    Returns:
        (normalized, still_in_multiline_comment, line_ends_with_backslash)
        where `normalized` is "" for blank / comment-only lines.

    NOTE(review): string detection only tracks double quotes, so comment
    markers inside single-quoted char literals are treated as comments —
    presumably acceptable for the intended C-like inputs; confirm.
    """
    if not line or not line.strip():
        return "", in_multiline_comment, False
    original_line = line  # kept as-is; currently unused
    line = line.rstrip('\n')
    # Detect a trailing backslash (line continuation).
    line_escaped = False
    if line.endswith('\\'):
        # Exclude an escaped backslash (line actually ending in "\\\\").
        if not (len(line) > 1 and line[-2] == '\\'):
            line_escaped = True
            # Temporarily drop the trailing backslash for further processing.
            line = line.rstrip('\\').rstrip()
    # Already inside a multi-line comment.
    if in_multiline_comment:
        # First check for the special banner style of comment line.
        trimmed_line = line.strip()
        # 1. Lines starting with a backslash
        #    (e.g. \******************************************************************************/
        if trimmed_line.startswith('\\'):
            # Does this line close the comment?
            if '*/' in trimmed_line:
                end_pos = trimmed_line.find('*/')
                after_comment = trimmed_line[end_pos+2:].strip()
                if after_comment:
                    return normalize_code_line(after_comment, False, line_escaped)
                else:
                    return "", False, line_escaped
            else:
                # Still part of the comment.
                return "", True, line_escaped
        # 2. Regular end-of-comment handling.
        if '*/' in line:
            end_pos = line.find('*/')
            after_comment = line[end_pos+2:].strip()
            if after_comment:
                return normalize_code_line(after_comment, False, line_escaped)
            else:
                return "", False, line_escaped
        # 3. The whole line is inside the comment.
        return "", True, line_escaped
    # Handle single-line "//" comments.
    if '//' in line:
        in_string = False
        comment_start = -1
        for i in range(len(line) - 1):
            if line[i] == '"' and (i == 0 or line[i-1] != '\\'):
                in_string = not in_string
            elif not in_string and line[i:i+2] == '//':
                comment_start = i
                break
        if comment_start != -1:
            line = line[:comment_start]
    # Handle the start of a multi-line comment.
    if '/*' in line:
        in_string = False
        comment_start = -1
        # Find the first "/*" that is not inside a string literal.
        for i in range(len(line) - 1):
            if line[i] == '"' and (i == 0 or line[i-1] != '\\'):
                in_string = not in_string
            elif not in_string and line[i:i+2] == '/*':
                comment_start = i
                break
        if comment_start != -1:
            # Look for a closing "*/" after the opener.
            comment_end = -1
            in_string = False
            for i in range(comment_start, len(line)):
                if line[i] == '"' and (i == 0 or line[i-1] != '\\'):
                    in_string = not in_string
                elif not in_string and i < len(line) - 1 and line[i:i+2] == '*/':
                    comment_end = i
                    break
            if comment_end != -1:
                # Comment opens and closes on the same line.
                before_comment = line[:comment_start]
                after_comment = line[comment_end+2:]
                combined = (before_comment + ' ' + after_comment).strip()
                if combined:
                    return normalize_code_line(combined, False, line_escaped)
                else:
                    return "", False, line_escaped
            else:
                # Comment continues onto following lines.
                before_comment = line[:comment_start].strip()
                if before_comment:
                    return before_comment, True, line_escaped
                else:
                    return "", True, line_escaped
    # Handle special lines starting with a backslash
    # (e.g. \******************************************************************************/
    trimmed_line = line.strip()
    if trimmed_line.startswith('\\'):
        # Comment-terminator style?
        if '*/' in trimmed_line:
            # Position of the comment end.
            comment_end = trimmed_line.find('*/')
            after_comment = trimmed_line[comment_end+2:].strip()
            if after_comment:
                return normalize_code_line(after_comment, False, line_escaped)
            else:
                return "", False, line_escaped
        elif trimmed_line.startswith('\\*') and trimmed_line.endswith('*/'):
            # Special form: \**************/
            # NOTE(review): unreachable — endswith('*/') implies '*/' in the
            # line, so the branch above always wins. Kept as-is.
            return "", False, line_escaped
    # Other special banner comment lines.
    if line.strip().startswith('/') and line.strip().endswith('\\'):
        # Form such as: /**************\
        return "", True, line_escaped
    if line.strip().startswith('\\') and line.strip().endswith('/'):
        # Form such as: \**************/
        return "", False, line_escaped
    # Pure comment lines.
    trimmed_line = line.strip()
    if trimmed_line.startswith('/*') and trimmed_line.endswith('*/'):
        return "", False, line_escaped
    if trimmed_line.startswith('//'):
        return "", False, line_escaped
    # Heuristic: a line starting with '*' (or an unterminated '/*') is
    # assumed to be part of a multi-line comment body.
    if (trimmed_line.startswith('*') or trimmed_line.startswith('/*')) and not trimmed_line.endswith('*/'):
        return "", True, line_escaped
    # Trim surrounding whitespace.
    line = line.strip()
    # Empty after stripping — nothing to report.
    if not line:
        return "", False, line_escaped
    # Drop a trailing statement semicolon.
    if line.endswith(';'):
        line = line[:-1]
    # Remove all remaining whitespace so formatting cannot affect equality.
    line = re.sub(r'\s+', '', line)
    return line, False, line_escaped
def normalize_code_line_legacy(line):
    """
    Backward-compatible wrapper around normalize_code_line().

    Returns the normalized form of `line`, or None when the line must be
    excluded from duplicate detection: comment-only lines, blank lines, and
    single-character lines consisting of '{', ';' or '}'.
    """
    normalized, _still_in_comment, _escaped = normalize_code_line(line)
    # Empty result: comment or blank line — excluded from detection.
    if not normalized:
        return None
    # Single structural characters carry no duplication signal.
    if normalized in ('{', ';', '}'):
        return None
    return normalized
def extract_assignment_sides(line):
    """
    Split an assignment statement into its left and right sides.

    Strips trailing "//" and single-line "/* */" comments first, then scans
    for an assignment operator while skipping string literals and the
    comparison operators ==, !=, <=, >=, === and !==.

    Returns:
        (is_assignment, left_side, right_side, operator). On success the
        right side has its trailing semicolon and all whitespace removed;
        on failure all of left/right/operator are None.
    """
    line = line.strip()
    if not line:
        return False, None, None, None
    # Remove comments first.
    # "//" comments:
    if '//' in line:
        # Find the first "//" that is not inside a string literal.
        in_string = False
        for i in range(len(line) - 1):
            if line[i] == '"' and (i == 0 or line[i-1] != '\\'):
                in_string = not in_string
            elif not in_string and line[i:i+2] == '//':
                line = line[:i].strip()  # drop the comment part
                break
    # "/* */" comments that open and close on this line:
    if '/*' in line and '*/' in line:
        # Find the first "/*" that is not inside a string literal.
        in_string = False
        comment_start = -1
        for i in range(len(line) - 1):
            if line[i] == '"' and (i == 0 or line[i-1] != '\\'):
                in_string = not in_string
            elif not in_string and line[i:i+2] == '/*':
                comment_start = i
                break
        if comment_start != -1:
            # Find the matching "*/".
            comment_end = line.find('*/', comment_start)
            if comment_end != -1:
                # Splice the comment out.
                before_comment = line[:comment_start]
                after_comment = line[comment_end+2:]
                line = (before_comment + after_comment).strip()
    # Supported assignment operators, longest first so compound forms win.
    assignment_ops = ['>>>=', '>>=', '<<=', '+=', '-=', '*=', '/=', '%=', '&=', '|=', '^=', '=']
    # Character scan position.
    i = 0
    n = len(line)
    while i < n:
        char = line[i]
        # Skip over string/char literals.
        if char == '"' or char == "'":
            quote_char = char
            i += 1
            while i < n and not (line[i] == quote_char and line[i-1] != '\\'):
                i += 1
            if i < n:
                i += 1
            continue
        # Skip two-character comparison operators.
        if i < n - 1:
            two_chars = line[i:i+2]
            if two_chars in ('==', '!=', '<=', '>='):
                i += 2
                continue
        # Skip JavaScript strict (in)equality operators.
        if i < n - 2:
            three_chars = line[i:i+3]
            if three_chars in ('===', '!=='):
                i += 3
                continue
        # Try each assignment operator at this position.
        for op in assignment_ops:
            op_len = len(op)
            if i + op_len <= n and line[i:i+op_len] == op:
                # Make sure this is not a fragment of another operator.
                is_valid_assignment = True
                # Character before the operator:
                if i > 0:
                    prev_char = line[i-1]
                    # An assignment cannot directly follow =, !, < or >.
                    if prev_char in '=!<>':
                        is_valid_assignment = False
                # Character after the operator:
                if i + op_len < n:
                    next_char = line[i+op_len]
                    # Cannot be followed by '=' (avoids mistaking == for =).
                    if next_char == '=':
                        is_valid_assignment = False
                if is_valid_assignment:
                    left = line[:i].strip()
                    right = line[i+op_len:].strip()
                    # Clean up the right-hand expression:
                    if right:
                        # drop a trailing semicolon,
                        if right.endswith(';'):
                            right = right[:-1].strip()
                        # and remove all whitespace.
                        right = re.sub(r'\s+', '', right)
                    return True, left, right, op
        i += 1
    return False, None, None, None
def normalize_assignment_side(side):
    """
    Canonicalize one side of an assignment (left or right) for comparison.

    Removes every whitespace character so formatting differences cannot
    affect equality; a falsy input yields "".
    """
    return re.sub(r'\s+', '', side) if side else ""
def is_constant_expression(expression):
    """
    Decide whether an expression (typically an assignment's right side) is
    a constant.

    Returns True for simple literals (decimal/hex/octal/binary numbers,
    char and string literals, true/false/null/NULL/None, case-insensitive)
    and for purely numeric arithmetic such as "2*(3+4)"; False for anything
    that contains a variable.
    """
    if not expression:
        return False
    # Normalize: drop all whitespace.
    expr = re.sub(r'\s+', '', expression)
    # Empty after stripping is not a constant.
    if not expr:
        return False
    # Common literal patterns.
    constant_patterns = [
        r'^-?\d+(\.\d+)?$',   # numbers: -10, 3.14, 0.5
        r'^0x[0-9A-Fa-f]+$',  # hexadecimal: 0xFF, 0x1A
        r'^0[0-7]+$',         # octal: 077, 0123
        r'^0b[01]+$',         # binary: 0b1010
        r"^'.*'$",            # char literal: 'a', '\n'
        r'^".*"$',            # string literal: "hello"
        r'^true$',            # boolean
        r'^false$',           # boolean
        r'^null$',            # null
        r'^NULL$',            # NULL
        r'^None$',            # Python None
    ]
    for pattern in constant_patterns:
        if re.match(pattern, expr, re.IGNORECASE):
            return True
    # Fall back to "numeric expression" detection: strip parentheses and
    # require the remainder to use only digits, '.' and arithmetic
    # operators (e/E allowed for scientific notation).
    test_expr = re.sub(r'[()]', '', expr)
    # FIX: require at least one digit. Previously a variable named "e"/"E"
    # (the exponent letters are in the whitelist), a bare operator run, or
    # "()" were all misclassified as constants.
    if not test_expr or not any(ch.isdigit() for ch in test_expr):
        return False
    constant_chars = set('0123456789.+-*/%eE')
    return all(char in constant_chars for char in test_expr)
def detect_duplicate_lines_gui(file_path, start_line, end_line, output_callback, status_callback, mode="full", compare_file=None, directory_path=None):
    """
    Find duplicate code lines in a file (GUI version) - full-text detection.

    Comment-only, blank and single-character lines are excluded via
    normalize_code_line_legacy(). Only the "single_file" branch scans lines
    here; other compare modes yield empty statistics (cross-file and
    directory scans are handled by their dedicated functions).

    Args:
        file_path: source file ("single_file"/"two_files" modes).
        start_line, end_line: 1-based inclusive range; 0/0 means whole file.
        output_callback: callable(str) receiving error messages.
        status_callback: callable(str) receiving progress messages.
        mode: compare-scope mode.
        compare_file, directory_path: kept for interface compatibility.

    Returns:
        Result dict with duplicate groups and statistics, or None on error.
    """
    if mode == "single_file" or mode == "two_files":
        if not os.path.exists(file_path):
            output_callback(f"错误: 文件不存在: {file_path}\n")
            return None
    elif mode == "directory" and directory_path:
        if not os.path.exists(directory_path):
            output_callback(f"错误: 目录不存在: {directory_path}\n")
            return None
    try:
        if mode == "single_file" or mode == "two_files":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        else:
            lines = []
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None
    # A non-positive range means "the whole file".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        if mode == "single_file" or mode == "two_files":
            end_line = len(lines)
        else:
            end_line = 0
    if mode == "single_file" or mode == "two_files":
        if end_line > len(lines):
            end_line = len(lines)
        if start_line < 1 or end_line < start_line:
            output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
            return None
    status_callback("开始检测重复代码行...")
    duplicates = defaultdict(list)  # normalized line -> [line numbers]
    line_details = []               # per-line info for every valid code line
    detail_by_number = {}           # line number -> detail dict (O(1) lookup)
    duplicate_groups = []
    if mode == "single_file":
        status_callback(f"正在处理文件: {os.path.basename(file_path)}")
        status_callback(f"检查行数范围: {start_line} - {end_line}")
        # end_line is already clamped to len(lines) above.
        for i in range(start_line, end_line + 1):
            original_line = lines[i-1].rstrip('\n')
            normalized_line = normalize_code_line_legacy(original_line)
            # Only valid code lines (no comments / blanks / single chars).
            if normalized_line is not None and normalized_line != "":
                duplicates[normalized_line].append(i)
                detail = {
                    "line_number": i,
                    "original": original_line,
                    "normalized": normalized_line
                }
                line_details.append(detail)
                detail_by_number[i] = detail
        status_callback(f"已完成代码行标准化,正在分析重复行...")
        # Collect every normalized line that occurs more than once.
        for normalized_line, line_numbers in duplicates.items():
            if len(line_numbers) > 1:
                # FIX: O(1) dict lookup per line instead of the previous
                # O(n) scan of line_details for every duplicate occurrence.
                original_lines = [detail_by_number[num]["original"] for num in line_numbers]
                duplicate_groups.append({
                    "normalized_content": normalized_line,
                    "occurrences": len(line_numbers),
                    "line_numbers": line_numbers,
                    "original_lines": original_lines
                })
    # Statistics.
    if mode == "single_file":
        actual_lines_in_range = end_line - start_line + 1
        if start_line > len(lines) or end_line < 1:
            actual_lines_in_range = 0
    else:
        actual_lines_in_range = 0
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = 0.0
    if actual_lines_in_range > 0:
        duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100
    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path) if file_path else "",
        "total_lines": len(lines) if mode in ["single_file", "two_files"] else 0,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "unique_code_lines": len(duplicates),  # every bucket is non-empty
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "full",  # full-text detection
        "compare_mode": mode
    }
    status_callback("全文检测完成,正在生成结果...")
    return result
def detect_adjacent_duplicate_lines_gui(file_path, start_line, end_line, output_callback, status_callback):
    """
    Detect adjacent duplicate lines (each line is compared only to its
    immediate previous and next lines).

    Three consecutive identical lines are merged into a single
    "triple_adjacent" group, and any line belonging to a triple is excluded
    from "double_adjacent" pairing so the same lines are not counted twice.
    Single-character lines are excluded via normalize_code_line_legacy().

    Args:
        file_path: source file to scan.
        start_line, end_line: 1-based inclusive range; 0/0 means whole file.
        output_callback: callable(str) receiving error messages.
        status_callback: callable(str) receiving progress messages.

    Returns:
        Result dict (detection_mode "adjacent") or None on error.

    NOTE(review): four or more identical consecutive lines produce
    overlapping triples (1-3, 2-4, ...), so runs longer than three are
    reported as multiple overlapping groups.
    """
    if not os.path.exists(file_path):
        output_callback(f"错误: 文件不存在: {file_path}\n")
        return None
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None
    # A non-positive range means "the whole file".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        end_line = len(lines)
    if end_line > len(lines):
        end_line = len(lines)
    if start_line < 1 or end_line < start_line:
        output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
        return None
    status_callback("开始检测相邻重复行...")
    # Process every line in the requested range.
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")
    # Pre-normalize all lines.
    line_details = []
    normalized_lines = {}  # line number -> normalized content
    original_lines = {}    # line number -> original content
    for i in range(1, len(lines) + 1):
        if start_line <= i <= end_line:
            original_line = lines[i-1].rstrip('\n')
            normalized_line = normalize_code_line_legacy(original_line)
            # Keep only valid code lines.
            if normalized_line is not None and normalized_line != "":
                normalized_lines[i] = normalized_line
                original_lines[i] = original_line
                line_details.append({
                    "line_number": i,
                    "original": original_line,
                    "normalized": normalized_line
                })
    status_callback("正在分析相邻行重复情况...")
    # Track processed groups.
    processed_lines = set()   # NOTE(review): never read; kept as-is
    adjacent_duplicates = []  # final list of adjacent duplicate groups
    # Step 1: find all runs of three consecutive identical lines.
    triple_groups = []        # triple-adjacent groups
    triple_processed = set()  # keys of triples already recorded
    for i in range(start_line, end_line - 1):  # note: upper bound end_line-1
        if i > len(lines) - 2:
            break
        # All three line numbers must map to valid code lines.
        if (i in normalized_lines and
            i+1 in normalized_lines and
            i+2 in normalized_lines):
            line1 = normalized_lines[i]
            line2 = normalized_lines[i+1]
            line3 = normalized_lines[i+2]
            # All three identical.
            if line1 == line2 == line3:
                group_key = f"{i},{i+1},{i+2}"
                if group_key not in triple_processed:
                    triple_groups.append({
                        "start_line": i,
                        "end_line": i+2,
                        "normalized_content": line1,
                        "original_lines": [
                            original_lines[i],
                            original_lines[i+1],
                            original_lines[i+2]
                        ]
                    })
                    triple_processed.add(group_key)
    # Step 2: find two-line adjacent duplicates, excluding any line that is
    # already part of a triple.
    double_groups = []        # double-adjacent groups
    double_processed = set()  # keys of pairs already recorded
    for i in range(start_line, end_line + 1):
        if i > len(lines) or i not in normalized_lines:
            continue
        current_line = normalized_lines[i]
        current_original = original_lines[i]
        # Is the current line inside some triple?
        in_triple_group = False
        for triple in triple_groups:
            if triple["start_line"] <= i <= triple["end_line"]:
                in_triple_group = True
                break
        if in_triple_group:
            # Skip lines covered by a triple.
            continue
        # Duplicate of the previous line?
        if i-1 in normalized_lines and normalized_lines[i-1] == current_line:
            # The previous line must not belong to a triple either.
            prev_in_triple = False
            for triple in triple_groups:
                if triple["start_line"] <= i-1 <= triple["end_line"]:
                    prev_in_triple = True
                    break
            if not prev_in_triple:
                group_key = f"{i-1},{i}"
                if group_key not in double_processed:
                    double_groups.append({
                        "start_line": i-1,
                        "end_line": i,
                        "normalized_content": current_line,
                        "original_lines": [
                            original_lines[i-1],
                            current_original
                        ]
                    })
                    double_processed.add(group_key)
        # Duplicate of the next line only (not of the previous one)?
        elif i+1 in normalized_lines and normalized_lines[i+1] == current_line:
            # The next line must not belong to a triple either.
            next_in_triple = False
            for triple in triple_groups:
                if triple["start_line"] <= i+1 <= triple["end_line"]:
                    next_in_triple = True
                    break
            if not next_in_triple:
                group_key = f"{i},{i+1}"
                if group_key not in double_processed:
                    double_groups.append({
                        "start_line": i,
                        "end_line": i+1,
                        "normalized_content": current_line,
                        "original_lines": [
                            current_original,
                            original_lines[i+1]
                        ]
                    })
                    double_processed.add(group_key)
    # Step 3: merge all groups into the result list.
    for triple in triple_groups:
        adjacent_duplicates.append({
            "normalized_content": triple["normalized_content"],
            "occurrences": 3,
            "line_numbers": [triple["start_line"], triple["start_line"]+1, triple["end_line"]],
            "original_lines": triple["original_lines"],
            "duplicate_type": "triple_adjacent"  # three adjacent duplicates
        })
    for double in double_groups:
        adjacent_duplicates.append({
            "normalized_content": double["normalized_content"],
            "occurrences": 2,
            "line_numbers": [double["start_line"], double["end_line"]],
            "original_lines": double["original_lines"],
            "duplicate_type": "double_adjacent"  # two adjacent duplicates
        })
    # Sort by first line number.
    adjacent_duplicates.sort(key=lambda x: x["line_numbers"][0])
    # Statistics.
    actual_lines_in_range = end_line - start_line + 1
    if start_line > len(lines) or end_line < 1:
        actual_lines_in_range = 0
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in adjacent_duplicates)
    duplicate_rate = 0.0
    if actual_lines_in_range > 0:
        duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100
    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path),
        "total_lines": len(lines),
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "unique_code_lines": len(set([detail["normalized"] for detail in line_details])),
        "duplicate_groups": adjacent_duplicates,
        "duplicate_groups_count": len(adjacent_duplicates),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "adjacent"  # adjacent-line detection
    }
    status_callback("相邻行检测完成,正在生成结果...")
    return result
def detect_left_side_equality_gui(file_path, start_line, end_line, output_callback, status_callback, mode="single_file", compare_file=None, directory_path=None):
    """
    Detect assignments whose left-hand (assigned) expressions are equal.

    Only the "single_file" branch scans lines here; other compare modes are
    handled by the dedicated cross-file/directory functions.

    Args:
        file_path: source file ("single_file"/"two_files" modes).
        start_line, end_line: 1-based inclusive range; 0/0 means whole file.
        output_callback: callable(str) receiving error messages.
        status_callback: callable(str) receiving progress messages.
        mode: compare-scope mode.
        compare_file, directory_path: kept for interface compatibility.

    Returns:
        Result dict (detection_mode "left_side") or None on error.
    """
    if mode == "single_file" or mode == "two_files":
        if not os.path.exists(file_path):
            output_callback(f"错误: 文件不存在: {file_path}\n")
            return None
    elif mode == "directory" and directory_path:
        if not os.path.exists(directory_path):
            output_callback(f"错误: 目录不存在: {directory_path}\n")
            return None
    try:
        if mode == "single_file" or mode == "two_files":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        else:
            lines = []
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None
    # A non-positive range means "the whole file".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        if mode == "single_file" or mode == "two_files":
            end_line = len(lines)
        else:
            end_line = 0
    if mode == "single_file" or mode == "two_files":
        if end_line > len(lines):
            end_line = len(lines)
        if start_line < 1 or end_line < start_line:
            output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
            return None
    status_callback("开始检测等号左侧相等...")
    left_side_map = defaultdict(list)  # normalized left side -> [line numbers]
    line_details = []                  # per-assignment details
    detail_by_number = {}              # line number -> detail dict (O(1) lookup)
    duplicate_groups = []
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")
    # Scan the requested range.
    for i in range(1, len(lines) + 1):
        if start_line <= i <= end_line:
            original_line = lines[i-1].rstrip('\n')
            # Is this an assignment statement?
            is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line)
            if is_assignment and left_side:
                normalized_left = normalize_assignment_side(left_side)
                if normalized_left:  # skip empty left sides
                    left_side_map[normalized_left].append(i)
                    detail = {
                        "line_number": i,
                        "original_line": original_line,
                        "left_side": left_side,
                        "normalized_left": normalized_left,
                        "right_side": right_side,
                        "operator": operator
                    }
                    line_details.append(detail)
                    detail_by_number[i] = detail
    status_callback(f"已分析{len(line_details)}个赋值语句,正在检测左侧相等...")
    # Group assignments that share the same normalized left side.
    for normalized_left, line_numbers in left_side_map.items():
        if len(line_numbers) > 1:
            # FIX: O(1) dict lookups instead of the previous O(n) scan of
            # line_details for every occurrence.
            original_lines = [detail_by_number[num]["original_line"] for num in line_numbers]
            right_sides = [detail_by_number[num]["right_side"] for num in line_numbers]
            operators = [detail_by_number[num]["operator"] for num in line_numbers]
            duplicate_groups.append({
                "normalized_content": normalized_left,
                "occurrences": len(line_numbers),
                "line_numbers": line_numbers,
                "original_lines": original_lines,
                "right_sides": right_sides,
                "operators": operators,
                "side": "left"  # left-side detection marker
            })
    # Statistics.
    if mode == "single_file":
        actual_lines_in_range = end_line - start_line + 1
        if start_line > len(lines) or end_line < 1:
            actual_lines_in_range = 0
    else:
        actual_lines_in_range = 0
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = 0.0
    if actual_lines_in_range > 0:
        duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100
    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path) if file_path else "",
        "total_lines": len(lines) if mode in ["single_file", "two_files"] else 0,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "assignment_statements": len(line_details),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "left_side",  # left-side equality detection
        "compare_mode": mode
    }
    status_callback("等号左侧重复检测完成")
    return result
def detect_right_side_equality_gui(file_path, start_line, end_line, output_callback, status_callback, mode="single_file", compare_file=None, directory_path=None):
    """
    Detect assignments whose right-hand (value) expressions are equal.

    Constant right-hand sides (per is_constant_expression) are recorded but
    excluded from duplicate detection. Only the "single_file" branch scans
    lines here; other compare modes are handled by dedicated functions.

    Args:
        file_path: source file ("single_file"/"two_files" modes).
        start_line, end_line: 1-based inclusive range; 0/0 means whole file.
        output_callback: callable(str) receiving error messages.
        status_callback: callable(str) receiving progress messages.
        mode: compare-scope mode.
        compare_file, directory_path: kept for interface compatibility.

    Returns:
        Result dict (detection_mode "right_side", including counts of
        constant vs. variable expressions) or None on error.
    """
    if mode == "single_file" or mode == "two_files":
        if not os.path.exists(file_path):
            output_callback(f"错误: 文件不存在: {file_path}\n")
            return None
    elif mode == "directory" and directory_path:
        if not os.path.exists(directory_path):
            output_callback(f"错误: 目录不存在: {directory_path}\n")
            return None
    try:
        if mode == "single_file" or mode == "two_files":
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                lines = f.readlines()
        else:
            lines = []
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None
    # A non-positive range means "the whole file".
    if start_line <= 0 and end_line <= 0:
        start_line = 1
        if mode == "single_file" or mode == "two_files":
            end_line = len(lines)
        else:
            end_line = 0
    if mode == "single_file" or mode == "two_files":
        if end_line > len(lines):
            end_line = len(lines)
        if start_line < 1 or end_line < start_line:
            output_callback(f"错误: 行数范围无效: {start_line}-{end_line}\n")
            return None
    status_callback("开始检测等号右侧相等(排除常数)...")
    right_side_map = defaultdict(list)  # normalized right side -> [line numbers]
    line_details = []                   # every assignment found (incl. constants)
    detail_by_number = {}               # line number -> detail dict (O(1) lookup)
    duplicate_groups = []
    status_callback(f"正在处理文件: {os.path.basename(file_path)}")
    status_callback(f"检查行数范围: {start_line} - {end_line}")
    # Scan the requested range.
    for i in range(1, len(lines) + 1):
        if start_line <= i <= end_line:
            original_line = lines[i-1].rstrip('\n')
            # Is this an assignment statement?
            is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line)
            if is_assignment and right_side:
                # Constants are tracked but excluded from detection.
                if not is_constant_expression(right_side):
                    normalized_right = normalize_assignment_side(right_side)
                    if normalized_right:  # skip empty right sides
                        right_side_map[normalized_right].append(i)
                        detail = {
                            "line_number": i,
                            "original_line": original_line,
                            "left_side": left_side,
                            "right_side": right_side,
                            "normalized_right": normalized_right,
                            "operator": operator
                        }
                        line_details.append(detail)
                        detail_by_number[i] = detail
                else:
                    # Record the constant expression without detecting it.
                    detail = {
                        "line_number": i,
                        "original_line": original_line,
                        "left_side": left_side,
                        "right_side": right_side,
                        "normalized_right": "",
                        "operator": operator,
                        "is_constant": True
                    }
                    line_details.append(detail)
                    detail_by_number[i] = detail
    status_callback(f"已分析{len(line_details)}个赋值语句,正在检测右侧相等(排除常数)...")
    # Group assignments that share the same normalized right side.
    for normalized_right, line_numbers in right_side_map.items():
        if len(line_numbers) > 1:
            # FIX: O(1) dict lookups instead of the previous O(n) scan of
            # line_details for every occurrence.
            original_lines = [detail_by_number[num]["original_line"] for num in line_numbers]
            left_sides = [detail_by_number[num]["left_side"] for num in line_numbers]
            operators = [detail_by_number[num]["operator"] for num in line_numbers]
            duplicate_groups.append({
                "normalized_content": normalized_right,
                "occurrences": len(line_numbers),
                "line_numbers": line_numbers,
                "original_lines": original_lines,
                "left_sides": left_sides,
                "operators": operators,
                "side": "right"  # right-side detection marker
            })
    # Statistics.
    if mode == "single_file":
        actual_lines_in_range = end_line - start_line + 1
        if start_line > len(lines) or end_line < 1:
            actual_lines_in_range = 0
    else:
        actual_lines_in_range = 0
    # Count constant expressions separately.
    constant_expressions = len([d for d in line_details if d.get("is_constant", False)])
    variable_expressions = len(line_details) - constant_expressions
    total_duplicate_lines = sum(len(group["line_numbers"]) for group in duplicate_groups)
    duplicate_rate = 0.0
    if actual_lines_in_range > 0:
        duplicate_rate = (total_duplicate_lines / actual_lines_in_range) * 100
    result = {
        "success": True,
        "file_path": file_path,
        "file_name": os.path.basename(file_path) if file_path else "",
        "total_lines": len(lines) if mode in ["single_file", "two_files"] else 0,
        "start_line": start_line,
        "end_line": end_line,
        "actual_lines_in_range": actual_lines_in_range,
        "assignment_statements": len(line_details),
        "variable_expressions": variable_expressions,
        "constant_expressions": constant_expressions,
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_duplicate_lines,
        "duplicate_rate": duplicate_rate,
        "detection_mode": "right_side",  # right-side equality detection
        "compare_mode": mode
    }
    status_callback(f"等号右侧重复检测完成(已排除{constant_expressions}个常数表达式)")
    return result
def detect_duplicate_between_two_files(file1_path, file2_path, detection_mode, output_callback, status_callback):
    """
    Detect duplicated code between two files, dispatching by detection mode.

    detection_mode: "full", "left_side" or "right_side"; any other value
    yields None. Also returns None when either file is missing or cannot
    be read.
    """
    if not (os.path.exists(file1_path) and os.path.exists(file2_path)):
        output_callback("错误: 选择的文件不存在\n")
        return None
    status_callback(f"开始检测文件之间的重复: {os.path.basename(file1_path)}{os.path.basename(file2_path)}")
    try:
        with open(file1_path, 'r', encoding='utf-8', errors='ignore') as f1:
            file1_lines = f1.readlines()
        with open(file2_path, 'r', encoding='utf-8', errors='ignore') as f2:
            file2_lines = f2.readlines()
    except Exception as e:
        output_callback(f"读取文件失败: {e}\n")
        return None
    # Dispatch table: mode -> detector invocation.
    dispatch = {
        "full": lambda: detect_duplicate_between_files_full(
            file1_path, file2_path, file1_lines, file2_lines, output_callback, status_callback),
        "left_side": lambda: detect_assignment_duplicate_between_files(
            file1_path, file2_path, file1_lines, file2_lines, "left", output_callback, status_callback),
        "right_side": lambda: detect_assignment_duplicate_between_files(
            file1_path, file2_path, file1_lines, file2_lines, "right", output_callback, status_callback),
    }
    handler = dispatch.get(detection_mode)
    return handler() if handler is not None else None
def detect_duplicate_between_files_full(file1_path, file2_path, file1_lines, file2_lines, output_callback, status_callback):
    """
    Full-text duplicate detection between two files.

    Normalizes every valid line of both files, then reports each normalized
    line that appears in both, together with line numbers and original text
    from each file.
    """
    def _index_lines(raw_lines):
        # Map: normalized line -> [(line number, original text), ...]
        index = defaultdict(list)
        for lineno, raw in enumerate(raw_lines, 1):
            text = raw.rstrip('\n')
            normalized = normalize_code_line_legacy(text)
            if normalized:  # valid code lines only
                index[normalized].append((lineno, text))
        return index

    status_callback("正在标准化第一个文件...")
    file1_index = _index_lines(file1_lines)
    status_callback("正在标准化第二个文件...")
    file2_index = _index_lines(file2_lines)
    status_callback("正在比较两个文件的重复行...")
    duplicate_groups = []
    # Every normalized line present in both files forms one group.
    for normalized, occ1 in file1_index.items():
        occ2 = file2_index.get(normalized)
        if not occ2:
            continue
        duplicate_groups.append({
            "normalized_content": normalized,
            "occurrences": len(occ1) + len(occ2),
            "file1_line_numbers": [num for num, _ in occ1],
            "file2_line_numbers": [num for num, _ in occ2],
            "file1_original_lines": [text for _, text in occ1],
            "file2_original_lines": [text for _, text in occ2],
            "duplicate_type": "cross_file"
        })
    result = {
        "success": True,
        "file1_path": file1_path,
        "file2_path": file2_path,
        "file1_name": os.path.basename(file1_path),
        "file2_name": os.path.basename(file2_path),
        "file1_total_lines": len(file1_lines),
        "file2_total_lines": len(file2_lines),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": sum(len(g["file1_line_numbers"]) + len(g["file2_line_numbers"]) for g in duplicate_groups),
        "detection_mode": "full",
        "compare_mode": "two_files"
    }
    status_callback("文件间重复检测完成")
    return result
def detect_assignment_duplicate_between_files(file1_path, file2_path, file1_lines, file2_lines, side, output_callback, status_callback):
    """
    Detect assignment-statement duplication between two files.

    side: "left" compares the assigned (left-hand) expressions; "right"
    compares the value (right-hand) expressions, skipping constant values.
    One group is reported for each normalized expression occurring in both
    files (true cross-file comparison, not within-file repetition).

    Returns a result dict (detection_mode "left_side"/"right_side",
    compare_mode "two_files").
    """
    def _collect(raw_lines):
        # Returns (counts, details):
        #   counts:  normalized side -> number of occurrences in the file
        #   details: normalized side -> [(lineno, original, other side, op)]
        counts = {}
        details = {}
        for lineno, raw in enumerate(raw_lines, 1):
            original_line = raw.rstrip('\n')
            is_assignment, left_side, right_side, operator = extract_assignment_sides(original_line)
            if not is_assignment:
                continue
            if side == "left" and left_side:
                key_source, other = left_side, right_side
            elif side == "right" and right_side and not is_constant_expression(right_side):
                key_source, other = right_side, left_side
            else:
                continue
            normalized_side = normalize_assignment_side(key_source)
            if normalized_side:
                counts[normalized_side] = counts.get(normalized_side, 0) + 1
                details.setdefault(normalized_side, []).append((lineno, original_line, other, operator))
        return counts, details

    status_callback(f"正在提取{side}侧赋值语句...")
    # FIX (decomposition): the two per-file extraction loops were
    # copy-pasted; both files now go through the single _collect helper.
    file1_assignments, file1_details = _collect(file1_lines)
    file2_assignments, file2_details = _collect(file2_lines)
    status_callback(f"正在比较两个文件的{side}侧重复...")
    duplicate_groups = []
    # Expressions whose normalized form appears in both files.
    for normalized_side in file1_assignments:
        if normalized_side in file2_assignments:
            # This is a cross-file duplicate.
            file1_occurrences = file1_details[normalized_side]
            file2_occurrences = file2_details[normalized_side]
            duplicate_groups.append({
                "normalized_content": normalized_side,
                "occurrences": len(file1_occurrences) + len(file2_occurrences),
                "file1_line_numbers": [occ[0] for occ in file1_occurrences],
                "file2_line_numbers": [occ[0] for occ in file2_occurrences],
                "file1_original_lines": [occ[1] for occ in file1_occurrences],
                "file2_original_lines": [occ[1] for occ in file2_occurrences],
                "file1_other_sides": [occ[2] for occ in file1_occurrences],
                "file2_other_sides": [occ[2] for occ in file2_occurrences],
                "duplicate_type": f"cross_file_{side}"
            })
    mode_name = "left_side" if side == "left" else "right_side"
    result = {
        "success": True,
        "file1_path": file1_path,
        "file2_path": file2_path,
        "file1_name": os.path.basename(file1_path),
        "file2_name": os.path.basename(file2_path),
        "file1_total_lines": len(file1_lines),
        "file2_total_lines": len(file2_lines),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": sum(len(group["file1_line_numbers"]) + len(group["file2_line_numbers"]) for group in duplicate_groups),
        "detection_mode": mode_name,
        "compare_mode": "two_files"
    }
    status_callback(f"文件间{side}侧重复检测完成,找到 {len(duplicate_groups)} 组跨文件重复")
    return result
def detect_duplicate_in_directory(directory_path, detection_mode, output_callback, status_callback):
"""
检测目录下所有文件之间的重复代码
"""
if not os.path.exists(directory_path):
output_callback(f"错误: 目录不存在: {directory_path}\n")
return None
status_callback(f"开始检测目录: {directory_path}")
# 收集目录下所有源代码文件
source_files = []
for root, dirs, files in os.walk(directory_path):
for file in files:
if file.endswith(('.c', '.cpp', '.h', '.hpp', '.java', '.py', '.js', '.ts')):
file_path = os.path.join(root, file)
source_files.append(file_path)
if not source_files:
output_callback("目录中没有找到源代码文件\n")
return None
status_callback(f"找到 {len(source_files)} 个源代码文件")
if detection_mode == "full":
return detect_directory_duplicate_full(directory_path, source_files, output_callback, status_callback)
elif detection_mode == "left_side":
return detect_directory_assignment_duplicate(directory_path, source_files, "left", output_callback, status_callback)
elif detection_mode == "right_side":
return detect_directory_assignment_duplicate(directory_path, source_files, "right", output_callback, status_callback)
return None
def detect_directory_duplicate_full(directory_path, source_files, output_callback, status_callback):
    """Full-text duplicate detection across all files in a directory.

    Indexes every normalized code line by content and reports only groups
    whose occurrences span more than one file.

    Args:
        directory_path: Directory being analysed (recorded in the result).
        source_files: List of file paths to scan.
        output_callback: Callable receiving user-facing error text.
        status_callback: Callable receiving short progress strings.

    Returns:
        Result dict with the cross-file duplicate groups and statistics.
    """
    # normalized line -> [(file path, line number, original line), ...]
    line_index = defaultdict(list)
    status_callback("正在处理所有文件...")
    for file_idx, file_path in enumerate(source_files, 1):
        status_callback(f"处理文件 {file_idx}/{len(source_files)}: {os.path.basename(file_path)}")
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                raw_lines = f.readlines()
            for line_no, raw in enumerate(raw_lines, 1):
                stripped = raw.rstrip('\n')
                normalized = normalize_code_line_legacy(stripped)
                if normalized:  # skip blank / comment-only lines
                    line_index[normalized].append((file_path, line_no, stripped))
        except Exception as e:
            output_callback(f"读取文件失败 {file_path}: {e}\n")
            continue
    status_callback("正在分析重复行...")
    duplicate_groups = []
    for normalized, hits in line_index.items():
        if len(hits) < 2:
            continue
        # Regroup the flat hit list by owning file.
        per_file = defaultdict(list)
        for hit_path, hit_line, hit_text in hits:
            per_file[hit_path].append((hit_line, hit_text))
        # Only repetition that crosses file boundaries counts here.
        if len(per_file) > 1:
            duplicate_groups.append({
                "normalized_content": normalized,
                "occurrences": len(hits),
                "file_occurrences": per_file,
                "duplicate_type": "directory_cross_file"
            })
    total_dup_lines = sum(
        len(entries)
        for group in duplicate_groups
        for entries in group["file_occurrences"].values()
    )
    result = {
        "success": True,
        "directory_path": directory_path,
        "source_files_count": len(source_files),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": total_dup_lines,
        "detection_mode": "full",
        "compare_mode": "directory"
    }
    status_callback(f"目录重复检测完成,找到 {len(duplicate_groups)} 组跨文件重复")
    return result
def detect_directory_assignment_duplicate(directory_path, source_files, side, output_callback, status_callback):
    """Assignment-side duplicate detection across all files in a directory.

    Args:
        directory_path: Directory being analysed (recorded in the result).
        source_files: List of file paths to scan.
        side: "left" compares left-hand sides of assignments; "right"
            compares right-hand sides (constant expressions excluded).
        output_callback: Callable receiving user-facing error text.
        status_callback: Callable receiving short progress strings.

    Returns:
        Result dict with the cross-file duplicate groups and statistics.
    """
    # normalized side -> [(file path, line number, original line, other side), ...]
    side_index = defaultdict(list)
    status_callback(f"正在提取所有文件的{side}侧赋值语句...")
    for file_idx, file_path in enumerate(source_files, 1):
        status_callback(f"处理文件 {file_idx}/{len(source_files)}: {os.path.basename(file_path)}")
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                raw_lines = f.readlines()
            for line_no, raw in enumerate(raw_lines, 1):
                code_line = raw.rstrip('\n')
                is_assignment, left_side, right_side, operator = extract_assignment_sides(code_line)
                if not is_assignment:
                    continue
                if side == "left" and left_side:
                    key = normalize_assignment_side(left_side)
                    if key:
                        side_index[key].append((file_path, line_no, code_line, right_side))
                elif side == "right" and right_side and not is_constant_expression(right_side):
                    key = normalize_assignment_side(right_side)
                    if key:
                        side_index[key].append((file_path, line_no, code_line, left_side))
        except Exception as e:
            output_callback(f"读取文件失败 {file_path}: {e}\n")
            continue
    status_callback(f"正在分析{side}侧重复...")
    duplicate_groups = []
    for key, hits in side_index.items():
        if len(hits) < 2:
            continue
        # Regroup the flat hit list by owning file.
        per_file = defaultdict(list)
        for hit_path, hit_line, hit_text, other in hits:
            per_file[hit_path].append((hit_line, hit_text, other))
        # Only repetition that crosses file boundaries counts here.
        if len(per_file) > 1:
            duplicate_groups.append({
                "normalized_content": key,
                "occurrences": len(hits),
                "file_occurrences": per_file,
                "duplicate_type": f"directory_cross_file_{side}"
            })
    result = {
        "success": True,
        "directory_path": directory_path,
        "source_files_count": len(source_files),
        "duplicate_groups": duplicate_groups,
        "duplicate_groups_count": len(duplicate_groups),
        "total_duplicate_lines": sum(
            len(entries)
            for group in duplicate_groups
            for entries in group["file_occurrences"].values()
        ),
        "detection_mode": "left_side" if side == "left" else "right_side",
        "compare_mode": "directory"
    }
    status_callback(f"目录{side}侧重复检测完成,找到 {len(duplicate_groups)} 组跨文件重复")
    return result
def display_results(result, output_callback, status_callback):
    """Render a detection result as a text report.

    Supports all three compare scopes (single file / two files / directory)
    and all detection modes (full / adjacent / left_side / right_side).

    Args:
        result: Result dict produced by one of the detect_* functions;
            nothing is printed unless result["success"] is truthy.
        output_callback: Callable receiving the formatted report text.
        status_callback: Callable receiving a short status string when done.
    """
    if not result or not result.get("success", False):
        output_callback("检测失败或无重复代码行\n")
        return
    compare_mode = result.get("compare_mode", "single_file")
    detection_mode = result.get("detection_mode", "full")
    # Human-readable label for the detection mode
    mode_info = "全文重复检测"
    if detection_mode == "adjacent":
        mode_info = "相邻行重复检测"
    elif detection_mode == "left_side":
        mode_info = "等号左侧重复检测"
    elif detection_mode == "right_side":
        mode_info = "等号右侧重复检测(排除常数)"
    # Human-readable label for the compare scope
    range_info = "单文件内检测"
    if compare_mode == "two_files":
        range_info = "双文件间检测"
    elif compare_mode == "directory":
        range_info = "目录下多文件检测"
    # Report header and basic information section
    output_callback("=" * 70 + "\n")
    output_callback(f"重复代码检测结果 ({mode_info} - {range_info})\n")
    output_callback("=" * 70 + "\n")
    if compare_mode == "single_file":
        file_path = result["file_path"]
        start_line = result["start_line"]
        end_line = result["end_line"]
        output_callback(f"文件: {file_path}\n")
        output_callback(f"检查行数范围: {start_line} - {end_line}\n")
        if detection_mode in ["left_side", "right_side"]:
            output_callback(f"范围内赋值语句数: {result.get('assignment_statements', 0)}\n")
            if detection_mode == "right_side":
                output_callback(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n")
                output_callback(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n")
        output_callback(f"范围内总行数: {result['actual_lines_in_range']}\n")
    elif compare_mode == "two_files":
        file1_name = result["file1_name"]
        file2_name = result["file2_name"]
        file1_lines = result["file1_total_lines"]
        file2_lines = result["file2_total_lines"]
        output_callback(f"文件1: {result['file1_path']}\n")
        output_callback(f" 文件名: {file1_name} (总行数: {file1_lines})\n")
        output_callback(f"文件2: {result['file2_path']}\n")
        output_callback(f" 文件名: {file2_name} (总行数: {file2_lines})\n")
    elif compare_mode == "directory":
        directory_path = result["directory_path"]
        source_files_count = result["source_files_count"]
        output_callback(f"目录: {directory_path}\n")
        output_callback(f"源代码文件数: {source_files_count}\n")
    output_callback("-" * 70 + "\n")
    # Duplicate-group section
    duplicate_groups = result.get("duplicate_groups", [])
    if not duplicate_groups:
        # No duplicates: print a scope/mode-specific "nothing found" message.
        if compare_mode == "two_files":
            output_callback("在两个文件之间未找到重复的代码行\n")
        elif compare_mode == "directory":
            output_callback("在目录下的文件之间未找到重复的代码行\n")
        else:
            if detection_mode == "adjacent":
                output_callback("在指定范围内未找到相邻重复的代码行\n")
            elif detection_mode == "left_side":
                output_callback("在指定范围内未找到左侧相等的赋值语句\n")
            elif detection_mode == "right_side":
                output_callback("在指定范围内未找到右侧相等的变量表达式\n")
            else:
                output_callback("在指定范围内未找到重复的代码行\n")
    else:
        for i, group in enumerate(duplicate_groups, 1):
            if compare_mode == "two_files":
                output_callback(f"第{i}组跨文件重复:\n")
                output_callback(f" 重复内容: {group['normalized_content']}\n")
                output_callback(f" 总重复次数: {group['occurrences']}\n")
                # Occurrences in file 1 (with the other assignment side, if any)
                output_callback(f" {result['file1_name']} 中的行:\n")
                for idx, (line_num, original_line) in enumerate(zip(group["file1_line_numbers"], group["file1_original_lines"])):
                    output_callback(f"行{line_num}: {original_line}\n")
                    if detection_mode in ["left_side", "right_side"] and idx < len(group.get("file1_other_sides", [])):
                        other_side = group["file1_other_sides"][idx]
                        if other_side:
                            side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
                            output_callback(f" {side_label}: {other_side}\n")
                # Occurrences in file 2
                output_callback(f" {result['file2_name']} 中的行:\n")
                for idx, (line_num, original_line) in enumerate(zip(group["file2_line_numbers"], group["file2_original_lines"])):
                    output_callback(f"行{line_num}: {original_line}\n")
                    if detection_mode in ["left_side", "right_side"] and idx < len(group.get("file2_other_sides", [])):
                        other_side = group["file2_other_sides"][idx]
                        if other_side:
                            side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
                            output_callback(f" {side_label}: {other_side}\n")
            elif compare_mode == "directory":
                output_callback(f"第{i}组跨文件重复:\n")
                output_callback(f" 重复内容: {group['normalized_content']}\n")
                output_callback(f" 总重复次数: {group['occurrences']} (跨 {len(group['file_occurrences'])} 个文件)\n")
                for file_path, occurrences in group["file_occurrences"].items():
                    file_name = os.path.basename(file_path)
                    output_callback(f" {file_name} 中的行:\n")
                    for occ in occurrences:
                        if len(occ) >= 2:  # at least (line number, original line)
                            output_callback(f"行{occ[0]}: {occ[1]}\n")
                            if len(occ) >= 3 and occ[2]:  # also carries the other assignment side
                                side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
                                output_callback(f" {side_label}: {occ[2]}\n")
            else:  # single_file
                # NOTE(review): 'side' and 'occurrences' are assigned but never
                # read below — candidates for removal.
                side = group.get("side", "")
                if detection_mode == "adjacent":
                    dup_type = group.get("duplicate_type", "")
                    occurrences = group.get("occurrences", 2)
                    if dup_type == "triple_adjacent":
                        desc = f"第{i}组相邻重复行号 (三行连续重复): "
                    elif dup_type == "double_adjacent":
                        desc = f"第{i}组相邻重复行号 (两行相邻重复): "
                    else:
                        desc = f"第{i}组相邻重复行号: "
                elif detection_mode == "left_side":
                    desc = f"第{i}组左侧相同赋值 (行号): "
                elif detection_mode == "right_side":
                    desc = f"第{i}组右侧相同变量表达式 (行号): "
                else:
                    desc = f"第{i}组重复行号: "
                output_callback(f"{desc}{', '.join(map(str, group['line_numbers']))}\n")
                if detection_mode == "left_side":
                    output_callback(f" 相同左侧: {group['normalized_content']}\n")
                    for idx, (line_num, original_line, right_side, operator) in enumerate(zip(
                        group['line_numbers'], group['original_lines'],
                        group.get('right_sides', []), group.get('operators', [])
                    )):
                        output_callback(f"行{line_num}: {original_line}\n")
                        if right_side:
                            output_callback(f" 右侧表达式: {right_side}\n")
                elif detection_mode == "right_side":
                    output_callback(f" 相同右侧变量表达式: {group['normalized_content']}\n")
                    for idx, (line_num, original_line, left_side, operator) in enumerate(zip(
                        group['line_numbers'], group['original_lines'],
                        group.get('left_sides', []), group.get('operators', [])
                    )):
                        output_callback(f"行{line_num}: {original_line}\n")
                        if left_side:
                            output_callback(f" 左侧变量: {left_side}\n")
                else:
                    output_callback(f" 重复代码: {group['normalized_content']}\n")
                    for line_num, original_line in zip(group['line_numbers'], group['original_lines']):
                        output_callback(f"行{line_num}: {original_line}\n")
            output_callback("\n")
    # Summary statistics section
    output_callback("-" * 70 + "\n")
    output_callback(f"重复代码总组数: {result.get('duplicate_groups_count', 0)}\n")
    if compare_mode in ["single_file", "two_files", "directory"]:
        output_callback(f"总重复行数: {result.get('total_duplicate_lines', 0)}\n")
    # Duplicate rate is only meaningful for single-file non-adjacent runs.
    if compare_mode == "single_file" and detection_mode != "adjacent":
        duplicate_rate = result.get("duplicate_rate", 0.0)
        output_callback(f"重复率: {duplicate_rate:.2f}%\n")
    output_callback("=" * 70 + "\n\n")
    status_callback("结果显示完成")
def save_results_to_log(result, log_file_path):
"""保存结果到日志文件 - 支持三种比较范围模式"""
if not result or not result.get("success", False):
return
try:
with open(log_file_path, 'a', encoding='utf-8') as f:
timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
detection_mode = result.get("detection_mode", "full")
compare_mode = result.get("compare_mode", "single_file")
mode_desc = "全文重复检测"
if detection_mode == "adjacent":
mode_desc = "相邻行重复检测"
elif detection_mode == "left_side":
mode_desc = "等号左侧重复检测"
elif detection_mode == "right_side":
mode_desc = "等号右侧重复检测(排除常数)"
range_desc = "单文件内检测"
if compare_mode == "two_files":
range_desc = "双文件间检测"
elif compare_mode == "directory":
range_desc = "目录下多文件检测"
f.write(f"\n\n{'='*70}\n")
f.write(f"重复代码检测报告 - {mode_desc} - {range_desc} - {timestamp}\n")
f.write(f"{'='*70}\n\n")
if compare_mode == "single_file":
f.write(f"文件: {result['file_path']}\n")
f.write(f"检查行数范围: {result['start_line']} - {result['end_line']}\n")
f.write(f"范围内总行数: {result['actual_lines_in_range']}\n")
if detection_mode in ["left_side", "right_side"]:
f.write(f"范围内赋值语句数: {result.get('assignment_statements', 0)}\n")
if detection_mode == "right_side":
f.write(f" 其中变量表达式: {result.get('variable_expressions', 0)}\n")
f.write(f" 其中常数表达式: {result.get('constant_expressions', 0)}(已过滤)\n")
elif compare_mode == "two_files":
f.write(f"文件1: {result['file1_path']}\n")
f.write(f" 文件名: {result['file1_name']} (总行数: {result['file1_total_lines']})\n")
f.write(f"文件2: {result['file2_path']}\n")
f.write(f" 文件名: {result['file2_name']} (总行数: {result['file2_total_lines']})\n")
elif compare_mode == "directory":
f.write(f"目录: {result['directory_path']}\n")
f.write(f"源代码文件数: {result.get('source_files_count', 0)}\n")
f.write(f"检测模式: {mode_desc}\n")
f.write(f"比较范围: {range_desc}\n\n")
duplicate_groups = result.get('duplicate_groups', [])
for i, group in enumerate(duplicate_groups, 1):
if compare_mode == "two_files":
f.write(f"{i}组跨文件重复:\n")
f.write(f" 重复内容: {group['normalized_content']}\n")
f.write(f" 总重复次数: {group['occurrences']}\n")
f.write(f" {result['file1_name']} 中的行:\n")
for idx, (line_num, original_line) in enumerate(zip(group["file1_line_numbers"], group["file1_original_lines"])):
f.write(f"{line_num}: {original_line}\n")
if detection_mode in ["left_side", "right_side"] and idx < len(group.get("file1_other_sides", [])):
other_side = group["file1_other_sides"][idx]
if other_side:
side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
f.write(f" {side_label}: {other_side}\n")
f.write(f" {result['file2_name']} 中的行:\n")
for idx, (line_num, original_line) in enumerate(zip(group["file2_line_numbers"], group["file2_original_lines"])):
f.write(f"{line_num}: {original_line}\n")
if detection_mode in ["left_side", "right_side"] and idx < len(group.get("file2_other_sides", [])):
other_side = group["file2_other_sides"][idx]
if other_side:
side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
f.write(f" {side_label}: {other_side}\n")
elif compare_mode == "directory":
f.write(f"{i}组跨文件重复:\n")
f.write(f" 重复内容: {group['normalized_content']}\n")
f.write(f" 总重复次数: {group['occurrences']} (跨 {len(group['file_occurrences'])} 个文件)\n")
for file_path, occurrences in group["file_occurrences"].items():
file_name = os.path.basename(file_path)
f.write(f" {file_name} 中的行:\n")
for occ in occurrences:
if len(occ) >= 2:
f.write(f"{occ[0]}: {occ[1]}\n")
if len(occ) >= 3 and occ[2]:
side_label = "右侧表达式" if detection_mode == "left_side" else "左侧变量"
f.write(f" {side_label}: {occ[2]}\n")
else: # single_file
side = group.get("side", "")
if detection_mode == "adjacent":
dup_type = group.get("duplicate_type", "")
if dup_type == "triple_adjacent":
desc = f"{i}组相邻重复行号 (三行连续重复): "
elif dup_type == "double_adjacent":
desc = f"{i}组相邻重复行号 (两行相邻重复): "
else:
desc = f"{i}组相邻重复行号: "
elif detection_mode == "left_side":
desc = f"{i}组左侧相同赋值 (行号): "
elif detection_mode == "right_side":
desc = f"{i}组右侧相同变量表达式 (行号): "
else:
desc = f"{i}组重复行号: "
f.write(f"{desc}{', '.join(map(str, group['line_numbers']))}\n")
if detection_mode == "left_side":
f.write(f" 相同左侧: {group['normalized_content']}\n")
for idx, (line_num, original_line, right_side, operator) in enumerate(zip(
group['line_numbers'], group['original_lines'],
group.get('right_sides', []), group.get('operators', [])
)):
f.write(f"{line_num}: {original_line}\n")
if right_side:
f.write(f" 右侧表达式: {right_side}\n")
elif detection_mode == "right_side":
f.write(f" 相同右侧变量表达式: {group['normalized_content']}\n")
for idx, (line_num, original_line, left_side, operator) in enumerate(zip(
group['line_numbers'], group['original_lines'],
group.get('left_sides', []), group.get('operators', [])
)):
f.write(f"{line_num}: {original_line}\n")
if left_side:
f.write(f" 左侧变量: {left_side}\n")
else:
f.write(f" 重复代码: {group['normalized_content']}\n")
for line_num, original_line in zip(group['line_numbers'], group['original_lines']):
f.write(f"{line_num}: {original_line}\n")
f.write("\n")
f.write(f"重复代码总组数: {result.get('duplicate_groups_count', 0)}\n")
if compare_mode in ["single_file", "two_files", "directory"]:
f.write(f"总重复行数: {result.get('total_duplicate_lines', 0)}\n")
if compare_mode == "single_file" and detection_mode != "adjacent":
duplicate_rate = result.get("duplicate_rate", 0.0)
f.write(f"重复率: {duplicate_rate:.2f}%\n")
f.write(f"{'='*70}\n")
return True
except Exception as e:
return False
def save_results_to_excel(result, excel_file_path):
    """Append a detection result to an Excel workbook.

    Creates the workbook if it does not exist yet; otherwise adds a new
    timestamped sheet. Supports all three compare scopes.

    Args:
        result: Result dict produced by one of the detect_* functions.
        excel_file_path: Target .xlsx path.

    Returns:
        True on success, False when the result is missing/unsuccessful or
        saving fails (the error is printed).
    """
    if not result or not result.get("success", False):
        return False
    try:
        # Create a new workbook, or add a timestamped sheet to an existing one.
        if not os.path.exists(excel_file_path):
            wb = openpyxl.Workbook()
            ws = wb.active
            ws.title = "重复代码检测"
            # Base column widths for the report layout
            for letter, width in zip("ABCDEF", (20, 15, 30, 15, 15, 20)):
                ws.column_dimensions[letter].width = width
        else:
            wb = openpyxl.load_workbook(excel_file_path)
            ws = wb.create_sheet(title=f"检测_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
        # Detection metadata for the report header
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        detection_mode = result.get("detection_mode", "full")
        compare_mode = result.get("compare_mode", "single_file")
        mode_desc = "全文重复检测"
        if detection_mode == "adjacent":
            mode_desc = "相邻行重复检测"
        elif detection_mode == "left_side":
            mode_desc = "等号左侧重复检测"
        elif detection_mode == "right_side":
            mode_desc = "等号右侧重复检测(排除常数)"
        range_desc = "单文件内检测"
        if compare_mode == "two_files":
            range_desc = "双文件间检测"
        elif compare_mode == "directory":
            range_desc = "目录下多文件检测"
        # Cell styles (the previously-defined but unused data font was dropped)
        header_font = Font(name='微软雅黑', size=12, bold=True, color='FFFFFF')
        header_fill = PatternFill(start_color='3498db', end_color='3498db', fill_type='solid')
        title_font = Font(name='微软雅黑', size=14, bold=True, color='e74c3c')
        center_alignment = Alignment(horizontal='center', vertical='center')
        # Title row — always row 1 of a freshly created sheet.
        ws.merge_cells('A1:F1')
        ws['A1'] = f"重复代码检测报告 - {mode_desc} - {range_desc} - {timestamp}"
        ws['A1'].font = title_font
        ws['A1'].alignment = center_alignment
        # Basic information rows
        if compare_mode == "single_file":
            ws.append(["检测范围", "单文件内检测", "", "", "", ""])
            ws.append(["文件路径", result.get('file_path', ''), "", "", "", ""])
            ws.append(["检查行数范围", f"{result.get('start_line', 0)}-{result.get('end_line', 0)}", "", "", "", ""])
            ws.append(["范围内总行数", result.get('actual_lines_in_range', 0), "", "", "", ""])
            if detection_mode in ["left_side", "right_side"]:
                ws.append(["赋值语句数", result.get('assignment_statements', 0), "", "", "", ""])
                if detection_mode == "right_side":
                    ws.append(["变量表达式", result.get('variable_expressions', 0), "", "", "", ""])
                    ws.append(["常数表达式", f"{result.get('constant_expressions', 0)}(已过滤)", "", "", "", ""])
        elif compare_mode == "two_files":
            ws.append(["检测范围", "双文件间检测", "", "", "", ""])
            ws.append(["文件1", result.get('file1_name', ''), f"总行数: {result.get('file1_total_lines', 0)}", "", "", ""])
            ws.append(["文件2", result.get('file2_name', ''), f"总行数: {result.get('file2_total_lines', 0)}", "", "", ""])
            ws.append(["文件1路径", result.get('file1_path', ''), "", "", "", ""])
            ws.append(["文件2路径", result.get('file2_path', ''), "", "", "", ""])
        elif compare_mode == "directory":
            ws.append(["检测范围", "目录下多文件检测", "", "", "", ""])
            ws.append(["目录路径", result.get('directory_path', ''), "", "", "", ""])
            ws.append(["源代码文件数", result.get('source_files_count', 0), "", "", "", ""])
        duplicate_groups = result.get('duplicate_groups', [])
        if duplicate_groups:
            ws.append(["重复组统计", "", "", "", "", ""])
            # Column headers depend on the compare scope. (The old code had
            # two identical branches for the directory headers — merged.)
            if compare_mode == "two_files":
                headers = ["组号", "重复内容", "总重复次数", f"{result.get('file1_name', '文件1')}行号",
                           f"{result.get('file2_name', '文件2')}行号", "备注"]
            elif compare_mode == "directory":
                headers = ["组号", "重复内容", "总重复次数", "涉及文件数", "文件详情", "备注"]
            else:  # single_file
                headers = ["组号", "重复内容", "行号", "左侧/右侧", "表达式/变量", "备注"]
            ws.append(headers)
            # Bugfix: style the row the headers actually landed on. The old
            # code styled a manually tracked `current_row` that pointed one
            # row BELOW the header after the preceding append.
            header_row = ws.max_row
            for col in range(1, len(headers) + 1):
                cell = ws.cell(row=header_row, column=col)
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = center_alignment
                ws.column_dimensions[get_column_letter(col)].width = 20
            # One data row per duplicate group
            for i, group in enumerate(duplicate_groups, 1):
                if compare_mode == "two_files":
                    file1_lines = ', '.join(map(str, group.get("file1_line_numbers", [])))
                    file2_lines = ', '.join(map(str, group.get("file2_line_numbers", [])))
                    ws.append([
                        i,
                        group.get("normalized_content", ""),
                        group.get("occurrences", 0),
                        file1_lines,
                        file2_lines,
                        f"{detection_mode}重复"
                    ])
                elif compare_mode == "directory":
                    # Flatten the per-file occurrences into one multi-line cell.
                    file_occurrences = group.get("file_occurrences", {})
                    file_details_list = []
                    for file_path, occurrences in file_occurrences.items():
                        file_name = os.path.basename(file_path)
                        for occ in occurrences:
                            if len(occ) >= 2:  # at least (line number, original line)
                                line_num = occ[0]
                                original_line = occ[1]
                                if detection_mode in ["left_side", "right_side"] and len(occ) >= 3:
                                    other_side = occ[2]
                                    if detection_mode == "left_side":
                                        file_details_list.append(f"{file_name}第{line_num}: {original_line} (右侧:{other_side})")
                                    else:  # right_side
                                        file_details_list.append(f"{file_name}第{line_num}: {original_line} (左侧:{other_side})")
                                else:
                                    file_details_list.append(f"{file_name}第{line_num}: {original_line}")
                    file_details = "\n".join(file_details_list)
                    if len(file_details) > 32767:  # Excel's per-cell character limit
                        file_details = file_details[:32000] + "\n...(内容过长,已截断)"
                    ws.append([
                        i,
                        group.get("normalized_content", ""),
                        group.get("occurrences", 0),
                        len(file_occurrences),
                        file_details,
                        f"{detection_mode}重复"
                    ])
                else:  # single_file
                    line_numbers = ', '.join(map(str, group.get("line_numbers", [])))
                    group_size = len(group.get('line_numbers', []))
                    if detection_mode == "left_side":
                        ws.append([i, group.get("normalized_content", ""), line_numbers,
                                   "左侧相同", "", f"等号左侧重复,共{group_size}次"])
                    elif detection_mode == "right_side":
                        ws.append([i, group.get("normalized_content", ""), line_numbers,
                                   "右侧相同", "", f"等号右侧重复,共{group_size}次"])
                    else:
                        ws.append([i, group.get("normalized_content", ""), line_numbers,
                                   "", "", f"全文重复,共{group_size}次"])
            # Summary statistics
            ws.append(["统计摘要", "", "", "", "", ""])
            ws.append(["重复代码总组数", result.get('duplicate_groups_count', 0), "", "", "", ""])
            ws.append(["总重复行数", result.get('total_duplicate_lines', 0), "", "", "", ""])
            if compare_mode == "single_file" and detection_mode != "adjacent":
                duplicate_rate = result.get("duplicate_rate", 0.0)
                ws.append(["重复率", f"{duplicate_rate:.2f}%", "", "", "", ""])
        else:
            # No duplicates found
            ws.append(["检测结果", "未找到重复代码", "", "", "", ""])
        wb.save(excel_file_path)
        return True
    except Exception as e:
        print(f"保存Excel失败: {e}")
        return False
def save_results_to_excel_enhanced(result, excel_file_path):
    """Enhanced Excel export.

    Writes a summary sheet for every compare scope and, for directory mode,
    an additional "重复详情" sheet with one row per duplicate occurrence.

    Args:
        result: Result dict produced by one of the detect_* functions.
        excel_file_path: Target .xlsx path; created if missing, otherwise a
            new timestamped summary sheet is added.

    Returns:
        True on success, False when the result is missing/unsuccessful or
        saving fails (the error is printed).
    """
    if not result or not result.get("success", False):
        return False
    try:
        # Create the workbook if it does not exist; otherwise add a timestamped summary sheet.
        if not os.path.exists(excel_file_path):
            wb = openpyxl.Workbook()
            ws_summary = wb.active
            ws_summary.title = "检测摘要"
        else:
            wb = openpyxl.load_workbook(excel_file_path)
            ws_summary = wb.create_sheet(title=f"检测摘要_{datetime.now().strftime('%Y%m%d_%H%M%S')}")
        # Gather detection metadata for the report header
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        detection_mode = result.get("detection_mode", "full")
        compare_mode = result.get("compare_mode", "single_file")
        mode_desc = "全文重复检测"
        if detection_mode == "adjacent":
            mode_desc = "相邻行重复检测"
        elif detection_mode == "left_side":
            mode_desc = "等号左侧重复检测"
        elif detection_mode == "right_side":
            mode_desc = "等号右侧重复检测(排除常数)"
        range_desc = "单文件内检测"
        if compare_mode == "two_files":
            range_desc = "双文件间检测"
        elif compare_mode == "directory":
            range_desc = "目录下多文件检测"
        # Define cell styles
        header_font = Font(name='微软雅黑', size=12, bold=True, color='FFFFFF')
        header_fill = PatternFill(start_color='3498db', end_color='3498db', fill_type='solid')
        title_font = Font(name='微软雅黑', size=14, bold=True, color='e74c3c')
        # NOTE(review): data_font is defined but never applied to any cell.
        data_font = Font(name='微软雅黑', size=10)
        center_alignment = Alignment(horizontal='center', vertical='center')
        # Write the summary worksheet
        current_row = 1
        # Title row
        ws_summary.merge_cells(f'A{current_row}:F{current_row}')
        ws_summary[f'A{current_row}'] = f"重复代码检测报告 - {mode_desc} - {range_desc} - {timestamp}"
        ws_summary[f'A{current_row}'].font = title_font
        ws_summary[f'A{current_row}'].alignment = center_alignment
        current_row += 2
        # Basic information rows (per compare scope)
        ws_summary.append(["基本信息", "", "", "", "", ""])
        if compare_mode == "single_file":
            ws_summary.append(["检测范围", "单文件内检测", "", "", "", ""])
            ws_summary.append(["文件路径", result.get('file_path', ''), "", "", "", ""])
            ws_summary.append(["检查行数范围", f"{result.get('start_line', 0)}-{result.get('end_line', 0)}", "", "", "", ""])
            ws_summary.append(["范围内总行数", result.get('actual_lines_in_range', 0), "", "", "", ""])
            if detection_mode in ["left_side", "right_side"]:
                ws_summary.append(["赋值语句数", result.get('assignment_statements', 0), "", "", "", ""])
                if detection_mode == "right_side":
                    ws_summary.append(["变量表达式", result.get('variable_expressions', 0), "", "", "", ""])
                    ws_summary.append(["常数表达式", f"{result.get('constant_expressions', 0)}(已过滤)", "", "", "", ""])
        elif compare_mode == "two_files":
            ws_summary.append(["检测范围", "双文件间检测", "", "", "", ""])
            ws_summary.append(["文件1", result.get('file1_name', ''), f"总行数: {result.get('file1_total_lines', 0)}", "", "", ""])
            ws_summary.append(["文件2", result.get('file2_name', ''), f"总行数: {result.get('file2_total_lines', 0)}", "", "", ""])
            ws_summary.append(["文件1路径", result.get('file1_path', ''), "", "", "", ""])
            ws_summary.append(["文件2路径", result.get('file2_path', ''), "", "", "", ""])
        elif compare_mode == "directory":
            ws_summary.append(["检测范围", "目录下多文件检测", "", "", "", ""])
            ws_summary.append(["目录路径", result.get('directory_path', ''), "", "", "", ""])
            ws_summary.append(["源代码文件数", result.get('source_files_count', 0), "", "", "", ""])
        current_row = ws_summary.max_row + 2
        # Statistics section
        duplicate_groups = result.get('duplicate_groups', [])
        if duplicate_groups:
            ws_summary.append(["统计信息", "", "", "", "", ""])
            ws_summary.append(["重复代码总组数", result.get('duplicate_groups_count', 0), "", "", "", ""])
            ws_summary.append(["总重复行数", result.get('total_duplicate_lines', 0), "", "", "", ""])
            if compare_mode == "single_file" and detection_mode != "adjacent":
                duplicate_rate = result.get("duplicate_rate", 0.0)
                ws_summary.append(["重复率", f"{duplicate_rate:.2f}%", "", "", "", ""])
        else:
            ws_summary.append(["检测结果", "未找到重复代码", "", "", "", ""])
        # For directory mode, add a detail worksheet with one row per occurrence.
        if compare_mode == "directory" and duplicate_groups:
            # Create the detail worksheet
            ws_details = wb.create_sheet(title="重复详情")
            # Column widths
            ws_details.column_dimensions['A'].width = 10
            ws_details.column_dimensions['B'].width = 20
            ws_details.column_dimensions['C'].width = 40
            ws_details.column_dimensions['D'].width = 15
            ws_details.column_dimensions['E'].width = 10
            ws_details.column_dimensions['F'].width = 60
            ws_details.column_dimensions['G'].width = 30
            ws_details.column_dimensions['H'].width = 20
            # Header row
            headers = ["重复组号", "重复类型", "重复内容", "文件名称", "行号", "原始代码行", "另一侧表达式", "检测模式"]
            ws_details.append(headers)
            # Header styling (headers land on row 1 of the fresh sheet)
            for col in range(1, len(headers) + 1):
                cell = ws_details.cell(row=1, column=col)
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = center_alignment
            # Detail rows; row_num mirrors the worksheet row of each append
            row_num = 2
            for i, group in enumerate(duplicate_groups, 1):
                file_occurrences = group.get("file_occurrences", {})
                for file_path, occurrences in file_occurrences.items():
                    file_name = os.path.basename(file_path)
                    for occ in occurrences:
                        if len(occ) >= 2:
                            line_num = occ[0]
                            original_line = occ[1]
                            # The optional third element is the other assignment side
                            other_side = ""
                            if len(occ) >= 3:
                                other_side = occ[2]
                            # Write the occurrence row
                            ws_details.append([
                                i,
                                detection_mode,
                                group.get("normalized_content", ""),
                                file_name,
                                line_num,
                                original_line,
                                other_side,
                                mode_desc
                            ])
                            # Force a numeric format on the line-number cell
                            ws_details.cell(row=row_num, column=5).number_format = "0"
                            row_num += 1
            # Point the summary sheet at the detail sheet
            ws_summary.append(["", "", "", "", "", ""])
            ws_summary.append(["详细结果", f"已找到 {len(duplicate_groups)} 组重复,共 {result.get('total_duplicate_lines', 0)} 行重复", "", "", "", ""])
            ws_summary.append(["查看详情", "请查看'重复详情'工作表获取详细信息", "", "", "", ""])
        # Persist the workbook
        wb.save(excel_file_path)
        return True
    except Exception as e:
        print(f"保存Excel失败: {e}")
        return False
def save_results_to_excel_with_filename(result, excel_filename):
    """Write a detection result to its own Excel workbook.

    The workbook layout depends on the comparison mode stored in *result*:
      - "single_file": one sheet with file info, per-group rows and a summary
      - "two_files":   one sheet with cross-file duplicate groups
      - "directory":   a summary sheet plus a per-occurrence detail sheet

    Bug fix vs. the original: table-header rows are now styled on the row
    that ``ws.append`` actually wrote (``ws.max_row``).  The old code styled
    a pre-computed ``current_row`` (= max_row + 2 taken *before* the append),
    which pointed one row past the header, so the real header stayed
    unstyled.  Dead locals (unused timestamp / truncated sheet names /
    unused ``original_lines``) were removed.

    Args:
        result: result dict produced by one of the detectors; nothing is
            written unless ``result.get("success")`` is truthy.
        excel_filename: target .xlsx path.

    Returns:
        True when the workbook was saved, False otherwise.
    """
    if not result or not result.get("success", False):
        return False
    try:
        wb = openpyxl.Workbook()
        detection_mode = result.get("detection_mode", "full")
        compare_mode = result.get("compare_mode", "single_file")
        duplicate_groups_count = result.get('duplicate_groups_count', 0)
        duplicate_groups = result.get('duplicate_groups', [])
        mode_desc = {
            "adjacent": "相邻行重复检测",
            "left_side": "等号左侧重复检测",
            "right_side": "等号右侧重复检测(排除常数)",
        }.get(detection_mode, "全文重复检测")
        # Shared cell styles (the original re-created these in every branch).
        title_font = Font(name='微软雅黑', size=14, bold=True, color='e74c3c')
        header_font = Font(name='微软雅黑', size=12, bold=True, color='FFFFFF')
        header_fill = PatternFill(start_color='3498db', end_color='3498db', fill_type='solid')
        center_alignment = Alignment(horizontal='center', vertical='center')

        def _style_header_row(ws, column_count):
            # Style the row that was just appended as a table header.
            for col in range(1, column_count + 1):
                cell = ws.cell(row=ws.max_row, column=col)
                cell.font = header_font
                cell.fill = header_fill
                cell.alignment = center_alignment

        def _set_column_widths(ws, widths):
            # Assign widths to columns A, B, C, ... in order.
            for letter, width in zip("ABCDEFGH", widths):
                ws.column_dimensions[letter].width = width

        def _write_title(ws, text):
            # Merged, red, centered report title across the first row.
            ws.merge_cells('A1:F1')
            ws['A1'] = text
            ws['A1'].font = title_font
            ws['A1'].alignment = center_alignment

        if compare_mode == "single_file":
            ws = wb.active
            ws.title = "检测结果"
            _set_column_widths(ws, (20, 15, 30, 15, 15, 20))
            _write_title(ws, f"重复代码检测报告 - {mode_desc} - 单文件内检测")
            # Basic information rows.
            ws.append(["检测时间", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "", "", "", ""])
            ws.append(["文件路径", result.get('file_path', ''), "", "", "", ""])
            ws.append(["检查行数范围", f"{result.get('start_line', 0)}-{result.get('end_line', 0)}", "", "", "", ""])
            ws.append(["范围内总行数", result.get('actual_lines_in_range', 0), "", "", "", ""])
            if detection_mode in ["left_side", "right_side"]:
                ws.append(["赋值语句数", result.get('assignment_statements', 0), "", "", "", ""])
                if detection_mode == "right_side":
                    ws.append(["变量表达式", result.get('variable_expressions', 0), "", "", "", ""])
                    ws.append(["常数表达式", f"{result.get('constant_expressions', 0)}(已过滤)", "", "", "", ""])
            if duplicate_groups:
                # Table header depends on the detection mode.
                if detection_mode == "left_side":
                    headers = ["组号", "左侧表达式", "行号", "右侧表达式", "运算符", "备注"]
                elif detection_mode == "right_side":
                    headers = ["组号", "右侧表达式", "行号", "左侧表达式", "运算符", "备注"]
                else:
                    headers = ["组号", "重复内容", "行号", "原始代码行", "重复次数", "备注"]
                ws.append(headers)
                _style_header_row(ws, len(headers))
                # One row per duplicate group.
                for i, group in enumerate(duplicate_groups, 1):
                    line_numbers = ', '.join(map(str, group.get("line_numbers", [])))
                    group_size = len(group.get('line_numbers', []))
                    if detection_mode == "left_side":
                        right_sides = group.get("right_sides", [""] * len(group.get("line_numbers", [])))
                        operators = group.get("operators", [""] * len(group.get("line_numbers", [])))
                        ws.append([
                            i,
                            group.get("normalized_content", ""),
                            line_numbers,
                            ', '.join(right_sides),
                            ', '.join(operators),
                            f"左侧重复,共{group_size}行"
                        ])
                    elif detection_mode == "right_side":
                        left_sides = group.get("left_sides", [""] * len(group.get("line_numbers", [])))
                        operators = group.get("operators", [""] * len(group.get("line_numbers", [])))
                        ws.append([
                            i,
                            group.get("normalized_content", ""),
                            line_numbers,
                            ', '.join(left_sides),
                            ', '.join(operators),
                            f"右侧重复,共{group_size}行"
                        ])
                    else:
                        ws.append([
                            i,
                            group.get("normalized_content", ""),
                            line_numbers,
                            '; '.join(group.get("original_lines", [])),
                            group.get("occurrences", 0),
                            f"全文重复,共{group_size}行"
                        ])
                # Summary block.
                ws.append(["统计摘要", "", "", "", "", ""])
                ws.append(["重复代码总组数", duplicate_groups_count, "", "", "", ""])
                ws.append(["总重复行数", result.get('total_duplicate_lines', 0), "", "", "", ""])
                if detection_mode != "adjacent":
                    ws.append(["重复率", f"{result.get('duplicate_rate', 0.0):.2f}%", "", "", "", ""])
            else:
                ws.append(["检测结果", "未找到重复代码", "", "", "", ""])
        elif compare_mode == "two_files":
            ws = wb.active
            ws.title = "检测结果"
            _set_column_widths(ws, (20, 15, 30, 15, 15, 20))
            file1_name = result.get('file1_name', '文件1')
            file2_name = result.get('file2_name', '文件2')
            _write_title(ws, f"重复代码检测报告 - {mode_desc} - 双文件间检测")
            # Basic information rows.
            ws.append(["检测时间", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "", "", "", ""])
            ws.append(["文件1", f"{result.get('file1_name', '')} (总行数: {result.get('file1_total_lines', 0)})", "", "", "", ""])
            ws.append(["文件1路径", result.get('file1_path', ''), "", "", "", ""])
            ws.append(["文件2", f"{result.get('file2_name', '')} (总行数: {result.get('file2_total_lines', 0)})", "", "", "", ""])
            ws.append(["文件2路径", result.get('file2_path', ''), "", "", "", ""])
            if duplicate_groups:
                headers = ["组号", "重复内容", "总重复次数", f"{file1_name}行号", f"{file2_name}行号", "备注"]
                ws.append(headers)
                _style_header_row(ws, len(headers))
                for i, group in enumerate(duplicate_groups, 1):
                    ws.append([
                        i,
                        group.get("normalized_content", ""),
                        group.get("occurrences", 0),
                        ', '.join(map(str, group.get("file1_line_numbers", []))),
                        ', '.join(map(str, group.get("file2_line_numbers", []))),
                        f"{detection_mode}重复"
                    ])
                ws.append(["统计摘要", "", "", "", "", ""])
                ws.append(["重复代码总组数", duplicate_groups_count, "", "", "", ""])
                ws.append(["总重复行数", result.get('total_duplicate_lines', 0), "", "", "", ""])
            else:
                ws.append(["检测结果", "未找到跨文件重复代码", "", "", "", ""])
        elif compare_mode == "directory":
            # Directory mode produces two sheets: summary + per-occurrence details.
            ws_summary = wb.active
            ws_summary.title = "检测摘要"
            _set_column_widths(ws_summary, (20, 15, 30, 15, 15, 20))
            _write_title(ws_summary, f"重复代码检测报告 - {mode_desc} - 目录下多文件检测")
            ws_summary.append(["检测时间", datetime.now().strftime('%Y-%m-%d %H:%M:%S'), "", "", "", ""])
            ws_summary.append(["目录路径", result.get('directory_path', ''), "", "", "", ""])
            ws_summary.append(["源代码文件数", result.get('source_files_count', 0), "", "", "", ""])
            if duplicate_groups:
                ws_summary.append(["统计信息", "", "", "", "", ""])
                ws_summary.append(["重复代码总组数", duplicate_groups_count, "", "", "", ""])
                ws_summary.append(["总重复行数", result.get('total_duplicate_lines', 0), "", "", "", ""])
                ws_summary.append(["", "", "", "", "", ""])
                ws_summary.append(["详细结果", f"已找到 {duplicate_groups_count} 组重复,共 {result.get('total_duplicate_lines', 0)} 行重复", "", "", "", ""])
                ws_summary.append(["查看详情", "请查看'重复详情'工作表获取详细信息", "", "", "", ""])
                # Detail sheet: one row per occurrence of every duplicate group.
                ws_details = wb.create_sheet(title="重复详情")
                _set_column_widths(ws_details, (10, 20, 40, 20, 10, 60, 30, 20))
                headers = ["重复组号", "重复类型", "重复内容", "文件名称", "行号", "原始代码行", "另一侧表达式", "检测模式"]
                ws_details.append(headers)
                _style_header_row(ws_details, len(headers))
                for i, group in enumerate(duplicate_groups, 1):
                    for file_path, occurrences in group.get("file_occurrences", {}).items():
                        file_name = os.path.basename(file_path)
                        for occ in occurrences:
                            if len(occ) < 2:
                                continue
                            # occ = (line number, original line[, other-side expression])
                            other_side = occ[2] if len(occ) >= 3 else ""
                            ws_details.append([
                                i,
                                detection_mode,
                                group.get("normalized_content", ""),
                                file_name,
                                occ[0],
                                occ[1],
                                other_side,
                                mode_desc
                            ])
                            # Force the line-number column to an integer format.
                            ws_details.cell(row=ws_details.max_row, column=5).number_format = "0"
            else:
                ws_summary.append(["检测结果", "在目录下的文件之间未找到重复的代码行", "", "", "", ""])
        wb.save(excel_filename)
        return True
    except Exception as e:
        print(f"保存Excel失败: {e}")
        return False
def generate_excel_filename(result):
    """Build a unique .xlsx file name for a detection result.

    The name encodes the detection mode, the compared target(s) and a
    second-resolution timestamp so successive runs do not collide.
    """
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    mode_labels = {
        "full": "全文",
        "adjacent": "相邻",
        "left_side": "左侧",
        "right_side": "右侧"
    }
    mode_code = mode_labels.get(result.get("detection_mode", "full"), "检测")
    compare_mode = result.get("compare_mode", "single_file")
    if compare_mode == "single_file":
        # Truncate long file names to keep the whole name manageable.
        base = os.path.basename(result.get("file_path", "unknown"))[:20]
        return f"重复检测_{mode_code}_{base}_{stamp}.xlsx"
    if compare_mode == "two_files":
        first = result.get("file1_name", "文件1")[:10]
        second = result.get("file2_name", "文件2")[:10]
        return f"重复检测_{mode_code}_{first}_{second}_{stamp}.xlsx"
    if compare_mode == "directory":
        dir_path = result.get("directory_path", "unknown")
        # A trailing separator yields an empty basename; fall back to the parent.
        dir_name = os.path.basename(dir_path) or os.path.basename(os.path.dirname(dir_path))
        return f"重复检测_{mode_code}_目录_{dir_name[:20]}_{stamp}.xlsx"
    return f"重复检测_{mode_code}_{stamp}.xlsx"
class DuplicateCodeDetectorGUI:
    def __init__(self, root):
        """Build the whole GUI inside *root* and initialise runtime state.

        Args:
            root: the Tk root window; it is resized, recoloured and used as
                the parent of every widget created here.
        """
        self.root = root
        self.root.title("源代码重复行统计软件V1.00")
        self.root.geometry("1200x800")
        # Minimum window size
        self.root.minsize(1000, 650)
        # Light-blue window background
        self.root.configure(bg='#e8f4f8')
        # Configure ttk styles before any widget is created
        self.setup_styles()
        # Main container frame
        self.main_frame = ttk.Frame(root, padding="10")
        self.main_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        # Grid weights for the root window
        root.columnconfigure(0, weight=1)
        root.rowconfigure(0, weight=1)
        # Row/column weights of the main frame
        self.main_frame.columnconfigure(0, weight=1)
        self.main_frame.rowconfigure(0, weight=0)  # title
        self.main_frame.rowconfigure(1, weight=0)  # compare-range selection
        self.main_frame.rowconfigure(2, weight=0)  # compare parameters
        self.main_frame.rowconfigure(3, weight=0)  # detection-mode selection
        self.main_frame.rowconfigure(4, weight=0)  # button area
        self.main_frame.rowconfigure(5, weight=0)  # status
        self.main_frame.rowconfigure(6, weight=1)  # result panel (only stretching row)
        self.main_frame.rowconfigure(7, weight=0)  # footer
        # Build the UI sections from top to bottom
        self.create_title()
        # Comparison-scope selection area
        self.create_compare_range_panel()
        # Comparison-parameter area
        self.create_compare_params_panel()
        # Detection-mode button area
        self.create_detection_panel()
        # Bottom result panel
        self.create_result_panel()
        # Footer line
        self.create_footer()
        # Path of the plain-text log file
        self.log_file = "duplicate_code_log.txt"
        # Path of the most recently generated Excel report (None until a run finishes)
        self.latest_excel_file = None  # 存储最新生成的Excel文件路径
        # Queue used by worker threads to post messages to the UI thread
        self.message_queue = Queue()
        # Window-resize bookkeeping
        self.last_width = 1200
        self.last_height = 800
        self.root.bind('<Configure>', self.on_window_resize)
        # References filled in later by create_result_panel
        self.process_text_frame = None
        self.result_text_frame = None
        self.paned_window = None
        # Current modes
        self.current_detection_mode = "full"  # default: full-text detection
        self.current_compare_mode = "single_file"  # default: single-file comparison
        # Start polling the message queue
        self.process_queue()
        # Initial enable/disable state of the widgets
        self.update_ui_for_compare_mode()
        # Make sure the log file exists
        self.initialize_log_file()
def setup_styles(self):
"""设置样式 - 浅蓝色扁平风格"""
style = ttk.Style()
style.theme_use('clam')
# 配置颜色方案
bg_color = '#e8f4f8' # 浅蓝色背景
fg_color = '#2c3e50' # 深蓝色文字
btn_bg = '#3498db' # 蓝色按钮背景
btn_fg = '#ffffff' # 白色按钮文字
hover_bg = '#2980b9' # 悬停时的深蓝色
active_bg = '#1c5c8a' # 按下时的更深蓝色
frame_bg = '#d4eaf7' # 框架背景色
label_frame_bg = '#d4eaf7' # 标签框架背景色
# 基本窗口样式
style.configure('.', background=bg_color, foreground=fg_color, font=('微软雅黑', 10))
# 标签框架样式
style.configure('TLabelframe', background=frame_bg, relief='flat', borderwidth=2)
style.configure('TLabelframe.Label', background=label_frame_bg, foreground=fg_color, font=('微软雅黑', 10, 'bold'))
# 主按钮样式
style.configure('TButton',
background=btn_bg,
foreground=btn_fg,
font=('微软雅黑', 10, 'bold'),
borderwidth=1,
relief='flat',
padding=6)
# 按钮悬停效果
style.map('TButton',
background=[('active', hover_bg), ('pressed', active_bg)],
relief=[('pressed', 'sunken')])
# 特殊按钮样式
style.configure('Highlight.TButton',
background='#e74c3c', # 红色
foreground=btn_fg,
font=('微软雅黑', 10, 'bold'),
borderwidth=1,
relief='flat',
padding=6)
style.configure('Adjacent.TButton',
background='#2ecc71', # 绿色
foreground=btn_fg,
font=('微软雅黑', 10, 'bold'),
borderwidth=1,
relief='flat',
padding=6)
style.configure('LeftSide.TButton',
background='#9b59b6', # 紫色
foreground=btn_fg,
font=('微软雅黑', 10, 'bold'),
borderwidth=1,
relief='flat',
padding=6)
style.configure('RightSide.TButton',
background='#e67e22', # 橙色
foreground=btn_fg,
font=('微软雅黑', 10, 'bold'),
borderwidth=1,
relief='flat',
padding=6)
# 标签样式
style.configure('Title.TLabel',
font=('微软雅黑', 24, 'bold'),
foreground='#e74c3c', # 红色
background=bg_color)
style.configure('Subtitle.TLabel',
font=('微软雅黑', 12, 'bold'),
foreground=fg_color,
background=bg_color)
style.configure('Status.TLabel',
font=('微软雅黑', 10),
foreground='#3498db', # 蓝色
background=bg_color)
style.configure('Footer.TLabel',
font=('微软雅黑', 10),
foreground='#7f8c8d', # 中灰色
background=bg_color)
# 文本框样式
style.configure('TEntry',
fieldbackground='#ffffff',
foreground='#2c3e50',
font=('微软雅黑', 10),
borderwidth=1,
relief='flat')
# 滚动条样式
style.configure('Vertical.TScrollbar',
background='#bdc3c7',
troughcolor=bg_color,
borderwidth=0,
relief='flat')
style.configure('Horizontal.TScrollbar',
background='#bdc3c7',
troughcolor=bg_color,
borderwidth=0,
relief='flat')
# Radiobutton样式
style.configure('CompareMode.TRadiobutton',
background=bg_color,
foreground=fg_color,
font=('微软雅黑', 10))
style.configure('DetectionMode.TRadiobutton',
background=bg_color,
foreground=fg_color,
font=('微软雅黑', 10))
def create_title(self):
"""创建标题区域 - 红色字体"""
# 创建一个单独的框架来包含标题
title_frame = ttk.Frame(self.main_frame, style='TFrame')
title_frame.grid(row=0, column=0, columnspan=3, sticky=(tk.W, tk.E), pady=(0, 20))
title_frame.columnconfigure(0, weight=1)
# 使用普通Label而不是ttk.Label以便设置字体颜色
# 标题改为红色字体
title_label = tk.Label(
title_frame,
text="源代码重复行统计软件V1.00",
font=('微软雅黑', 24, 'bold'),
fg='#e74c3c', # 红色字体
bg='#e8f4f8' # 浅蓝色背景
)
title_label.grid(row=0, column=0, sticky=tk.N)
    def create_compare_range_panel(self):
        """Build the three-column "comparison scope" selector.

        Column 1: single-file mode + its file picker.
        Column 2: two-file mode + two file pickers.
        Column 3: directory mode + a directory picker.
        All three radio buttons share ``self.compare_mode_var``; widgets for
        the inactive modes are enabled/disabled by update_ui_for_compare_mode().
        """
        range_frame = ttk.LabelFrame(self.main_frame, text="比较范围选择", padding="10")
        range_frame.grid(row=1, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
        range_frame.columnconfigure(0, weight=1)
        # Container holding the three columns
        columns_frame = ttk.Frame(range_frame)
        columns_frame.grid(row=0, column=0, sticky=(tk.W, tk.E))
        # Equal weights; "uniform" keeps the columns the same width
        columns_frame.columnconfigure(0, weight=1, uniform="range_cols")
        columns_frame.columnconfigure(1, weight=1, uniform="range_cols")
        columns_frame.columnconfigure(2, weight=1, uniform="range_cols")
        # Column 1: comparison within a single file
        col1_frame = ttk.Frame(columns_frame)
        col1_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), padx=5)
        col1_frame.columnconfigure(0, weight=1)
        # Radio button selecting single-file mode (also creates the shared mode variable)
        self.compare_mode_var = tk.StringVar(value="single_file")
        self.single_file_radio = ttk.Radiobutton(
            col1_frame,
            text="方式一:单文件内比较",
            variable=self.compare_mode_var,
            value="single_file",
            command=self.update_ui_for_compare_mode,
            style='CompareMode.TRadiobutton'
        )
        self.single_file_radio.grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
        # File picker for the single file
        ttk.Label(col1_frame, text="源代码文件:").grid(row=1, column=0, sticky=tk.W, pady=(5, 0))
        self.file_path_var = tk.StringVar()
        self.file_entry = ttk.Entry(col1_frame, textvariable=self.file_path_var, width=30)
        self.file_entry.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(0, 5))
        self.browse_button = ttk.Button(
            col1_frame,
            text="浏览...",
            command=lambda: self.browse_file("single"),
            style='Highlight.TButton',
            width=10
        )
        self.browse_button.grid(row=2, column=1, sticky=tk.W, padx=(5, 0), pady=(0, 5))
        # Column 2: comparison between two files
        col2_frame = ttk.Frame(columns_frame)
        col2_frame.grid(row=0, column=1, sticky=(tk.W, tk.E, tk.N, tk.S), padx=5)
        col2_frame.columnconfigure(0, weight=1)
        # Radio button selecting two-file mode
        self.two_files_radio = ttk.Radiobutton(
            col2_frame,
            text="方式二:双文件间比较",
            variable=self.compare_mode_var,
            value="two_files",
            command=self.update_ui_for_compare_mode,
            style='CompareMode.TRadiobutton'
        )
        self.two_files_radio.grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
        # First file picker (disabled until this mode is selected)
        ttk.Label(col2_frame, text="第一个文件:").grid(row=1, column=0, sticky=tk.W, pady=(5, 0))
        self.file1_path_var = tk.StringVar()
        self.file1_entry = ttk.Entry(col2_frame, textvariable=self.file1_path_var, width=30, state='disabled')
        self.file1_entry.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(0, 5))
        self.browse_file1_button = ttk.Button(
            col2_frame,
            text="浏览...",
            command=lambda: self.browse_file("file1"),
            style='TButton',
            width=10,
            state='disabled'
        )
        self.browse_file1_button.grid(row=2, column=1, sticky=tk.W, padx=(5, 0), pady=(0, 5))
        # Second file picker
        ttk.Label(col2_frame, text="第二个文件:").grid(row=3, column=0, sticky=tk.W, pady=(5, 0))
        self.file2_path_var = tk.StringVar()
        self.file2_entry = ttk.Entry(col2_frame, textvariable=self.file2_path_var, width=30, state='disabled')
        self.file2_entry.grid(row=4, column=0, sticky=(tk.W, tk.E), pady=(0, 5))
        self.browse_file2_button = ttk.Button(
            col2_frame,
            text="浏览...",
            command=lambda: self.browse_file("file2"),
            style='TButton',
            width=10,
            state='disabled'
        )
        self.browse_file2_button.grid(row=4, column=1, sticky=tk.W, padx=(5, 0), pady=(0, 5))
        # Column 3: comparison across all files in a directory
        col3_frame = ttk.Frame(columns_frame)
        col3_frame.grid(row=0, column=2, sticky=(tk.W, tk.E, tk.N, tk.S), padx=5)
        col3_frame.columnconfigure(0, weight=1)
        # Radio button selecting directory mode
        self.directory_radio = ttk.Radiobutton(
            col3_frame,
            text="方式三:目录下多文件间比较",
            variable=self.compare_mode_var,
            value="directory",
            command=self.update_ui_for_compare_mode,
            style='CompareMode.TRadiobutton'
        )
        self.directory_radio.grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
        # Directory picker
        ttk.Label(col3_frame, text="源代码目录:").grid(row=1, column=0, sticky=tk.W, pady=(5, 0))
        self.directory_path_var = tk.StringVar()
        self.directory_entry = ttk.Entry(col3_frame, textvariable=self.directory_path_var, width=30, state='disabled')
        self.directory_entry.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(0, 5))
        self.browse_directory_button = ttk.Button(
            col3_frame,
            text="浏览目录...",
            command=lambda: self.browse_directory(),
            style='TButton',
            width=10,
            state='disabled'
        )
        self.browse_directory_button.grid(row=2, column=1, sticky=tk.W, padx=(5, 0), pady=(0, 5))
def create_compare_params_panel(self):
"""创建比较参数设置区域 - 包含行数范围"""
params_frame = ttk.LabelFrame(self.main_frame, text="比较参数设置", padding="10")
params_frame.grid(row=2, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
params_frame.columnconfigure(0, weight=1)
# 行数范围设置
range_frame = ttk.Frame(params_frame)
range_frame.grid(row=0, column=0, sticky=tk.W, pady=(0, 10))
ttk.Label(range_frame, text="行数范围 (可选,不选表示全文范围内查找):").grid(row=0, column=0, sticky=tk.W, pady=(0, 5))
range_input_frame = ttk.Frame(range_frame)
range_input_frame.grid(row=1, column=0, sticky=tk.W)
ttk.Label(range_input_frame, text="起始行号:").grid(row=0, column=0, sticky=tk.W)
self.start_entry = ttk.Entry(range_input_frame, width=10)
self.start_entry.grid(row=0, column=1, sticky=tk.W, padx=(5, 10))
ttk.Label(range_input_frame, text="结束行号:").grid(row=0, column=2, sticky=tk.W)
self.end_entry = ttk.Entry(range_input_frame, width=10)
self.end_entry.grid(row=0, column=3, sticky=tk.W, padx=5)
# 添加提醒信息
reminder_label = tk.Label(
params_frame,
text="注意1.重复行检测会忽略注释上的不同以及语句中空格等非可执行内容,单字符重复行(如{};也将被忽略。2.右侧相等检测会排除常数表达式。3.方式二和方式三不支持相邻行检测。",
font=('微软雅黑', 9),
fg='#e74c3c', # 红色
bg='#d4eaf7', # 浅蓝色背景
wraplength=900
)
reminder_label.grid(row=1, column=0, sticky=tk.W)
def create_detection_panel(self):
"""创建检测功能选择区域 - 使用7个按钮在一行显示"""
detection_frame = ttk.LabelFrame(self.main_frame, text="检测模式", padding="10")
detection_frame.grid(row=3, column=0, sticky=(tk.W, tk.E), pady=(0, 10))
detection_frame.columnconfigure(0, weight=1)
# 创建单行容器,所有按钮放在一行
button_row_frame = ttk.Frame(detection_frame)
button_row_frame.grid(row=0, column=0, sticky=tk.W, pady=(0, 10))
# 全文重复检测按钮
self.full_detect_button = ttk.Button(
button_row_frame,
text="全文重复检测",
command=lambda: self.start_detection("full"),
style='TButton',
width=15
)
self.full_detect_button.grid(row=0, column=0, sticky=tk.W, padx=(0, 10))
# 相邻重复检测按钮
self.adjacent_detect_button = ttk.Button(
button_row_frame,
text="相邻重复检测",
command=lambda: self.start_detection("adjacent"),
style='Adjacent.TButton',
width=15
)
self.adjacent_detect_button.grid(row=0, column=1, sticky=tk.W, padx=(0, 10))
# 等号左侧重复检测按钮
self.left_side_button = ttk.Button(
button_row_frame,
text="等号左侧重复检测",
command=lambda: self.start_detection("left_side"),
style='LeftSide.TButton',
width=20
)
self.left_side_button.grid(row=0, column=2, sticky=tk.W, padx=(0, 10))
# 等号右侧重复检测按钮
self.right_side_button = ttk.Button(
button_row_frame,
text="等号右侧重复检测",
command=lambda: self.start_detection("right_side"),
style='RightSide.TButton',
width=20
)
self.right_side_button.grid(row=0, column=3, sticky=tk.W, padx=(0, 10))
# 清空结果按钮
self.clear_button = ttk.Button(
button_row_frame,
text="清空结果",
command=self.clear_results,
style='TButton',
width=15
)
self.clear_button.grid(row=0, column=4, sticky=tk.W, padx=(0, 10))
# 查看日志按钮
self.view_log_button = ttk.Button(
button_row_frame,
text="查看日志",
command=self.view_log,
style='TButton',
width=15
)
self.view_log_button.grid(row=0, column=5, sticky=tk.W, padx=(0, 10))
# 查看Excel按钮
self.view_excel_button = ttk.Button(
button_row_frame,
text="查看Excel",
command=self.view_excel_enhanced, # 使用增强版
style='TButton',
width=15
)
self.view_excel_button.grid(row=0, column=6, sticky=tk.W)
# 为查看Excel按钮添加上下文菜单
self.create_excel_context_menu()
# 为所有按钮添加提示
self.create_tooltip(self.full_detect_button, "在源代码的指定行范围内查找所有行中的重复行(排除单字符重复)")
self.create_tooltip(self.adjacent_detect_button, "在源代码的指定行范围内仅查找相邻行中的重复行(排除单字符重复)")
self.create_tooltip(self.left_side_button, "在源代码的指定行范围内查找不同行之间存在的等号左侧表达式相等的行")
self.create_tooltip(self.right_side_button, "在源代码的指定行范围内查找不同行之间存在的等号右侧表达式相等的行(排除常数)")
self.create_tooltip(self.clear_button, "清空所有检测结果和处理过程信息")
self.create_tooltip(self.view_log_button, "查看历史检测日志文件")
self.create_tooltip(self.view_excel_button, "左键查看最近Excel文件\n右键浏览并选择Excel文件")
# 状态标签
self.status_var = tk.StringVar(value="就绪")
status_frame = ttk.Frame(detection_frame)
status_frame.grid(row=1, column=0, sticky=tk.W, pady=(10, 0))
self.status_label = ttk.Label(
status_frame,
textvariable=self.status_var,
style='Status.TLabel'
)
self.status_label.grid(row=0, column=0, sticky=tk.W)
def create_excel_context_menu(self):
"""为查看Excel按钮创建上下文菜单"""
# 创建右键菜单
self.excel_context_menu = tk.Menu(self.root, tearoff=0)
self.excel_context_menu.add_command(label="查看最近生成的Excel",
command=self.view_excel)
self.excel_context_menu.add_command(label="浏览并选择Excel文件",
command=self.browse_and_view_excel)
self.excel_context_menu.add_separator()
# 为按钮绑定右键事件
self.view_excel_button.bind("<Button-3>", self.show_excel_context_menu)
def show_excel_context_menu(self, event):
"""显示Excel上下文菜单"""
self.excel_context_menu.tk_popup(event.x_root, event.y_root)
def create_tooltip(self, widget, text):
"""为控件创建提示"""
tooltip = None
def show_tooltip(event):
nonlocal tooltip
x, y, _, _ = widget.bbox("insert")
x += widget.winfo_rootx() + 25
y += widget.winfo_rooty() + 20
tooltip = tk.Toplevel(widget)
tooltip.wm_overrideredirect(True)
tooltip.wm_geometry(f"+{x}+{y}")
label = tk.Label(tooltip, text=text, background="#ffffe0", relief="solid", borderwidth=1,
font=("微软雅黑", 9))
label.pack()
def hide_tooltip(event):
nonlocal tooltip
if tooltip:
tooltip.destroy()
tooltip = None
widget.bind("<Enter>", show_tooltip)
widget.bind("<Leave>", hide_tooltip)
    def create_result_panel(self):
        """Build the bottom result area: a horizontal paned window with the
        processing log on the left and the detection results on the right."""
        result_frame = ttk.LabelFrame(self.main_frame, text="检测结果", padding="5")
        result_frame.grid(row=6, column=0, sticky=(tk.W, tk.E, tk.N, tk.S), pady=(0, 10))
        result_frame.columnconfigure(0, weight=1)
        result_frame.rowconfigure(0, weight=1)
        # User-resizable split between the two text panes
        self.paned_window = tk.PanedWindow(result_frame, orient=tk.HORIZONTAL, sashrelief=tk.RAISED, sashwidth=8)
        self.paned_window.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        # Left pane: processing log
        self.process_text_frame = ttk.Frame(self.paned_window)
        ttk.Label(self.process_text_frame, text="处理过程:", font=('微软雅黑', 10, 'bold')).pack(anchor=tk.W, pady=(0, 5))
        process_container = ttk.Frame(self.process_text_frame)
        process_container.pack(fill=tk.BOTH, expand=True)
        self.process_text = scrolledtext.ScrolledText(
            process_container,
            height=8,
            width=50,
            wrap=tk.WORD,
            font=('微软雅黑', 9)
        )
        self.process_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        # Right pane: detection results
        self.result_text_frame = ttk.Frame(self.paned_window)
        ttk.Label(self.result_text_frame, text="检测结果:", font=('微软雅黑', 10, 'bold')).pack(anchor=tk.W, pady=(0, 5))
        result_container = ttk.Frame(self.result_text_frame)
        result_container.pack(fill=tk.BOTH, expand=True)
        self.result_text = scrolledtext.ScrolledText(
            result_container,
            height=8,
            width=50,
            wrap=tk.WORD,
            font=('微软雅黑', 9)
        )
        self.result_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
        # Add both panes to the paned window
        self.paned_window.add(self.process_text_frame)
        self.paned_window.add(self.result_text_frame)
        # Keep either pane from being collapsed completely
        self.paned_window.paneconfigure(self.process_text_frame, minsize=200)
        self.paned_window.paneconfigure(self.result_text_frame, minsize=200)
def create_footer(self):
"""创建底部信息"""
footer_frame = ttk.Frame(self.main_frame)
footer_frame.grid(row=7, column=0, sticky=(tk.W, tk.E))
footer_frame.columnconfigure(0, weight=1)
# 版本信息
version_label = tk.Label(
footer_frame,
text="源代码重复行统计软件 V1.00 © 2024",
font=('微软雅黑', 9),
fg='#7f8c8d',
bg='#e8f4f8'
)
version_label.grid(row=0, column=0, sticky=tk.W)
def initialize_log_file(self):
"""初始化日志文件"""
try:
with open(self.log_file, 'a', encoding='utf-8') as f:
f.write(f"\n\n{'='*70}\n")
f.write(f"重复代码检测日志 - 开始时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
f.write(f"{'='*70}\n")
except Exception as e:
# 如果创建日志文件失败,可以忽略
pass
    def update_ui_for_compare_mode(self):
        """Enable/disable widgets so they match the selected comparison mode.

        Also disables the adjacent-line detection button outside single-file
        mode (that detection is only defined within one file) and refreshes
        its tooltip text to explain why.
        """
        compare_mode = self.compare_mode_var.get()
        self.current_compare_mode = compare_mode
        # Single-file widgets
        is_single_file = (compare_mode == "single_file")
        self.file_entry.config(state='normal' if is_single_file else 'disabled')
        self.browse_button.config(state='normal' if is_single_file else 'disabled')
        # Two-file widgets
        is_two_files = (compare_mode == "two_files")
        self.file1_entry.config(state='normal' if is_two_files else 'disabled')
        self.file2_entry.config(state='normal' if is_two_files else 'disabled')
        self.browse_file1_button.config(state='normal' if is_two_files else 'disabled')
        self.browse_file2_button.config(state='normal' if is_two_files else 'disabled')
        # Directory widgets
        is_directory = (compare_mode == "directory")
        self.directory_entry.config(state='normal' if is_directory else 'disabled')
        self.browse_directory_button.config(state='normal' if is_directory else 'disabled')
        # Adjacent-line detection is only available in single-file mode
        is_adjacent_available = (compare_mode == "single_file")
        self.adjacent_detect_button.config(state='normal' if is_adjacent_available else 'disabled')
        if not is_adjacent_available:
            tooltip_text = "相邻行检测仅在单文件模式下可用"
        else:
            tooltip_text = "在源代码的指定行范围内仅查找相邻行中的重复行(排除单字符重复)"
        # Destroy any tooltip Toplevel still parented to the button, then
        # rebind with the new text (bind() replaces the previous handlers).
        for child in self.adjacent_detect_button.winfo_children():
            if isinstance(child, tk.Toplevel):
                child.destroy()
        self.create_tooltip(self.adjacent_detect_button, tooltip_text)
def browse_file(self, file_type):
"""浏览并选择文件"""
file_path = filedialog.askopenfilename(
title="选择源代码文件",
filetypes=[
("源代码文件", "*.c;*.cpp;*.h;*.hpp;*.java;*.py;*.js;*.ts"),
("C/C++文件", "*.c;*.cpp;*.h;*.hpp"),
("Java文件", "*.java"),
("Python文件", "*.py"),
("JavaScript文件", "*.js;*.ts"),
("所有文件", "*.*")
]
)
if file_path:
if file_type == "single":
self.file_path_var.set(file_path)
elif file_type == "file1":
self.file1_path_var.set(file_path)
elif file_type == "file2":
self.file2_path_var.set(file_path)
def browse_directory(self):
"""浏览并选择目录"""
directory_path = filedialog.askdirectory(title="选择源代码目录")
if directory_path:
self.directory_path_var.set(directory_path)
def clear_results(self):
"""清空所有检测结果"""
self.process_text.delete(1.0, tk.END)
self.result_text.delete(1.0, tk.END)
self.status_var.set("就绪")
self.process_text.insert(tk.END, "处理过程已清空。\n")
self.result_text.insert(tk.END, "检测结果已清空。\n")
def view_log(self):
"""查看日志文件"""
if os.path.exists(self.log_file):
try:
os.startfile(self.log_file)
except Exception as e:
# 如果无法直接打开,显示文件路径
self.result_text.insert(tk.END, f"日志文件路径: {os.path.abspath(self.log_file)}\n")
with open(self.log_file, 'r', encoding='utf-8') as f:
content = f.read()
self.result_text.insert(tk.END, f"日志内容:\n{content}\n")
else:
self.result_text.insert(tk.END, "日志文件不存在,请先进行检测操作。\n")
def view_excel(self):
"""查看最近生成的Excel文件"""
if self.latest_excel_file and os.path.exists(self.latest_excel_file):
try:
os.startfile(self.latest_excel_file)
self.result_text.insert(tk.END, f"正在打开Excel文件: {os.path.basename(self.latest_excel_file)}\n")
except Exception as e:
self.result_text.insert(tk.END, f"打开Excel文件失败: {e}\n")
else:
self.result_text.insert(tk.END, "没有找到最近生成的Excel文件请先进行检测操作。\n")
def browse_and_view_excel(self):
"""浏览并选择Excel文件查看"""
excel_file = filedialog.askopenfilename(
title="选择Excel文件",
filetypes=[("Excel文件", "*.xlsx"), ("所有文件", "*.*")]
)
if excel_file and os.path.exists(excel_file):
try:
os.startfile(excel_file)
self.result_text.insert(tk.END, f"正在打开Excel文件: {os.path.basename(excel_file)}\n")
except Exception as e:
self.result_text.insert(tk.END, f"打开Excel文件失败: {e}\n")
elif excel_file:
self.result_text.insert(tk.END, f"选择的文件不存在: {excel_file}\n")
def view_excel_enhanced(self, event=None):
"""增强版查看Excel - 左键点击查看最近,右键点击弹出菜单"""
if event is None or (hasattr(event, 'num') and event.num == 1):
# 左键点击查看最近Excel
self.view_excel()
def get_line_range(self):
"""获取用户输入的行数范围"""
start_line = 0
end_line = 0
try:
start_text = self.start_entry.get().strip()
end_text = self.end_entry.get().strip()
if start_text:
start_line = int(start_text)
if end_text:
end_line = int(end_text)
except ValueError:
# 如果输入不是有效数字,使用默认值
pass
return start_line, end_line
def start_detection(self, detection_mode):
"""开始检测"""
self.current_detection_mode = detection_mode
compare_mode = self.compare_mode_var.get()
# 检查必要的输入
if compare_mode == "single_file":
file_path = self.file_path_var.get().strip()
if not file_path:
self.result_text.insert(tk.END, "错误: 请先选择源代码文件\n")
return
# 启动检测线程
start_line, end_line = self.get_line_range()
thread = threading.Thread(
target=self.detect_single_file,
args=(file_path, start_line, end_line, detection_mode)
)
elif compare_mode == "two_files":
file1_path = self.file1_path_var.get().strip()
file2_path = self.file2_path_var.get().strip()
if not file1_path or not file2_path:
self.result_text.insert(tk.END, "错误: 请先选择两个源代码文件\n")
return
# 启动双文件检测线程
thread = threading.Thread(
target=self.detect_two_files,
args=(file1_path, file2_path, detection_mode)
)
elif compare_mode == "directory":
directory_path = self.directory_path_var.get().strip()
if not directory_path:
self.result_text.insert(tk.END, "错误: 请先选择源代码目录\n")
return
# 启动目录检测线程
thread = threading.Thread(
target=self.detect_directory,
args=(directory_path, detection_mode)
)
else:
self.result_text.insert(tk.END, f"错误: 不支持的比较模式: {compare_mode}\n")
return
# 更新状态
self.status_var.set("正在检测中...")
self.process_text.delete(1.0, tk.END)
self.result_text.delete(1.0, tk.END)
self.process_text.insert(tk.END, f"开始检测: 模式={detection_mode}, 比较方式={compare_mode}\n")
# 启动线程
thread.daemon = True
thread.start()
def detect_single_file(self, file_path, start_line, end_line, detection_mode):
"""单文件检测"""
try:
# 定义回调函数
def output_callback(text):
self.message_queue.put(("output", text))
def status_callback(text):
self.message_queue.put(("status", text))
# 根据检测模式调用不同的检测函数
if detection_mode == "full":
result = detect_duplicate_lines_gui(
file_path, start_line, end_line,
output_callback, status_callback,
mode="single_file"
)
elif detection_mode == "adjacent":
result = detect_adjacent_duplicate_lines_gui(
file_path, start_line, end_line,
output_callback, status_callback
)
elif detection_mode == "left_side":
result = detect_left_side_equality_gui(
file_path, start_line, end_line,
output_callback, status_callback,
mode="single_file"
)
elif detection_mode == "right_side":
result = detect_right_side_equality_gui(
file_path, start_line, end_line,
output_callback, status_callback,
mode="single_file"
)
else:
self.message_queue.put(("output", f"错误: 不支持的检测模式: {detection_mode}\n"))
return
# 处理结果
if result:
# 显示结果
self.message_queue.put(("display_result", result))
# 保存结果到日志
if save_results_to_log(result, self.log_file):
self.message_queue.put(("status", "结果已保存到日志文件"))
# 生成并保存独立的Excel文件
excel_filename = generate_excel_filename(result)
if save_results_to_excel_with_filename(result, excel_filename):
self.latest_excel_file = excel_filename
self.message_queue.put(("status", f"结果已保存到Excel文件: {excel_filename}"))
else:
self.message_queue.put(("output", "未找到重复代码\n"))
except Exception as e:
self.message_queue.put(("output", f"检测过程中发生错误: {e}\n"))
finally:
self.message_queue.put(("status", "检测完成"))
def detect_two_files(self, file1_path, file2_path, detection_mode):
"""双文件检测"""
try:
# 定义回调函数
def output_callback(text):
self.message_queue.put(("output", text))
def status_callback(text):
self.message_queue.put(("status", text))
# 根据检测模式调用不同的检测函数
if detection_mode in ["full", "left_side", "right_side"]:
result = detect_duplicate_between_two_files(
file1_path, file2_path, detection_mode,
output_callback, status_callback
)
else:
self.message_queue.put(("output", f"错误: 双文件模式不支持 {detection_mode} 检测\n"))
return
# 处理结果
if result:
# 显示结果
self.message_queue.put(("display_result", result))
# 保存结果到日志
if save_results_to_log(result, self.log_file):
self.message_queue.put(("status", "结果已保存到日志文件"))
# 生成并保存独立的Excel文件
excel_filename = generate_excel_filename(result)
if save_results_to_excel_with_filename(result, excel_filename):
self.latest_excel_file = excel_filename
self.message_queue.put(("status", f"结果已保存到Excel文件: {excel_filename}"))
else:
self.message_queue.put(("output", "在两个文件之间未找到重复代码\n"))
except Exception as e:
self.message_queue.put(("output", f"检测过程中发生错误: {e}\n"))
finally:
self.message_queue.put(("status", "检测完成"))
def detect_directory(self, directory_path, detection_mode):
"""目录检测"""
try:
# 定义回调函数
def output_callback(text):
self.message_queue.put(("output", text))
def status_callback(text):
self.message_queue.put(("status", text))
# 根据检测模式调用不同的检测函数
if detection_mode in ["full", "left_side", "right_side"]:
result = detect_duplicate_in_directory(
directory_path, detection_mode,
output_callback, status_callback
)
else:
self.message_queue.put(("output", f"错误: 目录模式不支持 {detection_mode} 检测\n"))
return
# 处理结果
if result:
# 显示结果
self.message_queue.put(("display_result", result))
# 保存结果到日志
if save_results_to_log(result, self.log_file):
self.message_queue.put(("status", "结果已保存到日志文件"))
# 生成并保存独立的Excel文件
excel_filename = generate_excel_filename(result)
if save_results_to_excel_with_filename(result, excel_filename):
self.latest_excel_file = excel_filename
self.message_queue.put(("status", f"结果已保存到Excel文件: {excel_filename}"))
else:
self.message_queue.put(("output", "在目录下的文件之间未找到重复代码\n"))
except Exception as e:
self.message_queue.put(("output", f"检测过程中发生错误: {e}\n"))
finally:
self.message_queue.put(("status", "检测完成"))
def process_queue(self):
"""处理消息队列"""
try:
while True:
try:
msg_type, data = self.message_queue.get_nowait()
if msg_type == "output":
self.result_text.insert(tk.END, data)
self.result_text.see(tk.END)
elif msg_type == "status":
self.status_var.set(data)
self.process_text.insert(tk.END, f"{data}\n")
self.process_text.see(tk.END)
elif msg_type == "display_result":
def display():
display_results(data,
lambda text: self.result_text.insert(tk.END, text),
lambda text: None)
self.root.after(0, display)
except:
break
finally:
self.root.after(100, self.process_queue)
def on_window_resize(self, event):
"""窗口大小变化处理"""
if self.root.winfo_width() != self.last_width or self.root.winfo_height() != self.last_height:
self.last_width = self.root.winfo_width()
self.last_height = self.root.winfo_height()
# 可以在这里添加窗口大小变化时的额外处理逻辑
pass
def main():
    """Create the Tk root window, attach the detector GUI, and run the event loop."""
    window = tk.Tk()
    gui_app = DuplicateCodeDetectorGUI(window)  # keep a reference for the loop's lifetime
    window.mainloop()
if __name__ == "__main__":
    # Launch the GUI only when run as a script, not when imported as a module.
    main()