5444 lines
234 KiB
Python
5444 lines
234 KiB
Python
# # # Version 4 (第四版)
|
||
|
||
# # """
|
||
# # Description : Utils for CGA (CoverageParser & TBInjector)
|
||
# # - Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
|
||
# # - Enhanced: Three-layer protection for black-box constraints
|
||
# # * Layer 1: Enhanced Prompt constraints (prevention)
|
||
# # * Layer 2: Smart code transformation (conversion)
|
||
# # * Layer 3: Quality assessment & retry (fallback)
|
||
# # - Integrated: Diversity Constraint Injector (Layer 1)
|
||
# # Author : CorrectBench Integration
|
||
# # """
|
||
# # import re
|
||
# # import os
|
||
# # import logging
|
||
# # from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
|
||
|
||
# # # [New] Import the diversity constraint injector
|
||
# # if TYPE_CHECKING:
|
||
# # from autoline.diversity_injector import DiversityInjector
|
||
|
||
# # # Configure logging
|
||
# # logger = logging.getLogger(__name__)
|
||
|
||
# # # ============================================================================
|
||
# # # Three-layer protection strategy overview
|
||
# # # ============================================================================
|
||
# # # Layer 1 - Prompt constraints: dynamically extract the allowed-signal list, state explicit constraints, give positive and negative examples
|
||
# # # Layer 2 - Smart transformation: detect violating intent and try to convert it into a legal form
|
||
# # # Layer 3 - Quality assessment: return a quality score when the violation ratio is too high, triggering regeneration
|
||
# # # ============================================================================
|
||
|
||
|
||
# # # ============================================================================
|
||
# # # Black-box constraint checker - implementation of the three-layer protection strategy
|
||
# # # ============================================================================
|
||
# # class BlackBoxValidator:
|
||
# # """
|
||
# # 黑盒约束验证器 - 三层防护策略
|
||
|
||
# # Layer 1: 增强Prompt约束(预防)
|
||
# # - 动态提取允许信号列表
|
||
# # - 生成明确的约束提示
|
||
|
||
# # Layer 2: 智能代码转换(转换)
|
||
# # - 检测违规意图
|
||
# # - 尝试转换为合法的等价形式
|
||
# # - 转换失败时才注释
|
||
|
||
# # Layer 3: 质量评估(重试)
|
||
# # - 计算代码质量分数
|
||
# # - 违规比例过高时建议重试
|
||
# # """
|
||
|
||
# # # 常见的内部信号命名模式(按严重程度分类)
|
||
# # INTERNAL_SIGNAL_PATTERNS = {
|
||
# # # 高风险:FSM状态相关(绝对不能修改)
|
||
# # 'critical': [
|
||
# # (r'\bstate\b', 'FSM状态寄存器'),
|
||
# # (r'\bnext_state\b', 'FSM下一状态'),
|
||
# # (r'\bcurrent_state\b', 'FSM当前状态'),
|
||
# # (r'\bnext\b(?!\s*[,@])', '下一状态简写'),
|
||
# # ],
|
||
# # # 中风险:计数器和内部寄存器
|
||
# # 'warning': [
|
||
# # (r'\bcounter\b', '内部计数器'),
|
||
# # (r'\bcount\b', '计数寄存器'),
|
||
# # (r'\bcnt\b', '计数简写'),
|
||
# # (r'\bfall_counter\b', '下落计数器'),
|
||
# # (r'\breg_\w+', '内部寄存器'),
|
||
# # ],
|
||
# # # 低风险:可疑信号(需要确认)
|
||
# # 'info': [
|
||
# # (r'\binternal_\w+', '内部信号'),
|
||
# # (r'\btemp_\w+', '临时信号'),
|
||
# # (r'\bprev_\w+', '前一状态'),
|
||
# # ]
|
||
# # }
|
||
|
||
# # # 非法语句模式
|
||
# # FORBIDDEN_STATEMENTS = [
|
||
# # (r'\bforce\s+(\w+)', 'force语句', 'critical'),
|
||
# # (r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
|
||
# # (r'\bdeassign\s+', 'deassign语句', 'critical'),
|
||
# # (r'\brelease\s+', 'release语句', 'critical'),
|
||
# # ]
|
||
|
||
# # # 层次化访问模式(如 DUT.state)
|
||
# # HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
|
||
|
||
# # def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None):
|
||
# # """
|
||
# # Args:
|
||
# # dut_inputs: DUT模块的输入端口列表
|
||
# # dut_outputs: DUT模块的输出端口列表
|
||
# # """
|
||
# # self.dut_inputs = dut_inputs or []
|
||
# # self.dut_outputs = dut_outputs or []
|
||
# # self.violations = {'critical': [], 'warning': [], 'info': []}
|
||
# # self.transformations = []
|
||
|
||
# # def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
|
||
# # """验证并转换代码 - 主入口"""
|
||
# # self.violations = {'critical': [], 'warning': [], 'info': []}
|
||
# # self.transformations = []
|
||
|
||
# # if tb_code:
|
||
# # self._extract_signals_from_tb(tb_code)
|
||
|
||
# # original_lines = code.strip().split('\n')
|
||
# # total_lines = len([l for l in original_lines if l.strip() and not l.strip().startswith('//')])
|
||
|
||
# # # Step 1: 移除非法语句
|
||
# # code = self._transform_forbidden_statements(code)
|
||
|
||
# # # Step 2: 转换层次化访问
|
||
# # code = self._transform_hierarchical_access(code)
|
||
|
||
# # # Step 3: 智能转换内部信号访问
|
||
# # code = self._smart_transform_internal_signals(code)
|
||
|
||
# # # Step 4: 最后清理
|
||
# # code = self._final_cleanup(code)
|
||
|
||
# # # 计算质量分数
|
||
# # quality_score = self._calculate_quality_score(total_lines)
|
||
|
||
# # # 决定是否需要重试
|
||
# # should_retry = quality_score < 50 or len(self.violations['critical']) > 3
|
||
|
||
# # result = {
|
||
# # 'quality_score': quality_score,
|
||
# # 'is_valid': len(self.violations['critical']) == 0,
|
||
# # 'violations': self.violations,
|
||
# # 'transformations': self.transformations,
|
||
# # 'should_retry': should_retry,
|
||
# # 'allowed_signals': self._get_allowed_signals_info()
|
||
# # }
|
||
|
||
# # return code.strip(), result
|
||
|
||
# # def _extract_signals_from_tb(self, tb_code: str):
|
||
# # """从测试平台代码中提取DUT输入输出信号"""
|
||
# # dut_match = re.search(r'(\w+)\s+(?:DUT|dut|uut|UUT)\s*\(', tb_code, re.IGNORECASE)
|
||
# # if dut_match:
|
||
# # start = dut_match.start()
|
||
# # bracket_count = 0
|
||
# # end = start
|
||
# # for i, char in enumerate(tb_code[start:]):
|
||
# # if char == '(':
|
||
# # bracket_count += 1
|
||
# # elif char == ')':
|
||
# # bracket_count -= 1
|
||
# # if bracket_count == 0:
|
||
# # end = start + i + 1
|
||
# # break
|
||
|
||
# # dut_instance = tb_code[start:end]
|
||
# # port_pattern = r'\.(\w+)\s*\(\s*(\w+)\s*\)'
|
||
|
||
# # for match in re.finditer(port_pattern, dut_instance):
|
||
# # signal_name = match.group(2)
|
||
|
||
# # is_input = re.search(rf'\breg\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
|
||
# # is_output = re.search(rf'\bwire\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
|
||
|
||
# # if is_input and signal_name not in self.dut_inputs:
|
||
# # self.dut_inputs.append(signal_name)
|
||
# # if is_output and signal_name not in self.dut_outputs:
|
||
# # self.dut_outputs.append(signal_name)
|
||
|
||
# # # 备用方案:通过reg/wire声明推断
|
||
# # if not self.dut_inputs and not self.dut_outputs:
|
||
# # for match in re.finditer(r'\breg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
|
||
# # signal = match.group(1)
|
||
# # if signal.lower() not in ['file', 'scenario', 'i', 'j', 'k', 'cnt']:
|
||
# # if signal not in self.dut_inputs:
|
||
# # self.dut_inputs.append(signal)
|
||
|
||
# # for match in re.finditer(r'\bwire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
|
||
# # signal = match.group(1)
|
||
# # if signal not in self.dut_outputs:
|
||
# # self.dut_outputs.append(signal)
|
||
|
||
# # def _transform_forbidden_statements(self, code: str) -> str:
|
||
# # """转换非法语句"""
|
||
# # for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
|
||
# # matches = list(re.finditer(pattern, code, re.IGNORECASE))
|
||
# # for match in reversed(matches):
|
||
# # signal = match.group(1) if match.groups() else 'unknown'
|
||
# # self.violations[severity].append(f"{desc}: {signal}")
|
||
|
||
# # line_start = code.rfind('\n', 0, match.start()) + 1
|
||
# # line_end = code.find('\n', match.end())
|
||
# # if line_end == -1:
|
||
# # line_end = len(code)
|
||
# # original_line = code[line_start:line_end]
|
||
|
||
# # # 尝试转换 force -> 直接赋值(仅对输入信号)
|
||
# # if 'force' in match.group(0).lower() and signal in self.dut_inputs:
|
||
# # new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
|
||
# # code = code[:line_start] + new_line + code[line_end:]
|
||
# # self.transformations.append({
|
||
# # 'type': 'force_to_assign',
|
||
# # 'original': original_line.strip(),
|
||
# # 'transformed': new_line.strip()
|
||
# # })
|
||
# # continue
|
||
|
||
# # code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
|
||
# # self.transformations.append({
|
||
# # 'type': 'blocked',
|
||
# # 'original': original_line.strip(),
|
||
# # 'reason': desc
|
||
# # })
|
||
|
||
# # return code
|
||
|
||
# # def _transform_hierarchical_access(self, code: str) -> str:
|
||
# # """转换层次化访问(如 DUT.state)"""
|
||
# # for match in re.finditer(self.HIERARCHICAL_ACCESS, code):
|
||
# # prefix = match.group(1)
|
||
# # signal = match.group(2)
|
||
|
||
# # if prefix.upper() in ['DUT', 'UUT', 'TOP', 'TB']:
|
||
# # if signal not in self.dut_outputs:
|
||
# # self.violations['critical'].append(f"层次化访问内部信号: {prefix}.{signal}")
|
||
|
||
# # line_start = code.rfind('\n', 0, match.start()) + 1
|
||
# # line_end = code.find('\n', match.end())
|
||
# # if line_end == -1:
|
||
# # line_end = len(code)
|
||
# # original_line = code[line_start:line_end]
|
||
# # code = code[:line_start] + '// [HIERARCHY] ' + original_line.lstrip() + code[line_end:]
|
||
|
||
# # return code
|
||
|
||
# # def _smart_transform_internal_signals(self, code: str) -> str:
|
||
# # """智能转换内部信号访问"""
|
||
# # lines = code.split('\n')
|
||
# # transformed_lines = []
|
||
|
||
# # for line in lines:
|
||
# # stripped = line.strip()
|
||
|
||
# # if stripped.startswith('//') or not stripped:
|
||
# # transformed_lines.append(line)
|
||
# # continue
|
||
|
||
# # if (stripped.startswith('#') or stripped.startswith('$') or
|
||
# # stripped.startswith('repeat(') or stripped.startswith('@(')):
|
||
# # transformed_lines.append(line)
|
||
# # continue
|
||
|
||
# # detected_signals = self._detect_internal_signals_in_line(stripped)
|
||
# # has_critical = detected_signals.get('critical', [])
|
||
# # has_warning = detected_signals.get('warning', [])
|
||
|
||
# # if not has_critical and not has_warning:
|
||
# # transformed_lines.append(line)
|
||
# # continue
|
||
|
||
# # context = self._analyze_signal_context(stripped, detected_signals)
|
||
|
||
# # if context['type'] == 'assignment':
|
||
# # transformed_lines.append(f"// [INTERNAL_ASSIGN] Cannot modify internal signal")
|
||
# # transformed_lines.append(f"// Original: {stripped}")
|
||
# # self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
|
||
# # elif context['type'] == 'condition':
|
||
# # transformed = self._transform_condition(stripped, context)
|
||
# # transformed_lines.append(transformed)
|
||
# # self.transformations.append({
|
||
# # 'type': 'condition_transform',
|
||
# # 'original': stripped,
|
||
# # 'transformed': transformed
|
||
# # })
|
||
# # elif context['type'] == 'wait_for_state':
|
||
# # transformed = self._transform_state_wait(stripped, context)
|
||
# # transformed_lines.append(transformed)
|
||
# # self.transformations.append({
|
||
# # 'type': 'wait_transform',
|
||
# # 'original': stripped,
|
||
# # 'transformed': transformed
|
||
# # })
|
||
# # else:
|
||
# # if has_critical:
|
||
# # transformed_lines.append(f"// [WARNING] Contains internal signal reference: {has_critical}")
|
||
# # transformed_lines.append(f"// Original: {stripped}")
|
||
# # for sig in has_critical:
|
||
# # self.violations['warning'].append(f"可疑的内部信号访问: {sig}")
|
||
# # else:
|
||
# # transformed_lines.append(line)
|
||
|
||
# # return '\n'.join(transformed_lines)
|
||
|
||
# # def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
|
||
# # """检测行中的内部信号"""
|
||
# # detected = {'critical': [], 'warning': [], 'info': []}
|
||
|
||
# # LEGAL_KEYWORDS = {
|
||
# # 'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
|
||
# # 'while', 'for', 'case', 'default', 'always', 'initial',
|
||
# # 'assign', 'wire', 'reg', 'input', 'output', 'inout',
|
||
# # 'parameter', 'localparam', 'integer', 'real', 'time',
|
||
# # 'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
|
||
# # 'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
|
||
# # 'true', 'false', 'idle', 'wait'
|
||
# # }
|
||
|
||
# # SYSTEM_FUNCTIONS = {'$display', '$write', '$monitor', '$fopen', '$fclose',
|
||
# # '$fdisplay', '$fwrite', '$readmemh', '$readmemb',
|
||
# # '$finish', '$stop', '$random', '$time', '$stime'}
|
||
|
||
# # for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
|
||
# # for pattern, name in patterns:
|
||
# # matches = re.findall(pattern, line, re.IGNORECASE)
|
||
# # if matches:
|
||
# # for match in matches:
|
||
# # if isinstance(match, tuple):
|
||
# # match = match[0] if match[0] else match[1]
|
||
|
||
# # match_lower = match.lower() if match else ''
|
||
|
||
# # if match_lower in LEGAL_KEYWORDS:
|
||
# # continue
|
||
# # if match in SYSTEM_FUNCTIONS:
|
||
# # continue
|
||
# # if match in self.dut_inputs or match in self.dut_outputs:
|
||
# # continue
|
||
# # if match.startswith('$'):
|
||
# # continue
|
||
|
||
# # if match and match not in detected[severity]:
|
||
# # detected[severity].append(match)
|
||
|
||
# # return detected
|
||
|
||
# # def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
|
||
# # """分析信号使用上下文"""
|
||
# # assign_match = re.search(r'(\w+)\s*(?:=|<=)\s*', line)
|
||
# # if assign_match:
|
||
# # target = assign_match.group(1)
|
||
# # if target in signals.get('critical', []) or target in signals.get('warning', []):
|
||
# # return {'type': 'assignment', 'signals': [target], 'line': line}
|
||
|
||
# # if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
|
||
# # return {'type': 'wait_for_state', 'signals': signals.get('critical', []), 'line': line}
|
||
|
||
# # if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
|
||
# # return {'type': 'condition', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
|
||
|
||
# # return {'type': 'other', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
|
||
|
||
# # def _transform_condition(self, line: str, context: Dict) -> str:
|
||
# # """转换条件判断语句"""
|
||
# # original = line
|
||
|
||
# # if 'state' in str(context['signals']):
|
||
# # indent = len(line) - len(line.lstrip())
|
||
# # spaces = ' ' * indent
|
||
|
||
# # transformed = f"// [TRANSFORMED] Original: {original.strip()}\n"
|
||
# # transformed += f"{spaces}// Cannot directly check internal state\n"
|
||
# # transformed += f"{spaces}// Alternative: Wait for expected clock cycles\n"
|
||
# # transformed += f"{spaces}repeat(5) @(posedge clk); // Adjust cycles as needed"
|
||
|
||
# # self.violations['warning'].append(f"条件判断转换: {original.strip()}")
|
||
# # return transformed
|
||
|
||
# # return f"// [TRANSFORMED] {original}"
|
||
|
||
# # def _transform_state_wait(self, line: str, context: Dict) -> str:
|
||
# # """转换状态等待语句"""
|
||
# # indent = len(line) - len(line.lstrip())
|
||
# # spaces = ' ' * indent
|
||
|
||
# # transformed = f"// [TRANSFORMED] Original: {line.strip()}\n"
|
||
# # transformed += f"{spaces}// Cannot wait for internal state directly\n"
|
||
# # transformed += f"{spaces}// Alternative: Drive inputs and wait for expected cycles\n"
|
||
# # transformed += f"{spaces}repeat(10) @(posedge clk); // Adjust based on FSM design"
|
||
|
||
# # self.violations['info'].append(f"状态等待转换: {line.strip()}")
|
||
# # return transformed
|
||
|
||
# # def _final_cleanup(self, code: str) -> str:
|
||
# # """最终清理"""
|
||
# # lines = code.split('\n')
|
||
# # cleaned = []
|
||
|
||
# # for line in lines:
|
||
# # stripped = line.strip()
|
||
|
||
# # if stripped in ['begin', 'end'] and cleaned:
|
||
# # last = cleaned[-1].strip()
|
||
# # if last.startswith('// [TRANSFORMED]') or last.startswith('// [INTERNAL'):
|
||
# # continue
|
||
|
||
# # cleaned.append(line)
|
||
|
||
# # result = '\n'.join(cleaned)
|
||
# # result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
|
||
|
||
# # return result
|
||
|
||
# # def _calculate_quality_score(self, total_lines: int) -> int:
|
||
# # """计算代码质量分数"""
|
||
# # if total_lines == 0:
|
||
# # return 0
|
||
|
||
# # score = 100
|
||
# # score -= len(self.violations['critical']) * 20
|
||
# # score -= len(self.violations['warning']) * 5
|
||
# # score += len([t for t in self.transformations if 'blocked' not in t.get('type', '')]) * 5
|
||
|
||
# # return max(0, min(100, score))
|
||
|
||
# # def _get_allowed_signals_info(self) -> Dict:
|
||
# # """获取允许的信号信息"""
|
||
# # return {
|
||
# # 'inputs': self.dut_inputs,
|
||
# # 'outputs': self.dut_outputs,
|
||
# # 'all_allowed': self.dut_inputs + self.dut_outputs
|
||
# # }
|
||
|
||
# # def generate_constraint_prompt(self) -> str:
|
||
# # """生成动态约束提示 - 使用实际信号名"""
|
||
# # prompt = "\n[SIGNAL CONSTRAINTS - DERIVED FROM YOUR DUT]\n"
|
||
|
||
# # # 提取复位信号名(优先使用实际的)
|
||
# # reset_signal = self._find_reset_signal()
|
||
|
||
# # if self.dut_inputs:
|
||
# # prompt += "ALLOWED INPUTS (you CAN drive these):\n"
|
||
# # for sig in self.dut_inputs:
|
||
# # prompt += f" - {sig}\n"
|
||
# # else:
|
||
# # prompt += "ALLOWED INPUTS: Check the testbench for actual signal names\n"
|
||
|
||
# # if self.dut_outputs:
|
||
# # prompt += "\nOUTPUTS (you can READ but NOT write):\n"
|
||
# # for sig in self.dut_outputs:
|
||
# # prompt += f" - {sig}\n"
|
||
|
||
# # prompt += f"""
|
||
# # FORBIDDEN ACTIONS:
|
||
# # 1. NEVER assign values to internal signals (state, counter, etc.)
|
||
# # 2. NEVER use 'force' or 'assign' statements
|
||
# # 3. NEVER access signals like DUT.state (hierarchical access)
|
||
|
||
# # CORRECT APPROACH:
|
||
# # - To reach a specific FSM state: drive inputs and WAIT for natural transition
|
||
# # - Example: Instead of "state = IDLE", use "{reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;"
|
||
# # """
|
||
# # return prompt
|
||
|
||
# # def _find_reset_signal(self) -> str:
|
||
# # """查找复位信号名"""
|
||
# # # 按优先级查找常见的复位信号名
|
||
# # reset_candidates = ['areset', 'rst_n', 'rst', 'reset', 'rst_b']
|
||
# # for sig in reset_candidates:
|
||
# # if sig in self.dut_inputs:
|
||
# # return sig
|
||
# # # 如果没找到,检查输入列表中是否有类似名称
|
||
# # for sig in self.dut_inputs:
|
||
# # sig_lower = sig.lower()
|
||
# # if 'reset' in sig_lower or 'rst' in sig_lower:
|
||
# # return sig
|
||
# # # 默认返回第一个输入信号(排除 clk)
|
||
# # for sig in self.dut_inputs:
|
||
# # if 'clk' not in sig.lower():
|
||
# # return sig
|
||
# # return "reset" # 兜底
|
||
|
||
|
||
# # # ============================================================================
|
||
# # # CoverageParser - coverage report parser
|
||
# # # ============================================================================
|
||
# # class CoverageParser:
|
||
# # """覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
|
||
|
||
# # [增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
|
||
# # [新增] 集成能量分配层,提供目标功能点优先级信息
|
||
# # [新增] 集成多样性约束注入器,避免测试用例同质化
|
||
# # """
|
||
|
||
# # def __init__(self, annotated_file, tb_code=None, semantic_result=None,
|
||
# # energy_allocator=None, diversity_injector=None):
|
||
# # self.file_path = annotated_file
|
||
# # self.tb_code = tb_code
|
||
# # self.semantic_result = semantic_result # [新增] 语义分析结果
|
||
# # self.energy_allocator = energy_allocator # [新增] 能量分配器
|
||
# # self.diversity_injector = diversity_injector # [新增] 多样性约束注入器
|
||
# # # Fix: Verilator coverage markers have the form %NNNNNN, ~NNNNNN or ^NNNNNN
|
||
# # # %NNNNNN - line coverage count (%000000 means the line was never executed)
|
||
# # # ~NNNNNN - branch/condition coverage count
|
||
# # # ^NNNNNN - uncovered branch
|
||
# # self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$') # 匹配 %NNNNNN code
|
||
# # self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$') # 匹配 ~NNNNNN code
|
||
# # self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$') # 匹配 ^NNNNNN code
|
||
# # # Some lines may start with a bare count (no marker prefix)
|
||
# # self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$')
|
||
# # self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')
|
||
|
||
# # self.validator = BlackBoxValidator()
|
||
# # if tb_code:
|
||
# # self.validator._extract_signals_from_tb(tb_code)
|
||
|
||
# # def generate_prompt(self, current_score):
|
||
# # """生成覆盖率驱动的Prompt"""
|
||
# # if not os.path.exists(self.file_path):
|
||
# # return None
|
||
|
||
# # try:
|
||
# # with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
# # lines = f.readlines()
|
||
# # except Exception:
|
||
# # return None
|
||
|
||
# # missing_blocks = []
|
||
# # current_block = []
|
||
# # recording = False
|
||
# # context_buffer = []
|
||
# # CONTEXT_SIZE = 3
|
||
|
||
# # # 收集缺失行用于 FSM 分析
|
||
# # missing_lines = []
|
||
|
||
# # for i, line in enumerate(lines):
|
||
# # line = line.strip()
|
||
# # count = -1
|
||
# # clean_code = line
|
||
# # is_tilde = False
|
||
# # is_caret = False
|
||
|
||
# # # 尝试匹配各种覆盖率标记格式
|
||
# # match_pct = self.line_pattern.match(line) # %NNNNNN code
|
||
# # match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
|
||
# # match_caret = self.caret_pattern.match(line) # ^NNNNNN code
|
||
# # match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
|
||
|
||
# # if match_pct:
|
||
# # count = int(match_pct.group(1))
|
||
# # clean_code = match_pct.group(2).strip()
|
||
# # elif match_tilde:
|
||
# # count = int(match_tilde.group(1))
|
||
# # clean_code = match_tilde.group(2).strip()
|
||
# # is_tilde = True
|
||
# # elif match_caret:
|
||
# # count = int(match_caret.group(1))
|
||
# # clean_code = match_caret.group(2).strip()
|
||
# # is_caret = True
|
||
# # elif match_plain:
|
||
# # # 纯数字格式(可能出现在某些 Verilator 版本)
|
||
# # count = int(match_plain.group(1))
|
||
# # clean_code = match_plain.group(2).strip()
|
||
|
||
# # if "//" in clean_code:
|
||
# # clean_code = clean_code.split("//")[0].strip()
|
||
|
||
# # is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
|
||
# # is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
|
||
# # clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
|
||
|
||
# # # Coverage status rules:
|
||
# # # - %NNNNNN: count > 0 means covered, count == 0 means not covered
|
||
# # # - ~NNNNNN: branch coverage marker, needs further inspection
|
||
# # # - ^NNNNNN: uncovered-branch marker
|
||
# # is_definitely_covered = (not is_tilde and not is_caret and count > 0)
|
||
# # is_definitely_missed = (not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or \
|
||
# # (is_caret and not is_hard_noise and not is_soft_noise)
|
||
|
||
# # if recording:
|
||
# # if is_definitely_covered:
|
||
# # missing_blocks.append(current_block)
|
||
# # missing_lines.extend(current_block)
|
||
# # current_block = []
|
||
# # recording = False
|
||
# # if not is_hard_noise:
|
||
# # context_buffer.append(clean_code)
|
||
# # else:
|
||
# # if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
# # current_block.append(f"Line {i+1}: {clean_code}")
|
||
# # else:
|
||
# # if is_definitely_missed:
|
||
# # recording = True
|
||
# # if context_buffer:
|
||
# # current_block.append(f"... (Context)")
|
||
# # for ctx in context_buffer:
|
||
# # current_block.append(f" {ctx}")
|
||
# # current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
|
||
# # else:
|
||
# # if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
# # context_buffer.append(clean_code)
|
||
# # if len(context_buffer) > CONTEXT_SIZE:
|
||
# # context_buffer.pop(0)
|
||
|
||
# # if recording and current_block:
|
||
# # missing_blocks.append(current_block)
|
||
# # missing_lines.extend(current_block)
|
||
# # if not missing_blocks:
|
||
# # return None
|
||
|
||
# # selected_blocks = missing_blocks[:50]
|
||
|
||
# # # 获取实际信号名用于示例
|
||
# # reset_signal = self.validator._find_reset_signal()
|
||
# # inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
|
||
# # example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
|
||
|
||
# # # 分析 FSM 相关的缺失代码
|
||
# # fsm_analysis = self._analyze_fsm_missing(missing_lines)
|
||
|
||
# # # [新增] 从语义分析结果获取 FSM 和功能点信息
|
||
# # semantic_context = self._generate_semantic_context()
|
||
|
||
# # prompt = f"""
|
||
# # [ROLE]
|
||
# # You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
|
||
|
||
# # [COVERAGE STATUS]
|
||
# # Current testbench achieves {current_score:.2f}% coverage.
|
||
# # The following logic blocks in the DUT are NEVER executed during simulation:
|
||
|
||
# # """
|
||
# # for idx, block in enumerate(selected_blocks):
|
||
# # prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
|
||
|
||
# # # [新增] 添加语义分析上下文
|
||
# # if semantic_context:
|
||
# # prompt += f"""
|
||
# # [SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
|
||
# # {semantic_context}
|
||
# # """
|
||
|
||
# # # === [新增] 添加能量分配目标上下文 ===
|
||
# # if self.energy_allocator:
|
||
# # energy_context = self.energy_allocator.get_target_context()
|
||
# # if energy_context:
|
||
# # prompt += f"""
|
||
# # [ENERGY-ALIGNED TARGET - PRIORITY]
|
||
# # {energy_context}
|
||
# # Focus your test scenario on covering this high-priority target first.
|
||
# # """
|
||
# # # =====================================
|
||
|
||
# # prompt += self.validator.generate_constraint_prompt()
|
||
|
||
# # # 添加 FSM 分析提示
|
||
# # if fsm_analysis:
|
||
# # prompt += f"""
|
||
# # [FSM STATE TRANSITION ANALYSIS - CRITICAL]
|
||
# # {fsm_analysis}
|
||
|
||
# # IMPORTANT: FSM transitions have PRIORITY ORDER!
|
||
# # - 'if' conditions are evaluated TOP to BOTTOM
|
||
# # - The FIRST matching condition determines the next state
|
||
# # - To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
|
||
# # - Read the missing code's context carefully: what conditions precede it?
|
||
|
||
# # """
|
||
|
||
# # prompt += f"""
|
||
# # [OUTPUT REQUIREMENTS - CRITICAL]
|
||
# # 1. Return ONLY Verilog test scenario code (NOT a task definition)
|
||
# # 2. Your code will be inserted INTO an existing `initial begin ... end` block
|
||
# # 3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
|
||
# # 4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
|
||
|
||
# # [CODING STYLE]
|
||
# # 1. Use blocking assignments for input signals: `signal = value;`
|
||
# # 2. Use `#N;` for time delays: `#10;` means wait 10 time units
|
||
# # 3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
|
||
# # 4. Start with reset sequence if needed
|
||
|
||
# # [BLACK-BOX CONSTRAINTS - CRITICAL]
|
||
# # 1. You can ONLY control module INPUTS listed above
|
||
# # 2. You CANNOT access internal signals (state, next_state, counters, etc.)
|
||
# # 3. You CANNOT use `force` or `assign` on internal signals
|
||
# # 4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
|
||
|
||
# # [STEP-BY-STEP APPROACH - REQUIRED]
|
||
# # For each missing branch, think through:
|
||
# # 1. What STATE must the FSM be in? (Look at the case statement)
|
||
# # 2. What CONDITIONS must be true/false? (Check priority order!)
|
||
# # 3. How to reach that state from reset? (Trace state transitions)
|
||
# # 4. What inputs to apply and in what order?
|
||
|
||
# # [POSITIVE EXAMPLE - CORRECT APPROACH]
|
||
# # ```verilog
|
||
# # // Reset sequence - use ACTUAL input signal names from above
|
||
# # {reset_signal} = 1;
|
||
# # repeat(2) @(posedge clk);
|
||
# # {reset_signal} = 0;
|
||
|
||
# # // Wait for FSM to reach desired state (estimate cycles)
|
||
# # repeat(3) @(posedge clk);
|
||
|
||
# # // Trigger missing branch by driving inputs
|
||
# # {example_signal} = 1;
|
||
# # repeat(5) @(posedge clk);
|
||
# # {example_signal} = 0;
|
||
# # repeat(10) @(posedge clk);
|
||
# # ```
|
||
|
||
# # [NEGATIVE EXAMPLE - DO NOT DO THIS]
|
||
# # ```verilog
|
||
# # // WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
|
||
# # reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
|
||
|
||
# # // WRONG: Not considering condition priority in FSM
|
||
# # // If missing code is "else if (condition_b)", you must make condition_a FALSE first!
|
||
# # // Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
|
||
# # // Then signal_a must be 1 (FALSE) for the else-if branch to execute
|
||
# # signal_a = 0; // WRONG: This blocks the else-if branch!
|
||
# # signal_b = 1; // This will NOT trigger because signal_a=0 took priority
|
||
|
||
# # // CORRECT: Analyze priority, set higher-priority conditions to FALSE
|
||
# # signal_a = 1; // Now the first condition (!signal_a) is FALSE
|
||
# # signal_b = 1; // Now this else-if branch can execute
|
||
|
||
# # // WRONG: Trying to assign internal state
|
||
# # state = IDLE; // ERROR: Cannot modify internal signal!
|
||
|
||
# # // WRONG: Using force on internal signal
|
||
# # force DUT.state = WL; // ERROR: Cannot force internal signal!
|
||
|
||
# # // WRONG: Checking internal state in condition
|
||
# # if (state == WL) begin // ERROR: Cannot read internal signal!
|
||
# # {example_signal} = 1;
|
||
# # end
|
||
|
||
# # // CORRECT ALTERNATIVE: Estimate timing instead
|
||
# # repeat(5) @(posedge clk); // Wait for FSM to reach expected state
|
||
# # {example_signal} = 1;
|
||
# # ```
|
||
|
||
# # [SIGNAL NAME WARNING - CRITICAL]
|
||
# # - DO NOT use 'reset' if the actual signal is '{reset_signal}'
|
||
# # - DO NOT use 'rst' if the actual signal is '{reset_signal}'
|
||
# # - ALWAYS use EXACT signal names from the ALLOWED INPUTS list above
|
||
# # - Double-check every signal name before using it!
|
||
|
||
# # Now write the test scenario code to cover the missing blocks:
|
||
# # """
|
||
|
||
# # # === [新增] 注入多样性约束 ===
|
||
# # if self.diversity_injector:
|
||
# # # 获取未覆盖功能点
|
||
# # uncovered_functions = []
|
||
# # if self.semantic_result and self.semantic_result.get('function_points'):
|
||
# # uncovered_functions = [
|
||
# # fp for fp in self.semantic_result['function_points']
|
||
# # if not fp.get('covered', False)
|
||
# # ]
|
||
|
||
# # # 获取当前目标功能点
|
||
# # target_function = ""
|
||
# # if self.energy_allocator and self.energy_allocator.current_target:
|
||
# # target_function = self.energy_allocator.current_target.function_point
|
||
|
||
# # # 注入多样性约束
|
||
# # prompt = self.diversity_injector.inject_diversity_constraints(
|
||
# # prompt=prompt,
|
||
# # target_function=target_function,
|
||
# # uncovered_functions=uncovered_functions
|
||
# # )
|
||
# # # =================================
|
||
|
||
# # return prompt
|
||
|
||
# # def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
|
||
# # """分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
|
||
# # analysis = []
|
||
|
||
# # # 检查是否涉及 FSM 状态转换
|
||
# # has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
|
||
# # has_else_if = any('else if' in line.lower() for line in missing_lines)
|
||
# # has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
|
||
|
||
# # if has_state_case or has_else_if:
|
||
# # analysis.append("- Missing code involves FSM state transitions or conditional branches")
|
||
|
||
# # if has_else_if or has_if_condition:
|
||
# # analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
|
||
# # analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
|
||
# # analysis.append("- Analyze the missing code's context: what conditions block this branch?")
|
||
|
||
# # if has_state_case:
|
||
# # analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
|
||
|
||
# # # === 新增:FSM 状态路径分析 ===
|
||
# # # 尝试从缺失代码中提取 FSM 状态信息
|
||
# # fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
|
||
# # if fsm_state_info:
|
||
# # analysis.append("")
|
||
# # analysis.append("[FSM STATE PATH ANALYSIS]")
|
||
# # analysis.extend(fsm_state_info)
|
||
|
||
# # return "\n".join(analysis) if analysis else ""
|
||
|
||
# # def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
|
||
# # """
|
||
# # 从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
|
||
|
||
# # 分析策略:
|
||
# # 1. 从缺失代码的上下文识别 case 分支(FSM 状态)
|
||
# # 2. 分析该状态下的条件分支优先级
|
||
# # 3. 识别需要满足的输入条件
|
||
# # """
|
||
# # info = []
|
||
|
||
# # # 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
|
||
# # try:
|
||
# # with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
# # full_content = f.read()
|
||
# # except:
|
||
# # return info
|
||
|
||
# # # 提取缺失代码所在的 FSM 状态
|
||
# # target_state = None
|
||
# # missing_condition = None
|
||
|
||
# # for line in missing_lines:
|
||
# # # 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
|
||
# # # 格式可能是 "Line N: STATE:" 或 "STATE:"
|
||
# # state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
|
||
# # if state_match:
|
||
# # potential_state = state_match.group(1)
|
||
# # # 排除常见的非状态关键字
|
||
# # if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
|
||
# # target_state = potential_state
|
||
# # break
|
||
|
||
# # # 如果没找到,尝试从整个文件中分析
|
||
# # if not target_state:
|
||
# # # 查找缺失行附近的 case 分支
|
||
# # lines = full_content.split('\n')
|
||
# # for i, line in enumerate(lines):
|
||
# # # 查找覆盖率标记为 0 的行
|
||
# # if re.match(r'^%000000', line.strip()):
|
||
# # # 向上查找最近的 case 分支(状态)
|
||
# # for j in range(i-1, max(0, i-20), -1):
|
||
# # state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
|
||
# # if state_match:
|
||
# # target_state = state_match.group(1)
|
||
# # break
|
||
# # if target_state:
|
||
# # break
|
||
|
||
# # # 分析缺失的条件分支
|
||
# # for line in missing_lines:
|
||
# # # 提取 else if 条件
|
||
# # else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
|
||
# # if else_if_match:
|
||
# # missing_condition = else_if_match.group(1)
|
||
# # break
|
||
# # # 提取 if 条件
|
||
# # if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
|
||
# # if if_match:
|
||
# # missing_condition = if_match.group(1)
|
||
# # break
|
||
|
||
# # # 生成具体的指导信息
|
||
# # if target_state:
|
||
# # info.append(f"- Target FSM state identified: {target_state}")
|
||
|
||
# # # 查找复位后的初始状态
|
||
# # reset_state = self._find_reset_state(full_content)
|
||
# # if reset_state:
|
||
# # info.append(f"- After reset, FSM starts in state: {reset_state}")
|
||
|
||
# # if reset_state != target_state:
|
||
# # info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
|
||
# # info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
|
||
|
||
# # # 尝试找到状态转换路径
|
||
# # transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
|
||
# # if transition_hint:
|
||
# # info.append(f"- To reach {target_state}: {transition_hint}")
|
||
|
||
# # if missing_condition:
|
||
# # info.append(f"- Missing condition: \"{missing_condition}\"")
|
||
# # # 分析条件优先级
|
||
# # priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
|
||
# # if priority_info:
|
||
# # info.extend(priority_info)
|
||
|
||
# # return info
|
||
|
||
# # def _find_reset_state(self, content: str) -> Optional[str]:
|
||
# # """从 DUT 代码中找到复位后的初始状态"""
|
||
# # # 查找复位逻辑中的状态赋值
|
||
# # # 常见模式: if (reset) state <= IDLE; 或 state <= 0;
|
||
# # patterns = [
|
||
# # r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
# # r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
|
||
# # r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
# # ]
|
||
|
||
# # for pattern in patterns:
|
||
# # match = re.search(pattern, content, re.IGNORECASE)
|
||
# # if match:
|
||
# # state = match.group(1)
|
||
# # # 如果是数字,尝试从参数中找对应的状态名
|
||
# # if state.isdigit():
|
||
# # # 查找参数定义
|
||
# # param_match = re.search(r'parameter\s+([^;]+);', content)
|
||
# # if param_match:
|
||
# # params = param_match.group(1)
|
||
# # # 解析参数列表
|
||
# # for param in params.split(','):
|
||
# # param = param.strip()
|
||
# # if '=' in param:
|
||
# # name, value = param.split('=')
|
||
# # if value.strip() == state:
|
||
# # return name.strip()
|
||
# # return state
|
||
|
||
# # return None
|
||
|
||
# # def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
|
||
# # """找到从一个状态到另一个状态的转换条件"""
|
||
# # # 在 case 语句中查找 from_state 分支
|
||
# # # 提取该分支下到 to_state 的转换条件
|
||
|
||
# # # 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
|
||
# # pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
|
||
# # match = re.search(pattern, content, re.DOTALL)
|
||
|
||
# # if match:
|
||
# # # 提取条件
|
||
# # branch_code = match.group(0)
|
||
# # # 查找 if 条件
|
||
# # if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
# # if if_match:
|
||
# # return f"set condition: {if_match.group(1)}"
|
||
|
||
# # # 查找 else if 条件
|
||
# # elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
# # if elif_match:
|
||
# # return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
|
||
|
||
# # # 尝试反向查找:什么条件下会转换到目标状态
|
||
# # trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
|
||
# # trans_match = re.search(trans_pattern, content)
|
||
# # if trans_match:
|
||
# # return f"set condition: {trans_match.group(1)}"
|
||
|
||
# # return None
|
||
|
||
# # def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
|
||
# # """分析条件分支的优先级,找出需要排除的条件"""
|
||
# # info = []
|
||
|
||
# # if not state:
|
||
# # return info
|
||
|
||
# # # 查找该状态下的所有条件分支
|
||
# # # 提取 state: 后面的代码块
|
||
# # state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
|
||
# # match = re.search(state_block_pattern, content, re.DOTALL)
|
||
|
||
# # if not match:
|
||
# # return info
|
||
|
||
# # state_block = match.group(1)
|
||
|
||
# # # 提取所有条件分支
|
||
# # conditions = []
|
||
# # for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
|
||
# # conditions.append(cond_match.group(1).strip())
|
||
|
||
# # # 找到缺失条件在列表中的位置
|
||
# # missing_idx = -1
|
||
# # for i, cond in enumerate(conditions):
|
||
# # # 简化比较(去除空格)
|
||
# # if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
|
||
# # missing_condition.replace(' ', '') in cond.replace(' ', ''):
|
||
# # missing_idx = i
|
||
# # break
|
||
|
||
# # if missing_idx > 0:
|
||
# # info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
|
||
# # info.append(f"- You must make ALL earlier conditions FALSE:")
|
||
# # for i in range(missing_idx):
|
||
# # cond = conditions[i]
|
||
# # # 分析如何使条件为 FALSE
|
||
# # false_hint = self._get_false_hint(cond)
|
||
# # info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
|
||
|
||
# # return info
|
||
|
||
# # def _get_false_hint(self, condition: str) -> str:
|
||
# # """分析如何使条件为 FALSE"""
|
||
# # condition = condition.strip()
|
||
|
||
# # # 处理 !signal 形式
|
||
# # if condition.startswith('!'):
|
||
# # signal = condition[1:].strip()
|
||
# # return f"set {signal} = 1"
|
||
|
||
# # # 处理 signal 形式(布尔值)
|
||
# # if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
|
||
# # return f"set {condition} = 0"
|
||
|
||
# # # 处理比较运算符
|
||
# # if '==' in condition:
|
||
# # parts = condition.split('==')
|
||
# # if len(parts) == 2:
|
||
# # signal = parts[0].strip()
|
||
# # value = parts[1].strip()
|
||
# # if value.isdigit():
|
||
# # return f"set {signal} != {value}"
|
||
|
||
# # # 处理 >= 形式
|
||
# # if '>=' in condition:
|
||
# # parts = condition.split('>=')
|
||
# # if len(parts) == 2:
|
||
# # signal = parts[0].strip()
|
||
# # value = parts[1].strip()
|
||
# # if value.isdigit():
|
||
# # return f"set {signal} < {value}"
|
||
|
||
# # # 处理 > 形式
|
||
# # if '>' in condition and '>=' not in condition:
|
||
# # parts = condition.split('>')
|
||
# # if len(parts) == 2:
|
||
# # signal = parts[0].strip()
|
||
# # value = parts[1].strip()
|
||
# # return f"set {signal} <= {value}"
|
||
|
||
# # return "analyze the condition logic"
|
||
|
||
# # def _generate_semantic_context(self) -> str:
|
||
# # """
|
||
# # [新增] 从语义分析结果生成 Prompt 上下文
|
||
|
||
# # 整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
|
||
# # - FSM 状态转换图
|
||
# # - 功能点重要性排序
|
||
# # - 测试场景建议
|
||
|
||
# # Returns:
|
||
# # 语义上下文字符串,用于增强 Prompt
|
||
# # """
|
||
# # if not self.semantic_result:
|
||
# # return ""
|
||
|
||
# # context_parts = []
|
||
|
||
# # # 1. 模块基础信息
|
||
# # module_name = self.semantic_result.get('module_name', '')
|
||
# # inputs = self.semantic_result.get('inputs', [])
|
||
# # outputs = self.semantic_result.get('outputs', [])
|
||
|
||
# # if module_name:
|
||
# # context_parts.append(f"Module Name: {module_name}")
|
||
# # if inputs:
|
||
# # context_parts.append(f"Module Inputs: {', '.join(inputs)}")
|
||
# # if outputs:
|
||
# # context_parts.append(f"Module Outputs: {', '.join(outputs)}")
|
||
|
||
# # # 2. FSM 信息(最关键)
|
||
# # fsm_info = self.semantic_result.get('fsm_info')
|
||
# # if fsm_info:
|
||
# # context_parts.append("")
|
||
# # context_parts.append("=== FSM STATE MACHINE DETAILS ===")
|
||
# # context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
|
||
|
||
# # states = fsm_info.get('states', [])
|
||
# # if states:
|
||
# # context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
|
||
|
||
# # # 状态转换表
|
||
# # transitions = fsm_info.get('transitions', {})
|
||
# # if transitions:
|
||
# # context_parts.append("")
|
||
# # context_parts.append("=== STATE TRANSITION TABLE ===")
|
||
# # context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
|
||
# # context_parts.append("")
|
||
|
||
# # for state, trans_list in transitions.items():
|
||
# # for trans in trans_list:
|
||
# # condition = trans.get('condition', 'default')
|
||
# # next_state = trans.get('next_state', 'unknown')
|
||
# # if condition == 'default':
|
||
# # context_parts.append(f" {state} --[default]--> {next_state}")
|
||
# # else:
|
||
# # context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
|
||
|
||
# # # 添加状态转换路径分析
|
||
# # context_parts.append("")
|
||
# # context_parts.append("=== STATE TRANSITION PATH HINTS ===")
|
||
# # reset_state = self._find_reset_state_from_fsm(fsm_info)
|
||
# # if reset_state:
|
||
# # context_parts.append(f"Initial State (after reset): {reset_state}")
|
||
# # context_parts.append("")
|
||
# # context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
|
||
# # context_parts.append(" 1. Reset the DUT to initialize to the starting state")
|
||
# # context_parts.append(" 2. Apply inputs to trigger state transitions")
|
||
# # context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
|
||
# # context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
|
||
|
||
# # # 3. 功能点优先级
|
||
# # function_points = self.semantic_result.get('function_points', [])
|
||
# # if function_points:
|
||
# # context_parts.append("")
|
||
# # context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
|
||
|
||
# # for i, fp in enumerate(function_points[:10]): # Top 10
|
||
# # name = fp.get('name', 'unknown')
|
||
# # fp_type = fp.get('type', 'unknown')
|
||
# # importance = fp.get('importance', 0)
|
||
# # covered = fp.get('covered', False)
|
||
# # status = "✓ COVERED" if covered else "✗ NOT COVERED"
|
||
# # context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
|
||
|
||
# # # 4. 测试场景建议
|
||
# # test_scenarios = self.semantic_result.get('test_scenarios', [])
|
||
# # if test_scenarios:
|
||
# # context_parts.append("")
|
||
# # context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
|
||
|
||
# # for i, ts in enumerate(test_scenarios[:5]): # Top 5
|
||
# # name = ts.get('name', 'unknown')
|
||
# # description = ts.get('description', '')
|
||
# # priority = ts.get('priority', 0)
|
||
# # context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
|
||
|
||
# # if context_parts:
|
||
# # return "\n".join(context_parts)
|
||
# # return ""
|
||
|
||
# # def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
|
||
# # """从 FSM 信息中推断复位后的初始状态"""
|
||
# # # 方法1:检查是否有明确的复位状态
|
||
# # transitions = fsm_info.get('transitions', {})
|
||
|
||
# # # 复位后通常进入第一个定义的状态或特定名称的状态
|
||
# # states = fsm_info.get('states', [])
|
||
|
||
# # # 常见的初始状态命名
|
||
# # initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
|
||
|
||
# # for name in initial_state_names:
|
||
# # if name in states:
|
||
# # return name
|
||
|
||
# # # 如果没有找到,返回第一个状态
|
||
# # if states:
|
||
# # return states[0]
|
||
|
||
# # return None
|
||
|
||
|
||
# # # ============================================================================
|
||
# # # TBInjector - 场景注入器
|
||
# # # ============================================================================
|
||
# # class TBInjector:
|
||
# # """
|
||
# # 场景注入器 - 将LLM生成的测试代码注入到现有测试平台
|
||
|
||
# # 集成三层防护策略:
|
||
# # 1. Layer 1: Prompt约束(由CoverageParser处理)
|
||
# # 2. Layer 2: 智能代码转换
|
||
# # 3. Layer 3: 质量评估和重试建议
|
||
# # """
|
||
|
||
# # def __init__(self, tb_code):
|
||
# # """
|
||
# # 初始化注入器
|
||
|
||
# # Args:
|
||
# # tb_code: 原始测试平台代码字符串
|
||
# # """
|
||
# # self.content = tb_code
|
||
# # self.validator = BlackBoxValidator()
|
||
# # self.validator._extract_signals_from_tb(tb_code)
|
||
# # self.last_validation_result = None
|
||
|
||
# # def inject(self, new_code, iter_idx):
|
||
# # """
|
||
# # 注入新的测试场景到测试平台
|
||
|
||
# # Args:
|
||
# # new_code: LLM生成的测试代码
|
||
# # iter_idx: 迭代序号
|
||
|
||
# # Returns:
|
||
# # 修改后的测试平台代码
|
||
# # """
|
||
# # # Step 1: 预处理代码(包含三层防护)
|
||
# # scenario_code, result = self._preprocess_code(new_code, iter_idx)
|
||
|
||
# # self.last_validation_result = result
|
||
|
||
# # # 记录日志
|
||
# # if result['violations']['critical']:
|
||
# # logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
|
||
# # for v in result['violations']['critical']:
|
||
# # logger.warning(f" - {v}")
|
||
|
||
# # if result['violations']['warning']:
|
||
# # logger.info(f"[CGA-{iter_idx}] Warnings:")
|
||
# # for v in result['violations']['warning']:
|
||
# # logger.info(f" - {v}")
|
||
|
||
# # if result['transformations']:
|
||
# # logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
|
||
# # for t in result['transformations']:
|
||
# # logger.info(f" - {t['type']}: {t.get('original', 'N/A')[:50]}...")
|
||
|
||
# # # Step 2: 构建场景块
|
||
# # scenario_block = self._build_scenario_block(scenario_code, iter_idx)
|
||
|
||
# # # Step 3: 注入到TB中
|
||
# # modified_tb = self._inject_scenario(scenario_block)
|
||
|
||
# # return modified_tb
|
||
|
||
# # def should_retry(self):
|
||
# # """是否应该重试"""
|
||
# # if self.last_validation_result is None:
|
||
# # return False
|
||
# # return self.last_validation_result.get('should_retry', False)
|
||
|
||
# # def get_quality_score(self):
|
||
# # """获取代码质量分数"""
|
||
# # if self.last_validation_result is None:
|
||
# # return 0
|
||
# # return self.last_validation_result.get('quality_score', 0)
|
||
|
||
# # def _preprocess_code(self, code, iter_idx):
|
||
# # """预处理LLM生成的代码"""
|
||
# # # 移除markdown标记
|
||
# # code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
|
||
# # code = re.sub(r'```', '', code)
|
||
|
||
# # # 移除task包装
|
||
# # code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
|
||
# # code = re.sub(r'endtask', '', code)
|
||
|
||
# # # 移除$finish和$stop
|
||
# # code = re.sub(r'\$finish\s*;', '', code)
|
||
# # code = re.sub(r'\$stop\s*;', '', code)
|
||
|
||
# # # 移除多余空行
|
||
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
# # initial_pattern = re.compile(r'\binitial\s+begin\b.*?\bend\b', re.DOTALL | re.IGNORECASE)
|
||
# # # 检查并移除 initial begin ... end 块
|
||
# # initial_match = initial_pattern.search(code)
|
||
# # if initial_match:
|
||
# # logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
|
||
# # logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
|
||
# # # 提取块内的内容
|
||
# # block_content = initial_match.group(0)
|
||
# # # 移除 initial begin 和 end 包装
|
||
# # # 保留块内的实际测试代码
|
||
# # inner_content = re.sub(r'^\s*initial\s+begin\s*', '', block_content)
|
||
# # inner_content = re.sub(r'\bend\s*$', '', inner_content)
|
||
# # # 替换整个块为内部内容
|
||
# # code = initial_pattern.sub(inner_content.strip(), code, count=1)
|
||
|
||
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
# # # 信号名自动修正(在验证之前)
|
||
# # code = self._auto_correct_signal_names(code)
|
||
|
||
# # # 三层防护:黑盒约束验证和转换
|
||
# # code, result = self.validator.validate_and_transform(code, self.content)
|
||
|
||
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
# # return code.strip(), result
|
||
|
||
# # def _auto_correct_signal_names(self, code: str) -> str:
|
||
# # """自动修正信号名错误"""
|
||
# # corrections = []
|
||
|
||
# # # 获取正确的复位信号名
|
||
# # reset_signal = self.validator._find_reset_signal()
|
||
|
||
# # # 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
|
||
# # if reset_signal != "reset":
|
||
# # # 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
|
||
# # pattern = r'\breset\b(?!\w)'
|
||
# # matches = re.findall(pattern, code)
|
||
# # if matches:
|
||
# # code = re.sub(pattern, reset_signal, code)
|
||
# # corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# # # 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
|
||
# # if reset_signal == "areset":
|
||
# # pattern = r'\brst\b(?!\w)'
|
||
# # matches = re.findall(pattern, code)
|
||
# # if matches:
|
||
# # code = re.sub(pattern, reset_signal, code)
|
||
# # corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# # # 检查是否使用了不存在的信号
|
||
# # for signal in re.findall(r'\b(\w+)\s*=', code):
|
||
# # signal = signal.strip()
|
||
# # # 跳过已知的合法信号
|
||
# # if signal in self.validator.dut_inputs:
|
||
# # continue
|
||
# # # 检查是否是复位信号的别名
|
||
# # if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
|
||
# # code = re.sub(rf'\b{signal}\b', reset_signal, code)
|
||
# # corrections.append(f"{signal} -> {reset_signal}")
|
||
|
||
# # if corrections:
|
||
# # logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
|
||
|
||
# # return code
|
||
|
||
# # def _build_scenario_block(self, scenario_code, iter_idx):
|
||
# # """构建完整的场景代码块"""
|
||
# # # 格式化缩进
|
||
# # lines = scenario_code.split('\n')
|
||
# # formatted_lines = []
|
||
# # for line in lines:
|
||
# # stripped = line.strip()
|
||
# # if stripped:
|
||
# # formatted_lines.append(f" {stripped}")
|
||
# # formatted_code = '\n'.join(formatted_lines)
|
||
|
||
# # # 检测输出信号用于日志
|
||
# # output_signals = self._detect_output_signals()
|
||
# # output_log = self._generate_output_log(output_signals, iter_idx)
|
||
|
||
# # # 构建完整块
|
||
# # block = f'''
|
||
# # // ========== CGA Iteration {iter_idx} ==========
|
||
# # scenario = 100 + {iter_idx};
|
||
# # // Reset signals to safe state
|
||
# # {self._generate_signal_reset()}
|
||
# # #5;
|
||
# # // CGA generated test sequence:
|
||
# # {formatted_code}
|
||
# # // Log results
|
||
# # {output_log}
|
||
# # // ==============================================
|
||
# # '''
|
||
# # return block
|
||
|
||
# # def _detect_output_signals(self):
|
||
# # """检测DUT的输出信号"""
|
||
# # outputs = []
|
||
# # wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
# # for match in wire_pattern.finditer(self.content):
|
||
# # signal = match.group(1)
|
||
# # if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
|
||
# # outputs.append(signal)
|
||
# # return outputs
|
||
|
||
# # def _generate_signal_reset(self):
|
||
# # """生成信号重置代码"""
|
||
# # inputs = []
|
||
# # reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
# # for match in reg_pattern.finditer(self.content):
|
||
# # signal = match.group(1)
|
||
# # if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
|
||
# # inputs.append(signal)
|
||
|
||
# # if inputs:
|
||
# # return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
|
||
# # return " // No input signals to reset"
|
||
|
||
# # def _generate_output_log(self, signals, iter_idx):
|
||
# # """生成输出日志代码"""
|
||
# # if not signals:
|
||
# # return f' $display("[CGA-{iter_idx}] Scenario executed");'
|
||
|
||
# # sig_names = ", ".join(signals)
|
||
# # format_str = ", ".join(["%b"] * len(signals))
|
||
|
||
# # return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
|
||
|
||
# # def _inject_scenario(self, scenario_block):
|
||
# # """将场景块注入到测试平台"""
|
||
# # modified_tb = self.content
|
||
|
||
# # # 策略:如果有 $fclose,在其之前插入
|
||
# # if "$fclose" in modified_tb:
|
||
# # modified_tb = re.sub(
|
||
# # r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
|
||
# # scenario_block + r'\1\2',
|
||
# # modified_tb,
|
||
# # count=1
|
||
# # )
|
||
# # elif "$finish" in modified_tb:
|
||
# # # 否则在 $finish 之前插入
|
||
# # modified_tb = modified_tb.replace(
|
||
# # "$finish;",
|
||
# # scenario_block + "\n $finish;"
|
||
# # )
|
||
# # else:
|
||
# # # 兜底:在最后一个 end 之前插入
|
||
# # last_end = modified_tb.rfind("end")
|
||
# # if last_end != -1:
|
||
# # modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
|
||
|
||
# # return modified_tb
|
||
|
||
|
||
# """
|
||
# Description : Utils for CGA (CoverageParser & TBInjector)
|
||
# - Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
|
||
# - Enhanced: Three-layer protection for black-box constraints
|
||
# * Layer 1: Enhanced Prompt constraints (prevention)
|
||
# * Layer 2: Smart code transformation (conversion)
|
||
# * Layer 3: Quality assessment & retry (fallback)
|
||
# - Integrated: Diversity Constraint Injector (Layer 1)
|
||
# Author : CorrectBench Integration
|
||
# """
|
||
# import re
|
||
# import os
|
||
# import logging
|
||
# from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
|
||
|
||
# # [新增] 导入多样性约束注入器
|
||
# if TYPE_CHECKING:
|
||
# from autoline.diversity_injector import DiversityInjector
|
||
|
||
# # 配置日志
|
||
# logger = logging.getLogger(__name__)
|
||
|
||
# # ============================================================================
|
||
# # 三层防护策略说明
|
||
# # ============================================================================
|
||
# # Layer 1 - Prompt约束: 动态提取允许信号列表,明确约束,正反面示例
|
||
# # Layer 2 - 智能转换: 检测违规意图,尝试转换为合法形式
|
||
# # Layer 3 - 质量评估: 违规比例过高时返回质量分数,触发重新生成
|
||
# # ============================================================================
|
||
|
||
|
||
# # ============================================================================
|
||
# # 黑盒约束检查器 - 三层防护策略实现
|
||
# # ============================================================================
|
||
# class BlackBoxValidator:
|
||
# """
|
||
# 黑盒约束验证器 - 三层防护策略
|
||
|
||
# Layer 1: 增强Prompt约束(预防)
|
||
# - 动态提取允许信号列表
|
||
# - 生成明确的约束提示
|
||
|
||
# Layer 2: 智能代码转换(转换)
|
||
# - 检测违规意图
|
||
# - 尝试转换为合法的等价形式
|
||
# - 转换失败时才注释
|
||
|
||
# Layer 3: 质量评估(重试)
|
||
# - 计算代码质量分数
|
||
# - 违规比例过高时建议重试
|
||
# """
|
||
|
||
# # 常见的内部信号命名模式(按严重程度分类)
|
||
# INTERNAL_SIGNAL_PATTERNS = {
|
||
# # 高风险:FSM状态相关(绝对不能修改)
|
||
# 'critical': [
|
||
# (r'\bstate\b', 'FSM状态寄存器'),
|
||
# (r'\bnext_state\b', 'FSM下一状态'),
|
||
# (r'\bcurrent_state\b', 'FSM当前状态'),
|
||
# (r'\bnext\b(?!\s*[,@])', '下一状态简写'),
|
||
# ],
|
||
# # 中风险:计数器和内部寄存器
|
||
# 'warning': [
|
||
# (r'\bcounter\b', '内部计数器'),
|
||
# (r'\bcount\b', '计数寄存器'),
|
||
# (r'\bcnt\b', '计数简写'),
|
||
# (r'\bfall_counter\b', '下落计数器'),
|
||
# (r'\breg_\w+', '内部寄存器'),
|
||
# ],
|
||
# # 低风险:可疑信号(需要确认)
|
||
# 'info': [
|
||
# (r'\binternal_\w+', '内部信号'),
|
||
# (r'\btemp_\w+', '临时信号'),
|
||
# (r'\bprev_\w+', '前一状态'),
|
||
# ]
|
||
# }
|
||
|
||
# # 非法语句模式
|
||
# FORBIDDEN_STATEMENTS = [
|
||
# (r'\bforce\s+(\w+)', 'force语句', 'critical'),
|
||
# (r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
|
||
# (r'\bdeassign\s+', 'deassign语句', 'critical'),
|
||
# (r'\brelease\s+', 'release语句', 'critical'),
|
||
# ]
|
||
|
||
# # 层次化访问模式(如 DUT.state)
|
||
# HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
|
||
|
||
# def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None):
|
||
# """
|
||
# Args:
|
||
# dut_inputs: DUT模块的输入端口列表
|
||
# dut_outputs: DUT模块的输出端口列表
|
||
# """
|
||
# self.dut_inputs = dut_inputs or []
|
||
# self.dut_outputs = dut_outputs or []
|
||
# self.violations = {'critical': [], 'warning': [], 'info': []}
|
||
# self.transformations = []
|
||
|
||
# def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
|
||
# """验证并转换代码 - 主入口"""
|
||
# self.violations = {'critical': [], 'warning': [], 'info': []}
|
||
# self.transformations = []
|
||
|
||
# if tb_code:
|
||
# self._extract_signals_from_tb(tb_code)
|
||
|
||
# original_lines = code.strip().split('\n')
|
||
# total_lines = len([l for l in original_lines if l.strip() and not l.strip().startswith('//')])
|
||
|
||
# # Step 1: 移除非法语句
|
||
# code = self._transform_forbidden_statements(code)
|
||
|
||
# # Step 2: 转换层次化访问
|
||
# code = self._transform_hierarchical_access(code)
|
||
|
||
# # Step 3: 智能转换内部信号访问
|
||
# code = self._smart_transform_internal_signals(code)
|
||
|
||
# # Step 4: 最后清理
|
||
# code = self._final_cleanup(code)
|
||
|
||
# # 计算质量分数
|
||
# quality_score = self._calculate_quality_score(total_lines)
|
||
|
||
# # 决定是否需要重试
|
||
# should_retry = quality_score < 50 or len(self.violations['critical']) > 3
|
||
|
||
# result = {
|
||
# 'quality_score': quality_score,
|
||
# 'is_valid': len(self.violations['critical']) == 0,
|
||
# 'violations': self.violations,
|
||
# 'transformations': self.transformations,
|
||
# 'should_retry': should_retry,
|
||
# 'allowed_signals': self._get_allowed_signals_info()
|
||
# }
|
||
|
||
# return code.strip(), result
|
||
|
||
# def _extract_signals_from_tb(self, tb_code: str):
|
||
# """从测试平台代码中提取DUT输入输出信号"""
|
||
# dut_match = re.search(r'(\w+)\s+(?:DUT|dut|uut|UUT)\s*\(', tb_code, re.IGNORECASE)
|
||
# if dut_match:
|
||
# start = dut_match.start()
|
||
# bracket_count = 0
|
||
# end = start
|
||
# for i, char in enumerate(tb_code[start:]):
|
||
# if char == '(':
|
||
# bracket_count += 1
|
||
# elif char == ')':
|
||
# bracket_count -= 1
|
||
# if bracket_count == 0:
|
||
# end = start + i + 1
|
||
# break
|
||
|
||
# dut_instance = tb_code[start:end]
|
||
# port_pattern = r'\.(\w+)\s*\(\s*(\w+)\s*\)'
|
||
|
||
# for match in re.finditer(port_pattern, dut_instance):
|
||
# signal_name = match.group(2)
|
||
|
||
# is_input = re.search(rf'\breg\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
|
||
# is_output = re.search(rf'\bwire\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
|
||
|
||
# if is_input and signal_name not in self.dut_inputs:
|
||
# self.dut_inputs.append(signal_name)
|
||
# if is_output and signal_name not in self.dut_outputs:
|
||
# self.dut_outputs.append(signal_name)
|
||
|
||
# # 备用方案:通过reg/wire声明推断
|
||
# if not self.dut_inputs and not self.dut_outputs:
|
||
# for match in re.finditer(r'\breg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
|
||
# signal = match.group(1)
|
||
# if signal.lower() not in ['file', 'scenario', 'i', 'j', 'k', 'cnt']:
|
||
# if signal not in self.dut_inputs:
|
||
# self.dut_inputs.append(signal)
|
||
|
||
# for match in re.finditer(r'\bwire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
|
||
# signal = match.group(1)
|
||
# if signal not in self.dut_outputs:
|
||
# self.dut_outputs.append(signal)
|
||
|
||
# def _transform_forbidden_statements(self, code: str) -> str:
|
||
# """转换非法语句"""
|
||
# for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
|
||
# matches = list(re.finditer(pattern, code, re.IGNORECASE))
|
||
# for match in reversed(matches):
|
||
# signal = match.group(1) if match.groups() else 'unknown'
|
||
# self.violations[severity].append(f"{desc}: {signal}")
|
||
|
||
# line_start = code.rfind('\n', 0, match.start()) + 1
|
||
# line_end = code.find('\n', match.end())
|
||
# if line_end == -1:
|
||
# line_end = len(code)
|
||
# original_line = code[line_start:line_end]
|
||
|
||
# # 尝试转换 force -> 直接赋值(仅对输入信号)
|
||
# if 'force' in match.group(0).lower() and signal in self.dut_inputs:
|
||
# new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
|
||
# code = code[:line_start] + new_line + code[line_end:]
|
||
# self.transformations.append({
|
||
# 'type': 'force_to_assign',
|
||
# 'original': original_line.strip(),
|
||
# 'transformed': new_line.strip()
|
||
# })
|
||
# continue
|
||
|
||
# code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
|
||
# self.transformations.append({
|
||
# 'type': 'blocked',
|
||
# 'original': original_line.strip(),
|
||
# 'reason': desc
|
||
# })
|
||
|
||
# return code
|
||
|
||
# def _transform_hierarchical_access(self, code: str) -> str:
|
||
# """转换层次化访问(如 DUT.state)"""
|
||
# for match in re.finditer(self.HIERARCHICAL_ACCESS, code):
|
||
# prefix = match.group(1)
|
||
# signal = match.group(2)
|
||
|
||
# if prefix.upper() in ['DUT', 'UUT', 'TOP', 'TB']:
|
||
# if signal not in self.dut_outputs:
|
||
# self.violations['critical'].append(f"层次化访问内部信号: {prefix}.{signal}")
|
||
|
||
# line_start = code.rfind('\n', 0, match.start()) + 1
|
||
# line_end = code.find('\n', match.end())
|
||
# if line_end == -1:
|
||
# line_end = len(code)
|
||
# original_line = code[line_start:line_end]
|
||
# code = code[:line_start] + '// [HIERARCHY] ' + original_line.lstrip() + code[line_end:]
|
||
|
||
# return code
|
||
|
||
# def _smart_transform_internal_signals(self, code: str) -> str:
|
||
# """智能转换内部信号访问"""
|
||
# lines = code.split('\n')
|
||
# transformed_lines = []
|
||
|
||
# for line in lines:
|
||
# stripped = line.strip()
|
||
|
||
# if stripped.startswith('//') or not stripped:
|
||
# transformed_lines.append(line)
|
||
# continue
|
||
|
||
# if (stripped.startswith('#') or stripped.startswith('$') or
|
||
# stripped.startswith('repeat(') or stripped.startswith('@(')):
|
||
# transformed_lines.append(line)
|
||
# continue
|
||
|
||
# detected_signals = self._detect_internal_signals_in_line(stripped)
|
||
# has_critical = detected_signals.get('critical', [])
|
||
# has_warning = detected_signals.get('warning', [])
|
||
|
||
# if not has_critical and not has_warning:
|
||
# transformed_lines.append(line)
|
||
# continue
|
||
|
||
# context = self._analyze_signal_context(stripped, detected_signals)
|
||
|
||
# if context['type'] == 'assignment':
|
||
# transformed_lines.append(f"// [INTERNAL_ASSIGN] Cannot modify internal signal")
|
||
# transformed_lines.append(f"// Original: {stripped}")
|
||
# self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
|
||
# elif context['type'] == 'condition':
|
||
# transformed = self._transform_condition(stripped, context)
|
||
# transformed_lines.append(transformed)
|
||
# self.transformations.append({
|
||
# 'type': 'condition_transform',
|
||
# 'original': stripped,
|
||
# 'transformed': transformed
|
||
# })
|
||
# elif context['type'] == 'wait_for_state':
|
||
# transformed = self._transform_state_wait(stripped, context)
|
||
# transformed_lines.append(transformed)
|
||
# self.transformations.append({
|
||
# 'type': 'wait_transform',
|
||
# 'original': stripped,
|
||
# 'transformed': transformed
|
||
# })
|
||
# else:
|
||
# if has_critical:
|
||
# transformed_lines.append(f"// [WARNING] Contains internal signal reference: {has_critical}")
|
||
# transformed_lines.append(f"// Original: {stripped}")
|
||
# for sig in has_critical:
|
||
# self.violations['warning'].append(f"可疑的内部信号访问: {sig}")
|
||
# else:
|
||
# transformed_lines.append(line)
|
||
|
||
# return '\n'.join(transformed_lines)
|
||
|
||
# def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
|
||
# """检测行中的内部信号"""
|
||
# detected = {'critical': [], 'warning': [], 'info': []}
|
||
|
||
# LEGAL_KEYWORDS = {
|
||
# 'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
|
||
# 'while', 'for', 'case', 'default', 'always', 'initial',
|
||
# 'assign', 'wire', 'reg', 'input', 'output', 'inout',
|
||
# 'parameter', 'localparam', 'integer', 'real', 'time',
|
||
# 'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
|
||
# 'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
|
||
# 'true', 'false', 'idle', 'wait'
|
||
# }
|
||
|
||
# SYSTEM_FUNCTIONS = {'$display', '$write', '$monitor', '$fopen', '$fclose',
|
||
# '$fdisplay', '$fwrite', '$readmemh', '$readmemb',
|
||
# '$finish', '$stop', '$random', '$time', '$stime'}
|
||
|
||
# for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
|
||
# for pattern, name in patterns:
|
||
# matches = re.findall(pattern, line, re.IGNORECASE)
|
||
# if matches:
|
||
# for match in matches:
|
||
# if isinstance(match, tuple):
|
||
# match = match[0] if match[0] else match[1]
|
||
|
||
# match_lower = match.lower() if match else ''
|
||
|
||
# if match_lower in LEGAL_KEYWORDS:
|
||
# continue
|
||
# if match in SYSTEM_FUNCTIONS:
|
||
# continue
|
||
# if match in self.dut_inputs or match in self.dut_outputs:
|
||
# continue
|
||
# if match.startswith('$'):
|
||
# continue
|
||
|
||
# if match and match not in detected[severity]:
|
||
# detected[severity].append(match)
|
||
|
||
# return detected
|
||
|
||
# def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
|
||
# """分析信号使用上下文"""
|
||
# assign_match = re.search(r'(\w+)\s*(?:=|<=)\s*', line)
|
||
# if assign_match:
|
||
# target = assign_match.group(1)
|
||
# if target in signals.get('critical', []) or target in signals.get('warning', []):
|
||
# return {'type': 'assignment', 'signals': [target], 'line': line}
|
||
|
||
# if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
|
||
# return {'type': 'wait_for_state', 'signals': signals.get('critical', []), 'line': line}
|
||
|
||
# if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
|
||
# return {'type': 'condition', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
|
||
|
||
# return {'type': 'other', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
|
||
|
||
# def _transform_condition(self, line: str, context: Dict) -> str:
|
||
# """转换条件判断语句"""
|
||
# original = line
|
||
|
||
# if 'state' in str(context['signals']):
|
||
# indent = len(line) - len(line.lstrip())
|
||
# spaces = ' ' * indent
|
||
|
||
# transformed = f"// [TRANSFORMED] Original: {original.strip()}\n"
|
||
# transformed += f"{spaces}// Cannot directly check internal state\n"
|
||
# transformed += f"{spaces}// Alternative: Wait for expected clock cycles\n"
|
||
# transformed += f"{spaces}repeat(5) @(posedge clk); // Adjust cycles as needed"
|
||
|
||
# self.violations['warning'].append(f"条件判断转换: {original.strip()}")
|
||
# return transformed
|
||
|
||
# return f"// [TRANSFORMED] {original}"
|
||
|
||
# def _transform_state_wait(self, line: str, context: Dict) -> str:
|
||
# """转换状态等待语句"""
|
||
# indent = len(line) - len(line.lstrip())
|
||
# spaces = ' ' * indent
|
||
|
||
# transformed = f"// [TRANSFORMED] Original: {line.strip()}\n"
|
||
# transformed += f"{spaces}// Cannot wait for internal state directly\n"
|
||
# transformed += f"{spaces}// Alternative: Drive inputs and wait for expected cycles\n"
|
||
# transformed += f"{spaces}repeat(10) @(posedge clk); // Adjust based on FSM design"
|
||
|
||
# self.violations['info'].append(f"状态等待转换: {line.strip()}")
|
||
# return transformed
|
||
|
||
# def _final_cleanup(self, code: str) -> str:
|
||
# """最终清理"""
|
||
# lines = code.split('\n')
|
||
# cleaned = []
|
||
|
||
# for line in lines:
|
||
# stripped = line.strip()
|
||
|
||
# if stripped in ['begin', 'end'] and cleaned:
|
||
# last = cleaned[-1].strip()
|
||
# if last.startswith('// [TRANSFORMED]') or last.startswith('// [INTERNAL'):
|
||
# continue
|
||
|
||
# cleaned.append(line)
|
||
|
||
# result = '\n'.join(cleaned)
|
||
# result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
|
||
|
||
# return result
|
||
|
||
# def _calculate_quality_score(self, total_lines: int) -> int:
|
||
# """计算代码质量分数"""
|
||
# if total_lines == 0:
|
||
# return 0
|
||
|
||
# score = 100
|
||
# score -= len(self.violations['critical']) * 20
|
||
# score -= len(self.violations['warning']) * 5
|
||
# score += len([t for t in self.transformations if 'blocked' not in t.get('type', '')]) * 5
|
||
|
||
# return max(0, min(100, score))
|
||
|
||
# def _get_allowed_signals_info(self) -> Dict:
|
||
# """获取允许的信号信息"""
|
||
# return {
|
||
# 'inputs': self.dut_inputs,
|
||
# 'outputs': self.dut_outputs,
|
||
# 'all_allowed': self.dut_inputs + self.dut_outputs
|
||
# }
|
||
|
||
# def generate_constraint_prompt(self) -> str:
|
||
# """生成动态约束提示 - 使用实际信号名"""
|
||
# prompt = "\n[SIGNAL CONSTRAINTS - DERIVED FROM YOUR DUT]\n"
|
||
|
||
# # 提取复位信号名(优先使用实际的)
|
||
# reset_signal = self._find_reset_signal()
|
||
|
||
# if self.dut_inputs:
|
||
# prompt += "ALLOWED INPUTS (you CAN drive these):\n"
|
||
# for sig in self.dut_inputs:
|
||
# prompt += f" - {sig}\n"
|
||
# else:
|
||
# prompt += "ALLOWED INPUTS: Check the testbench for actual signal names\n"
|
||
|
||
# if self.dut_outputs:
|
||
# prompt += "\nOUTPUTS (you can READ but NOT write):\n"
|
||
# for sig in self.dut_outputs:
|
||
# prompt += f" - {sig}\n"
|
||
|
||
# prompt += f"""
|
||
# FORBIDDEN ACTIONS:
|
||
# 1. NEVER assign values to internal signals (state, counter, etc.)
|
||
# 2. NEVER use 'force' or 'assign' statements
|
||
# 3. NEVER access signals like DUT.state (hierarchical access)
|
||
|
||
# CORRECT APPROACH:
|
||
# - To reach a specific FSM state: drive inputs and WAIT for natural transition
|
||
# - Example: Instead of "state = IDLE", use "{reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;"
|
||
# """
|
||
# return prompt
|
||
|
||
# def _find_reset_signal(self) -> str:
|
||
# """查找复位信号名"""
|
||
# # 按优先级查找常见的复位信号名
|
||
# reset_candidates = ['areset', 'rst_n', 'rst', 'reset', 'rst_b']
|
||
# for sig in reset_candidates:
|
||
# if sig in self.dut_inputs:
|
||
# return sig
|
||
# # 如果没找到,检查输入列表中是否有类似名称
|
||
# for sig in self.dut_inputs:
|
||
# sig_lower = sig.lower()
|
||
# if 'reset' in sig_lower or 'rst' in sig_lower:
|
||
# return sig
|
||
# # 默认返回第一个输入信号(排除 clk)
|
||
# for sig in self.dut_inputs:
|
||
# if 'clk' not in sig.lower():
|
||
# return sig
|
||
# return "reset" # 兜底
|
||
|
||
# # =========================================================================
|
||
# # [新增] Verilog 语法预检查 - 检测常见逻辑错误
|
||
# # =========================================================================
|
||
# def check_syntax_issues(self, code: str, signal_widths: Dict[str, int] = None, declared_signals: set = None) -> Dict:
|
||
# """
|
||
# 检测 Verilog 代码中的常见语法/逻辑问题
|
||
|
||
# Args:
|
||
# code: 待检查的代码
|
||
# signal_widths: 信号位宽映射 (可选,如 {'in': 1, 'data': 8})
|
||
# declared_signals: 已声明的信号集合 (可选,用于检测未声明信号)
|
||
|
||
# Returns:
|
||
# {
|
||
# 'width_mismatch': [...], # 位宽不匹配警告
|
||
# 'logic_issues': [...], # 逻辑问题
|
||
# 'syntax_warnings': [...], # 语法警告
|
||
# 'should_retry': bool # 是否建议重试
|
||
# }
|
||
# """
|
||
# result = {
|
||
# 'width_mismatch': [],
|
||
# 'logic_issues': [],
|
||
# 'syntax_warnings': [],
|
||
# 'should_retry': False
|
||
# }
|
||
|
||
# # 检查位宽不匹配
|
||
# result['width_mismatch'] = self._check_width_mismatch(code, signal_widths)
|
||
|
||
# # 检查逻辑问题
|
||
# result['logic_issues'] = self._check_logic_issues(code, signal_widths)
|
||
|
||
# # 检查其他语法问题(传入已声明信号)
|
||
# result['syntax_warnings'] = self._check_syntax_warnings(code, declared_signals)
|
||
|
||
# # 决定是否需要重试
|
||
# # 包括:位宽不匹配、逻辑问题、语法错误(severity='error')
|
||
# has_syntax_errors = any(
|
||
# issue.get('severity') == 'error'
|
||
# for issue in result['syntax_warnings']
|
||
# )
|
||
# result['should_retry'] = (
|
||
# len(result['width_mismatch']) > 0 or
|
||
# len(result['logic_issues']) > 0 or
|
||
# has_syntax_errors
|
||
# )
|
||
|
||
# return result
|
||
|
||
# def _check_width_mismatch(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
|
||
# """
|
||
# 检测位宽不匹配问题
|
||
|
||
# 常见问题:
|
||
# - {signal} = N'b... 将多位值赋给单比特信号
|
||
# - signal = N'b... 位宽不匹配
|
||
# """
|
||
# issues = []
|
||
# signal_widths = signal_widths or {}
|
||
|
||
# # 默认假设未声明信号为 1 位
|
||
# def get_width(sig):
|
||
# return signal_widths.get(sig, 1)
|
||
|
||
# # 模式1: {signal} = N'bvalue (拼接赋值)
|
||
# # 例: {in} = 8'b01111100 - 将 8 位赋给 1 位
|
||
# concat_pattern = re.compile(r'\{(\w+)\}\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
|
||
# for match in concat_pattern.finditer(code):
|
||
# signal = match.group(1)
|
||
# value_width = int(match.group(2))
|
||
# base = match.group(3)
|
||
# value = match.group(4)
|
||
|
||
# actual_width = get_width(signal)
|
||
|
||
# if value_width > actual_width:
|
||
# issues.append({
|
||
# 'type': 'concat_width_mismatch',
|
||
# 'signal': signal,
|
||
# 'signal_width': actual_width,
|
||
# 'assigned_width': value_width,
|
||
# 'original': match.group(0),
|
||
# 'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value via concatenation. Verilog will truncate.",
|
||
# 'severity': 'warning',
|
||
# 'suggestion': f"Use a shift register: reg [{value_width-1}:0] temp; temp = {value_width}'{base}{value}; then shift bits one by one"
|
||
# })
|
||
|
||
# # 模式2: signal = N'bvalue (直接赋值)
|
||
# assign_pattern = re.compile(r'\b(\w+)\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
|
||
# for match in assign_pattern.finditer(code):
|
||
# signal = match.group(1)
|
||
# value_width = int(match.group(2))
|
||
|
||
# # 跳过拼接赋值(已处理)
|
||
# if f'{{{signal}}}' in match.group(0):
|
||
# continue
|
||
|
||
# actual_width = get_width(signal)
|
||
|
||
# # 只有当信号已知且位宽不匹配时才警告
|
||
# if signal in signal_widths and value_width > actual_width:
|
||
# issues.append({
|
||
# 'type': 'direct_width_mismatch',
|
||
# 'signal': signal,
|
||
# 'signal_width': actual_width,
|
||
# 'assigned_width': value_width,
|
||
# 'original': match.group(0),
|
||
# 'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value. Truncation will occur.",
|
||
# 'severity': 'warning'
|
||
# })
|
||
|
||
# return issues
|
||
|
||
# def _check_logic_issues(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
|
||
# """
|
||
# 检测逻辑问题
|
||
|
||
# 常见问题:
|
||
#         - Self-shift of a single-bit signal (in = in >> 1 always yields 0, so it cannot serialize data)
|
||
# - 无效的循环条件
|
||
# """
|
||
# issues = []
|
||
# signal_widths = signal_widths or {}
|
||
|
||
# def get_width(sig):
|
||
# return signal_widths.get(sig, 1)
|
||
|
||
# # 模式: signal = signal >> N 或 signal = signal << N
|
||
# shift_pattern = re.compile(r'\b(\w+)\s*=\s*\1\s*(>>|<<)\s*(\d+)?')
|
||
# for match in shift_pattern.finditer(code):
|
||
# signal = match.group(1)
|
||
# direction = match.group(2)
|
||
# shift_amount = int(match.group(3)) if match.group(3) else 1
|
||
|
||
# actual_width = get_width(signal)
|
||
|
||
# if actual_width == 1:
|
||
# issues.append({
|
||
# 'type': 'single_bit_shift',
|
||
# 'signal': signal,
|
||
# 'direction': direction,
|
||
# 'original': match.group(0),
|
||
# 'message': f"Single-bit signal '{signal}' self-shift has no effect. Result is always 0.",
|
||
# 'severity': 'warning',
|
||
# 'suggestion': f"Use a shift register for bit-serial input, not the input signal itself"
|
||
# })
|
||
|
||
# # 模式: repeat(N) begin ... signal = signal >> 1; end (循环移位单比特)
|
||
# repeat_shift_pattern = re.compile(r'repeat\s*\(\s*\d+\s*\)\s*begin[^}]*?(\w+)\s*=\s*\1\s*(>>|<<)', re.DOTALL)
|
||
# for match in repeat_shift_pattern.finditer(code):
|
||
# signal = match.group(1)
|
||
# actual_width = get_width(signal)
|
||
|
||
# if actual_width == 1:
|
||
# issues.append({
|
||
# 'type': 'repeat_single_bit_shift',
|
||
# 'signal': signal,
|
||
# 'original': match.group(0)[:100] + '...',
|
||
# 'message': f"Repeat loop shifting single-bit signal '{signal}' is ineffective",
|
||
# 'severity': 'warning'
|
||
# })
|
||
|
||
# return issues
|
||
|
||
# def _check_syntax_warnings(self, code: str, declared_signals: set = None) -> List[Dict]:
|
||
# """
|
||
# 检测其他语法问题
|
||
|
||
# Args:
|
||
# code: 待检查的代码
|
||
# declared_signals: 已声明的信号集合 (从完整 TB 中提取)
|
||
# """
|
||
# issues = []
|
||
# declared_signals = declared_signals or set()
|
||
|
||
# # 检查: 缺少分号
|
||
# # 注意: 这只是简单检查,不是完整解析
|
||
# lines = code.split('\n')
|
||
# for i, line in enumerate(lines):
|
||
# stripped = line.strip()
|
||
# if not stripped or stripped.startswith('//'):
|
||
# continue
|
||
|
||
# # 跳过不需要分号的行
|
||
# skip_patterns = [
|
||
# r'^begin$', r'^end$', r'^endcase$', r'^endmodule$',
|
||
# r'^else$', r'^\)$', r'^\}\s*$', r'^`timescale', r'^`include'
|
||
# ]
|
||
# if any(re.match(p, stripped) for p in skip_patterns):
|
||
# continue
|
||
|
||
# # 检查是否需要分号但没有
|
||
#             needs_semicolon = re.search(r'\b(initial|always|assign|reg|wire|parameter|localparam)\b', stripped) is None
|
||
# has_semicolon = stripped.endswith(';') or stripped.endswith(')') or stripped.endswith('}')
|
||
|
||
# if needs_semicolon and not has_semicolon and not stripped.endswith('begin'):
|
||
# # 可能缺少分号(但不确定)
|
||
# pass # 暂不报警,避免误报
|
||
|
||
# # 检查: 不匹配的 begin/end
|
||
# begin_count = len(re.findall(r'\bbegin\b', code))
|
||
# end_count = len(re.findall(r'\bend\b', code))
|
||
# if begin_count != end_count:
|
||
# issues.append({
|
||
# 'type': 'mismatched_begin_end',
|
||
# 'message': f"Mismatched begin/end: {begin_count} begin vs {end_count} end",
|
||
# 'severity': 'error'
|
||
# })
|
||
|
||
# # 检查: 未声明的信号(在赋值左侧使用的信号)
|
||
# # 使用传入的已声明信号集合
|
||
# for match in re.finditer(r'^\s*(\w+)\s*=', code, re.MULTILINE):
|
||
# signal = match.group(1)
|
||
#             # Skip language keywords that are not signal names
|
||
# if signal in ['if', 'else', 'case', 'for', 'while', 'repeat', 'assign', 'force', 'release']:
|
||
# continue
|
||
# # 跳过以 $ 开头的系统任务
|
||
# if signal.startswith('$'):
|
||
# continue
|
||
# # 检查是否在已声明信号列表中
|
||
# if signal not in declared_signals:
|
||
# issues.append({
|
||
# 'type': 'undeclared_signal',
|
||
# 'signal': signal,
|
||
# 'message': f"Signal '{signal}' is used but not declared in the testbench",
|
||
# 'severity': 'error',
|
||
# 'suggestion': f"Use an existing signal name (declared: {', '.join(list(declared_signals)[:10])}...)"
|
||
# })
|
||
|
||
# # 检查: always 块与时钟生成冲突
|
||
#         # Detect the case where both an initial block and an always block assign clk
|
||
# always_blocks = re.findall(r'\balways\s*(@[^i]|begin)', code)
|
||
# initial_clk_blocks = len(re.findall(r'initial\s+begin[^i]*?clk\s*=', code, re.DOTALL))
|
||
# always_clk_blocks = len(re.findall(r'\balways[^i]*?clk\s*=', code, re.DOTALL))
|
||
|
||
# if initial_clk_blocks > 0 and always_clk_blocks > 0:
|
||
# issues.append({
|
||
# 'type': 'multiple_clock_drivers',
|
||
# 'message': f"Multiple clock drivers detected: {initial_clk_blocks} initial + {always_clk_blocks} always blocks driving clk",
|
||
# 'severity': 'error',
|
||
# 'suggestion': "Remove duplicate clock generation. The testbench already has clock generation."
|
||
# })
|
||
|
||
# # 检查: initial 块嵌套(生成了 initial begin ... end 在注入时会导致嵌套)
|
||
# if re.search(r'\binitial\s+begin\b', code):
|
||
# issues.append({
|
||
# 'type': 'initial_block_injection',
|
||
# 'message': "Code contains 'initial begin...end' block which should not be injected into an existing initial block",
|
||
# 'severity': 'error',
|
||
# 'suggestion': "Remove the 'initial begin...end' wrapper, keep only the test statements inside"
|
||
# })
|
||
|
||
# return issues
|
||
|
||
|
||
# # ============================================================================
|
||
# # CoverageParser - 覆盖率解析器
|
||
# # ============================================================================
|
||
# class CoverageParser:
|
||
# """覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
|
||
|
||
# [增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
|
||
# [新增] 集成能量分配层,提供目标功能点优先级信息
|
||
# [新增] 集成多样性约束注入器,避免测试用例同质化
|
||
# """
|
||
|
||
# def __init__(self, annotated_file, tb_code=None, semantic_result=None,
|
||
# energy_allocator=None, diversity_injector=None):
|
||
# self.file_path = annotated_file
|
||
# self.tb_code = tb_code
|
||
# self.semantic_result = semantic_result # [新增] 语义分析结果
|
||
# self.energy_allocator = energy_allocator # [新增] 能量分配器
|
||
# self.diversity_injector = diversity_injector # [新增] 多样性约束注入器
|
||
# # 修复:Verilator 覆盖率标记格式多样化:
|
||
# # %NNNNNN - 行覆盖计数(%000000 表示从未执行)
|
||
# # ~NNNNNN - 分支/条件覆盖计数(~000000 表示分支从未执行)
|
||
# # ^NNNNNN - 未覆盖的分支标记
|
||
# # NNNNNN - 空格开头+数字(某些 Verilator 版本)
|
||
# # NNNNNN - 纯数字开头(无前缀)
|
||
# self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$') # 匹配 %NNNNNN code
|
||
# self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$') # 匹配 ~NNNNNN code
|
||
# self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$') # 匹配 ^NNNNNN code
|
||
# # [修复] 纯数字开头(无前缀)或空格开头
|
||
# self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$') # 匹配 " NNNNNN" 或 "NNNNNN"
|
||
# self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')
|
||
|
||
# self.validator = BlackBoxValidator()
|
||
# if tb_code:
|
||
# self.validator._extract_signals_from_tb(tb_code)
|
||
|
||
# def generate_prompt(self, current_score):
|
||
# """生成覆盖率驱动的Prompt"""
|
||
# if not os.path.exists(self.file_path):
|
||
# return None
|
||
|
||
# try:
|
||
# with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
# lines = f.readlines()
|
||
# except Exception:
|
||
# return None
|
||
|
||
# missing_blocks = []
|
||
# current_block = []
|
||
# recording = False
|
||
# context_buffer = []
|
||
# CONTEXT_SIZE = 3
|
||
|
||
# # 收集缺失行用于 FSM 分析
|
||
# missing_lines = []
|
||
|
||
# for i, line in enumerate(lines):
|
||
# line = line.strip()
|
||
# count = -1
|
||
# clean_code = line
|
||
# is_tilde = False
|
||
# is_caret = False
|
||
|
||
# # 尝试匹配各种覆盖率标记格式
|
||
# # Verilator 覆盖率格式:
|
||
# # - %NNNNNN: 行覆盖,NNNNNN 是执行次数,%000000 表示未执行
|
||
# # - ~NNNNNN: 分支/条件覆盖,~000000 表示分支从未执行
|
||
# # - ^NNNNNN: 未覆盖分支标记
|
||
# # - NNNNNN: 无前缀格式(某些版本)
|
||
# match_pct = self.line_pattern.match(line) # %NNNNNN code
|
||
# match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
|
||
# match_caret = self.caret_pattern.match(line) # ^NNNNNN code
|
||
# match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
|
||
|
||
# if match_pct:
|
||
# count = int(match_pct.group(1))
|
||
# clean_code = match_pct.group(2).strip()
|
||
# elif match_tilde:
|
||
# count = int(match_tilde.group(1))
|
||
# clean_code = match_tilde.group(2).strip()
|
||
# is_tilde = True
|
||
# elif match_caret:
|
||
# count = int(match_caret.group(1))
|
||
# clean_code = match_caret.group(2).strip()
|
||
# is_caret = True
|
||
# elif match_plain:
|
||
# # 纯数字格式(可能出现在某些 Verilator 版本)
|
||
# count = int(match_plain.group(1))
|
||
# clean_code = match_plain.group(2).strip()
|
||
|
||
# if "//" in clean_code:
|
||
# clean_code = clean_code.split("//")[0].strip()
|
||
|
||
# is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
|
||
# is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
|
||
# clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
|
||
|
||
# # [修改] 覆盖状态判断:
|
||
# # - %NNNNNN: count > 0 表示已覆盖,count == 0 表示未覆盖
|
||
# # - ~NNNNNN: 分支覆盖标记,count == 0 也表示未覆盖!
|
||
# # - ^NNNNNN: 未覆盖分支标记
|
||
# is_definitely_covered = (not is_tilde and not is_caret and count > 0)
|
||
# # [关键修复] tilde 格式 count == 0 也应该被视为 missing
|
||
# is_definitely_missed = (
|
||
# (not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or
|
||
# (is_tilde and count == 0 and not is_hard_noise and not is_soft_noise) or # [新增] ~000000 也是 missing
|
||
# (is_caret and not is_hard_noise and not is_soft_noise)
|
||
# )
|
||
|
||
# if recording:
|
||
# if is_definitely_covered:
|
||
# missing_blocks.append(current_block)
|
||
# missing_lines.extend(current_block)
|
||
# current_block = []
|
||
# recording = False
|
||
# if not is_hard_noise:
|
||
# context_buffer.append(clean_code)
|
||
# else:
|
||
# if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
# current_block.append(f"Line {i+1}: {clean_code}")
|
||
# else:
|
||
# if is_definitely_missed:
|
||
# recording = True
|
||
# if context_buffer:
|
||
# current_block.append(f"... (Context)")
|
||
# for ctx in context_buffer:
|
||
# current_block.append(f" {ctx}")
|
||
# current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
|
||
# else:
|
||
# if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
# context_buffer.append(clean_code)
|
||
# if len(context_buffer) > CONTEXT_SIZE:
|
||
# context_buffer.pop(0)
|
||
|
||
# if recording and current_block:
|
||
# missing_blocks.append(current_block)
|
||
# missing_lines.extend(current_block)
|
||
|
||
# # [改进] 详细诊断日志 - 使用 info 级别确保可见
|
||
# total_lines = len(lines)
|
||
# parsed_lines = sum(1 for l in lines if l.strip() and (
|
||
# self.line_pattern.match(l.strip()) or
|
||
# self.tilde_pattern.match(l.strip()) or
|
||
# self.caret_pattern.match(l.strip()) or
|
||
# self.plain_pattern.match(l.strip())
|
||
# ))
|
||
|
||
# # 收集零计数行的详细信息
|
||
# zero_count_details = []
|
||
# for l in lines:
|
||
# l_stripped = l.strip()
|
||
# if not l_stripped:
|
||
# continue
|
||
# match_pct = self.line_pattern.match(l_stripped)
|
||
# match_tilde = self.tilde_pattern.match(l_stripped)
|
||
# if match_pct and int(match_pct.group(1)) == 0:
|
||
# zero_count_details.append(('%', match_pct.group(2).strip()[:50]))
|
||
# elif match_tilde and int(match_tilde.group(1)) == 0:
|
||
# zero_count_details.append(('~', match_tilde.group(2).strip()[:50]))
|
||
|
||
# zero_count_lines = len(zero_count_details)
|
||
|
||
# logger.info(f"CoverageParser: Total={total_lines}, Parsed={parsed_lines}, Zero-count={zero_count_lines}, Missing blocks={len(missing_blocks)}")
|
||
|
||
# if not missing_blocks:
|
||
# # [改进] 详细诊断信息
|
||
# if zero_count_lines > 0:
|
||
# logger.warning(f"Found {zero_count_lines} lines with zero coverage count, but no missing blocks extracted.")
|
||
# logger.warning("Zero-count lines:")
|
||
# for prefix, code in zero_count_details[:10]: # 只显示前10个
|
||
# logger.warning(f" {prefix}000000: {code}")
|
||
# if len(zero_count_details) > 10:
|
||
# logger.warning(f" ... and {len(zero_count_details) - 10} more")
|
||
# logger.warning("These lines may have been filtered as noise (declarations, etc.)")
|
||
# return None
|
||
|
||
# selected_blocks = missing_blocks[:50]
|
||
|
||
# # 获取实际信号名用于示例
|
||
# reset_signal = self.validator._find_reset_signal()
|
||
# inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
|
||
# example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
|
||
|
||
# # 分析 FSM 相关的缺失代码
|
||
# fsm_analysis = self._analyze_fsm_missing(missing_lines)
|
||
|
||
# # [新增] 从语义分析结果获取 FSM 和功能点信息
|
||
# semantic_context = self._generate_semantic_context()
|
||
|
||
# prompt = f"""
|
||
# [ROLE]
|
||
# You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
|
||
|
||
# [COVERAGE STATUS]
|
||
# Current testbench achieves {current_score:.2f}% coverage.
|
||
# The following logic blocks in the DUT are NEVER executed during simulation:
|
||
|
||
# """
|
||
# for idx, block in enumerate(selected_blocks):
|
||
# prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
|
||
|
||
# # [新增] 添加语义分析上下文
|
||
# if semantic_context:
|
||
# prompt += f"""
|
||
# [SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
|
||
# {semantic_context}
|
||
# """
|
||
|
||
# # === [新增] 添加能量分配目标上下文 ===
|
||
# if self.energy_allocator:
|
||
# energy_context = self.energy_allocator.get_target_context()
|
||
# if energy_context:
|
||
# prompt += f"""
|
||
# [ENERGY-ALIGNED TARGET - PRIORITY]
|
||
# {energy_context}
|
||
# Focus your test scenario on covering this high-priority target first.
|
||
# """
|
||
# # =====================================
|
||
|
||
# prompt += self.validator.generate_constraint_prompt()
|
||
|
||
# # 添加 FSM 分析提示
|
||
# if fsm_analysis:
|
||
# prompt += f"""
|
||
# [FSM STATE TRANSITION ANALYSIS - CRITICAL]
|
||
# {fsm_analysis}
|
||
|
||
# IMPORTANT: FSM transitions have PRIORITY ORDER!
|
||
# - 'if' conditions are evaluated TOP to BOTTOM
|
||
# - The FIRST matching condition determines the next state
|
||
# - To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
|
||
# - Read the missing code's context carefully: what conditions precede it?
|
||
|
||
# """
|
||
|
||
# prompt += f"""
|
||
# [OUTPUT REQUIREMENTS - CRITICAL]
|
||
# 1. Return ONLY Verilog test scenario code (NOT a task definition)
|
||
# 2. Your code will be inserted INTO an existing `initial begin ... end` block
|
||
# 3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
|
||
# 4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
|
||
|
||
# [CODING STYLE]
|
||
# 1. Use blocking assignments for input signals: `signal = value;`
|
||
# 2. Use `#N;` for time delays: `#10;` means wait 10 time units
|
||
# 3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
|
||
# 4. Start with reset sequence if needed
|
||
|
||
# [BLACK-BOX CONSTRAINTS - CRITICAL]
|
||
# 1. You can ONLY control module INPUTS listed above
|
||
# 2. You CANNOT access internal signals (state, next_state, counters, etc.)
|
||
# 3. You CANNOT use `force` or `assign` on internal signals
|
||
# 4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
|
||
|
||
# [STEP-BY-STEP APPROACH - REQUIRED]
|
||
# For each missing branch, think through:
|
||
# 1. What STATE must the FSM be in? (Look at the case statement)
|
||
# 2. What CONDITIONS must be true/false? (Check priority order!)
|
||
# 3. How to reach that state from reset? (Trace state transitions)
|
||
# 4. What inputs to apply and in what order?
|
||
|
||
# [POSITIVE EXAMPLE - CORRECT APPROACH]
|
||
# ```verilog
|
||
# // Reset sequence - use ACTUAL input signal names from above
|
||
# {reset_signal} = 1;
|
||
# repeat(2) @(posedge clk);
|
||
# {reset_signal} = 0;
|
||
|
||
# // Wait for FSM to reach desired state (estimate cycles)
|
||
# repeat(3) @(posedge clk);
|
||
|
||
# // Trigger missing branch by driving inputs
|
||
# {example_signal} = 1;
|
||
# repeat(5) @(posedge clk);
|
||
# {example_signal} = 0;
|
||
# repeat(10) @(posedge clk);
|
||
# ```
|
||
|
||
# [NEGATIVE EXAMPLE - DO NOT DO THIS]
|
||
# ```verilog
|
||
# // WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
|
||
# reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
|
||
|
||
# // WRONG: Not considering condition priority in FSM
|
||
# // If missing code is "else if (condition_b)", you must make condition_a FALSE first!
|
||
# // Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
|
||
# // Then signal_a must be 1 (so that !signal_a is FALSE) for the else-if branch to execute
|
||
# signal_a = 0; // WRONG: This blocks the else-if branch!
|
||
# signal_b = 1; // This will NOT trigger because signal_a=0 took priority
|
||
|
||
# // CORRECT: Analyze priority, set higher-priority conditions to FALSE
|
||
# signal_a = 1; // Now the first condition (!signal_a) is FALSE
|
||
# signal_b = 1; // Now this else-if branch can execute
|
||
|
||
# // WRONG: Trying to assign internal state
|
||
# state = IDLE; // ERROR: Cannot modify internal signal!
|
||
|
||
# // WRONG: Using force on internal signal
|
||
# force DUT.state = WL; // ERROR: Cannot force internal signal!
|
||
|
||
# // WRONG: Checking internal state in condition
|
||
# if (state == WL) begin // ERROR: Cannot read internal signal!
|
||
# {example_signal} = 1;
|
||
# end
|
||
|
||
# // CORRECT ALTERNATIVE: Estimate timing instead
|
||
# repeat(5) @(posedge clk); // Wait for FSM to reach expected state
|
||
# {example_signal} = 1;
|
||
# ```
|
||
|
||
# [SIGNAL NAME WARNING - CRITICAL]
|
||
# - DO NOT use 'reset' if the actual signal is '{reset_signal}'
|
||
# - DO NOT use 'rst' if the actual signal is '{reset_signal}'
|
||
# - ALWAYS use EXACT signal names from the ALLOWED INPUTS list above
|
||
# - Double-check every signal name before using it!
|
||
|
||
# Now write the test scenario code to cover the missing blocks:
|
||
# """
|
||
|
||
# # === [新增] 注入多样性约束 ===
|
||
# if self.diversity_injector:
|
||
# # 获取未覆盖功能点
|
||
# uncovered_functions = []
|
||
# if self.semantic_result and self.semantic_result.get('function_points'):
|
||
# uncovered_functions = [
|
||
# fp for fp in self.semantic_result['function_points']
|
||
# if not fp.get('covered', False)
|
||
# ]
|
||
|
||
# # 获取当前目标功能点
|
||
# target_function = ""
|
||
# if self.energy_allocator and self.energy_allocator.current_target:
|
||
# target_function = self.energy_allocator.current_target.function_point
|
||
|
||
# # 注入多样性约束
|
||
# prompt = self.diversity_injector.inject_diversity_constraints(
|
||
# prompt=prompt,
|
||
# target_function=target_function,
|
||
# uncovered_functions=uncovered_functions
|
||
# )
|
||
# # =================================
|
||
|
||
# return prompt
|
||
|
||
# def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
|
||
# """分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
|
||
# analysis = []
|
||
|
||
# # 检查是否涉及 FSM 状态转换
|
||
# has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
|
||
# has_else_if = any('else if' in line.lower() for line in missing_lines)
|
||
# has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
|
||
|
||
# if has_state_case or has_else_if:
|
||
# analysis.append("- Missing code involves FSM state transitions or conditional branches")
|
||
|
||
# if has_else_if or has_if_condition:
|
||
# analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
|
||
# analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
|
||
# analysis.append("- Analyze the missing code's context: what conditions block this branch?")
|
||
|
||
# if has_state_case:
|
||
# analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
|
||
|
||
# # === 新增:FSM 状态路径分析 ===
|
||
# # 尝试从缺失代码中提取 FSM 状态信息
|
||
# fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
|
||
# if fsm_state_info:
|
||
# analysis.append("")
|
||
# analysis.append("[FSM STATE PATH ANALYSIS]")
|
||
# analysis.extend(fsm_state_info)
|
||
|
||
# return "\n".join(analysis) if analysis else ""
|
||
|
||
# def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
|
||
# """
|
||
# 从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
|
||
|
||
# 分析策略:
|
||
# 1. 从缺失代码的上下文识别 case 分支(FSM 状态)
|
||
# 2. 分析该状态下的条件分支优先级
|
||
# 3. 识别需要满足的输入条件
|
||
# """
|
||
# info = []
|
||
|
||
# # 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
|
||
# try:
|
||
# with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
# full_content = f.read()
|
||
# except:
|
||
# return info
|
||
|
||
# # 提取缺失代码所在的 FSM 状态
|
||
# target_state = None
|
||
# missing_condition = None
|
||
|
||
# for line in missing_lines:
|
||
# # 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
|
||
# # 格式可能是 "Line N: STATE:" 或 "STATE:"
|
||
# state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
|
||
# if state_match:
|
||
# potential_state = state_match.group(1)
|
||
# # 排除常见的非状态关键字
|
||
# if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
|
||
# target_state = potential_state
|
||
# break
|
||
|
||
# # 如果没找到,尝试从整个文件中分析
|
||
# if not target_state:
|
||
# # 查找缺失行附近的 case 分支
|
||
# lines = full_content.split('\n')
|
||
# for i, line in enumerate(lines):
|
||
# # 查找覆盖率标记为 0 的行
|
||
# if re.match(r'^%000000', line.strip()):
|
||
# # 向上查找最近的 case 分支(状态)
|
||
# for j in range(i-1, max(0, i-20), -1):
|
||
# state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
|
||
# if state_match:
|
||
# target_state = state_match.group(1)
|
||
# break
|
||
# if target_state:
|
||
# break
|
||
|
||
# # 分析缺失的条件分支
|
||
# for line in missing_lines:
|
||
# # 提取 else if 条件
|
||
# else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
|
||
# if else_if_match:
|
||
# missing_condition = else_if_match.group(1)
|
||
# break
|
||
# # 提取 if 条件
|
||
# if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
|
||
# if if_match:
|
||
# missing_condition = if_match.group(1)
|
||
# break
|
||
|
||
# # 生成具体的指导信息
|
||
# if target_state:
|
||
# info.append(f"- Target FSM state identified: {target_state}")
|
||
|
||
# # 查找复位后的初始状态
|
||
# reset_state = self._find_reset_state(full_content)
|
||
# if reset_state:
|
||
# info.append(f"- After reset, FSM starts in state: {reset_state}")
|
||
|
||
# if reset_state != target_state:
|
||
# info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
|
||
# info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
|
||
|
||
# # 尝试找到状态转换路径
|
||
# transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
|
||
# if transition_hint:
|
||
# info.append(f"- To reach {target_state}: {transition_hint}")
|
||
|
||
# if missing_condition:
|
||
# info.append(f"- Missing condition: \"{missing_condition}\"")
|
||
# # 分析条件优先级
|
||
# priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
|
||
# if priority_info:
|
||
# info.extend(priority_info)
|
||
|
||
# return info
|
||
|
||
# def _find_reset_state(self, content: str) -> Optional[str]:
|
||
# """从 DUT 代码中找到复位后的初始状态"""
|
||
# # 查找复位逻辑中的状态赋值
|
||
# # 常见模式: if (reset) state <= IDLE; 或 state <= 0;
|
||
# patterns = [
|
||
# r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
# r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
|
||
# r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
# ]
|
||
|
||
# for pattern in patterns:
|
||
# match = re.search(pattern, content, re.IGNORECASE)
|
||
# if match:
|
||
# state = match.group(1)
|
||
# # 如果是数字,尝试从参数中找对应的状态名
|
||
# if state.isdigit():
|
||
# # 查找参数定义
|
||
# param_match = re.search(r'parameter\s+([^;]+);', content)
|
||
# if param_match:
|
||
# params = param_match.group(1)
|
||
# # 解析参数列表
|
||
# for param in params.split(','):
|
||
# param = param.strip()
|
||
# if '=' in param:
|
||
# name, value = param.split('=')
|
||
# if value.strip() == state:
|
||
# return name.strip()
|
||
# return state
|
||
|
||
# return None
|
||
|
||
# def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
|
||
# """找到从一个状态到另一个状态的转换条件"""
|
||
# # 在 case 语句中查找 from_state 分支
|
||
# # 提取该分支下到 to_state 的转换条件
|
||
|
||
# # 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
|
||
# pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
|
||
# match = re.search(pattern, content, re.DOTALL)
|
||
|
||
# if match:
|
||
# # 提取条件
|
||
# branch_code = match.group(0)
|
||
# # 查找 if 条件
|
||
# if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
# if if_match:
|
||
# return f"set condition: {if_match.group(1)}"
|
||
|
||
# # 查找 else if 条件
|
||
# elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
# if elif_match:
|
||
# return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
|
||
|
||
# # 尝试反向查找:什么条件下会转换到目标状态
|
||
# trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
|
||
# trans_match = re.search(trans_pattern, content)
|
||
# if trans_match:
|
||
# return f"set condition: {trans_match.group(1)}"
|
||
|
||
# return None
|
||
|
||
# def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
|
||
# """分析条件分支的优先级,找出需要排除的条件"""
|
||
# info = []
|
||
|
||
# if not state:
|
||
# return info
|
||
|
||
# # 查找该状态下的所有条件分支
|
||
# # 提取 state: 后面的代码块
|
||
# state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
|
||
# match = re.search(state_block_pattern, content, re.DOTALL)
|
||
|
||
# if not match:
|
||
# return info
|
||
|
||
# state_block = match.group(1)
|
||
|
||
# # 提取所有条件分支
|
||
# conditions = []
|
||
# for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
|
||
# conditions.append(cond_match.group(1).strip())
|
||
|
||
# # 找到缺失条件在列表中的位置
|
||
# missing_idx = -1
|
||
# for i, cond in enumerate(conditions):
|
||
# # 简化比较(去除空格)
|
||
# if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
|
||
# missing_condition.replace(' ', '') in cond.replace(' ', ''):
|
||
# missing_idx = i
|
||
# break
|
||
|
||
# if missing_idx > 0:
|
||
# info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
|
||
# info.append(f"- You must make ALL earlier conditions FALSE:")
|
||
# for i in range(missing_idx):
|
||
# cond = conditions[i]
|
||
# # 分析如何使条件为 FALSE
|
||
# false_hint = self._get_false_hint(cond)
|
||
# info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
|
||
|
||
# return info
|
||
|
||
# def _get_false_hint(self, condition: str) -> str:
|
||
# """分析如何使条件为 FALSE"""
|
||
# condition = condition.strip()
|
||
|
||
# # 处理 !signal 形式
|
||
# if condition.startswith('!'):
|
||
# signal = condition[1:].strip()
|
||
# return f"set {signal} = 1"
|
||
|
||
# # 处理 signal 形式(布尔值)
|
||
# if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
|
||
# return f"set {condition} = 0"
|
||
|
||
# # 处理比较运算符
|
||
# if '==' in condition:
|
||
# parts = condition.split('==')
|
||
# if len(parts) == 2:
|
||
# signal = parts[0].strip()
|
||
# value = parts[1].strip()
|
||
# if value.isdigit():
|
||
# return f"set {signal} != {value}"
|
||
|
||
# # 处理 >= 形式
|
||
# if '>=' in condition:
|
||
# parts = condition.split('>=')
|
||
# if len(parts) == 2:
|
||
# signal = parts[0].strip()
|
||
# value = parts[1].strip()
|
||
# if value.isdigit():
|
||
# return f"set {signal} < {value}"
|
||
|
||
# # 处理 > 形式
|
||
# if '>' in condition and '>=' not in condition:
|
||
# parts = condition.split('>')
|
||
# if len(parts) == 2:
|
||
# signal = parts[0].strip()
|
||
# value = parts[1].strip()
|
||
# return f"set {signal} <= {value}"
|
||
|
||
# return "analyze the condition logic"
|
||
|
||
# def _generate_semantic_context(self) -> str:
|
||
# """
|
||
# [新增] 从语义分析结果生成 Prompt 上下文
|
||
|
||
# 整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
|
||
# - FSM 状态转换图
|
||
# - 功能点重要性排序
|
||
# - 测试场景建议
|
||
|
||
# Returns:
|
||
# 语义上下文字符串,用于增强 Prompt
|
||
# """
|
||
# if not self.semantic_result:
|
||
# return ""
|
||
|
||
# context_parts = []
|
||
|
||
# # 1. 模块基础信息
|
||
# module_name = self.semantic_result.get('module_name', '')
|
||
# inputs = self.semantic_result.get('inputs', [])
|
||
# outputs = self.semantic_result.get('outputs', [])
|
||
|
||
# if module_name:
|
||
# context_parts.append(f"Module Name: {module_name}")
|
||
# if inputs:
|
||
# context_parts.append(f"Module Inputs: {', '.join(inputs)}")
|
||
# if outputs:
|
||
# context_parts.append(f"Module Outputs: {', '.join(outputs)}")
|
||
|
||
# # 2. FSM 信息(最关键)
|
||
# fsm_info = self.semantic_result.get('fsm_info')
|
||
# if fsm_info:
|
||
# context_parts.append("")
|
||
# context_parts.append("=== FSM STATE MACHINE DETAILS ===")
|
||
# context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
|
||
|
||
# states = fsm_info.get('states', [])
|
||
# if states:
|
||
# context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
|
||
|
||
# # 状态转换表
|
||
# transitions = fsm_info.get('transitions', {})
|
||
# if transitions:
|
||
# context_parts.append("")
|
||
# context_parts.append("=== STATE TRANSITION TABLE ===")
|
||
# context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
|
||
# context_parts.append("")
|
||
|
||
# for state, trans_list in transitions.items():
|
||
# for trans in trans_list:
|
||
# condition = trans.get('condition', 'default')
|
||
# next_state = trans.get('next_state', 'unknown')
|
||
# if condition == 'default':
|
||
# context_parts.append(f" {state} --[default]--> {next_state}")
|
||
# else:
|
||
# context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
|
||
|
||
# # 添加状态转换路径分析
|
||
# context_parts.append("")
|
||
# context_parts.append("=== STATE TRANSITION PATH HINTS ===")
|
||
# reset_state = self._find_reset_state_from_fsm(fsm_info)
|
||
# if reset_state:
|
||
# context_parts.append(f"Initial State (after reset): {reset_state}")
|
||
# context_parts.append("")
|
||
# context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
|
||
# context_parts.append(" 1. Reset the DUT to initialize to the starting state")
|
||
# context_parts.append(" 2. Apply inputs to trigger state transitions")
|
||
# context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
|
||
# context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
|
||
|
||
# # 3. 功能点优先级
|
||
# function_points = self.semantic_result.get('function_points', [])
|
||
# if function_points:
|
||
# context_parts.append("")
|
||
# context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
|
||
|
||
# for i, fp in enumerate(function_points[:10]): # Top 10
|
||
# name = fp.get('name', 'unknown')
|
||
# fp_type = fp.get('type', 'unknown')
|
||
# importance = fp.get('importance', 0)
|
||
# covered = fp.get('covered', False)
|
||
# status = "✓ COVERED" if covered else "✗ NOT COVERED"
|
||
# context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
|
||
|
||
# # 4. 测试场景建议
|
||
# test_scenarios = self.semantic_result.get('test_scenarios', [])
|
||
# if test_scenarios:
|
||
# context_parts.append("")
|
||
# context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
|
||
|
||
# for i, ts in enumerate(test_scenarios[:5]): # Top 5
|
||
# name = ts.get('name', 'unknown')
|
||
# description = ts.get('description', '')
|
||
# priority = ts.get('priority', 0)
|
||
# context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
|
||
|
||
# if context_parts:
|
||
# return "\n".join(context_parts)
|
||
# return ""
|
||
|
||
# def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
|
||
# """从 FSM 信息中推断复位后的初始状态"""
|
||
# # 方法1:检查是否有明确的复位状态
|
||
# transitions = fsm_info.get('transitions', {})
|
||
|
||
# # 复位后通常进入第一个定义的状态或特定名称的状态
|
||
# states = fsm_info.get('states', [])
|
||
|
||
# # 常见的初始状态命名
|
||
# initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
|
||
|
||
# for name in initial_state_names:
|
||
# if name in states:
|
||
# return name
|
||
|
||
# # 如果没有找到,返回第一个状态
|
||
# if states:
|
||
# return states[0]
|
||
|
||
# return None
|
||
|
||
|
||
# # ============================================================================
|
||
# # TBInjector - 场景注入器
|
||
# # ============================================================================
|
||
# class TBInjector:
|
||
# """
|
||
# 场景注入器 - 将LLM生成的测试代码注入到现有测试平台
|
||
|
||
# 集成三层防护策略:
|
||
# 1. Layer 1: Prompt约束(由CoverageParser处理)
|
||
# 2. Layer 2: 智能代码转换
|
||
# 3. Layer 3: 质量评估和重试建议
|
||
# """
|
||
|
||
# def __init__(self, tb_code):
|
||
# """
|
||
# 初始化注入器
|
||
|
||
# Args:
|
||
# tb_code: 原始测试平台代码字符串
|
||
# """
|
||
# self.content = tb_code
|
||
# self.validator = BlackBoxValidator()
|
||
# self.validator._extract_signals_from_tb(tb_code)
|
||
# self.last_validation_result = None
|
||
|
||
# def inject(self, new_code, iter_idx):
|
||
# """
|
||
# 注入新的测试场景到测试平台
|
||
|
||
# Args:
|
||
# new_code: LLM生成的测试代码
|
||
# iter_idx: 迭代序号
|
||
|
||
# Returns:
|
||
# 修改后的测试平台代码
|
||
# """
|
||
# # Step 1: 预处理代码(包含三层防护)
|
||
# scenario_code, result = self._preprocess_code(new_code, iter_idx)
|
||
|
||
# self.last_validation_result = result
|
||
|
||
# # 记录日志
|
||
# if result['violations']['critical']:
|
||
# logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
|
||
# for v in result['violations']['critical']:
|
||
# logger.warning(f" - {v}")
|
||
|
||
# if result['violations']['warning']:
|
||
# logger.info(f"[CGA-{iter_idx}] Warnings:")
|
||
# for v in result['violations']['warning']:
|
||
# logger.info(f" - {v}")
|
||
|
||
# if result['transformations']:
|
||
# logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
|
||
# for t in result['transformations']:
|
||
# logger.info(f" - {t['type']}: {t.get('original', 'N/A')[:50]}...")
|
||
|
||
# # Step 2: 构建场景块
|
||
# scenario_block = self._build_scenario_block(scenario_code, iter_idx)
|
||
|
||
# # Step 3: 注入到TB中
|
||
# modified_tb = self._inject_scenario(scenario_block)
|
||
|
||
# return modified_tb
|
||
|
||
# def should_retry(self):
|
||
# """是否应该重试"""
|
||
# if self.last_validation_result is None:
|
||
# return False
|
||
# return self.last_validation_result.get('should_retry', False)
|
||
|
||
# def get_quality_score(self):
|
||
# """获取代码质量分数"""
|
||
# if self.last_validation_result is None:
|
||
# return 0
|
||
# return self.last_validation_result.get('quality_score', 0)
|
||
|
||
# def _preprocess_code(self, code, iter_idx):
|
||
# """预处理LLM生成的代码 - 增强版,包含语法预检查"""
|
||
# # 移除markdown标记
|
||
# code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
|
||
# code = re.sub(r'```', '', code)
|
||
|
||
# # 移除task包装
|
||
# code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
|
||
# code = re.sub(r'endtask', '', code)
|
||
|
||
# # 移除$finish和$stop
|
||
# code = re.sub(r'\$finish\s*;', '', code)
|
||
# code = re.sub(r'\$stop\s*;', '', code)
|
||
|
||
# # 移除多余空行
|
||
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
|
||
# # [新增] 移除 initial begin ... end 代码块
|
||
# # LLM 可能生成完整的 initial begin...end 块,但我们只需要其中的测试代码
|
||
# initial_pattern = re.compile(r'\binitial\s+begin\b.*?\bend\b', re.DOTALL | re.IGNORECASE)
|
||
|
||
# # 检查并移除 initial begin ... end 块
|
||
# initial_match = initial_pattern.search(code)
|
||
# if initial_match:
|
||
# logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
|
||
# logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
|
||
# # 提取块内的内容
|
||
# block_content = initial_match.group(0)
|
||
# # 移除 initial begin 和 end 包装
|
||
# # 保留块内的实际测试代码
|
||
# inner_content = re.sub(r'^\s*initial\s+begin\s*', '', block_content)
|
||
# inner_content = re.sub(r'\bend\s*$', '', inner_content)
|
||
# # 替换整个块为内部内容
|
||
# code = initial_pattern.sub(inner_content.strip(), code, count=1)
|
||
|
||
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
|
||
# # 信号名自动修正(在验证之前)
|
||
# code = self._auto_correct_signal_names(code)
|
||
|
||
# # 三层防护:黑盒约束验证和转换
|
||
# code, result = self.validator.validate_and_transform(code, self.content)
|
||
|
||
# # [新增] 第四层:Verilog 语法预检查
|
||
# # 提取完整 TB 中已声明的信号(不只是代码片段)
|
||
# signal_widths = self._extract_signal_widths()
|
||
# declared_signals = self._extract_declared_signals()
|
||
|
||
# # 调用语法检查,传入已声明信号列表
|
||
# syntax_result = self.validator.check_syntax_issues(
|
||
# code,
|
||
# signal_widths,
|
||
# declared_signals=declared_signals
|
||
# )
|
||
|
||
# # 合并检查结果
|
||
# result['syntax_check'] = syntax_result
|
||
|
||
# # 记录语法问题日志
|
||
# if syntax_result['width_mismatch']:
|
||
# logger.warning(f"[CGA-{iter_idx}] Width mismatch detected:")
|
||
# for issue in syntax_result['width_mismatch']:
|
||
# logger.warning(f" - {issue['message']}")
|
||
# if 'suggestion' in issue:
|
||
# logger.info(f" Suggestion: {issue['suggestion']}")
|
||
|
||
# if syntax_result['logic_issues']:
|
||
# logger.warning(f"[CGA-{iter_idx}] Logic issues detected:")
|
||
# for issue in syntax_result['logic_issues']:
|
||
# logger.warning(f" - {issue['message']}")
|
||
# if 'suggestion' in issue:
|
||
# logger.info(f" Suggestion: {issue['suggestion']}")
|
||
|
||
# if syntax_result['syntax_warnings']:
|
||
# for issue in syntax_result['syntax_warnings']:
|
||
# if issue['severity'] == 'error':
|
||
# logger.error(f"[CGA-{iter_idx}] Syntax error: {issue['message']}")
|
||
# else:
|
||
# logger.warning(f"[CGA-{iter_idx}] Syntax warning: {issue['message']}")
|
||
|
||
# # 如果语法检查发现问题,设置 should_retry
|
||
# if syntax_result['should_retry']:
|
||
# result['should_retry'] = True
|
||
# logger.warning(f"[CGA-{iter_idx}] Syntax issues detected, recommend retry with corrected code")
|
||
|
||
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
# return code.strip(), result
|
||
|
||
# def _extract_declared_signals(self) -> set:
|
||
# """从完整测试平台中提取所有已声明的信号"""
|
||
# signals = set()
|
||
|
||
# # 匹配 reg [N:0] signal 或 wire [N:0] signal
|
||
# for match in re.finditer(r'\b(reg|wire|logic)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
|
||
# signals.add(match.group(2))
|
||
|
||
# # 匹配 input/output 声明
|
||
# for match in re.finditer(r'\b(input|output|inout)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
|
||
# signals.add(match.group(2))
|
||
|
||
# # 匹配模块端口连接中的信号
|
||
# for match in re.finditer(r'\.(\w+)\s*\(\s*(\w+)\s*\)', self.content):
|
||
# signals.add(match.group(2)) # 添加连接的信号名
|
||
|
||
# return signals
|
||
|
||
# def _extract_signal_widths(self) -> Dict[str, int]:
|
||
# """从测试平台中提取信号位宽信息"""
|
||
# widths = {}
|
||
|
||
# # 匹配 reg [N:0] signal 或 wire [N:0] signal
|
||
# width_pattern = re.compile(r'\b(reg|wire)\s+\[(\d+):(\d+)\]\s+(\w+)')
|
||
|
||
# for match in width_pattern.finditer(self.content):
|
||
# high = int(match.group(2))
|
||
# low = int(match.group(3))
|
||
# width = high - low + 1
|
||
# signal = match.group(4)
|
||
# widths[signal] = width
|
||
|
||
# # 匹配无位宽声明的信号(默认 1 位)
|
||
# single_bit_pattern = re.compile(r'\b(reg|wire)\s+(?!.*\[)(\w+)\s*;')
|
||
# for match in single_bit_pattern.finditer(self.content):
|
||
# signal = match.group(2)
|
||
# if signal not in widths:
|
||
# widths[signal] = 1
|
||
|
||
# return widths
|
||
|
||
# def _auto_correct_signal_names(self, code: str) -> str:
|
||
# """自动修正信号名错误"""
|
||
# corrections = []
|
||
|
||
# # 获取正确的复位信号名
|
||
# reset_signal = self.validator._find_reset_signal()
|
||
|
||
# # 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
|
||
# if reset_signal != "reset":
|
||
# # 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
|
||
# pattern = r'\breset\b(?!\w)'
|
||
# matches = re.findall(pattern, code)
|
||
# if matches:
|
||
# code = re.sub(pattern, reset_signal, code)
|
||
# corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# # 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
|
||
# if reset_signal == "areset":
|
||
# pattern = r'\brst\b(?!\w)'
|
||
# matches = re.findall(pattern, code)
|
||
# if matches:
|
||
# code = re.sub(pattern, reset_signal, code)
|
||
# corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# # 检查是否使用了不存在的信号
|
||
# for signal in re.findall(r'\b(\w+)\s*=', code):
|
||
# signal = signal.strip()
|
||
# # 跳过已知的合法信号
|
||
# if signal in self.validator.dut_inputs:
|
||
# continue
|
||
# # 检查是否是复位信号的别名
|
||
# if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
|
||
# code = re.sub(rf'\b{signal}\b', reset_signal, code)
|
||
# corrections.append(f"{signal} -> {reset_signal}")
|
||
|
||
# if corrections:
|
||
# logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
|
||
|
||
# return code
|
||
|
||
# def _build_scenario_block(self, scenario_code, iter_idx):
|
||
# """构建完整的场景代码块"""
|
||
# # 格式化缩进
|
||
# lines = scenario_code.split('\n')
|
||
# formatted_lines = []
|
||
# for line in lines:
|
||
# stripped = line.strip()
|
||
# if stripped:
|
||
# formatted_lines.append(f" {stripped}")
|
||
# formatted_code = '\n'.join(formatted_lines)
|
||
|
||
# # 检测输出信号用于日志
|
||
# output_signals = self._detect_output_signals()
|
||
# output_log = self._generate_output_log(output_signals, iter_idx)
|
||
|
||
# # 构建完整块
|
||
# block = f'''
|
||
# // ========== CGA Iteration {iter_idx} ==========
|
||
# scenario = 100 + {iter_idx};
|
||
# // Reset signals to safe state
|
||
# {self._generate_signal_reset()}
|
||
# #5;
|
||
# // CGA generated test sequence:
|
||
# {formatted_code}
|
||
# // Log results
|
||
# {output_log}
|
||
# // ==============================================
|
||
# '''
|
||
# return block
|
||
|
||
# def _detect_output_signals(self):
|
||
# """检测DUT的输出信号"""
|
||
# outputs = []
|
||
# wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
# for match in wire_pattern.finditer(self.content):
|
||
# signal = match.group(1)
|
||
# if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
|
||
# outputs.append(signal)
|
||
# return outputs
|
||
|
||
# def _generate_signal_reset(self):
|
||
# """生成信号重置代码"""
|
||
# inputs = []
|
||
# reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
# for match in reg_pattern.finditer(self.content):
|
||
# signal = match.group(1)
|
||
# if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
|
||
# inputs.append(signal)
|
||
|
||
# if inputs:
|
||
# return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
|
||
# return " // No input signals to reset"
|
||
|
||
# def _generate_output_log(self, signals, iter_idx):
|
||
# """生成输出日志代码"""
|
||
# if not signals:
|
||
# return f' $display("[CGA-{iter_idx}] Scenario executed");'
|
||
|
||
# sig_names = ", ".join(signals)
|
||
# format_str = ", ".join(["%b"] * len(signals))
|
||
|
||
# return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
|
||
|
||
# def _inject_scenario(self, scenario_block):
|
||
# """将场景块注入到测试平台"""
|
||
# modified_tb = self.content
|
||
|
||
# # 策略:如果有 $fclose,在其之前插入
|
||
# if "$fclose" in modified_tb:
|
||
# modified_tb = re.sub(
|
||
# r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
|
||
# scenario_block + r'\1\2',
|
||
# modified_tb,
|
||
# count=1
|
||
# )
|
||
# elif "$finish" in modified_tb:
|
||
# # 否则在 $finish 之前插入
|
||
# modified_tb = modified_tb.replace(
|
||
# "$finish;",
|
||
# scenario_block + "\n $finish;"
|
||
# )
|
||
# else:
|
||
# # 兜底:在最后一个 end 之前插入
|
||
# last_end = modified_tb.rfind("end")
|
||
# if last_end != -1:
|
||
# modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
|
||
|
||
# return modified_tb
|
||
"""
|
||
Description : Utils for CGA (CoverageParser & TBInjector)
|
||
- Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
|
||
- Enhanced: Three-layer protection for black-box constraints
|
||
* Layer 1: Enhanced Prompt constraints (prevention)
|
||
* Layer 2: Smart code transformation (conversion)
|
||
* Layer 3: Quality assessment & retry (fallback)
|
||
- Integrated: Diversity Constraint Injector (Layer 1)
|
||
Author : CorrectBench Integration
|
||
"""
|
||
import re
|
||
import os
|
||
import logging
|
||
from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
|
||
|
||
# [新增] 导入多样性约束注入器
|
||
if TYPE_CHECKING:
|
||
from autoline.diversity_injector import DiversityInjector
|
||
|
||
# 配置日志
|
||
logger = logging.getLogger(__name__)
|
||
|
||
# ============================================================================
|
||
# 三层防护策略说明
|
||
# ============================================================================
|
||
# Layer 1 - Prompt约束: 动态提取允许信号列表,明确约束,正反面示例
|
||
# Layer 2 - 智能转换: 检测违规意图,尝试转换为合法形式
|
||
# Layer 3 - 质量评估: 违规比例过高时返回质量分数,触发重新生成
|
||
# ============================================================================
|
||
|
||
|
||
# ============================================================================
|
||
# 黑盒约束检查器 - 三层防护策略实现
|
||
# ============================================================================
|
||
class BlackBoxValidator:
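    """
    Black-box constraint validator implementing a three-layer protection strategy.

    Layer 1: Enhanced prompt constraints (prevention)
        - Dynamically extract the list of allowed signals
        - Generate explicit constraint hints

    Layer 2: Smart code transformation (conversion)
        - Detect the intent behind a violation
        - Try to rewrite it into a legal, equivalent form
        - Comment the statement out only when the rewrite fails

    Layer 3: Quality assessment (retry)
        - Compute a code quality score
        - Recommend a retry when the share of violations is too high
    """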
|
||
|
||
# Common naming patterns of DUT-internal signals, grouped by severity.
# Each entry is (regex, human-readable description); the descriptions are
# runtime strings and are kept exactly as in the original.
INTERNAL_SIGNAL_PATTERNS = {
    # High risk: FSM state related (must never be modified)
    'critical': [
        (r'\bstate\b', 'FSM状态寄存器'),
        (r'\bnext_state\b', 'FSM下一状态'),
        (r'\bcurrent_state\b', 'FSM当前状态'),
        (r'\bnext\b(?!\s*[,@])', '下一状态简写'),
    ],
    # Medium risk: counters and internal registers
    'warning': [
        (r'\bcounter\b', '内部计数器'),
        (r'\bcount\b', '计数寄存器'),
        (r'\bcnt\b', '计数简写'),
        (r'\bfall_counter\b', '下落计数器'),
        (r'\breg_\w+', '内部寄存器'),
    ],
    # Low risk: suspicious names that need confirmation
    'info': [
        (r'\binternal_\w+', '内部信号'),
        (r'\btemp_\w+', '临时信号'),
        (r'\bprev_\w+', '前一状态'),
    ],
}
|
||
|
||
# Statement kinds the generated stimulus must not contain.
# Each entry is (regex, description, severity).
FORBIDDEN_STATEMENTS = [
    (r'\bforce\s+(\w+)', 'force语句', 'critical'),
    (r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
    (r'\bdeassign\s+', 'deassign语句', 'critical'),
    (r'\brelease\s+', 'release语句', 'critical'),
]
|
||
|
||
# 层次化访问模式(如 DUT.state)
|
||
HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
|
||
|
||
# =========================================================================
# Generic forbidden signal-name patterns - names an LLM typically "guesses".
# =========================================================================
# Each entry is (anchored regex, description). The source comment states that
# matching names are detected and rejected automatically so that the LLM does
# not use signal names that do not exist.
FORBIDDEN_NAME_PATTERNS = [
    # Generic names with a numeric suffix
    (r'^input_signal_\d+$', '带数字后缀的 input_signal'),
    (r'^input_\d+$', '带数字后缀的 input'),
    (r'^in_\d+$', '带数字后缀的 in'),
    (r'^output_signal_\d+$', '带数字后缀的 output_signal'),
    (r'^output_\d+$', '带数字后缀的 output'),
    (r'^out_\d+$', '带数字后缀的 out'),
    (r'^data_\d+$', '带数字后缀的 data'),
    (r'^data_in_\d+$', '带数字后缀的 data_in'),
    (r'^data_out_\d+$', '带数字后缀的 data_out'),
    (r'^signal_\d+$', '带数字后缀的 signal'),
    (r'^sig_\d+$', '带数字后缀的 sig'),
    (r'^port_\d+$', '带数字后缀的 port'),
    # Generic control-signal names (unless they really exist)
    (r'^reset$', '通用复位名'),
    (r'^rst$', '通用复位简写'),
    (r'^rst_n$', '通用低电平复位'),
    (r'^rst_b$', '通用低电平复位'),
    (r'^clr$', '通用清零'),
    (r'^clear$', '通用清零'),
    (r'^enable$', '通用使能'),
    (r'^en$', '通用使能简写'),
    (r'^ena$', '通用使能简写'),
    (r'^clk_in$', '通用时钟输入'),
    (r'^clock$', '通用时钟'),
    (r'^clk$', '通用时钟简写'),
    # Minimal single-letter names
    (r'^a$', '单字母信号名'),
    (r'^b$', '单字母信号名'),
    (r'^c$', '单字母信号名'),
    (r'^d$', '单字母信号名'),
    (r'^x$', '单字母信号名'),
    (r'^y$', '单字母信号名'),
    (r'^z$', '单字母信号名'),
]
|
||
|
||
def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None, dut_code: str = None):
|
||
"""
|
||
Args:
|
||
dut_inputs: DUT模块的输入端口列表
|
||
dut_outputs: DUT模块的输出端口列表
|
||
dut_code: DUT模块的源代码(用于提取所有信号名)
|
||
"""
|
||
self.dut_inputs = dut_inputs or []
|
||
self.dut_outputs = dut_outputs or []
|
||
self.dut_code = dut_code or ""
|
||
|
||
# [新增] 从 DUT 代码提取的所有信号名
|
||
self.dut_all_signals = set() # 所有信号名(端口 + 内部信号)
|
||
self.dut_internal_signals = set() # 仅内部信号
|
||
self.signal_widths = {} # 信号位宽
|
||
|
||
if dut_code:
|
||
self._extract_all_signals_from_dut(dut_code)
|
||
|
||
self.violations = {'critical': [], 'warning': [], 'info': []}
|
||
self.transformations = []
|
||
|
||
# =========================================================================
|
||
# [新增] 从 DUT 代码提取所有信号名
|
||
# =========================================================================
|
||
def _extract_all_signals_from_dut(self, dut_code: str):
|
||
"""
|
||
从 DUT 代码中提取所有信号名(端口 + 内部信号)
|
||
|
||
提取内容:
|
||
- 输入端口 (input)
|
||
- 输出端口 (output)
|
||
- 双向端口 (inout)
|
||
- 内部寄存器 (reg)
|
||
- 内部连线 (wire)
|
||
- 参数 (parameter/localparam)
|
||
"""
|
||
self.dut_all_signals = set()
|
||
self.dut_internal_signals = set()
|
||
self.signal_widths = {}
|
||
|
||
# 1. 提取端口声明
|
||
# 格式: input/output/inout [width] name
|
||
port_patterns = [
|
||
# 带位宽的端口: input [7:0] data_out
|
||
(r'(?:^|[\s;,])(input|output|inout)\s+\[(\d+):(\d+)\]\s+(\w+)', 'port'),
|
||
# 无位宽的端口: input clk
|
||
(r'(?:^|[\s;,])(input|output|inout)\s+(\w+)(?=\s*[;,\n)])', 'port_simple'),
|
||
]
|
||
|
||
for pattern, ptype in port_patterns:
|
||
for match in re.finditer(pattern, dut_code, re.MULTILINE):
|
||
if ptype == 'port':
|
||
direction = match.group(1)
|
||
msb = int(match.group(2))
|
||
lsb = int(match.group(3))
|
||
name = match.group(4)
|
||
width = msb - lsb + 1
|
||
self.dut_all_signals.add(name)
|
||
self.signal_widths[name] = width
|
||
if direction == 'input' and name not in self.dut_inputs:
|
||
self.dut_inputs.append(name)
|
||
elif direction == 'output' and name not in self.dut_outputs:
|
||
self.dut_outputs.append(name)
|
||
else:
|
||
direction = match.group(1)
|
||
name = match.group(2)
|
||
# 排除关键字
|
||
if name.lower() not in ['wire', 'reg', 'logic', 'input', 'output']:
|
||
self.dut_all_signals.add(name)
|
||
self.signal_widths[name] = 1
|
||
if direction == 'input' and name not in self.dut_inputs:
|
||
self.dut_inputs.append(name)
|
||
elif direction == 'output' and name not in self.dut_outputs:
|
||
self.dut_outputs.append(name)
|
||
|
||
# 2. 提取内部信号声明 (reg, wire, logic)
|
||
internal_patterns = [
|
||
# 带位宽: reg [7:0] counter
|
||
(r'\b(reg|wire|logic)\s+\[(\d+):(\d+)\]\s+(\w+)', 'internal_width'),
|
||
# 无位宽: reg state
|
||
(r'\b(reg|wire|logic)\s+(\w+)(?=\s*[;,\n=])', 'internal_simple'),
|
||
]
|
||
|
||
for pattern, ptype in internal_patterns:
|
||
for match in re.finditer(pattern, dut_code):
|
||
if ptype == 'internal_width':
|
||
sig_type = match.group(1)
|
||
msb = int(match.group(2))
|
||
lsb = int(match.group(3))
|
||
name = match.group(4)
|
||
width = msb - lsb + 1
|
||
self.dut_all_signals.add(name)
|
||
self.dut_internal_signals.add(name)
|
||
self.signal_widths[name] = width
|
||
else:
|
||
sig_type = match.group(1)
|
||
name = match.group(2)
|
||
# 排除关键字和已提取的端口
|
||
if name.lower() not in ['wire', 'reg', 'logic', 'input', 'output', 'begin', 'end', 'if', 'else', 'case', 'always', 'initial']:
|
||
if name not in self.dut_inputs and name not in self.dut_outputs:
|
||
self.dut_all_signals.add(name)
|
||
self.dut_internal_signals.add(name)
|
||
self.signal_widths[name] = 1
|
||
|
||
# 3. 提取参数
|
||
param_pattern = r'(?:parameter|localparam)\s+(?:\[\d+:\d+\]\s*)?(\w+)\s*='
|
||
for match in re.finditer(param_pattern, dut_code):
|
||
name = match.group(1)
|
||
self.dut_all_signals.add(name)
|
||
|
||
logger.info(f"Extracted from DUT: {len(self.dut_inputs)} inputs, {len(self.dut_outputs)} outputs, "
|
||
f"{len(self.dut_internal_signals)} internal signals, total {len(self.dut_all_signals)} signals")
|
||
|
||
def get_all_allowed_signals(self) -> List[str]:
    """Return every name in ``self.dut_all_signals`` as a sorted list."""
    return sorted(self.dut_all_signals)
|
||
|
||
def get_drivable_signals(self) -> List[str]:
    """Return the drivable signal names (``self.dut_inputs`` only), sorted."""
    return sorted(self.dut_inputs)
|
||
|
||
def get_readable_signals(self) -> List[str]:
    """Return the readable signal names (outputs plus internal signals), sorted."""
    return sorted(set(self.dut_outputs) | self.dut_internal_signals)
|
||
|
||
def is_valid_signal(self, name: str) -> bool:
    """Return True when ``name`` is one of the names in ``self.dut_all_signals``."""
    return name in self.dut_all_signals
|
||
|
||
def is_drivable_signal(self, name: str) -> bool:
    """Return True when ``name`` may be assigned, i.e. it is in ``self.dut_inputs``."""
    return name in self.dut_inputs
|
||
|
||
def generate_signal_constraint_prompt(self) -> str:
    """
    Build the prompt section listing the DUT's real signal names.

    The text tells the LLM which signals it may drive, which it may only
    read, and shows a correct and a wrong usage example.

    The reset name comes from ``self._find_reset_signal()``, which is defined
    outside this block (presumably it returns the name as a string - confirm).
    When that name is literally ``reset``, the lines that would declare
    ``reset`` non-existent are left out so the prompt does not contradict
    its own "correct" example.

    Returns:
        The constraint prompt, starting and ending with a newline.
    """
    drivable = self.get_drivable_signals()
    readable = self.get_readable_signals()
    all_signals = self.get_all_allowed_signals()
    reset_signal = self._find_reset_signal()

    rule = '=' * 60
    example_input = drivable[0] if drivable else 'signal'
    reset_is_literal = reset_signal == "reset"

    if reset_is_literal:
        forbidden_controls = '- "rst", "enable", "en" (unless listed above)'
    else:
        forbidden_controls = (
            f'- "reset" (actual: "{reset_signal}"), "rst", "enable", "en" (unless listed above)'
        )

    lines = [
        "",
        rule,
        "📋 [DUT SIGNAL NAMES - USE ONLY THESE]",
        rule,
        "",
        "⚠️ CRITICAL: You can ONLY use signal names that EXIST in the DUT!",
        "⚠️ DO NOT invent or guess any signal name!",
        "",
        "✅ DRIVABLE INPUTS (you CAN assign to these):",
        f"{drivable}",
        "",
        "📖 READABLE SIGNALS (you can read but NOT assign):",
        f"{readable}",
        "",
        "📝 ALL DUT SIGNALS (for reference):",
        # Only the first 20 names are listed to keep the prompt short.
        f"{all_signals[:20]}{'...' if len(all_signals) > 20 else ''}",
        "",
        "🚫 FORBIDDEN - THESE DO NOT EXIST:",
        "- Any name NOT in the lists above",
        '- "input_signal_1", "input_signal_2", "in_1", "data_1" (generic guesses)',
        forbidden_controls,
        "",
        rule,
        "",
        "⚠️ RULE: If a signal is not listed in DRIVABLE INPUTS, it does NOT exist!",
        "You MUST use exact signal names from the DRIVABLE INPUTS list.",
        "",
        "✅ CORRECT EXAMPLE:",
        f"{reset_signal} = 1; // '{reset_signal}' is in DRIVABLE INPUTS",
        f"{example_input} = 0; // Using actual signal name",
        "",
        "❌ WRONG EXAMPLE:",
        "input_signal_1 = 1; // Does NOT exist in DUT!",
    ]
    if not reset_is_literal:
        lines.append(f"reset = 1; // Does NOT exist! Use '{reset_signal}' instead!")

    return "\n".join(lines) + "\n"
|
||
|
||
def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
    """Validate generated test code and rewrite black-box violations (main entry).

    Args:
        code: The generated test code to check.
        tb_code: Optional testbench source; when given, DUT input/output
            signals are extracted from it first.

    Returns:
        ``(transformed_code, report)`` where ``report`` carries the quality
        score, validity flag, collected violations, applied transformations,
        the retry recommendation and the allowed-signal information.
    """
    # Every run starts from a clean record.
    self.transformations = []
    self.violations = {'critical': [], 'warning': [], 'info': []}

    if tb_code:
        self._extract_signals_from_tb(tb_code)

    # Count effective (non-blank, non-comment) lines before any rewriting.
    total_lines = sum(
        1
        for raw in code.strip().split('\n')
        if raw.strip() and not raw.strip().startswith('//')
    )

    # Step 1: forbidden statements, Step 2: hierarchical access,
    # Step 3: internal-signal rewriting, Step 4: final cleanup.
    stages = (
        self._transform_forbidden_statements,
        self._transform_hierarchical_access,
        self._smart_transform_internal_signals,
        self._final_cleanup,
    )
    for stage in stages:
        code = stage(code)

    quality_score = self._calculate_quality_score(total_lines)
    critical_count = len(self.violations['critical'])

    # Retry when the score is poor or too many critical violations remain.
    should_retry = quality_score < 50 or critical_count > 3

    report = {
        'quality_score': quality_score,
        'is_valid': critical_count == 0,
        'violations': self.violations,
        'transformations': self.transformations,
        'should_retry': should_retry,
        'allowed_signals': self._get_allowed_signals_info(),
    }
    return code.strip(), report
|
||
|
||
def _extract_signals_from_tb(self, tb_code: str):
    """Extract the DUT input and output signal names from testbench code.

    Signals connected to the DUT instance by name (``.port(signal)``) are
    classified by how the testbench declares them: ``reg`` signals are
    appended to ``self.dut_inputs`` and ``wire`` signals to
    ``self.dut_outputs``. If nothing was found that way, every ``reg`` /
    ``wire`` declaration is used as a fallback.

    Unlike a first-name-only match, declarations with several names
    (``reg clk, reset;``), initialisers (``reg clk = 0;``), ``signed`` and
    non-numeric ranges (``reg [W-1:0] d;``) are all recognised.

    Args:
        tb_code: Verilog testbench source text.
    """
    # Testbench bookkeeping registers that are never DUT inputs.
    tb_bookkeeping = {'file', 'scenario', 'i', 'j', 'k', 'cnt'}

    def declared_names(keyword: str) -> List[str]:
        """Return, in order, every name declared with ``keyword``."""
        names = []
        decl_re = re.compile(
            rf'\b{keyword}\s+(?:signed\s+)?(?:\[[^\]]*\]\s*)?([^;\n]*)'
        )
        for decl in decl_re.finditer(tb_code):
            # Drop {...} / (...) groups so commas inside an initialiser are
            # not mistaken for name separators.
            body = re.sub(r'\{[^}]*\}|\([^)]*\)', '', decl.group(1))
            for piece in body.split(','):
                ident = re.match(r'\s*([A-Za-z_]\w*)', piece)
                if ident and ident.group(1) not in names:
                    names.append(ident.group(1))
        return names

    # Scan the declarations once instead of once per connected port.
    reg_names = declared_names('reg')
    wire_names = declared_names('wire')

    dut_match = re.search(r'(\w+)\s+(?:dut|uut)\s*\(', tb_code, re.IGNORECASE)
    if dut_match:
        start = dut_match.start()
        end = start
        depth = 0
        # Walk to the parenthesis that closes the instance port list.
        for offset, char in enumerate(tb_code[start:]):
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    end = start + offset + 1
                    break

        dut_instance = tb_code[start:end]
        reg_set = set(reg_names)
        wire_set = set(wire_names)

        for port in re.finditer(r'\.(\w+)\s*\(\s*(\w+)\s*\)', dut_instance):
            signal_name = port.group(2)
            if signal_name in reg_set and signal_name not in self.dut_inputs:
                self.dut_inputs.append(signal_name)
            if signal_name in wire_set and signal_name not in self.dut_outputs:
                self.dut_outputs.append(signal_name)

    # Fallback: infer directly from the reg / wire declarations.
    if not self.dut_inputs and not self.dut_outputs:
        for signal in reg_names:
            if signal.lower() in tb_bookkeeping:
                continue
            if signal not in self.dut_inputs:
                self.dut_inputs.append(signal)

        for signal in wire_names:
            if signal not in self.dut_outputs:
                self.dut_outputs.append(signal)
|
||
|
||
def _transform_forbidden_statements(self, code: str) -> str:
    """Rewrite or block statements matched by ``self.FORBIDDEN_STATEMENTS``.

    Each entry of ``self.FORBIDDEN_STATEMENTS`` is unpacked as
    ``(pattern, desc, severity)``. Every match is recorded in
    ``self.violations[severity]``. A ``force`` on a signal listed in
    ``self.dut_inputs`` is converted into a plain assignment; any other
    matching line is commented out with a ``// [BLOCKED]`` prefix.

    Args:
        code: Test code to process.

    Returns:
        The code with forbidden statements converted or commented out.
    """
    for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
        matches = list(re.finditer(pattern, code, re.IGNORECASE))
        # Walk the matches back to front so that edits to later lines do
        # not shift the offsets of matches that are still to be handled.
        # NOTE(review): two matches on the same line share one line span, so
        # the second edit works on an already modified line - confirm the
        # patterns cannot match twice per line.
        for match in reversed(matches):
            signal = match.group(1) if match.groups() else 'unknown'
            self.violations[severity].append(f"{desc}: {signal}")

            # Locate the full line that contains the match.
            line_start = code.rfind('\n', 0, match.start()) + 1
            line_end = code.find('\n', match.end())
            if line_end == -1:
                line_end = len(code)
            original_line = code[line_start:line_end]

            # Try to convert force -> direct assignment (input signals only).
            if 'force' in match.group(0).lower() and signal in self.dut_inputs:
                new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
                code = code[:line_start] + new_line + code[line_end:]
                self.transformations.append({
                    'type': 'force_to_assign',
                    'original': original_line.strip(),
                    'transformed': new_line.strip()
                })
                continue

            # Everything else is commented out; leading whitespace is dropped.
            code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
            self.transformations.append({
                'type': 'blocked',
                'original': original_line.strip(),
                'reason': desc
            })

    return code
|
||
|
||
def _transform_hierarchical_access(self, code: str) -> str:
    """Comment out lines that reach into the DUT hierarchy (e.g. ``DUT.state``).

    A match of ``self.HIERARCHICAL_ACCESS`` counts as a violation when its
    prefix (group 1) is ``DUT``, ``UUT``, ``TOP`` or ``TB`` (case-insensitive)
    and its signal (group 2) is not listed in ``self.dut_outputs``. Each
    violation is recorded in ``self.violations['critical']`` and the line is
    prefixed with ``// [HIERARCHY]``.

    The code is processed line by line. Rewriting the whole string while
    iterating match offsets computed on the original string would make every
    match after the first point at the wrong line.

    Args:
        code: Test code to process.

    Returns:
        The code with offending lines commented out.
    """
    hierarchy_roots = {'DUT', 'UUT', 'TOP', 'TB'}
    lines = code.split('\n')

    for idx, line in enumerate(lines):
        # NOTE(review): assumes the pattern matches within a single line.
        offending = [
            f"{hit.group(1)}.{hit.group(2)}"
            for hit in re.finditer(self.HIERARCHICAL_ACCESS, line)
            if hit.group(1).upper() in hierarchy_roots
            and hit.group(2) not in self.dut_outputs
        ]
        if not offending:
            continue

        for access in offending:
            self.violations['critical'].append(f"层次化访问内部信号: {access}")
        # One comment prefix per line, however many accesses it contains.
        lines[idx] = '// [HIERARCHY] ' + line.lstrip()

    return '\n'.join(lines)
|
||
|
||
def _smart_transform_internal_signals(self, code: str) -> str:
    """Rewrite lines that touch internal DUT signals.

    Each line is classified through ``_detect_internal_signals_in_line`` and
    ``_analyze_signal_context``:

    * assignment to an internal signal - replaced by two comment lines and
      recorded as a critical violation;
    * condition / state wait - replaced by the result of
      ``_transform_condition`` / ``_transform_state_wait``;
    * any other use of a critical signal - replaced by warning comments;
    * everything else - kept unchanged.

    Args:
        code: Test code to process.

    Returns:
        The transformed code.
    """
    passthrough_prefixes = ('#', '$', 'repeat(', '@(')
    result = []

    for raw in code.split('\n'):
        text = raw.strip()

        # Blank lines, comments, delays, system tasks and event controls
        # are never inspected.
        if not text or text.startswith('//') or text.startswith(passthrough_prefixes):
            result.append(raw)
            continue

        found = self._detect_internal_signals_in_line(text)
        critical = found.get('critical', [])
        warning = found.get('warning', [])
        if not (critical or warning):
            result.append(raw)
            continue

        context = self._analyze_signal_context(text, found)
        kind = context['type']

        if kind == 'assignment':
            result.append("// [INTERNAL_ASSIGN] Cannot modify internal signal")
            result.append(f"// Original: {text}")
            self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
        elif kind in ('condition', 'wait_for_state'):
            if kind == 'condition':
                rewritten = self._transform_condition(text, context)
                tag = 'condition_transform'
            else:
                rewritten = self._transform_state_wait(text, context)
                tag = 'wait_transform'
            result.append(rewritten)
            self.transformations.append({
                'type': tag,
                'original': text,
                'transformed': rewritten,
            })
        elif critical:
            result.append(f"// [WARNING] Contains internal signal reference: {critical}")
            result.append(f"// Original: {text}")
            for name in critical:
                self.violations['warning'].append(f"可疑的内部信号访问: {name}")
        else:
            result.append(raw)

    return '\n'.join(result)
|
||
|
||
def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
    """Collect the internal-signal names referenced on one line, by severity.

    Every pattern in ``self.INTERNAL_SIGNAL_PATTERNS`` is applied
    case-insensitively. A hit is discarded when it is a legal keyword or a
    common port name, a system function, a known DUT input/output, or starts
    with ``$``.

    Args:
        line: A single line of test code.

    Returns:
        A dict with the keys ``'critical'``, ``'warning'`` and ``'info'``,
        each holding the distinct names found, in order of first appearance.
    """
    legal_keywords = {
        'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
        'while', 'for', 'case', 'default', 'always', 'initial',
        'assign', 'wire', 'reg', 'input', 'output', 'inout',
        'parameter', 'localparam', 'integer', 'real', 'time',
        'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
        'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
        'true', 'false', 'idle', 'wait'
    }
    system_functions = {'$display', '$write', '$monitor', '$fopen', '$fclose',
                        '$fdisplay', '$fwrite', '$readmemh', '$readmemb',
                        '$finish', '$stop', '$random', '$time', '$stime'}

    found = {'critical': [], 'warning': [], 'info': []}

    for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
        for pattern, _label in patterns:
            for hit in re.findall(pattern, line, re.IGNORECASE):
                # Multi-group patterns yield tuples; use the first non-empty
                # of the first two groups.
                if isinstance(hit, tuple):
                    hit = hit[0] if hit[0] else hit[1]
                if not hit:
                    continue

                is_allowed = (
                    hit.lower() in legal_keywords
                    or hit in system_functions
                    or hit in self.dut_inputs
                    or hit in self.dut_outputs
                    or hit.startswith('$')
                )
                if is_allowed:
                    continue

                if hit not in found[severity]:
                    found[severity].append(hit)

    return found
|
||
|
||
def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
    """Classify how the detected internal signals are used on a line.

    The checks run in this order:

    1. ``'assignment'`` - an internal signal is the target of ``=`` or
       ``<=`` outside any parentheses. Comparisons (``==``, ``===``) and
       operators inside a parenthesised condition are not assignments, so
       ``if (state == 1)`` and ``wait(state == 2)`` fall through to the
       checks below instead of being reported as writes.
    2. ``'wait_for_state'`` - a ``wait(...)`` whose argument mentions ``state``.
    3. ``'condition'`` - the line contains ``if (``, ``while (`` or ``@ (``.
    4. ``'other'`` - anything else.

    Args:
        line: A single line of test code.
        signals: Detection result with ``'critical'`` / ``'warning'`` lists.

    Returns:
        A dict with the keys ``'type'``, ``'signals'`` and ``'line'``.
    """
    critical = signals.get('critical', [])
    warning = signals.get('warning', [])

    # Record, for every character, the parenthesis depth and the index of
    # the top-level statement it belongs to.
    depth = 0
    statement = 0
    depth_at = []
    statement_at = []
    for char in line:
        depth_at.append(depth)
        statement_at.append(statement)
        if char == '(':
            depth += 1
        elif char == ')':
            depth = max(0, depth - 1)
        elif char == ';' and depth == 0:
            statement += 1

    # Only the first top-level "name =" / "name <=" of each statement is an
    # assignment target; a later "<=" in the same statement is a comparison.
    checked_statements = set()
    for candidate in re.finditer(r'(\w+)\s*(<=|=)(?!=)', line):
        operator_pos = candidate.start(2)
        if depth_at[operator_pos] != 0:
            continue
        if statement_at[operator_pos] in checked_statements:
            continue
        checked_statements.add(statement_at[operator_pos])

        target = candidate.group(1)
        if target in critical or target in warning:
            return {'type': 'assignment', 'signals': [target], 'line': line}

    if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
        return {'type': 'wait_for_state', 'signals': critical, 'line': line}

    if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
        return {'type': 'condition', 'signals': critical + warning, 'line': line}

    return {'type': 'other', 'signals': critical + warning, 'line': line}
|
||
|
||
def _transform_condition(self, line: str, context: Dict) -> str:
    """Replace a condition that inspects internal signals.

    When the involved signals mention ``state``, the condition is replaced by
    explanatory comments plus a fixed 5-cycle wait, and a warning violation
    is recorded. Otherwise the line is simply commented out.

    Args:
        line: The offending line.
        context: Context dict; its ``'signals'`` entry is inspected.

    Returns:
        The replacement text (may span several lines).
    """
    if 'state' not in str(context['signals']):
        return f"// [TRANSFORMED] {line}"

    body = line.strip()
    pad = ' ' * (len(line) - len(line.lstrip()))

    self.violations['warning'].append(f"条件判断转换: {body}")
    return '\n'.join([
        f"// [TRANSFORMED] Original: {body}",
        f"{pad}// Cannot directly check internal state",
        f"{pad}// Alternative: Wait for expected clock cycles",
        f"{pad}repeat(5) @(posedge clk);  // Adjust cycles as needed",
    ])
|
||
|
||
def _transform_state_wait(self, line: str, context: Dict) -> str:
    """Replace a wait on internal state with a fixed 10-cycle wait.

    An info-level violation describing the rewrite is recorded.

    Args:
        line: The offending ``wait(...)`` line.
        context: Context dict (not inspected here).

    Returns:
        The replacement text: explanatory comments followed by the wait.
    """
    body = line.strip()
    pad = ' ' * (len(line) - len(line.lstrip()))

    replacement = '\n'.join([
        f"// [TRANSFORMED] Original: {body}",
        f"{pad}// Cannot wait for internal state directly",
        f"{pad}// Alternative: Drive inputs and wait for expected cycles",
        f"{pad}repeat(10) @(posedge clk);  // Adjust based on FSM design",
    ])

    self.violations['info'].append(f"状态等待转换: {body}")
    return replacement
|
||
|
||
def _final_cleanup(self, code: str) -> str:
    """Tidy the transformed code.

    Drops a bare ``begin`` / ``end`` line that directly follows a
    ``// [TRANSFORMED]`` or ``// [INTERNAL`` marker line, then collapses
    runs of blank lines into a single blank line.

    Args:
        code: Transformed test code.

    Returns:
        The cleaned code.
    """
    marker_prefixes = ('// [TRANSFORMED]', '// [INTERNAL')
    kept = []

    for line in code.split('\n'):
        is_block_keyword = line.strip() in ['begin', 'end']
        if is_block_keyword and kept and kept[-1].strip().startswith(marker_prefixes):
            # The statement this keyword belonged to was commented out.
            continue
        kept.append(line)

    joined = '\n'.join(kept)
    return re.sub(r'\n\s*\n\s*\n', '\n\n', joined)
|
||
|
||
def _calculate_quality_score(self, total_lines: int) -> int:
    """Compute a 0-100 quality score for the processed code.

    Starting from 100: minus 20 per critical violation, minus 5 per warning,
    plus 5 per transformation whose type does not contain ``'blocked'``.
    Code with no effective lines scores 0.

    Args:
        total_lines: Number of effective lines in the original code.

    Returns:
        The score, clamped to the range 0..100.
    """
    if total_lines == 0:
        return 0

    penalty = (20 * len(self.violations['critical'])
               + 5 * len(self.violations['warning']))
    bonus = 5 * sum(
        1 for entry in self.transformations
        if 'blocked' not in entry.get('type', '')
    )
    return max(0, min(100, 100 - penalty + bonus))
|
||
|
||
def _get_allowed_signals_info(self) -> Dict:
    """Return the allowed DUT signals grouped as inputs, outputs and both.

    ``'inputs'`` and ``'outputs'`` refer to the validator's own lists (not
    copies); ``'all_allowed'`` is a new list with inputs followed by outputs.
    """
    inputs, outputs = self.dut_inputs, self.dut_outputs
    return {
        'inputs': inputs,
        'outputs': outputs,
        'all_allowed': inputs + outputs,
    }
|
||
|
||
def generate_constraint_prompt(self) -> str:
    """Build the dynamic signal-constraint prompt.

    If signal names were extracted from the DUT (``self.dut_all_signals`` is
    non-empty) the precise prompt from ``generate_signal_constraint_prompt``
    is returned. Otherwise a generic prompt is assembled from whatever
    inputs / outputs are known.

    Returns:
        The prompt text.
    """
    # [Preferred] Signal names were extracted from the DUT: use the precise constraints.
    if self.dut_all_signals:
        return self.generate_signal_constraint_prompt()

    # [Fallback] Use the generic constraint prompt.
    reset_signal = self._find_reset_signal()
    inputs_list = str(self.dut_inputs) if self.dut_inputs else "[]"

    # Dynamically obtain example forbidden signal names; the first four and
    # the next four are rendered as two separate lists.
    forbidden_examples = self.get_forbidden_examples(count=8)
    forbidden_str = ", ".join(f'"{ex}"' for ex in forbidden_examples[:4])
    forbidden_str2 = ", ".join(f'"{ex}"' for ex in forbidden_examples[4:8]) if len(forbidden_examples) > 4 else ""

    # NOTE(review): the prompt text is runtime data; its line layout is
    # reproduced exactly as it appears in the source.
    prompt = f"""
{'='*60}
📋 [SIGNAL CONSTRAINTS - DERIVED FROM DUT]
{'='*60}
"""

    if self.dut_inputs:
        prompt += f"""
✅ ALLOWED INPUT SIGNALS (you CAN drive these):
{inputs_list}

🚫 FORBIDDEN SIGNAL NAMES - DO NOT USE THESE:
Generic patterns: {forbidden_str}
"""
        if forbidden_str2:
            prompt += f" More examples: {forbidden_str2}\n"

        prompt += f"""
⚠️ Also forbidden: "reset" (actual: "{reset_signal}"), "rst", "enable", "en"
⚠️ Any name NOT in ALLOWED INPUTS above is FORBIDDEN!

⚠️ CRITICAL RULE: You MUST use ONLY the signal names from ALLOWED INPUTS!
If a signal name is not in the list, it does NOT exist in this design!
"""
    else:
        # No inputs known: warn instead of listing allowed names.
        prompt += """
⚠️ WARNING: Could not extract input signals from testbench.
Please check the testbench code for actual signal names before writing test code.
"""

    if self.dut_outputs:
        outputs_list = str(self.dut_outputs)
        prompt += f"""
📖 OUTPUT SIGNALS (you can READ but NOT write):
{outputs_list}
"""

    prompt += f"""
🚫 FORBIDDEN ACTIONS:
1. NEVER assign to internal signals (state, counter, etc.)
2. NEVER use 'force' or 'assign' statements
3. NEVER access DUT.state (hierarchical access)
4. NEVER guess signal names - use ONLY from ALLOWED INPUTS!

✅ CORRECT APPROACH:
- To reach FSM state: drive inputs and WAIT for transition
- Example: {reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;
"""
    return prompt
|
||
|
||
def _find_reset_signal(self) -> str:
    """Return the name of the DUT reset input.

    Lookup order:
      1. an exact match among common reset names, by priority;
      2. the first input whose name contains ``reset`` or ``rst``;
      3. the first input whose name does not contain ``clk``;
      4. the literal ``"reset"`` as a last resort.
    """
    inputs = self.dut_inputs

    # 1. Common reset names, highest priority first.
    for candidate in ('areset', 'rst_n', 'rst', 'reset', 'rst_b'):
        if candidate in inputs:
            return candidate

    # 2. Any input that looks like a reset.
    for name in inputs:
        lowered = name.lower()
        if 'reset' in lowered or 'rst' in lowered:
            return name

    # 3. First non-clock input, else 4. the fallback name.
    return next((name for name in inputs if 'clk' not in name.lower()), "reset")
|
||
|
||
# =========================================================================
|
||
# [新增] 通用禁止信号名检测
|
||
# =========================================================================
|
||
def is_forbidden_signal_name(self, name: str) -> Tuple[bool, str]:
    """Check whether a signal name is a forbidden "guessed" name.

    Names present in the DUT inputs or outputs are never forbidden. Any
    other name is tested, lower-cased, against each pattern in
    ``self.FORBIDDEN_NAME_PATTERNS``.

    Args:
        name: The signal name to check.

    Returns:
        ``(is_forbidden, reason)``; ``reason`` is ``""`` when allowed.
    """
    lowered = name.lower()

    allowed = name in self.dut_inputs or name in self.dut_outputs
    if not allowed:
        for pattern, reason in self.FORBIDDEN_NAME_PATTERNS:
            if re.match(pattern, lowered, re.IGNORECASE):
                return True, reason

    return False, ""
|
||
|
||
def get_forbidden_examples(self, count: int = 6) -> List[str]:
    """Generate example signal names the LLM must not use.

    The result mixes always-forbidden generic names with "wrong guess"
    names derived from the actual DUT inputs (``reset`` / ``rst`` when the
    real reset is named differently, ``clk`` when the real clock is named
    differently, ``enable`` / ``en`` when no input name contains ``en``).

    The first ``count // 2`` slots are reserved for generic names, the
    DUT-specific names follow, and remaining slots are filled with further
    generic names. Without that split the generic names alone would fill
    every slot and the DUT-specific examples would never be returned.

    Args:
        count: Maximum number of examples to return.

    Returns:
        Up to ``count`` distinct names, none of which is an actual DUT input.
    """
    # 1. Generic names with a numeric suffix (always forbidden).
    generic = [
        'input_signal_1', 'input_signal_2',
        'in_1', 'in_2', 'in_3',
        'data_1', 'data_2',
        'signal_1', 'signal_2',
    ]

    # 2. "Wrong guess" names based on the actual signals.
    targeted = []
    reset_signal = self._find_reset_signal()
    if reset_signal and reset_signal not in ['reset', 'rst']:
        if reset_signal.lower() != 'reset':
            targeted.append('reset')
        if reset_signal.lower() != 'rst':
            targeted.append('rst')

    # 3. The real clock has a different name than the usual guess.
    clk_signals = [s for s in self.dut_inputs if 'clk' in s.lower()]
    if clk_signals and clk_signals[0] != 'clk':
        targeted.append('clk')

    # 4. No input name contains "en": the usual enable guesses are wrong.
    if not any('en' in s.lower() for s in self.dut_inputs):
        targeted.extend(['enable', 'en'])

    reserved = max(0, count) // 2
    candidates = generic[:reserved] + targeted + generic[reserved:]

    # De-duplicate, drop real inputs and cap the length.
    existing = set(self.dut_inputs)
    unique_examples = []
    for example in candidates:
        if len(unique_examples) >= count:
            break
        if example in existing or example in unique_examples:
            continue
        unique_examples.append(example)

    return unique_examples
|
||
|
||
def detect_forbidden_signals_in_code(self, code: str) -> List[Dict]:
    """Find forbidden signal names used as assignment targets.

    Only names on the left-hand side of a ``signal = value`` statement at
    the start of a line are examined.

    Args:
        code: Verilog code.

    Returns:
        One dict per offending assignment with the keys ``'signal'``,
        ``'reason'`` and ``'line'`` (the matched ``name =`` text).
    """
    assignment_lhs = re.compile(r'^\s*(\w+)\s*=', re.MULTILINE)
    findings = []

    for hit in assignment_lhs.finditer(code):
        name = hit.group(1)
        verdict = self.is_forbidden_signal_name(name)
        if not verdict[0]:
            continue
        findings.append({
            'signal': name,
            'reason': verdict[1],
            'line': hit.group(0).strip(),
        })

    return findings
|
||
|
||
# =========================================================================
|
||
# [新增] Verilog 语法预检查 - 检测常见逻辑错误
|
||
# =========================================================================
|
||
def check_syntax_issues(self, code: str, signal_widths: Dict[str, int] = None, declared_signals: set = None) -> Dict:
    """Run the Verilog pre-checks for common syntax and logic problems.

    Args:
        code: The code to check.
        signal_widths: Optional map of signal name to bit width,
            e.g. ``{'in': 1, 'data': 8}``.
        declared_signals: Optional set of declared signal names, used to
            detect undeclared signals.

    Returns:
        A dict with the keys:
          * ``'width_mismatch'``  - width mismatch warnings
          * ``'logic_issues'``    - logic problems
          * ``'syntax_warnings'`` - other syntax findings
          * ``'should_retry'``    - True when there is any width mismatch,
            any logic issue, or any syntax finding of severity ``'error'``
    """
    width_mismatch = self._check_width_mismatch(code, signal_widths)
    logic_issues = self._check_logic_issues(code, signal_widths)
    syntax_warnings = self._check_syntax_warnings(code, declared_signals)

    has_syntax_errors = any(
        finding.get('severity') == 'error' for finding in syntax_warnings
    )
    should_retry = bool(width_mismatch) or bool(logic_issues) or has_syntax_errors

    return {
        'width_mismatch': width_mismatch,
        'logic_issues': logic_issues,
        'syntax_warnings': syntax_warnings,
        'should_retry': should_retry,
    }
|
||
|
||
def _check_width_mismatch(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
    """Detect assignments of sized literals that are wider than the target.

    Two forms are recognised:

    * ``{signal} = N'b...`` - a one-element concatenation target; a signal
      missing from ``signal_widths`` is assumed to be 1 bit wide.
    * ``signal = N'b...``   - a direct assignment; reported only when the
      signal's width is known.

    Literals may use any base letter in either case (``b``, ``o``, ``d``,
    ``h``) and an optional signed marker (``8'sb...``).

    Args:
        code: The code to check.
        signal_widths: Optional map of signal name to bit width.

    Returns:
        A list of issue dicts (``type``, ``signal``, ``signal_width``,
        ``assigned_width``, ``original``, ``message``, ``severity`` and, for
        concatenations, ``suggestion``).
    """
    issues = []
    signal_widths = signal_widths or {}

    # <width>'<optional s><base><digits>
    literal = r"(\d+)'[sS]?([bBoOdDhH])([0-9a-fA-FxXzZ_]+)"

    # Form 1: {signal} = N'bvalue, e.g. {in} = 8'b01111100 puts 8 bits into 1.
    concat_pattern = re.compile(r'\{\s*(\w+)\s*\}\s*=\s*' + literal)
    for match in concat_pattern.finditer(code):
        signal = match.group(1)
        value_width = int(match.group(2))
        base = match.group(3)
        value = match.group(4)

        # Undeclared signals default to a single bit.
        actual_width = signal_widths.get(signal, 1)
        if value_width > actual_width:
            issues.append({
                'type': 'concat_width_mismatch',
                'signal': signal,
                'signal_width': actual_width,
                'assigned_width': value_width,
                'original': match.group(0),
                'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value via concatenation. Verilog will truncate.",
                'severity': 'warning',
                'suggestion': f"Use a shift register: reg [{value_width-1}:0] temp; temp = {value_width}'{base}{value}; then shift bits one by one"
            })

    # Form 2: signal = N'bvalue. A concatenation target cannot match here
    # because the closing brace sits between the name and the "=".
    assign_pattern = re.compile(r'\b(\w+)\s*=\s*' + literal)
    for match in assign_pattern.finditer(code):
        signal = match.group(1)
        value_width = int(match.group(2))

        # Warn only when the signal's width is actually known.
        if signal not in signal_widths:
            continue
        actual_width = signal_widths[signal]
        if value_width > actual_width:
            issues.append({
                'type': 'direct_width_mismatch',
                'signal': signal,
                'signal_width': actual_width,
                'assigned_width': value_width,
                'original': match.group(0),
                'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value. Truncation will occur.",
                'severity': 'warning'
            })

    return issues
|
||
|
||
def _check_logic_issues(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
    """Detect logic problems in generated test code.

    Currently reported:
      * a single-bit signal shifted into itself (``in = in >> 1``);
      * such a self-shift appearing after a ``repeat(N) begin``.

    A signal missing from ``signal_widths`` is assumed to be 1 bit wide.

    Args:
        code: The code to check.
        signal_widths: Optional map of signal name to bit width.

    Returns:
        A list of issue dicts.
    """
    widths = signal_widths or {}
    found = []

    # signal = signal >> N  or  signal = signal << N
    self_shift = re.compile(r'\b(\w+)\s*=\s*\1\s*(>>|<<)\s*(\d+)?')
    for hit in self_shift.finditer(code):
        name = hit.group(1)
        if widths.get(name, 1) != 1:
            continue
        found.append({
            'type': 'single_bit_shift',
            'signal': name,
            'direction': hit.group(2),
            'original': hit.group(0),
            'message': f"Single-bit signal '{name}' self-shift has no effect. Result is always 0.",
            'severity': 'warning',
            'suggestion': "Use a shift register for bit-serial input, not the input signal itself"
        })

    # repeat(N) begin ... signal = signal >> 1 ...
    looped_shift = re.compile(r'repeat\s*\(\s*\d+\s*\)\s*begin[^}]*?(\w+)\s*=\s*\1\s*(>>|<<)', re.DOTALL)
    for hit in looped_shift.finditer(code):
        name = hit.group(1)
        if widths.get(name, 1) != 1:
            continue
        found.append({
            'type': 'repeat_single_bit_shift',
            'signal': name,
            'original': hit.group(0)[:100] + '...',
            'message': f"Repeat loop shifting single-bit signal '{name}' is ineffective",
            'severity': 'warning'
        })

    return found
|
||
|
||
def _check_syntax_warnings(self, code: str, declared_signals: set = None) -> List[Dict]:
    """Detect structural problems in generated test code.

    Checks performed:
      * unbalanced ``begin`` / ``end`` counts;
      * assignment to a name missing from ``declared_signals``. This check
        is skipped when ``declared_signals`` is None, because without the
        declarations every assignment would be reported as an error;
      * ``clk`` driven from both an ``initial`` and an ``always`` block;
      * an ``initial begin`` wrapper, which must not be injected into an
        existing initial block.

    Args:
        code: The code to check.
        declared_signals: Set of signal names declared in the full
            testbench, or None when unknown.

    Returns:
        A list of issue dicts, each with ``type``, ``message`` and
        ``severity`` (some also carry ``signal`` / ``suggestion``).
    """
    issues = []

    # Check: unbalanced begin/end.
    begin_count = len(re.findall(r'\bbegin\b', code))
    end_count = len(re.findall(r'\bend\b', code))
    if begin_count != end_count:
        issues.append({
            'type': 'mismatched_begin_end',
            'message': f"Mismatched begin/end: {begin_count} begin vs {end_count} end",
            'severity': 'error'
        })

    # Check: undeclared signals used as an assignment target.
    if declared_signals is not None:
        keywords = {'if', 'else', 'case', 'for', 'while', 'repeat', 'assign', 'force', 'release'}
        # Sorted so that the suggestion text is deterministic.
        known_names = ', '.join(sorted(declared_signals)[:10])
        for match in re.finditer(r'^\s*(\w+)\s*=', code, re.MULTILINE):
            signal = match.group(1)
            if signal in keywords or signal in declared_signals:
                continue
            issues.append({
                'type': 'undeclared_signal',
                'signal': signal,
                'message': f"Signal '{signal}' is used but not declared in the testbench",
                'severity': 'error',
                'suggestion': f"Use an existing signal name (declared: {known_names}...)"
            })

    # Check: clk driven from both an initial and an always block.
    initial_clk_blocks = len(re.findall(r'initial\s+begin[^i]*?clk\s*=', code, re.DOTALL))
    always_clk_blocks = len(re.findall(r'\balways[^i]*?clk\s*=', code, re.DOTALL))
    if initial_clk_blocks > 0 and always_clk_blocks > 0:
        issues.append({
            'type': 'multiple_clock_drivers',
            'message': f"Multiple clock drivers detected: {initial_clk_blocks} initial + {always_clk_blocks} always blocks driving clk",
            'severity': 'error',
            'suggestion': "Remove duplicate clock generation. The testbench already has clock generation."
        })

    # Check: an 'initial begin ... end' wrapper would end up nested once injected.
    if re.search(r'\binitial\s+begin\b', code):
        issues.append({
            'type': 'initial_block_injection',
            'message': "Code contains 'initial begin...end' block which should not be injected into an existing initial block",
            'severity': 'error',
            'suggestion': "Remove the 'initial begin...end' wrapper, keep only the test statements inside"
        })

    return issues
|
||
|
||
|
||
# ============================================================================
|
||
# CoverageParser - 覆盖率解析器
|
||
# ============================================================================
|
||
class CoverageParser:
|
||
"""覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
|
||
|
||
[增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
|
||
[新增] 集成能量分配层,提供目标功能点优先级信息
|
||
[新增] 集成多样性约束注入器,避免测试用例同质化
|
||
[新增] 从 DUT 代码提取信号名,精确约束 LLM
|
||
"""
|
||
|
||
def __init__(self, annotated_file, tb_code=None, semantic_result=None,
             energy_allocator=None, diversity_injector=None, dut_code=None):
    """Store the parser inputs and prepare the coverage-line patterns.

    Args:
        annotated_file: Path of the coverage-annotated Verilog file.
        tb_code: Optional testbench source; when given, DUT signals are
            extracted from it into the validator.
        semantic_result: Optional semantic analysis result.
        energy_allocator: Optional energy allocator.
        diversity_injector: Optional diversity constraint injector.
        dut_code: Optional DUT source, forwarded to ``BlackBoxValidator``.
    """
    self.file_path = annotated_file
    self.tb_code = tb_code
    self.dut_code = dut_code
    self.semantic_result = semantic_result
    self.energy_allocator = energy_allocator
    self.diversity_injector = diversity_injector

    # Verilator coverage annotations come in several forms:
    #   %NNNNNN  line coverage count (%000000 means never executed)
    #   ~NNNNNN  branch/condition coverage count (~000000 means never taken)
    #   ^NNNNNN  uncovered branch marker
    #    NNNNNN  leading space plus digits (some Verilator versions)
    #   NNNNNN   digits only, no prefix
    self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$')
    self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$')
    self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$')
    # Plain digits, with or without leading whitespace.
    self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$')
    # Declaration-like lines.
    self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')

    # The validator receives the DUT code and, when available, learns the
    # port signals from the testbench.
    validator = BlackBoxValidator(dut_code=dut_code)
    if tb_code:
        validator._extract_signals_from_tb(tb_code)
    self.validator = validator
|
||
|
||
def generate_prompt(self, current_score):
|
||
"""生成覆盖率驱动的Prompt"""
|
||
if not os.path.exists(self.file_path):
|
||
return None
|
||
|
||
try:
|
||
with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
lines = f.readlines()
|
||
except Exception:
|
||
return None
|
||
|
||
missing_blocks = []
|
||
current_block = []
|
||
recording = False
|
||
context_buffer = []
|
||
CONTEXT_SIZE = 3
|
||
|
||
# 收集缺失行用于 FSM 分析
|
||
missing_lines = []
|
||
|
||
for i, line in enumerate(lines):
|
||
line = line.strip()
|
||
count = -1
|
||
clean_code = line
|
||
is_tilde = False
|
||
is_caret = False
|
||
|
||
# 尝试匹配各种覆盖率标记格式
|
||
# Verilator 覆盖率格式:
|
||
# - %NNNNNN: 行覆盖,NNNNNN 是执行次数,%000000 表示未执行
|
||
# - ~NNNNNN: 分支/条件覆盖,~000000 表示分支从未执行
|
||
# - ^NNNNNN: 未覆盖分支标记
|
||
# - NNNNNN: 无前缀格式(某些版本)
|
||
match_pct = self.line_pattern.match(line) # %NNNNNN code
|
||
match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
|
||
match_caret = self.caret_pattern.match(line) # ^NNNNNN code
|
||
match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
|
||
|
||
if match_pct:
|
||
count = int(match_pct.group(1))
|
||
clean_code = match_pct.group(2).strip()
|
||
elif match_tilde:
|
||
count = int(match_tilde.group(1))
|
||
clean_code = match_tilde.group(2).strip()
|
||
is_tilde = True
|
||
elif match_caret:
|
||
count = int(match_caret.group(1))
|
||
clean_code = match_caret.group(2).strip()
|
||
is_caret = True
|
||
elif match_plain:
|
||
# 纯数字格式(可能出现在某些 Verilator 版本)
|
||
count = int(match_plain.group(1))
|
||
clean_code = match_plain.group(2).strip()
|
||
|
||
if "//" in clean_code:
|
||
clean_code = clean_code.split("//")[0].strip()
|
||
|
||
is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
|
||
is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
|
||
clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
|
||
|
||
# [修改] 覆盖状态判断:
|
||
# - %NNNNNN: count > 0 表示已覆盖,count == 0 表示未覆盖
|
||
# - ~NNNNNN: 分支覆盖标记,count == 0 也表示未覆盖!
|
||
# - ^NNNNNN: 未覆盖分支标记
|
||
is_definitely_covered = (not is_tilde and not is_caret and count > 0)
|
||
# [关键修复] tilde 格式 count == 0 也应该被视为 missing
|
||
is_definitely_missed = (
|
||
(not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or
|
||
(is_tilde and count == 0 and not is_hard_noise and not is_soft_noise) or # [新增] ~000000 也是 missing
|
||
(is_caret and not is_hard_noise and not is_soft_noise)
|
||
)
|
||
|
||
if recording:
|
||
if is_definitely_covered:
|
||
missing_blocks.append(current_block)
|
||
missing_lines.extend(current_block)
|
||
current_block = []
|
||
recording = False
|
||
if not is_hard_noise:
|
||
context_buffer.append(clean_code)
|
||
else:
|
||
if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
current_block.append(f"Line {i+1}: {clean_code}")
|
||
else:
|
||
if is_definitely_missed:
|
||
recording = True
|
||
if context_buffer:
|
||
current_block.append(f"... (Context)")
|
||
for ctx in context_buffer:
|
||
current_block.append(f" {ctx}")
|
||
current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
|
||
else:
|
||
if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
|
||
context_buffer.append(clean_code)
|
||
if len(context_buffer) > CONTEXT_SIZE:
|
||
context_buffer.pop(0)
|
||
|
||
if recording and current_block:
|
||
missing_blocks.append(current_block)
|
||
missing_lines.extend(current_block)
|
||
|
||
# [改进] 详细诊断日志 - 使用 info 级别确保可见
|
||
total_lines = len(lines)
|
||
parsed_lines = sum(1 for l in lines if l.strip() and (
|
||
self.line_pattern.match(l.strip()) or
|
||
self.tilde_pattern.match(l.strip()) or
|
||
self.caret_pattern.match(l.strip()) or
|
||
self.plain_pattern.match(l.strip())
|
||
))
|
||
|
||
# 收集零计数行的详细信息
|
||
zero_count_details = []
|
||
for l in lines:
|
||
l_stripped = l.strip()
|
||
if not l_stripped:
|
||
continue
|
||
match_pct = self.line_pattern.match(l_stripped)
|
||
match_tilde = self.tilde_pattern.match(l_stripped)
|
||
if match_pct and int(match_pct.group(1)) == 0:
|
||
zero_count_details.append(('%', match_pct.group(2).strip()[:50]))
|
||
elif match_tilde and int(match_tilde.group(1)) == 0:
|
||
zero_count_details.append(('~', match_tilde.group(2).strip()[:50]))
|
||
|
||
zero_count_lines = len(zero_count_details)
|
||
|
||
logger.info(f"CoverageParser: Total={total_lines}, Parsed={parsed_lines}, Zero-count={zero_count_lines}, Missing blocks={len(missing_blocks)}")
|
||
|
||
if not missing_blocks:
|
||
# [改进] 详细诊断信息
|
||
if zero_count_lines > 0:
|
||
logger.warning(f"Found {zero_count_lines} lines with zero coverage count, but no missing blocks extracted.")
|
||
logger.warning("Zero-count lines:")
|
||
for prefix, code in zero_count_details[:10]: # 只显示前10个
|
||
logger.warning(f" {prefix}000000: {code}")
|
||
if len(zero_count_details) > 10:
|
||
logger.warning(f" ... and {len(zero_count_details) - 10} more")
|
||
logger.warning("These lines may have been filtered as noise (declarations, etc.)")
|
||
return None
|
||
|
||
selected_blocks = missing_blocks[:50]
|
||
|
||
# 获取实际信号名用于示例
|
||
reset_signal = self.validator._find_reset_signal()
|
||
inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
|
||
example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
|
||
|
||
# [新增] 动态获取禁止信号名示例
|
||
forbidden_examples = self.validator.get_forbidden_examples(count=8)
|
||
forbidden_display = ", ".join(f'"{ex}"' for ex in forbidden_examples[:6])
|
||
|
||
# 分析 FSM 相关的缺失代码
|
||
fsm_analysis = self._analyze_fsm_missing(missing_lines)
|
||
|
||
# [新增] 从语义分析结果获取 FSM 和功能点信息
|
||
semantic_context = self._generate_semantic_context()
|
||
|
||
# === [新增] 在 prompt 开头添加醒目的信号名清单 ===
|
||
inputs_json = str(self.validator.dut_inputs) if self.validator.dut_inputs else "[]"
|
||
outputs_json = str(self.validator.dut_outputs) if self.validator.dut_outputs else "[]"
|
||
|
||
prompt = f"""
|
||
[ROLE]
|
||
You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
|
||
|
||
{'='*60}
|
||
⚠️ [SIGNAL NAME CONSTRAINTS - READ THIS FIRST] ⚠️
|
||
{'='*60}
|
||
|
||
📋 ALLOWED INPUT SIGNALS (you CAN drive these):
|
||
{inputs_json}
|
||
|
||
📋 OUTPUT SIGNALS (you can READ but NOT write):
|
||
{outputs_json}
|
||
|
||
🚫 FORBIDDEN SIGNAL NAMES - THESE DO NOT EXIST:
|
||
{forbidden_display}
|
||
⚠️ Also: "reset" (actual: "{reset_signal}"), "rst", "enable", "en"
|
||
⚠️ ANY name NOT in ALLOWED INPUTS above is FORBIDDEN!
|
||
|
||
{'='*60}
|
||
|
||
[COVERAGE STATUS]
|
||
Current testbench achieves {current_score:.2f}% coverage.
|
||
The following logic blocks in the DUT are NEVER executed during simulation:
|
||
|
||
"""
|
||
for idx, block in enumerate(selected_blocks):
|
||
prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
|
||
|
||
# [新增] 添加语义分析上下文
|
||
if semantic_context:
|
||
prompt += f"""
|
||
[SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
|
||
{semantic_context}
|
||
"""
|
||
|
||
# === [新增] 添加能量分配目标上下文 ===
|
||
if self.energy_allocator:
|
||
energy_context = self.energy_allocator.get_target_context()
|
||
if energy_context:
|
||
prompt += f"""
|
||
[ENERGY-ALIGNED TARGET - PRIORITY]
|
||
{energy_context}
|
||
Focus your test scenario on covering this high-priority target first.
|
||
"""
|
||
# =====================================
|
||
|
||
prompt += self.validator.generate_constraint_prompt()
|
||
|
||
# 添加 FSM 分析提示
|
||
if fsm_analysis:
|
||
prompt += f"""
|
||
[FSM STATE TRANSITION ANALYSIS - CRITICAL]
|
||
{fsm_analysis}
|
||
|
||
IMPORTANT: FSM transitions have PRIORITY ORDER!
|
||
- 'if' conditions are evaluated TOP to BOTTOM
|
||
- The FIRST matching condition determines the next state
|
||
- To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
|
||
- Read the missing code's context carefully: what conditions precede it?
|
||
|
||
"""
|
||
|
||
prompt += f"""
|
||
[OUTPUT REQUIREMENTS - CRITICAL]
|
||
1. Return ONLY Verilog test scenario code (NOT a task definition)
|
||
2. Your code will be inserted INTO an existing `initial begin ... end` block
|
||
3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
|
||
4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
|
||
|
||
[CODING STYLE]
|
||
1. Use blocking assignments for input signals: `signal = value;`
|
||
2. Use `#N;` for time delays: `#10;` means wait 10 time units
|
||
3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
|
||
4. Start with reset sequence if needed
|
||
|
||
[BLACK-BOX CONSTRAINTS - CRITICAL]
|
||
1. You can ONLY control module INPUTS listed above
|
||
2. You CANNOT access internal signals (state, next_state, counters, etc.)
|
||
3. You CANNOT use `force` or `assign` on internal signals
|
||
4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
|
||
|
||
[STEP-BY-STEP APPROACH - REQUIRED]
|
||
For each missing branch, think through:
|
||
1. What STATE must the FSM be in? (Look at the case statement)
|
||
2. What CONDITIONS must be true/false? (Check priority order!)
|
||
3. How to reach that state from reset? (Trace state transitions)
|
||
4. What inputs to apply and in what order?
|
||
|
||
[POSITIVE EXAMPLE - CORRECT APPROACH]
|
||
```verilog
|
||
// Reset sequence - use ACTUAL input signal names from above
|
||
{reset_signal} = 1;
|
||
repeat(2) @(posedge clk);
|
||
{reset_signal} = 0;
|
||
|
||
// Wait for FSM to reach desired state (estimate cycles)
|
||
repeat(3) @(posedge clk);
|
||
|
||
// Trigger missing branch by driving inputs
|
||
{example_signal} = 1;
|
||
repeat(5) @(posedge clk);
|
||
{example_signal} = 0;
|
||
repeat(10) @(posedge clk);
|
||
```
|
||
|
||
[NEGATIVE EXAMPLE - DO NOT DO THIS]
|
||
```verilog
|
||
// WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
|
||
reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
|
||
|
||
// WRONG: Not considering condition priority in FSM
|
||
// If missing code is "else if (condition_b)", you must make condition_a FALSE first!
|
||
// Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
|
||
// Then signal_a must be 1 (FALSE) for the else-if branch to execute
|
||
signal_a = 0; // WRONG: This blocks the else-if branch!
|
||
signal_b = 1; // This will NOT trigger because signal_a=0 took priority
|
||
|
||
// CORRECT: Analyze priority, set higher-priority conditions to FALSE
|
||
signal_a = 1; // Now the first condition (!signal_a) is FALSE
|
||
signal_b = 1; // Now this else-if branch can execute
|
||
|
||
// WRONG: Trying to assign internal state
|
||
state = IDLE; // ERROR: Cannot modify internal signal!
|
||
|
||
// WRONG: Using force on internal signal
|
||
force DUT.state = WL; // ERROR: Cannot force internal signal!
|
||
|
||
// WRONG: Checking internal state in condition
|
||
if (state == WL) begin // ERROR: Cannot read internal signal!
|
||
{example_signal} = 1;
|
||
end
|
||
|
||
// CORRECT ALTERNATIVE: Estimate timing instead
|
||
repeat(5) @(posedge clk); // Wait for FSM to reach expected state
|
||
{example_signal} = 1;
|
||
```
|
||
|
||
{'='*60}
|
||
⚠️ [FINAL SIGNAL NAME CHECK - BEFORE YOU WRITE CODE] ⚠️
|
||
{'='*60}
|
||
|
||
🚫 STOP! Verify your signal names:
|
||
|
||
✅ ALLOWED INPUTS: {inputs_json}
|
||
🚫 FORBIDDEN: {forbidden_display}
|
||
|
||
❌ WRONG: input_signal_1 = 1; // Does NOT exist!
|
||
❌ WRONG: input_signal_2 = 0; // Does NOT exist!
|
||
❌ WRONG: reset = 1; // Wrong! Use '{reset_signal}' instead!
|
||
❌ WRONG: data_1 = 1; // Does NOT exist!
|
||
|
||
✅ CORRECT: {example_signal} = 1; // From ALLOWED INPUTS
|
||
✅ CORRECT: {reset_signal} = 1; // Actual reset signal
|
||
|
||
⚠️ RULE: If a signal name is NOT in ALLOWED INPUTS, it does NOT exist!
|
||
Now write the test scenario code using ONLY signal names from ALLOWED INPUTS.
|
||
"""
|
||
|
||
# === [新增] 注入多样性约束 ===
|
||
if self.diversity_injector:
|
||
# 获取未覆盖功能点
|
||
uncovered_functions = []
|
||
if self.semantic_result and self.semantic_result.get('function_points'):
|
||
uncovered_functions = [
|
||
fp for fp in self.semantic_result['function_points']
|
||
if not fp.get('covered', False)
|
||
]
|
||
|
||
# 获取当前目标功能点
|
||
target_function = ""
|
||
if self.energy_allocator and self.energy_allocator.current_target:
|
||
target_function = self.energy_allocator.current_target.function_point
|
||
|
||
# 注入多样性约束
|
||
prompt = self.diversity_injector.inject_diversity_constraints(
|
||
prompt=prompt,
|
||
target_function=target_function,
|
||
uncovered_functions=uncovered_functions
|
||
)
|
||
# =================================
|
||
|
||
return prompt
|
||
|
||
def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
|
||
"""分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
|
||
analysis = []
|
||
|
||
# 检查是否涉及 FSM 状态转换
|
||
has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
|
||
has_else_if = any('else if' in line.lower() for line in missing_lines)
|
||
has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
|
||
|
||
if has_state_case or has_else_if:
|
||
analysis.append("- Missing code involves FSM state transitions or conditional branches")
|
||
|
||
if has_else_if or has_if_condition:
|
||
analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
|
||
analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
|
||
analysis.append("- Analyze the missing code's context: what conditions block this branch?")
|
||
|
||
if has_state_case:
|
||
analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
|
||
|
||
# === 新增:FSM 状态路径分析 ===
|
||
# 尝试从缺失代码中提取 FSM 状态信息
|
||
fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
|
||
if fsm_state_info:
|
||
analysis.append("")
|
||
analysis.append("[FSM STATE PATH ANALYSIS]")
|
||
analysis.extend(fsm_state_info)
|
||
|
||
return "\n".join(analysis) if analysis else ""
|
||
|
||
def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
|
||
"""
|
||
从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
|
||
|
||
分析策略:
|
||
1. 从缺失代码的上下文识别 case 分支(FSM 状态)
|
||
2. 分析该状态下的条件分支优先级
|
||
3. 识别需要满足的输入条件
|
||
"""
|
||
info = []
|
||
|
||
# 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
|
||
try:
|
||
with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||
full_content = f.read()
|
||
except:
|
||
return info
|
||
|
||
# 提取缺失代码所在的 FSM 状态
|
||
target_state = None
|
||
missing_condition = None
|
||
|
||
for line in missing_lines:
|
||
# 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
|
||
# 格式可能是 "Line N: STATE:" 或 "STATE:"
|
||
state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
|
||
if state_match:
|
||
potential_state = state_match.group(1)
|
||
# 排除常见的非状态关键字
|
||
if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
|
||
target_state = potential_state
|
||
break
|
||
|
||
# 如果没找到,尝试从整个文件中分析
|
||
if not target_state:
|
||
# 查找缺失行附近的 case 分支
|
||
lines = full_content.split('\n')
|
||
for i, line in enumerate(lines):
|
||
# 查找覆盖率标记为 0 的行
|
||
if re.match(r'^%000000', line.strip()):
|
||
# 向上查找最近的 case 分支(状态)
|
||
for j in range(i-1, max(0, i-20), -1):
|
||
state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
|
||
if state_match:
|
||
target_state = state_match.group(1)
|
||
break
|
||
if target_state:
|
||
break
|
||
|
||
# 分析缺失的条件分支
|
||
for line in missing_lines:
|
||
# 提取 else if 条件
|
||
else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
|
||
if else_if_match:
|
||
missing_condition = else_if_match.group(1)
|
||
break
|
||
# 提取 if 条件
|
||
if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
|
||
if if_match:
|
||
missing_condition = if_match.group(1)
|
||
break
|
||
|
||
# 生成具体的指导信息
|
||
if target_state:
|
||
info.append(f"- Target FSM state identified: {target_state}")
|
||
|
||
# 查找复位后的初始状态
|
||
reset_state = self._find_reset_state(full_content)
|
||
if reset_state:
|
||
info.append(f"- After reset, FSM starts in state: {reset_state}")
|
||
|
||
if reset_state != target_state:
|
||
info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
|
||
info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
|
||
|
||
# 尝试找到状态转换路径
|
||
transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
|
||
if transition_hint:
|
||
info.append(f"- To reach {target_state}: {transition_hint}")
|
||
|
||
if missing_condition:
|
||
info.append(f"- Missing condition: \"{missing_condition}\"")
|
||
# 分析条件优先级
|
||
priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
|
||
if priority_info:
|
||
info.extend(priority_info)
|
||
|
||
return info
|
||
|
||
def _find_reset_state(self, content: str) -> Optional[str]:
|
||
"""从 DUT 代码中找到复位后的初始状态"""
|
||
# 查找复位逻辑中的状态赋值
|
||
# 常见模式: if (reset) state <= IDLE; 或 state <= 0;
|
||
patterns = [
|
||
r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
|
||
r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
|
||
]
|
||
|
||
for pattern in patterns:
|
||
match = re.search(pattern, content, re.IGNORECASE)
|
||
if match:
|
||
state = match.group(1)
|
||
# 如果是数字,尝试从参数中找对应的状态名
|
||
if state.isdigit():
|
||
# 查找参数定义
|
||
param_match = re.search(r'parameter\s+([^;]+);', content)
|
||
if param_match:
|
||
params = param_match.group(1)
|
||
# 解析参数列表
|
||
for param in params.split(','):
|
||
param = param.strip()
|
||
if '=' in param:
|
||
name, value = param.split('=')
|
||
if value.strip() == state:
|
||
return name.strip()
|
||
return state
|
||
|
||
return None
|
||
|
||
def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
|
||
"""找到从一个状态到另一个状态的转换条件"""
|
||
# 在 case 语句中查找 from_state 分支
|
||
# 提取该分支下到 to_state 的转换条件
|
||
|
||
# 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
|
||
pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
|
||
match = re.search(pattern, content, re.DOTALL)
|
||
|
||
if match:
|
||
# 提取条件
|
||
branch_code = match.group(0)
|
||
# 查找 if 条件
|
||
if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
if if_match:
|
||
return f"set condition: {if_match.group(1)}"
|
||
|
||
# 查找 else if 条件
|
||
elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
|
||
if elif_match:
|
||
return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
|
||
|
||
# 尝试反向查找:什么条件下会转换到目标状态
|
||
trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
|
||
trans_match = re.search(trans_pattern, content)
|
||
if trans_match:
|
||
return f"set condition: {trans_match.group(1)}"
|
||
|
||
return None
|
||
|
||
def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
|
||
"""分析条件分支的优先级,找出需要排除的条件"""
|
||
info = []
|
||
|
||
if not state:
|
||
return info
|
||
|
||
# 查找该状态下的所有条件分支
|
||
# 提取 state: 后面的代码块
|
||
state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
|
||
match = re.search(state_block_pattern, content, re.DOTALL)
|
||
|
||
if not match:
|
||
return info
|
||
|
||
state_block = match.group(1)
|
||
|
||
# 提取所有条件分支
|
||
conditions = []
|
||
for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
|
||
conditions.append(cond_match.group(1).strip())
|
||
|
||
# 找到缺失条件在列表中的位置
|
||
missing_idx = -1
|
||
for i, cond in enumerate(conditions):
|
||
# 简化比较(去除空格)
|
||
if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
|
||
missing_condition.replace(' ', '') in cond.replace(' ', ''):
|
||
missing_idx = i
|
||
break
|
||
|
||
if missing_idx > 0:
|
||
info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
|
||
info.append(f"- You must make ALL earlier conditions FALSE:")
|
||
for i in range(missing_idx):
|
||
cond = conditions[i]
|
||
# 分析如何使条件为 FALSE
|
||
false_hint = self._get_false_hint(cond)
|
||
info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
|
||
|
||
return info
|
||
|
||
def _get_false_hint(self, condition: str) -> str:
|
||
"""分析如何使条件为 FALSE"""
|
||
condition = condition.strip()
|
||
|
||
# 处理 !signal 形式
|
||
if condition.startswith('!'):
|
||
signal = condition[1:].strip()
|
||
return f"set {signal} = 1"
|
||
|
||
# 处理 signal 形式(布尔值)
|
||
if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
|
||
return f"set {condition} = 0"
|
||
|
||
# 处理比较运算符
|
||
if '==' in condition:
|
||
parts = condition.split('==')
|
||
if len(parts) == 2:
|
||
signal = parts[0].strip()
|
||
value = parts[1].strip()
|
||
if value.isdigit():
|
||
return f"set {signal} != {value}"
|
||
|
||
# 处理 >= 形式
|
||
if '>=' in condition:
|
||
parts = condition.split('>=')
|
||
if len(parts) == 2:
|
||
signal = parts[0].strip()
|
||
value = parts[1].strip()
|
||
if value.isdigit():
|
||
return f"set {signal} < {value}"
|
||
|
||
# 处理 > 形式
|
||
if '>' in condition and '>=' not in condition:
|
||
parts = condition.split('>')
|
||
if len(parts) == 2:
|
||
signal = parts[0].strip()
|
||
value = parts[1].strip()
|
||
return f"set {signal} <= {value}"
|
||
|
||
return "analyze the condition logic"
|
||
|
||
def _generate_semantic_context(self) -> str:
|
||
"""
|
||
[新增] 从语义分析结果生成 Prompt 上下文
|
||
|
||
整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
|
||
- FSM 状态转换图
|
||
- 功能点重要性排序
|
||
- 测试场景建议
|
||
|
||
Returns:
|
||
语义上下文字符串,用于增强 Prompt
|
||
"""
|
||
if not self.semantic_result:
|
||
return ""
|
||
|
||
context_parts = []
|
||
|
||
# 1. 模块基础信息
|
||
module_name = self.semantic_result.get('module_name', '')
|
||
inputs = self.semantic_result.get('inputs', [])
|
||
outputs = self.semantic_result.get('outputs', [])
|
||
|
||
if module_name:
|
||
context_parts.append(f"Module Name: {module_name}")
|
||
if inputs:
|
||
context_parts.append(f"Module Inputs: {', '.join(inputs)}")
|
||
if outputs:
|
||
context_parts.append(f"Module Outputs: {', '.join(outputs)}")
|
||
|
||
# 2. FSM 信息(最关键)
|
||
fsm_info = self.semantic_result.get('fsm_info')
|
||
if fsm_info:
|
||
context_parts.append("")
|
||
context_parts.append("=== FSM STATE MACHINE DETAILS ===")
|
||
context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
|
||
|
||
states = fsm_info.get('states', [])
|
||
if states:
|
||
context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
|
||
|
||
# 状态转换表
|
||
transitions = fsm_info.get('transitions', {})
|
||
if transitions:
|
||
context_parts.append("")
|
||
context_parts.append("=== STATE TRANSITION TABLE ===")
|
||
context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
|
||
context_parts.append("")
|
||
|
||
for state, trans_list in transitions.items():
|
||
for trans in trans_list:
|
||
condition = trans.get('condition', 'default')
|
||
next_state = trans.get('next_state', 'unknown')
|
||
if condition == 'default':
|
||
context_parts.append(f" {state} --[default]--> {next_state}")
|
||
else:
|
||
context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
|
||
|
||
# 添加状态转换路径分析
|
||
context_parts.append("")
|
||
context_parts.append("=== STATE TRANSITION PATH HINTS ===")
|
||
reset_state = self._find_reset_state_from_fsm(fsm_info)
|
||
if reset_state:
|
||
context_parts.append(f"Initial State (after reset): {reset_state}")
|
||
context_parts.append("")
|
||
context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
|
||
context_parts.append(" 1. Reset the DUT to initialize to the starting state")
|
||
context_parts.append(" 2. Apply inputs to trigger state transitions")
|
||
context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
|
||
context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
|
||
|
||
# 3. 功能点优先级
|
||
function_points = self.semantic_result.get('function_points', [])
|
||
if function_points:
|
||
context_parts.append("")
|
||
context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
|
||
|
||
for i, fp in enumerate(function_points[:10]): # Top 10
|
||
name = fp.get('name', 'unknown')
|
||
fp_type = fp.get('type', 'unknown')
|
||
importance = fp.get('importance', 0)
|
||
covered = fp.get('covered', False)
|
||
status = "✓ COVERED" if covered else "✗ NOT COVERED"
|
||
context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
|
||
|
||
# 4. 测试场景建议
|
||
test_scenarios = self.semantic_result.get('test_scenarios', [])
|
||
if test_scenarios:
|
||
context_parts.append("")
|
||
context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
|
||
|
||
for i, ts in enumerate(test_scenarios[:5]): # Top 5
|
||
name = ts.get('name', 'unknown')
|
||
description = ts.get('description', '')
|
||
priority = ts.get('priority', 0)
|
||
context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
|
||
|
||
if context_parts:
|
||
return "\n".join(context_parts)
|
||
return ""
|
||
|
||
def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
|
||
"""从 FSM 信息中推断复位后的初始状态"""
|
||
# 方法1:检查是否有明确的复位状态
|
||
transitions = fsm_info.get('transitions', {})
|
||
|
||
# 复位后通常进入第一个定义的状态或特定名称的状态
|
||
states = fsm_info.get('states', [])
|
||
|
||
# 常见的初始状态命名
|
||
initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
|
||
|
||
for name in initial_state_names:
|
||
if name in states:
|
||
return name
|
||
|
||
# 如果没有找到,返回第一个状态
|
||
if states:
|
||
return states[0]
|
||
|
||
return None
|
||
|
||
|
||
# ============================================================================
|
||
# TBInjector - 场景注入器
|
||
# ============================================================================
|
||
class TBInjector:
|
||
"""
|
||
场景注入器 - 将LLM生成的测试代码注入到现有测试平台
|
||
|
||
集成三层防护策略:
|
||
1. Layer 1: Prompt约束(由CoverageParser处理)
|
||
2. Layer 2: 智能代码转换
|
||
3. Layer 3: 质量评估和重试建议
|
||
"""
|
||
|
||
def __init__(self, tb_code):
    """Create an injector bound to one testbench.

    Args:
        tb_code: Source text of the original testbench.
    """
    # Testbench text that later injections are applied to.
    self.content = tb_code
    # Result dict of the most recent preprocessing run; None until inject() runs.
    self.last_validation_result = None
    # Black-box validator, primed with the signals found in this testbench.
    self.validator = BlackBoxValidator()
    self.validator._extract_signals_from_tb(tb_code)
|
||
|
||
def inject(self, new_code, iter_idx):
    """Inject an LLM-generated test scenario into the testbench.

    Args:
        new_code: Test code produced by the LLM.
        iter_idx: Iteration index, used in log prefixes and the scenario block.

    Returns:
        The testbench text with the new scenario block inserted.
    """
    # Step 1: clean up and validate the generated code.
    scenario_code, result = self._preprocess_code(new_code, iter_idx)
    self.last_validation_result = result

    # Report what the validation found.
    critical = result['violations']['critical']
    if critical:
        logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
        for violation in critical:
            logger.warning(f" - {violation}")

    warnings = result['violations']['warning']
    if warnings:
        logger.info(f"[CGA-{iter_idx}] Warnings:")
        for violation in warnings:
            logger.info(f" - {violation}")

    transformations = result['transformations']
    if transformations:
        logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
        for change in transformations:
            logger.info(f" - {change['type']}: {change.get('original', 'N/A')[:50]}...")

    # Step 2: wrap the code in a scenario block; Step 3: splice it into the TB.
    scenario_block = self._build_scenario_block(scenario_code, iter_idx)
    return self._inject_scenario(scenario_block)
|
||
|
||
def should_retry(self):
    """Return the ``should_retry`` flag of the last validation result.

    ``False`` is returned when nothing has been validated yet or the flag
    is absent.
    """
    outcome = self.last_validation_result
    return False if outcome is None else outcome.get('should_retry', False)
|
||
|
||
def get_quality_score(self):
    """Return the ``quality_score`` of the last validation result.

    ``0`` is returned when nothing has been validated yet or the score is
    absent.
    """
    outcome = self.last_validation_result
    return 0 if outcome is None else outcome.get('quality_score', 0)
|
||
|
||
def _preprocess_code(self, code, iter_idx):
|
||
"""预处理LLM生成的代码 - 增强版,包含语法预检查"""
|
||
# 移除markdown标记
|
||
code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
|
||
code = re.sub(r'```', '', code)
|
||
|
||
# 移除task包装
|
||
code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
|
||
code = re.sub(r'endtask', '', code)
|
||
|
||
# 移除$finish和$stop
|
||
code = re.sub(r'\$finish\s*;', '', code)
|
||
code = re.sub(r'\$stop\s*;', '', code)
|
||
|
||
# 移除多余空行
|
||
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
|
||
# [修复] 移除 initial begin ... end 代码块
|
||
# LLM 可能生成完整的 initial begin...end 块,但我们只需要其中的测试代码
|
||
# 使用括号计数来正确匹配嵌套结构,而不是简单的正则表达式
|
||
|
||
initial_match = re.search(r'\binitial\s+begin\b', code, re.IGNORECASE)
|
||
if initial_match:
|
||
logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
|
||
logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
|
||
|
||
# 找到 initial begin 后的起始位置
|
||
start_pos = initial_match.end()
|
||
|
||
# 使用括号计数找到匹配的 end
|
||
begin_count = 1 # 已经遇到一个 begin (initial begin)
|
||
end_pos = start_pos
|
||
code_after_initial = code[start_pos:]
|
||
|
||
for i, char in enumerate(code_after_initial):
|
||
# 检查是否是关键字 begin 或 end
|
||
remaining = code_after_initial[i:]
|
||
if re.match(r'\bbegin\b', remaining, re.IGNORECASE):
|
||
begin_count += 1
|
||
elif re.match(r'\bend\b', remaining, re.IGNORECASE):
|
||
begin_count -= 1
|
||
if begin_count == 0:
|
||
# 找到匹配的 end
|
||
end_pos = start_pos + i
|
||
break
|
||
|
||
if begin_count == 0:
|
||
# 提取块内的内容
|
||
inner_content = code[start_pos:end_pos].strip()
|
||
# 移除末尾的 end(如果有)
|
||
inner_content = re.sub(r'\bend\s*$', '', inner_content.strip())
|
||
# 重建代码:移除 initial begin ... end 包装
|
||
code = code[:initial_match.start()] + inner_content + code[end_pos + 3:] # +3 跳过 'end'
|
||
logger.info(f"[CGA-{iter_idx}] Successfully removed 'initial begin...end' wrapper")
|
||
else:
|
||
logger.warning(f"[CGA-{iter_idx}] Could not find matching 'end' for 'initial begin', keeping code as-is")
|
||
|
||
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
|
||
# 信号名自动修正(在验证之前)
|
||
code = self._auto_correct_signal_names(code)
|
||
|
||
# 三层防护:黑盒约束验证和转换
|
||
code, result = self.validator.validate_and_transform(code, self.content)
|
||
|
||
# [新增] 第四层:Verilog 语法预检查
|
||
# 提取完整 TB 中已声明的信号(不只是代码片段)
|
||
signal_widths = self._extract_signal_widths()
|
||
declared_signals = self._extract_declared_signals()
|
||
|
||
# 调用语法检查,传入已声明信号列表
|
||
syntax_result = self.validator.check_syntax_issues(
|
||
code,
|
||
signal_widths,
|
||
declared_signals=declared_signals
|
||
)
|
||
|
||
# 合并检查结果
|
||
result['syntax_check'] = syntax_result
|
||
|
||
# 记录语法问题日志
|
||
if syntax_result['width_mismatch']:
|
||
logger.warning(f"[CGA-{iter_idx}] Width mismatch detected:")
|
||
for issue in syntax_result['width_mismatch']:
|
||
logger.warning(f" - {issue['message']}")
|
||
if 'suggestion' in issue:
|
||
logger.info(f" Suggestion: {issue['suggestion']}")
|
||
|
||
if syntax_result['logic_issues']:
|
||
logger.warning(f"[CGA-{iter_idx}] Logic issues detected:")
|
||
for issue in syntax_result['logic_issues']:
|
||
logger.warning(f" - {issue['message']}")
|
||
if 'suggestion' in issue:
|
||
logger.info(f" Suggestion: {issue['suggestion']}")
|
||
|
||
if syntax_result['syntax_warnings']:
|
||
for issue in syntax_result['syntax_warnings']:
|
||
if issue['severity'] == 'error':
|
||
logger.error(f"[CGA-{iter_idx}] Syntax error: {issue['message']}")
|
||
else:
|
||
logger.warning(f"[CGA-{iter_idx}] Syntax warning: {issue['message']}")
|
||
|
||
# 如果语法检查发现问题,设置 should_retry
|
||
if syntax_result['should_retry']:
|
||
result['should_retry'] = True
|
||
logger.warning(f"[CGA-{iter_idx}] Syntax issues detected, recommend retry with corrected code")
|
||
|
||
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
|
||
|
||
return code.strip(), result
|
||
|
||
def _extract_declared_signals(self) -> set:
|
||
"""从完整测试平台中提取所有已声明的信号"""
|
||
signals = set()
|
||
|
||
# 匹配 reg [N:0] signal 或 wire [N:0] signal
|
||
for match in re.finditer(r'\b(reg|wire|logic)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
|
||
signals.add(match.group(2))
|
||
|
||
# 匹配 input/output 声明
|
||
for match in re.finditer(r'\b(input|output|inout)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
|
||
signals.add(match.group(2))
|
||
|
||
# 匹配模块端口连接中的信号
|
||
for match in re.finditer(r'\.(\w+)\s*\(\s*(\w+)\s*\)', self.content):
|
||
signals.add(match.group(2)) # 添加连接的信号名
|
||
|
||
return signals
|
||
|
||
def _extract_signal_widths(self) -> Dict[str, int]:
|
||
"""从测试平台中提取信号位宽信息"""
|
||
widths = {}
|
||
|
||
# 匹配 reg [N:0] signal 或 wire [N:0] signal
|
||
width_pattern = re.compile(r'\b(reg|wire)\s+\[(\d+):(\d+)\]\s+(\w+)')
|
||
|
||
for match in width_pattern.finditer(self.content):
|
||
high = int(match.group(2))
|
||
low = int(match.group(3))
|
||
width = high - low + 1
|
||
signal = match.group(4)
|
||
widths[signal] = width
|
||
|
||
# 匹配无位宽声明的信号(默认 1 位)
|
||
single_bit_pattern = re.compile(r'\b(reg|wire)\s+(?!.*\[)(\w+)\s*;')
|
||
for match in single_bit_pattern.finditer(self.content):
|
||
signal = match.group(2)
|
||
if signal not in widths:
|
||
widths[signal] = 1
|
||
|
||
return widths
|
||
|
||
def _auto_correct_signal_names(self, code: str) -> str:
|
||
"""自动修正信号名错误"""
|
||
corrections = []
|
||
|
||
# 获取正确的复位信号名
|
||
reset_signal = self.validator._find_reset_signal()
|
||
|
||
# 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
|
||
if reset_signal != "reset":
|
||
# 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
|
||
pattern = r'\breset\b(?!\w)'
|
||
matches = re.findall(pattern, code)
|
||
if matches:
|
||
code = re.sub(pattern, reset_signal, code)
|
||
corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
|
||
if reset_signal == "areset":
|
||
pattern = r'\brst\b(?!\w)'
|
||
matches = re.findall(pattern, code)
|
||
if matches:
|
||
code = re.sub(pattern, reset_signal, code)
|
||
corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
|
||
|
||
# 检查是否使用了不存在的信号
|
||
for signal in re.findall(r'\b(\w+)\s*=', code):
|
||
signal = signal.strip()
|
||
# 跳过已知的合法信号
|
||
if signal in self.validator.dut_inputs:
|
||
continue
|
||
# 检查是否是复位信号的别名
|
||
if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
|
||
code = re.sub(rf'\b{signal}\b', reset_signal, code)
|
||
corrections.append(f"{signal} -> {reset_signal}")
|
||
|
||
# [新增] 检测并修正类似 input_signal_N 的通用命名模式
|
||
# 这是 LLM 总是使用的通用名称,需要映射到实际信号
|
||
valid_inputs = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
|
||
|
||
# 处理 input_signal_1, input_signal_2, ... 等模式
|
||
for match in re.finditer(r'\b(input_signal_\d+)\b', code):
|
||
wrong_signal = match.group(1)
|
||
if valid_inputs:
|
||
# 提取数字后缀,尝试按顺序映射
|
||
num_match = re.search(r'(\d+)$', wrong_signal)
|
||
if num_match:
|
||
idx = int(num_match.group(1)) - 1 # input_signal_1 -> index 0
|
||
if idx < len(valid_inputs):
|
||
correct_signal = valid_inputs[idx]
|
||
else:
|
||
correct_signal = valid_inputs[0] # fallback
|
||
else:
|
||
correct_signal = valid_inputs[0]
|
||
|
||
code = re.sub(rf'\b{re.escape(wrong_signal)}\b', correct_signal, code)
|
||
corrections.append(f"{wrong_signal} -> {correct_signal} (generic name mapped)")
|
||
|
||
# 处理 in, data 等太通用的名称
|
||
for generic_name in ['in', 'data', 'input', 'din']:
|
||
if re.search(rf'\b{generic_name}\s*=', code):
|
||
if valid_inputs:
|
||
# 使用第一个有效的输入信号作为替代
|
||
correct_signal = valid_inputs[0]
|
||
code = re.sub(rf'\b{generic_name}\s*=', f'{correct_signal} =', code)
|
||
corrections.append(f"{generic_name} -> {correct_signal} (too generic)")
|
||
|
||
if corrections:
|
||
logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
|
||
|
||
return code
|
||
|
||
def _build_scenario_block(self, scenario_code, iter_idx):
|
||
"""构建完整的场景代码块"""
|
||
# 格式化缩进
|
||
lines = scenario_code.split('\n')
|
||
formatted_lines = []
|
||
for line in lines:
|
||
stripped = line.strip()
|
||
if stripped:
|
||
formatted_lines.append(f" {stripped}")
|
||
formatted_code = '\n'.join(formatted_lines)
|
||
|
||
# 检测输出信号用于日志
|
||
output_signals = self._detect_output_signals()
|
||
output_log = self._generate_output_log(output_signals, iter_idx)
|
||
|
||
# 构建完整块
|
||
block = f'''
|
||
// ========== CGA Iteration {iter_idx} ==========
|
||
scenario = 100 + {iter_idx};
|
||
// Reset signals to safe state
|
||
{self._generate_signal_reset()}
|
||
#5;
|
||
// CGA generated test sequence:
|
||
{formatted_code}
|
||
// Log results
|
||
{output_log}
|
||
// ==============================================
|
||
'''
|
||
return block
|
||
|
||
def _detect_output_signals(self):
|
||
"""检测DUT的输出信号"""
|
||
outputs = []
|
||
wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
for match in wire_pattern.finditer(self.content):
|
||
signal = match.group(1)
|
||
if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
|
||
outputs.append(signal)
|
||
return outputs
|
||
|
||
def _generate_signal_reset(self):
|
||
"""生成信号重置代码"""
|
||
inputs = []
|
||
reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
|
||
for match in reg_pattern.finditer(self.content):
|
||
signal = match.group(1)
|
||
if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
|
||
inputs.append(signal)
|
||
|
||
if inputs:
|
||
return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
|
||
return " // No input signals to reset"
|
||
|
||
def _generate_output_log(self, signals, iter_idx):
|
||
"""生成输出日志代码"""
|
||
if not signals:
|
||
return f' $display("[CGA-{iter_idx}] Scenario executed");'
|
||
|
||
sig_names = ", ".join(signals)
|
||
format_str = ", ".join(["%b"] * len(signals))
|
||
|
||
return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
|
||
|
||
def _inject_scenario(self, scenario_block):
|
||
"""将场景块注入到测试平台"""
|
||
modified_tb = self.content
|
||
|
||
# 策略:如果有 $fclose,在其之前插入
|
||
if "$fclose" in modified_tb:
|
||
modified_tb = re.sub(
|
||
r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
|
||
scenario_block + r'\1\2',
|
||
modified_tb,
|
||
count=1
|
||
)
|
||
elif "$finish" in modified_tb:
|
||
# 否则在 $finish 之前插入
|
||
modified_tb = modified_tb.replace(
|
||
"$finish;",
|
||
scenario_block + "\n $finish;"
|
||
)
|
||
else:
|
||
# 兜底:在最后一个 end 之前插入
|
||
last_end = modified_tb.rfind("end")
|
||
if last_end != -1:
|
||
modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
|
||
|
||
return modified_tb |