Files
TBgen_App/autoline/cga_utils.py
2026-03-30 16:46:48 +08:00

5444 lines
234 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# # #第四版
# # """
# # Description : Utils for CGA (CoverageParser & TBInjector)
# # - Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
# # - Enhanced: Three-layer protection for black-box constraints
# # * Layer 1: Enhanced Prompt constraints (prevention)
# # * Layer 2: Smart code transformation (conversion)
# # * Layer 3: Quality assessment & retry (fallback)
# # - Integrated: Diversity Constraint Injector (Layer 1)
# # Author : CorrectBench Integration
# # """
# # import re
# # import os
# # import logging
# # from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
# # # [新增] 导入多样性约束注入器
# # if TYPE_CHECKING:
# # from autoline.diversity_injector import DiversityInjector
# # # 配置日志
# # logger = logging.getLogger(__name__)
# # # ============================================================================
# # # 三层防护策略说明
# # # ============================================================================
# # # Layer 1 - Prompt约束: 动态提取允许信号列表,明确约束,正反面示例
# # # Layer 2 - 智能转换: 检测违规意图,尝试转换为合法形式
# # # Layer 3 - 质量评估: 违规比例过高时返回质量分数,触发重新生成
# # # ============================================================================
# # # ============================================================================
# # # 黑盒约束检查器 - 三层防护策略实现
# # # ============================================================================
# # class BlackBoxValidator:
# # """
# # 黑盒约束验证器 - 三层防护策略
# # Layer 1: 增强Prompt约束预防
# # - 动态提取允许信号列表
# # - 生成明确的约束提示
# # Layer 2: 智能代码转换(转换)
# # - 检测违规意图
# # - 尝试转换为合法的等价形式
# # - 转换失败时才注释
# # Layer 3: 质量评估(重试)
# # - 计算代码质量分数
# # - 违规比例过高时建议重试
# # """
# # # 常见的内部信号命名模式(按严重程度分类)
# # INTERNAL_SIGNAL_PATTERNS = {
# # # 高风险FSM状态相关绝对不能修改
# # 'critical': [
# # (r'\bstate\b', 'FSM状态寄存器'),
# # (r'\bnext_state\b', 'FSM下一状态'),
# # (r'\bcurrent_state\b', 'FSM当前状态'),
# # (r'\bnext\b(?!\s*[,@])', '下一状态简写'),
# # ],
# # # 中风险:计数器和内部寄存器
# # 'warning': [
# # (r'\bcounter\b', '内部计数器'),
# # (r'\bcount\b', '计数寄存器'),
# # (r'\bcnt\b', '计数简写'),
# # (r'\bfall_counter\b', '下落计数器'),
# # (r'\breg_\w+', '内部寄存器'),
# # ],
# # # 低风险:可疑信号(需要确认)
# # 'info': [
# # (r'\binternal_\w+', '内部信号'),
# # (r'\btemp_\w+', '临时信号'),
# # (r'\bprev_\w+', '前一状态'),
# # ]
# # }
# # # 非法语句模式
# # FORBIDDEN_STATEMENTS = [
# # (r'\bforce\s+(\w+)', 'force语句', 'critical'),
# # (r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
# # (r'\bdeassign\s+', 'deassign语句', 'critical'),
# # (r'\brelease\s+', 'release语句', 'critical'),
# # ]
# # # 层次化访问模式(如 DUT.state
# # HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
# # def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None):
# # """
# # Args:
# # dut_inputs: DUT模块的输入端口列表
# # dut_outputs: DUT模块的输出端口列表
# # """
# # self.dut_inputs = dut_inputs or []
# # self.dut_outputs = dut_outputs or []
# # self.violations = {'critical': [], 'warning': [], 'info': []}
# # self.transformations = []
# # def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
# # """验证并转换代码 - 主入口"""
# # self.violations = {'critical': [], 'warning': [], 'info': []}
# # self.transformations = []
# # if tb_code:
# # self._extract_signals_from_tb(tb_code)
# # original_lines = code.strip().split('\n')
# # total_lines = len([l for l in original_lines if l.strip() and not l.strip().startswith('//')])
# # # Step 1: 移除非法语句
# # code = self._transform_forbidden_statements(code)
# # # Step 2: 转换层次化访问
# # code = self._transform_hierarchical_access(code)
# # # Step 3: 智能转换内部信号访问
# # code = self._smart_transform_internal_signals(code)
# # # Step 4: 最后清理
# # code = self._final_cleanup(code)
# # # 计算质量分数
# # quality_score = self._calculate_quality_score(total_lines)
# # # 决定是否需要重试
# # should_retry = quality_score < 50 or len(self.violations['critical']) > 3
# # result = {
# # 'quality_score': quality_score,
# # 'is_valid': len(self.violations['critical']) == 0,
# # 'violations': self.violations,
# # 'transformations': self.transformations,
# # 'should_retry': should_retry,
# # 'allowed_signals': self._get_allowed_signals_info()
# # }
# # return code.strip(), result
# # def _extract_signals_from_tb(self, tb_code: str):
# # """从测试平台代码中提取DUT输入输出信号"""
# # dut_match = re.search(r'(\w+)\s+(?:DUT|dut|uut|UUT)\s*\(', tb_code, re.IGNORECASE)
# # if dut_match:
# # start = dut_match.start()
# # bracket_count = 0
# # end = start
# # for i, char in enumerate(tb_code[start:]):
# # if char == '(':
# # bracket_count += 1
# # elif char == ')':
# # bracket_count -= 1
# # if bracket_count == 0:
# # end = start + i + 1
# # break
# # dut_instance = tb_code[start:end]
# # port_pattern = r'\.(\w+)\s*\(\s*(\w+)\s*\)'
# # for match in re.finditer(port_pattern, dut_instance):
# # signal_name = match.group(2)
# # is_input = re.search(rf'\breg\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
# # is_output = re.search(rf'\bwire\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
# # if is_input and signal_name not in self.dut_inputs:
# # self.dut_inputs.append(signal_name)
# # if is_output and signal_name not in self.dut_outputs:
# # self.dut_outputs.append(signal_name)
# # # 备用方案通过reg/wire声明推断
# # if not self.dut_inputs and not self.dut_outputs:
# # for match in re.finditer(r'\breg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
# # signal = match.group(1)
# # if signal.lower() not in ['file', 'scenario', 'i', 'j', 'k', 'cnt']:
# # if signal not in self.dut_inputs:
# # self.dut_inputs.append(signal)
# # for match in re.finditer(r'\bwire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
# # signal = match.group(1)
# # if signal not in self.dut_outputs:
# # self.dut_outputs.append(signal)
# # def _transform_forbidden_statements(self, code: str) -> str:
# # """转换非法语句"""
# # for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
# # matches = list(re.finditer(pattern, code, re.IGNORECASE))
# # for match in reversed(matches):
# # signal = match.group(1) if match.groups() else 'unknown'
# # self.violations[severity].append(f"{desc}: {signal}")
# # line_start = code.rfind('\n', 0, match.start()) + 1
# # line_end = code.find('\n', match.end())
# # if line_end == -1:
# # line_end = len(code)
# # original_line = code[line_start:line_end]
# # # 尝试转换 force -> 直接赋值(仅对输入信号)
# # if 'force' in match.group(0).lower() and signal in self.dut_inputs:
# # new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
# # code = code[:line_start] + new_line + code[line_end:]
# # self.transformations.append({
# # 'type': 'force_to_assign',
# # 'original': original_line.strip(),
# # 'transformed': new_line.strip()
# # })
# # continue
# # code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
# # self.transformations.append({
# # 'type': 'blocked',
# # 'original': original_line.strip(),
# # 'reason': desc
# # })
# # return code
# # def _transform_hierarchical_access(self, code: str) -> str:
# # """转换层次化访问(如 DUT.state"""
# # for match in re.finditer(self.HIERARCHICAL_ACCESS, code):
# # prefix = match.group(1)
# # signal = match.group(2)
# # if prefix.upper() in ['DUT', 'UUT', 'TOP', 'TB']:
# # if signal not in self.dut_outputs:
# # self.violations['critical'].append(f"层次化访问内部信号: {prefix}.{signal}")
# # line_start = code.rfind('\n', 0, match.start()) + 1
# # line_end = code.find('\n', match.end())
# # if line_end == -1:
# # line_end = len(code)
# # original_line = code[line_start:line_end]
# # code = code[:line_start] + '// [HIERARCHY] ' + original_line.lstrip() + code[line_end:]
# # return code
# # def _smart_transform_internal_signals(self, code: str) -> str:
# # """智能转换内部信号访问"""
# # lines = code.split('\n')
# # transformed_lines = []
# # for line in lines:
# # stripped = line.strip()
# # if stripped.startswith('//') or not stripped:
# # transformed_lines.append(line)
# # continue
# # if (stripped.startswith('#') or stripped.startswith('$') or
# # stripped.startswith('repeat(') or stripped.startswith('@(')):
# # transformed_lines.append(line)
# # continue
# # detected_signals = self._detect_internal_signals_in_line(stripped)
# # has_critical = detected_signals.get('critical', [])
# # has_warning = detected_signals.get('warning', [])
# # if not has_critical and not has_warning:
# # transformed_lines.append(line)
# # continue
# # context = self._analyze_signal_context(stripped, detected_signals)
# # if context['type'] == 'assignment':
# # transformed_lines.append(f"// [INTERNAL_ASSIGN] Cannot modify internal signal")
# # transformed_lines.append(f"// Original: {stripped}")
# # self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
# # elif context['type'] == 'condition':
# # transformed = self._transform_condition(stripped, context)
# # transformed_lines.append(transformed)
# # self.transformations.append({
# # 'type': 'condition_transform',
# # 'original': stripped,
# # 'transformed': transformed
# # })
# # elif context['type'] == 'wait_for_state':
# # transformed = self._transform_state_wait(stripped, context)
# # transformed_lines.append(transformed)
# # self.transformations.append({
# # 'type': 'wait_transform',
# # 'original': stripped,
# # 'transformed': transformed
# # })
# # else:
# # if has_critical:
# # transformed_lines.append(f"// [WARNING] Contains internal signal reference: {has_critical}")
# # transformed_lines.append(f"// Original: {stripped}")
# # for sig in has_critical:
# # self.violations['warning'].append(f"可疑的内部信号访问: {sig}")
# # else:
# # transformed_lines.append(line)
# # return '\n'.join(transformed_lines)
# # def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
# # """检测行中的内部信号"""
# # detected = {'critical': [], 'warning': [], 'info': []}
# # LEGAL_KEYWORDS = {
# # 'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
# # 'while', 'for', 'case', 'default', 'always', 'initial',
# # 'assign', 'wire', 'reg', 'input', 'output', 'inout',
# # 'parameter', 'localparam', 'integer', 'real', 'time',
# # 'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
# # 'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
# # 'true', 'false', 'idle', 'wait'
# # }
# # SYSTEM_FUNCTIONS = {'$display', '$write', '$monitor', '$fopen', '$fclose',
# # '$fdisplay', '$fwrite', '$readmemh', '$readmemb',
# # '$finish', '$stop', '$random', '$time', '$stime'}
# # for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
# # for pattern, name in patterns:
# # matches = re.findall(pattern, line, re.IGNORECASE)
# # if matches:
# # for match in matches:
# # if isinstance(match, tuple):
# # match = match[0] if match[0] else match[1]
# # match_lower = match.lower() if match else ''
# # if match_lower in LEGAL_KEYWORDS:
# # continue
# # if match in SYSTEM_FUNCTIONS:
# # continue
# # if match in self.dut_inputs or match in self.dut_outputs:
# # continue
# # if match.startswith('$'):
# # continue
# # if match and match not in detected[severity]:
# # detected[severity].append(match)
# # return detected
# # def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
# # """分析信号使用上下文"""
# # assign_match = re.search(r'(\w+)\s*(?:=|<=)\s*', line)
# # if assign_match:
# # target = assign_match.group(1)
# # if target in signals.get('critical', []) or target in signals.get('warning', []):
# # return {'type': 'assignment', 'signals': [target], 'line': line}
# # if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
# # return {'type': 'wait_for_state', 'signals': signals.get('critical', []), 'line': line}
# # if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
# # return {'type': 'condition', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
# # return {'type': 'other', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
# # def _transform_condition(self, line: str, context: Dict) -> str:
# # """转换条件判断语句"""
# # original = line
# # if 'state' in str(context['signals']):
# # indent = len(line) - len(line.lstrip())
# # spaces = ' ' * indent
# # transformed = f"// [TRANSFORMED] Original: {original.strip()}\n"
# # transformed += f"{spaces}// Cannot directly check internal state\n"
# # transformed += f"{spaces}// Alternative: Wait for expected clock cycles\n"
# # transformed += f"{spaces}repeat(5) @(posedge clk); // Adjust cycles as needed"
# # self.violations['warning'].append(f"条件判断转换: {original.strip()}")
# # return transformed
# # return f"// [TRANSFORMED] {original}"
# # def _transform_state_wait(self, line: str, context: Dict) -> str:
# # """转换状态等待语句"""
# # indent = len(line) - len(line.lstrip())
# # spaces = ' ' * indent
# # transformed = f"// [TRANSFORMED] Original: {line.strip()}\n"
# # transformed += f"{spaces}// Cannot wait for internal state directly\n"
# # transformed += f"{spaces}// Alternative: Drive inputs and wait for expected cycles\n"
# # transformed += f"{spaces}repeat(10) @(posedge clk); // Adjust based on FSM design"
# # self.violations['info'].append(f"状态等待转换: {line.strip()}")
# # return transformed
# # def _final_cleanup(self, code: str) -> str:
# # """最终清理"""
# # lines = code.split('\n')
# # cleaned = []
# # for line in lines:
# # stripped = line.strip()
# # if stripped in ['begin', 'end'] and cleaned:
# # last = cleaned[-1].strip()
# # if last.startswith('// [TRANSFORMED]') or last.startswith('// [INTERNAL'):
# # continue
# # cleaned.append(line)
# # result = '\n'.join(cleaned)
# # result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
# # return result
# # def _calculate_quality_score(self, total_lines: int) -> int:
# # """计算代码质量分数"""
# # if total_lines == 0:
# # return 0
# # score = 100
# # score -= len(self.violations['critical']) * 20
# # score -= len(self.violations['warning']) * 5
# # score += len([t for t in self.transformations if 'blocked' not in t.get('type', '')]) * 5
# # return max(0, min(100, score))
# # def _get_allowed_signals_info(self) -> Dict:
# # """获取允许的信号信息"""
# # return {
# # 'inputs': self.dut_inputs,
# # 'outputs': self.dut_outputs,
# # 'all_allowed': self.dut_inputs + self.dut_outputs
# # }
# # def generate_constraint_prompt(self) -> str:
# # """生成动态约束提示 - 使用实际信号名"""
# # prompt = "\n[SIGNAL CONSTRAINTS - DERIVED FROM YOUR DUT]\n"
# # # 提取复位信号名(优先使用实际的)
# # reset_signal = self._find_reset_signal()
# # if self.dut_inputs:
# # prompt += "ALLOWED INPUTS (you CAN drive these):\n"
# # for sig in self.dut_inputs:
# # prompt += f" - {sig}\n"
# # else:
# # prompt += "ALLOWED INPUTS: Check the testbench for actual signal names\n"
# # if self.dut_outputs:
# # prompt += "\nOUTPUTS (you can READ but NOT write):\n"
# # for sig in self.dut_outputs:
# # prompt += f" - {sig}\n"
# # prompt += f"""
# # FORBIDDEN ACTIONS:
# # 1. NEVER assign values to internal signals (state, counter, etc.)
# # 2. NEVER use 'force' or 'assign' statements
# # 3. NEVER access signals like DUT.state (hierarchical access)
# # CORRECT APPROACH:
# # - To reach a specific FSM state: drive inputs and WAIT for natural transition
# # - Example: Instead of "state = IDLE", use "{reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;"
# # """
# # return prompt
# # def _find_reset_signal(self) -> str:
# # """查找复位信号名"""
# # # 按优先级查找常见的复位信号名
# # reset_candidates = ['areset', 'rst_n', 'rst', 'reset', 'rst_b']
# # for sig in reset_candidates:
# # if sig in self.dut_inputs:
# # return sig
# # # 如果没找到,检查输入列表中是否有类似名称
# # for sig in self.dut_inputs:
# # sig_lower = sig.lower()
# # if 'reset' in sig_lower or 'rst' in sig_lower:
# # return sig
# # # 默认返回第一个输入信号(排除 clk
# # for sig in self.dut_inputs:
# # if 'clk' not in sig.lower():
# # return sig
# # return "reset" # 兜底
# # # ============================================================================
# # # CoverageParser - 覆盖率解析器
# # # ============================================================================
# # class CoverageParser:
# # """覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
# # [增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
# # [新增] 集成能量分配层,提供目标功能点优先级信息
# # [新增] 集成多样性约束注入器,避免测试用例同质化
# # """
# # def __init__(self, annotated_file, tb_code=None, semantic_result=None,
# # energy_allocator=None, diversity_injector=None):
# # self.file_path = annotated_file
# # self.tb_code = tb_code
# # self.semantic_result = semantic_result # [新增] 语义分析结果
# # self.energy_allocator = energy_allocator # [新增] 能量分配器
# # self.diversity_injector = diversity_injector # [新增] 多样性约束注入器
# # # 修复Verilator 覆盖率标记格式为 %NNNNNN 或 ~NNNNNN 或 ^NNNNNN
# # # %NNNNNN - 行覆盖计数(%000000 表示从未执行)
# # # ~NNNNNN - 分支/条件覆盖计数
# # # ^NNNNNN - 未覆盖的分支
# # self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$') # 匹配 %NNNNNN code
# # self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$') # 匹配 ~NNNNNN code
# # self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$') # 匹配 ^NNNNNN code
# # # 有些情况可能是纯数字开头(无前缀)
# # self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$')
# # self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')
# # self.validator = BlackBoxValidator()
# # if tb_code:
# # self.validator._extract_signals_from_tb(tb_code)
# # def generate_prompt(self, current_score):
# # """生成覆盖率驱动的Prompt"""
# # if not os.path.exists(self.file_path):
# # return None
# # try:
# # with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
# # lines = f.readlines()
# # except Exception:
# # return None
# # missing_blocks = []
# # current_block = []
# # recording = False
# # context_buffer = []
# # CONTEXT_SIZE = 3
# # # 收集缺失行用于 FSM 分析
# # missing_lines = []
# # for i, line in enumerate(lines):
# # line = line.strip()
# # count = -1
# # clean_code = line
# # is_tilde = False
# # is_caret = False
# # # 尝试匹配各种覆盖率标记格式
# # match_pct = self.line_pattern.match(line) # %NNNNNN code
# # match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
# # match_caret = self.caret_pattern.match(line) # ^NNNNNN code
# # match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
# # if match_pct:
# # count = int(match_pct.group(1))
# # clean_code = match_pct.group(2).strip()
# # elif match_tilde:
# # count = int(match_tilde.group(1))
# # clean_code = match_tilde.group(2).strip()
# # is_tilde = True
# # elif match_caret:
# # count = int(match_caret.group(1))
# # clean_code = match_caret.group(2).strip()
# # is_caret = True
# # elif match_plain:
# # # 纯数字格式(可能出现在某些 Verilator 版本)
# # count = int(match_plain.group(1))
# # clean_code = match_plain.group(2).strip()
# # if "//" in clean_code:
# # clean_code = clean_code.split("//")[0].strip()
# # is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
# # is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
# # clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
# # # 覆盖状态判断:
# # # - %NNNNNN: count > 0 表示已覆盖count == 0 表示未覆盖
# # # - ~NNNNNN: 分支覆盖标记,需要进一步检查
# # # - ^NNNNNN: 未覆盖分支标记
# # is_definitely_covered = (not is_tilde and not is_caret and count > 0)
# # is_definitely_missed = (not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or \
# # (is_caret and not is_hard_noise and not is_soft_noise)
# # if recording:
# # if is_definitely_covered:
# # missing_blocks.append(current_block)
# # missing_lines.extend(current_block)
# # current_block = []
# # recording = False
# # if not is_hard_noise:
# # context_buffer.append(clean_code)
# # else:
# # if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
# # current_block.append(f"Line {i+1}: {clean_code}")
# # else:
# # if is_definitely_missed:
# # recording = True
# # if context_buffer:
# # current_block.append(f"... (Context)")
# # for ctx in context_buffer:
# # current_block.append(f" {ctx}")
# # current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
# # else:
# # if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
# # context_buffer.append(clean_code)
# # if len(context_buffer) > CONTEXT_SIZE:
# # context_buffer.pop(0)
# # if recording and current_block:
# # missing_blocks.append(current_block)
# # missing_lines.extend(current_block)
# # if not missing_blocks:
# # return None
# # selected_blocks = missing_blocks[:50]
# # # 获取实际信号名用于示例
# # reset_signal = self.validator._find_reset_signal()
# # inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
# # example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
# # # 分析 FSM 相关的缺失代码
# # fsm_analysis = self._analyze_fsm_missing(missing_lines)
# # # [新增] 从语义分析结果获取 FSM 和功能点信息
# # semantic_context = self._generate_semantic_context()
# # prompt = f"""
# # [ROLE]
# # You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
# # [COVERAGE STATUS]
# # Current testbench achieves {current_score:.2f}% coverage.
# # The following logic blocks in the DUT are NEVER executed during simulation:
# # """
# # for idx, block in enumerate(selected_blocks):
# # prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
# # # [新增] 添加语义分析上下文
# # if semantic_context:
# # prompt += f"""
# # [SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
# # {semantic_context}
# # """
# # # === [新增] 添加能量分配目标上下文 ===
# # if self.energy_allocator:
# # energy_context = self.energy_allocator.get_target_context()
# # if energy_context:
# # prompt += f"""
# # [ENERGY-ALIGNED TARGET - PRIORITY]
# # {energy_context}
# # Focus your test scenario on covering this high-priority target first.
# # """
# # # =====================================
# # prompt += self.validator.generate_constraint_prompt()
# # # 添加 FSM 分析提示
# # if fsm_analysis:
# # prompt += f"""
# # [FSM STATE TRANSITION ANALYSIS - CRITICAL]
# # {fsm_analysis}
# # IMPORTANT: FSM transitions have PRIORITY ORDER!
# # - 'if' conditions are evaluated TOP to BOTTOM
# # - The FIRST matching condition determines the next state
# # - To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
# # - Read the missing code's context carefully: what conditions precede it?
# # """
# # prompt += f"""
# # [OUTPUT REQUIREMENTS - CRITICAL]
# # 1. Return ONLY Verilog test scenario code (NOT a task definition)
# # 2. Your code will be inserted INTO an existing `initial begin ... end` block
# # 3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
# # 4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
# # [CODING STYLE]
# # 1. Use blocking assignments for input signals: `signal = value;`
# # 2. Use `#N;` for time delays: `#10;` means wait 10 time units
# # 3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
# # 4. Start with reset sequence if needed
# # [BLACK-BOX CONSTRAINTS - CRITICAL]
# # 1. You can ONLY control module INPUTS listed above
# # 2. You CANNOT access internal signals (state, next_state, counters, etc.)
# # 3. You CANNOT use `force` or `assign` on internal signals
# # 4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
# # [STEP-BY-STEP APPROACH - REQUIRED]
# # For each missing branch, think through:
# # 1. What STATE must the FSM be in? (Look at the case statement)
# # 2. What CONDITIONS must be true/false? (Check priority order!)
# # 3. How to reach that state from reset? (Trace state transitions)
# # 4. What inputs to apply and in what order?
# # [POSITIVE EXAMPLE - CORRECT APPROACH]
# # ```verilog
# # // Reset sequence - use ACTUAL input signal names from above
# # {reset_signal} = 1;
# # repeat(2) @(posedge clk);
# # {reset_signal} = 0;
# # // Wait for FSM to reach desired state (estimate cycles)
# # repeat(3) @(posedge clk);
# # // Trigger missing branch by driving inputs
# # {example_signal} = 1;
# # repeat(5) @(posedge clk);
# # {example_signal} = 0;
# # repeat(10) @(posedge clk);
# # ```
# # [NEGATIVE EXAMPLE - DO NOT DO THIS]
# # ```verilog
# # // WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
# # reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
# # // WRONG: Not considering condition priority in FSM
# # // If missing code is "else if (condition_b)", you must make condition_a FALSE first!
# # // Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
# # // Then signal_a must be 1 (FALSE) for the else-if branch to execute
# # signal_a = 0; // WRONG: This blocks the else-if branch!
# # signal_b = 1; // This will NOT trigger because signal_a=0 took priority
# # // CORRECT: Analyze priority, set higher-priority conditions to FALSE
# # signal_a = 1; // Now the first condition (!signal_a) is FALSE
# # signal_b = 1; // Now this else-if branch can execute
# # // WRONG: Trying to assign internal state
# # state = IDLE; // ERROR: Cannot modify internal signal!
# # // WRONG: Using force on internal signal
# # force DUT.state = WL; // ERROR: Cannot force internal signal!
# # // WRONG: Checking internal state in condition
# # if (state == WL) begin // ERROR: Cannot read internal signal!
# # {example_signal} = 1;
# # end
# # // CORRECT ALTERNATIVE: Estimate timing instead
# # repeat(5) @(posedge clk); // Wait for FSM to reach expected state
# # {example_signal} = 1;
# # ```
# # [SIGNAL NAME WARNING - CRITICAL]
# # - DO NOT use 'reset' if the actual signal is '{reset_signal}'
# # - DO NOT use 'rst' if the actual signal is '{reset_signal}'
# # - ALWAYS use EXACT signal names from the ALLOWED INPUTS list above
# # - Double-check every signal name before using it!
# # Now write the test scenario code to cover the missing blocks:
# # """
# # # === [新增] 注入多样性约束 ===
# # if self.diversity_injector:
# # # 获取未覆盖功能点
# # uncovered_functions = []
# # if self.semantic_result and self.semantic_result.get('function_points'):
# # uncovered_functions = [
# # fp for fp in self.semantic_result['function_points']
# # if not fp.get('covered', False)
# # ]
# # # 获取当前目标功能点
# # target_function = ""
# # if self.energy_allocator and self.energy_allocator.current_target:
# # target_function = self.energy_allocator.current_target.function_point
# # # 注入多样性约束
# # prompt = self.diversity_injector.inject_diversity_constraints(
# # prompt=prompt,
# # target_function=target_function,
# # uncovered_functions=uncovered_functions
# # )
# # # =================================
# # return prompt
# # def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
# # """分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
# # analysis = []
# # # 检查是否涉及 FSM 状态转换
# # has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
# # has_else_if = any('else if' in line.lower() for line in missing_lines)
# # has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
# # if has_state_case or has_else_if:
# # analysis.append("- Missing code involves FSM state transitions or conditional branches")
# # if has_else_if or has_if_condition:
# # analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
# # analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
# # analysis.append("- Analyze the missing code's context: what conditions block this branch?")
# # if has_state_case:
# # analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
# # # === 新增FSM 状态路径分析 ===
# # # 尝试从缺失代码中提取 FSM 状态信息
# # fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
# # if fsm_state_info:
# # analysis.append("")
# # analysis.append("[FSM STATE PATH ANALYSIS]")
# # analysis.extend(fsm_state_info)
# # return "\n".join(analysis) if analysis else ""
# # def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
# # """
# # 从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
# # 分析策略:
# # 1. 从缺失代码的上下文识别 case 分支FSM 状态)
# # 2. 分析该状态下的条件分支优先级
# # 3. 识别需要满足的输入条件
# # """
# # info = []
# # # 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
# # try:
# # with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
# # full_content = f.read()
# # except:
# # return info
# # # 提取缺失代码所在的 FSM 状态
# # target_state = None
# # missing_condition = None
# # for line in missing_lines:
# # # 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
# # # 格式可能是 "Line N: STATE:" 或 "STATE:"
# # state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
# # if state_match:
# # potential_state = state_match.group(1)
# # # 排除常见的非状态关键字
# # if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
# # target_state = potential_state
# # break
# # # 如果没找到,尝试从整个文件中分析
# # if not target_state:
# # # 查找缺失行附近的 case 分支
# # lines = full_content.split('\n')
# # for i, line in enumerate(lines):
# # # 查找覆盖率标记为 0 的行
# # if re.match(r'^%000000', line.strip()):
# # # 向上查找最近的 case 分支(状态)
# # for j in range(i-1, max(0, i-20), -1):
# # state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
# # if state_match:
# # target_state = state_match.group(1)
# # break
# # if target_state:
# # break
# # # 分析缺失的条件分支
# # for line in missing_lines:
# # # 提取 else if 条件
# # else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
# # if else_if_match:
# # missing_condition = else_if_match.group(1)
# # break
# # # 提取 if 条件
# # if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
# # if if_match:
# # missing_condition = if_match.group(1)
# # break
# # # 生成具体的指导信息
# # if target_state:
# # info.append(f"- Target FSM state identified: {target_state}")
# # # 查找复位后的初始状态
# # reset_state = self._find_reset_state(full_content)
# # if reset_state:
# # info.append(f"- After reset, FSM starts in state: {reset_state}")
# # if reset_state != target_state:
# # info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
# # info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
# # # 尝试找到状态转换路径
# # transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
# # if transition_hint:
# # info.append(f"- To reach {target_state}: {transition_hint}")
# # if missing_condition:
# # info.append(f"- Missing condition: \"{missing_condition}\"")
# # # 分析条件优先级
# # priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
# # if priority_info:
# # info.extend(priority_info)
# # return info
# # def _find_reset_state(self, content: str) -> Optional[str]:
# # """从 DUT 代码中找到复位后的初始状态"""
# # # 查找复位逻辑中的状态赋值
# # # 常见模式: if (reset) state <= IDLE; 或 state <= 0;
# # patterns = [
# # r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
# # r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
# # r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
# # ]
# # for pattern in patterns:
# # match = re.search(pattern, content, re.IGNORECASE)
# # if match:
# # state = match.group(1)
# # # 如果是数字,尝试从参数中找对应的状态名
# # if state.isdigit():
# # # 查找参数定义
# # param_match = re.search(r'parameter\s+([^;]+);', content)
# # if param_match:
# # params = param_match.group(1)
# # # 解析参数列表
# # for param in params.split(','):
# # param = param.strip()
# # if '=' in param:
# # name, value = param.split('=')
# # if value.strip() == state:
# # return name.strip()
# # return state
# # return None
# # def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
# # """找到从一个状态到另一个状态的转换条件"""
# # # 在 case 语句中查找 from_state 分支
# # # 提取该分支下到 to_state 的转换条件
# # # 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
# # pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
# # match = re.search(pattern, content, re.DOTALL)
# # if match:
# # # 提取条件
# # branch_code = match.group(0)
# # # 查找 if 条件
# # if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
# # if if_match:
# # return f"set condition: {if_match.group(1)}"
# # # 查找 else if 条件
# # elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
# # if elif_match:
# # return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
# # # 尝试反向查找:什么条件下会转换到目标状态
# # trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
# # trans_match = re.search(trans_pattern, content)
# # if trans_match:
# # return f"set condition: {trans_match.group(1)}"
# # return None
# # def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
# # """分析条件分支的优先级,找出需要排除的条件"""
# # info = []
# # if not state:
# # return info
# # # 查找该状态下的所有条件分支
# # # 提取 state: 后面的代码块
# # state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
# # match = re.search(state_block_pattern, content, re.DOTALL)
# # if not match:
# # return info
# # state_block = match.group(1)
# # # 提取所有条件分支
# # conditions = []
# # for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
# # conditions.append(cond_match.group(1).strip())
# # # 找到缺失条件在列表中的位置
# # missing_idx = -1
# # for i, cond in enumerate(conditions):
# # # 简化比较(去除空格)
# # if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
# # missing_condition.replace(' ', '') in cond.replace(' ', ''):
# # missing_idx = i
# # break
# # if missing_idx > 0:
# # info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
# # info.append(f"- You must make ALL earlier conditions FALSE:")
# # for i in range(missing_idx):
# # cond = conditions[i]
# # # 分析如何使条件为 FALSE
# # false_hint = self._get_false_hint(cond)
# # info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
# # return info
# # def _get_false_hint(self, condition: str) -> str:
# # """分析如何使条件为 FALSE"""
# # condition = condition.strip()
# # # 处理 !signal 形式
# # if condition.startswith('!'):
# # signal = condition[1:].strip()
# # return f"set {signal} = 1"
# # # 处理 signal 形式(布尔值)
# # if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
# # return f"set {condition} = 0"
# # # 处理比较运算符
# # if '==' in condition:
# # parts = condition.split('==')
# # if len(parts) == 2:
# # signal = parts[0].strip()
# # value = parts[1].strip()
# # if value.isdigit():
# # return f"set {signal} != {value}"
# # # 处理 >= 形式
# # if '>=' in condition:
# # parts = condition.split('>=')
# # if len(parts) == 2:
# # signal = parts[0].strip()
# # value = parts[1].strip()
# # if value.isdigit():
# # return f"set {signal} < {value}"
# # # 处理 > 形式
# # if '>' in condition and '>=' not in condition:
# # parts = condition.split('>')
# # if len(parts) == 2:
# # signal = parts[0].strip()
# # value = parts[1].strip()
# # return f"set {signal} <= {value}"
# # return "analyze the condition logic"
# # def _generate_semantic_context(self) -> str:
# # """
# # [新增] 从语义分析结果生成 Prompt 上下文
# # 整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
# # - FSM 状态转换图
# # - 功能点重要性排序
# # - 测试场景建议
# # Returns:
# # 语义上下文字符串,用于增强 Prompt
# # """
# # if not self.semantic_result:
# # return ""
# # context_parts = []
# # # 1. 模块基础信息
# # module_name = self.semantic_result.get('module_name', '')
# # inputs = self.semantic_result.get('inputs', [])
# # outputs = self.semantic_result.get('outputs', [])
# # if module_name:
# # context_parts.append(f"Module Name: {module_name}")
# # if inputs:
# # context_parts.append(f"Module Inputs: {', '.join(inputs)}")
# # if outputs:
# # context_parts.append(f"Module Outputs: {', '.join(outputs)}")
# # # 2. FSM 信息(最关键)
# # fsm_info = self.semantic_result.get('fsm_info')
# # if fsm_info:
# # context_parts.append("")
# # context_parts.append("=== FSM STATE MACHINE DETAILS ===")
# # context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
# # states = fsm_info.get('states', [])
# # if states:
# # context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
# # # 状态转换表
# # transitions = fsm_info.get('transitions', {})
# # if transitions:
# # context_parts.append("")
# # context_parts.append("=== STATE TRANSITION TABLE ===")
# # context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
# # context_parts.append("")
# # for state, trans_list in transitions.items():
# # for trans in trans_list:
# # condition = trans.get('condition', 'default')
# # next_state = trans.get('next_state', 'unknown')
# # if condition == 'default':
# # context_parts.append(f" {state} --[default]--> {next_state}")
# # else:
# # context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
# # # 添加状态转换路径分析
# # context_parts.append("")
# # context_parts.append("=== STATE TRANSITION PATH HINTS ===")
# # reset_state = self._find_reset_state_from_fsm(fsm_info)
# # if reset_state:
# # context_parts.append(f"Initial State (after reset): {reset_state}")
# # context_parts.append("")
# # context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
# # context_parts.append(" 1. Reset the DUT to initialize to the starting state")
# # context_parts.append(" 2. Apply inputs to trigger state transitions")
# # context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
# # context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
# # # 3. 功能点优先级
# # function_points = self.semantic_result.get('function_points', [])
# # if function_points:
# # context_parts.append("")
# # context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
# # for i, fp in enumerate(function_points[:10]): # Top 10
# # name = fp.get('name', 'unknown')
# # fp_type = fp.get('type', 'unknown')
# # importance = fp.get('importance', 0)
# # covered = fp.get('covered', False)
# # status = "✓ COVERED" if covered else "✗ NOT COVERED"
# # context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
# # # 4. 测试场景建议
# # test_scenarios = self.semantic_result.get('test_scenarios', [])
# # if test_scenarios:
# # context_parts.append("")
# # context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
# # for i, ts in enumerate(test_scenarios[:5]): # Top 5
# # name = ts.get('name', 'unknown')
# # description = ts.get('description', '')
# # priority = ts.get('priority', 0)
# # context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
# # if context_parts:
# # return "\n".join(context_parts)
# # return ""
# # def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
# # """从 FSM 信息中推断复位后的初始状态"""
# # # 方法1检查是否有明确的复位状态
# # transitions = fsm_info.get('transitions', {})
# # # 复位后通常进入第一个定义的状态或特定名称的状态
# # states = fsm_info.get('states', [])
# # # 常见的初始状态命名
# # initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
# # for name in initial_state_names:
# # if name in states:
# # return name
# # # 如果没有找到,返回第一个状态
# # if states:
# # return states[0]
# # return None
# # # ============================================================================
# # # TBInjector - 场景注入器
# # # ============================================================================
# # class TBInjector:
# # """
# # 场景注入器 - 将LLM生成的测试代码注入到现有测试平台
# # 集成三层防护策略:
# # 1. Layer 1: Prompt约束由CoverageParser处理
# # 2. Layer 2: 智能代码转换
# # 3. Layer 3: 质量评估和重试建议
# # """
# # def __init__(self, tb_code):
# # """
# # 初始化注入器
# # Args:
# # tb_code: 原始测试平台代码字符串
# # """
# # self.content = tb_code
# # self.validator = BlackBoxValidator()
# # self.validator._extract_signals_from_tb(tb_code)
# # self.last_validation_result = None
# # def inject(self, new_code, iter_idx):
# # """
# # 注入新的测试场景到测试平台
# # Args:
# # new_code: LLM生成的测试代码
# # iter_idx: 迭代序号
# # Returns:
# # 修改后的测试平台代码
# # """
# # # Step 1: 预处理代码(包含三层防护)
# # scenario_code, result = self._preprocess_code(new_code, iter_idx)
# # self.last_validation_result = result
# # # 记录日志
# # if result['violations']['critical']:
# # logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
# # for v in result['violations']['critical']:
# # logger.warning(f" - {v}")
# # if result['violations']['warning']:
# # logger.info(f"[CGA-{iter_idx}] Warnings:")
# # for v in result['violations']['warning']:
# # logger.info(f" - {v}")
# # if result['transformations']:
# # logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
# # for t in result['transformations']:
# # logger.info(f" - {t['type']}: {t.get('original', 'N/A')[:50]}...")
# # # Step 2: 构建场景块
# # scenario_block = self._build_scenario_block(scenario_code, iter_idx)
# # # Step 3: 注入到TB中
# # modified_tb = self._inject_scenario(scenario_block)
# # return modified_tb
# # def should_retry(self):
# # """是否应该重试"""
# # if self.last_validation_result is None:
# # return False
# # return self.last_validation_result.get('should_retry', False)
# # def get_quality_score(self):
# # """获取代码质量分数"""
# # if self.last_validation_result is None:
# # return 0
# # return self.last_validation_result.get('quality_score', 0)
# # def _preprocess_code(self, code, iter_idx):
# # """预处理LLM生成的代码"""
# # # 移除markdown标记
# # code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
# # code = re.sub(r'```', '', code)
# # # 移除task包装
# # code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
# # code = re.sub(r'endtask', '', code)
# # # 移除$finish和$stop
# # code = re.sub(r'\$finish\s*;', '', code)
# # code = re.sub(r'\$stop\s*;', '', code)
# # # 移除多余空行
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# # initial_pattern = re.compile(r'\binitial\s+begin\b.*?\bend\b', re.DOTALL | re.IGNORECASE)
# # # 检查并移除 initial begin ... end 块
# # initial_match = initial_pattern.search(code)
# # if initial_match:
# # logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
# # logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
# # # 提取块内的内容
# # block_content = initial_match.group(0)
# # # 移除 initial begin 和 end 包装
# # # 保留块内的实际测试代码
# # inner_content = re.sub(r'^\s*initial\s+begin\s*', '', block_content)
# # inner_content = re.sub(r'\bend\s*$', '', inner_content)
# # # 替换整个块为内部内容
# # code = initial_pattern.sub(inner_content.strip(), code, count=1)
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# # # 信号名自动修正(在验证之前)
# # code = self._auto_correct_signal_names(code)
# # # 三层防护:黑盒约束验证和转换
# # code, result = self.validator.validate_and_transform(code, self.content)
# # code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# # return code.strip(), result
# # def _auto_correct_signal_names(self, code: str) -> str:
# # """自动修正信号名错误"""
# # corrections = []
# # # 获取正确的复位信号名
# # reset_signal = self.validator._find_reset_signal()
# # # 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
# # if reset_signal != "reset":
# # # 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
# # pattern = r'\breset\b(?!\w)'
# # matches = re.findall(pattern, code)
# # if matches:
# # code = re.sub(pattern, reset_signal, code)
# # corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
# # # 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
# # if reset_signal == "areset":
# # pattern = r'\brst\b(?!\w)'
# # matches = re.findall(pattern, code)
# # if matches:
# # code = re.sub(pattern, reset_signal, code)
# # corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
# # # 检查是否使用了不存在的信号
# # for signal in re.findall(r'\b(\w+)\s*=', code):
# # signal = signal.strip()
# # # 跳过已知的合法信号
# # if signal in self.validator.dut_inputs:
# # continue
# # # 检查是否是复位信号的别名
# # if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
# # code = re.sub(rf'\b{signal}\b', reset_signal, code)
# # corrections.append(f"{signal} -> {reset_signal}")
# # if corrections:
# # logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
# # return code
# # def _build_scenario_block(self, scenario_code, iter_idx):
# # """构建完整的场景代码块"""
# # # 格式化缩进
# # lines = scenario_code.split('\n')
# # formatted_lines = []
# # for line in lines:
# # stripped = line.strip()
# # if stripped:
# # formatted_lines.append(f" {stripped}")
# # formatted_code = '\n'.join(formatted_lines)
# # # 检测输出信号用于日志
# # output_signals = self._detect_output_signals()
# # output_log = self._generate_output_log(output_signals, iter_idx)
# # # 构建完整块
# # block = f'''
# # // ========== CGA Iteration {iter_idx} ==========
# # scenario = 100 + {iter_idx};
# # // Reset signals to safe state
# # {self._generate_signal_reset()}
# # #5;
# # // CGA generated test sequence:
# # {formatted_code}
# # // Log results
# # {output_log}
# # // ==============================================
# # '''
# # return block
# # def _detect_output_signals(self):
# # """检测DUT的输出信号"""
# # outputs = []
# # wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
# # for match in wire_pattern.finditer(self.content):
# # signal = match.group(1)
# # if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
# # outputs.append(signal)
# # return outputs
# # def _generate_signal_reset(self):
# # """生成信号重置代码"""
# # inputs = []
# # reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
# # for match in reg_pattern.finditer(self.content):
# # signal = match.group(1)
# # if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
# # inputs.append(signal)
# # if inputs:
# # return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
# # return " // No input signals to reset"
# # def _generate_output_log(self, signals, iter_idx):
# # """生成输出日志代码"""
# # if not signals:
# # return f' $display("[CGA-{iter_idx}] Scenario executed");'
# # sig_names = ", ".join(signals)
# # format_str = ", ".join(["%b"] * len(signals))
# # return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
# # def _inject_scenario(self, scenario_block):
# # """将场景块注入到测试平台"""
# # modified_tb = self.content
# # # 策略:如果有 $fclose在其之前插入
# # if "$fclose" in modified_tb:
# # modified_tb = re.sub(
# # r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
# # scenario_block + r'\1\2',
# # modified_tb,
# # count=1
# # )
# # elif "$finish" in modified_tb:
# # # 否则在 $finish 之前插入
# # modified_tb = modified_tb.replace(
# # "$finish;",
# # scenario_block + "\n $finish;"
# # )
# # else:
# # # 兜底:在最后一个 end 之前插入
# # last_end = modified_tb.rfind("end")
# # if last_end != -1:
# # modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
# # return modified_tb
# """
# Description : Utils for CGA (CoverageParser & TBInjector)
# - Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
# - Enhanced: Three-layer protection for black-box constraints
# * Layer 1: Enhanced Prompt constraints (prevention)
# * Layer 2: Smart code transformation (conversion)
# * Layer 3: Quality assessment & retry (fallback)
# - Integrated: Diversity Constraint Injector (Layer 1)
# Author : CorrectBench Integration
# """
# import re
# import os
# import logging
# from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
# # [新增] 导入多样性约束注入器
# if TYPE_CHECKING:
# from autoline.diversity_injector import DiversityInjector
# # 配置日志
# logger = logging.getLogger(__name__)
# # ============================================================================
# # 三层防护策略说明
# # ============================================================================
# # Layer 1 - Prompt约束: 动态提取允许信号列表,明确约束,正反面示例
# # Layer 2 - 智能转换: 检测违规意图,尝试转换为合法形式
# # Layer 3 - 质量评估: 违规比例过高时返回质量分数,触发重新生成
# # ============================================================================
# # ============================================================================
# # 黑盒约束检查器 - 三层防护策略实现
# # ============================================================================
# class BlackBoxValidator:
# """
# 黑盒约束验证器 - 三层防护策略
# Layer 1: 增强Prompt约束预防
# - 动态提取允许信号列表
# - 生成明确的约束提示
# Layer 2: 智能代码转换(转换)
# - 检测违规意图
# - 尝试转换为合法的等价形式
# - 转换失败时才注释
# Layer 3: 质量评估(重试)
# - 计算代码质量分数
# - 违规比例过高时建议重试
# """
# # 常见的内部信号命名模式(按严重程度分类)
# INTERNAL_SIGNAL_PATTERNS = {
# # 高风险FSM状态相关绝对不能修改
# 'critical': [
# (r'\bstate\b', 'FSM状态寄存器'),
# (r'\bnext_state\b', 'FSM下一状态'),
# (r'\bcurrent_state\b', 'FSM当前状态'),
# (r'\bnext\b(?!\s*[,@])', '下一状态简写'),
# ],
# # 中风险:计数器和内部寄存器
# 'warning': [
# (r'\bcounter\b', '内部计数器'),
# (r'\bcount\b', '计数寄存器'),
# (r'\bcnt\b', '计数简写'),
# (r'\bfall_counter\b', '下落计数器'),
# (r'\breg_\w+', '内部寄存器'),
# ],
# # 低风险:可疑信号(需要确认)
# 'info': [
# (r'\binternal_\w+', '内部信号'),
# (r'\btemp_\w+', '临时信号'),
# (r'\bprev_\w+', '前一状态'),
# ]
# }
# # 非法语句模式
# FORBIDDEN_STATEMENTS = [
# (r'\bforce\s+(\w+)', 'force语句', 'critical'),
# (r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
# (r'\bdeassign\s+', 'deassign语句', 'critical'),
# (r'\brelease\s+', 'release语句', 'critical'),
# ]
# # 层次化访问模式(如 DUT.state
# HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
# def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None):
# """
# Args:
# dut_inputs: DUT模块的输入端口列表
# dut_outputs: DUT模块的输出端口列表
# """
# self.dut_inputs = dut_inputs or []
# self.dut_outputs = dut_outputs or []
# self.violations = {'critical': [], 'warning': [], 'info': []}
# self.transformations = []
# def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
# """验证并转换代码 - 主入口"""
# self.violations = {'critical': [], 'warning': [], 'info': []}
# self.transformations = []
# if tb_code:
# self._extract_signals_from_tb(tb_code)
# original_lines = code.strip().split('\n')
# total_lines = len([l for l in original_lines if l.strip() and not l.strip().startswith('//')])
# # Step 1: 移除非法语句
# code = self._transform_forbidden_statements(code)
# # Step 2: 转换层次化访问
# code = self._transform_hierarchical_access(code)
# # Step 3: 智能转换内部信号访问
# code = self._smart_transform_internal_signals(code)
# # Step 4: 最后清理
# code = self._final_cleanup(code)
# # 计算质量分数
# quality_score = self._calculate_quality_score(total_lines)
# # 决定是否需要重试
# should_retry = quality_score < 50 or len(self.violations['critical']) > 3
# result = {
# 'quality_score': quality_score,
# 'is_valid': len(self.violations['critical']) == 0,
# 'violations': self.violations,
# 'transformations': self.transformations,
# 'should_retry': should_retry,
# 'allowed_signals': self._get_allowed_signals_info()
# }
# return code.strip(), result
# def _extract_signals_from_tb(self, tb_code: str):
# """从测试平台代码中提取DUT输入输出信号"""
# dut_match = re.search(r'(\w+)\s+(?:DUT|dut|uut|UUT)\s*\(', tb_code, re.IGNORECASE)
# if dut_match:
# start = dut_match.start()
# bracket_count = 0
# end = start
# for i, char in enumerate(tb_code[start:]):
# if char == '(':
# bracket_count += 1
# elif char == ')':
# bracket_count -= 1
# if bracket_count == 0:
# end = start + i + 1
# break
# dut_instance = tb_code[start:end]
# port_pattern = r'\.(\w+)\s*\(\s*(\w+)\s*\)'
# for match in re.finditer(port_pattern, dut_instance):
# signal_name = match.group(2)
# is_input = re.search(rf'\breg\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
# is_output = re.search(rf'\bwire\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
# if is_input and signal_name not in self.dut_inputs:
# self.dut_inputs.append(signal_name)
# if is_output and signal_name not in self.dut_outputs:
# self.dut_outputs.append(signal_name)
# # 备用方案通过reg/wire声明推断
# if not self.dut_inputs and not self.dut_outputs:
# for match in re.finditer(r'\breg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
# signal = match.group(1)
# if signal.lower() not in ['file', 'scenario', 'i', 'j', 'k', 'cnt']:
# if signal not in self.dut_inputs:
# self.dut_inputs.append(signal)
# for match in re.finditer(r'\bwire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
# signal = match.group(1)
# if signal not in self.dut_outputs:
# self.dut_outputs.append(signal)
# def _transform_forbidden_statements(self, code: str) -> str:
# """转换非法语句"""
# for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
# matches = list(re.finditer(pattern, code, re.IGNORECASE))
# for match in reversed(matches):
# signal = match.group(1) if match.groups() else 'unknown'
# self.violations[severity].append(f"{desc}: {signal}")
# line_start = code.rfind('\n', 0, match.start()) + 1
# line_end = code.find('\n', match.end())
# if line_end == -1:
# line_end = len(code)
# original_line = code[line_start:line_end]
# # 尝试转换 force -> 直接赋值(仅对输入信号)
# if 'force' in match.group(0).lower() and signal in self.dut_inputs:
# new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
# code = code[:line_start] + new_line + code[line_end:]
# self.transformations.append({
# 'type': 'force_to_assign',
# 'original': original_line.strip(),
# 'transformed': new_line.strip()
# })
# continue
# code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
# self.transformations.append({
# 'type': 'blocked',
# 'original': original_line.strip(),
# 'reason': desc
# })
# return code
# def _transform_hierarchical_access(self, code: str) -> str:
# """转换层次化访问(如 DUT.state"""
# for match in re.finditer(self.HIERARCHICAL_ACCESS, code):
# prefix = match.group(1)
# signal = match.group(2)
# if prefix.upper() in ['DUT', 'UUT', 'TOP', 'TB']:
# if signal not in self.dut_outputs:
# self.violations['critical'].append(f"层次化访问内部信号: {prefix}.{signal}")
# line_start = code.rfind('\n', 0, match.start()) + 1
# line_end = code.find('\n', match.end())
# if line_end == -1:
# line_end = len(code)
# original_line = code[line_start:line_end]
# code = code[:line_start] + '// [HIERARCHY] ' + original_line.lstrip() + code[line_end:]
# return code
# def _smart_transform_internal_signals(self, code: str) -> str:
# """智能转换内部信号访问"""
# lines = code.split('\n')
# transformed_lines = []
# for line in lines:
# stripped = line.strip()
# if stripped.startswith('//') or not stripped:
# transformed_lines.append(line)
# continue
# if (stripped.startswith('#') or stripped.startswith('$') or
# stripped.startswith('repeat(') or stripped.startswith('@(')):
# transformed_lines.append(line)
# continue
# detected_signals = self._detect_internal_signals_in_line(stripped)
# has_critical = detected_signals.get('critical', [])
# has_warning = detected_signals.get('warning', [])
# if not has_critical and not has_warning:
# transformed_lines.append(line)
# continue
# context = self._analyze_signal_context(stripped, detected_signals)
# if context['type'] == 'assignment':
# transformed_lines.append(f"// [INTERNAL_ASSIGN] Cannot modify internal signal")
# transformed_lines.append(f"// Original: {stripped}")
# self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
# elif context['type'] == 'condition':
# transformed = self._transform_condition(stripped, context)
# transformed_lines.append(transformed)
# self.transformations.append({
# 'type': 'condition_transform',
# 'original': stripped,
# 'transformed': transformed
# })
# elif context['type'] == 'wait_for_state':
# transformed = self._transform_state_wait(stripped, context)
# transformed_lines.append(transformed)
# self.transformations.append({
# 'type': 'wait_transform',
# 'original': stripped,
# 'transformed': transformed
# })
# else:
# if has_critical:
# transformed_lines.append(f"// [WARNING] Contains internal signal reference: {has_critical}")
# transformed_lines.append(f"// Original: {stripped}")
# for sig in has_critical:
# self.violations['warning'].append(f"可疑的内部信号访问: {sig}")
# else:
# transformed_lines.append(line)
# return '\n'.join(transformed_lines)
# def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
# """检测行中的内部信号"""
# detected = {'critical': [], 'warning': [], 'info': []}
# LEGAL_KEYWORDS = {
# 'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
# 'while', 'for', 'case', 'default', 'always', 'initial',
# 'assign', 'wire', 'reg', 'input', 'output', 'inout',
# 'parameter', 'localparam', 'integer', 'real', 'time',
# 'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
# 'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
# 'true', 'false', 'idle', 'wait'
# }
# SYSTEM_FUNCTIONS = {'$display', '$write', '$monitor', '$fopen', '$fclose',
# '$fdisplay', '$fwrite', '$readmemh', '$readmemb',
# '$finish', '$stop', '$random', '$time', '$stime'}
# for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
# for pattern, name in patterns:
# matches = re.findall(pattern, line, re.IGNORECASE)
# if matches:
# for match in matches:
# if isinstance(match, tuple):
# match = match[0] if match[0] else match[1]
# match_lower = match.lower() if match else ''
# if match_lower in LEGAL_KEYWORDS:
# continue
# if match in SYSTEM_FUNCTIONS:
# continue
# if match in self.dut_inputs or match in self.dut_outputs:
# continue
# if match.startswith('$'):
# continue
# if match and match not in detected[severity]:
# detected[severity].append(match)
# return detected
# def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
# """分析信号使用上下文"""
# assign_match = re.search(r'(\w+)\s*(?:=|<=)\s*', line)
# if assign_match:
# target = assign_match.group(1)
# if target in signals.get('critical', []) or target in signals.get('warning', []):
# return {'type': 'assignment', 'signals': [target], 'line': line}
# if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
# return {'type': 'wait_for_state', 'signals': signals.get('critical', []), 'line': line}
# if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
# return {'type': 'condition', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
# return {'type': 'other', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
# def _transform_condition(self, line: str, context: Dict) -> str:
# """转换条件判断语句"""
# original = line
# if 'state' in str(context['signals']):
# indent = len(line) - len(line.lstrip())
# spaces = ' ' * indent
# transformed = f"// [TRANSFORMED] Original: {original.strip()}\n"
# transformed += f"{spaces}// Cannot directly check internal state\n"
# transformed += f"{spaces}// Alternative: Wait for expected clock cycles\n"
# transformed += f"{spaces}repeat(5) @(posedge clk); // Adjust cycles as needed"
# self.violations['warning'].append(f"条件判断转换: {original.strip()}")
# return transformed
# return f"// [TRANSFORMED] {original}"
# def _transform_state_wait(self, line: str, context: Dict) -> str:
# """转换状态等待语句"""
# indent = len(line) - len(line.lstrip())
# spaces = ' ' * indent
# transformed = f"// [TRANSFORMED] Original: {line.strip()}\n"
# transformed += f"{spaces}// Cannot wait for internal state directly\n"
# transformed += f"{spaces}// Alternative: Drive inputs and wait for expected cycles\n"
# transformed += f"{spaces}repeat(10) @(posedge clk); // Adjust based on FSM design"
# self.violations['info'].append(f"状态等待转换: {line.strip()}")
# return transformed
# def _final_cleanup(self, code: str) -> str:
# """最终清理"""
# lines = code.split('\n')
# cleaned = []
# for line in lines:
# stripped = line.strip()
# if stripped in ['begin', 'end'] and cleaned:
# last = cleaned[-1].strip()
# if last.startswith('// [TRANSFORMED]') or last.startswith('// [INTERNAL'):
# continue
# cleaned.append(line)
# result = '\n'.join(cleaned)
# result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
# return result
# def _calculate_quality_score(self, total_lines: int) -> int:
# """计算代码质量分数"""
# if total_lines == 0:
# return 0
# score = 100
# score -= len(self.violations['critical']) * 20
# score -= len(self.violations['warning']) * 5
# score += len([t for t in self.transformations if 'blocked' not in t.get('type', '')]) * 5
# return max(0, min(100, score))
# def _get_allowed_signals_info(self) -> Dict:
# """获取允许的信号信息"""
# return {
# 'inputs': self.dut_inputs,
# 'outputs': self.dut_outputs,
# 'all_allowed': self.dut_inputs + self.dut_outputs
# }
# def generate_constraint_prompt(self) -> str:
# """生成动态约束提示 - 使用实际信号名"""
# prompt = "\n[SIGNAL CONSTRAINTS - DERIVED FROM YOUR DUT]\n"
# # 提取复位信号名(优先使用实际的)
# reset_signal = self._find_reset_signal()
# if self.dut_inputs:
# prompt += "ALLOWED INPUTS (you CAN drive these):\n"
# for sig in self.dut_inputs:
# prompt += f" - {sig}\n"
# else:
# prompt += "ALLOWED INPUTS: Check the testbench for actual signal names\n"
# if self.dut_outputs:
# prompt += "\nOUTPUTS (you can READ but NOT write):\n"
# for sig in self.dut_outputs:
# prompt += f" - {sig}\n"
# prompt += f"""
# FORBIDDEN ACTIONS:
# 1. NEVER assign values to internal signals (state, counter, etc.)
# 2. NEVER use 'force' or 'assign' statements
# 3. NEVER access signals like DUT.state (hierarchical access)
# CORRECT APPROACH:
# - To reach a specific FSM state: drive inputs and WAIT for natural transition
# - Example: Instead of "state = IDLE", use "{reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;"
# """
# return prompt
# def _find_reset_signal(self) -> str:
# """查找复位信号名"""
# # 按优先级查找常见的复位信号名
# reset_candidates = ['areset', 'rst_n', 'rst', 'reset', 'rst_b']
# for sig in reset_candidates:
# if sig in self.dut_inputs:
# return sig
# # 如果没找到,检查输入列表中是否有类似名称
# for sig in self.dut_inputs:
# sig_lower = sig.lower()
# if 'reset' in sig_lower or 'rst' in sig_lower:
# return sig
# # 默认返回第一个输入信号(排除 clk
# for sig in self.dut_inputs:
# if 'clk' not in sig.lower():
# return sig
# return "reset" # 兜底
# # =========================================================================
# # [新增] Verilog 语法预检查 - 检测常见逻辑错误
# # =========================================================================
# def check_syntax_issues(self, code: str, signal_widths: Dict[str, int] = None, declared_signals: set = None) -> Dict:
# """
# 检测 Verilog 代码中的常见语法/逻辑问题
# Args:
# code: 待检查的代码
# signal_widths: 信号位宽映射 (可选,如 {'in': 1, 'data': 8})
# declared_signals: 已声明的信号集合 (可选,用于检测未声明信号)
# Returns:
# {
# 'width_mismatch': [...], # 位宽不匹配警告
# 'logic_issues': [...], # 逻辑问题
# 'syntax_warnings': [...], # 语法警告
# 'should_retry': bool # 是否建议重试
# }
# """
# result = {
# 'width_mismatch': [],
# 'logic_issues': [],
# 'syntax_warnings': [],
# 'should_retry': False
# }
# # 检查位宽不匹配
# result['width_mismatch'] = self._check_width_mismatch(code, signal_widths)
# # 检查逻辑问题
# result['logic_issues'] = self._check_logic_issues(code, signal_widths)
# # 检查其他语法问题(传入已声明信号)
# result['syntax_warnings'] = self._check_syntax_warnings(code, declared_signals)
# # 决定是否需要重试
# # 包括位宽不匹配、逻辑问题、语法错误severity='error'
# has_syntax_errors = any(
# issue.get('severity') == 'error'
# for issue in result['syntax_warnings']
# )
# result['should_retry'] = (
# len(result['width_mismatch']) > 0 or
# len(result['logic_issues']) > 0 or
# has_syntax_errors
# )
# return result
# def _check_width_mismatch(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
# """
# 检测位宽不匹配问题
# 常见问题:
# - {signal} = N'b... 将多位值赋给单比特信号
# - signal = N'b... 位宽不匹配
# """
# issues = []
# signal_widths = signal_widths or {}
# # 默认假设未声明信号为 1 位
# def get_width(sig):
# return signal_widths.get(sig, 1)
# # 模式1: {signal} = N'bvalue (拼接赋值)
# # 例: {in} = 8'b01111100 - 将 8 位赋给 1 位
# concat_pattern = re.compile(r'\{(\w+)\}\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
# for match in concat_pattern.finditer(code):
# signal = match.group(1)
# value_width = int(match.group(2))
# base = match.group(3)
# value = match.group(4)
# actual_width = get_width(signal)
# if value_width > actual_width:
# issues.append({
# 'type': 'concat_width_mismatch',
# 'signal': signal,
# 'signal_width': actual_width,
# 'assigned_width': value_width,
# 'original': match.group(0),
# 'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value via concatenation. Verilog will truncate.",
# 'severity': 'warning',
# 'suggestion': f"Use a shift register: reg [{value_width-1}:0] temp; temp = {value_width}'{base}{value}; then shift bits one by one"
# })
# # 模式2: signal = N'bvalue (直接赋值)
# assign_pattern = re.compile(r'\b(\w+)\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
# for match in assign_pattern.finditer(code):
# signal = match.group(1)
# value_width = int(match.group(2))
# # 跳过拼接赋值(已处理)
# if f'{{{signal}}}' in match.group(0):
# continue
# actual_width = get_width(signal)
# # 只有当信号已知且位宽不匹配时才警告
# if signal in signal_widths and value_width > actual_width:
# issues.append({
# 'type': 'direct_width_mismatch',
# 'signal': signal,
# 'signal_width': actual_width,
# 'assigned_width': value_width,
# 'original': match.group(0),
# 'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value. Truncation will occur.",
# 'severity': 'warning'
# })
# return issues
# def _check_logic_issues(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
# """
# 检测逻辑问题
# 常见问题:
# - 单比特信号自移位 (in = in >> 1 无效果)
# - 无效的循环条件
# """
# issues = []
# signal_widths = signal_widths or {}
# def get_width(sig):
# return signal_widths.get(sig, 1)
# # 模式: signal = signal >> N 或 signal = signal << N
# shift_pattern = re.compile(r'\b(\w+)\s*=\s*\1\s*(>>|<<)\s*(\d+)?')
# for match in shift_pattern.finditer(code):
# signal = match.group(1)
# direction = match.group(2)
# shift_amount = int(match.group(3)) if match.group(3) else 1
# actual_width = get_width(signal)
# if actual_width == 1:
# issues.append({
# 'type': 'single_bit_shift',
# 'signal': signal,
# 'direction': direction,
# 'original': match.group(0),
# 'message': f"Single-bit signal '{signal}' self-shift has no effect. Result is always 0.",
# 'severity': 'warning',
# 'suggestion': f"Use a shift register for bit-serial input, not the input signal itself"
# })
# # 模式: repeat(N) begin ... signal = signal >> 1; end (循环移位单比特)
# repeat_shift_pattern = re.compile(r'repeat\s*\(\s*\d+\s*\)\s*begin[^}]*?(\w+)\s*=\s*\1\s*(>>|<<)', re.DOTALL)
# for match in repeat_shift_pattern.finditer(code):
# signal = match.group(1)
# actual_width = get_width(signal)
# if actual_width == 1:
# issues.append({
# 'type': 'repeat_single_bit_shift',
# 'signal': signal,
# 'original': match.group(0)[:100] + '...',
# 'message': f"Repeat loop shifting single-bit signal '{signal}' is ineffective",
# 'severity': 'warning'
# })
# return issues
# def _check_syntax_warnings(self, code: str, declared_signals: set = None) -> List[Dict]:
# """
# 检测其他语法问题
# Args:
# code: 待检查的代码
# declared_signals: 已声明的信号集合 (从完整 TB 中提取)
# """
# issues = []
# declared_signals = declared_signals or set()
# # 检查: 缺少分号
# # 注意: 这只是简单检查,不是完整解析
# lines = code.split('\n')
# for i, line in enumerate(lines):
# stripped = line.strip()
# if not stripped or stripped.startswith('//'):
# continue
# # 跳过不需要分号的行
# skip_patterns = [
# r'^begin$', r'^end$', r'^endcase$', r'^endmodule$',
# r'^else$', r'^\)$', r'^\}\s*$', r'^`timescale', r'^`include'
# ]
# if any(re.match(p, stripped) for p in skip_patterns):
# continue
# # 检查是否需要分号但没有
# needs_semicolon = re.search(r'\b(initial|always|assign|reg|wire|parameter|localport)\b', stripped) is None
# has_semicolon = stripped.endswith(';') or stripped.endswith(')') or stripped.endswith('}')
# if needs_semicolon and not has_semicolon and not stripped.endswith('begin'):
# # 可能缺少分号(但不确定)
# pass # 暂不报警,避免误报
# # 检查: 不匹配的 begin/end
# begin_count = len(re.findall(r'\bbegin\b', code))
# end_count = len(re.findall(r'\bend\b', code))
# if begin_count != end_count:
# issues.append({
# 'type': 'mismatched_begin_end',
# 'message': f"Mismatched begin/end: {begin_count} begin vs {end_count} end",
# 'severity': 'error'
# })
# # 检查: 未声明的信号(在赋值左侧使用的信号)
# # 使用传入的已声明信号集合
# for match in re.finditer(r'^\s*(\w+)\s*=', code, re.MULTILINE):
# signal = match.group(1)
# # 跳过系统任务和关键字
# if signal in ['if', 'else', 'case', 'for', 'while', 'repeat', 'assign', 'force', 'release']:
# continue
# # 跳过以 $ 开头的系统任务
# if signal.startswith('$'):
# continue
# # 检查是否在已声明信号列表中
# if signal not in declared_signals:
# issues.append({
# 'type': 'undeclared_signal',
# 'signal': signal,
# 'message': f"Signal '{signal}' is used but not declared in the testbench",
# 'severity': 'error',
# 'suggestion': f"Use an existing signal name (declared: {', '.join(list(declared_signals)[:10])}...)"
# })
# # 检查: always 块与时钟生成冲突
# # 检测是否有多个 always/initial 块驱动同一信号
# always_blocks = re.findall(r'\balways\s*(@[^i]|begin)', code)
# initial_clk_blocks = len(re.findall(r'initial\s+begin[^i]*?clk\s*=', code, re.DOTALL))
# always_clk_blocks = len(re.findall(r'\balways[^i]*?clk\s*=', code, re.DOTALL))
# if initial_clk_blocks > 0 and always_clk_blocks > 0:
# issues.append({
# 'type': 'multiple_clock_drivers',
# 'message': f"Multiple clock drivers detected: {initial_clk_blocks} initial + {always_clk_blocks} always blocks driving clk",
# 'severity': 'error',
# 'suggestion': "Remove duplicate clock generation. The testbench already has clock generation."
# })
# # 检查: initial 块嵌套(生成了 initial begin ... end 在注入时会导致嵌套)
# if re.search(r'\binitial\s+begin\b', code):
# issues.append({
# 'type': 'initial_block_injection',
# 'message': "Code contains 'initial begin...end' block which should not be injected into an existing initial block",
# 'severity': 'error',
# 'suggestion': "Remove the 'initial begin...end' wrapper, keep only the test statements inside"
# })
# return issues
# # ============================================================================
# # CoverageParser - 覆盖率解析器
# # ============================================================================
# class CoverageParser:
# """覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
# [增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
# [新增] 集成能量分配层,提供目标功能点优先级信息
# [新增] 集成多样性约束注入器,避免测试用例同质化
# """
# def __init__(self, annotated_file, tb_code=None, semantic_result=None,
# energy_allocator=None, diversity_injector=None):
# self.file_path = annotated_file
# self.tb_code = tb_code
# self.semantic_result = semantic_result # [新增] 语义分析结果
# self.energy_allocator = energy_allocator # [新增] 能量分配器
# self.diversity_injector = diversity_injector # [新增] 多样性约束注入器
# # 修复Verilator 覆盖率标记格式多样化:
# # %NNNNNN - 行覆盖计数(%000000 表示从未执行)
# # ~NNNNNN - 分支/条件覆盖计数(~000000 表示分支从未执行)
# # ^NNNNNN - 未覆盖的分支标记
# # NNNNNN - 空格开头+数字(某些 Verilator 版本)
# # NNNNNN - 纯数字开头(无前缀)
# self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$') # 匹配 %NNNNNN code
# self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$') # 匹配 ~NNNNNN code
# self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$') # 匹配 ^NNNNNN code
# # [修复] 纯数字开头(无前缀)或空格开头
# self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$') # 匹配 " NNNNNN" 或 "NNNNNN"
# self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')
# self.validator = BlackBoxValidator()
# if tb_code:
# self.validator._extract_signals_from_tb(tb_code)
# def generate_prompt(self, current_score):
# """生成覆盖率驱动的Prompt"""
# if not os.path.exists(self.file_path):
# return None
# try:
# with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
# lines = f.readlines()
# except Exception:
# return None
# missing_blocks = []
# current_block = []
# recording = False
# context_buffer = []
# CONTEXT_SIZE = 3
# # 收集缺失行用于 FSM 分析
# missing_lines = []
# for i, line in enumerate(lines):
# line = line.strip()
# count = -1
# clean_code = line
# is_tilde = False
# is_caret = False
# # 尝试匹配各种覆盖率标记格式
# # Verilator 覆盖率格式:
# # - %NNNNNN: 行覆盖NNNNNN 是执行次数,%000000 表示未执行
# # - ~NNNNNN: 分支/条件覆盖,~000000 表示分支从未执行
# # - ^NNNNNN: 未覆盖分支标记
# # - NNNNNN: 无前缀格式(某些版本)
# match_pct = self.line_pattern.match(line) # %NNNNNN code
# match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
# match_caret = self.caret_pattern.match(line) # ^NNNNNN code
# match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
# if match_pct:
# count = int(match_pct.group(1))
# clean_code = match_pct.group(2).strip()
# elif match_tilde:
# count = int(match_tilde.group(1))
# clean_code = match_tilde.group(2).strip()
# is_tilde = True
# elif match_caret:
# count = int(match_caret.group(1))
# clean_code = match_caret.group(2).strip()
# is_caret = True
# elif match_plain:
# # 纯数字格式(可能出现在某些 Verilator 版本)
# count = int(match_plain.group(1))
# clean_code = match_plain.group(2).strip()
# if "//" in clean_code:
# clean_code = clean_code.split("//")[0].strip()
# is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
# is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
# clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
# # [修改] 覆盖状态判断:
# # - %NNNNNN: count > 0 表示已覆盖count == 0 表示未覆盖
# # - ~NNNNNN: 分支覆盖标记count == 0 也表示未覆盖!
# # - ^NNNNNN: 未覆盖分支标记
# is_definitely_covered = (not is_tilde and not is_caret and count > 0)
# # [关键修复] tilde 格式 count == 0 也应该被视为 missing
# is_definitely_missed = (
# (not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or
# (is_tilde and count == 0 and not is_hard_noise and not is_soft_noise) or # [新增] ~000000 也是 missing
# (is_caret and not is_hard_noise and not is_soft_noise)
# )
# if recording:
# if is_definitely_covered:
# missing_blocks.append(current_block)
# missing_lines.extend(current_block)
# current_block = []
# recording = False
# if not is_hard_noise:
# context_buffer.append(clean_code)
# else:
# if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
# current_block.append(f"Line {i+1}: {clean_code}")
# else:
# if is_definitely_missed:
# recording = True
# if context_buffer:
# current_block.append(f"... (Context)")
# for ctx in context_buffer:
# current_block.append(f" {ctx}")
# current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
# else:
# if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
# context_buffer.append(clean_code)
# if len(context_buffer) > CONTEXT_SIZE:
# context_buffer.pop(0)
# if recording and current_block:
# missing_blocks.append(current_block)
# missing_lines.extend(current_block)
# # [改进] 详细诊断日志 - 使用 info 级别确保可见
# total_lines = len(lines)
# parsed_lines = sum(1 for l in lines if l.strip() and (
# self.line_pattern.match(l.strip()) or
# self.tilde_pattern.match(l.strip()) or
# self.caret_pattern.match(l.strip()) or
# self.plain_pattern.match(l.strip())
# ))
# # 收集零计数行的详细信息
# zero_count_details = []
# for l in lines:
# l_stripped = l.strip()
# if not l_stripped:
# continue
# match_pct = self.line_pattern.match(l_stripped)
# match_tilde = self.tilde_pattern.match(l_stripped)
# if match_pct and int(match_pct.group(1)) == 0:
# zero_count_details.append(('%', match_pct.group(2).strip()[:50]))
# elif match_tilde and int(match_tilde.group(1)) == 0:
# zero_count_details.append(('~', match_tilde.group(2).strip()[:50]))
# zero_count_lines = len(zero_count_details)
# logger.info(f"CoverageParser: Total={total_lines}, Parsed={parsed_lines}, Zero-count={zero_count_lines}, Missing blocks={len(missing_blocks)}")
# if not missing_blocks:
# # [改进] 详细诊断信息
# if zero_count_lines > 0:
# logger.warning(f"Found {zero_count_lines} lines with zero coverage count, but no missing blocks extracted.")
# logger.warning("Zero-count lines:")
# for prefix, code in zero_count_details[:10]: # 只显示前10个
# logger.warning(f" {prefix}000000: {code}")
# if len(zero_count_details) > 10:
# logger.warning(f" ... and {len(zero_count_details) - 10} more")
# logger.warning("These lines may have been filtered as noise (declarations, etc.)")
# return None
# selected_blocks = missing_blocks[:50]
# # 获取实际信号名用于示例
# reset_signal = self.validator._find_reset_signal()
# inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
# example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
# # 分析 FSM 相关的缺失代码
# fsm_analysis = self._analyze_fsm_missing(missing_lines)
# # [新增] 从语义分析结果获取 FSM 和功能点信息
# semantic_context = self._generate_semantic_context()
# prompt = f"""
# [ROLE]
# You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
# [COVERAGE STATUS]
# Current testbench achieves {current_score:.2f}% coverage.
# The following logic blocks in the DUT are NEVER executed during simulation:
# """
# for idx, block in enumerate(selected_blocks):
# prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
# # [新增] 添加语义分析上下文
# if semantic_context:
# prompt += f"""
# [SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
# {semantic_context}
# """
# # === [新增] 添加能量分配目标上下文 ===
# if self.energy_allocator:
# energy_context = self.energy_allocator.get_target_context()
# if energy_context:
# prompt += f"""
# [ENERGY-ALIGNED TARGET - PRIORITY]
# {energy_context}
# Focus your test scenario on covering this high-priority target first.
# """
# # =====================================
# prompt += self.validator.generate_constraint_prompt()
# # 添加 FSM 分析提示
# if fsm_analysis:
# prompt += f"""
# [FSM STATE TRANSITION ANALYSIS - CRITICAL]
# {fsm_analysis}
# IMPORTANT: FSM transitions have PRIORITY ORDER!
# - 'if' conditions are evaluated TOP to BOTTOM
# - The FIRST matching condition determines the next state
# - To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
# - Read the missing code's context carefully: what conditions precede it?
# """
# prompt += f"""
# [OUTPUT REQUIREMENTS - CRITICAL]
# 1. Return ONLY Verilog test scenario code (NOT a task definition)
# 2. Your code will be inserted INTO an existing `initial begin ... end` block
# 3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
# 4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
# [CODING STYLE]
# 1. Use blocking assignments for input signals: `signal = value;`
# 2. Use `#N;` for time delays: `#10;` means wait 10 time units
# 3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
# 4. Start with reset sequence if needed
# [BLACK-BOX CONSTRAINTS - CRITICAL]
# 1. You can ONLY control module INPUTS listed above
# 2. You CANNOT access internal signals (state, next_state, counters, etc.)
# 3. You CANNOT use `force` or `assign` on internal signals
# 4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
# [STEP-BY-STEP APPROACH - REQUIRED]
# For each missing branch, think through:
# 1. What STATE must the FSM be in? (Look at the case statement)
# 2. What CONDITIONS must be true/false? (Check priority order!)
# 3. How to reach that state from reset? (Trace state transitions)
# 4. What inputs to apply and in what order?
# [POSITIVE EXAMPLE - CORRECT APPROACH]
# ```verilog
# // Reset sequence - use ACTUAL input signal names from above
# {reset_signal} = 1;
# repeat(2) @(posedge clk);
# {reset_signal} = 0;
# // Wait for FSM to reach desired state (estimate cycles)
# repeat(3) @(posedge clk);
# // Trigger missing branch by driving inputs
# {example_signal} = 1;
# repeat(5) @(posedge clk);
# {example_signal} = 0;
# repeat(10) @(posedge clk);
# ```
# [NEGATIVE EXAMPLE - DO NOT DO THIS]
# ```verilog
# // WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
# reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
# // WRONG: Not considering condition priority in FSM
# // If missing code is "else if (condition_b)", you must make condition_a FALSE first!
# // Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
# // Then signal_a must be 1 (FALSE) for the else-if branch to execute
# signal_a = 0; // WRONG: This blocks the else-if branch!
# signal_b = 1; // This will NOT trigger because signal_a=0 took priority
# // CORRECT: Analyze priority, set higher-priority conditions to FALSE
# signal_a = 1; // Now the first condition (!signal_a) is FALSE
# signal_b = 1; // Now this else-if branch can execute
# // WRONG: Trying to assign internal state
# state = IDLE; // ERROR: Cannot modify internal signal!
# // WRONG: Using force on internal signal
# force DUT.state = WL; // ERROR: Cannot force internal signal!
# // WRONG: Checking internal state in condition
# if (state == WL) begin // ERROR: Cannot read internal signal!
# {example_signal} = 1;
# end
# // CORRECT ALTERNATIVE: Estimate timing instead
# repeat(5) @(posedge clk); // Wait for FSM to reach expected state
# {example_signal} = 1;
# ```
# [SIGNAL NAME WARNING - CRITICAL]
# - DO NOT use 'reset' if the actual signal is '{reset_signal}'
# - DO NOT use 'rst' if the actual signal is '{reset_signal}'
# - ALWAYS use EXACT signal names from the ALLOWED INPUTS list above
# - Double-check every signal name before using it!
# Now write the test scenario code to cover the missing blocks:
# """
# # === [新增] 注入多样性约束 ===
# if self.diversity_injector:
# # 获取未覆盖功能点
# uncovered_functions = []
# if self.semantic_result and self.semantic_result.get('function_points'):
# uncovered_functions = [
# fp for fp in self.semantic_result['function_points']
# if not fp.get('covered', False)
# ]
# # 获取当前目标功能点
# target_function = ""
# if self.energy_allocator and self.energy_allocator.current_target:
# target_function = self.energy_allocator.current_target.function_point
# # 注入多样性约束
# prompt = self.diversity_injector.inject_diversity_constraints(
# prompt=prompt,
# target_function=target_function,
# uncovered_functions=uncovered_functions
# )
# # =================================
# return prompt
# def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
# """分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
# analysis = []
# # 检查是否涉及 FSM 状态转换
# has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
# has_else_if = any('else if' in line.lower() for line in missing_lines)
# has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
# if has_state_case or has_else_if:
# analysis.append("- Missing code involves FSM state transitions or conditional branches")
# if has_else_if or has_if_condition:
# analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
# analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
# analysis.append("- Analyze the missing code's context: what conditions block this branch?")
# if has_state_case:
# analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
# # === 新增FSM 状态路径分析 ===
# # 尝试从缺失代码中提取 FSM 状态信息
# fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
# if fsm_state_info:
# analysis.append("")
# analysis.append("[FSM STATE PATH ANALYSIS]")
# analysis.extend(fsm_state_info)
# return "\n".join(analysis) if analysis else ""
# def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
# """
# 从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
# 分析策略:
# 1. 从缺失代码的上下文识别 case 分支FSM 状态)
# 2. 分析该状态下的条件分支优先级
# 3. 识别需要满足的输入条件
# """
# info = []
# # 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
# try:
# with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
# full_content = f.read()
# except:
# return info
# # 提取缺失代码所在的 FSM 状态
# target_state = None
# missing_condition = None
# for line in missing_lines:
# # 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
# # 格式可能是 "Line N: STATE:" 或 "STATE:"
# state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
# if state_match:
# potential_state = state_match.group(1)
# # 排除常见的非状态关键字
# if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
# target_state = potential_state
# break
# # 如果没找到,尝试从整个文件中分析
# if not target_state:
# # 查找缺失行附近的 case 分支
# lines = full_content.split('\n')
# for i, line in enumerate(lines):
# # 查找覆盖率标记为 0 的行
# if re.match(r'^%000000', line.strip()):
# # 向上查找最近的 case 分支(状态)
# for j in range(i-1, max(0, i-20), -1):
# state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
# if state_match:
# target_state = state_match.group(1)
# break
# if target_state:
# break
# # 分析缺失的条件分支
# for line in missing_lines:
# # 提取 else if 条件
# else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
# if else_if_match:
# missing_condition = else_if_match.group(1)
# break
# # 提取 if 条件
# if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
# if if_match:
# missing_condition = if_match.group(1)
# break
# # 生成具体的指导信息
# if target_state:
# info.append(f"- Target FSM state identified: {target_state}")
# # 查找复位后的初始状态
# reset_state = self._find_reset_state(full_content)
# if reset_state:
# info.append(f"- After reset, FSM starts in state: {reset_state}")
# if reset_state != target_state:
# info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
# info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
# # 尝试找到状态转换路径
# transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
# if transition_hint:
# info.append(f"- To reach {target_state}: {transition_hint}")
# if missing_condition:
# info.append(f"- Missing condition: \"{missing_condition}\"")
# # 分析条件优先级
# priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
# if priority_info:
# info.extend(priority_info)
# return info
# def _find_reset_state(self, content: str) -> Optional[str]:
# """从 DUT 代码中找到复位后的初始状态"""
# # 查找复位逻辑中的状态赋值
# # 常见模式: if (reset) state <= IDLE; 或 state <= 0;
# patterns = [
# r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
# r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
# r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
# ]
# for pattern in patterns:
# match = re.search(pattern, content, re.IGNORECASE)
# if match:
# state = match.group(1)
# # 如果是数字,尝试从参数中找对应的状态名
# if state.isdigit():
# # 查找参数定义
# param_match = re.search(r'parameter\s+([^;]+);', content)
# if param_match:
# params = param_match.group(1)
# # 解析参数列表
# for param in params.split(','):
# param = param.strip()
# if '=' in param:
# name, value = param.split('=')
# if value.strip() == state:
# return name.strip()
# return state
# return None
# def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
# """找到从一个状态到另一个状态的转换条件"""
# # 在 case 语句中查找 from_state 分支
# # 提取该分支下到 to_state 的转换条件
# # 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
# pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
# match = re.search(pattern, content, re.DOTALL)
# if match:
# # 提取条件
# branch_code = match.group(0)
# # 查找 if 条件
# if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
# if if_match:
# return f"set condition: {if_match.group(1)}"
# # 查找 else if 条件
# elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
# if elif_match:
# return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
# # 尝试反向查找:什么条件下会转换到目标状态
# trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
# trans_match = re.search(trans_pattern, content)
# if trans_match:
# return f"set condition: {trans_match.group(1)}"
# return None
# def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
# """分析条件分支的优先级,找出需要排除的条件"""
# info = []
# if not state:
# return info
# # 查找该状态下的所有条件分支
# # 提取 state: 后面的代码块
# state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
# match = re.search(state_block_pattern, content, re.DOTALL)
# if not match:
# return info
# state_block = match.group(1)
# # 提取所有条件分支
# conditions = []
# for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
# conditions.append(cond_match.group(1).strip())
# # 找到缺失条件在列表中的位置
# missing_idx = -1
# for i, cond in enumerate(conditions):
# # 简化比较(去除空格)
# if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
# missing_condition.replace(' ', '') in cond.replace(' ', ''):
# missing_idx = i
# break
# if missing_idx > 0:
# info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
# info.append(f"- You must make ALL earlier conditions FALSE:")
# for i in range(missing_idx):
# cond = conditions[i]
# # 分析如何使条件为 FALSE
# false_hint = self._get_false_hint(cond)
# info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
# return info
# def _get_false_hint(self, condition: str) -> str:
# """分析如何使条件为 FALSE"""
# condition = condition.strip()
# # 处理 !signal 形式
# if condition.startswith('!'):
# signal = condition[1:].strip()
# return f"set {signal} = 1"
# # 处理 signal 形式(布尔值)
# if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
# return f"set {condition} = 0"
# # 处理比较运算符
# if '==' in condition:
# parts = condition.split('==')
# if len(parts) == 2:
# signal = parts[0].strip()
# value = parts[1].strip()
# if value.isdigit():
# return f"set {signal} != {value}"
# # 处理 >= 形式
# if '>=' in condition:
# parts = condition.split('>=')
# if len(parts) == 2:
# signal = parts[0].strip()
# value = parts[1].strip()
# if value.isdigit():
# return f"set {signal} < {value}"
# # 处理 > 形式
# if '>' in condition and '>=' not in condition:
# parts = condition.split('>')
# if len(parts) == 2:
# signal = parts[0].strip()
# value = parts[1].strip()
# return f"set {signal} <= {value}"
# return "analyze the condition logic"
# def _generate_semantic_context(self) -> str:
# """
# [新增] 从语义分析结果生成 Prompt 上下文
# 整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
# - FSM 状态转换图
# - 功能点重要性排序
# - 测试场景建议
# Returns:
# 语义上下文字符串,用于增强 Prompt
# """
# if not self.semantic_result:
# return ""
# context_parts = []
# # 1. 模块基础信息
# module_name = self.semantic_result.get('module_name', '')
# inputs = self.semantic_result.get('inputs', [])
# outputs = self.semantic_result.get('outputs', [])
# if module_name:
# context_parts.append(f"Module Name: {module_name}")
# if inputs:
# context_parts.append(f"Module Inputs: {', '.join(inputs)}")
# if outputs:
# context_parts.append(f"Module Outputs: {', '.join(outputs)}")
# # 2. FSM 信息(最关键)
# fsm_info = self.semantic_result.get('fsm_info')
# if fsm_info:
# context_parts.append("")
# context_parts.append("=== FSM STATE MACHINE DETAILS ===")
# context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
# states = fsm_info.get('states', [])
# if states:
# context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
# # 状态转换表
# transitions = fsm_info.get('transitions', {})
# if transitions:
# context_parts.append("")
# context_parts.append("=== STATE TRANSITION TABLE ===")
# context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
# context_parts.append("")
# for state, trans_list in transitions.items():
# for trans in trans_list:
# condition = trans.get('condition', 'default')
# next_state = trans.get('next_state', 'unknown')
# if condition == 'default':
# context_parts.append(f" {state} --[default]--> {next_state}")
# else:
# context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
# # 添加状态转换路径分析
# context_parts.append("")
# context_parts.append("=== STATE TRANSITION PATH HINTS ===")
# reset_state = self._find_reset_state_from_fsm(fsm_info)
# if reset_state:
# context_parts.append(f"Initial State (after reset): {reset_state}")
# context_parts.append("")
# context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
# context_parts.append(" 1. Reset the DUT to initialize to the starting state")
# context_parts.append(" 2. Apply inputs to trigger state transitions")
# context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
# context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
# # 3. 功能点优先级
# function_points = self.semantic_result.get('function_points', [])
# if function_points:
# context_parts.append("")
# context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
# for i, fp in enumerate(function_points[:10]): # Top 10
# name = fp.get('name', 'unknown')
# fp_type = fp.get('type', 'unknown')
# importance = fp.get('importance', 0)
# covered = fp.get('covered', False)
# status = "✓ COVERED" if covered else "✗ NOT COVERED"
# context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
# # 4. 测试场景建议
# test_scenarios = self.semantic_result.get('test_scenarios', [])
# if test_scenarios:
# context_parts.append("")
# context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
# for i, ts in enumerate(test_scenarios[:5]): # Top 5
# name = ts.get('name', 'unknown')
# description = ts.get('description', '')
# priority = ts.get('priority', 0)
# context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
# if context_parts:
# return "\n".join(context_parts)
# return ""
# def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
# """从 FSM 信息中推断复位后的初始状态"""
# # 方法1检查是否有明确的复位状态
# transitions = fsm_info.get('transitions', {})
# # 复位后通常进入第一个定义的状态或特定名称的状态
# states = fsm_info.get('states', [])
# # 常见的初始状态命名
# initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
# for name in initial_state_names:
# if name in states:
# return name
# # 如果没有找到,返回第一个状态
# if states:
# return states[0]
# return None
# # ============================================================================
# # TBInjector - 场景注入器
# # ============================================================================
# class TBInjector:
# """
# 场景注入器 - 将LLM生成的测试代码注入到现有测试平台
# 集成三层防护策略:
# 1. Layer 1: Prompt约束由CoverageParser处理
# 2. Layer 2: 智能代码转换
# 3. Layer 3: 质量评估和重试建议
# """
# def __init__(self, tb_code):
# """
# 初始化注入器
# Args:
# tb_code: 原始测试平台代码字符串
# """
# self.content = tb_code
# self.validator = BlackBoxValidator()
# self.validator._extract_signals_from_tb(tb_code)
# self.last_validation_result = None
# def inject(self, new_code, iter_idx):
# """
# 注入新的测试场景到测试平台
# Args:
# new_code: LLM生成的测试代码
# iter_idx: 迭代序号
# Returns:
# 修改后的测试平台代码
# """
# # Step 1: 预处理代码(包含三层防护)
# scenario_code, result = self._preprocess_code(new_code, iter_idx)
# self.last_validation_result = result
# # 记录日志
# if result['violations']['critical']:
# logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
# for v in result['violations']['critical']:
# logger.warning(f" - {v}")
# if result['violations']['warning']:
# logger.info(f"[CGA-{iter_idx}] Warnings:")
# for v in result['violations']['warning']:
# logger.info(f" - {v}")
# if result['transformations']:
# logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
# for t in result['transformations']:
# logger.info(f" - {t['type']}: {t.get('original', 'N/A')[:50]}...")
# # Step 2: 构建场景块
# scenario_block = self._build_scenario_block(scenario_code, iter_idx)
# # Step 3: 注入到TB中
# modified_tb = self._inject_scenario(scenario_block)
# return modified_tb
# def should_retry(self):
# """是否应该重试"""
# if self.last_validation_result is None:
# return False
# return self.last_validation_result.get('should_retry', False)
# def get_quality_score(self):
# """获取代码质量分数"""
# if self.last_validation_result is None:
# return 0
# return self.last_validation_result.get('quality_score', 0)
# def _preprocess_code(self, code, iter_idx):
# """预处理LLM生成的代码 - 增强版,包含语法预检查"""
# # 移除markdown标记
# code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
# code = re.sub(r'```', '', code)
# # 移除task包装
# code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
# code = re.sub(r'endtask', '', code)
# # 移除$finish和$stop
# code = re.sub(r'\$finish\s*;', '', code)
# code = re.sub(r'\$stop\s*;', '', code)
# # 移除多余空行
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# # [新增] 移除 initial begin ... end 代码块
# # LLM 可能生成完整的 initial begin...end 块,但我们只需要其中的测试代码
# initial_pattern = re.compile(r'\binitial\s+begin\b.*?\bend\b', re.DOTALL | re.IGNORECASE)
# # 检查并移除 initial begin ... end 块
# initial_match = initial_pattern.search(code)
# if initial_match:
# logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
# logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
# # 提取块内的内容
# block_content = initial_match.group(0)
# # 移除 initial begin 和 end 包装
# # 保留块内的实际测试代码
# inner_content = re.sub(r'^\s*initial\s+begin\s*', '', block_content)
# inner_content = re.sub(r'\bend\s*$', '', inner_content)
# # 替换整个块为内部内容
# code = initial_pattern.sub(inner_content.strip(), code, count=1)
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# # 信号名自动修正(在验证之前)
# code = self._auto_correct_signal_names(code)
# # 三层防护:黑盒约束验证和转换
# code, result = self.validator.validate_and_transform(code, self.content)
# # [新增] 第四层Verilog 语法预检查
# # 提取完整 TB 中已声明的信号(不只是代码片段)
# signal_widths = self._extract_signal_widths()
# declared_signals = self._extract_declared_signals()
# # 调用语法检查,传入已声明信号列表
# syntax_result = self.validator.check_syntax_issues(
# code,
# signal_widths,
# declared_signals=declared_signals
# )
# # 合并检查结果
# result['syntax_check'] = syntax_result
# # 记录语法问题日志
# if syntax_result['width_mismatch']:
# logger.warning(f"[CGA-{iter_idx}] Width mismatch detected:")
# for issue in syntax_result['width_mismatch']:
# logger.warning(f" - {issue['message']}")
# if 'suggestion' in issue:
# logger.info(f" Suggestion: {issue['suggestion']}")
# if syntax_result['logic_issues']:
# logger.warning(f"[CGA-{iter_idx}] Logic issues detected:")
# for issue in syntax_result['logic_issues']:
# logger.warning(f" - {issue['message']}")
# if 'suggestion' in issue:
# logger.info(f" Suggestion: {issue['suggestion']}")
# if syntax_result['syntax_warnings']:
# for issue in syntax_result['syntax_warnings']:
# if issue['severity'] == 'error':
# logger.error(f"[CGA-{iter_idx}] Syntax error: {issue['message']}")
# else:
# logger.warning(f"[CGA-{iter_idx}] Syntax warning: {issue['message']}")
# # 如果语法检查发现问题,设置 should_retry
# if syntax_result['should_retry']:
# result['should_retry'] = True
# logger.warning(f"[CGA-{iter_idx}] Syntax issues detected, recommend retry with corrected code")
# code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# return code.strip(), result
# def _extract_declared_signals(self) -> set:
# """从完整测试平台中提取所有已声明的信号"""
# signals = set()
# # 匹配 reg [N:0] signal 或 wire [N:0] signal
# for match in re.finditer(r'\b(reg|wire|logic)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
# signals.add(match.group(2))
# # 匹配 input/output 声明
# for match in re.finditer(r'\b(input|output|inout)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
# signals.add(match.group(2))
# # 匹配模块端口连接中的信号
# for match in re.finditer(r'\.(\w+)\s*\(\s*(\w+)\s*\)', self.content):
# signals.add(match.group(2)) # 添加连接的信号名
# return signals
# def _extract_signal_widths(self) -> Dict[str, int]:
# """从测试平台中提取信号位宽信息"""
# widths = {}
# # 匹配 reg [N:0] signal 或 wire [N:0] signal
# width_pattern = re.compile(r'\b(reg|wire)\s+\[(\d+):(\d+)\]\s+(\w+)')
# for match in width_pattern.finditer(self.content):
# high = int(match.group(2))
# low = int(match.group(3))
# width = high - low + 1
# signal = match.group(4)
# widths[signal] = width
# # 匹配无位宽声明的信号(默认 1 位)
# single_bit_pattern = re.compile(r'\b(reg|wire)\s+(?!.*\[)(\w+)\s*;')
# for match in single_bit_pattern.finditer(self.content):
# signal = match.group(2)
# if signal not in widths:
# widths[signal] = 1
# return widths
# def _auto_correct_signal_names(self, code: str) -> str:
# """自动修正信号名错误"""
# corrections = []
# # 获取正确的复位信号名
# reset_signal = self.validator._find_reset_signal()
# # 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
# if reset_signal != "reset":
# # 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
# pattern = r'\breset\b(?!\w)'
# matches = re.findall(pattern, code)
# if matches:
# code = re.sub(pattern, reset_signal, code)
# corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
# # 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
# if reset_signal == "areset":
# pattern = r'\brst\b(?!\w)'
# matches = re.findall(pattern, code)
# if matches:
# code = re.sub(pattern, reset_signal, code)
# corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
# # 检查是否使用了不存在的信号
# for signal in re.findall(r'\b(\w+)\s*=', code):
# signal = signal.strip()
# # 跳过已知的合法信号
# if signal in self.validator.dut_inputs:
# continue
# # 检查是否是复位信号的别名
# if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
# code = re.sub(rf'\b{signal}\b', reset_signal, code)
# corrections.append(f"{signal} -> {reset_signal}")
# if corrections:
# logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
# return code
# def _build_scenario_block(self, scenario_code, iter_idx):
# """构建完整的场景代码块"""
# # 格式化缩进
# lines = scenario_code.split('\n')
# formatted_lines = []
# for line in lines:
# stripped = line.strip()
# if stripped:
# formatted_lines.append(f" {stripped}")
# formatted_code = '\n'.join(formatted_lines)
# # 检测输出信号用于日志
# output_signals = self._detect_output_signals()
# output_log = self._generate_output_log(output_signals, iter_idx)
# # 构建完整块
# block = f'''
# // ========== CGA Iteration {iter_idx} ==========
# scenario = 100 + {iter_idx};
# // Reset signals to safe state
# {self._generate_signal_reset()}
# #5;
# // CGA generated test sequence:
# {formatted_code}
# // Log results
# {output_log}
# // ==============================================
# '''
# return block
# def _detect_output_signals(self):
# """检测DUT的输出信号"""
# outputs = []
# wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
# for match in wire_pattern.finditer(self.content):
# signal = match.group(1)
# if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
# outputs.append(signal)
# return outputs
# def _generate_signal_reset(self):
# """生成信号重置代码"""
# inputs = []
# reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
# for match in reg_pattern.finditer(self.content):
# signal = match.group(1)
# if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
# inputs.append(signal)
# if inputs:
# return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
# return " // No input signals to reset"
# def _generate_output_log(self, signals, iter_idx):
# """生成输出日志代码"""
# if not signals:
# return f' $display("[CGA-{iter_idx}] Scenario executed");'
# sig_names = ", ".join(signals)
# format_str = ", ".join(["%b"] * len(signals))
# return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
# def _inject_scenario(self, scenario_block):
# """将场景块注入到测试平台"""
# modified_tb = self.content
# # 策略:如果有 $fclose在其之前插入
# if "$fclose" in modified_tb:
# modified_tb = re.sub(
# r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
# scenario_block + r'\1\2',
# modified_tb,
# count=1
# )
# elif "$finish" in modified_tb:
# # 否则在 $finish 之前插入
# modified_tb = modified_tb.replace(
# "$finish;",
# scenario_block + "\n $finish;"
# )
# else:
# # 兜底:在最后一个 end 之前插入
# last_end = modified_tb.rfind("end")
# if last_end != -1:
# modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
# return modified_tb
"""
Description : Utils for CGA (CoverageParser & TBInjector)
- Features: Sticky Mode, Smart Noise Filtering (No assign/decls)
- Enhanced: Three-layer protection for black-box constraints
* Layer 1: Enhanced Prompt constraints (prevention)
* Layer 2: Smart code transformation (conversion)
* Layer 3: Quality assessment & retry (fallback)
- Integrated: Diversity Constraint Injector (Layer 1)
Author : CorrectBench Integration
"""
import re
import os
import logging
from typing import Tuple, List, Optional, Dict, TYPE_CHECKING
# [新增] 导入多样性约束注入器
if TYPE_CHECKING:
from autoline.diversity_injector import DiversityInjector
# 配置日志
logger = logging.getLogger(__name__)
# ============================================================================
# 三层防护策略说明
# ============================================================================
# Layer 1 - Prompt约束: 动态提取允许信号列表,明确约束,正反面示例
# Layer 2 - 智能转换: 检测违规意图,尝试转换为合法形式
# Layer 3 - 质量评估: 违规比例过高时返回质量分数,触发重新生成
# ============================================================================
# ============================================================================
# 黑盒约束检查器 - 三层防护策略实现
# ============================================================================
class BlackBoxValidator:
"""
黑盒约束验证器 - 三层防护策略
Layer 1: 增强Prompt约束预防
- 动态提取允许信号列表
- 生成明确的约束提示
Layer 2: 智能代码转换(转换)
- 检测违规意图
- 尝试转换为合法的等价形式
- 转换失败时才注释
Layer 3: 质量评估(重试)
- 计算代码质量分数
- 违规比例过高时建议重试
"""
# 常见的内部信号命名模式(按严重程度分类)
INTERNAL_SIGNAL_PATTERNS = {
# 高风险FSM状态相关绝对不能修改
'critical': [
(r'\bstate\b', 'FSM状态寄存器'),
(r'\bnext_state\b', 'FSM下一状态'),
(r'\bcurrent_state\b', 'FSM当前状态'),
(r'\bnext\b(?!\s*[,@])', '下一状态简写'),
],
# 中风险:计数器和内部寄存器
'warning': [
(r'\bcounter\b', '内部计数器'),
(r'\bcount\b', '计数寄存器'),
(r'\bcnt\b', '计数简写'),
(r'\bfall_counter\b', '下落计数器'),
(r'\breg_\w+', '内部寄存器'),
],
# 低风险:可疑信号(需要确认)
'info': [
(r'\binternal_\w+', '内部信号'),
(r'\btemp_\w+', '临时信号'),
(r'\bprev_\w+', '前一状态'),
]
}
# 非法语句模式
FORBIDDEN_STATEMENTS = [
(r'\bforce\s+(\w+)', 'force语句', 'critical'),
(r'\bassign\s+(\w+)\s*=', '连续赋值', 'critical'),
(r'\bdeassign\s+', 'deassign语句', 'critical'),
(r'\brelease\s+', 'release语句', 'critical'),
]
# 层次化访问模式(如 DUT.state
HIERARCHICAL_ACCESS = r'(\w+)\s*\.\s*(\w+)'
# =========================================================================
# [新增] 通用禁止信号名模式 - LLM 常见的"猜测型"命名
# =========================================================================
# 这些模式会被自动检测并禁止,防止 LLM 使用不存在的信号名
FORBIDDEN_NAME_PATTERNS = [
# 带数字后缀的通用名
(r'^input_signal_\d+$', '带数字后缀的 input_signal'),
(r'^input_\d+$', '带数字后缀的 input'),
(r'^in_\d+$', '带数字后缀的 in'),
(r'^output_signal_\d+$', '带数字后缀的 output_signal'),
(r'^output_\d+$', '带数字后缀的 output'),
(r'^out_\d+$', '带数字后缀的 out'),
(r'^data_\d+$', '带数字后缀的 data'),
(r'^data_in_\d+$', '带数字后缀的 data_in'),
(r'^data_out_\d+$', '带数字后缀的 data_out'),
(r'^signal_\d+$', '带数字后缀的 signal'),
(r'^sig_\d+$', '带数字后缀的 sig'),
(r'^port_\d+$', '带数字后缀的 port'),
# 通用控制信号名(除非实际存在)
(r'^reset$', '通用复位名'),
(r'^rst$', '通用复位简写'),
(r'^rst_n$', '通用低电平复位'),
(r'^rst_b$', '通用低电平复位'),
(r'^clr$', '通用清零'),
(r'^clear$', '通用清零'),
(r'^enable$', '通用使能'),
(r'^en$', '通用使能简写'),
(r'^ena$', '通用使能简写'),
(r'^clk_in$', '通用时钟输入'),
(r'^clock$', '通用时钟'),
(r'^clk$', '通用时钟简写'),
# 极简通用名
(r'^a$', '单字母信号名'),
(r'^b$', '单字母信号名'),
(r'^c$', '单字母信号名'),
(r'^d$', '单字母信号名'),
(r'^x$', '单字母信号名'),
(r'^y$', '单字母信号名'),
(r'^z$', '单字母信号名'),
]
def __init__(self, dut_inputs: List[str] = None, dut_outputs: List[str] = None, dut_code: str = None):
"""
Args:
dut_inputs: DUT模块的输入端口列表
dut_outputs: DUT模块的输出端口列表
dut_code: DUT模块的源代码用于提取所有信号名
"""
self.dut_inputs = dut_inputs or []
self.dut_outputs = dut_outputs or []
self.dut_code = dut_code or ""
# [新增] 从 DUT 代码提取的所有信号名
self.dut_all_signals = set() # 所有信号名(端口 + 内部信号)
self.dut_internal_signals = set() # 仅内部信号
self.signal_widths = {} # 信号位宽
if dut_code:
self._extract_all_signals_from_dut(dut_code)
self.violations = {'critical': [], 'warning': [], 'info': []}
self.transformations = []
# =========================================================================
# [新增] 从 DUT 代码提取所有信号名
# =========================================================================
def _extract_all_signals_from_dut(self, dut_code: str):
"""
从 DUT 代码中提取所有信号名(端口 + 内部信号)
提取内容:
- 输入端口 (input)
- 输出端口 (output)
- 双向端口 (inout)
- 内部寄存器 (reg)
- 内部连线 (wire)
- 参数 (parameter/localparam)
"""
self.dut_all_signals = set()
self.dut_internal_signals = set()
self.signal_widths = {}
# 1. 提取端口声明
# 格式: input/output/inout [width] name
port_patterns = [
# 带位宽的端口: input [7:0] data_out
(r'(?:^|[\s;,])(input|output|inout)\s+\[(\d+):(\d+)\]\s+(\w+)', 'port'),
# 无位宽的端口: input clk
(r'(?:^|[\s;,])(input|output|inout)\s+(\w+)(?=\s*[;,\n)])', 'port_simple'),
]
for pattern, ptype in port_patterns:
for match in re.finditer(pattern, dut_code, re.MULTILINE):
if ptype == 'port':
direction = match.group(1)
msb = int(match.group(2))
lsb = int(match.group(3))
name = match.group(4)
width = msb - lsb + 1
self.dut_all_signals.add(name)
self.signal_widths[name] = width
if direction == 'input' and name not in self.dut_inputs:
self.dut_inputs.append(name)
elif direction == 'output' and name not in self.dut_outputs:
self.dut_outputs.append(name)
else:
direction = match.group(1)
name = match.group(2)
# 排除关键字
if name.lower() not in ['wire', 'reg', 'logic', 'input', 'output']:
self.dut_all_signals.add(name)
self.signal_widths[name] = 1
if direction == 'input' and name not in self.dut_inputs:
self.dut_inputs.append(name)
elif direction == 'output' and name not in self.dut_outputs:
self.dut_outputs.append(name)
# 2. 提取内部信号声明 (reg, wire, logic)
internal_patterns = [
# 带位宽: reg [7:0] counter
(r'\b(reg|wire|logic)\s+\[(\d+):(\d+)\]\s+(\w+)', 'internal_width'),
# 无位宽: reg state
(r'\b(reg|wire|logic)\s+(\w+)(?=\s*[;,\n=])', 'internal_simple'),
]
for pattern, ptype in internal_patterns:
for match in re.finditer(pattern, dut_code):
if ptype == 'internal_width':
sig_type = match.group(1)
msb = int(match.group(2))
lsb = int(match.group(3))
name = match.group(4)
width = msb - lsb + 1
self.dut_all_signals.add(name)
self.dut_internal_signals.add(name)
self.signal_widths[name] = width
else:
sig_type = match.group(1)
name = match.group(2)
# 排除关键字和已提取的端口
if name.lower() not in ['wire', 'reg', 'logic', 'input', 'output', 'begin', 'end', 'if', 'else', 'case', 'always', 'initial']:
if name not in self.dut_inputs and name not in self.dut_outputs:
self.dut_all_signals.add(name)
self.dut_internal_signals.add(name)
self.signal_widths[name] = 1
# 3. 提取参数
param_pattern = r'(?:parameter|localparam)\s+(?:\[\d+:\d+\]\s*)?(\w+)\s*='
for match in re.finditer(param_pattern, dut_code):
name = match.group(1)
self.dut_all_signals.add(name)
logger.info(f"Extracted from DUT: {len(self.dut_inputs)} inputs, {len(self.dut_outputs)} outputs, "
f"{len(self.dut_internal_signals)} internal signals, total {len(self.dut_all_signals)} signals")
def get_all_allowed_signals(self) -> List[str]:
"""获取所有允许的信号名(端口 + 内部信号)"""
return sorted(list(self.dut_all_signals))
def get_drivable_signals(self) -> List[str]:
"""获取可驱动的信号名(仅输入端口)"""
return sorted(self.dut_inputs)
def get_readable_signals(self) -> List[str]:
"""获取可读取的信号名(输出端口 + 内部信号)"""
return sorted(list(set(self.dut_outputs) | self.dut_internal_signals))
def is_valid_signal(self, name: str) -> bool:
"""检查信号名是否在 DUT 中存在"""
return name in self.dut_all_signals
def is_drivable_signal(self, name: str) -> bool:
"""检查信号是否可以被驱动(赋值)"""
return name in self.dut_inputs
def generate_signal_constraint_prompt(self) -> str:
"""
生成基于 DUT 实际信号名的约束提示
直接告诉 LLM只能使用这些已存在的信号名
"""
drivable = self.get_drivable_signals()
readable = self.get_readable_signals()
all_signals = self.get_all_allowed_signals()
reset_signal = self._find_reset_signal()
prompt = f"""
{'='*60}
📋 [DUT SIGNAL NAMES - USE ONLY THESE]
{'='*60}
⚠️ CRITICAL: You can ONLY use signal names that EXIST in the DUT!
⚠️ DO NOT invent or guess any signal name!
✅ DRIVABLE INPUTS (you CAN assign to these):
{drivable}
📖 READABLE SIGNALS (you can read but NOT assign):
{readable}
📝 ALL DUT SIGNALS (for reference):
{all_signals[:20]}{'...' if len(all_signals) > 20 else ''}
🚫 FORBIDDEN - THESE DO NOT EXIST:
- Any name NOT in the lists above
- "input_signal_1", "input_signal_2", "in_1", "data_1" (generic guesses)
- "reset" (actual: "{reset_signal}"), "rst", "enable", "en" (unless listed above)
{'='*60}
⚠️ RULE: If a signal is not listed in DRIVABLE INPUTS, it does NOT exist!
You MUST use exact signal names from the DRIVABLE INPUTS list.
✅ CORRECT EXAMPLE:
{reset_signal} = 1; // '{reset_signal}' is in DRIVABLE INPUTS
{drivable[0] if drivable else 'signal'} = 0; // Using actual signal name
❌ WRONG EXAMPLE:
input_signal_1 = 1; // Does NOT exist in DUT!
reset = 1; // Does NOT exist! Use '{reset_signal}' instead!
"""
return prompt
def validate_and_transform(self, code: str, tb_code: str = None) -> Tuple[str, Dict]:
"""验证并转换代码 - 主入口"""
self.violations = {'critical': [], 'warning': [], 'info': []}
self.transformations = []
if tb_code:
self._extract_signals_from_tb(tb_code)
original_lines = code.strip().split('\n')
total_lines = len([l for l in original_lines if l.strip() and not l.strip().startswith('//')])
# Step 1: 移除非法语句
code = self._transform_forbidden_statements(code)
# Step 2: 转换层次化访问
code = self._transform_hierarchical_access(code)
# Step 3: 智能转换内部信号访问
code = self._smart_transform_internal_signals(code)
# Step 4: 最后清理
code = self._final_cleanup(code)
# 计算质量分数
quality_score = self._calculate_quality_score(total_lines)
# 决定是否需要重试
should_retry = quality_score < 50 or len(self.violations['critical']) > 3
result = {
'quality_score': quality_score,
'is_valid': len(self.violations['critical']) == 0,
'violations': self.violations,
'transformations': self.transformations,
'should_retry': should_retry,
'allowed_signals': self._get_allowed_signals_info()
}
return code.strip(), result
def _extract_signals_from_tb(self, tb_code: str):
"""从测试平台代码中提取DUT输入输出信号"""
dut_match = re.search(r'(\w+)\s+(?:DUT|dut|uut|UUT)\s*\(', tb_code, re.IGNORECASE)
if dut_match:
start = dut_match.start()
bracket_count = 0
end = start
for i, char in enumerate(tb_code[start:]):
if char == '(':
bracket_count += 1
elif char == ')':
bracket_count -= 1
if bracket_count == 0:
end = start + i + 1
break
dut_instance = tb_code[start:end]
port_pattern = r'\.(\w+)\s*\(\s*(\w+)\s*\)'
for match in re.finditer(port_pattern, dut_instance):
signal_name = match.group(2)
is_input = re.search(rf'\breg\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
is_output = re.search(rf'\bwire\s+(?:\[[\d:]+\]\s*)?{re.escape(signal_name)}\s*[;,\n]', tb_code)
if is_input and signal_name not in self.dut_inputs:
self.dut_inputs.append(signal_name)
if is_output and signal_name not in self.dut_outputs:
self.dut_outputs.append(signal_name)
# 备用方案通过reg/wire声明推断
if not self.dut_inputs and not self.dut_outputs:
for match in re.finditer(r'\breg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
signal = match.group(1)
if signal.lower() not in ['file', 'scenario', 'i', 'j', 'k', 'cnt']:
if signal not in self.dut_inputs:
self.dut_inputs.append(signal)
for match in re.finditer(r'\bwire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*[;,\n]', tb_code):
signal = match.group(1)
if signal not in self.dut_outputs:
self.dut_outputs.append(signal)
def _transform_forbidden_statements(self, code: str) -> str:
"""转换非法语句"""
for pattern, desc, severity in self.FORBIDDEN_STATEMENTS:
matches = list(re.finditer(pattern, code, re.IGNORECASE))
for match in reversed(matches):
signal = match.group(1) if match.groups() else 'unknown'
self.violations[severity].append(f"{desc}: {signal}")
line_start = code.rfind('\n', 0, match.start()) + 1
line_end = code.find('\n', match.end())
if line_end == -1:
line_end = len(code)
original_line = code[line_start:line_end]
# 尝试转换 force -> 直接赋值(仅对输入信号)
if 'force' in match.group(0).lower() and signal in self.dut_inputs:
new_line = re.sub(r'\bforce\s+', '', original_line, flags=re.IGNORECASE)
code = code[:line_start] + new_line + code[line_end:]
self.transformations.append({
'type': 'force_to_assign',
'original': original_line.strip(),
'transformed': new_line.strip()
})
continue
code = code[:line_start] + '// [BLOCKED] ' + original_line.lstrip() + code[line_end:]
self.transformations.append({
'type': 'blocked',
'original': original_line.strip(),
'reason': desc
})
return code
def _transform_hierarchical_access(self, code: str) -> str:
"""转换层次化访问(如 DUT.state"""
for match in re.finditer(self.HIERARCHICAL_ACCESS, code):
prefix = match.group(1)
signal = match.group(2)
if prefix.upper() in ['DUT', 'UUT', 'TOP', 'TB']:
if signal not in self.dut_outputs:
self.violations['critical'].append(f"层次化访问内部信号: {prefix}.{signal}")
line_start = code.rfind('\n', 0, match.start()) + 1
line_end = code.find('\n', match.end())
if line_end == -1:
line_end = len(code)
original_line = code[line_start:line_end]
code = code[:line_start] + '// [HIERARCHY] ' + original_line.lstrip() + code[line_end:]
return code
def _smart_transform_internal_signals(self, code: str) -> str:
"""智能转换内部信号访问"""
lines = code.split('\n')
transformed_lines = []
for line in lines:
stripped = line.strip()
if stripped.startswith('//') or not stripped:
transformed_lines.append(line)
continue
if (stripped.startswith('#') or stripped.startswith('$') or
stripped.startswith('repeat(') or stripped.startswith('@(')):
transformed_lines.append(line)
continue
detected_signals = self._detect_internal_signals_in_line(stripped)
has_critical = detected_signals.get('critical', [])
has_warning = detected_signals.get('warning', [])
if not has_critical and not has_warning:
transformed_lines.append(line)
continue
context = self._analyze_signal_context(stripped, detected_signals)
if context['type'] == 'assignment':
transformed_lines.append(f"// [INTERNAL_ASSIGN] Cannot modify internal signal")
transformed_lines.append(f"// Original: {stripped}")
self.violations['critical'].append(f"尝试修改内部信号: {context['signals']}")
elif context['type'] == 'condition':
transformed = self._transform_condition(stripped, context)
transformed_lines.append(transformed)
self.transformations.append({
'type': 'condition_transform',
'original': stripped,
'transformed': transformed
})
elif context['type'] == 'wait_for_state':
transformed = self._transform_state_wait(stripped, context)
transformed_lines.append(transformed)
self.transformations.append({
'type': 'wait_transform',
'original': stripped,
'transformed': transformed
})
else:
if has_critical:
transformed_lines.append(f"// [WARNING] Contains internal signal reference: {has_critical}")
transformed_lines.append(f"// Original: {stripped}")
for sig in has_critical:
self.violations['warning'].append(f"可疑的内部信号访问: {sig}")
else:
transformed_lines.append(line)
return '\n'.join(transformed_lines)
def _detect_internal_signals_in_line(self, line: str) -> Dict[str, List[str]]:
"""检测行中的内部信号"""
detected = {'critical': [], 'warning': [], 'info': []}
LEGAL_KEYWORDS = {
'repeat', 'posedge', 'negedge', 'begin', 'end', 'if', 'else',
'while', 'for', 'case', 'default', 'always', 'initial',
'assign', 'wire', 'reg', 'input', 'output', 'inout',
'parameter', 'localparam', 'integer', 'real', 'time',
'clk', 'clock', 'reset', 'rst', 'areset', 'rst_n',
'enable', 'ena', 'valid', 'ready', 'data', 'addr', 'address',
'true', 'false', 'idle', 'wait'
}
SYSTEM_FUNCTIONS = {'$display', '$write', '$monitor', '$fopen', '$fclose',
'$fdisplay', '$fwrite', '$readmemh', '$readmemb',
'$finish', '$stop', '$random', '$time', '$stime'}
for severity, patterns in self.INTERNAL_SIGNAL_PATTERNS.items():
for pattern, name in patterns:
matches = re.findall(pattern, line, re.IGNORECASE)
if matches:
for match in matches:
if isinstance(match, tuple):
match = match[0] if match[0] else match[1]
match_lower = match.lower() if match else ''
if match_lower in LEGAL_KEYWORDS:
continue
if match in SYSTEM_FUNCTIONS:
continue
if match in self.dut_inputs or match in self.dut_outputs:
continue
if match.startswith('$'):
continue
if match and match not in detected[severity]:
detected[severity].append(match)
return detected
def _analyze_signal_context(self, line: str, signals: Dict) -> Dict:
"""分析信号使用上下文"""
assign_match = re.search(r'(\w+)\s*(?:=|<=)\s*', line)
if assign_match:
target = assign_match.group(1)
if target in signals.get('critical', []) or target in signals.get('warning', []):
return {'type': 'assignment', 'signals': [target], 'line': line}
if re.search(r'wait\s*\([^)]*state', line, re.IGNORECASE):
return {'type': 'wait_for_state', 'signals': signals.get('critical', []), 'line': line}
if re.search(r'if\s*\(|while\s*\(|@\s*\(', line):
return {'type': 'condition', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
return {'type': 'other', 'signals': signals.get('critical', []) + signals.get('warning', []), 'line': line}
def _transform_condition(self, line: str, context: Dict) -> str:
"""转换条件判断语句"""
original = line
if 'state' in str(context['signals']):
indent = len(line) - len(line.lstrip())
spaces = ' ' * indent
transformed = f"// [TRANSFORMED] Original: {original.strip()}\n"
transformed += f"{spaces}// Cannot directly check internal state\n"
transformed += f"{spaces}// Alternative: Wait for expected clock cycles\n"
transformed += f"{spaces}repeat(5) @(posedge clk); // Adjust cycles as needed"
self.violations['warning'].append(f"条件判断转换: {original.strip()}")
return transformed
return f"// [TRANSFORMED] {original}"
def _transform_state_wait(self, line: str, context: Dict) -> str:
"""转换状态等待语句"""
indent = len(line) - len(line.lstrip())
spaces = ' ' * indent
transformed = f"// [TRANSFORMED] Original: {line.strip()}\n"
transformed += f"{spaces}// Cannot wait for internal state directly\n"
transformed += f"{spaces}// Alternative: Drive inputs and wait for expected cycles\n"
transformed += f"{spaces}repeat(10) @(posedge clk); // Adjust based on FSM design"
self.violations['info'].append(f"状态等待转换: {line.strip()}")
return transformed
def _final_cleanup(self, code: str) -> str:
"""最终清理"""
lines = code.split('\n')
cleaned = []
for line in lines:
stripped = line.strip()
if stripped in ['begin', 'end'] and cleaned:
last = cleaned[-1].strip()
if last.startswith('// [TRANSFORMED]') or last.startswith('// [INTERNAL'):
continue
cleaned.append(line)
result = '\n'.join(cleaned)
result = re.sub(r'\n\s*\n\s*\n', '\n\n', result)
return result
def _calculate_quality_score(self, total_lines: int) -> int:
"""计算代码质量分数"""
if total_lines == 0:
return 0
score = 100
score -= len(self.violations['critical']) * 20
score -= len(self.violations['warning']) * 5
score += len([t for t in self.transformations if 'blocked' not in t.get('type', '')]) * 5
return max(0, min(100, score))
def _get_allowed_signals_info(self) -> Dict:
"""获取允许的信号信息"""
return {
'inputs': self.dut_inputs,
'outputs': self.dut_outputs,
'all_allowed': self.dut_inputs + self.dut_outputs
}
def generate_constraint_prompt(self) -> str:
"""
生成动态约束提示
如果从 DUT 提取了信号名,则使用精确的信号列表
否则使用通用的约束提示
"""
# [优先] 如果已经从 DUT 提取了信号名,使用精确的约束
if self.dut_all_signals:
return self.generate_signal_constraint_prompt()
# [备选] 使用通用约束提示
reset_signal = self._find_reset_signal()
inputs_list = str(self.dut_inputs) if self.dut_inputs else "[]"
# 动态获取禁止信号名示例
forbidden_examples = self.get_forbidden_examples(count=8)
forbidden_str = ", ".join(f'"{ex}"' for ex in forbidden_examples[:4])
forbidden_str2 = ", ".join(f'"{ex}"' for ex in forbidden_examples[4:8]) if len(forbidden_examples) > 4 else ""
prompt = f"""
{'='*60}
📋 [SIGNAL CONSTRAINTS - DERIVED FROM DUT]
{'='*60}
"""
if self.dut_inputs:
prompt += f"""
✅ ALLOWED INPUT SIGNALS (you CAN drive these):
{inputs_list}
🚫 FORBIDDEN SIGNAL NAMES - DO NOT USE THESE:
Generic patterns: {forbidden_str}
"""
if forbidden_str2:
prompt += f" More examples: {forbidden_str2}\n"
prompt += f"""
⚠️ Also forbidden: "reset" (actual: "{reset_signal}"), "rst", "enable", "en"
⚠️ Any name NOT in ALLOWED INPUTS above is FORBIDDEN!
⚠️ CRITICAL RULE: You MUST use ONLY the signal names from ALLOWED INPUTS!
If a signal name is not in the list, it does NOT exist in this design!
"""
else:
prompt += """
⚠️ WARNING: Could not extract input signals from testbench.
Please check the testbench code for actual signal names before writing test code.
"""
if self.dut_outputs:
outputs_list = str(self.dut_outputs)
prompt += f"""
📖 OUTPUT SIGNALS (you can READ but NOT write):
{outputs_list}
"""
prompt += f"""
🚫 FORBIDDEN ACTIONS:
1. NEVER assign to internal signals (state, counter, etc.)
2. NEVER use 'force' or 'assign' statements
3. NEVER access DUT.state (hierarchical access)
4. NEVER guess signal names - use ONLY from ALLOWED INPUTS!
✅ CORRECT APPROACH:
- To reach FSM state: drive inputs and WAIT for transition
- Example: {reset_signal} = 1; repeat(2) @(posedge clk); {reset_signal} = 0;
"""
return prompt
def _find_reset_signal(self) -> str:
"""查找复位信号名"""
# 按优先级查找常见的复位信号名
reset_candidates = ['areset', 'rst_n', 'rst', 'reset', 'rst_b']
for sig in reset_candidates:
if sig in self.dut_inputs:
return sig
# 如果没找到,检查输入列表中是否有类似名称
for sig in self.dut_inputs:
sig_lower = sig.lower()
if 'reset' in sig_lower or 'rst' in sig_lower:
return sig
# 默认返回第一个输入信号(排除 clk
for sig in self.dut_inputs:
if 'clk' not in sig.lower():
return sig
return "reset" # 兜底
# =========================================================================
# [新增] 通用禁止信号名检测
# =========================================================================
def is_forbidden_signal_name(self, name: str) -> Tuple[bool, str]:
"""
检测信号名是否为禁止的"猜测型"名称
Args:
name: 待检测的信号名
Returns:
(is_forbidden, reason): 是否禁止及原因
"""
name_lower = name.lower()
# 如果信号名在允许列表中,则不禁止
if name in self.dut_inputs or name in self.dut_outputs:
return False, ""
# 检查是否匹配禁止模式
for pattern, reason in self.FORBIDDEN_NAME_PATTERNS:
if re.match(pattern, name_lower, re.IGNORECASE):
return True, reason
return False, ""
def get_forbidden_examples(self, count: int = 6) -> List[str]:
"""
动态生成禁止信号名示例
基于实际 DUT 信号生成有针对性的错误示例
Args:
count: 返回的示例数量
Returns:
禁止信号名示例列表
"""
examples = []
# 1. 带数字后缀的通用名(始终禁止)
generic_patterns = [
'input_signal_1', 'input_signal_2',
'in_1', 'in_2', 'in_3',
'data_1', 'data_2',
'signal_1', 'signal_2',
]
examples.extend(generic_patterns[:count])
# 2. 基于实际信号生成"错误猜测"示例
reset_signal = self._find_reset_signal()
# 如果实际复位信号不是 reset/rst则添加这些为禁止示例
if reset_signal and reset_signal not in ['reset', 'rst']:
if reset_signal.lower() != 'reset':
examples.append('reset') # 错误:应该用 areset
if reset_signal.lower() != 'rst':
examples.append('rst') # 错误:应该用 areset
# 3. 检查是否有时钟信号
clk_signals = [s for s in self.dut_inputs if 'clk' in s.lower()]
if clk_signals:
actual_clk = clk_signals[0]
if actual_clk != 'clk':
examples.append('clk') # 错误:应该用实际时钟名
# 4. 检查是否有使能信号
en_signals = [s for s in self.dut_inputs if 'en' in s.lower() or 'enable' in s.lower()]
if not en_signals:
examples.append('enable') # 不存在的使能信号
examples.append('en')
# 去重并限制数量
seen = set()
unique_examples = []
for ex in examples:
if ex not in seen and ex not in self.dut_inputs:
seen.add(ex)
unique_examples.append(ex)
if len(unique_examples) >= count:
break
return unique_examples
def detect_forbidden_signals_in_code(self, code: str) -> List[Dict]:
"""
检测代码中使用的禁止信号名
Args:
code: Verilog 代码
Returns:
检测到的禁止信号列表
"""
forbidden_signals = []
# 提取代码中使用的所有信号名(赋值语句左侧)
# 匹配: signal = value 形式
for match in re.finditer(r'^\s*(\w+)\s*=', code, re.MULTILINE):
signal = match.group(1)
is_forbidden, reason = self.is_forbidden_signal_name(signal)
if is_forbidden:
forbidden_signals.append({
'signal': signal,
'reason': reason,
'line': match.group(0).strip()
})
return forbidden_signals
# =========================================================================
# [新增] Verilog 语法预检查 - 检测常见逻辑错误
# =========================================================================
def check_syntax_issues(self, code: str, signal_widths: Dict[str, int] = None, declared_signals: set = None) -> Dict:
"""
检测 Verilog 代码中的常见语法/逻辑问题
Args:
code: 待检查的代码
signal_widths: 信号位宽映射 (可选,如 {'in': 1, 'data': 8})
declared_signals: 已声明的信号集合 (可选,用于检测未声明信号)
Returns:
{
'width_mismatch': [...], # 位宽不匹配警告
'logic_issues': [...], # 逻辑问题
'syntax_warnings': [...], # 语法警告
'should_retry': bool # 是否建议重试
}
"""
result = {
'width_mismatch': [],
'logic_issues': [],
'syntax_warnings': [],
'should_retry': False
}
# 检查位宽不匹配
result['width_mismatch'] = self._check_width_mismatch(code, signal_widths)
# 检查逻辑问题
result['logic_issues'] = self._check_logic_issues(code, signal_widths)
# 检查其他语法问题(传入已声明信号)
result['syntax_warnings'] = self._check_syntax_warnings(code, declared_signals)
# 决定是否需要重试
# 包括位宽不匹配、逻辑问题、语法错误severity='error'
has_syntax_errors = any(
issue.get('severity') == 'error'
for issue in result['syntax_warnings']
)
result['should_retry'] = (
len(result['width_mismatch']) > 0 or
len(result['logic_issues']) > 0 or
has_syntax_errors
)
return result
def _check_width_mismatch(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
"""
检测位宽不匹配问题
常见问题:
- {signal} = N'b... 将多位值赋给单比特信号
- signal = N'b... 位宽不匹配
"""
issues = []
signal_widths = signal_widths or {}
# 默认假设未声明信号为 1 位
def get_width(sig):
return signal_widths.get(sig, 1)
# 模式1: {signal} = N'bvalue (拼接赋值)
# 例: {in} = 8'b01111100 - 将 8 位赋给 1 位
concat_pattern = re.compile(r'\{(\w+)\}\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
for match in concat_pattern.finditer(code):
signal = match.group(1)
value_width = int(match.group(2))
base = match.group(3)
value = match.group(4)
actual_width = get_width(signal)
if value_width > actual_width:
issues.append({
'type': 'concat_width_mismatch',
'signal': signal,
'signal_width': actual_width,
'assigned_width': value_width,
'original': match.group(0),
'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value via concatenation. Verilog will truncate.",
'severity': 'warning',
'suggestion': f"Use a shift register: reg [{value_width-1}:0] temp; temp = {value_width}'{base}{value}; then shift bits one by one"
})
# 模式2: signal = N'bvalue (直接赋值)
assign_pattern = re.compile(r'\b(\w+)\s*=\s*(\d+)\'([bhd])([0-9a-fA-FxXzZ_]+)')
for match in assign_pattern.finditer(code):
signal = match.group(1)
value_width = int(match.group(2))
# 跳过拼接赋值(已处理)
if f'{{{signal}}}' in match.group(0):
continue
actual_width = get_width(signal)
# 只有当信号已知且位宽不匹配时才警告
if signal in signal_widths and value_width > actual_width:
issues.append({
'type': 'direct_width_mismatch',
'signal': signal,
'signal_width': actual_width,
'assigned_width': value_width,
'original': match.group(0),
'message': f"Signal '{signal}' is {actual_width}-bit, but assigned {value_width}-bit value. Truncation will occur.",
'severity': 'warning'
})
return issues
def _check_logic_issues(self, code: str, signal_widths: Dict[str, int] = None) -> List[Dict]:
"""
检测逻辑问题
常见问题:
- 单比特信号自移位 (in = in >> 1 无效果)
- 无效的循环条件
"""
issues = []
signal_widths = signal_widths or {}
def get_width(sig):
return signal_widths.get(sig, 1)
# 模式: signal = signal >> N 或 signal = signal << N
shift_pattern = re.compile(r'\b(\w+)\s*=\s*\1\s*(>>|<<)\s*(\d+)?')
for match in shift_pattern.finditer(code):
signal = match.group(1)
direction = match.group(2)
shift_amount = int(match.group(3)) if match.group(3) else 1
actual_width = get_width(signal)
if actual_width == 1:
issues.append({
'type': 'single_bit_shift',
'signal': signal,
'direction': direction,
'original': match.group(0),
'message': f"Single-bit signal '{signal}' self-shift has no effect. Result is always 0.",
'severity': 'warning',
'suggestion': f"Use a shift register for bit-serial input, not the input signal itself"
})
# 模式: repeat(N) begin ... signal = signal >> 1; end (循环移位单比特)
repeat_shift_pattern = re.compile(r'repeat\s*\(\s*\d+\s*\)\s*begin[^}]*?(\w+)\s*=\s*\1\s*(>>|<<)', re.DOTALL)
for match in repeat_shift_pattern.finditer(code):
signal = match.group(1)
actual_width = get_width(signal)
if actual_width == 1:
issues.append({
'type': 'repeat_single_bit_shift',
'signal': signal,
'original': match.group(0)[:100] + '...',
'message': f"Repeat loop shifting single-bit signal '{signal}' is ineffective",
'severity': 'warning'
})
return issues
def _check_syntax_warnings(self, code: str, declared_signals: set = None) -> List[Dict]:
"""
检测其他语法问题
Args:
code: 待检查的代码
declared_signals: 已声明的信号集合 (从完整 TB 中提取)
"""
issues = []
declared_signals = declared_signals or set()
# 检查: 缺少分号
# 注意: 这只是简单检查,不是完整解析
lines = code.split('\n')
for i, line in enumerate(lines):
stripped = line.strip()
if not stripped or stripped.startswith('//'):
continue
# 跳过不需要分号的行
skip_patterns = [
r'^begin$', r'^end$', r'^endcase$', r'^endmodule$',
r'^else$', r'^\)$', r'^\}\s*$', r'^`timescale', r'^`include'
]
if any(re.match(p, stripped) for p in skip_patterns):
continue
# 检查是否需要分号但没有
needs_semicolon = re.search(r'\b(initial|always|assign|reg|wire|parameter|localport)\b', stripped) is None
has_semicolon = stripped.endswith(';') or stripped.endswith(')') or stripped.endswith('}')
if needs_semicolon and not has_semicolon and not stripped.endswith('begin'):
# 可能缺少分号(但不确定)
pass # 暂不报警,避免误报
# 检查: 不匹配的 begin/end
begin_count = len(re.findall(r'\bbegin\b', code))
end_count = len(re.findall(r'\bend\b', code))
if begin_count != end_count:
issues.append({
'type': 'mismatched_begin_end',
'message': f"Mismatched begin/end: {begin_count} begin vs {end_count} end",
'severity': 'error'
})
# 检查: 未声明的信号(在赋值左侧使用的信号)
# 使用传入的已声明信号集合
for match in re.finditer(r'^\s*(\w+)\s*=', code, re.MULTILINE):
signal = match.group(1)
# 跳过系统任务和关键字
if signal in ['if', 'else', 'case', 'for', 'while', 'repeat', 'assign', 'force', 'release']:
continue
# 跳过以 $ 开头的系统任务
if signal.startswith('$'):
continue
# 检查是否在已声明信号列表中
if signal not in declared_signals:
issues.append({
'type': 'undeclared_signal',
'signal': signal,
'message': f"Signal '{signal}' is used but not declared in the testbench",
'severity': 'error',
'suggestion': f"Use an existing signal name (declared: {', '.join(list(declared_signals)[:10])}...)"
})
# 检查: always 块与时钟生成冲突
# 检测是否有多个 always/initial 块驱动同一信号
always_blocks = re.findall(r'\balways\s*(@[^i]|begin)', code)
initial_clk_blocks = len(re.findall(r'initial\s+begin[^i]*?clk\s*=', code, re.DOTALL))
always_clk_blocks = len(re.findall(r'\balways[^i]*?clk\s*=', code, re.DOTALL))
if initial_clk_blocks > 0 and always_clk_blocks > 0:
issues.append({
'type': 'multiple_clock_drivers',
'message': f"Multiple clock drivers detected: {initial_clk_blocks} initial + {always_clk_blocks} always blocks driving clk",
'severity': 'error',
'suggestion': "Remove duplicate clock generation. The testbench already has clock generation."
})
# 检查: initial 块嵌套(生成了 initial begin ... end 在注入时会导致嵌套)
if re.search(r'\binitial\s+begin\b', code):
issues.append({
'type': 'initial_block_injection',
'message': "Code contains 'initial begin...end' block which should not be injected into an existing initial block",
'severity': 'error',
'suggestion': "Remove the 'initial begin...end' wrapper, keep only the test statements inside"
})
return issues
# ============================================================================
# CoverageParser - 覆盖率解析器
# ============================================================================
class CoverageParser:
"""覆盖率解析器 - 从带注释的Verilog文件中提取未覆盖的代码块
[增强] 集成语义分析结果,提供更精准的 FSM 状态路径指导
[新增] 集成能量分配层,提供目标功能点优先级信息
[新增] 集成多样性约束注入器,避免测试用例同质化
[新增] 从 DUT 代码提取信号名,精确约束 LLM
"""
def __init__(self, annotated_file, tb_code=None, semantic_result=None,
energy_allocator=None, diversity_injector=None, dut_code=None):
self.file_path = annotated_file
self.tb_code = tb_code
self.semantic_result = semantic_result # [新增] 语义分析结果
self.energy_allocator = energy_allocator # [新增] 能量分配器
self.diversity_injector = diversity_injector # [新增] 多样性约束注入器
self.dut_code = dut_code # [新增] DUT 代码
# 修复Verilator 覆盖率标记格式多样化:
# %NNNNNN - 行覆盖计数(%000000 表示从未执行)
# ~NNNNNN - 分支/条件覆盖计数(~000000 表示分支从未执行)
# ^NNNNNN - 未覆盖的分支标记
# NNNNNN - 空格开头+数字(某些 Verilator 版本)
# NNNNNN - 纯数字开头(无前缀)
self.line_pattern = re.compile(r'^%(\d+)\s+(.*)$') # 匹配 %NNNNNN code
self.tilde_pattern = re.compile(r'^~(\d+)\s+(.*)$') # 匹配 ~NNNNNN code
self.caret_pattern = re.compile(r'^\^(\d+)\s+(.*)$') # 匹配 ^NNNNNN code
# [修复] 纯数字开头(无前缀)或空格开头
self.plain_pattern = re.compile(r'^\s*(\d+)\s+(.*)$') # 匹配 " NNNNNN" 或 "NNNNNN"
self.decl_pattern = re.compile(r'^\s*(input|output|inout|wire|reg|logic|parameter|localparam|assign)\b')
# [修改] 传递 DUT 代码给 BlackBoxValidator
self.validator = BlackBoxValidator(dut_code=dut_code)
if tb_code:
self.validator._extract_signals_from_tb(tb_code)
def generate_prompt(self, current_score):
"""生成覆盖率驱动的Prompt"""
if not os.path.exists(self.file_path):
return None
try:
with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
lines = f.readlines()
except Exception:
return None
missing_blocks = []
current_block = []
recording = False
context_buffer = []
CONTEXT_SIZE = 3
# 收集缺失行用于 FSM 分析
missing_lines = []
for i, line in enumerate(lines):
line = line.strip()
count = -1
clean_code = line
is_tilde = False
is_caret = False
# 尝试匹配各种覆盖率标记格式
# Verilator 覆盖率格式:
# - %NNNNNN: 行覆盖NNNNNN 是执行次数,%000000 表示未执行
# - ~NNNNNN: 分支/条件覆盖,~000000 表示分支从未执行
# - ^NNNNNN: 未覆盖分支标记
# - NNNNNN: 无前缀格式(某些版本)
match_pct = self.line_pattern.match(line) # %NNNNNN code
match_tilde = self.tilde_pattern.match(line) # ~NNNNNN code
match_caret = self.caret_pattern.match(line) # ^NNNNNN code
match_plain = self.plain_pattern.match(line) # NNNNNN code (无前缀)
if match_pct:
count = int(match_pct.group(1))
clean_code = match_pct.group(2).strip()
elif match_tilde:
count = int(match_tilde.group(1))
clean_code = match_tilde.group(2).strip()
is_tilde = True
elif match_caret:
count = int(match_caret.group(1))
clean_code = match_caret.group(2).strip()
is_caret = True
elif match_plain:
# 纯数字格式(可能出现在某些 Verilator 版本)
count = int(match_plain.group(1))
clean_code = match_plain.group(2).strip()
if "//" in clean_code:
clean_code = clean_code.split("//")[0].strip()
is_hard_noise = (self.decl_pattern.match(clean_code) or clean_code == "endmodule")
is_soft_noise = (len(clean_code) < 2 or clean_code in ["end", "begin", "else", ");", "endcase", "default:"] or
clean_code.startswith("module ") or not any(c.isalnum() for c in clean_code))
# [修改] 覆盖状态判断:
# - %NNNNNN: count > 0 表示已覆盖count == 0 表示未覆盖
# - ~NNNNNN: 分支覆盖标记count == 0 也表示未覆盖!
# - ^NNNNNN: 未覆盖分支标记
is_definitely_covered = (not is_tilde and not is_caret and count > 0)
# [关键修复] tilde 格式 count == 0 也应该被视为 missing
is_definitely_missed = (
(not is_tilde and not is_caret and count == 0 and not is_hard_noise and not is_soft_noise) or
(is_tilde and count == 0 and not is_hard_noise and not is_soft_noise) or # [新增] ~000000 也是 missing
(is_caret and not is_hard_noise and not is_soft_noise)
)
if recording:
if is_definitely_covered:
missing_blocks.append(current_block)
missing_lines.extend(current_block)
current_block = []
recording = False
if not is_hard_noise:
context_buffer.append(clean_code)
else:
if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
current_block.append(f"Line {i+1}: {clean_code}")
else:
if is_definitely_missed:
recording = True
if context_buffer:
current_block.append(f"... (Context)")
for ctx in context_buffer:
current_block.append(f" {ctx}")
current_block.append(f"Line {i+1}: {clean_code} <--- MISSING START")
else:
if not is_hard_noise and not (is_soft_noise and len(clean_code) < 4):
context_buffer.append(clean_code)
if len(context_buffer) > CONTEXT_SIZE:
context_buffer.pop(0)
if recording and current_block:
missing_blocks.append(current_block)
missing_lines.extend(current_block)
# [改进] 详细诊断日志 - 使用 info 级别确保可见
total_lines = len(lines)
parsed_lines = sum(1 for l in lines if l.strip() and (
self.line_pattern.match(l.strip()) or
self.tilde_pattern.match(l.strip()) or
self.caret_pattern.match(l.strip()) or
self.plain_pattern.match(l.strip())
))
# 收集零计数行的详细信息
zero_count_details = []
for l in lines:
l_stripped = l.strip()
if not l_stripped:
continue
match_pct = self.line_pattern.match(l_stripped)
match_tilde = self.tilde_pattern.match(l_stripped)
if match_pct and int(match_pct.group(1)) == 0:
zero_count_details.append(('%', match_pct.group(2).strip()[:50]))
elif match_tilde and int(match_tilde.group(1)) == 0:
zero_count_details.append(('~', match_tilde.group(2).strip()[:50]))
zero_count_lines = len(zero_count_details)
logger.info(f"CoverageParser: Total={total_lines}, Parsed={parsed_lines}, Zero-count={zero_count_lines}, Missing blocks={len(missing_blocks)}")
if not missing_blocks:
# [改进] 详细诊断信息
if zero_count_lines > 0:
logger.warning(f"Found {zero_count_lines} lines with zero coverage count, but no missing blocks extracted.")
logger.warning("Zero-count lines:")
for prefix, code in zero_count_details[:10]: # 只显示前10个
logger.warning(f" {prefix}000000: {code}")
if len(zero_count_details) > 10:
logger.warning(f" ... and {len(zero_count_details) - 10} more")
logger.warning("These lines may have been filtered as noise (declarations, etc.)")
return None
selected_blocks = missing_blocks[:50]
# 获取实际信号名用于示例
reset_signal = self.validator._find_reset_signal()
inputs_no_clk = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
example_signal = inputs_no_clk[0] if inputs_no_clk else (reset_signal if reset_signal != "reset" else "ena")
# [新增] 动态获取禁止信号名示例
forbidden_examples = self.validator.get_forbidden_examples(count=8)
forbidden_display = ", ".join(f'"{ex}"' for ex in forbidden_examples[:6])
# 分析 FSM 相关的缺失代码
fsm_analysis = self._analyze_fsm_missing(missing_lines)
# [新增] 从语义分析结果获取 FSM 和功能点信息
semantic_context = self._generate_semantic_context()
# === [新增] 在 prompt 开头添加醒目的信号名清单 ===
inputs_json = str(self.validator.dut_inputs) if self.validator.dut_inputs else "[]"
outputs_json = str(self.validator.dut_outputs) if self.validator.dut_outputs else "[]"
prompt = f"""
[ROLE]
You are a hardware verification expert. Your task is to write a test scenario to improve code coverage.
{'='*60}
⚠️ [SIGNAL NAME CONSTRAINTS - READ THIS FIRST] ⚠️
{'='*60}
📋 ALLOWED INPUT SIGNALS (you CAN drive these):
{inputs_json}
📋 OUTPUT SIGNALS (you can READ but NOT write):
{outputs_json}
🚫 FORBIDDEN SIGNAL NAMES - THESE DO NOT EXIST:
{forbidden_display}
⚠️ Also: "reset" (actual: "{reset_signal}"), "rst", "enable", "en"
⚠️ ANY name NOT in ALLOWED INPUTS above is FORBIDDEN!
{'='*60}
[COVERAGE STATUS]
Current testbench achieves {current_score:.2f}% coverage.
The following logic blocks in the DUT are NEVER executed during simulation:
"""
for idx, block in enumerate(selected_blocks):
prompt += f"--- Missing Logic Block {idx+1} ---\n" + "\n".join(block) + "\n\n"
# [新增] 添加语义分析上下文
if semantic_context:
prompt += f"""
[SEMANTIC ANALYSIS - MODULE UNDERSTANDING]
{semantic_context}
"""
# === [新增] 添加能量分配目标上下文 ===
if self.energy_allocator:
energy_context = self.energy_allocator.get_target_context()
if energy_context:
prompt += f"""
[ENERGY-ALIGNED TARGET - PRIORITY]
{energy_context}
Focus your test scenario on covering this high-priority target first.
"""
# =====================================
prompt += self.validator.generate_constraint_prompt()
# 添加 FSM 分析提示
if fsm_analysis:
prompt += f"""
[FSM STATE TRANSITION ANALYSIS - CRITICAL]
{fsm_analysis}
IMPORTANT: FSM transitions have PRIORITY ORDER!
- 'if' conditions are evaluated TOP to BOTTOM
- The FIRST matching condition determines the next state
- To trigger a branch like "else if (condition)", you MUST ensure all higher-priority conditions are FALSE
- Read the missing code's context carefully: what conditions precede it?
"""
prompt += f"""
[OUTPUT REQUIREMENTS - CRITICAL]
1. Return ONLY Verilog test scenario code (NOT a task definition)
2. Your code will be inserted INTO an existing `initial begin ... end` block
3. DO NOT wrap your code in `task ... endtask` - just write the test sequence directly
4. DO NOT use `$finish` or `$stop` - the testbench handles simulation end
[CODING STYLE]
1. Use blocking assignments for input signals: `signal = value;`
2. Use `#N;` for time delays: `#10;` means wait 10 time units
3. Use `repeat(N) @(posedge clk);` to wait for N clock cycles
4. Start with reset sequence if needed
[BLACK-BOX CONSTRAINTS - CRITICAL]
1. You can ONLY control module INPUTS listed above
2. You CANNOT access internal signals (state, next_state, counters, etc.)
3. You CANNOT use `force` or `assign` on internal signals
4. To trigger a specific state: drive inputs and wait for the FSM to reach it naturally
[STEP-BY-STEP APPROACH - REQUIRED]
For each missing branch, think through:
1. What STATE must the FSM be in? (Look at the case statement)
2. What CONDITIONS must be true/false? (Check priority order!)
3. How to reach that state from reset? (Trace state transitions)
4. What inputs to apply and in what order?
[POSITIVE EXAMPLE - CORRECT APPROACH]
```verilog
// Reset sequence - use ACTUAL input signal names from above
{reset_signal} = 1;
repeat(2) @(posedge clk);
{reset_signal} = 0;
// Wait for FSM to reach desired state (estimate cycles)
repeat(3) @(posedge clk);
// Trigger missing branch by driving inputs
{example_signal} = 1;
repeat(5) @(posedge clk);
{example_signal} = 0;
repeat(10) @(posedge clk);
```
[NEGATIVE EXAMPLE - DO NOT DO THIS]
```verilog
// WRONG: Using wrong signal name (e.g., 'reset' instead of '{reset_signal}')
reset = 1; // ERROR: Signal 'reset' does not exist! Use '{reset_signal}' instead!
// WRONG: Not considering condition priority in FSM
// If missing code is "else if (condition_b)", you must make condition_a FALSE first!
// Example: if FSM has "if (!signal_a) ... else if (signal_b) ..."
// Then signal_a must be 1 (FALSE) for the else-if branch to execute
signal_a = 0; // WRONG: This blocks the else-if branch!
signal_b = 1; // This will NOT trigger because signal_a=0 took priority
// CORRECT: Analyze priority, set higher-priority conditions to FALSE
signal_a = 1; // Now the first condition (!signal_a) is FALSE
signal_b = 1; // Now this else-if branch can execute
// WRONG: Trying to assign internal state
state = IDLE; // ERROR: Cannot modify internal signal!
// WRONG: Using force on internal signal
force DUT.state = WL; // ERROR: Cannot force internal signal!
// WRONG: Checking internal state in condition
if (state == WL) begin // ERROR: Cannot read internal signal!
{example_signal} = 1;
end
// CORRECT ALTERNATIVE: Estimate timing instead
repeat(5) @(posedge clk); // Wait for FSM to reach expected state
{example_signal} = 1;
```
{'='*60}
⚠️ [FINAL SIGNAL NAME CHECK - BEFORE YOU WRITE CODE] ⚠️
{'='*60}
🚫 STOP! Verify your signal names:
✅ ALLOWED INPUTS: {inputs_json}
🚫 FORBIDDEN: {forbidden_display}
❌ WRONG: input_signal_1 = 1; // Does NOT exist!
❌ WRONG: input_signal_2 = 0; // Does NOT exist!
❌ WRONG: reset = 1; // Wrong! Use '{reset_signal}' instead!
❌ WRONG: data_1 = 1; // Does NOT exist!
✅ CORRECT: {example_signal} = 1; // From ALLOWED INPUTS
✅ CORRECT: {reset_signal} = 1; // Actual reset signal
⚠️ RULE: If a signal name is NOT in ALLOWED INPUTS, it does NOT exist!
Now write the test scenario code using ONLY signal names from ALLOWED INPUTS.
"""
# === [新增] 注入多样性约束 ===
if self.diversity_injector:
# 获取未覆盖功能点
uncovered_functions = []
if self.semantic_result and self.semantic_result.get('function_points'):
uncovered_functions = [
fp for fp in self.semantic_result['function_points']
if not fp.get('covered', False)
]
# 获取当前目标功能点
target_function = ""
if self.energy_allocator and self.energy_allocator.current_target:
target_function = self.energy_allocator.current_target.function_point
# 注入多样性约束
prompt = self.diversity_injector.inject_diversity_constraints(
prompt=prompt,
target_function=target_function,
uncovered_functions=uncovered_functions
)
# =================================
return prompt
def _analyze_fsm_missing(self, missing_lines: List[str]) -> str:
"""分析 FSM 相关的缺失代码,生成具体的 FSM 状态转换指导"""
analysis = []
# 检查是否涉及 FSM 状态转换
has_state_case = any('case' in line.lower() and 'state' in line.lower() for line in missing_lines)
has_else_if = any('else if' in line.lower() for line in missing_lines)
has_if_condition = any(re.search(r'\bif\s*\(', line) for line in missing_lines)
if has_state_case or has_else_if:
analysis.append("- Missing code involves FSM state transitions or conditional branches")
if has_else_if or has_if_condition:
analysis.append("- Conditional branches have PRIORITY ORDER (top to bottom)")
analysis.append("- 'else if' branches require ALL previous conditions to be FALSE")
analysis.append("- Analyze the missing code's context: what conditions block this branch?")
if has_state_case:
analysis.append("- To trigger a state transition: first reach the source state, then drive inputs")
# === 新增FSM 状态路径分析 ===
# 尝试从缺失代码中提取 FSM 状态信息
fsm_state_info = self._extract_fsm_state_from_missing(missing_lines)
if fsm_state_info:
analysis.append("")
analysis.append("[FSM STATE PATH ANALYSIS]")
analysis.extend(fsm_state_info)
return "\n".join(analysis) if analysis else ""
def _extract_fsm_state_from_missing(self, missing_lines: List[str]) -> List[str]:
"""
从缺失代码中提取 FSM 状态信息,生成具体的状态转换指导
分析策略:
1. 从缺失代码的上下文识别 case 分支FSM 状态)
2. 分析该状态下的条件分支优先级
3. 识别需要满足的输入条件
"""
info = []
# 从 annotated 文件中读取完整的 DUT 代码以分析 FSM 结构
try:
with open(self.file_path, 'r', encoding='utf-8', errors='ignore') as f:
full_content = f.read()
except:
return info
# 提取缺失代码所在的 FSM 状态
target_state = None
missing_condition = None
for line in missing_lines:
# 查找 case 分支标记(如 "WL:", "WR:", "FALLL:" 等)
# 格式可能是 "Line N: STATE:" 或 "STATE:"
state_match = re.search(r'\b([A-Z][A-Z0-9_]*)\s*:', line)
if state_match:
potential_state = state_match.group(1)
# 排除常见的非状态关键字
if potential_state not in ['IF', 'ELSE', 'CASE', 'BEGIN', 'END', 'DEFAULT']:
target_state = potential_state
break
# 如果没找到,尝试从整个文件中分析
if not target_state:
# 查找缺失行附近的 case 分支
lines = full_content.split('\n')
for i, line in enumerate(lines):
# 查找覆盖率标记为 0 的行
if re.match(r'^%000000', line.strip()):
# 向上查找最近的 case 分支(状态)
for j in range(i-1, max(0, i-20), -1):
state_match = re.search(r'^\s*([A-Z][A-Z0-9_]*)\s*:', lines[j])
if state_match:
target_state = state_match.group(1)
break
if target_state:
break
# 分析缺失的条件分支
for line in missing_lines:
# 提取 else if 条件
else_if_match = re.search(r'else\s+if\s*\(([^)]+)\)', line)
if else_if_match:
missing_condition = else_if_match.group(1)
break
# 提取 if 条件
if_match = re.search(r'\bif\s*\(([^)]+)\)', line)
if if_match:
missing_condition = if_match.group(1)
break
# 生成具体的指导信息
if target_state:
info.append(f"- Target FSM state identified: {target_state}")
# 查找复位后的初始状态
reset_state = self._find_reset_state(full_content)
if reset_state:
info.append(f"- After reset, FSM starts in state: {reset_state}")
if reset_state != target_state:
info.append(f"- CRITICAL: You must FIRST transition from {reset_state} to {target_state}!")
info.append(f"- Do NOT assume FSM will automatically reach {target_state}!")
# 尝试找到状态转换路径
transition_hint = self._find_state_transition_hint(full_content, reset_state, target_state)
if transition_hint:
info.append(f"- To reach {target_state}: {transition_hint}")
if missing_condition:
info.append(f"- Missing condition: \"{missing_condition}\"")
# 分析条件优先级
priority_info = self._analyze_condition_priority(full_content, target_state, missing_condition)
if priority_info:
info.extend(priority_info)
return info
def _find_reset_state(self, content: str) -> Optional[str]:
"""从 DUT 代码中找到复位后的初始状态"""
# 查找复位逻辑中的状态赋值
# 常见模式: if (reset) state <= IDLE; 或 state <= 0;
patterns = [
r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
r'if\s*\([^)]*reset[^)]*\)\s*state\s*<=\s*(\d+);',
r'if\s*\([^)]*rst[^)]*\)\s*state\s*<=\s*([A-Z][A-Z0-9_]*);',
]
for pattern in patterns:
match = re.search(pattern, content, re.IGNORECASE)
if match:
state = match.group(1)
# 如果是数字,尝试从参数中找对应的状态名
if state.isdigit():
# 查找参数定义
param_match = re.search(r'parameter\s+([^;]+);', content)
if param_match:
params = param_match.group(1)
# 解析参数列表
for param in params.split(','):
param = param.strip()
if '=' in param:
name, value = param.split('=')
if value.strip() == state:
return name.strip()
return state
return None
def _find_state_transition_hint(self, content: str, from_state: str, to_state: str) -> Optional[str]:
"""找到从一个状态到另一个状态的转换条件"""
# 在 case 语句中查找 from_state 分支
# 提取该分支下到 to_state 的转换条件
# 简单策略:查找 "next = TO_STATE" 或 "next <= TO_STATE"
pattern = rf'{from_state}\s*:.*?next\s*=?\s*{to_state}'
match = re.search(pattern, content, re.DOTALL)
if match:
# 提取条件
branch_code = match.group(0)
# 查找 if 条件
if_match = re.search(r'if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
if if_match:
return f"set condition: {if_match.group(1)}"
# 查找 else if 条件
elif_match = re.search(r'else\s+if\s*\(([^)]+)\)\s*next\s*=?\s*' + to_state, branch_code)
if elif_match:
return f"set condition: {elif_match.group(1)} (ensure earlier conditions are FALSE)"
# 尝试反向查找:什么条件下会转换到目标状态
trans_pattern = rf'(?:if|else\s+if)\s*\(([^)]+)\)\s*(?:next\s*=?\s*{to_state}|{to_state}\s*;)'
trans_match = re.search(trans_pattern, content)
if trans_match:
return f"set condition: {trans_match.group(1)}"
return None
def _analyze_condition_priority(self, content: str, state: str, missing_condition: str) -> List[str]:
"""分析条件分支的优先级,找出需要排除的条件"""
info = []
if not state:
return info
# 查找该状态下的所有条件分支
# 提取 state: 后面的代码块
state_block_pattern = rf'{state}\s*:(.*?)(?=[A-Z][A-Z0-9_]*\s*:|endcase|default:)'
match = re.search(state_block_pattern, content, re.DOTALL)
if not match:
return info
state_block = match.group(1)
# 提取所有条件分支
conditions = []
for cond_match in re.finditer(r'(?:if|else\s+if)\s*\(([^)]+)\)', state_block):
conditions.append(cond_match.group(1).strip())
# 找到缺失条件在列表中的位置
missing_idx = -1
for i, cond in enumerate(conditions):
# 简化比较(去除空格)
if cond.replace(' ', '') in missing_condition.replace(' ', '') or \
missing_condition.replace(' ', '') in cond.replace(' ', ''):
missing_idx = i
break
if missing_idx > 0:
info.append(f"- This branch is condition #{missing_idx + 1} (lower priority)")
info.append(f"- You must make ALL earlier conditions FALSE:")
for i in range(missing_idx):
cond = conditions[i]
# 分析如何使条件为 FALSE
false_hint = self._get_false_hint(cond)
info.append(f" * \"{cond}\" must be FALSE → {false_hint}")
return info
def _get_false_hint(self, condition: str) -> str:
"""分析如何使条件为 FALSE"""
condition = condition.strip()
# 处理 !signal 形式
if condition.startswith('!'):
signal = condition[1:].strip()
return f"set {signal} = 1"
# 处理 signal 形式(布尔值)
if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', condition):
return f"set {condition} = 0"
# 处理比较运算符
if '==' in condition:
parts = condition.split('==')
if len(parts) == 2:
signal = parts[0].strip()
value = parts[1].strip()
if value.isdigit():
return f"set {signal} != {value}"
# 处理 >= 形式
if '>=' in condition:
parts = condition.split('>=')
if len(parts) == 2:
signal = parts[0].strip()
value = parts[1].strip()
if value.isdigit():
return f"set {signal} < {value}"
# 处理 > 形式
if '>' in condition and '>=' not in condition:
parts = condition.split('>')
if len(parts) == 2:
signal = parts[0].strip()
value = parts[1].strip()
return f"set {signal} <= {value}"
return "analyze the condition logic"
def _generate_semantic_context(self) -> str:
"""
[新增] 从语义分析结果生成 Prompt 上下文
整合语义分析层 (Layer 0) 的输出,为 LLM 提供更精准的指导:
- FSM 状态转换图
- 功能点重要性排序
- 测试场景建议
Returns:
语义上下文字符串,用于增强 Prompt
"""
if not self.semantic_result:
return ""
context_parts = []
# 1. 模块基础信息
module_name = self.semantic_result.get('module_name', '')
inputs = self.semantic_result.get('inputs', [])
outputs = self.semantic_result.get('outputs', [])
if module_name:
context_parts.append(f"Module Name: {module_name}")
if inputs:
context_parts.append(f"Module Inputs: {', '.join(inputs)}")
if outputs:
context_parts.append(f"Module Outputs: {', '.join(outputs)}")
# 2. FSM 信息(最关键)
fsm_info = self.semantic_result.get('fsm_info')
if fsm_info:
context_parts.append("")
context_parts.append("=== FSM STATE MACHINE DETAILS ===")
context_parts.append(f"State Variable: {fsm_info.get('state_variable', 'unknown')}")
states = fsm_info.get('states', [])
if states:
context_parts.append(f"All States ({len(states)}): {', '.join(states)}")
# 状态转换表
transitions = fsm_info.get('transitions', {})
if transitions:
context_parts.append("")
context_parts.append("=== STATE TRANSITION TABLE ===")
context_parts.append("Format: CURRENT_STATE --[CONDITION]--> NEXT_STATE")
context_parts.append("")
for state, trans_list in transitions.items():
for trans in trans_list:
condition = trans.get('condition', 'default')
next_state = trans.get('next_state', 'unknown')
if condition == 'default':
context_parts.append(f" {state} --[default]--> {next_state}")
else:
context_parts.append(f" {state} --[if ({condition})]--> {next_state}")
# 添加状态转换路径分析
context_parts.append("")
context_parts.append("=== STATE TRANSITION PATH HINTS ===")
reset_state = self._find_reset_state_from_fsm(fsm_info)
if reset_state:
context_parts.append(f"Initial State (after reset): {reset_state}")
context_parts.append("")
context_parts.append("IMPORTANT: To reach a target state, trace the path from reset:")
context_parts.append(" 1. Reset the DUT to initialize to the starting state")
context_parts.append(" 2. Apply inputs to trigger state transitions")
context_parts.append(" 3. Wait for the FSM to naturally reach the target state")
context_parts.append(" 4. THEN apply inputs to trigger the missing branch")
# 3. 功能点优先级
function_points = self.semantic_result.get('function_points', [])
if function_points:
context_parts.append("")
context_parts.append("=== FUNCTION POINTS (Ranked by Importance) ===")
for i, fp in enumerate(function_points[:10]): # Top 10
name = fp.get('name', 'unknown')
fp_type = fp.get('type', 'unknown')
importance = fp.get('importance', 0)
covered = fp.get('covered', False)
status = "✓ COVERED" if covered else "✗ NOT COVERED"
context_parts.append(f" {i+1}. [{status}] {name} ({fp_type}): importance={importance:.2f}")
# 4. 测试场景建议
test_scenarios = self.semantic_result.get('test_scenarios', [])
if test_scenarios:
context_parts.append("")
context_parts.append("=== RECOMMENDED TEST SCENARIOS ===")
for i, ts in enumerate(test_scenarios[:5]): # Top 5
name = ts.get('name', 'unknown')
description = ts.get('description', '')
priority = ts.get('priority', 0)
context_parts.append(f" {i+1}. {name}: {description} (priority={priority:.2f})")
if context_parts:
return "\n".join(context_parts)
return ""
def _find_reset_state_from_fsm(self, fsm_info: dict) -> Optional[str]:
"""从 FSM 信息中推断复位后的初始状态"""
# 方法1检查是否有明确的复位状态
transitions = fsm_info.get('transitions', {})
# 复位后通常进入第一个定义的状态或特定名称的状态
states = fsm_info.get('states', [])
# 常见的初始状态命名
initial_state_names = ['IDLE', 'INIT', 'RESET', 'START', 'BEGIN']
for name in initial_state_names:
if name in states:
return name
# 如果没有找到,返回第一个状态
if states:
return states[0]
return None
# ============================================================================
# TBInjector - 场景注入器
# ============================================================================
class TBInjector:
"""
场景注入器 - 将LLM生成的测试代码注入到现有测试平台
集成三层防护策略:
1. Layer 1: Prompt约束由CoverageParser处理
2. Layer 2: 智能代码转换
3. Layer 3: 质量评估和重试建议
"""
def __init__(self, tb_code):
"""
初始化注入器
Args:
tb_code: 原始测试平台代码字符串
"""
self.content = tb_code
self.validator = BlackBoxValidator()
self.validator._extract_signals_from_tb(tb_code)
self.last_validation_result = None
def inject(self, new_code, iter_idx):
"""
注入新的测试场景到测试平台
Args:
new_code: LLM生成的测试代码
iter_idx: 迭代序号
Returns:
修改后的测试平台代码
"""
# Step 1: 预处理代码(包含三层防护)
scenario_code, result = self._preprocess_code(new_code, iter_idx)
self.last_validation_result = result
# 记录日志
if result['violations']['critical']:
logger.warning(f"[CGA-{iter_idx}] Critical violations detected:")
for v in result['violations']['critical']:
logger.warning(f" - {v}")
if result['violations']['warning']:
logger.info(f"[CGA-{iter_idx}] Warnings:")
for v in result['violations']['warning']:
logger.info(f" - {v}")
if result['transformations']:
logger.info(f"[CGA-{iter_idx}] Code transformations applied:")
for t in result['transformations']:
logger.info(f" - {t['type']}: {t.get('original', 'N/A')[:50]}...")
# Step 2: 构建场景块
scenario_block = self._build_scenario_block(scenario_code, iter_idx)
# Step 3: 注入到TB中
modified_tb = self._inject_scenario(scenario_block)
return modified_tb
def should_retry(self):
"""是否应该重试"""
if self.last_validation_result is None:
return False
return self.last_validation_result.get('should_retry', False)
def get_quality_score(self):
"""获取代码质量分数"""
if self.last_validation_result is None:
return 0
return self.last_validation_result.get('quality_score', 0)
def _preprocess_code(self, code, iter_idx):
"""预处理LLM生成的代码 - 增强版,包含语法预检查"""
# 移除markdown标记
code = re.sub(r'```(?:verilog|systemverilog|sv)?\n?', '', code)
code = re.sub(r'```', '', code)
# 移除task包装
code = re.sub(r'task\s+\w+\s*(?:\([^)]*\))?\s*;', '', code)
code = re.sub(r'endtask', '', code)
# 移除$finish和$stop
code = re.sub(r'\$finish\s*;', '', code)
code = re.sub(r'\$stop\s*;', '', code)
# 移除多余空行
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# [修复] 移除 initial begin ... end 代码块
# LLM 可能生成完整的 initial begin...end 块,但我们只需要其中的测试代码
# 使用括号计数来正确匹配嵌套结构,而不是简单的正则表达式
initial_match = re.search(r'\binitial\s+begin\b', code, re.IGNORECASE)
if initial_match:
logger.warning(f"[CGA-{iter_idx}] Detected 'initial begin...end' block in generated code - this should not be included")
logger.warning(f"[CGA-{iter_idx}] Removing 'initial begin...end' wrapper, keeping only the test content")
# 找到 initial begin 后的起始位置
start_pos = initial_match.end()
# 使用括号计数找到匹配的 end
begin_count = 1 # 已经遇到一个 begin (initial begin)
end_pos = start_pos
code_after_initial = code[start_pos:]
for i, char in enumerate(code_after_initial):
# 检查是否是关键字 begin 或 end
remaining = code_after_initial[i:]
if re.match(r'\bbegin\b', remaining, re.IGNORECASE):
begin_count += 1
elif re.match(r'\bend\b', remaining, re.IGNORECASE):
begin_count -= 1
if begin_count == 0:
# 找到匹配的 end
end_pos = start_pos + i
break
if begin_count == 0:
# 提取块内的内容
inner_content = code[start_pos:end_pos].strip()
# 移除末尾的 end如果有
inner_content = re.sub(r'\bend\s*$', '', inner_content.strip())
# 重建代码:移除 initial begin ... end 包装
code = code[:initial_match.start()] + inner_content + code[end_pos + 3:] # +3 跳过 'end'
logger.info(f"[CGA-{iter_idx}] Successfully removed 'initial begin...end' wrapper")
else:
logger.warning(f"[CGA-{iter_idx}] Could not find matching 'end' for 'initial begin', keeping code as-is")
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
# 信号名自动修正(在验证之前)
code = self._auto_correct_signal_names(code)
# 三层防护:黑盒约束验证和转换
code, result = self.validator.validate_and_transform(code, self.content)
# [新增] 第四层Verilog 语法预检查
# 提取完整 TB 中已声明的信号(不只是代码片段)
signal_widths = self._extract_signal_widths()
declared_signals = self._extract_declared_signals()
# 调用语法检查,传入已声明信号列表
syntax_result = self.validator.check_syntax_issues(
code,
signal_widths,
declared_signals=declared_signals
)
# 合并检查结果
result['syntax_check'] = syntax_result
# 记录语法问题日志
if syntax_result['width_mismatch']:
logger.warning(f"[CGA-{iter_idx}] Width mismatch detected:")
for issue in syntax_result['width_mismatch']:
logger.warning(f" - {issue['message']}")
if 'suggestion' in issue:
logger.info(f" Suggestion: {issue['suggestion']}")
if syntax_result['logic_issues']:
logger.warning(f"[CGA-{iter_idx}] Logic issues detected:")
for issue in syntax_result['logic_issues']:
logger.warning(f" - {issue['message']}")
if 'suggestion' in issue:
logger.info(f" Suggestion: {issue['suggestion']}")
if syntax_result['syntax_warnings']:
for issue in syntax_result['syntax_warnings']:
if issue['severity'] == 'error':
logger.error(f"[CGA-{iter_idx}] Syntax error: {issue['message']}")
else:
logger.warning(f"[CGA-{iter_idx}] Syntax warning: {issue['message']}")
# 如果语法检查发现问题,设置 should_retry
if syntax_result['should_retry']:
result['should_retry'] = True
logger.warning(f"[CGA-{iter_idx}] Syntax issues detected, recommend retry with corrected code")
code = re.sub(r'\n\s*\n\s*\n', '\n\n', code)
return code.strip(), result
def _extract_declared_signals(self) -> set:
"""从完整测试平台中提取所有已声明的信号"""
signals = set()
# 匹配 reg [N:0] signal 或 wire [N:0] signal
for match in re.finditer(r'\b(reg|wire|logic)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
signals.add(match.group(2))
# 匹配 input/output 声明
for match in re.finditer(r'\b(input|output|inout)\s+(?:\[[^\]]+\]\s*)?(\w+)', self.content):
signals.add(match.group(2))
# 匹配模块端口连接中的信号
for match in re.finditer(r'\.(\w+)\s*\(\s*(\w+)\s*\)', self.content):
signals.add(match.group(2)) # 添加连接的信号名
return signals
def _extract_signal_widths(self) -> Dict[str, int]:
"""从测试平台中提取信号位宽信息"""
widths = {}
# 匹配 reg [N:0] signal 或 wire [N:0] signal
width_pattern = re.compile(r'\b(reg|wire)\s+\[(\d+):(\d+)\]\s+(\w+)')
for match in width_pattern.finditer(self.content):
high = int(match.group(2))
low = int(match.group(3))
width = high - low + 1
signal = match.group(4)
widths[signal] = width
# 匹配无位宽声明的信号(默认 1 位)
single_bit_pattern = re.compile(r'\b(reg|wire)\s+(?!.*\[)(\w+)\s*;')
for match in single_bit_pattern.finditer(self.content):
signal = match.group(2)
if signal not in widths:
widths[signal] = 1
return widths
def _auto_correct_signal_names(self, code: str) -> str:
"""自动修正信号名错误"""
corrections = []
# 获取正确的复位信号名
reset_signal = self.validator._find_reset_signal()
# 如果正确的复位信号不是 'reset',则修正所有 'reset' 引用
if reset_signal != "reset":
# 匹配独立的 'reset' 单词(不包括 'areset', 'rst_n' 等)
pattern = r'\breset\b(?!\w)'
matches = re.findall(pattern, code)
if matches:
code = re.sub(pattern, reset_signal, code)
corrections.append(f"reset -> {reset_signal} ({len(matches)} occurrences)")
# 检查是否有使用 'rst' 但正确信号是 'areset' 的情况
if reset_signal == "areset":
pattern = r'\brst\b(?!\w)'
matches = re.findall(pattern, code)
if matches:
code = re.sub(pattern, reset_signal, code)
corrections.append(f"rst -> {reset_signal} ({len(matches)} occurrences)")
# 检查是否使用了不存在的信号
for signal in re.findall(r'\b(\w+)\s*=', code):
signal = signal.strip()
# 跳过已知的合法信号
if signal in self.validator.dut_inputs:
continue
# 检查是否是复位信号的别名
if signal.lower() in ['reset', 'rst', 'rst_n', 'rst_b'] and reset_signal != signal:
code = re.sub(rf'\b{signal}\b', reset_signal, code)
corrections.append(f"{signal} -> {reset_signal}")
# [新增] 检测并修正类似 input_signal_N 的通用命名模式
# 这是 LLM 总是使用的通用名称,需要映射到实际信号
valid_inputs = [s for s in self.validator.dut_inputs if 'clk' not in s.lower()]
# 处理 input_signal_1, input_signal_2, ... 等模式
for match in re.finditer(r'\b(input_signal_\d+)\b', code):
wrong_signal = match.group(1)
if valid_inputs:
# 提取数字后缀,尝试按顺序映射
num_match = re.search(r'(\d+)$', wrong_signal)
if num_match:
idx = int(num_match.group(1)) - 1 # input_signal_1 -> index 0
if idx < len(valid_inputs):
correct_signal = valid_inputs[idx]
else:
correct_signal = valid_inputs[0] # fallback
else:
correct_signal = valid_inputs[0]
code = re.sub(rf'\b{re.escape(wrong_signal)}\b', correct_signal, code)
corrections.append(f"{wrong_signal} -> {correct_signal} (generic name mapped)")
# 处理 in, data 等太通用的名称
for generic_name in ['in', 'data', 'input', 'din']:
if re.search(rf'\b{generic_name}\s*=', code):
if valid_inputs:
# 使用第一个有效的输入信号作为替代
correct_signal = valid_inputs[0]
code = re.sub(rf'\b{generic_name}\s*=', f'{correct_signal} =', code)
corrections.append(f"{generic_name} -> {correct_signal} (too generic)")
if corrections:
logger.info(f"[Signal Correction] Applied corrections: {'; '.join(corrections)}")
return code
def _build_scenario_block(self, scenario_code, iter_idx):
"""构建完整的场景代码块"""
# 格式化缩进
lines = scenario_code.split('\n')
formatted_lines = []
for line in lines:
stripped = line.strip()
if stripped:
formatted_lines.append(f" {stripped}")
formatted_code = '\n'.join(formatted_lines)
# 检测输出信号用于日志
output_signals = self._detect_output_signals()
output_log = self._generate_output_log(output_signals, iter_idx)
# 构建完整块
block = f'''
// ========== CGA Iteration {iter_idx} ==========
scenario = 100 + {iter_idx};
// Reset signals to safe state
{self._generate_signal_reset()}
#5;
// CGA generated test sequence:
{formatted_code}
// Log results
{output_log}
// ==============================================
'''
return block
def _detect_output_signals(self):
"""检测DUT的输出信号"""
outputs = []
wire_pattern = re.compile(r'wire\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
for match in wire_pattern.finditer(self.content):
signal = match.group(1)
if signal.lower() not in ['clk', 'clock', 'rst', 'reset', 'areset']:
outputs.append(signal)
return outputs
def _generate_signal_reset(self):
"""生成信号重置代码"""
inputs = []
reg_pattern = re.compile(r'reg\s+(?:\[[\d:]+\]\s*)?(\w+)\s*;')
for match in reg_pattern.finditer(self.content):
signal = match.group(1)
if signal.lower() not in ['clk', 'clock', 'file', 'scenario']:
inputs.append(signal)
if inputs:
return " " + "; ".join([f"{sig} = 0" for sig in inputs]) + ";"
return " // No input signals to reset"
def _generate_output_log(self, signals, iter_idx):
"""生成输出日志代码"""
if not signals:
return f' $display("[CGA-{iter_idx}] Scenario executed");'
sig_names = ", ".join(signals)
format_str = ", ".join(["%b"] * len(signals))
return f' $fdisplay(file, "[CGA-{iter_idx}] {sig_names} = {format_str}", {sig_names});'
def _inject_scenario(self, scenario_block):
"""将场景块注入到测试平台"""
modified_tb = self.content
# 策略:如果有 $fclose在其之前插入
if "$fclose" in modified_tb:
modified_tb = re.sub(
r'(\s*)(\$fclose\s*\([^)]+\)\s*;)',
scenario_block + r'\1\2',
modified_tb,
count=1
)
elif "$finish" in modified_tb:
# 否则在 $finish 之前插入
modified_tb = modified_tb.replace(
"$finish;",
scenario_block + "\n $finish;"
)
else:
# 兜底:在最后一个 end 之前插入
last_end = modified_tb.rfind("end")
if last_end != -1:
modified_tb = modified_tb[:last_end] + scenario_block + modified_tb[last_end:]
return modified_tb