Files
TBgen_App/autoline/TB_autoline.py

538 lines
24 KiB
Python
Raw Normal View History

2026-03-30 16:46:48 +08:00
"""
Description : The main function of autoline, originally the first part of autoline.py in AutoBench 1.0
Author : Ruidi Qiu (r.qiu@tum.de)
Time : 2024/7/24 11:44:15
LastEdited : 2024/9/1 10:32:18
"""
import os
import analyze as al
import loader_saver as ls
import time
from config import Config
from loader_saver import save_dict_json_form, log_localprefix
from data.probset import HDLBitsProbset
from loader_saver import autologger as logger
from utils.utils import Timer
from autoline.TB1_gen import TaskTBgen
from autoline.TB2_syncheck import TaskTBsim
from autoline.TB3_funccheck import TaskTBcheck
from autoline.TB4_eval import TaskTBeval
from prompt_scripts import BaseScript
from LLM_call import llm_manager
# [NEW] import the freshly added CGA (coverage-guided agent) module
from autoline.TB_cga import TaskTBCGA
def run_autoline():
    """Entry point: build an AutoLine pipeline from the global config and run it."""
    pipeline = AutoLine(Config())
    pipeline()
class AutoLine():
    """Top-level driver: runs one AutoLine_Task per problem in the configured problem set."""

    def __init__(self, config: Config):
        self.config = config
        self.logger = logger
        # a prompt script is mandatory for testbench generation
        self.logger.assert_(config.get_item("autoline", "promptscript") is not None, "config.autoline.promptscript is None, please check the config file.")
        self.load_data()
        self.run_info_path = os.path.join(config.save.root, "Chatbench_RunInfo.json")
        self.run_info = []
        # the analyzer only runs for a full pipeline (or the TBgensimeval shortcut), not partial runs
        only = config.autoline.onlyrun
        self.analyzer_en = only is None or only == "TBgensimeval"

    def run(self):
        total = self.probset.num
        for idx, prob in enumerate(self.probset.data, start=1):
            self.logger.info("")
            self.logger.info("######################### task %d/%d [%s] #########################" % (idx, total, prob["task_id"]))
            self.run_info.append(AutoLine_Task(prob, self.config).run())
            # persist after every task; each write overwrites the previous file
            save_dict_json_form(self.run_info, self.run_info_path)
        if self.analyzer_en:
            self.run_analyzer()

    def __call__(self, *args, **kwargs):
        return self.run(*args, **kwargs)

    def load_data(self):
        """Load the HDLBits problem set selected by the config."""
        self.probset = HDLBitsProbset()
        self.probset.load_by_config(self.config.autoline.probset)

    def run_analyzer(self):
        """Summarize the collected run info with the Analyzer and log its messages."""
        analyzer = al.Analyzer(self.run_info, self.config.gpt.model)
        analyzer.run()
        logger.info(analyzer.messages)
class AutoLine_Task():
    """Runs the full autoline pipeline (TBgen -> TBsim -> TBcheck -> [CGA] -> TBeval) for one problem."""
    def __init__(self, prob_data: dict, config: Config):
        # config:
        self.config = config
        # probdata:
        self.prob_data = prob_data
        self.main_model = self.config.gpt.model # The main llm model used in the autoline (generation, correction...)
        self.task_id = prob_data["task_id"]
        self.task_NO = prob_data["task_number"]
        self.prob_description = prob_data["description"]
        self.header = prob_data["header"]
        self.DUT_golden = prob_data['module_code']
        self.TB_golden = prob_data.get("testbench", None)
        self.mutant_list = prob_data.get("mutants", None)
        self.rtlgen_list = prob_data.get('llmgen_RTL', None)
        self.rtlgen_model = self.config.gpt.rtlgen_model # if llmgen_list is none, this will be used
        self.rtl_num = self.config.autoline.TBcheck.rtl_num # will be covered if llmgen_list is not None
        # system config:
        # self.task_dir = self.config.save.root + self.task_id + "/"
        self.task_dir = os.path.join(self.config.save.root, self.task_id)
        self.working_dir = self.task_dir
        os.makedirs(self.task_dir, exist_ok=True)
        # === [CGA Mod] Save DUT immediately to task dir for CGA access ===
        self.dut_path = os.path.join(self.task_dir, "DUT.v")
        ls.save_code(self.DUT_golden, self.dut_path)
        # ==============================================================
        self.update_desc = config.autoline.update_desc
        self.error_interuption = config.autoline.error_interruption # for debug
        self.save_codes = config.autoline.save_finalcodes
        self.save_compile = self.config.autoline.save_compile # save the compiling codes in TBcheck and TBeval or not.
        # TBgen paras:
        self.TBgen_prompt_script = config.autoline.promptscript
        self.circuit_type = None
        self.scenario_dict = None
        self.scenario_num = None
        self.checklist_worked = None
        # TBcheck paras:
        self.TBcheck_correct_max = self.config.autoline.TBcheck.correct_max
        self.iter_max = config.autoline.itermax
        self.discrim_mode = config.autoline.TBcheck.discrim_mode
        self.correct_mode = config.autoline.TBcheck.correct_mode
        self.rtl_compens_en = config.autoline.TBcheck.rtl_compens_en
        self.rtl_compens_max_iter = config.autoline.TBcheck.rtl_compens_max_iter
        self.cga_enabled = config.autoline.cga.enabled
        # stages (populated by the run_TB* methods as the pipeline advances):
        self.TBgen_manager:TaskTBgen = None
        self.TBgen:BaseScript = None
        self.TBsim:TaskTBsim = None
        self.TBcheck:TaskTBcheck = None
        self.TBeval:TaskTBeval = None
        self.stage_now = "initialization"
        # changing paras:
        self.autoline_iter_now = 0
        self.TB_code_v = None
        self.TB_code_py = None
        self.next_action = None
        # results:
        self.incomplete_running = True
        self.full_pass = False
        self.TB_corrected = False
        self.run_info = {}
        self.run_info_short = {}
        self.TBcheck_rtl_newly_gen_num = 0 # in autoline, "funccheck" = "TBcheck"
        self.op_record = [] # will record the order of each stage, for example: ["gen", "syncheck", "funccheck", "gen", "syncheck", "funccheck", "eval"]
        self.funccheck_op_record = []
        self.funccheck_iters = []
        # initialize the CGA coverage score (updated later by run_TBCGA)
        self.cga_coverage = 0.0
        # === [CGA Mod] Initialize result dictionary for final reporting ===
        self.result_dict = {
            "task_id": self.task_id,
            "stage": "Init",
            "pass": False,
            "coverage": 0.0,
            "cga_enabled": self.cga_enabled
        }
        # =================================================================
        # renew current section of llm_manager and logger
        llm_manager.new_section()
        logger.set_temp_log()
def run(self):
"""
The main function of running the autoline for one problem
"""
with log_localprefix(self.task_id):
self.run_stages()
self.runinfo_update()
if self.save_codes:
self.save_TB_codes()
# === [CGA Mod] Save Result JSON for Analyzer ===
self.result_dict['stage'] = self.stage_now
try:
result_save_path = self.config.autoline.result_path
except AttributeError:
# 如果 config 对象没这个属性或者它是字典且没这个key
result_save_path = "results"
# 确保是绝对路径或相对于项目根目录
if not os.path.exists(result_save_path):
os.makedirs(result_save_path, exist_ok=True)
ls.save_dict_json_form(self.result_dict, os.path.join(result_save_path, f"{self.task_id}.json"))
# ===============================================
return self.run_info
def run_TBgen(self, subdir:str=None):
# TODO: export the circuit type and scenario number
self.op_record.append("gen")
working_dir = os.path.join(self.task_dir, subdir) if subdir is not None else self.task_dir
self.stage_now = "TBgen"
self.TBgen_manager = TaskTBgen(self.prob_data, self.TBgen_prompt_script, working_dir, self.config)
self.TBgen = self.TBgen_manager.workflow
with log_localprefix("TBgen"):
self.TBgen()
self.TB_code_v = self.TBgen.get_attr("TB_code_v")
self.TB_code_py = self.TBgen.get_attr("TB_code_py")
self.scenario_dict = self.TBgen.get_attr("scenario_dict")
self.scenario_num = self.TBgen.get_attr("scenario_num")
self.circuit_type = self.TBgen.get_attr("circuit_type")
self.checklist_worked = self.TBgen.get_attr("checklist_worked")
self.incomplete_running = True
self._blank_log()
def run_TBsim(self, subdir:str=None):
self.op_record.append("syncheck")
working_dir = os.path.join(self.task_dir, subdir) if subdir is not None else self.task_dir
self.stage_now = "TBsim"
self.TBsim = TaskTBsim(
self.TBgen,
self.TBgen.TB_code,
self.header,
working_dir,
self.task_id,
self.config
)
self.TBsim.run()
self.TB_code_v = self.TBsim.TB_code_now
self.TB_code_py = self.TBsim.PY_code_now
self._blank_log()
    def run_TBcheck(self, subdir: str = None):
        """
        Stage 3: functional check ("funccheck") of the TB against LLM-generated RTL
        candidates, possibly correcting the TB and improving the description.

        Args:
            subdir: optional subdirectory of the task dir to run in.
        """
        self.op_record.append("funccheck")
        working_dir = os.path.join(self.task_dir, subdir) if subdir is not None else self.task_dir
        self.stage_now = "TBcheck"
        self.TBcheck = TaskTBcheck(
            task_dir = working_dir,
            task_id = self.task_id,
            description = self.prob_description,
            module_header = self.header,
            TB_code_v = self.TB_code_v,
            TB_code_py = self.TB_code_py,
            rtl_list = self.rtlgen_list,
            rtl_num = self.rtl_num,
            scenario_num = self.scenario_num,
            correct_max = self.TBcheck_correct_max,
            runfiles_save=self.save_compile,
            discriminator_mode=self.discrim_mode,
            corrector_mode=self.correct_mode,
            circuit_type=self.circuit_type,
            rtl_compens_en=self.rtl_compens_en,
            rtl_compens_max_iter=self.rtl_compens_max_iter,
            main_model = self.main_model,
            rtlgen_model = self.rtlgen_model,
            desc_improve=self.update_desc
        )
        # keep the (possibly compensated) RTL list so later iterations reuse it
        self.rtlgen_list = self.TBcheck.rtl_list
        self.TBcheck.run()
        # adopt the (possibly corrected) TB code and record the check's bookkeeping
        self.TB_code_v = self.TBcheck.TB_code_v
        self.TB_code_py = self.TBcheck.TB_code_py
        self.TB_corrected = self.TBcheck.corrected
        self.funccheck_op_record.append(self.TBcheck.op_record)
        self.funccheck_iters.append(self.TBcheck.iter_now)
        self.TBcheck_rtl_newly_gen_num += self.TBcheck.rtl_newly_gen_num
        # "pass" or "reboot" — consumed by run_stages_core to decide the next move
        self.next_action = self.TBcheck.next_action
        if self.update_desc:
            self.prob_data['description'] = self.TBcheck.update_description()
            self.prob_description = self.prob_data['description']
        self._blank_log()
def run_TBeval(self, subdir:str=None):
self.op_record.append("eval")
working_dir = os.path.join(self.task_dir, subdir) if subdir is not None else self.task_dir
self.stage_now = "TBeval"
self.TBeval = TaskTBeval(
self.task_id,
working_dir,
TB_gen=self.TB_code_v,
TB_golden=self.TB_golden,
DUT_golden=self.DUT_golden,
DUT_mutant_list=self.mutant_list,
DUT_gptgen_list=None,
pychecker_en=self.TBsim.pychecker_en,
pychecker_code=self.TB_code_py,
runfiles_save=self.save_compile
)
# attention: the rtls in DUT_gptgen_list are not the same as the rtls used in TBcheck, so currently we just block this feature
try:
self.TBeval.run()
except:
logger.failed("error when running TBeval, the autoline for this task stopped.")
self.incomplete_running = True
self._blank_log()
    # New method, added next to run_TBeval (TB4) in the stage sequence
    def run_TBCGA(self, work_subdir="CGA", optimize=True, op_name="cga"):
        """
        Coverage-Guided Agent (CGA) stage: measure — and, when `optimize` is True,
        iteratively improve — the coverage of the current TB against the golden DUT.

        Args:
            work_subdir: subdirectory of the task dir for CGA work files.
            optimize: when False, run 0 iterations (coverage measurement only).
            op_name: label appended to op_record ("cga" or "coverage_eval").
        """
        self.stage_now = "TBCGA"
        self.op_record.append(op_name)
        cga = TaskTBCGA(
            task_dir=self.task_dir,
            task_id=self.task_id,
            header=self.header,
            DUT_code=self.DUT_golden,
            TB_code=self.TB_code_v,
            config=self.config,
            work_subdir=work_subdir,
            max_iter=(self.config.autoline.cga.max_iter if optimize else 0)  # 0 iters = baseline measurement
        )
        # [Changed] run() now also returns the coverage score
        final_tb, final_score = cga.run()
        self.cga_coverage = final_score
        # update task state with the CGA result
        self.TB_code_v = final_tb
        self.result_dict['coverage'] = final_score
        # [New] always archive final_TB.v into the task directory
        final_tb_path = os.path.join(self.task_dir, "final_TB.v")
        ls.save_code(final_tb, final_tb_path)
        logger.info(f"Saved optimized TB to: {final_tb_path}")
def run_stages(self):
with Timer(print_en=False) as self.running_time:
if not self.error_interuption:
self.run_stages_core()
else:
try:
self.run_stages_core()
except Exception as e:
self.incomplete_running = True
logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
if self.error_interuption:
# if True, stop the pipeline
raise e
self.incomplete_running = False
    def run_stages_core(self):
        """
        Dispatch on config.autoline.onlyrun:
        - "TBgen" / "TBgensim" / "TBgensimeval": partial pipelines.
        - default: up to iter_max rounds of TBgen -> TBsim -> TBcheck (rebooting
          on error or on TBcheck's request), then the CGA stage, then TBeval.
        """
        match self.config.autoline.onlyrun:
            case "TBgen":
                self.run_TBgen()
            case "TBgensim":
                self.run_TBgen()
                self.run_TBsim()
            # case _: # default, run all
            case "TBgensimeval":
                try:
                    self.run_TBgen("1_TBgen")
                    self.run_TBsim("2_TBsim")
                    self.run_TBeval("3_TBeval")
                except Exception as e:
                    self.incomplete_running = True
                    logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
                else:
                    self.incomplete_running = False
            case _: # default, run all
                for i in range(self.iter_max):
                    self.autoline_iter_now = i
                    try:
                        self.run_TBgen(f"{i+1}_1_TBgen")
                        self.run_TBsim(f"{i+1}_2_TBsim")
                        self.run_TBcheck(f"{i+1}_3_TBcheck")
                    except Exception as e:
                        # logger.error(f"error when running {self.stage_now}, current pipeline iter: {i+1}, will {"REBOOT" if i<self.iter_max-1 else "go to NEXT STAGE"}. error message: {str(e)}")
                        # self.next_action = "reboot"
                        # continue
                        err_msg = str(e)
                        logger.error(f"Error when running {self.stage_now}, iter: {i+1}. Message: {err_msg}")
                        # === [Key change: API cool-down period] ===
                        # After an iverilog failure or API timeout, pause for 15 s.
                        # This helps avoid HTTP 429 (rate limit) or reset connections from the Aliyun API.
                        logger.warning("⚠️ Pipeline interrupted. Cooling down for 15s to avoid API Rate Limit...")
                        time.sleep(15)
                        # ================================
                        # if the config asks to abort on the first error, re-raise
                        if getattr(self.config.autoline, 'error_interruption', False):
                            raise e
                        # otherwise mark for reboot and move on to the next iteration
                        self.next_action = "reboot"
                        self.incomplete_running = True # mark the current run as incomplete
                        continue
                    match self.next_action:
                        case "pass":
                            break
                        case "reboot":
                            continue
                # === [CGA insertion point START] ===
                # only when the task is healthy and no reboot was requested
                if self.next_action == "pass":
                    # mark the run as complete before entering CGA, so internal logic does not misjudge the state
                    self.incomplete_running = False
                    try:
                        if self.cga_enabled:
                            self.run_TBCGA(work_subdir="CGA", optimize=True, op_name="cga")
                        else:
                            self.run_TBCGA(work_subdir="CGA_baseline", optimize=False, op_name="coverage_eval")
                    except Exception as e:
                        logger.error(f"CGA Stage Failed: {e}. Fallback to original TB.")
                        self.result_dict['error'] = str(e)
                # === [CGA insertion point END] ===
                try:
                    self.run_TBeval(f"{self.autoline_iter_now+1}_4_TBeval")
                except Exception as e:
                    self.incomplete_running = True
                    logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
    def runinfo_update(self):
        """
        Assemble run_info / run_info_short / result_dict from all finished stages
        and write them (plus the captured task log) into the task directory.

        Returns:
            dict: the full run_info for this task.
        """
        # general
        self.run_info = {
            "task_id": self.task_id,
            "task_number": self.task_NO,
            "time": round(self.running_time.interval, 2),
            "prompt_tokens": llm_manager.tokens_in_section,
            "completion_tokens": llm_manager.tokens_out_section,
            "token_cost": llm_manager.cost_section,
            "ERROR(incomplete)": self.incomplete_running,
            "op_record": self.op_record,
            "reboot_times": self.autoline_iter_now,
            "max_iter": self.iter_max,
            # === [New] write the CGA coverage score into the final report ===
            "coverage": self.cga_coverage
        }
        # token and cost from llm_manager
        # TBgen
        if self.TBgen is not None:
            # self.run_info["prompt_tokens"] += self.TBgen.tokens["prompt"]
            # self.run_info["completion_tokens"] += self.TBgen.tokens["completion"]
            self.run_info["circuit_type"] = self.circuit_type
            self.run_info["checklist_worked"] = self.checklist_worked
            self.run_info["scenario_num"] = self.scenario_num
        # TBsim
        if self.TBsim is not None:
            # self.run_info["prompt_tokens"] += self.TBsim.tokens["prompt"]
            # self.run_info["completion_tokens"] += self.TBsim.tokens["completion"]
            self.run_info.update({
                "Eval0_pass": self.TBsim.Eval0_pass,
                "Eval0_iv_pass": self.TBsim.sim_pass,
                "debug_iter_iv": self.TBsim.debug_iter_iv_now,
                "iv_runing_time": self.TBsim.iv_runing_time
            })
            if self.TBsim.pychecker_en:
                self.run_info.update({
                    "Eval0_py_pass": self.TBsim.py_pass,
                    "debug_iter_py": self.TBsim.debug_iter_py_now,
                    "py_runing_time": self.TBsim.py_runing_time
                })
        # TODO: TBcheck runinfo update
        if self.TBcheck is not None:
            self.run_info.update({
                "TB_corrected": self.TB_corrected,
                "TBcheck_oprecord": self.funccheck_op_record,
                "rtl_num_newly_gen": self.TBcheck_rtl_newly_gen_num
            })
        # TBeval
        if self.TBeval is not None:
            if self.TBeval.Eval1_exist:
                self.run_info.update({"Eval1_pass": self.TBeval.Eval1_pass})
                self.result_dict["Eval1_pass"] = self.TBeval.Eval1_pass
            if self.TBeval.Eval2_exist:
                self.run_info.update({
                    "Eval2_pass": self.TBeval.Eval2_pass,
                    "Eval2_ratio": "%d/%d"%(len(self.TBeval.Eval2_passed_mutant_idx), len(self.prob_data['mutants'])),
                    "Eval2_failed_mutant_idxes": self.TBeval.Eval2_failed_mutant_idx
                })
                self.result_dict.update({
                    "Eval2_pass": self.TBeval.Eval2_pass,
                    "Eval2_ratio": "%d/%d"%(len(self.TBeval.Eval2_passed_mutant_idx), len(self.prob_data['mutants'])),
                    "Eval2_failed_mutant_idxes": self.TBeval.Eval2_failed_mutant_idx
                })
            if self.TBeval.Eval2b_exist:
                self.run_info.update({
                    "Eval2b_pass": self.TBeval.Eval2b_pass,
                    "Eval2b_ratio": "%d/%d"%(len(self.TBeval.Eval2b_passed_mutant_idx), len(self.prob_data['gptgen_RTL'])),
                    "Eval2b_failed_mutant_idxes": self.TBeval.Eval2b_failed_mutant_idx
                })
                self.result_dict.update({
                    "Eval2b_pass": self.TBeval.Eval2b_pass,
                    "Eval2b_ratio": "%d/%d"%(len(self.TBeval.Eval2b_passed_mutant_idx), len(self.prob_data['gptgen_RTL'])),
                    "Eval2b_failed_mutant_idxes": self.TBeval.Eval2b_failed_mutant_idx
                })
        # full pass
        if not self.incomplete_running:
            self.full_pass = self.TBsim.sim_pass and self.TBeval.Eval1_pass and self.TBeval.Eval2_pass
            self.run_info.update({
                "full_pass": self.full_pass
            })
            self.result_dict["full_pass"] = self.full_pass
            self.result_dict["pass"] = self.full_pass
        else:
            self.result_dict["full_pass"] = False
            self.result_dict["pass"] = False
        self.result_dict["stage"] = self.stage_now
        self.result_dict["coverage"] = self.cga_coverage
        save_dict_json_form(self.run_info, os.path.join(self.task_dir, "run_info.json"))
        # short run info: condense the furthest evaluation level reached
        if "Eval2_ratio" in self.run_info.keys():
            eval_progress = "Eval2 - " + self.run_info["Eval2_ratio"]
        elif "Eval1_pass" in self.run_info.keys() and self.run_info["Eval1_pass"]:
            eval_progress = "Eval1 - passed"
        elif "Eval0_pass" in self.run_info.keys() and self.run_info["Eval0_pass"]:
            eval_progress = "Eval1 - failed"
        elif "Eval0_pass" in self.run_info.keys() and not self.run_info["Eval0_pass"]:
            eval_progress = "Eval0 - failed"
        else:
            eval_progress = "Eval0 - not found"
        self.run_info_short = {
            "task_id": self.run_info.get("task_id", None),
            "eval_progress": eval_progress,
            "TB_corrected": self.run_info.get("TB_corrected", None),
            "reboot_times": self.run_info.get("reboot_times", None),
            "time": self.run_info.get("time", None),
            "cost": self.run_info.get("token_cost", None),
        }
        save_dict_json_form(self.run_info_short, os.path.join(self.task_dir, "run_info_short.json"))
        # run log: dump the buffered per-task log into the task directory
        running_log = logger.reset_temp_log()
        tasklog_path = os.path.join(self.task_dir, "task_log.log")
        os.makedirs(os.path.dirname(tasklog_path), exist_ok=True)
        with open(tasklog_path, "w") as f:
            f.write(running_log)
        return self.run_info
def save_TB_codes(self):
save_dir = self.task_dir
ls.save_code(self.TB_code_v if isinstance(self.TB_code_v, str) else "// TB code (Verilog) unavailable", os.path.join(save_dir, "final_TB.v"))
ls.save_code(self.TB_code_py if isinstance(self.TB_code_py, str) else "## TB code (Python) unavailable", os.path.join(save_dir, "final_TB.py"))
    @staticmethod
    def _blank_log():
        # emit an empty log line to visually separate stages in the task log
        logger.info("")
    def __call__(self, *args, **kwargs):
        # make the task object directly callable as a shorthand for run()
        return self.run(*args, **kwargs)