Files
TBgen_App/autoline/TB_autoline.py
2026-03-30 16:46:48 +08:00

538 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Description : The main function of autoline, originally the first part of autoline.py in AutoBench 1.0
Author : Ruidi Qiu (r.qiu@tum.de)
Time : 2024/7/24 11:44:15
LastEdited : 2024/9/1 10:32:18
"""
import os
import analyze as al
import loader_saver as ls
import time
from config import Config
from loader_saver import save_dict_json_form, log_localprefix
from data.probset import HDLBitsProbset
from loader_saver import autologger as logger
from utils.utils import Timer
from autoline.TB1_gen import TaskTBgen
from autoline.TB2_syncheck import TaskTBsim
from autoline.TB3_funccheck import TaskTBcheck
from autoline.TB4_eval import TaskTBeval
from prompt_scripts import BaseScript
from LLM_call import llm_manager
# [New] Import the Coverage-Guided Agent module added alongside this pipeline
from autoline.TB_cga import TaskTBCGA
def run_autoline():
    """Entry point: build the configuration and execute the full AutoLine pipeline."""
    cfg = Config()
    pipeline = AutoLine(cfg)
    pipeline()
class AutoLine():
    """Top-level driver that runs the AutoLine pipeline over a whole problem set."""

    def __init__(self, config: Config):
        self.config = config
        self.logger = logger
        self.logger.assert_(
            config.get_item("autoline", "promptscript") is not None,
            "config.autoline.promptscript is None, please check the config file.",
        )
        self.load_data()
        # where the aggregated per-task run info is written
        self.run_info_path = os.path.join(config.save.root, "Chatbench_RunInfo.json")
        self.run_info = []
        # only run the analyzer when not in an "onlyrun" partial-run mode
        self.analyzer_en = (config.autoline.onlyrun is None) or (config.autoline.onlyrun == "TBgensimeval")

    def run(self):
        for idx, probdata_single in enumerate(self.probset.data):
            task_id = probdata_single["task_id"]
            self.logger.info("")
            self.logger.info("######################### task %d/%d [%s] #########################" % (idx+1, self.probset.num, task_id))
            task = AutoLine_Task(probdata_single, self.config)
            self.run_info.append(task.run())
            # persist after every task; each write overwrites the previous snapshot
            save_dict_json_form(self.run_info, self.run_info_path)
        if self.analyzer_en:
            self.run_analyzer()

    def __call__(self, *args, **kwargs):
        return self.run(*args, **kwargs)

    def load_data(self):
        # problem set selected by the config
        probset_cfg = self.config.autoline.probset
        self.probset = HDLBitsProbset()
        self.probset.load_by_config(probset_cfg)

    def run_analyzer(self):
        analyzer = al.Analyzer(self.run_info, self.config.gpt.model)
        analyzer.run()
        logger.info(analyzer.messages)
class AutoLine_Task():
    """Runs the AutoLine pipeline (TBgen -> TBsim -> TBcheck -> [CGA] -> TBeval) for one problem."""

    def __init__(self, prob_data: dict, config: Config):
        # config:
        self.config = config
        # problem data:
        self.prob_data = prob_data
        self.main_model = self.config.gpt.model # The main llm model used in the autoline (generation, correction...)
        self.task_id = prob_data["task_id"]
        self.task_NO = prob_data["task_number"]
        self.prob_description = prob_data["description"]
        self.header = prob_data["header"]
        self.DUT_golden = prob_data['module_code']
        self.TB_golden = prob_data.get("testbench", None)
        self.mutant_list = prob_data.get("mutants", None)
        self.rtlgen_list = prob_data.get('llmgen_RTL', None)
        self.rtlgen_model = self.config.gpt.rtlgen_model # if llmgen_list is none, this will be used
        self.rtl_num = self.config.autoline.TBcheck.rtl_num # will be covered if llmgen_list is not None
        # system config:
        self.task_dir = os.path.join(self.config.save.root, self.task_id)
        self.working_dir = self.task_dir
        os.makedirs(self.task_dir, exist_ok=True)
        # === [CGA Mod] Save DUT immediately to task dir for CGA access ===
        self.dut_path = os.path.join(self.task_dir, "DUT.v")
        ls.save_code(self.DUT_golden, self.dut_path)
        # ==============================================================
        self.update_desc = config.autoline.update_desc
        self.error_interuption = config.autoline.error_interruption # debug flag: re-raise stage errors instead of swallowing them
        self.save_codes = config.autoline.save_finalcodes
        self.save_compile = self.config.autoline.save_compile # save the compiling codes in TBcheck and TBeval or not.
        # TBgen parameters:
        self.TBgen_prompt_script = config.autoline.promptscript
        self.circuit_type = None
        self.scenario_dict = None
        self.scenario_num = None
        self.checklist_worked = None
        # TBcheck parameters:
        self.TBcheck_correct_max = self.config.autoline.TBcheck.correct_max
        self.iter_max = config.autoline.itermax
        self.discrim_mode = config.autoline.TBcheck.discrim_mode
        self.correct_mode = config.autoline.TBcheck.correct_mode
        self.rtl_compens_en = config.autoline.TBcheck.rtl_compens_en
        self.rtl_compens_max_iter = config.autoline.TBcheck.rtl_compens_max_iter
        self.cga_enabled = config.autoline.cga.enabled
        # stage objects (populated by the run_* methods):
        self.TBgen_manager: TaskTBgen = None
        self.TBgen: BaseScript = None
        self.TBsim: TaskTBsim = None
        self.TBcheck: TaskTBcheck = None
        self.TBeval: TaskTBeval = None
        self.stage_now = "initialization"
        # mutable pipeline state:
        self.autoline_iter_now = 0
        self.TB_code_v = None
        self.TB_code_py = None
        self.next_action = None
        # results:
        self.incomplete_running = True
        self.full_pass = False
        self.TB_corrected = False
        self.run_info = {}
        self.run_info_short = {}
        self.TBcheck_rtl_newly_gen_num = 0 # in autoline, "funccheck" = "TBcheck"
        self.op_record = [] # will record the order of each stage, for example: ["gen", "syncheck", "funccheck", "gen", "syncheck", "funccheck", "eval"]
        self.funccheck_op_record = []
        self.funccheck_iters = []
        # CGA coverage score; stays 0.0 until the CGA stage runs
        self.cga_coverage = 0.0
        # === [CGA Mod] Initialize result dictionary for final reporting ===
        self.result_dict = {
            "task_id": self.task_id,
            "stage": "Init",
            "pass": False,
            "coverage": 0.0,
            "cga_enabled": self.cga_enabled
        }
        # =================================================================
        # renew current section of llm_manager and logger
        llm_manager.new_section()
        logger.set_temp_log()
def run(self):
    """
    Run the whole autoline for one problem.

    Executes the stages, updates the run info, optionally saves the final TB
    codes, writes the per-task result JSON for the analyzer, and returns the
    run-info dict for this task.
    """
    with log_localprefix(self.task_id):
        self.run_stages()
        self.runinfo_update()
        if self.save_codes:
            self.save_TB_codes()
        # === [CGA Mod] Save Result JSON for Analyzer ===
        self.result_dict['stage'] = self.stage_now
        try:
            result_save_path = self.config.autoline.result_path
        except AttributeError:
            # config object has no `result_path` attribute -- fall back to a default dir
            result_save_path = "results"
        # exist_ok=True makes a separate os.path.exists() pre-check unnecessary
        # and avoids the check-then-create race
        os.makedirs(result_save_path, exist_ok=True)
        ls.save_dict_json_form(self.result_dict, os.path.join(result_save_path, f"{self.task_id}.json"))
        # ===============================================
        return self.run_info
def run_TBgen(self, subdir: str = None):
    """TB generation stage: produce the testbench (Verilog + Python checker) from the problem data."""
    # TODO: export the circuit type and scenario number
    self.op_record.append("gen")
    workdir = self.task_dir if subdir is None else os.path.join(self.task_dir, subdir)
    self.stage_now = "TBgen"
    self.TBgen_manager = TaskTBgen(self.prob_data, self.TBgen_prompt_script, workdir, self.config)
    self.TBgen = self.TBgen_manager.workflow
    with log_localprefix("TBgen"):
        self.TBgen()
    # pull the generated artifacts out of the workflow (same attribute names on both sides)
    for attr in ("TB_code_v", "TB_code_py", "scenario_dict", "scenario_num",
                 "circuit_type", "checklist_worked"):
        setattr(self, attr, self.TBgen.get_attr(attr))
    self.incomplete_running = True
    self._blank_log()
def run_TBsim(self, subdir: str = None):
    """TB simulation/syntax-check stage: run and debug the generated TB until it executes."""
    self.op_record.append("syncheck")
    workdir = self.task_dir if subdir is None else os.path.join(self.task_dir, subdir)
    self.stage_now = "TBsim"
    self.TBsim = TaskTBsim(self.TBgen, self.TBgen.TB_code, self.header,
                           workdir, self.task_id, self.config)
    self.TBsim.run()
    # keep whatever (possibly repaired) code the simulation stage ended up with
    self.TB_code_v = self.TBsim.TB_code_now
    self.TB_code_py = self.TBsim.PY_code_now
    self._blank_log()
def run_TBcheck(self, subdir: str = None):
    """Functional-check stage: discriminate and (if needed) correct the TB using reference RTLs."""
    self.op_record.append("funccheck")
    workdir = self.task_dir if subdir is None else os.path.join(self.task_dir, subdir)
    self.stage_now = "TBcheck"
    checker_args = dict(
        task_dir=workdir,
        task_id=self.task_id,
        description=self.prob_description,
        module_header=self.header,
        TB_code_v=self.TB_code_v,
        TB_code_py=self.TB_code_py,
        rtl_list=self.rtlgen_list,
        rtl_num=self.rtl_num,
        scenario_num=self.scenario_num,
        correct_max=self.TBcheck_correct_max,
        runfiles_save=self.save_compile,
        discriminator_mode=self.discrim_mode,
        corrector_mode=self.correct_mode,
        circuit_type=self.circuit_type,
        rtl_compens_en=self.rtl_compens_en,
        rtl_compens_max_iter=self.rtl_compens_max_iter,
        main_model=self.main_model,
        rtlgen_model=self.rtlgen_model,
        desc_improve=self.update_desc,
    )
    self.TBcheck = TaskTBcheck(**checker_args)
    # cache the checker's RTL list before running (it may differ from what we passed in)
    self.rtlgen_list = self.TBcheck.rtl_list
    self.TBcheck.run()
    # pull back the (possibly corrected) TB plus bookkeeping info
    self.TB_code_v = self.TBcheck.TB_code_v
    self.TB_code_py = self.TBcheck.TB_code_py
    self.TB_corrected = self.TBcheck.corrected
    self.funccheck_op_record.append(self.TBcheck.op_record)
    self.funccheck_iters.append(self.TBcheck.iter_now)
    self.TBcheck_rtl_newly_gen_num += self.TBcheck.rtl_newly_gen_num
    self.next_action = self.TBcheck.next_action
    if self.update_desc:
        # TBcheck may rewrite the problem description; propagate it to later stages
        self.prob_data['description'] = self.TBcheck.update_description()
        self.prob_description = self.prob_data['description']
    self._blank_log()
def run_TBeval(self, subdir: str = None):
    """Evaluation stage: grade the final TB against the golden DUT and the mutant DUTs."""
    self.op_record.append("eval")
    working_dir = os.path.join(self.task_dir, subdir) if subdir is not None else self.task_dir
    self.stage_now = "TBeval"
    self.TBeval = TaskTBeval(
        self.task_id,
        working_dir,
        TB_gen=self.TB_code_v,
        TB_golden=self.TB_golden,
        DUT_golden=self.DUT_golden,
        DUT_mutant_list=self.mutant_list,
        DUT_gptgen_list=None,
        pychecker_en=self.TBsim.pychecker_en,
        pychecker_code=self.TB_code_py,
        runfiles_save=self.save_compile
    )
    # attention: the rtls in DUT_gptgen_list are not the same as the rtls used in TBcheck, so currently we just block this feature
    try:
        self.TBeval.run()
    except Exception:
        # was a bare `except:` -- narrowed so KeyboardInterrupt/SystemExit still propagate
        logger.failed("error when running TBeval, the autoline for this task stopped.")
        self.incomplete_running = True
    self._blank_log()
# Coverage-Guided Agent stage; lives alongside run_TBeval in the pipeline.
def run_TBCGA(self, work_subdir="CGA", optimize=True, op_name="cga"):
    """
    Coverage-Guided Agent stage: measure (and, when `optimize` is True, improve)
    the testbench's coverage of the golden DUT.
    """
    self.stage_now = "TBCGA"
    self.op_record.append(op_name)
    # with optimize=False the agent only evaluates coverage (zero optimization iterations)
    iter_budget = self.config.autoline.cga.max_iter if optimize else 0
    cga = TaskTBCGA(
        task_dir=self.task_dir,
        task_id=self.task_id,
        header=self.header,
        DUT_code=self.DUT_golden,
        TB_code=self.TB_code_v,
        config=self.config,
        work_subdir=work_subdir,
        max_iter=iter_budget,
    )
    # the agent returns both the (possibly improved) TB and its coverage score
    final_tb, final_score = cga.run()
    self.cga_coverage = final_score
    self.TB_code_v = final_tb
    self.result_dict['coverage'] = final_score
    # archive the final TB into the task directory
    final_tb_path = os.path.join(self.task_dir, "final_TB.v")
    ls.save_code(final_tb, final_tb_path)
    logger.info(f"Saved optimized TB to: {final_tb_path}")
def run_stages(self):
    """Run all pipeline stages under a timer, with top-level error containment.

    Bug fix: the original inverted the `error_interuption` test -- when the flag
    was False (normal batch mode) the core ran UNPROTECTED, so any stage error
    crashed the whole batch, while the guarded branch always re-raised anyway.
    Now the core is always wrapped: on failure we log and mark the task
    incomplete; in debug mode (`error_interuption` True) we additionally
    re-raise to stop the pipeline.
    """
    with Timer(print_en=False) as self.running_time:
        try:
            self.run_stages_core()
        except Exception as e:
            self.incomplete_running = True
            logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
            if self.error_interuption:
                # debug mode: stop the pipeline (bare raise preserves the traceback)
                raise
            # normal mode: leave the task marked incomplete and move on
            return
        # only a clean run counts as complete
        self.incomplete_running = False
def run_stages_core(self):
    """Dispatch on config.autoline.onlyrun and execute the selected subset of stages.

    Default mode (no onlyrun): up to `iter_max` rounds of TBgen -> TBsim ->
    TBcheck (rebooting on failure), then the optional CGA stage, then TBeval.
    """
    match self.config.autoline.onlyrun:
        case "TBgen":
            # partial run: generation only
            self.run_TBgen()
        case "TBgensim":
            # partial run: generation + syntax check/simulation
            self.run_TBgen()
            self.run_TBsim()
        case "TBgensimeval":
            # partial run: skip the functional check and evaluate directly
            try:
                self.run_TBgen("1_TBgen")
                self.run_TBsim("2_TBsim")
                self.run_TBeval("3_TBeval")
            except Exception as e:
                self.incomplete_running = True
                logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
            else:
                self.incomplete_running = False
        case _: # default, run all
            for i in range(self.iter_max):
                self.autoline_iter_now = i
                try:
                    self.run_TBgen(f"{i+1}_1_TBgen")
                    self.run_TBsim(f"{i+1}_2_TBsim")
                    self.run_TBcheck(f"{i+1}_3_TBcheck")
                except Exception as e:
                    err_msg = str(e)
                    logger.error(f"Error when running {self.stage_now}, iter: {i+1}. Message: {err_msg}")
                    # === [API cool-down period] ===
                    # If iverilog failed or the API timed out, rest for 15 seconds.
                    # This helps avoid 429 rate-limit errors or reset connections
                    # from the API provider.
                    logger.warning("⚠️ Pipeline interrupted. Cooling down for 15s to avoid API Rate Limit...")
                    time.sleep(15)
                    # ================================
                    # if the config asks to stop on the first error, re-raise
                    # NOTE(review): this reads config.autoline.error_interruption directly
                    # while run_stages uses self.error_interuption -- confirm they agree.
                    if getattr(self.config.autoline, 'error_interruption', False):
                        raise e
                    # otherwise mark for reboot and start the next loop iteration
                    self.next_action = "reboot"
                    self.incomplete_running = True # current run is incomplete
                    continue
                match self.next_action:
                    case "pass":
                        break
                    case "reboot":
                        continue
            # === [CGA insertion point START] ===
            # only when the task state is healthy and no reboot is pending
            if self.next_action == "pass":
                # mark the run as complete before entering CGA so internal
                # logic does not misjudge the state
                self.incomplete_running = False
                try:
                    if self.cga_enabled:
                        self.run_TBCGA(work_subdir="CGA", optimize=True, op_name="cga")
                    else:
                        # baseline: measure coverage only, no optimization
                        self.run_TBCGA(work_subdir="CGA_baseline", optimize=False, op_name="coverage_eval")
                except Exception as e:
                    logger.error(f"CGA Stage Failed: {e}. Fallback to original TB.")
                    self.result_dict['error'] = str(e)
            # === [CGA insertion point END] ===
            try:
                self.run_TBeval(f"{self.autoline_iter_now+1}_4_TBeval")
            except Exception as e:
                self.incomplete_running = True
                logger.error("error when running %s, the autoline for this task stopped. error message: %s"%(self.stage_now, str(e)))
def runinfo_update(self):
    """Assemble run_info / run_info_short / result_dict from the finished stages and save them.

    Returns the full run_info dict. Only the stages that actually ran (non-None
    stage objects) contribute their fields.
    """
    # general
    self.run_info = {
        "task_id": self.task_id,
        "task_number": self.task_NO,
        "time": round(self.running_time.interval, 2),
        "prompt_tokens": llm_manager.tokens_in_section,
        "completion_tokens": llm_manager.tokens_out_section,
        "token_cost": llm_manager.cost_section,
        "ERROR(incomplete)": self.incomplete_running,
        "op_record": self.op_record,
        "reboot_times": self.autoline_iter_now,
        "max_iter": self.iter_max,
        # write the CGA coverage score into the final report
        "coverage": self.cga_coverage
    }
    # token and cost come from llm_manager (per-section counters)
    # TBgen
    if self.TBgen is not None:
        self.run_info["circuit_type"] = self.circuit_type
        self.run_info["checklist_worked"] = self.checklist_worked
        self.run_info["scenario_num"] = self.scenario_num
    # TBsim
    if self.TBsim is not None:
        self.run_info.update({
            "Eval0_pass": self.TBsim.Eval0_pass,
            "Eval0_iv_pass": self.TBsim.sim_pass,
            "debug_iter_iv": self.TBsim.debug_iter_iv_now,
            "iv_runing_time": self.TBsim.iv_runing_time
        })
        if self.TBsim.pychecker_en:
            self.run_info.update({
                "Eval0_py_pass": self.TBsim.py_pass,
                "debug_iter_py": self.TBsim.debug_iter_py_now,
                "py_runing_time": self.TBsim.py_runing_time
            })
    # TODO: TBcheck runinfo update
    if self.TBcheck is not None:
        self.run_info.update({
            "TB_corrected": self.TB_corrected,
            "TBcheck_oprecord": self.funccheck_op_record,
            "rtl_num_newly_gen": self.TBcheck_rtl_newly_gen_num
        })
    # TBeval
    if self.TBeval is not None:
        if self.TBeval.Eval1_exist:
            self.run_info.update({"Eval1_pass": self.TBeval.Eval1_pass})
            self.result_dict["Eval1_pass"] = self.TBeval.Eval1_pass
        if self.TBeval.Eval2_exist:
            self.run_info.update({
                "Eval2_pass": self.TBeval.Eval2_pass,
                "Eval2_ratio": "%d/%d"%(len(self.TBeval.Eval2_passed_mutant_idx), len(self.prob_data['mutants'])),
                "Eval2_failed_mutant_idxes": self.TBeval.Eval2_failed_mutant_idx
            })
            self.result_dict.update({
                "Eval2_pass": self.TBeval.Eval2_pass,
                "Eval2_ratio": "%d/%d"%(len(self.TBeval.Eval2_passed_mutant_idx), len(self.prob_data['mutants'])),
                "Eval2_failed_mutant_idxes": self.TBeval.Eval2_failed_mutant_idx
            })
        if self.TBeval.Eval2b_exist:
            self.run_info.update({
                "Eval2b_pass": self.TBeval.Eval2b_pass,
                "Eval2b_ratio": "%d/%d"%(len(self.TBeval.Eval2b_passed_mutant_idx), len(self.prob_data['gptgen_RTL'])),
                "Eval2b_failed_mutant_idxes": self.TBeval.Eval2b_failed_mutant_idx
            })
            self.result_dict.update({
                "Eval2b_pass": self.TBeval.Eval2b_pass,
                "Eval2b_ratio": "%d/%d"%(len(self.TBeval.Eval2b_passed_mutant_idx), len(self.prob_data['gptgen_RTL'])),
                "Eval2b_failed_mutant_idxes": self.TBeval.Eval2b_failed_mutant_idx
            })
        # full pass
        # NOTE(review): this also reads self.TBsim.sim_pass -- assumes TBsim ran
        # whenever TBeval did and the run is complete; confirm for partial modes.
        if not self.incomplete_running:
            self.full_pass = self.TBsim.sim_pass and self.TBeval.Eval1_pass and self.TBeval.Eval2_pass
            self.run_info.update({
                "full_pass": self.full_pass
            })
            self.result_dict["full_pass"] = self.full_pass
            self.result_dict["pass"] = self.full_pass
        else:
            self.result_dict["full_pass"] = False
            self.result_dict["pass"] = False
    self.result_dict["stage"] = self.stage_now
    self.result_dict["coverage"] = self.cga_coverage
    save_dict_json_form(self.run_info, os.path.join(self.task_dir, "run_info.json"))
    # short run info: summarize how far the evaluation got
    if "Eval2_ratio" in self.run_info.keys():
        eval_progress = "Eval2 - " + self.run_info["Eval2_ratio"]
    elif "Eval1_pass" in self.run_info.keys() and self.run_info["Eval1_pass"]:
        eval_progress = "Eval1 - passed"
    elif "Eval0_pass" in self.run_info.keys() and self.run_info["Eval0_pass"]:
        eval_progress = "Eval1 - failed"
    elif "Eval0_pass" in self.run_info.keys() and not self.run_info["Eval0_pass"]:
        eval_progress = "Eval0 - failed"
    else:
        eval_progress = "Eval0 - not found"
    self.run_info_short = {
        "task_id": self.run_info.get("task_id", None),
        "eval_progress": eval_progress,
        "TB_corrected": self.run_info.get("TB_corrected", None),
        "reboot_times": self.run_info.get("reboot_times", None),
        "time": self.run_info.get("time", None),
        "cost": self.run_info.get("token_cost", None),
    }
    save_dict_json_form(self.run_info_short, os.path.join(self.task_dir, "run_info_short.json"))
    # run log: dump the buffered per-task log to the task directory
    running_log = logger.reset_temp_log()
    tasklog_path = os.path.join(self.task_dir, "task_log.log")
    os.makedirs(os.path.dirname(tasklog_path), exist_ok=True)
    with open(tasklog_path, "w") as f:
        f.write(running_log)
    return self.run_info
def save_TB_codes(self):
    """Write the final testbench (Verilog + Python checker) into the task directory."""
    out_dir = self.task_dir
    tb_v = self.TB_code_v if isinstance(self.TB_code_v, str) else "// TB code (Verilog) unavailable"
    tb_py = self.TB_code_py if isinstance(self.TB_code_py, str) else "## TB code (Python) unavailable"
    ls.save_code(tb_v, os.path.join(out_dir, "final_TB.v"))
    ls.save_code(tb_py, os.path.join(out_dir, "final_TB.py"))
@staticmethod
def _blank_log():
    """Emit an empty log line to visually separate pipeline stages."""
    logger.info("")
def __call__(self, *args, **kwargs):
    """Allow the task object to be invoked directly; delegates to run()."""
    return self.run(*args, **kwargs)