321 lines
11 KiB
Plaintext
321 lines
11 KiB
Plaintext
|
|
|
||
|
|
---------------custom config--------------
|
||
|
|
run:
|
||
|
|
mode: autoline
|
||
|
|
custom_path: None
|
||
|
|
gpt:
|
||
|
|
model: qwen-max
|
||
|
|
key_path: config/key_API.json
|
||
|
|
save:
|
||
|
|
en: True
|
||
|
|
root: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/
|
||
|
|
autoline:
|
||
|
|
result_path: results
|
||
|
|
cga:
|
||
|
|
enabled: True
|
||
|
|
max_iter: 10
|
||
|
|
target_coverage: 100.0
|
||
|
|
probset:
|
||
|
|
path: /home/zhang/CorrectBench/data/HDLBits/HDLBits_data.jsonl
|
||
|
|
mutant_path: None
|
||
|
|
gptgenRTL_path: None
|
||
|
|
more_info_paths: []
|
||
|
|
only: ['fsm_ps2']
|
||
|
|
exclude: []
|
||
|
|
exclude_json: None
|
||
|
|
filter: [{}]
|
||
|
|
checklist:
|
||
|
|
max: 3
|
||
|
|
debug:
|
||
|
|
max: 5
|
||
|
|
reboot: 1
|
||
|
|
py_rollback: 2
|
||
|
|
onlyrun: None
|
||
|
|
promptscript: pychecker
|
||
|
|
timeout: 300
|
||
|
|
TBcheck:
|
||
|
|
rtl_num: 20
|
||
|
|
correct_max: 3
|
||
|
|
discrim_mode: col_full_wrong
|
||
|
|
correct_mode: naive
|
||
|
|
rtl_compens_en: True
|
||
|
|
rtl_compens_max_iter: 3
|
||
|
|
itermax: 10
|
||
|
|
update_desc: False
|
||
|
|
save_compile: True
|
||
|
|
save_finalcodes: True
|
||
|
|
error_interruption: False
|
||
|
|
------------------------------------------
|
||
|
|
------config info (custom + default)------
|
||
|
|
run:
|
||
|
|
version: 2.0
|
||
|
|
author: Ruidi Qiu - Technical University of Munich
|
||
|
|
time: 20260413_131647
|
||
|
|
custom_path: /home/zhang/CorrectBench/TBgen_App/autoline/../config/custom.yaml
|
||
|
|
mode: autoline
|
||
|
|
hostname: localhost
|
||
|
|
pid: 513097
|
||
|
|
pyversion: 3.12.3 (main, Mar 3 2026, 12:15:18) [GCC 13.3.0]
|
||
|
|
save:
|
||
|
|
en: True
|
||
|
|
root: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/
|
||
|
|
pub:
|
||
|
|
prefix: None
|
||
|
|
dir: saves/0413~0419/
|
||
|
|
subdir:
|
||
|
|
log:
|
||
|
|
en: True
|
||
|
|
dir: logs/
|
||
|
|
notes: None
|
||
|
|
cfg_pmode: iwantall
|
||
|
|
debug_en: False
|
||
|
|
level: TRACE
|
||
|
|
message:
|
||
|
|
en: True
|
||
|
|
dir: messages/
|
||
|
|
format: json
|
||
|
|
iverilog:
|
||
|
|
en: True
|
||
|
|
subdir: ivcode_nodebug
|
||
|
|
load:
|
||
|
|
prompt:
|
||
|
|
path: config/initial_prompts/prompt1.txt
|
||
|
|
pick_idx: []
|
||
|
|
stage_template:
|
||
|
|
path: config/templates/stage_template0301.txt
|
||
|
|
gpt:
|
||
|
|
model: qwen-max
|
||
|
|
key_path: config/key_API.json
|
||
|
|
temperature: None
|
||
|
|
json_mode: False
|
||
|
|
chatgpt:
|
||
|
|
start_form: chat
|
||
|
|
one_time_talk: False
|
||
|
|
rtlgen_model: qwen-max
|
||
|
|
iverilog:
|
||
|
|
dir:
|
||
|
|
task_id:
|
||
|
|
autoline:
|
||
|
|
result_path: results
|
||
|
|
cga:
|
||
|
|
enabled: True
|
||
|
|
max_iter: 10
|
||
|
|
target_coverage: 100.0
|
||
|
|
probset:
|
||
|
|
path: /home/zhang/CorrectBench/data/HDLBits/HDLBits_data.jsonl
|
||
|
|
mutant_path: None
|
||
|
|
gptgenRTL_path: None
|
||
|
|
more_info_paths: []
|
||
|
|
only: ['fsm_ps2']
|
||
|
|
exclude: []
|
||
|
|
exclude_json: None
|
||
|
|
filter: [{}]
|
||
|
|
checklist:
|
||
|
|
max: 3
|
||
|
|
debug:
|
||
|
|
max: 5
|
||
|
|
reboot: 1
|
||
|
|
py_rollback: 2
|
||
|
|
onlyrun: None
|
||
|
|
promptscript: pychecker
|
||
|
|
timeout: 300
|
||
|
|
TBcheck:
|
||
|
|
rtl_num: 20
|
||
|
|
correct_max: 3
|
||
|
|
discrim_mode: col_full_wrong
|
||
|
|
correct_mode: naive
|
||
|
|
rtl_compens_en: True
|
||
|
|
rtl_compens_max_iter: 3
|
||
|
|
itermax: 10
|
||
|
|
update_desc: False
|
||
|
|
save_compile: True
|
||
|
|
save_finalcodes: True
|
||
|
|
error_interruption: False
|
||
|
|
_initialized: True
|
||
|
|
------------------------------------------
|
||
|
|
|
||
|
|
--------------default config--------------
|
||
|
|
run:
|
||
|
|
version: 2.0
|
||
|
|
author: Ruidi Qiu - Technical University of Munich
|
||
|
|
time: None
|
||
|
|
custom_path: None
|
||
|
|
mode: qwen-max
|
||
|
|
save:
|
||
|
|
en: True
|
||
|
|
root: None
|
||
|
|
pub:
|
||
|
|
prefix: None
|
||
|
|
dir: saves/$weekrange$/
|
||
|
|
subdir:
|
||
|
|
log:
|
||
|
|
en: True
|
||
|
|
dir: logs/
|
||
|
|
notes: None
|
||
|
|
cfg_pmode: iwantall
|
||
|
|
debug_en: False
|
||
|
|
level: TRACE
|
||
|
|
message:
|
||
|
|
en: True
|
||
|
|
dir: messages/
|
||
|
|
format: json
|
||
|
|
iverilog:
|
||
|
|
en: True
|
||
|
|
subdir: ivcode_nodebug
|
||
|
|
load:
|
||
|
|
prompt:
|
||
|
|
path: config/initial_prompts/prompt1.txt
|
||
|
|
pick_idx: []
|
||
|
|
stage_template:
|
||
|
|
path: config/templates/stage_template0301.txt
|
||
|
|
gpt:
|
||
|
|
model: 4o
|
||
|
|
key_path: config/key_API.json
|
||
|
|
temperature: None
|
||
|
|
json_mode: False
|
||
|
|
chatgpt:
|
||
|
|
start_form: chat
|
||
|
|
one_time_talk: False
|
||
|
|
rtlgen_model: None
|
||
|
|
iverilog:
|
||
|
|
dir:
|
||
|
|
task_id:
|
||
|
|
autoline:
|
||
|
|
result_path: results
|
||
|
|
cga:
|
||
|
|
enabled: True
|
||
|
|
max_iter: 10
|
||
|
|
target_coverage: 100.0
|
||
|
|
probset:
|
||
|
|
path: None
|
||
|
|
mutant_path: None
|
||
|
|
gptgenRTL_path: None
|
||
|
|
more_info_paths: []
|
||
|
|
only: ['lemmings3', 'lemmings4', 'ece241_2013_q8', '2014_q3fsm', 'm2014_q6', 'review2015_fsm', 'rule110', 'fsm_ps2']
|
||
|
|
exclude: []
|
||
|
|
exclude_json: None
|
||
|
|
filter: [{}]
|
||
|
|
checklist:
|
||
|
|
max: 3
|
||
|
|
debug:
|
||
|
|
max: 5
|
||
|
|
reboot: 1
|
||
|
|
py_rollback: 2
|
||
|
|
onlyrun: None
|
||
|
|
promptscript: None
|
||
|
|
timeout: 300
|
||
|
|
TBcheck:
|
||
|
|
rtl_num: 20
|
||
|
|
correct_max: 3
|
||
|
|
discrim_mode: col_full_wrong
|
||
|
|
correct_mode: naive
|
||
|
|
rtl_compens_en: True
|
||
|
|
rtl_compens_max_iter: 3
|
||
|
|
itermax: 10
|
||
|
|
update_desc: False
|
||
|
|
save_compile: True
|
||
|
|
save_finalcodes: True
|
||
|
|
error_interruption: False
|
||
|
|
------------------------------------------
|
||
|
|
2026-04-13 13:16:47 | INFO |
|
||
|
|
2026-04-13 13:16:47 | INFO | ######################### task 1/1 [fsm_ps2] #########################
|
||
|
|
2026-04-13 13:17:01 | INFO | [fsm_ps2] [TBgen] stage_0 ends (14.61s used)
|
||
|
|
2026-04-13 13:17:35 | INFO | [fsm_ps2] [TBgen] stage_1 ends (34.03s used)
|
||
|
|
2026-04-13 13:18:20 | INFO | [fsm_ps2] [TBgen] stage_2 ends (44.84s used)
|
||
|
|
2026-04-13 13:18:59 | INFO | [fsm_ps2] [TBgen] stage_3 ends (38.91s used)
|
||
|
|
2026-04-13 13:20:46 | INFO | [fsm_ps2] [TBgen] stage_4 ends (106.51s used)
|
||
|
|
2026-04-13 13:20:46 | INFO | [fsm_ps2] [TBgen] stage_checklist ends (0.00s used)
|
||
|
|
2026-04-13 13:25:28 | INFO | [fsm_ps2] [TBgen] stage_4b ends (282.58s used)
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBgen] stage_5 ends (31.76s used)
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2]
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] iverilog compilation : passed!
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] python simulation : passed!
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] TBsim finished : True!
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2]
|
||
|
|
2026-04-13 13:26:00 | INFO | [fsm_ps2] rtl list not found, generating naive rtls for testbench checking
|
||
|
|
2026-04-13 13:35:48 | INFO | [fsm_ps2] 20 naive rtls generated
|
||
|
|
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] Discriminating the testbench, NO.0 discrimination
|
||
|
|
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 16, 17, 19, 20] have syntax error during discrimination
|
||
|
|
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (5/20), I will generate more and recheck. This is not TB's fault.
|
||
|
|
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] 15 naive rtls generated
|
||
|
|
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
|
||
|
|
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 20] have syntax error during discrimination
|
||
|
|
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (7/20), I will generate more and recheck. This is not TB's fault.
|
||
|
|
2026-04-13 13:48:50 | INFO | [fsm_ps2] [TBcheck] [discriminator] 13 naive rtls generated
|
||
|
|
2026-04-13 13:48:50 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
|
||
|
|
2026-04-13 13:48:51 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 20] have syntax error during discrimination
|
||
|
|
2026-04-13 13:48:51 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (9/20), I will generate more and recheck. This is not TB's fault.
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] 11 naive rtls generated
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] no re-discrimination since the max iteration reached
|
||
|
|
2026-04-13 13:53:24 | POSITIVE | [fsm_ps2] [TBcheck] [discriminator] TB_discriminating finished, TB passed, wrong scenarios: [], scenario pass ratio: 10/10
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] Testbench passed the funccheck
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] self funccheck finished. Next Action: [pass]
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2]
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [fsm_ps2] Starting Coverage-Guided Agent (CGA)...
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] [fsm_ps2] Running Semantic Analysis (Layer 0)...
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] FSM detected: state (4 states)
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] Total function points identified: 2
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] Energy allocator initialized: 2 targets
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] Diversity injector initialized with history file: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/test_history.json
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] Quality evaluator initialized
|
||
|
|
2026-04-13 13:53:24 | INFO | [fsm_ps2] --- CGA Iter 0 (Baseline) ---
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Baseline Coverage: 77.78%
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] --- CGA Iter 1 / 10 ---
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] No more active targets with remaining energy. Stopping.
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] CGA Finished. Final Coverage: 77.78%
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Energy report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/energy_report.txt
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Diversity report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/diversity_report.txt
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Quality evaluation report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/quality_evaluation_report.txt
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Semantic Coverage: 100.00%
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] Saved optimized TB to: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/final_TB.v
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2] [TBeval] Eval 1: Golden RTL checking begins
|
||
|
|
2026-04-13 13:53:25 | POSITIVE | [fsm_ps2] [TBeval] Eval 1: Golden RTL checking passed!
|
||
|
|
2026-04-13 13:53:25 | INFO | [fsm_ps2]
|
||
|
|
2026-04-13 13:53:25 | INFO |
|
||
|
|
########## Analyze of Chatbench_RunInfo ##########
|
||
|
|
|
||
|
|
#### pass numbers:
|
||
|
|
Eval2 : 0
|
||
|
|
Eval1 : 1
|
||
|
|
Eval0 : 1
|
||
|
|
total : 1 (Failed: 0)
|
||
|
|
passed TB by autoline reboot action (from TB3_check): 0
|
||
|
|
|
||
|
|
passed TB by functional corrector: 0
|
||
|
|
|
||
|
|
#### CGA Coverage Info:
|
||
|
|
Average Coverage : 77.78%
|
||
|
|
Max Coverage : 77.78%
|
||
|
|
Min Coverage : 77.78%
|
||
|
|
|
||
|
|
#### tokens and cost:
|
||
|
|
average prompt tokens: 26863
|
||
|
|
average completion tokens: 22817
|
||
|
|
total cost: 1.9063
|
||
|
|
average cost: 1.9063
|
||
|
|
|
||
|
|
#### time:
|
||
|
|
average time: 2198.40s
|
||
|
|
|
||
|
|
#### debug info table:
|
||
|
|
FUNCTIONAL debug info table:
|
||
|
|
(debugged here means functional debugging)
|
||
|
|
| un-func-debugged | func-debugged | total |
|
||
|
|
failed | 0 | 0 | 0 |
|
||
|
|
Eval0 | 1 | 0 | 1 |
|
||
|
|
Eval1 | 1 | 0 | 1 |
|
||
|
|
Eval2 | 0 | 0 | 0 |
|
||
|
|
|
||
|
|
#### Eval2 ratio:
|
||
|
|
fsm_ps2: No Eval2 ratio data
|
||
|
|
|
||
|
|
#### CGA Coverage Detail List:
|
||
|
|
Task ID | Coverage
|
||
|
|
----------------------------------------
|
||
|
|
fsm_ps2 | 77.78%
|
||
|
|
|
||
|
|
loose Eval2 pass metric applied: 0.8
|
||
|
|
|
||
|
|
|