Files

321 lines
11 KiB
Plaintext
Raw Permalink Normal View History

2026-05-22 10:02:42 +08:00
---------------custom config--------------
run:
mode: autoline
custom_path: None
gpt:
model: qwen-max
key_path: config/key_API.json
save:
en: True
root: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/
autoline:
result_path: results
cga:
enabled: True
max_iter: 10
target_coverage: 100.0
probset:
path: /home/zhang/CorrectBench/data/HDLBits/HDLBits_data.jsonl
mutant_path: None
gptgenRTL_path: None
more_info_paths: []
only: ['fsm_ps2']
exclude: []
exclude_json: None
filter: [{}]
checklist:
max: 3
debug:
max: 5
reboot: 1
py_rollback: 2
onlyrun: None
promptscript: pychecker
timeout: 300
TBcheck:
rtl_num: 20
correct_max: 3
discrim_mode: col_full_wrong
correct_mode: naive
rtl_compens_en: True
rtl_compens_max_iter: 3
itermax: 10
update_desc: False
save_compile: True
save_finalcodes: True
error_interruption: False
------------------------------------------
------config info (custom + default)------
run:
version: 2.0
author: Ruidi Qiu - Technical University of Munich
time: 20260413_131647
custom_path: /home/zhang/CorrectBench/TBgen_App/autoline/../config/custom.yaml
mode: autoline
hostname: localhost
pid: 513097
pyversion: 3.12.3 (main, Mar 3 2026, 12:15:18) [GCC 13.3.0]
save:
en: True
root: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/
pub:
prefix: None
dir: saves/0413~0419/
subdir:
log:
en: True
dir: logs/
notes: None
cfg_pmode: iwantall
debug_en: False
level: TRACE
message:
en: True
dir: messages/
format: json
iverilog:
en: True
subdir: ivcode_nodebug
load:
prompt:
path: config/initial_prompts/prompt1.txt
pick_idx: []
stage_template:
path: config/templates/stage_template0301.txt
gpt:
model: qwen-max
key_path: config/key_API.json
temperature: None
json_mode: False
chatgpt:
start_form: chat
one_time_talk: False
rtlgen_model: qwen-max
iverilog:
dir:
task_id:
autoline:
result_path: results
cga:
enabled: True
max_iter: 10
target_coverage: 100.0
probset:
path: /home/zhang/CorrectBench/data/HDLBits/HDLBits_data.jsonl
mutant_path: None
gptgenRTL_path: None
more_info_paths: []
only: ['fsm_ps2']
exclude: []
exclude_json: None
filter: [{}]
checklist:
max: 3
debug:
max: 5
reboot: 1
py_rollback: 2
onlyrun: None
promptscript: pychecker
timeout: 300
TBcheck:
rtl_num: 20
correct_max: 3
discrim_mode: col_full_wrong
correct_mode: naive
rtl_compens_en: True
rtl_compens_max_iter: 3
itermax: 10
update_desc: False
save_compile: True
save_finalcodes: True
error_interruption: False
_initialized: True
------------------------------------------
--------------default config--------------
run:
version: 2.0
author: Ruidi Qiu - Technical University of Munich
time: None
custom_path: None
mode: qwen-max
save:
en: True
root: None
pub:
prefix: None
dir: saves/$weekrange$/
subdir:
log:
en: True
dir: logs/
notes: None
cfg_pmode: iwantall
debug_en: False
level: TRACE
message:
en: True
dir: messages/
format: json
iverilog:
en: True
subdir: ivcode_nodebug
load:
prompt:
path: config/initial_prompts/prompt1.txt
pick_idx: []
stage_template:
path: config/templates/stage_template0301.txt
gpt:
model: 4o
key_path: config/key_API.json
temperature: None
json_mode: False
chatgpt:
start_form: chat
one_time_talk: False
rtlgen_model: None
iverilog:
dir:
task_id:
autoline:
result_path: results
cga:
enabled: True
max_iter: 10
target_coverage: 100.0
probset:
path: None
mutant_path: None
gptgenRTL_path: None
more_info_paths: []
only: ['lemmings3', 'lemmings4', 'ece241_2013_q8', '2014_q3fsm', 'm2014_q6', 'review2015_fsm', 'rule110', 'fsm_ps2']
exclude: []
exclude_json: None
filter: [{}]
checklist:
max: 3
debug:
max: 5
reboot: 1
py_rollback: 2
onlyrun: None
promptscript: None
timeout: 300
TBcheck:
rtl_num: 20
correct_max: 3
discrim_mode: col_full_wrong
correct_mode: naive
rtl_compens_en: True
rtl_compens_max_iter: 3
itermax: 10
update_desc: False
save_compile: True
save_finalcodes: True
error_interruption: False
------------------------------------------
2026-04-13 13:16:47 | INFO |
2026-04-13 13:16:47 | INFO | ######################### task 1/1 [fsm_ps2] #########################
2026-04-13 13:17:01 | INFO | [fsm_ps2] [TBgen] stage_0 ends (14.61s used)
2026-04-13 13:17:35 | INFO | [fsm_ps2] [TBgen] stage_1 ends (34.03s used)
2026-04-13 13:18:20 | INFO | [fsm_ps2] [TBgen] stage_2 ends (44.84s used)
2026-04-13 13:18:59 | INFO | [fsm_ps2] [TBgen] stage_3 ends (38.91s used)
2026-04-13 13:20:46 | INFO | [fsm_ps2] [TBgen] stage_4 ends (106.51s used)
2026-04-13 13:20:46 | INFO | [fsm_ps2] [TBgen] stage_checklist ends (0.00s used)
2026-04-13 13:25:28 | INFO | [fsm_ps2] [TBgen] stage_4b ends (282.58s used)
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBgen] stage_5 ends (31.76s used)
2026-04-13 13:26:00 | INFO | [fsm_ps2]
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] iverilog compilation : passed!
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] python simulation : passed!
2026-04-13 13:26:00 | INFO | [fsm_ps2] [TBsim] TBsim finished : True!
2026-04-13 13:26:00 | INFO | [fsm_ps2]
2026-04-13 13:26:00 | INFO | [fsm_ps2] rtl list not found, generating naive rtls for testbench checking
2026-04-13 13:35:48 | INFO | [fsm_ps2] 20 naive rtls generated
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] Discriminating the testbench, NO.0 discrimination
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 16, 17, 19, 20] have syntax error during discrimination
2026-04-13 13:35:48 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (5/20), I will generate more and recheck. This is not TB's fault.
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] 15 naive rtls generated
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [6, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 20] have syntax error during discrimination
2026-04-13 13:43:39 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (7/20), I will generate more and recheck. This is not TB's fault.
2026-04-13 13:48:50 | INFO | [fsm_ps2] [TBcheck] [discriminator] 13 naive rtls generated
2026-04-13 13:48:50 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
2026-04-13 13:48:51 | INFO | [fsm_ps2] [TBcheck] [discriminator] RTL(s) [8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 20] have syntax error during discrimination
2026-04-13 13:48:51 | INFO | [fsm_ps2] [TBcheck] [discriminator] too few RTL passed the syntax check (9/20), I will generate more and recheck. This is not TB's fault.
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] 11 naive rtls generated
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] re-discriminate the testbench with updated RTL list
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] [discriminator] no re-discrimination since the max iteration reached
2026-04-13 13:53:24 | POSITIVE | [fsm_ps2] [TBcheck] [discriminator] TB_discriminating finished, TB passed, wrong scenarios: [], scenario pass ratio: 10/10
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] Testbench passed the funccheck
2026-04-13 13:53:24 | INFO | [fsm_ps2] [TBcheck] self funccheck finished. Next Action: [pass]
2026-04-13 13:53:24 | INFO | [fsm_ps2]
2026-04-13 13:53:24 | INFO | [fsm_ps2] [fsm_ps2] Starting Coverage-Guided Agent (CGA)...
2026-04-13 13:53:24 | INFO | [fsm_ps2] [fsm_ps2] Running Semantic Analysis (Layer 0)...
2026-04-13 13:53:24 | INFO | [fsm_ps2] FSM detected: state (4 states)
2026-04-13 13:53:24 | INFO | [fsm_ps2] Total function points identified: 2
2026-04-13 13:53:24 | INFO | [fsm_ps2] Energy allocator initialized: 2 targets
2026-04-13 13:53:24 | INFO | [fsm_ps2] Diversity injector initialized with history file: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/test_history.json
2026-04-13 13:53:24 | INFO | [fsm_ps2] Quality evaluator initialized
2026-04-13 13:53:24 | INFO | [fsm_ps2] --- CGA Iter 0 (Baseline) ---
2026-04-13 13:53:25 | INFO | [fsm_ps2] Baseline Coverage: 77.78%
2026-04-13 13:53:25 | INFO | [fsm_ps2] --- CGA Iter 1 / 10 ---
2026-04-13 13:53:25 | INFO | [fsm_ps2] No more active targets with remaining energy. Stopping.
2026-04-13 13:53:25 | INFO | [fsm_ps2] CGA Finished. Final Coverage: 77.78%
2026-04-13 13:53:25 | INFO | [fsm_ps2] Energy report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/energy_report.txt
2026-04-13 13:53:25 | INFO | [fsm_ps2] Diversity report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/diversity_report.txt
2026-04-13 13:53:25 | INFO | [fsm_ps2] Quality evaluation report saved to /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/CGA/quality_evaluation_report.txt
2026-04-13 13:53:25 | INFO | [fsm_ps2] Semantic Coverage: 100.00%
2026-04-13 13:53:25 | INFO | [fsm_ps2] Saved optimized TB to: /home/zhang/CorrectBench/TBgen_App/output/fsm_ps2/fsm_ps2/final_TB.v
2026-04-13 13:53:25 | INFO | [fsm_ps2] [TBeval] Eval 1: Golden RTL checking begins
2026-04-13 13:53:25 | POSITIVE | [fsm_ps2] [TBeval] Eval 1: Golden RTL checking passed!
2026-04-13 13:53:25 | INFO | [fsm_ps2]
2026-04-13 13:53:25 | INFO |
########## Analyze of Chatbench_RunInfo ##########
#### pass numbers:
Eval2 : 0
Eval1 : 1
Eval0 : 1
total : 1 (Failed: 0)
passed TB by autoline reboot action (from TB3_check): 0
passed TB by functional corrector: 0
#### CGA Coverage Info:
Average Coverage : 77.78%
Max Coverage : 77.78%
Min Coverage : 77.78%
#### tokens and cost:
average prompt tokens: 26863
average completion tokens: 22817
total cost: 1.9063
average cost: 1.9063
#### time:
average time: 2198.40s
#### debug info table:
FUNCTIONAL debug info table:
(debugged here means functional debugging)
| un-func-debugged | func-debugged | total |
failed | 0 | 0 | 0 |
Eval0 | 1 | 0 | 1 |
Eval1 | 1 | 0 | 1 |
Eval2 | 0 | 0 | 0 |
#### Eval2 ratio:
fsm_ps2: No Eval2 ratio data
#### CGA Coverage Detail List:
Task ID | Coverage
----------------------------------------
fsm_ps2 | 77.78%
loose Eval2 pass metric applied: 0.8