Files
CGA-bench/analysis/paper_runs/paper_fsm/run_level.csv
2026-05-22 10:02:42 +08:00

1.9 KiB

1experiment_namemodelconditionrepeatrun_dirtask_idcoveragesemantic_coverageeval1_passeval2_passeval2_ratioeval2_ratio_floateval2_failed_mutantsfull_passtime_sectoken_costfirst_improvement_iterop_recordtask_log
2paper_fsmqwen-maxbaseline1/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/baseline/paper_fsm_qwen-max_baseline_r01_20260410_1841062013_q2afsm92.307692307692373.51FalseFalse519.090.7850400000000001gen,syncheck,funccheck,coverage_eval,eval/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/baseline/paper_fsm_qwen-max_baseline_r01_20260410_184106/2013_q2afsm/task_log.log
3paper_fsmqwen-maxbaseline1/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/baseline/paper_fsm_qwen-max_baseline_r01_20260410_1841062012_q2fsm91.6666666666666661.17TrueFalse7/100.76,7,8False348.970.62682gen,syncheck,funccheck,coverage_eval,eval/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/baseline/paper_fsm_qwen-max_baseline_r01_20260410_184106/2012_q2fsm/task_log.log
4paper_fsmqwen-maxcga1/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/cga/paper_fsm_qwen-max_cga_r01_20260410_1855372013_q2afsm92.307692307692373.51FalseFalse1803.022.66142gen,syncheck,funccheck,gen,syncheck,funccheck,cga,eval/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/cga/paper_fsm_qwen-max_cga_r01_20260410_185537/2013_q2afsm/task_log.log
5paper_fsmqwen-maxcga1/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/cga/paper_fsm_qwen-max_cga_r01_20260410_1855372012_q2fsm91.6666666666666661.17TrueFalse3/100.32,4,6,7,8,9,10False489.720.7213599999999999gen,syncheck,funccheck,cga,eval/home/zhang/CorrectBench/saves/0406~0412/Paper_Experiments/paper_fsm/qwen-max/cga/paper_fsm_qwen-max_cga_r01_20260410_185537/2012_q2fsm/task_log.log