Add SkillsBench v1.1 evaluation result

#21
Files changed (1) hide show
  1. .eval_results/skillsbench.yaml +11 -0
.eval_results/skillsbench.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ - dataset:
2
+     id: benchflow/skillsbench
3
+     task_id: skillsbench_v1_1
4
+     revision: be2a6ce2cb1f4ff67ce937307cade0c5a0477a13
5
+   value: 53.0
6
+   date: "2026-06-11"
7
+   source:
8
+     url: https://huggingface.co/datasets/benchflow/skillsbench-leaderboard/raw/main/leaderboard/skillsbench/v1.1/official.json
9
+     name: SkillsBench v1.1 official leaderboard
10
+     user: benchflow
11
+   notes: "with-skills; BenchFlow harness; OpenHands agent; 87 tasks x 3 trials; full 261/261 coverage"