{
  "updated": 1781294219,
  "updated_str": "2026-06-12 19:56:59 UTC",
  "models": [
    "haiku",
    "sonnet",
    "opus",
    "fable"
  ],
  "efforts": [
    "low",
    "medium",
    "high",
    "xhigh",
    "max"
  ],
  "tool_modes": [
    "off",
    "on"
  ],
  "totals": {
    "runs": 117,
    "graded": 114,
    "bench_cost_usd": 1.2065,
    "judge_cost_usd": 1.3285,
    "output_tokens": 160705
  },
  "configs": [
    {
      "level": "children",
      "model": "haiku",
      "effort": "low",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.885,
      "solved": 8.85,
      "mean_elegance": 6.0,
      "out_tokens": 12614,
      "in_tokens": 2989,
      "bench_cost_usd": 0.0721,
      "judge_cost_usd": 0.1272,
      "mean_wall_s": 9.9
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "low",
      "tool_mode": "on",
      "n": 10,
      "graded": 10,
      "accuracy": 0.895,
      "solved": 8.95,
      "mean_elegance": 5.9,
      "out_tokens": 10412,
      "in_tokens": 100,
      "bench_cost_usd": 0.1455,
      "judge_cost_usd": 0.1058,
      "mean_wall_s": 9.4
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "medium",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.895,
      "solved": 8.95,
      "mean_elegance": 6.0,
      "out_tokens": 14315,
      "in_tokens": 2989,
      "bench_cost_usd": 0.0806,
      "judge_cost_usd": 0.1246,
      "mean_wall_s": 10.9
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "medium",
      "tool_mode": "on",
      "n": 10,
      "graded": 10,
      "accuracy": 0.9,
      "solved": 9.0,
      "mean_elegance": 6.1,
      "out_tokens": 12741,
      "in_tokens": 100,
      "bench_cost_usd": 0.0875,
      "judge_cost_usd": 0.1236,
      "mean_wall_s": 10.7
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "high",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.89,
      "solved": 8.9,
      "mean_elegance": 5.8,
      "out_tokens": 18161,
      "in_tokens": 2989,
      "bench_cost_usd": 0.0998,
      "judge_cost_usd": 0.1225,
      "mean_wall_s": 13.1
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "high",
      "tool_mode": "on",
      "n": 10,
      "graded": 10,
      "accuracy": 0.9,
      "solved": 9.0,
      "mean_elegance": 5.9,
      "out_tokens": 13596,
      "in_tokens": 100,
      "bench_cost_usd": 0.0918,
      "judge_cost_usd": 0.1247,
      "mean_wall_s": 11.3
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "xhigh",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.89,
      "solved": 8.9,
      "mean_elegance": 5.9,
      "out_tokens": 14072,
      "in_tokens": 2989,
      "bench_cost_usd": 0.0793,
      "judge_cost_usd": 0.1305,
      "mean_wall_s": 11.1
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "xhigh",
      "tool_mode": "on",
      "n": 10,
      "graded": 10,
      "accuracy": 0.9,
      "solved": 9.0,
      "mean_elegance": 6.0,
      "out_tokens": 12892,
      "in_tokens": 100,
      "bench_cost_usd": 0.0883,
      "judge_cost_usd": 0.1225,
      "mean_wall_s": 10.7
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "max",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.89,
      "solved": 8.9,
      "mean_elegance": 5.9,
      "out_tokens": 33426,
      "in_tokens": 2989,
      "bench_cost_usd": 0.1761,
      "judge_cost_usd": 0.1214,
      "mean_wall_s": 23.8
    },
    {
      "level": "children",
      "model": "haiku",
      "effort": "max",
      "tool_mode": "on",
      "n": 10,
      "graded": 10,
      "accuracy": 0.885,
      "solved": 8.85,
      "mean_elegance": 6.1,
      "out_tokens": 14718,
      "in_tokens": 100,
      "bench_cost_usd": 0.0974,
      "judge_cost_usd": 0.1272,
      "mean_wall_s": 12.0
    },
    {
      "level": "children",
      "model": "sonnet",
      "effort": "low",
      "tool_mode": "off",
      "n": 10,
      "graded": 10,
      "accuracy": 0.9,
      "solved": 9.0,
      "mean_elegance": 3.6,
      "out_tokens": 973,
      "in_tokens": 2699,
      "bench_cost_usd": 0.0287,
      "judge_cost_usd": 0.0588,
      "mean_wall_s": 4.1
    },
    {
      "level": "children",
      "model": "sonnet",
      "effort": "low",
      "tool_mode": "on",
      "n": 7,
      "graded": 4,
      "accuracy": 1.0,
      "solved": 4.0,
      "mean_elegance": 6.75,
      "out_tokens": 2785,
      "in_tokens": 21,
      "bench_cost_usd": 0.1593,
      "judge_cost_usd": 0.0397,
      "mean_wall_s": 9.5
    }
  ]
}