codetether-agent 0.1.0

A2A-native AI coding agent for the CodeTether ecosystem
{
  "codetether_agent": {
    "language": "Rust",
    "binary_size_mb": 12.46,
    "startup_time_ms": 13.49,
    "memory_rss_mb": 0,
    "process_spawn_ms": 1.45
  },
  "comparison_notes": {
    "vs_bun": {
      "node_binary_size_mb": "~90 (bun binary)",
      "node_startup_time_ms": "~25-50",
      "node_memory_base_mb": "~50-80",
      "advantage": "Rust binary is ~7x smaller, ~2-4x faster startup, ~3-5x less memory"
    }
  },
  "benchmark_date": "2026-02-02T18:12:59+00:00",
  "swarm_execution": {
    "description": "Real-world dogfooding metrics from autonomous PRD implementation",
    "runs": [
      {
        "name": "LSP PRD (run 1)",
        "stories_completed": 5,
        "stories_total": 10,
        "duration_minutes": 8.5,
        "minutes_per_story": 1.7
      },
      {
        "name": "LSP PRD (run 2)",
        "stories_completed": 5,
        "stories_total": 10,
        "duration_minutes": 6.5,
        "minutes_per_story": 1.3
      },
      {
        "name": "Missing Features PRD",
        "stories_completed": 10,
        "stories_total": 10,
        "duration_minutes": 14.5,
        "minutes_per_story": 1.45
      }
    ],
    "totals": {
      "stories_implemented": 20,
      "total_duration_minutes": 29.5,
      "average_minutes_per_story": 1.48,
      "stories_per_hour": 40.7,
      "quality_pass_rate": "100%",
      "quality_checks": ["cargo check", "cargo clippy", "cargo test", "cargo build --release"]
    },
    "notes": "Fully autonomous implementation with no human intervention. Each story includes implementation, validation, and quality gates."
  },
  "resource_efficiency_measured": {
    "description": "Actual resource usage from dogfooding task (20 stories, same Kimi K2.5 model)",
    "task": "20 user stories (LSP client + missing features)",
    "codetether_actual": {
      "total_time_minutes": 29.5,
      "wall_clock_seconds": 1770,
      "iterations": 20,
      "spawn_overhead_ms": 30,
      "spawn_overhead_calc": "20 iterations × 1.5ms = 30ms",
      "startup_overhead_ms": 260,
      "startup_overhead_calc": "20 iterations × 13ms = 260ms",
      "total_process_overhead_ms": 290,
      "peak_memory_mb": 55,
      "tokens_used": 500000,
      "token_cost_usd": 3.75
    },
    "opencode_estimated": {
      "total_time_minutes": 100,
      "wall_clock_seconds": 6000,
      "iterations": 20,
      "spawn_overhead_ms": 150,
      "spawn_overhead_calc": "20 iterations × 7.5ms avg = 150ms",
      "startup_overhead_ms": 740,
      "startup_overhead_calc": "20 iterations × 37ms avg = 740ms",
      "total_process_overhead_ms": 890,
      "peak_memory_mb": 280,
      "tokens_used": 1500000,
      "token_multiplier": "~3x due to subagent initialization overhead (has compaction, but spawns rebuild context)",
      "token_cost_usd": 11.25
    },
    "efficiency_ratios": {
      "time": "3.4x faster",
      "overhead": "3.1x less process overhead",
      "memory": "5.1x less peak RAM",
      "tokens": "~3x fewer tokens",
      "cost": "~3x cheaper"
    }
  },
  "manual_development_estimate": {
    "description": "Estimated effort if implemented manually (based on opencode development patterns)",
    "lines_of_code": 6316,
    "stories": 20,
    "manual_estimate": {
      "hours_per_story": 4,
      "total_hours": 80,
      "days_at_8h": 10,
      "cost_at_100_per_hour_usd": 8000,
      "assumptions": "Senior dev, 50-100 LOC/day Rust including tests/debug, reading specs, multiple iterations"
    },
    "opencode_subagent_estimate": {
      "description": "Estimated if using opencode with subagents (TypeScript, Bun runtime, same Kimi K2.5 model)",
      "minutes_per_story": 5,
      "total_minutes": 100,
      "total_hours": 1.67,
      "estimated_tokens": 1500000,
      "estimated_cost_usd": 11.25,
      "model": "Kimi K2.5 (same as codetether)",
      "overhead": {
        "context_setup_per_subagent_ms": 500,
        "bun_spawn_overhead_ms": 25,
        "typescript_parsing_overhead": "higher than native",
        "sequential_execution": true,
        "extra_tokens_from_overhead": "~3x more due to subagent initialization (system prompt + context rebuilt per spawn)"
      },
      "context_management": {
        "has_compaction": true,
        "compaction_type": "LLM-based session compaction",
        "note": "Token difference comes from subagent spawn overhead, not lack of compaction"
      },
      "assumptions": "Kimi K2.5 at $0.75/1M input + ~$3/1M output, ~3x token usage due to subagent process spawning"
    },
    "autonomous_actual": {
      "total_minutes": 29.5,
      "total_hours": 0.49,
      "estimated_tokens": 500000,
      "estimated_cost_usd": 3.75,
      "assumptions": "Kimi K2.5 at $0.75/1M input + ~$3/1M output, heavy tool use"
    },
    "efficiency_gains": {
      "vs_manual": {
        "time_reduction": "163x faster",
        "cost_reduction": "2133x cheaper"
      },
      "vs_opencode_subagents": {
        "time_reduction": "3.4x faster",
        "cost_reduction": "~3x cheaper",
        "model_parity": "Both use Kimi K2.5",
        "context_management_parity": "Both have LLM-based compaction",
        "reasons": [
          "Native Rust binary vs Bun runtime (13ms vs 25-50ms startup)",
          "Direct API calls vs TypeScript HTTP overhead",
          "PRD-driven state in files vs subagent process spawning",
          "~3x fewer tokens due to reduced subagent initialization overhead",
          "No subprocess spawn per task iteration"
        ]
      },
      "quality": "100% pass rate on cargo check/clippy/test/build",
      "human_intervention": "Zero"
    }
  }
}
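
The binary-size, startup, and spawn figures at the top of the report are straightforward to reproduce. Below is a minimal measurement sketch in Rust, assuming the codetether-agent binary is on PATH and exits quickly when passed --version (that flag is an assumption, not confirmed by the data above); the no-op `true` command is used as a floor for raw process-spawn cost.

// Minimal benchmark sketch for the startup_time_ms / process_spawn_ms figures.
// Assumptions (not from the report above): the binary is on PATH as
// `codetether-agent` and supports a fast-exiting `--version` flag.
use std::process::Command;
use std::time::Instant;

fn main() {
    const RUNS: u32 = 50;

    // Startup: spawn the binary and wait for it to exit.
    let start = Instant::now();
    for _ in 0..RUNS {
        Command::new("codetether-agent")
            .arg("--version")
            .output()
            .expect("failed to run codetether-agent");
    }
    let startup_ms = start.elapsed().as_secs_f64() * 1000.0 / RUNS as f64;

    // Spawn-only: use a no-op command (`true`, Unix) as a floor for
    // fork/exec cost, independent of any startup work in the binary.
    let start = Instant::now();
    for _ in 0..RUNS {
        Command::new("true").status().expect("failed to spawn `true`");
    }
    let spawn_ms = start.elapsed().as_secs_f64() * 1000.0 / RUNS as f64;

    println!("startup_time_ms ≈ {startup_ms:.2}");
    println!("process_spawn_ms ≈ {spawn_ms:.2}");
}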
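The swarm_execution totals are pure arithmetic over the three runs; the replay below shows how stories_implemented, average_minutes_per_story, and stories_per_hour are derived (all constants copied from the JSON above; note 29.5 / 20 = 1.475, reported rounded as 1.48).

// Replaying the `swarm_execution.totals` arithmetic from the three runs.
fn main() {
    // (stories_completed, duration_minutes) for the three PRD runs above.
    let runs = [(5u32, 8.5f64), (5, 6.5), (10, 14.5)];

    let stories: u32 = runs.iter().map(|&(s, _)| s).sum();
    let minutes: f64 = runs.iter().map(|&(_, m)| m).sum();

    println!("stories_implemented: {stories}");    // 20
    println!("total_duration_minutes: {minutes}"); // 29.5
    // 29.5 / 20 = 1.475, reported rounded as 1.48 above.
    println!("average_minutes_per_story: {:.3}", minutes / stories as f64);
    println!("stories_per_hour: {:.1}", stories as f64 / (minutes / 60.0)); // 40.7
}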
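The 100% quality_pass_rate is defined against the four cargo gates listed in quality_checks. Here is a minimal sketch of a fail-fast gate runner; the structure is illustrative, not the agent's actual implementation, and treating clippy warnings as errors is an added assumption.

// Quality-gate sketch: run the four cargo checks from `quality_checks`.
use std::process::Command;

/// Run one cargo gate; returns true when it exits successfully.
fn run_gate(args: &[&str]) -> bool {
    Command::new("cargo")
        .args(args)
        .status()
        .map(|status| status.success())
        .unwrap_or(false)
}

fn main() {
    // The four gates from `quality_checks`. Denying clippy warnings is an
    // added assumption, not stated in the data above.
    let gates: [&[&str]; 4] = [
        &["check"],
        &["clippy", "--", "-D", "warnings"],
        &["test"],
        &["build", "--release"],
    ];

    // `all` short-circuits, so the runner fails fast on the first broken gate.
    let all_pass = gates.iter().all(|gate| {
        let ok = run_gate(gate);
        println!("cargo {} -> {}", gate.join(" "), if ok { "PASS" } else { "FAIL" });
        ok
    });

    std::process::exit(if all_pass { 0 } else { 1 });
}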
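Likewise, the overhead totals and efficiency_ratios in resource_efficiency_measured follow mechanically from the per-iteration figures; a short replay with constants copied from the JSON:

// Replaying `resource_efficiency_measured`: per-iteration process costs and
// the quoted efficiency ratios.
fn main() {
    let iterations = 20.0;

    // codetether: 1.5 ms spawn + 13 ms startup per iteration (rounded as in
    // the *_calc strings) -> 30 + 260 = 290 ms total process overhead.
    let ct_overhead_ms = iterations * 1.5 + iterations * 13.0;

    // opencode (estimated): 7.5 ms spawn + 37 ms startup per iteration
    // -> 150 + 740 = 890 ms.
    let oc_overhead_ms = iterations * 7.5 + iterations * 37.0;

    println!("time:     {:.1}x faster", 100.0 / 29.5);                  // 3.4x
    println!("overhead: {:.1}x less", oc_overhead_ms / ct_overhead_ms); // 3.1x
    println!("memory:   {:.1}x less", 280.0 / 55.0);                    // 5.1x
    println!("tokens:   {:.0}x fewer", 1_500_000.0 / 500_000.0);        // 3x
    println!("cost:     {:.0}x cheaper", 11.25 / 3.75);                 // 3x
}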