1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
use clap::{Parser, Subcommand, ValueEnum};
// Top-level argument parser for the `autoresearch` binary.
//
// IMPORTANT: clap's derive macro consumes `///` doc comments and the
// `#[command(...)]` / `#[arg(...)]` attribute strings as runtime --help text.
// Treat every `///` line and string literal in this struct (and the types it
// references) as user-facing output, not as documentation-only text.
//
// NOTE(review): the quick-start in `long_about` lists claude-code, codex,
// opencode, cursor, windsurf, all — but InstallTarget also accepts gemini,
// copilot, and agents. Presumably the list is abbreviated on purpose; confirm.
#[derive(Parser)]
#[command(
    name = "autoresearch",
    version,
    about = "Universal autoresearch CLI — optimize any metric with autonomous experiments",
    // `\` at end of line continues the string and strips the next line's
    // leading whitespace, so the embedded `\n` / `\n \` sequences fully
    // control the rendered layout.
    long_about = "Autoresearch CLI brings Karpathy's autoresearch pattern to any project.\n\n\
Workflow: doctor → init → baseline → loop (hypothesize → implement → commit → eval → record) → report\n\n\
Quick start:\n \
1. autoresearch install claude-code (or codex, opencode, cursor, windsurf, all)\n \
2. autoresearch init --target-file <F> --eval-command <C>\n \
3. autoresearch doctor (validate before starting)\n \
4. Tell your agent: /autoresearch\n\n\
Best practices: hyperparameters first, one change per experiment, fork when stuck.\n\
Run `autoresearch agent-info --json` for full machine-readable capabilities + strategy guide."
)]
pub struct Cli {
    // The selected subcommand; see the `Commands` enum for the full set.
    #[command(subcommand)]
    pub command: Commands,
    /// Output as JSON (auto-enabled when piped)
    // `global = true` makes --json usable after any subcommand, not just
    // before it. The "auto-enabled when piped" behavior is implemented by
    // whatever consumes this flag, not by clap itself.
    #[arg(long, global = true)]
    pub json: bool,
}
// All subcommands of the `autoresearch` CLI, one variant per command.
//
// IMPORTANT: every `///` comment below is rendered by clap as --help text for
// the corresponding subcommand or argument — do not edit them casually, and
// do not add new `///` lines unless the help output should change.
#[derive(Subcommand)]
pub enum Commands {
    /// Install the autoresearch skill into an AI coding agent
    Install {
        /// Target agent platform
        #[arg(value_enum)]
        target: InstallTarget,
    },
    /// Initialize autoresearch in the current project
    Init {
        /// Target file the agent may modify
        #[arg(long)]
        target_file: Option<String>,
        /// Eval command that produces the metric
        #[arg(long)]
        eval_command: Option<String>,
        /// Metric name (e.g., val_bpb, accuracy, p99_latency)
        #[arg(long, default_value = "metric")]
        metric_name: String,
        /// Metric direction: lower or higher
        // NOTE(review): stringly-typed; a ValueEnum would reject typos at
        // parse time, but switching the field type would break downstream
        // matches on String — left as-is.
        #[arg(long, default_value = "lower")]
        metric_direction: String,
        /// Time budget per experiment (e.g., 5m, 30s)
        #[arg(long, default_value = "5m")]
        time_budget: String,
        /// Git branch for experiments
        #[arg(long, default_value = "autoresearch")]
        branch: String,
    },
    /// Record an experiment result (for agent use)
    Record {
        /// Metric value from this experiment (use = for negatives: --metric=-0.5)
        // allow_negative_numbers lets clap accept a leading '-' on the value
        // instead of treating it as a new flag.
        #[arg(long, allow_negative_numbers = true)]
        metric: f64,
        /// Status: kept, discarded, or baseline
        // NOTE(review): also stringly-typed; the three valid values are only
        // documented in help text, presumably validated by the handler.
        #[arg(long)]
        status: String,
        /// Summary of what was tried
        #[arg(long)]
        summary: String,
    },
    /// Show experiment history from git log
    Log {
        /// Maximum number of entries to show
        #[arg(short = 'n', long, default_value = "20")]
        limit: usize,
    },
    /// Show the best experiment and its diff from baseline
    Best,
    /// Compare two experiments by run number
    Diff {
        // Both run numbers are positional arguments, in order.
        /// First run number
        run_a: usize,
        /// Second run number
        run_b: usize,
    },
    /// Check if an autoresearch loop is currently running
    Status,
    /// Export experiment history
    Export {
        /// Export format
        #[arg(long, value_enum, default_value = "csv")]
        format: ExportFormat,
        /// Output file path (stdout if not specified)
        #[arg(short, long)]
        output: Option<String>,
    },
    /// Pre-flight check before starting an experiment loop
    Doctor,
    /// Fork experiments into parallel branches for multi-direction exploration
    Fork {
        /// Names for each fork (creates autoresearch/<name> branches)
        // required = true is needed because a positional Vec would otherwise
        // accept zero occurrences.
        #[arg(required = true)]
        names: Vec<String>,
    },
    /// Generate a cross-model review prompt from experiment history
    Review,
    /// Live terminal dashboard — watch experiments as they happen
    Watch {
        /// Refresh interval in seconds
        #[arg(short, long, default_value = "2")]
        interval: u64,
    },
    /// Compare fork branches and merge the best one back
    // Derive naming: variant MergeBest maps to the `merge-best` subcommand.
    MergeBest,
    /// Generate a markdown report of the research session
    Report {
        /// Output file path (stdout if not specified)
        #[arg(short, long)]
        output: Option<String>,
    },
    /// Print the full autoresearch methodology guide (works without skill installed)
    Guide,
    /// Show CLI capabilities for agent discovery
    // Maps to the `agent-info` subcommand referenced in the long_about text.
    AgentInfo,
}
// Agent platforms accepted by `autoresearch install <target>`.
//
// ValueEnum derives kebab-case CLI values from the variant names
// (ClaudeCode -> claude-code, etc.), and each `///` comment below is shown
// as that value's description in --help — keep them accurate.
#[derive(ValueEnum, Clone, Debug)]
pub enum InstallTarget {
    /// Claude Code (~/.claude/skills/)
    ClaudeCode,
    /// Gemini CLI (~/.gemini/skills/)
    Gemini,
    /// Codex CLI (~/.codex/skills/)
    Codex,
    /// OpenCode (~/.config/opencode/skills/)
    Opencode,
    /// GitHub Copilot (.github/skills/)
    Copilot,
    /// Cursor (.cursor/skills/)
    Cursor,
    /// Windsurf (.windsurf/skills/)
    Windsurf,
    /// Universal .agents/skills/ (Augment, Goose, Roo, etc.)
    Agents,
    /// Install into all supported agents
    All,
}
// Output formats accepted by `autoresearch export --format`.
//
// ValueEnum derives the CLI values `csv`, `json`, `jsonl` from the variant
// names. Variant doc comments surface as per-value descriptions in --help,
// matching the documented style of InstallTarget (previously these values
// rendered with no description).
#[derive(ValueEnum, Clone, Debug)]
pub enum ExportFormat {
    /// Comma-separated values
    Csv,
    /// A single JSON document
    Json,
    /// JSON Lines (one JSON record per line)
    Jsonl,
}
pub fn parse() -> Cli {
Cli::parse()
}