1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use super::*;
// Subcommands for managing experiments: create, start, list, inspect, tag
// sessions, report, conclude, archive, and estimate power.
//
// NOTE(review): `Subcommand` arrives via `use super::*` and is presumably
// clap's derive. If so, the `///` comments below are emitted as CLI help text,
// which is why maintainer notes in this enum use plain `//` comments instead.
//
// Every variant carries the same optional pair: `--workspace` (a path) and
// `--project` (a name). `--project` declares `conflicts_with = "workspace"`,
// so the two cannot be supplied together. How a project name is resolved to a
// workspace is not visible in this enum.
#[derive(Subcommand)]
pub(crate) enum ExpCommand {
/// Create experiment in Draft state (records control/treatment commits).
New {
// name, hypothesis, change and metric have no default and are not
// `Option`, so they must be supplied.
#[arg(long)]
name: String,
#[arg(long)]
hypothesis: String,
#[arg(long)]
change: String,
// Stored as a free-form String: no value parser or value list is declared
// here, so any check against the names listed below happens elsewhere.
/// tokens_per_session|cost_per_session|success_rate|tool_loops|duration_minutes|files_per_session
#[arg(long)]
metric: String,
// Also a free-form String; defaults to "git" when the flag is omitted.
/// git|branch|manual
#[arg(long, default_value = "git")]
bind: String,
// Defaults to 14 when omitted.
#[arg(long, default_value_t = 14)]
duration_days: u32,
// Defaults to -10.0. `allow_hyphen_values` lets a value with a leading '-'
// (a negative percentage) be accepted as the argument's value.
/// target delta pct, e.g. -10.0 for -10%
#[arg(long, default_value_t = -10.0, allow_hyphen_values = true)]
target_pct: f64,
// Optional commit/branch identifiers for each arm. Which of these are
// required for a given `bind` value is not enforced by this declaration.
#[arg(long)]
control_commit: Option<String>,
#[arg(long)]
treatment_commit: Option<String>,
#[arg(long)]
control_branch: Option<String>,
#[arg(long)]
treatment_branch: Option<String>,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Transition experiment from Draft to Running.
Start {
// No `#[arg]` attribute on `id` — presumably parsed as a required
// positional argument (TODO confirm against the derive in use). The same
// applies to every `id` field below.
id: String,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// List all experiments.
List {
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Show one experiment's metadata.
Status {
id: String,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Manual variant tag for a session.
Tag {
id: String,
// Session identifier to tag; required.
#[arg(long)]
session: String,
// Free-form String; the allowed values listed below are not validated by
// this declaration.
/// control|treatment|excluded
#[arg(long)]
variant: String,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Render markdown (or JSON) report with bootstrap CI.
Report {
id: String,
// Boolean switch; false unless `--json` is passed.
#[arg(long)]
json: bool,
// `short, long` requests both a short and a long flag (presumably `-r` and
// `--refresh`, derived from the field name — TODO confirm).
/// Force a full agent transcript rescan before reading. This can take a while on large workspaces.
#[arg(short, long)]
refresh: bool,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Mark experiment Concluded.
Conclude {
id: String,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Mark experiment Archived (must be Concluded first).
Archive {
id: String,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
/// Print MDE at 80% power / 95% CI for a metric given expected sample size.
Power {
// Takes no experiment `id`: only a metric name and a sample size.
// Free-form String, as with `New::metric`.
/// tokens_per_session|cost_per_session|success_rate|…
#[arg(long)]
metric: String,
// Required; parsed as an unsigned integer (usize).
/// Expected sessions per arm.
#[arg(long)]
baseline_n: usize,
// Same switch as `Report::refresh`, declared with both short and long forms.
/// Force a full agent transcript rescan before reading. This can take a while on large workspaces.
#[arg(short, long)]
refresh: bool,
#[arg(long)]
workspace: Option<PathBuf>,
/// project name shorthand for --workspace (mutually exclusive)
#[arg(long, conflicts_with = "workspace")]
project: Option<String>,
},
}