1use crate::core::event::{Event, SessionRecord};
5use crate::experiment::binding::{ManualTags, partition};
6use crate::experiment::metric::value_for;
7use crate::experiment::stats::{DEFAULT_RESAMPLES, Summary, summarize};
8use crate::experiment::types::{Classification, Criterion, Direction, Experiment};
9use serde::{Deserialize, Serialize};
10use std::path::Path;
11
/// Outcome of analysing one experiment, as produced by [`run`] and rendered
/// by [`to_markdown`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Report {
    /// Copy of the experiment definition the report was computed for.
    pub experiment: Experiment,
    /// Statistics returned by `summarize` for the control and treatment
    /// metric values.
    pub summary: Summary,
    /// Number of input sessions that contributed no metric value to either
    /// arm (total sessions minus the values collected for control and
    /// treatment).
    pub excluded_count: usize,
    /// Whether the experiment's success criterion was satisfied; `None` when
    /// the summary lacks the statistic the criterion needs.
    pub target_met: Option<bool>,
}
19
20pub fn run(
22 exp: &Experiment,
23 sessions: &[(SessionRecord, Vec<Event>)],
24 manual_tags: &ManualTags,
25 workspace: &Path,
26) -> Report {
27 let records: Vec<SessionRecord> = sessions.iter().map(|(s, _)| s.clone()).collect();
28 let (control_s, treatment_s, excluded_s) =
29 partition(&records, &exp.binding, manual_tags, workspace);
30 let control = metric_values(
31 exp,
32 sessions,
33 &control_s,
34 Classification::Control,
35 manual_tags,
36 );
37 let treatment = metric_values(
38 exp,
39 sessions,
40 &treatment_s,
41 Classification::Treatment,
42 manual_tags,
43 );
44 let _ = excluded_s;
45 let excluded = records.len() - control.len() - treatment.len();
46 let summary = summarize(
47 &control,
48 &treatment,
49 stable_seed(&exp.id),
50 DEFAULT_RESAMPLES,
51 );
52 let target_met = evaluate_criterion(&exp.success_criterion, &summary);
53 Report {
54 experiment: exp.clone(),
55 summary,
56 excluded_count: excluded,
57 target_met,
58 }
59}
60
61fn metric_values(
62 exp: &Experiment,
63 sessions: &[(SessionRecord, Vec<Event>)],
64 picked: &[&SessionRecord],
65 _which: Classification,
66 _tags: &ManualTags,
67) -> Vec<f64> {
68 let ids: std::collections::HashSet<&str> = picked.iter().map(|s| s.id.as_str()).collect();
69 sessions
70 .iter()
71 .filter(|(s, _)| ids.contains(s.id.as_str()))
72 .filter_map(|(s, evs)| value_for(exp.metric, s, evs))
73 .collect()
74}
75
76fn evaluate_criterion(c: &Criterion, s: &Summary) -> Option<bool> {
77 match c {
78 Criterion::Delta {
79 direction,
80 target_pct,
81 } => {
82 let pct = s.delta_pct?;
83 Some(match direction {
84 Direction::Decrease => pct <= *target_pct,
85 Direction::Increase => pct >= *target_pct,
86 })
87 }
88 Criterion::Absolute { metric_value } => {
89 let m = s.median_treatment?;
90 Some(m <= *metric_value)
91 }
92 }
93}
94
/// Hashes `id` into a deterministic 64-bit seed.
///
/// Each byte is XORed into the running hash, which is then multiplied
/// (wrapping) by the 64-bit FNV prime. The initial value is *not* the
/// canonical FNV offset basis (14695981039346656037); it must stay as it is,
/// because changing it would change the seed of every existing experiment.
fn stable_seed(id: &str) -> u64 {
    const INITIAL: u64 = 1469598103934665603;
    const FNV_PRIME: u64 = 1099511628211;

    id.bytes().fold(INITIAL, |hash, byte| {
        (hash ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    })
}
103
104pub fn to_markdown(report: &Report) -> String {
106 let e = &report.experiment;
107 let s = &report.summary;
108 let mut out = String::new();
109 out.push_str(&format!("# Experiment: {}\n\n", e.name));
110 out.push_str(&format!(
111 "State: {:?} · Duration: {}d\nHypothesis: {}\nChange: {}\n\n",
112 e.state, e.duration_days, e.hypothesis, e.change_description
113 ));
114 let (ctl_label, trt_label) = match &e.binding {
115 crate::experiment::types::Binding::GitCommit {
116 control_commit,
117 treatment_commit,
118 } => (short(control_commit), short(treatment_commit)),
119 crate::experiment::types::Binding::Branch {
120 control_branch,
121 treatment_branch,
122 } => (control_branch.clone(), treatment_branch.clone()),
123 crate::experiment::types::Binding::ManualTag { variant_field } => {
124 (format!("manual:{}", variant_field), "manual".into())
125 }
126 };
127 out.push_str(&format!(
128 "Binding: control {} · treatment {}\nMetric: {}\n\n",
129 ctl_label,
130 trt_label,
131 e.metric.as_str()
132 ));
133 out.push_str("| | N | median | mean |\n|---|---|---|---|\n");
134 out.push_str(&format!(
135 "| control | {} | {} | {} |\n",
136 s.n_control,
137 fmt_opt(s.median_control),
138 fmt_opt(s.mean_control),
139 ));
140 out.push_str(&format!(
141 "| treatment| {} | {} | {} |\n\n",
142 s.n_treatment,
143 fmt_opt(s.median_treatment),
144 fmt_opt(s.mean_treatment),
145 ));
146 if let Some(d) = s.delta_median {
147 out.push_str(&format!(
148 "Delta (median): {:+.2}{}\n",
149 d,
150 s.delta_pct
151 .map(|p| format!(" ({:+.1}%)", p))
152 .unwrap_or_default(),
153 ));
154 }
155 if let (Some(lo), Some(hi)) = (s.ci95_lo, s.ci95_hi) {
156 out.push_str(&format!(
157 "95% bootstrap CI on delta: [{:+.2}, {:+.2}]\n",
158 lo, hi
159 ));
160 }
161 if let Some(met) = report.target_met {
162 out.push_str(&format!(
163 "Target: {}\n",
164 if met { "MET" } else { "not met" }
165 ));
166 }
167 out.push_str(&format!("\nExcluded: {} sessions\n", report.excluded_count));
168 if s.small_sample_warning {
169 out.push_str("Warning: N per arm < 30 — CI may be unreliable.\n");
170 }
171 out
172}
173
/// Formats an optional number with two decimal places, or an em dash when
/// the value is absent.
fn fmt_opt(v: Option<f64>) -> String {
    match v {
        Some(value) => format!("{:.2}", value),
        None => String::from("—"),
    }
}
177
/// Returns the first seven characters of `commit`, or the whole string when
/// it is shorter than that.
fn short(commit: &str) -> String {
    // The byte offset of the eighth character (if any) is where the
    // seven-character prefix ends; slicing there stays on a char boundary.
    match commit.char_indices().nth(7) {
        Some((end, _)) => commit[..end].to_owned(),
        None => commit.to_owned(),
    }
}
181
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::event::{EventKind, EventSource, SessionStatus};
    use crate::experiment::types::{Binding, Criterion, Direction, Metric, State};

    /// Running experiment on tokens-per-session with a git-commit binding and
    /// a "decrease by 10%" success criterion.
    fn exp() -> Experiment {
        let binding = Binding::GitCommit {
            control_commit: "c".into(),
            treatment_commit: "t".into(),
        };
        let success_criterion = Criterion::Delta {
            direction: Direction::Decrease,
            target_pct: -10.0,
        };
        Experiment {
            id: "e".into(),
            name: "e".into(),
            hypothesis: "h".into(),
            change_description: "c".into(),
            metric: Metric::TokensPerSession,
            binding,
            duration_days: 14,
            success_criterion,
            state: State::Running,
            created_at_ms: 0,
            concluded_at_ms: None,
        }
    }

    /// Finished session `id` with no commit or branch data, paired with a
    /// single tool-call event reporting `tokens` input tokens.
    fn session_with(id: &str, tokens: u32) -> (SessionRecord, Vec<Event>) {
        let record = SessionRecord {
            id: id.into(),
            agent: "cursor".into(),
            model: None,
            workspace: "/ws".into(),
            started_at_ms: 0,
            ended_at_ms: None,
            status: SessionStatus::Done,
            trace_path: String::new(),
            start_commit: None,
            end_commit: None,
            branch: None,
            dirty_start: None,
            dirty_end: None,
            repo_binding_source: None,
            prompt_fingerprint: None,
        };
        let event = Event {
            session_id: id.into(),
            seq: 0,
            ts_ms: 0,
            ts_exact: false,
            kind: EventKind::ToolCall,
            source: EventSource::Tail,
            tool: None,
            tool_call_id: None,
            tokens_in: Some(tokens),
            tokens_out: None,
            reasoning_tokens: None,
            cost_usd_e6: None,
            payload: serde_json::Value::Null,
        };
        (record, vec![event])
    }

    #[test]
    fn manual_tags_drive_partition_without_git() {
        let e = exp();

        let mut sessions = Vec::new();
        for (id, tokens) in [("a", 100), ("b", 80), ("c", 200), ("d", 70)] {
            sessions.push(session_with(id, tokens));
        }

        // Two sessions are tagged per arm; the sessions themselves carry no
        // commit or branch information.
        let mut tags = ManualTags::new();
        for id in ["a", "b"] {
            tags.insert(id.into(), Classification::Control);
        }
        for id in ["c", "d"] {
            tags.insert(id.into(), Classification::Treatment);
        }

        let report = run(&e, &sessions, &tags, Path::new("/no"));
        assert_eq!(report.summary.n_control, 2);
        assert_eq!(report.summary.n_treatment, 2);
    }
}
265}