1use crate::eval::{stable_hash_hex, EvaluationDataset, EvaluationSample};
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
11pub enum OptimizationBudget {
12 Light,
13 #[default]
14 Medium,
15 Heavy,
16}
17
18impl OptimizationBudget {
19 pub fn from_label(value: &str) -> anyhow::Result<Self> {
20 match value {
21 "light" => Ok(Self::Light),
22 "medium" => Ok(Self::Medium),
23 "heavy" => Ok(Self::Heavy),
24 other => anyhow::bail!("unknown optimization budget: {other}"),
25 }
26 }
27
28 pub fn label(self) -> &'static str {
29 match self {
30 Self::Light => "light",
31 Self::Medium => "medium",
32 Self::Heavy => "heavy",
33 }
34 }
35
36 pub fn candidate_limit(self, requested: u32) -> usize {
37 let cap = match self {
38 Self::Light => 2,
39 Self::Medium => 4,
40 Self::Heavy => 8,
41 };
42 requested.max(1).min(cap) as usize
43 }
44
45 pub fn holdout_percent(self) -> usize {
46 match self {
47 Self::Light => 20,
48 Self::Medium => 25,
49 Self::Heavy => 30,
50 }
51 }
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct DatasetSplit {
56 pub train: Vec<EvaluationSample>,
57 pub holdout: Vec<EvaluationSample>,
58 pub holdout_percent: usize,
59}
60
61pub fn split_dataset(dataset: &EvaluationDataset, budget: OptimizationBudget) -> DatasetSplit {
62 if dataset.samples.len() < 2 {
63 return DatasetSplit {
64 train: dataset.samples.clone(),
65 holdout: Vec::new(),
66 holdout_percent: 0,
67 };
68 }
69
70 let holdout_percent = budget.holdout_percent();
71 let holdout_len = ((dataset.samples.len() * holdout_percent).div_ceil(100))
72 .max(1)
73 .min(dataset.samples.len() - 1);
74 let split_at = dataset.samples.len() - holdout_len;
75
76 DatasetSplit {
77 train: dataset.samples[..split_at].to_vec(),
78 holdout: dataset.samples[split_at..].to_vec(),
79 holdout_percent,
80 }
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct PromptVariantRecord {
85 pub id: String,
86 pub strategy: String,
87 pub target_file: String,
88 pub patch_hash: String,
89 pub description: String,
90}
91
92impl PromptVariantRecord {
93 pub fn from_patch(
94 strategy: impl Into<String>,
95 target_file: impl Into<String>,
96 description: impl Into<String>,
97 patch: &str,
98 ) -> Self {
99 let patch_hash = stable_hash_hex(patch.as_bytes());
100 Self {
101 id: patch_hash.replace(':', "_"),
102 strategy: strategy.into(),
103 target_file: target_file.into(),
104 patch_hash,
105 description: description.into(),
106 }
107 }
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct ExperimentLedger {
112 pub budget: OptimizationBudget,
113 pub dataset_version: String,
114 pub dataset_hash: String,
115 pub train_samples: usize,
116 pub holdout_samples: usize,
117 pub variants: Vec<PromptVariantRecord>,
118}
119
120impl ExperimentLedger {
121 pub fn new(
122 budget: OptimizationBudget,
123 dataset: &EvaluationDataset,
124 split: &DatasetSplit,
125 ) -> Self {
126 Self {
127 budget,
128 dataset_version: dataset.version.clone(),
129 dataset_hash: dataset.content_hash(),
130 train_samples: split.train.len(),
131 holdout_samples: split.holdout.len(),
132 variants: Vec::new(),
133 }
134 }
135
136 pub fn record_variant(&mut self, variant: PromptVariantRecord) {
137 self.variants.push(variant);
138 }
139}
140
/// Unit tests for budget limits, dataset splitting, variant hashing and the ledger.
#[cfg(test)]
mod tests {
    use super::*;

    /// Every budget tier, used to run a check across all tiers.
    const ALL_BUDGETS: [OptimizationBudget; 3] = [
        OptimizationBudget::Light,
        OptimizationBudget::Medium,
        OptimizationBudget::Heavy,
    ];

    #[test]
    fn split_dataset_keeps_at_least_one_train_sample() {
        let dataset = EvaluationDataset::synthetic_v1();
        let split = split_dataset(&dataset, OptimizationBudget::Heavy);

        assert!(!split.train.is_empty());
        assert!(!split.holdout.is_empty());
        assert_eq!(
            split.train.len() + split.holdout.len(),
            dataset.samples.len()
        );
    }

    #[test]
    fn split_dataset_with_fewer_than_two_samples_has_no_holdout() {
        // Exercises the early-return branch: a single sample cannot be split.
        let mut dataset = EvaluationDataset::synthetic_v1();
        dataset.samples.truncate(1);

        let split = split_dataset(&dataset, OptimizationBudget::Heavy);

        assert_eq!(split.train.len(), dataset.samples.len());
        assert!(split.holdout.is_empty());
        assert_eq!(split.holdout_percent, 0);
    }

    #[test]
    fn split_dataset_with_two_samples_keeps_one_on_each_side() {
        // With two samples every tier rounds up to a holdout of exactly one,
        // leaving exactly one sample for training.
        for budget in ALL_BUDGETS {
            let mut dataset = EvaluationDataset::synthetic_v1();
            dataset.samples.truncate(2);
            assert_eq!(dataset.samples.len(), 2);

            let split = split_dataset(&dataset, budget);

            assert_eq!(split.train.len(), 1);
            assert_eq!(split.holdout.len(), 1);
            assert_eq!(split.holdout_percent, budget.holdout_percent());
        }
    }

    #[test]
    fn variant_ids_are_stable_hashes() {
        let first = PromptVariantRecord::from_patch("schema", "src/main.rs", "desc", "patch");
        let second = PromptVariantRecord::from_patch("schema", "src/main.rs", "desc", "patch");

        assert_eq!(first.id, second.id);
        assert!(first.patch_hash.starts_with("fnv1a64:"));
    }

    #[test]
    fn variant_id_is_patch_hash_without_colons() {
        let record = PromptVariantRecord::from_patch("schema", "src/main.rs", "desc", "patch");

        assert!(!record.id.contains(':'));
        assert_eq!(record.id, record.patch_hash.replace(':', "_"));
    }

    #[test]
    fn budget_caps_candidates_but_never_to_zero() {
        assert_eq!(OptimizationBudget::Light.candidate_limit(99), 2);
        assert_eq!(OptimizationBudget::Medium.candidate_limit(99), 4);
        assert_eq!(OptimizationBudget::Heavy.candidate_limit(99), 8);
        assert_eq!(OptimizationBudget::Light.candidate_limit(0), 1);
    }

    #[test]
    fn budget_passes_through_requests_within_the_cap() {
        assert_eq!(OptimizationBudget::Light.candidate_limit(1), 1);
        assert_eq!(OptimizationBudget::Medium.candidate_limit(3), 3);
        assert_eq!(OptimizationBudget::Heavy.candidate_limit(8), 8);
    }

    #[test]
    fn budget_labels_round_trip() {
        for budget in ALL_BUDGETS {
            let parsed = OptimizationBudget::from_label(budget.label())
                .expect("labels produced by label() must parse");
            assert_eq!(parsed, budget);
        }
    }

    #[test]
    fn unknown_budget_label_is_rejected_with_the_offending_input() {
        let error = OptimizationBudget::from_label("extreme").unwrap_err();
        assert_eq!(error.to_string(), "unknown optimization budget: extreme");

        assert!(OptimizationBudget::from_label("").is_err());
    }

    #[test]
    fn ledger_recording_variants_does_not_record_acceptance() {
        let dataset = EvaluationDataset::synthetic_v1();
        let split = split_dataset(&dataset, OptimizationBudget::Medium);
        let mut ledger = ExperimentLedger::new(OptimizationBudget::Medium, &dataset, &split);

        ledger.record_variant(PromptVariantRecord::from_patch(
            "schema",
            "src/main.rs",
            "candidate only",
            "patch",
        ));

        assert_eq!(ledger.variants.len(), 1);
        assert_eq!(
            ledger.train_samples + ledger.holdout_samples,
            dataset.samples.len()
        );
    }
}