Skip to main content

mdx_rust_core/
ledger.rs

1//! Experiment budgeting and prompt variant ledger primitives.
2//!
3//! These records make optimization runs explainable without requiring a
4//! database. They are append-friendly JSON structures that can later be moved
5//! behind a richer storage layer.
6
7use crate::eval::{stable_hash_hex, EvaluationDataset, EvaluationSample};
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
11pub enum OptimizationBudget {
12    Light,
13    #[default]
14    Medium,
15    Heavy,
16}
17
18impl OptimizationBudget {
19    pub fn from_label(value: &str) -> anyhow::Result<Self> {
20        match value {
21            "light" => Ok(Self::Light),
22            "medium" => Ok(Self::Medium),
23            "heavy" => Ok(Self::Heavy),
24            other => anyhow::bail!("unknown optimization budget: {other}"),
25        }
26    }
27
28    pub fn label(self) -> &'static str {
29        match self {
30            Self::Light => "light",
31            Self::Medium => "medium",
32            Self::Heavy => "heavy",
33        }
34    }
35
36    pub fn candidate_limit(self, requested: u32) -> usize {
37        let cap = match self {
38            Self::Light => 2,
39            Self::Medium => 4,
40            Self::Heavy => 8,
41        };
42        requested.max(1).min(cap) as usize
43    }
44
45    pub fn holdout_percent(self) -> usize {
46        match self {
47            Self::Light => 20,
48            Self::Medium => 25,
49            Self::Heavy => 30,
50        }
51    }
52}
53
54#[derive(Debug, Clone, Serialize, Deserialize)]
55pub struct DatasetSplit {
56    pub train: Vec<EvaluationSample>,
57    pub holdout: Vec<EvaluationSample>,
58    pub holdout_percent: usize,
59}
60
61pub fn split_dataset(dataset: &EvaluationDataset, budget: OptimizationBudget) -> DatasetSplit {
62    if dataset.samples.len() < 2 {
63        return DatasetSplit {
64            train: dataset.samples.clone(),
65            holdout: Vec::new(),
66            holdout_percent: 0,
67        };
68    }
69
70    let holdout_percent = budget.holdout_percent();
71    let holdout_len = ((dataset.samples.len() * holdout_percent).div_ceil(100))
72        .max(1)
73        .min(dataset.samples.len() - 1);
74    let split_at = dataset.samples.len() - holdout_len;
75
76    DatasetSplit {
77        train: dataset.samples[..split_at].to_vec(),
78        holdout: dataset.samples[split_at..].to_vec(),
79        holdout_percent,
80    }
81}
82
83#[derive(Debug, Clone, Serialize, Deserialize)]
84pub struct PromptVariantRecord {
85    pub id: String,
86    pub strategy: String,
87    pub target_file: String,
88    pub patch_hash: String,
89    pub description: String,
90}
91
92impl PromptVariantRecord {
93    pub fn from_patch(
94        strategy: impl Into<String>,
95        target_file: impl Into<String>,
96        description: impl Into<String>,
97        patch: &str,
98    ) -> Self {
99        let patch_hash = stable_hash_hex(patch.as_bytes());
100        Self {
101            id: patch_hash.replace(':', "_"),
102            strategy: strategy.into(),
103            target_file: target_file.into(),
104            patch_hash,
105            description: description.into(),
106        }
107    }
108}
109
110#[derive(Debug, Clone, Serialize, Deserialize)]
111pub struct ExperimentLedger {
112    pub budget: OptimizationBudget,
113    pub dataset_version: String,
114    pub dataset_hash: String,
115    pub train_samples: usize,
116    pub holdout_samples: usize,
117    pub variants: Vec<PromptVariantRecord>,
118}
119
120impl ExperimentLedger {
121    pub fn new(
122        budget: OptimizationBudget,
123        dataset: &EvaluationDataset,
124        split: &DatasetSplit,
125    ) -> Self {
126        Self {
127            budget,
128            dataset_version: dataset.version.clone(),
129            dataset_hash: dataset.content_hash(),
130            train_samples: split.train.len(),
131            holdout_samples: split.holdout.len(),
132            variants: Vec::new(),
133        }
134    }
135
136    pub fn record_variant(&mut self, variant: PromptVariantRecord) {
137        self.variants.push(variant);
138    }
139}
140
#[cfg(test)]
mod tests {
    use super::*;

    /// Splitting the synthetic dataset leaves samples on both sides and loses none.
    #[test]
    fn split_dataset_keeps_at_least_one_train_sample() {
        let dataset = EvaluationDataset::synthetic_v1();
        let DatasetSplit { train, holdout, .. } =
            split_dataset(&dataset, OptimizationBudget::Heavy);

        assert!(!train.is_empty());
        assert!(!holdout.is_empty());
        assert_eq!(train.len() + holdout.len(), dataset.samples.len());
    }

    /// Identical inputs yield identical ids, and the hash carries its scheme prefix.
    #[test]
    fn variant_ids_are_stable_hashes() {
        let build = || PromptVariantRecord::from_patch("schema", "src/main.rs", "desc", "patch");
        let first = build();
        let second = build();

        assert_eq!(first.id, second.id);
        assert!(first.patch_hash.starts_with("fnv1a64:"));
    }

    /// Oversized requests hit the tier cap; a zero request is raised to one.
    #[test]
    fn budget_caps_candidates_but_never_to_zero() {
        let expected_caps = [
            (OptimizationBudget::Light, 2),
            (OptimizationBudget::Medium, 4),
            (OptimizationBudget::Heavy, 8),
        ];
        for (budget, cap) in expected_caps {
            assert_eq!(budget.candidate_limit(99), cap);
        }

        assert_eq!(OptimizationBudget::Light.candidate_limit(0), 1);
    }

    /// Recording a variant stores it and leaves the split counts untouched.
    #[test]
    fn ledger_recording_variants_does_not_record_acceptance() {
        let dataset = EvaluationDataset::synthetic_v1();
        let budget = OptimizationBudget::Medium;
        let split = split_dataset(&dataset, budget);
        let mut ledger = ExperimentLedger::new(budget, &dataset, &split);

        let candidate =
            PromptVariantRecord::from_patch("schema", "src/main.rs", "candidate only", "patch");
        ledger.record_variant(candidate);

        assert_eq!(ledger.variants.len(), 1);
        assert_eq!(
            ledger.train_samples + ledger.holdout_samples,
            dataset.samples.len()
        );
    }
}