Skip to main content

mdx_rust_core/
eval.rs

//! Evaluation dataset and scorer metadata.
//!
//! These types make experiment records explicit about what was measured and
//! how. The current scorer is intentionally simple, but the optimizer now has
//! a stable place to grow policy-aligned and LLM-judge scoring.
6
7use serde::{Deserialize, Serialize};
8use std::path::Path;
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
11pub struct EvaluationSample {
12    pub id: String,
13    pub input: serde_json::Value,
14}
15
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct EvaluationDataset {
18    pub version: String,
19    pub samples: Vec<EvaluationSample>,
20}
21
22impl EvaluationDataset {
23    pub fn synthetic_v1() -> Self {
24        let samples = (0..5)
25            .map(|i| EvaluationSample {
26                id: format!("synthetic-addition-{i}"),
27                input: serde_json::json!({
28                    "query": format!("What is {} + {}?", i, i + 1),
29                    "context": null
30                }),
31            })
32            .collect();
33
34        Self {
35            version: "synthetic_v1".to_string(),
36            samples,
37        }
38    }
39
40    pub fn content_hash(&self) -> String {
41        let bytes = serde_json::to_vec(self).unwrap_or_default();
42        stable_hash_hex(&bytes)
43    }
44
45    /// Load a dataset from JSON.
46    ///
47    /// Accepted shapes:
48    /// - `{ "version": "...", "samples": [{ "id": "...", "input": {...} }] }`
49    /// - `[{ "id": "...", "input": {...} }]`
50    /// - `[{...}, {...}]` where each object is treated directly as an input.
51    pub fn load_from_path(path: &Path) -> anyhow::Result<Self> {
52        let content = std::fs::read_to_string(path)?;
53
54        if let Ok(dataset) = serde_json::from_str::<EvaluationDataset>(&content) {
55            return Ok(dataset);
56        }
57
58        let value: serde_json::Value = serde_json::from_str(&content)?;
59        let Some(items) = value.as_array() else {
60            anyhow::bail!("dataset must be an EvaluationDataset object or JSON array");
61        };
62
63        let mut samples = Vec::with_capacity(items.len());
64        for (index, item) in items.iter().enumerate() {
65            if let Some(input) = item.get("input") {
66                let id = item
67                    .get("id")
68                    .and_then(|id| id.as_str())
69                    .map(str::to_string)
70                    .unwrap_or_else(|| format!("sample-{index}"));
71                samples.push(EvaluationSample {
72                    id,
73                    input: input.clone(),
74                });
75            } else {
76                samples.push(EvaluationSample {
77                    id: format!("sample-{index}"),
78                    input: item.clone(),
79                });
80            }
81        }
82
83        Ok(Self {
84            version: dataset_version_from_path(path),
85            samples,
86        })
87    }
88}
89
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct ScorerMetadata {
92    pub id: String,
93    pub version: String,
94}
95
96impl ScorerMetadata {
97    pub fn mechanical_v1() -> Self {
98        Self {
99            id: "mechanical".to_string(),
100            version: "v1".to_string(),
101        }
102    }
103
104    pub fn label(&self) -> String {
105        format!("{}_{}", self.id, self.version)
106    }
107}
108
/// Hashes `bytes` with 64-bit FNV-1a and renders the result as
/// `fnv1a64:` followed by 16 lowercase, zero-padded hex digits.
pub fn stable_hash_hex(bytes: &[u8]) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0000_0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime (wrapping).
    let digest = bytes.iter().fold(FNV_OFFSET_BASIS, |state, &octet| {
        (state ^ u64::from(octet)).wrapping_mul(FNV_PRIME)
    });

    format!("fnv1a64:{digest:016x}")
}
117
/// Derives a dataset version label of the form `file:<stem>` from `path`.
///
/// Falls back to `file:dataset` when the path has no file stem, the stem is
/// not valid UTF-8, or the stem is empty.
fn dataset_version_from_path(path: &Path) -> String {
    let stem = match path.file_stem().and_then(|raw| raw.to_str()) {
        Some(name) if !name.is_empty() => name,
        _ => "dataset",
    };
    format!("file:{stem}")
}
125
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Writes `json` to `file_name` inside a fresh temporary directory and
    /// returns the result of loading that file as a dataset.
    fn load_json(file_name: &str, json: &str) -> anyhow::Result<EvaluationDataset> {
        let dir = tempdir().unwrap();
        let path = dir.path().join(file_name);
        std::fs::write(&path, json).unwrap();
        EvaluationDataset::load_from_path(&path)
    }

    #[test]
    fn load_dataset_from_raw_input_array() {
        let dataset = load_json(
            "dataset.json",
            r#"[{"query":"hello"},{"query":"world","context":null}]"#,
        )
        .unwrap();

        assert_eq!(dataset.samples.len(), 2);
        assert_eq!(dataset.samples[0].id, "sample-0");
        assert_eq!(dataset.version, "file:dataset");
    }

    #[test]
    fn load_dataset_from_structured_object() {
        let dataset = load_json(
            "evals.json",
            r#"{"version":"v9","samples":[{"id":"a","input":{"query":"hello"}}]}"#,
        )
        .unwrap();

        assert_eq!(dataset.version, "v9");
        assert_eq!(dataset.samples[0].id, "a");
    }

    // Covers the documented `[{ "id": ..., "input": ... }]` shape, including
    // the generated id for a wrapper that has no `id`.
    #[test]
    fn load_dataset_from_wrapped_sample_array() {
        let dataset = load_json(
            "wrapped.json",
            r#"[{"id":"x","input":{"query":"q"}},{"input":{"query":"r"}}]"#,
        )
        .unwrap();

        assert_eq!(dataset.version, "file:wrapped");
        assert_eq!(dataset.samples.len(), 2);
        assert_eq!(dataset.samples[0].id, "x");
        assert_eq!(dataset.samples[0].input, serde_json::json!({"query": "q"}));
        assert_eq!(dataset.samples[1].id, "sample-1");
        assert_eq!(dataset.samples[1].input, serde_json::json!({"query": "r"}));
    }

    #[test]
    fn load_dataset_rejects_scalar_json() {
        assert!(load_json("scalar.json", "42").is_err());
    }

    #[test]
    fn load_dataset_rejects_malformed_object() {
        // `version` is missing, so this is neither a dataset object nor an array.
        assert!(load_json("broken.json", r#"{"samples":[]}"#).is_err());
    }
}