// Source: zeph_bench/dataset.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

/// The on-disk format used by a dataset's data files.
///
/// The format determines how a [`crate::DatasetLoader`] reads the file:
/// `Jsonl` loaders iterate line-by-line, while `Json` loaders parse the
/// entire file as a single JSON value.
///
/// The enum is marked `#[non_exhaustive]`, so `match` expressions written in
/// other crates must include a wildcard arm.
///
/// # Examples
///
/// ```
/// use zeph_bench::DatasetFormat;
///
/// assert_ne!(DatasetFormat::Jsonl, DatasetFormat::Json);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum DatasetFormat {
    /// Newline-delimited JSON: one JSON object per line.
    Jsonl,
    /// A single JSON document (object or array) spanning the entire file.
    Json,
}
26/// Static metadata describing a benchmark dataset.
27///
28/// Instances are stored in [`DatasetRegistry`] and can be retrieved by name.
29/// The `url` field points to the canonical source so users know where to
30/// download the data.
31///
32/// # Examples
33///
34/// ```
35/// use zeph_bench::{DatasetMeta, DatasetFormat};
36///
37/// let meta = DatasetMeta {
38///     name: "example",
39///     description: "An example dataset",
40///     url: "https://example.com/dataset",
41///     format: DatasetFormat::Jsonl,
42/// };
43/// assert_eq!(meta.name, "example");
44/// ```
45#[derive(Debug, Clone)]
46pub struct DatasetMeta {
47    /// Short identifier used in CLI arguments (e.g. `"gaia"`).
48    pub name: &'static str,
49    /// One-line human-readable description.
50    pub description: &'static str,
51    /// Canonical download URL (`HuggingFace`, GitHub, etc.).
52    pub url: &'static str,
53    /// File format expected by the corresponding [`crate::DatasetLoader`].
54    pub format: DatasetFormat,
55}
56
57/// Registry of all datasets that `zeph-bench` knows about.
58///
59/// The registry is pre-populated with six built-in datasets on construction and
60/// provides case-insensitive lookup by name. It is the authoritative source for
61/// the `bench list` CLI subcommand.
62///
63/// # Built-in Datasets
64///
65/// | Name | Format | Source |
66/// |------|--------|--------|
67/// | `longmemeval` | JSONL | `HuggingFace` xiaowu0162/longmemeval |
68/// | `locomo` | JSON | `HuggingFace` lmlab/locomo |
69/// | `frames` | JSONL | `HuggingFace` google/frames-benchmark |
70/// | `tau2-bench-retail` | JSON | GitHub sierra-research/tau2-bench |
71/// | `tau2-bench-airline` | JSON | GitHub sierra-research/tau2-bench |
72/// | `gaia` | JSONL | `HuggingFace` gaia-benchmark/GAIA |
73///
74/// # Examples
75///
76/// ```
77/// use zeph_bench::DatasetRegistry;
78///
79/// let registry = DatasetRegistry::new();
80///
81/// // List all datasets.
82/// assert_eq!(registry.list().len(), 6);
83///
84/// // Lookup is case-insensitive.
85/// assert!(registry.get("GAIA").is_some());
86/// assert!(registry.get("unknown").is_none());
87/// ```
88pub struct DatasetRegistry {
89    datasets: Vec<DatasetMeta>,
90}
91
92impl DatasetRegistry {
93    /// Create a registry pre-populated with all built-in datasets.
94    ///
95    /// # Examples
96    ///
97    /// ```
98    /// use zeph_bench::DatasetRegistry;
99    ///
100    /// let registry = DatasetRegistry::new();
101    /// assert!(!registry.list().is_empty());
102    /// ```
103    #[must_use]
104    pub fn new() -> Self {
105        Self {
106            datasets: vec![
107                DatasetMeta {
108                    name: "longmemeval",
109                    description: "LongMemEval: long-term memory evaluation benchmark",
110                    url: "https://huggingface.co/datasets/xiaowu0162/longmemeval",
111                    format: DatasetFormat::Jsonl,
112                },
113                DatasetMeta {
114                    name: "locomo",
115                    description: "LOCOMO: long-context conversational memory benchmark",
116                    url: "https://huggingface.co/datasets/lmlab/locomo",
117                    format: DatasetFormat::Json,
118                },
119                DatasetMeta {
120                    name: "frames",
121                    description: "FRAMES: factual reasoning and multi-step evaluation",
122                    url: "https://huggingface.co/datasets/google/frames-benchmark",
123                    format: DatasetFormat::Jsonl,
124                },
125                DatasetMeta {
126                    name: "tau2-bench-retail",
127                    description: "tau2-bench retail domain: customer service tool-use evaluation",
128                    url: "https://github.com/sierra-research/tau2-bench",
129                    format: DatasetFormat::Json,
130                },
131                DatasetMeta {
132                    name: "tau2-bench-airline",
133                    description: "tau2-bench airline domain: flight reservation tool-use evaluation",
134                    url: "https://github.com/sierra-research/tau2-bench",
135                    format: DatasetFormat::Json,
136                },
137                DatasetMeta {
138                    name: "gaia",
139                    description: "GAIA: general AI assistants benchmark",
140                    url: "https://huggingface.co/datasets/gaia-benchmark/GAIA",
141                    format: DatasetFormat::Jsonl,
142                },
143            ],
144        }
145    }
146
147    /// Return a slice of all registered datasets.
148    ///
149    /// # Examples
150    ///
151    /// ```
152    /// use zeph_bench::DatasetRegistry;
153    ///
154    /// let registry = DatasetRegistry::new();
155    /// for meta in registry.list() {
156    ///     println!("{}: {}", meta.name, meta.url);
157    /// }
158    /// ```
159    #[must_use]
160    pub fn list(&self) -> &[DatasetMeta] {
161        &self.datasets
162    }
163
164    /// Look up a dataset by name using case-insensitive ASCII comparison.
165    ///
166    /// Returns `None` when no dataset with the given name is registered.
167    ///
168    /// # Examples
169    ///
170    /// ```
171    /// use zeph_bench::DatasetRegistry;
172    ///
173    /// let registry = DatasetRegistry::new();
174    /// let meta = registry.get("locomo").expect("locomo is built-in");
175    /// assert_eq!(meta.name, "locomo");
176    ///
177    /// // Case-insensitive.
178    /// assert!(registry.get("LOCOMO").is_some());
179    ///
180    /// // Unknown dataset.
181    /// assert!(registry.get("does-not-exist").is_none());
182    /// ```
183    #[must_use]
184    pub fn get(&self, name: &str) -> Option<&DatasetMeta> {
185        self.datasets
186            .iter()
187            .find(|d| d.name.eq_ignore_ascii_case(name))
188    }
189}
190
191impl Default for DatasetRegistry {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
#[cfg(test)]
mod tests {
    use super::*;

    /// The registry ships with exactly six built-in datasets.
    #[test]
    fn registry_contains_six_datasets() {
        let reg = DatasetRegistry::new();
        assert_eq!(reg.list().len(), 6);
    }

    /// An exact-case lookup returns the entry with the requested name.
    #[test]
    fn registry_get_returns_correct_dataset() {
        let reg = DatasetRegistry::new();
        let ds = reg.get("gaia").expect("gaia is a built-in dataset");
        assert_eq!(ds.name, "gaia");
    }

    /// Lookup ignores ASCII case.
    #[test]
    fn registry_get_case_insensitive() {
        let reg = DatasetRegistry::new();
        assert!(reg.get("LOCOMO").is_some());
    }

    /// Unknown names yield `None`.
    #[test]
    fn registry_get_unknown_returns_none() {
        let reg = DatasetRegistry::new();
        assert!(reg.get("unknown-dataset").is_none());
    }

    /// Every entry returned by `list` can be found again through `get`.
    #[test]
    fn registry_every_listed_dataset_is_retrievable() {
        let reg = DatasetRegistry::new();
        for meta in reg.list() {
            let found = reg
                .get(meta.name)
                .expect("listed dataset must be retrievable by its own name");
            assert_eq!(found.name, meta.name);
        }
    }
}