Skip to main content

zeph_bench/
dataset.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
/// The on-disk format used by a dataset's data files.
///
/// The format determines how a [`crate::DatasetLoader`] reads the file:
/// `Jsonl` loaders iterate line-by-line, while `Json` loaders parse the
/// entire file as a single JSON value.
///
/// The enum is a small `Copy` tag. It implements `Eq` and `Hash`, so values
/// can be compared directly and used as keys in hashed collections.
///
/// # Examples
///
/// ```
/// use std::collections::HashSet;
/// use zeph_bench::DatasetFormat;
///
/// assert_ne!(DatasetFormat::Jsonl, DatasetFormat::Json);
///
/// // Usable as a set/map key.
/// let mut seen = HashSet::new();
/// seen.insert(DatasetFormat::Jsonl);
/// seen.insert(DatasetFormat::Jsonl);
/// assert_eq!(seen.len(), 1);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DatasetFormat {
    /// New-line–delimited JSON: one JSON object per line.
    Jsonl,
    /// A single JSON document (object or array) spanning the entire file.
    Json,
}
24
25/// Static metadata describing a benchmark dataset.
26///
27/// Instances are stored in [`DatasetRegistry`] and can be retrieved by name.
28/// The `url` field points to the canonical source so users know where to
29/// download the data.
30///
31/// # Examples
32///
33/// ```
34/// use zeph_bench::{DatasetMeta, DatasetFormat};
35///
36/// let meta = DatasetMeta {
37///     name: "example",
38///     description: "An example dataset",
39///     url: "https://example.com/dataset",
40///     format: DatasetFormat::Jsonl,
41/// };
42/// assert_eq!(meta.name, "example");
43/// ```
44#[derive(Debug, Clone)]
45pub struct DatasetMeta {
46    /// Short identifier used in CLI arguments (e.g. `"gaia"`).
47    pub name: &'static str,
48    /// One-line human-readable description.
49    pub description: &'static str,
50    /// Canonical download URL (`HuggingFace`, GitHub, etc.).
51    pub url: &'static str,
52    /// File format expected by the corresponding [`crate::DatasetLoader`].
53    pub format: DatasetFormat,
54}
55
56/// Registry of all datasets that `zeph-bench` knows about.
57///
58/// The registry is pre-populated with six built-in datasets on construction and
59/// provides case-insensitive lookup by name. It is the authoritative source for
60/// the `bench list` CLI subcommand.
61///
62/// # Built-in Datasets
63///
64/// | Name | Format | Source |
65/// |------|--------|--------|
66/// | `longmemeval` | JSONL | `HuggingFace` xiaowu0162/longmemeval |
67/// | `locomo` | JSON | `HuggingFace` lmlab/locomo |
68/// | `frames` | JSONL | `HuggingFace` google/frames-benchmark |
69/// | `tau2-bench-retail` | JSON | GitHub sierra-research/tau2-bench |
70/// | `tau2-bench-airline` | JSON | GitHub sierra-research/tau2-bench |
71/// | `gaia` | JSONL | `HuggingFace` gaia-benchmark/GAIA |
72///
73/// # Examples
74///
75/// ```
76/// use zeph_bench::DatasetRegistry;
77///
78/// let registry = DatasetRegistry::new();
79///
80/// // List all datasets.
81/// assert_eq!(registry.list().len(), 6);
82///
83/// // Lookup is case-insensitive.
84/// assert!(registry.get("GAIA").is_some());
85/// assert!(registry.get("unknown").is_none());
86/// ```
87pub struct DatasetRegistry {
88    datasets: Vec<DatasetMeta>,
89}
90
91impl DatasetRegistry {
92    /// Create a registry pre-populated with all built-in datasets.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// use zeph_bench::DatasetRegistry;
98    ///
99    /// let registry = DatasetRegistry::new();
100    /// assert!(!registry.list().is_empty());
101    /// ```
102    #[must_use]
103    pub fn new() -> Self {
104        Self {
105            datasets: vec![
106                DatasetMeta {
107                    name: "longmemeval",
108                    description: "LongMemEval: long-term memory evaluation benchmark",
109                    url: "https://huggingface.co/datasets/xiaowu0162/longmemeval",
110                    format: DatasetFormat::Jsonl,
111                },
112                DatasetMeta {
113                    name: "locomo",
114                    description: "LOCOMO: long-context conversational memory benchmark",
115                    url: "https://huggingface.co/datasets/lmlab/locomo",
116                    format: DatasetFormat::Json,
117                },
118                DatasetMeta {
119                    name: "frames",
120                    description: "FRAMES: factual reasoning and multi-step evaluation",
121                    url: "https://huggingface.co/datasets/google/frames-benchmark",
122                    format: DatasetFormat::Jsonl,
123                },
124                DatasetMeta {
125                    name: "tau2-bench-retail",
126                    description: "tau2-bench retail domain: customer service tool-use evaluation",
127                    url: "https://github.com/sierra-research/tau2-bench",
128                    format: DatasetFormat::Json,
129                },
130                DatasetMeta {
131                    name: "tau2-bench-airline",
132                    description: "tau2-bench airline domain: flight reservation tool-use evaluation",
133                    url: "https://github.com/sierra-research/tau2-bench",
134                    format: DatasetFormat::Json,
135                },
136                DatasetMeta {
137                    name: "gaia",
138                    description: "GAIA: general AI assistants benchmark",
139                    url: "https://huggingface.co/datasets/gaia-benchmark/GAIA",
140                    format: DatasetFormat::Jsonl,
141                },
142            ],
143        }
144    }
145
146    /// Return a slice of all registered datasets.
147    ///
148    /// # Examples
149    ///
150    /// ```
151    /// use zeph_bench::DatasetRegistry;
152    ///
153    /// let registry = DatasetRegistry::new();
154    /// for meta in registry.list() {
155    ///     println!("{}: {}", meta.name, meta.url);
156    /// }
157    /// ```
158    #[must_use]
159    pub fn list(&self) -> &[DatasetMeta] {
160        &self.datasets
161    }
162
163    /// Look up a dataset by name using case-insensitive ASCII comparison.
164    ///
165    /// Returns `None` when no dataset with the given name is registered.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use zeph_bench::DatasetRegistry;
171    ///
172    /// let registry = DatasetRegistry::new();
173    /// let meta = registry.get("locomo").expect("locomo is built-in");
174    /// assert_eq!(meta.name, "locomo");
175    ///
176    /// // Case-insensitive.
177    /// assert!(registry.get("LOCOMO").is_some());
178    ///
179    /// // Unknown dataset.
180    /// assert!(registry.get("does-not-exist").is_none());
181    /// ```
182    #[must_use]
183    pub fn get(&self, name: &str) -> Option<&DatasetMeta> {
184        self.datasets
185            .iter()
186            .find(|d| d.name.eq_ignore_ascii_case(name))
187    }
188}
189
190impl Default for DatasetRegistry {
191    fn default() -> Self {
192        Self::new()
193    }
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// The built-in table holds exactly six entries.
    #[test]
    fn registry_contains_six_datasets() {
        let count = DatasetRegistry::new().list().len();
        assert_eq!(count, 6);
    }

    /// Looking up an exact name yields the entry carrying that name.
    #[test]
    fn registry_get_returns_correct_dataset() {
        let registry = DatasetRegistry::new();
        let found_name = registry.get("gaia").map(|meta| meta.name);
        assert_eq!(found_name, Some("gaia"));
    }

    /// An upper-case query still finds the lower-case built-in name.
    #[test]
    fn registry_get_case_insensitive() {
        let registry = DatasetRegistry::new();
        let hit = registry.get("LOCOMO");
        assert!(hit.is_some());
    }

    /// A name that is not registered produces `None`.
    #[test]
    fn registry_get_unknown_returns_none() {
        let registry = DatasetRegistry::new();
        let miss = registry.get("unknown-dataset");
        assert!(miss.is_none());
    }
}