pub mod convomem;
pub mod custom;
pub mod download;
pub mod halumem;
pub mod locomo;
pub mod longmemeval;
pub mod mab;
pub mod membench;
use std::collections::HashMap;
use anyhow::Result;
use crate::traits::BenchmarkDataset;
/// Catalogue of the benchmark datasets this crate knows about.
///
/// Entries are stored in a map from dataset name to its [`DatasetInfo`].
/// The type derives `Debug` and `Clone` so a registry can be logged and
/// duplicated like any other plain data holder.
#[derive(Debug, Clone)]
pub struct DatasetRegistry {
    /// Metadata entries indexed by dataset name.
    datasets: HashMap<String, DatasetInfo>,
}
/// Descriptive metadata for a single benchmark dataset.
///
/// Derives `PartialEq`/`Eq` in addition to `Debug`/`Clone` so entries can be
/// compared directly (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DatasetInfo {
    /// Dataset name, e.g. `"longmemeval"`.
    pub name: String,
    /// Human-readable one-line description of the dataset.
    pub description: String,
    /// Names of the variants listed for this dataset.
    pub variants: Vec<String>,
}
impl DatasetRegistry {
    /// Builds a registry that already contains every built-in dataset entry.
    pub fn new() -> Self {
        let mut this = Self {
            datasets: HashMap::new(),
        };
        this.register_defaults();
        this
    }

    /// Populates the map with one [`DatasetInfo`] per built-in dataset.
    ///
    /// Each entry is stored under its own `name`.
    fn register_defaults(&mut self) {
        // One row per dataset: (name, description, variants).
        const BUILTIN: &[(&str, &str, &[&str])] = &[
            (
                "longmemeval",
                "LongMemEval (ICLR 2025) — 500 questions, 5 memory abilities, 7 types",
                &["oracle", "small", "medium"],
            ),
            (
                "locomo",
                "LoCoMo (Snap Research) — long-context conversation memory",
                &["default"],
            ),
            (
                "convomem",
                "ConvoMem (Salesforce) — conversational memory, 6 evidence categories",
                &[
                    "user_evidence",
                    "assistant_facts",
                    "changing",
                    "abstention",
                    "preference",
                    "implicit_connection",
                ],
            ),
            (
                "membench",
                "MemBench (ACL 2025) — multi-aspect memory evaluation, multiple-choice QA",
                &[
                    "simple",
                    "aggregative",
                    "comparative",
                    "conditional",
                    "knowledge_update",
                    "highlevel",
                ],
            ),
            (
                "memoryagentbench",
                "MemoryAgentBench (ICLR 2026) — selective forgetting, fact consolidation, long-range retrieval",
                &[
                    "conflict_resolution",
                    "accurate_retrieval",
                    "long_range",
                    "test_time_learning",
                ],
            ),
            (
                "halumem",
                "HaluMem (MemTensor) — memory hallucination detection",
                &["medium", "long"],
            ),
        ];

        for &(name, description, variants) in BUILTIN {
            let info = DatasetInfo {
                name: name.to_owned(),
                description: description.to_owned(),
                variants: variants.iter().map(|variant| (*variant).to_owned()).collect(),
            };
            self.datasets.insert(name.to_owned(), info);
        }
    }

    /// Returns all registered entries, ordered by ascending `name`.
    pub fn list(&self) -> Vec<&DatasetInfo> {
        let mut infos = Vec::with_capacity(self.datasets.len());
        infos.extend(self.datasets.values());
        infos.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
        infos
    }

    /// Looks up the entry stored under exactly `name`.
    ///
    /// Returns `None` when no entry has that key.
    // NOTE(review): the aliases accepted by `load` are not resolved here, so
    // an alias such as "recall" yields `None` — confirm this is intended.
    pub fn get(&self, name: &str) -> Option<&DatasetInfo> {
        self.datasets.get(name)
    }

    /// Translates a short alias into the dataset name it stands for.
    ///
    /// Any input that is not a known alias is returned unchanged.
    fn resolve_name<'a>(&self, name: &'a str) -> &'a str {
        // (alias, dataset name) pairs.
        const ALIASES: [(&str, &str); 6] = [
            ("recall", "longmemeval"),
            ("longturn", "locomo"),
            ("conversation", "convomem"),
            ("multiaspect", "membench"),
            ("forgetting", "memoryagentbench"),
            ("hallucination", "halumem"),
        ];

        ALIASES
            .iter()
            .find(|(alias, _)| *alias == name)
            .map_or(name, |&(_, canonical)| canonical)
    }

    /// Loads the dataset identified by `name` and returns it boxed as a
    /// [`BenchmarkDataset`] trait object.
    ///
    /// `name` may be a dataset name or one of the aliases understood by
    /// `resolve_name`.
    ///
    /// * `variant` is forwarded as-is to the longmemeval, convomem and halumem
    ///   loaders. For membench, `"default"` is replaced by `"simple"`; for
    ///   memoryagentbench, `"default"` is replaced by `"conflict_resolution"`.
    ///   The locomo loader does not receive it.
    /// * `force_download` is forwarded to every loader except the
    ///   memoryagentbench one, whose `load` takes only the split name.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the selected loader, and fails with an
    /// "Unknown dataset" error when the resolved name matches no loader.
    pub async fn load(&self, name: &str, variant: &str, force_download: bool) -> Result<Box<dyn BenchmarkDataset>> {
        let boxed: Box<dyn BenchmarkDataset> = match self.resolve_name(name) {
            "longmemeval" => {
                Box::new(longmemeval::LongMemEvalDataset::load(variant, force_download).await?)
            }
            "locomo" => Box::new(locomo::LoCoMoDataset::load(force_download).await?),
            "convomem" => {
                Box::new(convomem::ConvoMemDataset::load(variant, force_download).await?)
            }
            "membench" => {
                let category = match variant {
                    "default" => "simple",
                    explicit => explicit,
                };
                Box::new(membench::MemBenchDataset::load(category, force_download).await?)
            }
            "halumem" => Box::new(halumem::HaluMemDataset::load(variant, force_download).await?),
            "memoryagentbench" => {
                let split = match variant {
                    "default" => "conflict_resolution",
                    explicit => explicit,
                };
                Box::new(mab::MemoryAgentBenchDataset::load(split).await?)
            }
            _ => anyhow::bail!("Unknown dataset: {name}. Run 'recallbench datasets' to see available datasets."),
        };
        Ok(boxed)
    }
}
impl Default for DatasetRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Every built-in dataset can be looked up by its registered name.
    #[test]
    fn registry_has_all_datasets() {
        let registry = DatasetRegistry::new();
        for name in [
            "longmemeval",
            "locomo",
            "convomem",
            "membench",
            "memoryagentbench",
            "halumem",
        ] {
            assert!(registry.get(name).is_some());
        }
    }

    /// `list` returns all six entries in non-decreasing name order.
    #[test]
    fn registry_list_sorted() {
        let registry = DatasetRegistry::new();
        let listed = registry.list();
        assert_eq!(listed.len(), 6);

        let names: Vec<&str> = listed.iter().map(|info| info.name.as_str()).collect();
        // A sequence equals its sorted copy exactly when each adjacent pair is ordered.
        assert!(names.windows(2).all(|pair| pair[0] <= pair[1]));
    }

    /// Looking up a name that was never registered yields `None`.
    #[test]
    fn registry_unknown_returns_none() {
        let registry = DatasetRegistry::new();
        assert!(registry.get("nonexistent").is_none());
    }
}