scirs2_datasets/
registry.rs1use crate::cache::RegistryEntry;
4use crate::error::{DatasetsError, Result};
5use std::collections::HashMap;
6
7pub struct DatasetRegistry {
9 entries: HashMap<String, RegistryEntry>,
11}
12
13impl Default for DatasetRegistry {
14 fn default() -> Self {
15 let mut registry = Self::new();
16 registry.populate_default_datasets();
17 registry
18 }
19}
20
21impl DatasetRegistry {
22 pub fn new() -> Self {
24 Self {
25 entries: HashMap::new(),
26 }
27 }
28
29 pub fn register(&mut self, name: String, entry: RegistryEntry) {
31 self.entries.insert(name, entry);
32 }
33
34 pub fn get(&self, name: &str) -> Option<&RegistryEntry> {
36 self.entries.get(name)
37 }
38
39 pub fn list_datasets(&self) -> Vec<String> {
41 self.entries.keys().cloned().collect()
42 }
43
44 pub fn contains(&self, name: &str) -> bool {
46 self.entries.contains_key(name)
47 }
48
49 fn populate_default_datasets(&mut self) {
54 self.register(
56 "california_housing".to_string(),
57 RegistryEntry {
58 url: "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main/california_housing.csv",
59 sha256: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", },
61 );
62
63 self.register(
64 "wine".to_string(),
65 RegistryEntry {
66 url: "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main/wine.csv",
67 sha256: "d4e1c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b856", },
69 );
70
71 self.register(
73 "electrocardiogram".to_string(),
74 RegistryEntry {
75 url: "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main/electrocardiogram.json",
76 sha256: "a1b2c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b857", },
78 );
79
80 self.register(
81 "stock_market".to_string(),
82 RegistryEntry {
83 url: "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main/stock_market.json",
84 sha256: "f5e6c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b858", },
86 );
87
88 self.register(
89 "weather".to_string(),
90 RegistryEntry {
91 url:
92 "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main/weather.json",
93 sha256: "b7c8c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b859", },
95 );
96 }
97}
98
99pub fn get_registry() -> DatasetRegistry {
101 DatasetRegistry::default()
102}
103
/// Loads one of the built-in datasets by its registry name.
///
/// Only compiled when the `download` feature is enabled.
///
/// `force_download` is forwarded to the `california_housing` and `wine`
/// loaders.
///
/// # Errors
///
/// Returns `DatasetsError::Other` when `name` is registered but has no loader
/// wired up here, or when `name` is not registered at all; in the latter case
/// the message lists the registered dataset names in sorted order. Any error
/// produced by the selected loader is returned unchanged.
#[cfg(feature = "download")]
pub fn load_dataset_by_name(name: &str, force_download: bool) -> Result<crate::utils::Dataset> {
    match name {
        "california_housing" => crate::sample::load_california_housing(force_download),
        "wine" => crate::sample::load_wine(force_download),
        // NOTE(review): the time-series loaders below do not receive
        // `force_download`, and are called with fixed arguments (`false`,
        // `None`) - confirm that this is the intended default behaviour.
        "electrocardiogram" => crate::time_series::electrocardiogram(),
        "stock_market" => crate::time_series::stock_market(false),
        "weather" => crate::time_series::weather(None),
        _ => {
            // The registry is only needed to build the error message, so it is
            // constructed here rather than on every successful call.
            let registry = get_registry();
            let message = if registry.contains(name) {
                format!(
                    "Dataset '{}' is registered but not yet implemented for loading",
                    name
                )
            } else {
                // Sort so the message does not depend on map iteration order.
                let mut available = registry.list_datasets();
                available.sort_unstable();
                format!(
                    "Unknown dataset: '{}'. Available datasets: {:?}",
                    name, available
                )
            };
            Err(DatasetsError::Other(message))
        }
    }
}
131
132#[cfg(not(feature = "download"))]
133pub fn load_dataset_by_name(_name: &str, _force_download: bool) -> Result<crate::utils::Dataset> {
135 Err(DatasetsError::Other(
136 "Download feature is not enabled. Recompile with --features download".to_string(),
137 ))
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// A registry built with `new` has no entries.
    #[test]
    fn test_registry_creation() {
        let empty = DatasetRegistry::new();
        assert!(empty.entries.is_empty());
    }

    /// The default registry is non-empty and knows a sample of built-in names.
    #[test]
    fn test_registry_default() {
        let defaults = DatasetRegistry::default();
        assert!(!defaults.entries.is_empty());
        for name in &["california_housing", "wine", "electrocardiogram"] {
            assert!(defaults.contains(name));
        }
    }

    /// Registering an entry makes it visible through every accessor.
    #[test]
    fn test_registry_operations() {
        const URL: &str = "https://example.com/test.csv";
        const DIGEST: &str = "abcd1234";

        let mut registry = DatasetRegistry::new();
        registry.register(
            String::from("test_dataset"),
            RegistryEntry {
                url: URL,
                sha256: DIGEST,
            },
        );

        assert!(registry.contains("test_dataset"));
        assert!(!registry.contains("nonexistent"));

        let stored = registry.get("test_dataset").unwrap();
        assert_eq!(stored.url, URL);
        assert_eq!(stored.sha256, DIGEST);

        let names = registry.list_datasets();
        assert_eq!(names.len(), 1);
        assert!(names.iter().any(|listed| listed == "test_dataset"));
    }

    /// `get_registry` hands back a registry that lists at least one dataset.
    #[test]
    fn test_get_registry() {
        let names = get_registry().list_datasets();
        assert!(!names.is_empty());
    }
}