scirs2_datasets/
sample.rs1use crate::error::{DatasetsError, Result};
7use crate::utils::Dataset;
8
/// Base URL under which the sample dataset files are hosted.
///
/// Deliberately written WITHOUT a trailing slash: the loaders in this file build
/// file URLs as `"{base}/{file}"`, so a trailing slash here would yield a
/// malformed `.../main//file` path with an empty segment.
// Only referenced by the `download`-gated functions, so it is unused (and would
// trigger a dead-code warning) when that feature is disabled.
#[allow(dead_code)]
const DATASET_BASE_URL: &str = "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main";
12
/// Downloads the California Housing dataset and loads it as a [`Dataset`].
///
/// The CSV is fetched with `download_data`, staged in a file inside the system
/// temporary directory, parsed with `loaders::load_csv` (header row enabled,
/// `Some(8)` passed as the target column), and then annotated with feature names
/// and a description.
///
/// # Arguments
///
/// * `force_download` - Forwarded unchanged to `download_data`
///   (presumably bypasses any cached copy; confirm against that helper).
///
/// # Errors
///
/// Returns an error if the download fails, if the temporary file cannot be
/// written ([`DatasetsError::IoError`]), or if the CSV cannot be parsed.
#[cfg(feature = "download")]
pub fn load_california_housing(force_download: bool) -> Result<Dataset> {
    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains an empty `//` path segment.
    let url = format!(
        "{}/california_housing.csv",
        DATASET_BASE_URL.trim_end_matches('/')
    );

    let data = download_data(&url, force_download)?;

    // `load_csv` reads from a path, so stage the downloaded bytes on disk.
    // `fs::write` creates, writes and closes the file before it is re-read.
    let temp_path = std::env::temp_dir().join("scirs2_california_housing.csv");
    if let Err(e) = std::fs::write(&temp_path, &data) {
        // Do not leave a partially written file behind.
        std::fs::remove_file(&temp_path).ok();
        return Err(DatasetsError::IoError(e));
    }

    let loaded = loaders::load_csv(&temp_path, true, Some(8));

    // Best-effort cleanup, performed BEFORE propagating a parse error so the
    // temporary file is removed on the failure path as well.
    std::fs::remove_file(&temp_path).ok();

    let feature_names = vec![
        "MedInc".to_string(),
        "HouseAge".to_string(),
        "AveRooms".to_string(),
        "AveBedrms".to_string(),
        "Population".to_string(),
        "AveOccup".to_string(),
        "Latitude".to_string(),
        "Longitude".to_string(),
    ];

    let description = "California Housing dataset

The data was derived from the 1990 U.S. census, using one row per census block group.
A block group is the smallest geographical unit for which the U.S. Census Bureau
publishes sample data.

Features:
- MedInc: median income in block group
- HouseAge: median house age in block group
- AveRooms: average number of rooms per household
- AveBedrms: average number of bedrooms per household
- Population: block group population
- AveOccup: average number of household members
- Latitude: block group latitude
- Longitude: block group longitude

Target: Median house value for California districts, expressed in hundreds of thousands of dollars.

This dataset is useful for regression tasks."
        .to_string();

    let dataset = loaded?
        .with_feature_names(feature_names)
        .with_description(description);

    Ok(dataset)
}
77
78#[cfg(not(feature = "download"))]
80pub fn load_california_housing(_force_download: bool) -> Result<Dataset> {
93 Err(DatasetsError::Other(
94 "Download feature is not enabled. Recompile with --features download".to_string(),
95 ))
96}
97
/// Downloads the Wine Recognition dataset and loads it as a [`Dataset`].
///
/// The CSV is fetched with `download_data`, staged in a file inside the system
/// temporary directory, parsed with `loaders::load_csv` (header row enabled,
/// `Some(0)` passed as the target column), and then annotated with feature
/// names, target (class) names and a description.
///
/// # Arguments
///
/// * `force_download` - Forwarded unchanged to `download_data`
///   (presumably bypasses any cached copy; confirm against that helper).
///
/// # Errors
///
/// Returns an error if the download fails, if the temporary file cannot be
/// written ([`DatasetsError::IoError`]), or if the CSV cannot be parsed.
#[cfg(feature = "download")]
pub fn load_wine(force_download: bool) -> Result<Dataset> {
    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains an empty `//` path segment.
    let url = format!("{}/wine.csv", DATASET_BASE_URL.trim_end_matches('/'));

    let data = download_data(&url, force_download)?;

    // `load_csv` reads from a path, so stage the downloaded bytes on disk.
    // `fs::write` creates, writes and closes the file before it is re-read.
    let temp_path = std::env::temp_dir().join("scirs2_wine.csv");
    if let Err(e) = std::fs::write(&temp_path, &data) {
        // Do not leave a partially written file behind.
        std::fs::remove_file(&temp_path).ok();
        return Err(DatasetsError::IoError(e));
    }

    let loaded = loaders::load_csv(&temp_path, true, Some(0));

    // Best-effort cleanup, performed BEFORE propagating a parse error so the
    // temporary file is removed on the failure path as well.
    std::fs::remove_file(&temp_path).ok();

    let feature_names = vec![
        "alcohol".to_string(),
        "malic_acid".to_string(),
        "ash".to_string(),
        "alcalinity_of_ash".to_string(),
        "magnesium".to_string(),
        "total_phenols".to_string(),
        "flavanoids".to_string(),
        "nonflavanoid_phenols".to_string(),
        "proanthocyanins".to_string(),
        "color_intensity".to_string(),
        "hue".to_string(),
        "od280_od315_of_diluted_wines".to_string(),
        "proline".to_string(),
    ];

    let target_names = vec![
        "class_0".to_string(),
        "class_1".to_string(),
        "class_2".to_string(),
    ];

    let description = "Wine Recognition dataset

The data is the results of a chemical analysis of wines grown in the same region in Italy
but derived from three different cultivars. The analysis determined the quantities of
13 constituents found in each of the three types of wines.

Features: Various chemical properties of the wine

Target: Class of wine (0, 1, or 2)

This dataset is useful for classification tasks."
        .to_string();

    let dataset = loaded?
        .with_feature_names(feature_names)
        .with_target_names(target_names)
        .with_description(description);

    Ok(dataset)
}
166
167#[cfg(not(feature = "download"))]
169pub fn load_wine(_force_download: bool) -> Result<Dataset> {
182 Err(DatasetsError::Other(
183 "Download feature is not enabled. Recompile with --features download".to_string(),
184 ))
185}
186
/// Fetches the list of dataset names published in the remote index file.
///
/// Downloads `datasets_index.txt` (always passing `true` as the force flag to
/// `download_data`), decodes it as UTF-8, and returns every non-blank line with
/// surrounding whitespace trimmed, in file order.
///
/// # Errors
///
/// Returns an error if the download fails, or
/// [`DatasetsError::InvalidFormat`] if the index is not valid UTF-8.
#[cfg(feature = "download")]
pub fn get_available_datasets() -> Result<Vec<String>> {
    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains an empty `//` path segment.
    let url = format!(
        "{}/datasets_index.txt",
        DATASET_BASE_URL.trim_end_matches('/')
    );

    let data = download_data(&url, true)?;

    let content = String::from_utf8(data).map_err(|e| {
        DatasetsError::InvalidFormat(format!("Failed to parse datasets index: {}", e))
    })?;

    // Trim and filter on borrowed slices first so blank lines never allocate.
    let datasets = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(str::to_string)
        .collect();

    Ok(datasets)
}
208
209#[cfg(not(feature = "download"))]
211pub fn get_available_datasets() -> Result<Vec<String>> {
220 Err(DatasetsError::Other(
221 "Download feature is not enabled. Recompile with --features download".to_string(),
222 ))
223}