scirs2_datasets/
sample.rs1use crate::error::{DatasetsError, Result};
7use crate::utils::Dataset;
8
9#[cfg(feature = "download")]
10use crate::cache::download_data;
11#[cfg(feature = "download")]
12use crate::loaders;
13
/// Base URL under which the sample dataset files are hosted.
///
/// Deliberately written WITHOUT a trailing slash: callers in this file build
/// file URLs as `format!("{}/<file>", DATASET_BASE_URL)`, so a trailing slash
/// here would yield a malformed `main//<file>` path.
// Only the loaders gated on the `download` feature reference this constant,
// so it is unused (and would warn) when that feature is disabled.
#[allow(dead_code)]
const DATASET_BASE_URL: &str = "https://raw.githubusercontent.com/cool-japan/scirs-datasets/main";
17
#[cfg(feature = "download")]
/// Download and load the California Housing dataset.
///
/// The CSV is fetched through `download_data` (passing `force_download`
/// straight through), staged in a temporary file, parsed with
/// `loaders::load_csv`, and then annotated with feature names and a
/// description.
///
/// # Errors
///
/// Returns an error if the download fails, if the temporary file cannot be
/// written (`DatasetsError::IoError`), or if the CSV cannot be parsed.
/// The temporary file is removed on both the success and the failure path.
pub fn load_california_housing(force_download: bool) -> Result<Dataset> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Distinguishes temp files created by concurrent calls within one process.
    static TEMP_FILE_SEQ: AtomicUsize = AtomicUsize::new(0);

    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains `//` in its path.
    let url = format!(
        "{}/california_housing.csv",
        DATASET_BASE_URL.trim_end_matches('/')
    );

    let data = download_data(&url, force_download)?;

    // `loaders::load_csv` takes a path, so the downloaded bytes are staged on
    // disk. Process id + sequence number keep concurrent callers (threads or
    // processes) from overwriting or deleting each other's staging file.
    let temp_path = std::env::temp_dir().join(format!(
        "scirs2_california_housing_{}_{}.csv",
        std::process::id(),
        TEMP_FILE_SEQ.fetch_add(1, Ordering::Relaxed)
    ));

    // `fs::write` creates, writes and closes the file before the loader reads it.
    if let Err(err) = std::fs::write(&temp_path, &data) {
        // A partially written file may exist; cleanup is best-effort.
        std::fs::remove_file(&temp_path).ok();
        return Err(DatasetsError::IoError(err));
    }

    // Parse first, clean up second, propagate third: the staging file must not
    // be left behind when parsing fails.
    // NOTE(review): `true` presumably means "has header" and `Some(8)` the
    // target column index — confirm against `loaders::load_csv`.
    let loaded = loaders::load_csv(&temp_path, true, Some(8));
    std::fs::remove_file(&temp_path).ok();
    let dataset = loaded?;

    let feature_names = vec![
        "MedInc".to_string(),
        "HouseAge".to_string(),
        "AveRooms".to_string(),
        "AveBedrms".to_string(),
        "Population".to_string(),
        "AveOccup".to_string(),
        "Latitude".to_string(),
        "Longitude".to_string(),
    ];

    let description = "California Housing dataset

The data was derived from the 1990 U.S. census, using one row per census block group.
A block group is the smallest geographical unit for which the U.S. Census Bureau
publishes sample data.

Features:
- MedInc: median income in block group
- HouseAge: median house age in block group
- AveRooms: average number of rooms per household
- AveBedrms: average number of bedrooms per household
- Population: block group population
- AveOccup: average number of household members
- Latitude: block group latitude
- Longitude: block group longitude

Target: Median house value for California districts, expressed in hundreds of thousands of dollars.

This dataset is useful for regression tasks."
        .to_string();

    Ok(dataset
        .with_feature_names(feature_names)
        .with_description(description))
}
80
81#[cfg(not(feature = "download"))]
83pub fn load_california_housing(_force_download: bool) -> Result<Dataset> {
96 Err(DatasetsError::Other(
97 "Download feature is not enabled. Recompile with --features download".to_string(),
98 ))
99}
100
#[cfg(feature = "download")]
/// Download and load the Wine Recognition dataset.
///
/// The CSV is fetched through `download_data` (passing `force_download`
/// straight through), staged in a temporary file, parsed with
/// `loaders::load_csv`, and then annotated with feature names, target names
/// and a description.
///
/// # Errors
///
/// Returns an error if the download fails, if the temporary file cannot be
/// written (`DatasetsError::IoError`), or if the CSV cannot be parsed.
/// The temporary file is removed on both the success and the failure path.
pub fn load_wine(force_download: bool) -> Result<Dataset> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Distinguishes temp files created by concurrent calls within one process.
    static TEMP_FILE_SEQ: AtomicUsize = AtomicUsize::new(0);

    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains `//` in its path.
    let url = format!("{}/wine.csv", DATASET_BASE_URL.trim_end_matches('/'));

    let data = download_data(&url, force_download)?;

    // `loaders::load_csv` takes a path, so the downloaded bytes are staged on
    // disk. Process id + sequence number keep concurrent callers (threads or
    // processes) from overwriting or deleting each other's staging file.
    let temp_path = std::env::temp_dir().join(format!(
        "scirs2_wine_{}_{}.csv",
        std::process::id(),
        TEMP_FILE_SEQ.fetch_add(1, Ordering::Relaxed)
    ));

    // `fs::write` creates, writes and closes the file before the loader reads it.
    if let Err(err) = std::fs::write(&temp_path, &data) {
        // A partially written file may exist; cleanup is best-effort.
        std::fs::remove_file(&temp_path).ok();
        return Err(DatasetsError::IoError(err));
    }

    // Parse first, clean up second, propagate third: the staging file must not
    // be left behind when parsing fails.
    // NOTE(review): `true` presumably means "has header" and `Some(0)` the
    // target column index — confirm against `loaders::load_csv`.
    let loaded = loaders::load_csv(&temp_path, true, Some(0));
    std::fs::remove_file(&temp_path).ok();
    let dataset = loaded?;

    let feature_names = vec![
        "alcohol".to_string(),
        "malic_acid".to_string(),
        "ash".to_string(),
        "alcalinity_of_ash".to_string(),
        "magnesium".to_string(),
        "total_phenols".to_string(),
        "flavanoids".to_string(),
        "nonflavanoid_phenols".to_string(),
        "proanthocyanins".to_string(),
        "color_intensity".to_string(),
        "hue".to_string(),
        "od280_od315_of_diluted_wines".to_string(),
        "proline".to_string(),
    ];

    let target_names = vec![
        "class_0".to_string(),
        "class_1".to_string(),
        "class_2".to_string(),
    ];

    let description = "Wine Recognition dataset

The data is the results of a chemical analysis of wines grown in the same region in Italy
but derived from three different cultivars. The analysis determined the quantities of
13 constituents found in each of the three types of wines.

Features: Various chemical properties of the wine

Target: Class of wine (0, 1, or 2)

This dataset is useful for classification tasks."
        .to_string();

    Ok(dataset
        .with_feature_names(feature_names)
        .with_target_names(target_names)
        .with_description(description))
}
167
168#[cfg(not(feature = "download"))]
170pub fn load_wine(_force_download: bool) -> Result<Dataset> {
183 Err(DatasetsError::Other(
184 "Download feature is not enabled. Recompile with --features download".to_string(),
185 ))
186}
187
#[cfg(feature = "download")]
/// Fetch the list of dataset names published in the remote index file.
///
/// Downloads `datasets_index.txt` from the dataset base URL and returns one
/// entry per non-blank line, with surrounding whitespace trimmed.
///
/// # Errors
///
/// Returns an error if the download fails, or
/// `DatasetsError::InvalidFormat` if the index is not valid UTF-8.
pub fn get_available_datasets() -> Result<Vec<String>> {
    // Tolerate a base URL with or without a trailing slash so the joined URL
    // never contains `//` in its path.
    let url = format!(
        "{}/datasets_index.txt",
        DATASET_BASE_URL.trim_end_matches('/')
    );

    // The second argument is always `true` here.
    // NOTE(review): presumably this forces a fresh download so the index is
    // never served stale from the cache — confirm against `download_data`.
    let data = download_data(&url, true)?;

    let content = String::from_utf8(data).map_err(|e| {
        DatasetsError::InvalidFormat(format!("Failed to parse datasets index: {}", e))
    })?;

    // Filter on the borrowed slice first so blank lines never allocate.
    let datasets = content
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(str::to_owned)
        .collect();

    Ok(datasets)
}
209
210#[cfg(not(feature = "download"))]
212pub fn get_available_datasets() -> Result<Vec<String>> {
221 Err(DatasetsError::Other(
222 "Download feature is not enabled. Recompile with --features download".to_string(),
223 ))
224}