dataset_core/error.rs
1use ureq::Error as UreqError;
2use zip::result::ZipError;
3
4/// Specific kinds of data format errors that can occur during dataset parsing.
5///
6/// # Variants
7///
8/// - `CsvReadError` - Failed to read a CSV record.
9/// - `InvalidColumnCount` - The row has an unexpected number of columns.
10/// - `ParseFailed` - Failed to parse a field value into the target type.
11/// - `InvalidValue` - The field value is syntactically valid but semantically incorrect.
12/// - `LengthMismatch` - The total parsed data length doesn't match expected dimensions.
13/// - `EmptyDataset` - The dataset is empty.
14/// - `ArrayShapeError` - Failed to construct ndarray with the given shape and data.
15#[derive(Debug, thiserror::Error)]
16pub enum DataFormatErrorKind {
17 /// Failed to read a CSV record
18 #[error("[{dataset_name}] failed to read CSV record: {error}")]
19 CsvReadError {
20 /// Dataset identifier
21 dataset_name: String,
22 /// The underlying CSV error message
23 error: String,
24 },
25 /// The row has an unexpected number of columns
26 #[error(
27 "[{dataset_name}] invalid column count at line {line_num}: expected {expected}, got {actual}"
28 )]
29 InvalidColumnCount {
30 /// Dataset identifier
31 dataset_name: String,
32 /// Expected number of columns
33 expected: usize,
34 /// Actual number of columns found
35 actual: usize,
36 /// Line number (1-based)
37 line_num: usize,
38 },
39 /// Failed to parse a field value into the target type
40 #[error(
41 "[{dataset_name}] failed to parse `{field_name}` at line {line_num}: {error}"
42 )]
43 ParseFailed {
44 /// Dataset identifier
45 dataset_name: String,
46 /// Field name that failed to parse
47 field_name: String,
48 /// Line number (1-based)
49 line_num: usize,
50 /// The underlying parse error message
51 error: String,
52 },
53 /// The field value is syntactically valid but semantically incorrect
54 #[error(
55 "[{dataset_name}] invalid value for `{field_name}` at line {line_num}: `{value}`"
56 )]
57 InvalidValue {
58 /// Dataset identifier
59 dataset_name: String,
60 /// Field name with invalid value
61 field_name: String,
62 /// The invalid value
63 value: String,
64 /// Line number (1-based)
65 line_num: usize,
66 },
67 /// The total parsed data length doesn't match expected dimensions
68 #[error("[{dataset_name}] invalid `{field_name}` length: expected {expected}, got {actual}")]
69 LengthMismatch {
70 /// Dataset identifier
71 dataset_name: String,
72 /// Field name whose length is being validated
73 field_name: String,
74 /// Expected length
75 expected: usize,
76 /// Actual length
77 actual: usize,
78 },
79 /// The dataset is empty
80 #[error("[{dataset_name}] is empty")]
81 EmptyDataset {
82 /// Dataset identifier
83 dataset_name: String,
84 },
85 /// Failed to construct ndarray with the given shape and data
86 #[error("[{dataset_name}] failed to build `{array_name}` array: {error}")]
87 ArrayShapeError {
88 /// Dataset identifier
89 dataset_name: String,
90 /// Array name that failed to build
91 array_name: String,
92 /// The underlying shape error message
93 error: String,
94 },
95}
96
97/// Error type used by dataset loading utilities.
98///
99/// # Variants
100///
101/// - `DownloadError` - The download step failed (network, invalid URL, or downloader configuration).
102/// - `ValidationError` - Downloaded file content failed integrity validation (SHA256 mismatch).
103/// - `UnzipError` - Extracting a zip archive failed.
104/// - `IoError` - A standard I/O operation failed (reading directories, opening/removing files, etc.).
105/// - `DataFormatError` - The dataset content was not in the expected format.
106#[derive(Debug, thiserror::Error)]
107pub enum DatasetError {
108 #[error("Download error: {0}")]
109 DownloadError(#[from] UreqError),
110
111 #[error("Validation error: {0}")]
112 ValidationError(String),
113
114 #[error("Unzip error: {0}")]
115 UnzipError(#[from] ZipError),
116
117 #[error("I/O error: {0}")]
118 IoError(#[from] std::io::Error),
119
120 #[error("Data format error: {0}")]
121 DataFormatError(#[from] DataFormatErrorKind),
122}
123
124impl DatasetError {
125 /// Creates a standard SHA256 validation failure error message for a file.
126 ///
127 /// # Parameters
128 ///
129 /// - `dataset_name` - The dataset identifier used in the error prefix.
130 /// - `file_name` - The dataset file name that failed checksum validation.
131 ///
132 /// # Returns
133 ///
134 /// - `DatasetError::ValidationError` - A variant of `DatasetError` that contains the unified SHA256 failure message.
135 pub fn sha256_validation_failed(dataset_name: &str, file_name: &str) -> Self {
136 Self::ValidationError(format!(
137 "[{}] SHA256 validation failed for file `{}`",
138 dataset_name, file_name
139 ))
140 }
141
142 /// Creates a CSV read error.
143 ///
144 /// # Parameters
145 ///
146 /// - `dataset_name` - The dataset identifier.
147 /// - `error` - The underlying CSV error.
148 ///
149 /// # Returns
150 ///
151 /// - `DatasetError::DataFormatError(DataFormatErrorKind::CsvReadError)` - A variant of `DatasetError` describing the CSV read error.
152 pub fn csv_read_error(dataset_name: &str, error: impl std::fmt::Display) -> Self {
153 Self::DataFormatError(DataFormatErrorKind::CsvReadError {
154 dataset_name: dataset_name.to_string(),
155 error: error.to_string(),
156 })
157 }
158
159 /// Creates a unified invalid-column-count data format error.
160 ///
161 /// # Parameters
162 ///
163 /// - `dataset_name` - The dataset identifier used in the error prefix.
164 /// - `expected` - The expected number of columns.
165 /// - `actual` - The actual number of columns found.
166 /// - `line_num` - The line number (1-based) where the error occurred.
167 ///
168 /// # Returns
169 ///
170 /// - `DatasetError::DataFormatError(DataFormatErrorKind::InvalidColumnCount)` - A variant of `DatasetError` describing the column count mismatch.
171 pub fn invalid_column_count(
172 dataset_name: &str,
173 expected: usize,
174 actual: usize,
175 line_num: usize,
176 ) -> Self {
177 Self::DataFormatError(DataFormatErrorKind::InvalidColumnCount {
178 dataset_name: dataset_name.to_string(),
179 expected,
180 actual,
181 line_num,
182 })
183 }
184
185 /// Creates a unified parse failure data format error.
186 ///
187 /// # Parameters
188 ///
189 /// - `dataset_name` - The dataset identifier.
190 /// - `field_name` - The logical field name that failed to parse.
191 /// - `line_num` - The line number (1-based) where the error occurred.
192 /// - `line` - The original input line where parsing failed.
193 /// - `err` - The underlying parser error detail.
194 ///
195 /// # Returns
196 ///
197 /// - `DatasetError::DataFormatError(DataFormatErrorKind::ParseFailed)` - A variant of `DatasetError` describing the parse failure.
198 pub fn parse_failed(
199 dataset_name: &str,
200 field_name: &str,
201 line_num: usize,
202 err: impl std::fmt::Display,
203 ) -> Self {
204 Self::DataFormatError(DataFormatErrorKind::ParseFailed {
205 dataset_name: dataset_name.to_string(),
206 field_name: field_name.to_string(),
207 line_num,
208 error: err.to_string(),
209 })
210 }
211
212 /// Creates a unified invalid-field-value data format error.
213 ///
214 /// # Parameters
215 ///
216 /// - `dataset_name` - The dataset identifier.
217 /// - `field_name` - The logical field name with an invalid value.
218 /// - `value` - The invalid raw value.
219 /// - `line_num` - The line number (1-based) where the error occurred.
220 ///
221 /// # Returns
222 ///
223 /// - `DatasetError::DataFormatError(DataFormatErrorKind::InvalidValue)` - A variant of `DatasetError` describing the invalid value.
224 pub fn invalid_value(
225 dataset_name: &str,
226 field_name: &str,
227 value: &str,
228 line_num: usize,
229 ) -> Self {
230 Self::DataFormatError(DataFormatErrorKind::InvalidValue {
231 dataset_name: dataset_name.to_string(),
232 field_name: field_name.to_string(),
233 value: value.to_string(),
234 line_num,
235 })
236 }
237
238 /// Creates a unified vector/row length mismatch data format error.
239 ///
240 /// # Parameters
241 ///
242 /// - `dataset_name` - The dataset identifier.
243 /// - `field_name` - The logical field name whose length is being validated.
244 /// - `expected` - The expected length.
245 /// - `actual` - The actual length.
246 ///
247 /// # Returns
248 ///
249 /// - `DatasetError::DataFormatError(DataFormatErrorKind::LengthMismatch)` - A variant of `DatasetError` describing the length mismatch.
250 pub fn length_mismatch(
251 dataset_name: &str,
252 field_name: &str,
253 expected: usize,
254 actual: usize,
255 ) -> Self {
256 Self::DataFormatError(DataFormatErrorKind::LengthMismatch {
257 dataset_name: dataset_name.to_string(),
258 field_name: field_name.to_string(),
259 expected,
260 actual,
261 })
262 }
263
264 /// Creates a unified ndarray shape construction data format error.
265 ///
266 /// # Parameters
267 ///
268 /// - `dataset_name` - The dataset identifier.
269 /// - `array_name` - The logical array name that failed to build.
270 /// - `err` - The underlying ndarray shape construction error detail.
271 ///
272 /// # Returns
273 ///
274 /// - `DatasetError::DataFormatError(DataFormatErrorKind::ArrayShapeError)` - A variant of `DatasetError` describing the array shape failure.
275 pub fn array_shape_error(
276 dataset_name: &str,
277 array_name: &str,
278 err: impl std::fmt::Display,
279 ) -> Self {
280 Self::DataFormatError(DataFormatErrorKind::ArrayShapeError {
281 dataset_name: dataset_name.to_string(),
282 array_name: array_name.to_string(),
283 error: err.to_string(),
284 })
285 }
286
287 /// Creates an empty dataset error.
288 ///
289 /// # Parameters
290 ///
291 /// - `dataset_name` - The dataset identifier.
292 ///
293 /// # Returns
294 ///
295 /// - `DatasetError::DataFormatError(DataFormatErrorKind::EmptyDataset)` - A variant of `DatasetError` indicating the dataset is empty.
296 pub fn empty_dataset(dataset_name: &str) -> Self {
297 Self::DataFormatError(DataFormatErrorKind::EmptyDataset {
298 dataset_name: dataset_name.to_string(),
299 })
300 }
301}