use ureq::Error as UreqError;
use zip::result::ZipError;
/// Detailed kinds of data-format failures.
///
/// Every variant records `dataset_name`, which its message renders as a
/// leading `[name]` prefix.
#[derive(Debug, thiserror::Error)]
pub enum DataFormatErrorKind {
/// A CSV record could not be read; `error` holds the failure rendered as text.
#[error("[{dataset_name}] failed to read CSV record: {error}")]
CsvReadError {
dataset_name: String,
error: String,
},
/// A line had `actual` columns where `expected` were required.
///
/// `line_num` and `line` identify the offending line and are echoed in the message.
#[error(
"[{dataset_name}] invalid column count at line {line_num}: expected {expected}, got {actual} (line: `{line}`)"
)]
InvalidColumnCount {
dataset_name: String,
expected: usize,
actual: usize,
line_num: usize,
line: String,
},
/// The field `field_name` could not be parsed on the given line; `error`
/// holds the failure rendered as text.
#[error(
"[{dataset_name}] failed to parse `{field_name}` at line {line_num}: {error} (line: `{line}`)"
)]
ParseFailed {
dataset_name: String,
field_name: String,
line_num: usize,
line: String,
error: String,
},
/// The field `field_name` held the invalid `value` on the given line.
#[error(
"[{dataset_name}] invalid value for `{field_name}` at line {line_num}: `{value}` (line: `{line}`)"
)]
InvalidValue {
dataset_name: String,
field_name: String,
value: String,
line_num: usize,
line: String,
},
/// The item named by `field_name` had length `actual` rather than `expected`.
#[error("[{dataset_name}] invalid `{field_name}` length: expected {expected}, got {actual}")]
LengthMismatch {
dataset_name: String,
field_name: String,
expected: usize,
actual: usize,
},
/// The dataset is empty.
#[error("[{dataset_name}] is empty")]
EmptyDataset {
dataset_name: String,
},
/// Building the array named `array_name` failed; `error` holds the failure
/// rendered as text.
#[error("[{dataset_name}] failed to build `{array_name}` array: {error}")]
ArrayShapeError {
dataset_name: String,
array_name: String,
error: String,
},
}
/// Top-level error type covering download, validation, unzip, I/O and
/// data-format failures.
///
/// Variants marked `#[from]` get a `From` conversion from their source error,
/// so those errors can be propagated with `?`.
#[derive(Debug, thiserror::Error)]
pub enum DatasetError {
/// Wraps a `ureq::Error`.
#[error("Download error: {0}")]
DownloadError(#[from] UreqError),
/// Validation failure described by a free-form message, e.g. the one built
/// by `DatasetError::sha256_validation_failed`.
#[error("Validation error: {0}")]
ValidationError(String),
/// Wraps a `zip::result::ZipError`.
#[error("Unzip error: {0}")]
UnzipError(#[from] ZipError),
/// Wraps a `std::io::Error`.
#[error("I/O error: {0}")]
IoError(#[from] std::io::Error),
/// Wraps a [`DataFormatErrorKind`] describing the specific format problem.
#[error("Data format error: {0}")]
DataFormatError(#[from] DataFormatErrorKind),
}
/// Convenience constructors that take borrowed inputs, copy them into owned
/// strings, and return the matching [`DatasetError`] variant.
impl DatasetError {
    /// Builds a [`DatasetError::ValidationError`] stating that SHA256
    /// validation failed for `file_name` of dataset `dataset_name`.
    pub fn sha256_validation_failed(dataset_name: &str, file_name: &str) -> Self {
        let message = format!(
            "[{}] SHA256 validation failed for file `{}`",
            dataset_name, file_name
        );
        Self::ValidationError(message)
    }

    /// Builds a [`DataFormatErrorKind::CsvReadError`] wrapped in
    /// [`DatasetError::DataFormatError`]; `error` is stored as its `Display` text.
    pub fn csv_read_error(dataset_name: &str, error: impl std::fmt::Display) -> Self {
        let kind = DataFormatErrorKind::CsvReadError {
            dataset_name: dataset_name.to_owned(),
            error: error.to_string(),
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::InvalidColumnCount`] wrapped in
    /// [`DatasetError::DataFormatError`] for the given line.
    pub fn invalid_column_count(
        dataset_name: &str,
        expected: usize,
        actual: usize,
        line_num: usize,
        line: &str,
    ) -> Self {
        let kind = DataFormatErrorKind::InvalidColumnCount {
            dataset_name: dataset_name.to_owned(),
            expected,
            actual,
            line_num,
            line: line.to_owned(),
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::ParseFailed`] wrapped in
    /// [`DatasetError::DataFormatError`]; `err` is stored as its `Display` text.
    pub fn parse_failed(
        dataset_name: &str,
        field_name: &str,
        line_num: usize,
        line: &str,
        err: impl std::fmt::Display,
    ) -> Self {
        let kind = DataFormatErrorKind::ParseFailed {
            dataset_name: dataset_name.to_owned(),
            field_name: field_name.to_owned(),
            line_num,
            line: line.to_owned(),
            error: err.to_string(),
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::InvalidValue`] wrapped in
    /// [`DatasetError::DataFormatError`] for the given field and line.
    pub fn invalid_value(
        dataset_name: &str,
        field_name: &str,
        value: &str,
        line_num: usize,
        line: &str,
    ) -> Self {
        let kind = DataFormatErrorKind::InvalidValue {
            dataset_name: dataset_name.to_owned(),
            field_name: field_name.to_owned(),
            value: value.to_owned(),
            line_num,
            line: line.to_owned(),
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::LengthMismatch`] wrapped in
    /// [`DatasetError::DataFormatError`].
    pub fn length_mismatch(
        dataset_name: &str,
        field_name: &str,
        expected: usize,
        actual: usize,
    ) -> Self {
        let kind = DataFormatErrorKind::LengthMismatch {
            dataset_name: dataset_name.to_owned(),
            field_name: field_name.to_owned(),
            expected,
            actual,
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::ArrayShapeError`] wrapped in
    /// [`DatasetError::DataFormatError`]; `err` is stored as its `Display` text.
    pub fn array_shape_error(
        dataset_name: &str,
        array_name: &str,
        err: impl std::fmt::Display,
    ) -> Self {
        let kind = DataFormatErrorKind::ArrayShapeError {
            dataset_name: dataset_name.to_owned(),
            array_name: array_name.to_owned(),
            error: err.to_string(),
        };
        kind.into()
    }

    /// Builds a [`DataFormatErrorKind::EmptyDataset`] wrapped in
    /// [`DatasetError::DataFormatError`].
    pub fn empty_dataset(dataset_name: &str) -> Self {
        let kind = DataFormatErrorKind::EmptyDataset {
            dataset_name: dataset_name.to_owned(),
        };
        kind.into()
    }
}