use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
/// Category of dataset this module distinguishes between.
///
/// A value can be inferred from a file path with `DatasetType::from_path`,
/// and selects per-dataset settings such as the expected column count and
/// the name of the primary time column (see the `impl DatasetType` block).
///
/// The type is hashable and comparable, so it can be used as a map key, and
/// it can be serialized/deserialized through serde using the variant names
/// as written here (no rename attributes are applied).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum DatasetType {
/// Selected by `from_path` for paths containing `rain`.
Rain,
/// Selected by `from_path` for paths containing `temperature`.
Temperature,
/// Selected by `from_path` for paths containing `wind`.
Wind,
/// Selected by `from_path` for paths containing `radiation`.
Radiation,
}
impl DatasetType {
    /// Infers the dataset type from a file path.
    ///
    /// The whole path is converted to text (lossily, for non-UTF-8 paths),
    /// lowercased, and searched for the keywords `rain`, `temperature`,
    /// `wind` and `radiation`, in that order. The first keyword found
    /// decides the result.
    ///
    /// Matching is a plain substring test over the entire path, including
    /// parent directories, so a component such as `training` also matches
    /// `rain`.
    ///
    /// Returns `None` when none of the keywords occurs in the path.
    pub fn from_path(path: &Path) -> Option<Self> {
        // Order matters: earlier entries win when several keywords match.
        const KEYWORDS: [(&str, DatasetType); 4] = [
            ("rain", DatasetType::Rain),
            ("temperature", DatasetType::Temperature),
            ("wind", DatasetType::Wind),
            ("radiation", DatasetType::Radiation),
        ];

        let lowered = path.to_string_lossy().to_lowercase();
        KEYWORDS
            .iter()
            .find(|(keyword, _)| lowered.contains(*keyword))
            .map(|(_, dataset)| dataset.clone())
    }

    /// Returns the number of columns expected for this dataset type.
    pub fn expected_columns(&self) -> usize {
        match self {
            Self::Radiation => 12,
            Self::Rain => 15,
            Self::Temperature => 22,
            Self::Wind => 24,
        }
    }

    /// Returns the name of the primary time column for this dataset type.
    ///
    /// `Rain` uses `ob_end_ctime`; every other type uses `ob_end_time`.
    // NOTE(review): the rain column name differs from the others by one
    // letter; it is reproduced exactly from the original — confirm against
    // the header of the rain input files.
    pub fn primary_time_column(&self) -> &'static str {
        match self {
            Self::Rain => "ob_end_ctime",
            Self::Temperature | Self::Wind | Self::Radiation => "ob_end_time",
        }
    }
}
/// Descriptive metadata for a single station.
///
/// Serializable and deserializable through serde; no rename attributes are
/// applied, so the field names below are the serialized keys.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StationMetadata {
/// Name of the station.
pub station_name: String,
/// Station identifier, stored as text.
pub station_id: String,
/// County associated with the station.
pub county: String,
/// Latitude of the station.
// NOTE(review): presumably decimal degrees — confirm against the input files.
pub latitude: f64,
/// Longitude of the station.
// NOTE(review): presumably decimal degrees — confirm against the input files.
pub longitude: f64,
/// Height value for the station; see `height_units` for its unit.
// NOTE(review): presumably the station elevation — confirm.
pub height: f64,
/// Unit label for `height`, stored as free text.
pub height_units: String,
}
/// Row and line counts describing the layout of a file.
// NOTE(review): the field meanings below are inferred from their names only;
// confirm against the code that constructs and consumes this struct.
#[derive(Debug, Clone)]
pub struct DataBoundaries {
/// Number of rows to skip — presumably leading non-data rows before the
/// data section starts (TODO confirm).
pub skip_rows: usize,
/// Number of data rows, or `None` when no count is recorded — presumably
/// meaning "read to the end" (TODO confirm).
pub data_rows: Option<usize>,
/// Total line count — presumably of the whole file (TODO confirm).
pub total_lines: usize,
}
/// Counters summarising a processing run.
///
/// Derives `Default`, so `ProcessingStats::default()` yields zero for every
/// counter and an empty `output_path`.
#[derive(Debug, Default)]
pub struct ProcessingStats {
/// Number of files processed.
// NOTE(review): presumably counts successes only — confirm whether failed
// files are also included here.
pub files_processed: usize,
/// Number of files that failed.
pub files_failed: usize,
/// Total number of rows.
// NOTE(review): presumably summed over all processed files — confirm.
pub total_rows: usize,
/// Path of the output.
pub output_path: PathBuf,
/// Processing time in milliseconds.
// `u128` matches the return type of `std::time::Duration::as_millis`;
// presumably that is where the value comes from — confirm.
pub processing_time_ms: u128,
}
/// Configuration bundle for one dataset type.
///
/// Groups a `DatasetType` with a Polars schema and lists of column names.
#[derive(Debug, Clone)]
pub struct DatasetConfig {
/// Dataset type this configuration belongs to.
pub dataset_type: DatasetType,
/// Polars schema for the dataset.
// NOTE(review): presumably the column names and dtypes used when reading
// the dataset's files — confirm against the reader code.
pub schema: polars::prelude::Schema,
/// Column names.
// NOTE(review): presumably columns known to carry no data for this
// dataset — confirm how consumers treat them.
pub empty_columns: Vec<String>,
/// Further column-name groups; see `CommonPatterns`.
pub common_patterns: CommonPatterns,
}
/// Named groups of column names.
// NOTE(review): the group meanings below are inferred from the field names
// only; confirm against the code that fills and reads them.
#[derive(Debug, Clone)]
pub struct CommonPatterns {
/// Column names — presumably columns whose values are always NA (TODO confirm).
pub always_na_columns: Vec<String>,
/// Column names — presumably descriptive/identifier columns (TODO confirm).
pub descriptor_columns: Vec<String>,
/// Column names — presumably columns holding timestamps (TODO confirm).
pub timestamp_columns: Vec<String>,
}
impl DatasetConfig {
pub fn new(
dataset_type: DatasetType,
schema: polars::prelude::Schema,
empty_columns: Vec<String>,
common_patterns: CommonPatterns,
) -> Self {
Self {
dataset_type,
schema,
empty_columns,
common_patterns,
}
}
}