use std::collections::HashMap;
/// Configuration bundle describing a preprocessing pipeline.
///
/// The pipeline is made of three independent lists: the preprocessing steps,
/// the quality checks, and the validation rules. This type only stores the
/// configuration; nothing in this definition specifies how, or in which
/// order, the entries are applied.
#[derive(Debug, Clone)]
pub struct PreprocessingPipeline {
    /// Preprocessing operations configured for this pipeline.
    pub steps: Vec<PreprocessingStep>,

    /// Quality checks configured for this pipeline.
    pub quality_checks: Vec<QualityCheck>,

    /// Validation rules configured for this pipeline.
    pub validation_rules: Vec<ValidationRule>,
}
/// A single preprocessing operation together with its parameters.
///
/// Each variant pairs a method selector with the numeric parameters that
/// accompany it. The operations themselves are not implemented in this
/// definition; it is a plain description of what should be done.
#[derive(Debug, Clone)]
pub enum PreprocessingStep {
    /// Outlier removal.
    OutlierRemoval {
        /// Which outlier-detection method to use.
        method: OutlierDetectionMethod,
        /// Numeric threshold accompanying `method`; its scale is not defined
        /// by this type.
        threshold: f64,
    },

    /// Smoothing.
    Smoothing {
        /// Which smoothing method to use.
        method: SmoothingMethod,
        /// Size of the smoothing window (unit not specified here).
        window_size: usize,
    },

    /// Normalization.
    Normalization {
        /// Normalization method, declared in the sibling `features` module.
        method: super::features::FeatureNormalization,
    },

    /// Noise filtering.
    NoiseFiltering {
        /// Which kind of filter to use.
        filter_type: FilterType,
        /// Cutoff frequency for the filter (unit not specified here).
        cutoff_frequency: f64,
    },

    /// Interpolation.
    Interpolation {
        /// Which interpolation method to use.
        method: InterpolationMethod,
    },
}
/// Selector for the outlier-detection method used by
/// [`PreprocessingStep::OutlierRemoval`].
///
/// This is a plain tag: the algorithms themselves are not implemented here.
/// `PartialEq`, `Eq` and `Hash` are derived so that values can be compared
/// with `==` and used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OutlierDetectionMethod {
    /// Z-score method.
    ZScore,
    /// IQR method (presumably interquartile range).
    IQR,
    /// Isolation-forest method.
    IsolationForest,
    /// Local-outlier-factor method.
    LocalOutlierFactor,
    /// Elliptic-envelope method.
    EllipticEnvelope,
}
/// Selector for the smoothing method used by
/// [`PreprocessingStep::Smoothing`].
///
/// This is a plain tag: the algorithms themselves are not implemented here.
/// `PartialEq`, `Eq` and `Hash` are derived so that values can be compared
/// with `==` and used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SmoothingMethod {
    /// Moving-average smoothing.
    MovingAverage,
    /// Exponential smoothing.
    ExponentialSmoothing,
    /// Savitzky-Golay smoothing.
    SavitzkyGolay,
    /// Gaussian smoothing.
    Gaussian,
    /// Median smoothing.
    Median,
}
/// Selector for the filter used by [`PreprocessingStep::NoiseFiltering`].
///
/// This is a plain tag: the filters themselves are not implemented here.
/// `PartialEq`, `Eq` and `Hash` are derived so that values can be compared
/// with `==` and used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FilterType {
    /// Low-pass filter.
    LowPass,
    /// High-pass filter.
    HighPass,
    /// Band-pass filter.
    BandPass,
    /// Band-stop filter.
    BandStop,
    /// Butterworth filter.
    Butterworth,
    /// Chebyshev filter.
    Chebyshev,
}
/// Selector for the interpolation method used by
/// [`PreprocessingStep::Interpolation`].
///
/// This is a plain tag: the algorithms themselves are not implemented here.
/// `PartialEq`, `Eq` and `Hash` are derived so that values can be compared
/// with `==` and used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum InterpolationMethod {
    /// Linear interpolation.
    Linear,
    /// Cubic interpolation.
    Cubic,
    /// Spline interpolation.
    Spline,
    /// Polynomial interpolation.
    Polynomial,
    /// Nearest-neighbor interpolation.
    NearestNeighbor,
}
/// A data-quality check together with its numeric parameters.
///
/// The checks themselves are not implemented here; each variant only carries
/// the limits a check would be run against.
///
/// `PartialEq` is derived so that configured checks can be compared. Because
/// every field is an `f64`, comparison follows floating-point semantics: a
/// value holding `NaN` is not equal to itself, which is also why `Eq` and
/// `Hash` are not derived.
#[derive(Debug, Clone, PartialEq)]
pub enum QualityCheck {
    /// Check on missing values.
    MissingValues {
        /// Maximum ratio of missing values (presumably a fraction of the
        /// total; the range is not enforced by this type).
        max_missing_ratio: f64,
    },
    /// Check that values lie within a range.
    RangeCheck {
        /// Lower end of the range.
        min_value: f64,
        /// Upper end of the range.
        max_value: f64,
    },
    /// Check on constant values.
    ConstantValues {
        /// Tolerance used by the check (exact meaning not defined here).
        tolerance: f64,
    },
    /// Check on the sampling rate.
    SamplingRate {
        /// Expected sampling rate (unit not specified here).
        expected_rate: f64,
        /// Tolerance around `expected_rate` (exact meaning not defined here).
        tolerance: f64,
    },
    /// Check on duplicates.
    Duplicates {
        /// Maximum ratio of duplicates (presumably a fraction of the total;
        /// the range is not enforced by this type).
        max_duplicate_ratio: f64,
    },
}
/// A validation rule together with the data it is parameterized by.
///
/// The rules are not evaluated here; each variant only stores what a rule
/// would be checked against.
#[derive(Debug, Clone)]
pub enum ValidationRule {
    /// Validation against a list of physical constraints.
    PhysicalConstraints {
        /// The constraints belonging to this rule.
        constraints: Vec<Constraint>,
    },

    /// Validation through a list of statistical tests.
    StatisticalTests {
        /// The tests belonging to this rule.
        tests: Vec<StatisticalTest>,
    },

    /// Validation of the trend.
    TrendValidation {
        /// Maximum trend change (unit and exact meaning not defined here).
        max_trend_change: f64,
    },

    /// Validation of correlations.
    CorrelationValidation {
        /// Expected correlation values keyed by a string (presumably a
        /// variable or variable-pair name; confirm with the consumer).
        expected_correlations: HashMap<String, f64>,
    },
}
/// A single constraint, as listed in
/// [`ValidationRule::PhysicalConstraints`].
///
/// Constraints are only described here, not evaluated.
#[derive(Debug, Clone)]
pub enum Constraint {
    /// A named variable with a minimum and a maximum.
    Bounds {
        /// Name of the variable the bounds refer to.
        variable: String,
        /// Minimum value.
        min: f64,
        /// Maximum value.
        max: f64,
    },

    /// A conservation law with a tolerance.
    Conservation {
        /// Which conservation law this constraint refers to.
        law_type: ConservationLaw,
        /// Tolerance for the law (exact meaning not defined here).
        tolerance: f64,
    },

    /// A named variable with a maximum rate.
    RateLimit {
        /// Name of the variable the limit refers to.
        variable: String,
        /// Maximum rate (unit not specified here).
        max_rate: f64,
    },
}
/// Selector for the conservation law referenced by
/// [`Constraint::Conservation`].
///
/// This is a plain tag: no law is evaluated here. `PartialEq`, `Eq` and
/// `Hash` are derived so that values can be compared with `==` and used as
/// keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ConservationLaw {
    /// Conservation of energy.
    Energy,
    /// Conservation of mass.
    Mass,
    /// Conservation of momentum.
    Momentum,
    /// Conservation of angular momentum.
    AngularMomentum,
    /// Conservation of charge.
    Charge,
}
/// Selector for a statistical test listed in
/// [`ValidationRule::StatisticalTests`].
///
/// This is a plain tag: the tests themselves are not implemented here.
/// `PartialEq`, `Eq` and `Hash` are derived so that values can be compared
/// with `==` and used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StatisticalTest {
    /// Test for normality.
    Normality,
    /// Test for stationarity.
    Stationarity,
    /// Test for independence.
    Independence,
    /// Test for homoscedasticity.
    Homoscedasticity,
    /// Test for linearity.
    Linearity,
}