use ndarray::{Array1, Array2};
use num_traits::{Float, FromPrimitive};
use std::fmt;
use thiserror::Error;
use super::traits::{FeatureTransformer, ModelPredictor};
#[derive(Error, Debug, Clone)]
pub enum PipelineError {
#[error("Pipeline step '{0}' has not been fitted. Call fit() before transform().")]
NotFitted(String),
#[error("Empty input data in step '{0}': data must have at least one row and one column.")]
EmptyInput(String),
#[error("Feature count mismatch in step '{step}': expected {expected} columns, got {actual}.")]
FeatureCountMismatch {
step: String,
expected: usize,
actual: usize,
},
#[error("Numeric error in step '{0}': {1}")]
NumericError(String, String),
#[error("No ModelPredictor is configured. Call set_predictor() before predict().")]
NoPredictorSet,
#[error("Step '{step}' error: {message}")]
StepError {
step: String,
message: String,
},
#[error("Configuration error: {0}")]
ConfigurationError(String),
}
/// A single pipeline stage: an owned, boxed [`FeatureTransformer`].
pub struct PipelineStep<T>
where
    T: Clone + fmt::Debug + Float + FromPrimitive + 'static,
{
    /// The transformer every method of this step forwards to.
    transformer: Box<dyn FeatureTransformer<T>>,
}
impl<T: Clone + fmt::Debug + Float + FromPrimitive + 'static> PipelineStep<T> {
pub fn new(transformer: Box<dyn FeatureTransformer<T>>) -> Self {
Self { transformer }
}
pub fn fit(&mut self, data: &Array2<T>) -> Result<(), PipelineError> {
self.transformer.fit(data)
}
pub fn transform(&self, data: &Array2<T>) -> Result<Array2<T>, PipelineError> {
self.transformer.transform(data)
}
pub fn fit_transform(&mut self, data: &Array2<T>) -> Result<Array2<T>, PipelineError> {
self.transformer.fit_transform(data)
}
pub fn name(&self) -> &str {
self.transformer.name()
}
pub fn is_fitted(&self) -> bool {
self.transformer.is_fitted()
}
}
/// An ordered chain of transformer steps with an optional final predictor.
pub struct Pipeline<T>
where
    T: Clone + fmt::Debug + Float + FromPrimitive + 'static,
{
    /// Transformer steps, applied in insertion order.
    steps: Vec<PipelineStep<T>>,
    /// Predictor run on the transformed data, if one has been attached.
    predictor: Option<Box<dyn ModelPredictor<T>>>,
    /// Set once `fit` or `fit_transform` has completed successfully.
    is_fitted: bool,
}
impl<T: Clone + fmt::Debug + Float + FromPrimitive + 'static> Pipeline<T> {
pub fn new() -> Self {
Self {
steps: Vec::new(),
predictor: None,
is_fitted: false,
}
}
pub fn add_transformer(mut self, transformer: Box<dyn FeatureTransformer<T>>) -> Self {
self.steps.push(PipelineStep::new(transformer));
self
}
pub fn add_predictor(mut self, predictor: Box<dyn ModelPredictor<T>>) -> Self {
self.predictor = Some(predictor);
self
}
pub fn fit(&mut self, data: &Array2<T>) -> Result<(), PipelineError> {
if self.steps.is_empty() {
self.is_fitted = true;
return Ok(());
}
let mut current_data = data.clone();
for step in &mut self.steps {
current_data = step.fit_transform(¤t_data)?;
}
self.is_fitted = true;
Ok(())
}
pub fn transform(&self, data: &Array2<T>) -> Result<Array2<T>, PipelineError> {
if self.steps.is_empty() {
return Ok(data.clone());
}
if !self.is_fitted {
return Err(PipelineError::NotFitted("Pipeline".to_string()));
}
let mut current_data = data.clone();
for step in &self.steps {
current_data = step.transform(¤t_data)?;
}
Ok(current_data)
}
pub fn fit_transform(&mut self, data: &Array2<T>) -> Result<Array2<T>, PipelineError> {
if self.steps.is_empty() {
self.is_fitted = true;
return Ok(data.clone());
}
let mut current_data = data.clone();
for step in &mut self.steps {
current_data = step.fit_transform(¤t_data)?;
}
self.is_fitted = true;
Ok(current_data)
}
pub fn predict(&self, data: &Array2<T>) -> Result<Array1<T>, PipelineError> {
let transformed = self.transform(data)?;
match &self.predictor {
Some(pred) => pred.predict(&transformed),
None => Err(PipelineError::NoPredictorSet),
}
}
pub fn fit_predict(&mut self, data: &Array2<T>) -> Result<Array1<T>, PipelineError> {
let transformed = self.fit_transform(data)?;
match &self.predictor {
Some(pred) => pred.predict(&transformed),
None => Err(PipelineError::NoPredictorSet),
}
}
pub fn is_fitted(&self) -> bool {
self.is_fitted
}
pub fn n_steps(&self) -> usize {
self.steps.len()
}
pub fn step_names(&self) -> Vec<&str> {
self.steps.iter().map(|s| s.name()).collect()
}
pub fn has_predictor(&self) -> bool {
self.predictor.is_some()
}
}
impl<T: Clone + fmt::Debug + Float + FromPrimitive + 'static> Default for Pipeline<T> {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod builder_tests {
    use super::*;
    use crate::ml_pipeline::scalers::{MinMaxScaler, NormalizerTransform, StandardScaler};
    use ndarray::Array2;

    /// 4x2 fixture: first column 1..=4, second column ten times the first.
    fn sample_data() -> Array2<f64> {
        let values = vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 4.0, 40.0];
        Array2::from_shape_vec((4, 2), values).expect("shape is valid")
    }

    /// Unfitted pipeline whose only step is a `StandardScaler`.
    fn standard_pipeline() -> Pipeline<f64> {
        Pipeline::<f64>::new().add_transformer(Box::new(StandardScaler::new()))
    }

    /// `Default` yields a pipeline with no steps, no predictor, not fitted.
    #[test]
    fn test_pipeline_default_is_empty() {
        let pipeline = Pipeline::<f64>::default();
        assert_eq!(pipeline.n_steps(), 0);
        assert!(!pipeline.is_fitted());
        assert!(!pipeline.has_predictor());
    }

    /// With no steps, `fit_transform` hands the input back unchanged.
    #[test]
    fn test_pipeline_empty_is_identity() {
        let input = sample_data();
        let mut pipeline = Pipeline::<f64>::new();
        let output = pipeline
            .fit_transform(&input)
            .expect("empty pipeline is identity");
        assert_eq!(output, input);
    }

    /// Step names come back in insertion order.
    #[test]
    fn test_pipeline_step_names() {
        let pipeline = Pipeline::<f64>::new()
            .add_transformer(Box::new(StandardScaler::new()))
            .add_transformer(Box::new(MinMaxScaler::new()))
            .add_transformer(Box::new(NormalizerTransform::l2()));
        let expected = vec!["StandardScaler", "MinMaxScaler", "NormalizerL2"];
        assert_eq!(pipeline.step_names(), expected);
    }

    /// Transforming with a non-empty, unfitted pipeline is rejected.
    #[test]
    fn test_pipeline_transform_before_fit_returns_error() {
        let pipeline = standard_pipeline();
        let input = sample_data();
        let outcome = pipeline.transform(&input);
        assert!(matches!(outcome, Err(PipelineError::NotFitted(_))));
    }

    /// Predicting on a fitted pipeline without a predictor is rejected.
    #[test]
    fn test_pipeline_predict_without_predictor_returns_error() {
        let input = sample_data();
        let mut pipeline = standard_pipeline();
        pipeline.fit(&input).expect("fit should succeed");
        let outcome = pipeline.predict(&input);
        assert!(matches!(outcome, Err(PipelineError::NoPredictorSet)));
    }

    /// Chaining two scalers keeps the input's shape.
    #[test]
    fn test_pipeline_multiple_steps_shape_preserved() {
        let input = sample_data();
        let mut pipeline = Pipeline::<f64>::new()
            .add_transformer(Box::new(StandardScaler::new()))
            .add_transformer(Box::new(MinMaxScaler::new()));
        let output = pipeline
            .fit_transform(&input)
            .expect("multi-step pipeline should work");
        assert_eq!(output.shape(), input.shape());
    }

    /// A pipeline fitted on one dataset can transform a differently sized one.
    #[test]
    fn test_pipeline_fit_then_transform_new_data() {
        let train = sample_data();
        let unseen = Array2::from_shape_vec((2, 2), vec![100.0_f64, 200.0, -5.0, -50.0])
            .expect("shape is valid");
        let mut pipeline = standard_pipeline();
        pipeline.fit(&train).expect("fit should succeed");
        let output = pipeline
            .transform(&unseen)
            .expect("transform new data should succeed");
        assert_eq!(output.shape(), unseen.shape());
    }

    /// `is_fitted` flips from false to true across a successful `fit`.
    #[test]
    fn test_pipeline_is_fitted_transitions() {
        let input = sample_data();
        let mut pipeline = standard_pipeline();
        assert!(!pipeline.is_fitted());
        pipeline.fit(&input).expect("fit should succeed");
        assert!(pipeline.is_fitted());
    }
}