sklears_compose/
fluent_api.rs

1//! Fluent API and advanced builder patterns for pipeline construction
2//!
3//! This module provides a fluent, chainable API for building complex machine learning
4//! pipelines with type safety, method chaining, and configuration presets.
5
6use scirs2_core::ndarray::{ArrayView1, ArrayView2};
7use sklears_core::{
8    error::Result as SklResult,
9    prelude::Fit,
10    traits::{Estimator, Untrained},
11    types::Float,
12};
13use std::collections::HashMap;
14use std::marker::PhantomData;
15
16use crate::{
17    ParallelConfig, ParallelExecutionStrategy, Pipeline, PipelinePredictor, PipelineStep,
18    SimdConfig,
19};
20
21/// Helper function to create high-performance SIMD configuration
22fn create_high_performance_simd_config() -> SimdConfig {
23    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
24    {
25        /// SimdConfig
26        SimdConfig {
27            use_avx2: true,
28            use_avx512: is_x86_feature_detected!("avx512f"),
29            use_fma: true,
30            vector_width: if is_x86_feature_detected!("avx512f") {
31                16
32            } else {
33                8
34            },
35            alignment: 64,
36            simd_threshold: 32,
37        }
38    }
39    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
40    {
41        /// SimdConfig
42        SimdConfig {
43            use_avx2: false,
44            use_avx512: false,
45            use_fma: false,
46            vector_width: 4,
47            alignment: 64,
48            simd_threshold: 32,
49        }
50    }
51}
52
/// Fluent pipeline builder with advanced chaining capabilities.
///
/// The `State` type parameter is a zero-sized marker (`BuilderEmpty`,
/// `BuilderWithSteps`, `BuilderWithEstimator`, `BuilderComplete`) that decides
/// which methods are available at each point of the chain. It defaults to
/// `BuilderEmpty`, the state produced by `new()` and the preset constructors.
#[derive(Debug)]
pub struct FluentPipelineBuilder<State = BuilderEmpty> {
    /// Marker for the current builder state; carries no data.
    state: PhantomData<State>,
    /// Pipeline steps as `(name, step)` pairs, in the order they were added.
    steps: Vec<(String, Box<dyn PipelineStep>)>,
    /// Final estimator, `None` until `estimator()` is called.
    estimator: Option<Box<dyn PipelinePredictor>>,
    /// Configuration options collected by the chainable setters and presets.
    config: PipelineConfiguration,
    /// Validation rules executed, in insertion order, by `build()`.
    validators: Vec<ValidationRule>,
    /// Names of the presets that were applied to this builder.
    presets: Vec<String>,
}
69
/// Builder state marker: no step has been added yet.
///
/// This is the state of builders returned by `FluentPipelineBuilder::new()`
/// and by the preset constructors.
#[derive(Debug)]
pub struct BuilderEmpty;
73
/// Builder state marker: the builder has left the empty state (via `step()`
/// or a chain's `done()`) and an estimator may now be attached.
#[derive(Debug)]
pub struct BuilderWithSteps;
76
/// Builder state marker: a final estimator has been attached; the builder can
/// be moved to the complete state with `finalize()`.
#[derive(Debug)]
pub struct BuilderWithEstimator;
79
/// Builder state marker: the builder has been finalized; `build()` and
/// `build_and_fit()` are available in this state.
#[derive(Debug)]
pub struct BuilderComplete;
82
/// Pipeline configuration collected by the fluent builder.
///
/// The derived `Default` leaves the three optional execution settings as
/// `None` and uses the `Default` implementation of each nested section.
#[derive(Debug, Clone, Default)]
pub struct PipelineConfiguration {
    /// Parallel execution config (`None` when not configured)
    pub parallel: Option<ParallelConfig>,
    /// SIMD optimization config (`None` when not configured)
    pub simd: Option<SimdConfig>,
    /// Execution strategy (`None` when not configured)
    pub execution_strategy: Option<ParallelExecutionStrategy>,
    /// Memory optimization settings
    pub memory_config: MemoryConfiguration,
    /// Caching settings
    pub caching: CachingConfiguration,
    /// Validation settings
    pub validation: ValidationConfiguration,
    /// Debug settings
    pub debug: DebugConfiguration,
}
101
/// Memory configuration section of [`PipelineConfiguration`].
#[derive(Debug, Clone)]
pub struct MemoryConfiguration {
    /// Use memory-efficient operations
    pub efficient_ops: bool,
    /// Chunk size for large datasets (`None` means no chunk size is set)
    pub chunk_size: Option<usize>,
    /// Memory limit in megabytes (`None` means no limit is set)
    pub memory_limit_mb: Option<usize>,
    /// Garbage collection frequency
    // NOTE(review): the unit of this frequency is not visible in this file — confirm with the consumer.
    pub gc_frequency: Option<usize>,
}
114
/// Caching configuration section of [`PipelineConfiguration`].
#[derive(Debug, Clone)]
pub struct CachingConfiguration {
    /// Enable intermediate result caching
    pub enabled: bool,
    /// Cache directory path (`None` when no directory is configured)
    pub cache_dir: Option<String>,
    /// Maximum cache size in megabytes
    pub max_size_mb: Option<usize>,
    /// Cache time-to-live in seconds
    pub ttl_sec: Option<usize>,
    /// Cache eviction strategy
    pub strategy: CacheStrategy,
}
129
/// Cache strategy selected in [`CachingConfiguration::strategy`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CacheStrategy {
    /// Least Recently Used (the strategy chosen by `CachingConfiguration::default()`)
    LRU,
    /// Least Frequently Used
    LFU,
    /// Time-based expiration
    TimeExpire,
    /// Size-based eviction
    SizeBased,
}
142
/// Validation configuration section of [`PipelineConfiguration`].
#[derive(Debug, Clone)]
pub struct ValidationConfiguration {
    /// Enable input validation
    pub validate_input: bool,
    /// Enable output validation
    pub validate_output: bool,
    /// Enable pipeline structure validation
    pub validate_structure: bool,
    /// Validation level (thoroughness of the checks)
    pub level: ValidationLevel,
}
155
/// Validation level selected in [`ValidationConfiguration::level`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationLevel {
    /// No validation
    None,
    /// Basic validation (the level chosen by `ValidationConfiguration::default()`)
    Basic,
    /// Comprehensive validation
    Comprehensive,
    /// Strict validation with type checking
    Strict,
}
168
/// Debug configuration section of [`PipelineConfiguration`].
#[derive(Debug, Clone)]
pub struct DebugConfiguration {
    /// Enable debug mode
    pub enabled: bool,
    /// Log level
    pub log_level: LogLevel,
    /// Enable profiling
    pub profiling: bool,
    /// Enable execution tracing
    pub tracing: bool,
}
181
/// Log level selected in [`DebugConfiguration::log_level`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogLevel {
    /// Error messages
    Error,
    /// Warnings
    Warn,
    /// Informational messages (the level chosen by `DebugConfiguration::default()`)
    Info,
    /// Debug messages
    Debug,
    /// Trace messages
    Trace,
}
196
/// Validation rule for pipeline construction.
///
/// Rules are registered with `with_validation_rule()` and invoked by
/// `FluentPipelineBuilder::<BuilderComplete>::build()`; the first rule that
/// returns an error aborts the build with that error.
pub struct ValidationRule {
    /// Rule name
    pub name: String,
    /// Rule description
    pub description: String,
    /// Validation function: receives the completed builder and returns
    /// `Ok(())` when the rule is satisfied.
    pub validator: Box<dyn Fn(&FluentPipelineBuilder<BuilderComplete>) -> SklResult<()>>,
}
206
207impl std::fmt::Debug for ValidationRule {
208    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209        f.debug_struct("ValidationRule")
210            .field("name", &self.name)
211            .field("description", &self.description)
212            .field("validator", &"<function>")
213            .finish()
214    }
215}
216
/// Configuration preset for common pipeline patterns.
///
/// Consumed by `FluentPipelineBuilder::with_preset`, which currently applies
/// only `config` and records `name`.
#[derive(Debug, Clone)]
pub struct ConfigurationPreset {
    /// Preset name
    pub name: String,
    /// Human-readable description
    pub description: String,
    /// Configuration applied to the builder
    pub config: PipelineConfiguration,
    /// Default steps
    pub default_steps: Vec<PresetStep>,
}
229
/// Preset step definition: a declarative description of one pipeline step.
#[derive(Debug, Clone)]
pub struct PresetStep {
    /// Step name
    pub name: String,
    /// Step type, given as a string identifier
    pub step_type: String,
    /// Parameters, keyed by parameter name
    pub parameters: HashMap<String, PresetParameter>,
}
240
/// Preset parameter: a dynamically typed value stored in [`PresetStep::parameters`].
#[derive(Debug, Clone)]
pub enum PresetParameter {
    /// Floating-point value
    Float(f64),
    /// Signed integer value
    Int(i64),
    /// Boolean value
    Bool(bool),
    /// String value
    String(String),
    /// List of parameters (may be nested)
    Array(Vec<PresetParameter>),
}
255
impl Default for MemoryConfiguration {
    /// Returns the default memory settings: efficient operations enabled,
    /// a chunk size of 10000, no memory limit and a GC frequency of 100.
    fn default() -> Self {
        Self {
            efficient_ops: true,
            chunk_size: Some(10000),
            memory_limit_mb: None,
            gc_frequency: Some(100),
        }
    }
}
266
impl Default for CachingConfiguration {
    /// Returns the default caching settings: caching disabled, no cache
    /// directory, a 1024 MB size cap, a 3600 s TTL and the LRU strategy.
    fn default() -> Self {
        Self {
            enabled: false,
            cache_dir: None,
            max_size_mb: Some(1024),
            ttl_sec: Some(3600),
            strategy: CacheStrategy::LRU,
        }
    }
}
278
impl Default for ValidationConfiguration {
    /// Returns the default validation settings: input, output and structure
    /// validation all enabled at the `Basic` level.
    fn default() -> Self {
        Self {
            validate_input: true,
            validate_output: true,
            validate_structure: true,
            level: ValidationLevel::Basic,
        }
    }
}
289
impl Default for DebugConfiguration {
    /// Returns the default debug settings: debug mode, profiling and tracing
    /// all disabled, with the log level set to `Info`.
    fn default() -> Self {
        Self {
            enabled: false,
            log_level: LogLevel::Info,
            profiling: false,
            tracing: false,
        }
    }
}
300
301impl FluentPipelineBuilder<BuilderEmpty> {
302    /// Create a new fluent pipeline builder
303    #[must_use]
304    pub fn new() -> Self {
305        Self {
306            state: PhantomData,
307            steps: Vec::new(),
308            estimator: None,
309            config: PipelineConfiguration::default(),
310            validators: Vec::new(),
311            presets: Vec::new(),
312        }
313    }
314
315    /// Create builder with a configuration preset
316    #[must_use]
317    pub fn with_preset(preset: ConfigurationPreset) -> Self {
318        let mut builder = Self::new();
319        builder.config = preset.config;
320        builder.presets.push(preset.name);
321        builder
322    }
323
324    /// Apply a common preset for data science workflows
325    #[must_use]
326    pub fn data_science_preset() -> Self {
327        let config = PipelineConfiguration {
328            parallel: Some(ParallelConfig::default()),
329            simd: Some(SimdConfig::default()),
330            execution_strategy: Some(ParallelExecutionStrategy::DataParallel { chunk_size: 5000 }),
331            memory_config: MemoryConfiguration {
332                efficient_ops: true,
333                chunk_size: Some(5000),
334                memory_limit_mb: Some(2048),
335                gc_frequency: Some(50),
336            },
337            caching: CachingConfiguration {
338                enabled: true,
339                cache_dir: Some("/tmp/sklearn_cache".to_string()),
340                max_size_mb: Some(512),
341                ttl_sec: Some(1800),
342                strategy: CacheStrategy::LRU,
343            },
344            validation: ValidationConfiguration {
345                validate_input: true,
346                validate_output: true,
347                validate_structure: true,
348                level: ValidationLevel::Comprehensive,
349            },
350            debug: DebugConfiguration {
351                enabled: true,
352                log_level: LogLevel::Info,
353                profiling: false,
354                tracing: false,
355            },
356        };
357
358        let mut builder = Self::new();
359        builder.config = config;
360        builder.presets.push("data_science".to_string());
361        builder
362    }
363
364    /// Apply a preset for high-performance computing
365    #[must_use]
366    pub fn high_performance_preset() -> Self {
367        let config = PipelineConfiguration {
368            parallel: Some(ParallelConfig {
369                num_workers: num_cpus::get() * 2,
370                work_stealing: true,
371                ..ParallelConfig::default()
372            }),
373            simd: Some(create_high_performance_simd_config()),
374            execution_strategy: Some(ParallelExecutionStrategy::FullParallel),
375            memory_config: MemoryConfiguration {
376                efficient_ops: true,
377                chunk_size: Some(100_000),
378                memory_limit_mb: Some(8192),
379                gc_frequency: Some(200),
380            },
381            caching: CachingConfiguration {
382                enabled: true,
383                cache_dir: Some("/tmp/hpc_cache".to_string()),
384                max_size_mb: Some(2048),
385                ttl_sec: Some(7200),
386                strategy: CacheStrategy::SizeBased,
387            },
388            validation: ValidationConfiguration {
389                validate_input: false,
390                validate_output: false,
391                validate_structure: false,
392                level: ValidationLevel::None,
393            },
394            debug: DebugConfiguration {
395                enabled: false,
396                log_level: LogLevel::Error,
397                profiling: true,
398                tracing: false,
399            },
400        };
401
402        let mut builder = Self::new();
403        builder.config = config;
404        builder.presets.push("high_performance".to_string());
405        builder
406    }
407
408    /// Apply a preset for development and debugging
409    #[must_use]
410    pub fn development_preset() -> Self {
411        let config = PipelineConfiguration {
412            parallel: None,
413            simd: None,
414            execution_strategy: None,
415            memory_config: MemoryConfiguration {
416                efficient_ops: false,
417                chunk_size: Some(1000),
418                memory_limit_mb: Some(512),
419                gc_frequency: Some(10),
420            },
421            caching: CachingConfiguration {
422                enabled: false,
423                ..CachingConfiguration::default()
424            },
425            validation: ValidationConfiguration {
426                validate_input: true,
427                validate_output: true,
428                validate_structure: true,
429                level: ValidationLevel::Strict,
430            },
431            debug: DebugConfiguration {
432                enabled: true,
433                log_level: LogLevel::Debug,
434                profiling: true,
435                tracing: true,
436            },
437        };
438
439        let mut builder = Self::new();
440        builder.config = config;
441        builder.presets.push("development".to_string());
442        builder
443    }
444}
445
446impl<State> FluentPipelineBuilder<State> {
447    /// Configure parallel execution
448    #[must_use]
449    pub fn parallel(mut self, config: ParallelConfig) -> Self {
450        self.config.parallel = Some(config);
451        self
452    }
453
454    /// Configure SIMD optimizations
455    #[must_use]
456    pub fn simd(mut self, config: SimdConfig) -> Self {
457        self.config.simd = Some(config);
458        self
459    }
460
461    /// Set execution strategy
462    #[must_use]
463    pub fn execution_strategy(mut self, strategy: ParallelExecutionStrategy) -> Self {
464        self.config.execution_strategy = Some(strategy);
465        self
466    }
467
468    /// Configure memory settings
469    #[must_use]
470    pub fn memory(mut self, config: MemoryConfiguration) -> Self {
471        self.config.memory_config = config;
472        self
473    }
474
475    /// Configure caching
476    #[must_use]
477    pub fn caching(mut self, config: CachingConfiguration) -> Self {
478        self.config.caching = config;
479        self
480    }
481
482    /// Configure validation
483    #[must_use]
484    pub fn validation(mut self, config: ValidationConfiguration) -> Self {
485        self.config.validation = config;
486        self
487    }
488
489    /// Configure debug settings
490    #[must_use]
491    pub fn debug(mut self, config: DebugConfiguration) -> Self {
492        self.config.debug = config;
493        self
494    }
495
496    /// Add a validation rule
497    #[must_use]
498    pub fn with_validation_rule(mut self, rule: ValidationRule) -> Self {
499        self.validators.push(rule);
500        self
501    }
502
503    /// Enable memory optimization
504    #[must_use]
505    pub fn memory_optimized(mut self) -> Self {
506        self.config.memory_config.efficient_ops = true;
507        self.config.memory_config.chunk_size = Some(50000);
508        self
509    }
510
511    /// Enable high performance mode
512    #[must_use]
513    pub fn high_performance(mut self) -> Self {
514        self.config.parallel = Some(ParallelConfig::default());
515        self.config.simd = Some(SimdConfig::default());
516        self.config.execution_strategy = Some(ParallelExecutionStrategy::FullParallel);
517        self
518    }
519
520    /// Enable development mode (with debugging and validation)
521    #[must_use]
522    pub fn development_mode(mut self) -> Self {
523        self.config.debug.enabled = true;
524        self.config.debug.log_level = LogLevel::Debug;
525        self.config.validation.level = ValidationLevel::Strict;
526        self
527    }
528}
529
530impl FluentPipelineBuilder<BuilderEmpty> {
531    /// Add the first step to the pipeline
532    pub fn step<S: Into<String>>(
533        mut self,
534        name: S,
535        step: Box<dyn PipelineStep>,
536    ) -> FluentPipelineBuilder<BuilderWithSteps> {
537        self.steps.push((name.into(), step));
538        /// FluentPipelineBuilder
539        FluentPipelineBuilder {
540            state: PhantomData,
541            steps: self.steps,
542            estimator: self.estimator,
543            config: self.config,
544            validators: self.validators,
545            presets: self.presets,
546        }
547    }
548
549    /// Start with a preprocessing chain
550    #[must_use]
551    pub fn preprocessing(self) -> PreprocessingChain {
552        /// PreprocessingChain
553        PreprocessingChain {
554            builder: self,
555            preprocessing_steps: Vec::new(),
556        }
557    }
558
559    /// Start with a feature engineering chain
560    #[must_use]
561    pub fn feature_engineering(self) -> FeatureEngineeringChain {
562        /// FeatureEngineeringChain
563        FeatureEngineeringChain {
564            builder: self,
565            feature_steps: Vec::new(),
566        }
567    }
568}
569
570impl FluentPipelineBuilder<BuilderWithSteps> {
571    /// Add another step to the pipeline
572    pub fn step<S: Into<String>>(mut self, name: S, step: Box<dyn PipelineStep>) -> Self {
573        self.steps.push((name.into(), step));
574        self
575    }
576
577    /// Add the final estimator
578    #[must_use]
579    pub fn estimator(
580        mut self,
581        estimator: Box<dyn PipelinePredictor>,
582    ) -> FluentPipelineBuilder<BuilderWithEstimator> {
583        self.estimator = Some(estimator);
584        /// FluentPipelineBuilder
585        FluentPipelineBuilder {
586            state: PhantomData,
587            steps: self.steps,
588            estimator: self.estimator,
589            config: self.config,
590            validators: self.validators,
591            presets: self.presets,
592        }
593    }
594
595    /// Create a feature union at this point
596    pub fn feature_union<F>(mut self, union_fn: F) -> Self
597    where
598        F: FnOnce(FeatureUnionBuilder) -> FeatureUnionBuilder,
599    {
600        let union_builder = FeatureUnionBuilder::new();
601        let union_builder = union_fn(union_builder);
602        let feature_union = union_builder.build();
603
604        self.steps
605            .push(("feature_union".to_string(), Box::new(feature_union)));
606        self
607    }
608
609    /// Add conditional execution
610    pub fn when<F>(mut self, condition: F, then_step: Box<dyn PipelineStep>) -> Self
611    where
612        F: Fn(&ArrayView2<Float>) -> bool + 'static,
613    {
614        // In a real implementation, this would wrap the step in a conditional wrapper
615        // For now, we'll just add the step directly
616        self.steps.push(("conditional".to_string(), then_step));
617        self
618    }
619}
620
621impl FluentPipelineBuilder<BuilderWithEstimator> {
622    /// Finalize the pipeline configuration
623    #[must_use]
624    pub fn finalize(self) -> FluentPipelineBuilder<BuilderComplete> {
625        /// FluentPipelineBuilder
626        FluentPipelineBuilder {
627            state: PhantomData,
628            steps: self.steps,
629            estimator: self.estimator,
630            config: self.config,
631            validators: self.validators,
632            presets: self.presets,
633        }
634    }
635}
636
637impl FluentPipelineBuilder<BuilderComplete> {
638    /// Build the final pipeline
639    pub fn build(self) -> SklResult<Pipeline<Untrained>> {
640        // Run validation rules
641        for validator in &self.validators {
642            (validator.validator)(&self)?;
643        }
644
645        // Create pipeline builder
646        let mut pipeline_builder = Pipeline::builder();
647
648        // Add steps
649        for (name, step) in self.steps {
650            pipeline_builder = pipeline_builder.step(&name, step);
651        }
652
653        // Add estimator if present
654        if let Some(estimator) = self.estimator {
655            pipeline_builder = pipeline_builder.estimator(estimator);
656        }
657
658        // Build and return
659        Ok(pipeline_builder.build())
660    }
661
662    /// Build and immediately fit the pipeline
663    pub fn build_and_fit(
664        self,
665        x: &ArrayView2<Float>,
666        y: &Option<&ArrayView1<Float>>,
667    ) -> SklResult<crate::Pipeline<crate::pipeline::PipelineTrained>> {
668        let pipeline = self.build()?;
669        pipeline.fit(x, y)
670    }
671}
672
/// Preprocessing chain builder.
///
/// Created by `FluentPipelineBuilder::preprocessing()`; collects
/// preprocessing steps and appends them to the wrapped builder in `done()`.
#[derive(Debug)]
pub struct PreprocessingChain {
    /// The main builder this chain returns to in `done()`.
    builder: FluentPipelineBuilder<BuilderEmpty>,
    /// Pending `(name, step)` pairs, in the order they were requested.
    preprocessing_steps: Vec<(String, Box<dyn PipelineStep>)>,
}
679
680impl PreprocessingChain {
681    /// Add a standard scaler
682    #[must_use]
683    pub fn standard_scaler(mut self) -> Self {
684        // In a real implementation, this would create an actual StandardScaler
685        self.preprocessing_steps.push((
686            "standard_scaler".to_string(),
687            Box::new(crate::MockTransformer::new()),
688        ));
689        self
690    }
691
692    /// Add a min-max scaler
693    #[must_use]
694    pub fn min_max_scaler(mut self, feature_range: (f64, f64)) -> Self {
695        self.preprocessing_steps.push((
696            "min_max_scaler".to_string(),
697            Box::new(crate::MockTransformer::new()),
698        ));
699        self
700    }
701
702    /// Add robust scaling
703    #[must_use]
704    pub fn robust_scaler(mut self) -> Self {
705        self.preprocessing_steps.push((
706            "robust_scaler".to_string(),
707            Box::new(crate::MockTransformer::new()),
708        ));
709        self
710    }
711
712    /// Add missing value imputation
713    #[must_use]
714    pub fn impute_missing(mut self, strategy: ImputationStrategy) -> Self {
715        self.preprocessing_steps.push((
716            "imputer".to_string(),
717            Box::new(crate::MockTransformer::new()),
718        ));
719        self
720    }
721
722    /// Finish preprocessing and return to main builder
723    #[must_use]
724    pub fn done(mut self) -> FluentPipelineBuilder<BuilderWithSteps> {
725        for (name, step) in self.preprocessing_steps {
726            self.builder.steps.push((name, step));
727        }
728
729        /// FluentPipelineBuilder
730        FluentPipelineBuilder {
731            state: PhantomData,
732            steps: self.builder.steps,
733            estimator: self.builder.estimator,
734            config: self.builder.config,
735            validators: self.builder.validators,
736            presets: self.builder.presets,
737        }
738    }
739}
740
/// Feature engineering chain builder.
///
/// Created by `FluentPipelineBuilder::feature_engineering()`; collects
/// feature steps and appends them to the wrapped builder in `done()`.
#[derive(Debug)]
pub struct FeatureEngineeringChain {
    /// The main builder this chain returns to in `done()`.
    builder: FluentPipelineBuilder<BuilderEmpty>,
    /// Pending `(name, step)` pairs, in the order they were requested.
    feature_steps: Vec<(String, Box<dyn PipelineStep>)>,
}
747
748impl FeatureEngineeringChain {
749    /// Add polynomial features
750    #[must_use]
751    pub fn polynomial_features(mut self, degree: usize, include_bias: bool) -> Self {
752        self.feature_steps.push((
753            "polynomial_features".to_string(),
754            Box::new(crate::MockTransformer::new()),
755        ));
756        self
757    }
758
759    /// Add feature selection
760    #[must_use]
761    pub fn feature_selection(mut self, k_best: usize) -> Self {
762        self.feature_steps.push((
763            "feature_selection".to_string(),
764            Box::new(crate::MockTransformer::new()),
765        ));
766        self
767    }
768
769    /// Add PCA
770    #[must_use]
771    pub fn pca(mut self, n_components: Option<usize>) -> Self {
772        self.feature_steps
773            .push(("pca".to_string(), Box::new(crate::MockTransformer::new())));
774        self
775    }
776
777    /// Add text vectorization
778    #[must_use]
779    pub fn text_vectorizer(mut self, max_features: Option<usize>) -> Self {
780        self.feature_steps.push((
781            "text_vectorizer".to_string(),
782            Box::new(crate::MockTransformer::new()),
783        ));
784        self
785    }
786
787    /// Finish feature engineering and return to main builder
788    #[must_use]
789    pub fn done(mut self) -> FluentPipelineBuilder<BuilderWithSteps> {
790        for (name, step) in self.feature_steps {
791            self.builder.steps.push((name, step));
792        }
793
794        /// FluentPipelineBuilder
795        FluentPipelineBuilder {
796            state: PhantomData,
797            steps: self.builder.steps,
798            estimator: self.builder.estimator,
799            config: self.builder.config,
800            validators: self.builder.validators,
801            presets: self.builder.presets,
802        }
803    }
804}
805
/// Feature union builder for parallel feature extraction.
///
/// Used through `FluentPipelineBuilder::feature_union`, which passes a fresh
/// builder to a user closure and calls `build()` on the result.
#[derive(Debug)]
pub struct FeatureUnionBuilder {
    /// Registered transformers as `(name, transformer)` pairs, in insertion order.
    transformers: Vec<(String, Box<dyn PipelineStep>)>,
    /// Optional weights keyed by name (`None` until `weights()` is called).
    weights: Option<HashMap<String, f64>>,
    /// Optional number of parallel jobs (`None` until `n_jobs()` is called).
    n_jobs: Option<i32>,
}
813
814impl FeatureUnionBuilder {
815    /// Create a new feature union builder
816    #[must_use]
817    pub fn new() -> Self {
818        Self {
819            transformers: Vec::new(),
820            weights: None,
821            n_jobs: None,
822        }
823    }
824
825    /// Add a transformer to the union
826    pub fn add_transformer<S: Into<String>>(
827        mut self,
828        name: S,
829        transformer: Box<dyn PipelineStep>,
830    ) -> Self {
831        self.transformers.push((name.into(), transformer));
832        self
833    }
834
835    /// Set transformer weights
836    #[must_use]
837    pub fn weights(mut self, weights: HashMap<String, f64>) -> Self {
838        self.weights = Some(weights);
839        self
840    }
841
842    /// Set number of parallel jobs
843    #[must_use]
844    pub fn n_jobs(mut self, n_jobs: i32) -> Self {
845        self.n_jobs = Some(n_jobs);
846        self
847    }
848
849    /// Build the feature union
850    #[must_use]
851    pub fn build(self) -> crate::MockTransformer {
852        // In a real implementation, this would create an actual FeatureUnion
853        crate::MockTransformer::new()
854    }
855}
856
/// Imputation strategy accepted by `PreprocessingChain::impute_missing`.
// NOTE(review): the per-variant descriptions below are inferred from the variant names;
// no imputation logic is visible in this file — confirm against the real imputer.
#[derive(Debug, Clone, PartialEq)]
pub enum ImputationStrategy {
    /// Mean
    Mean,
    /// Median
    Median,
    /// Most frequent value
    MostFrequent,
    /// Constant, carrying the `f64` value to use
    Constant(f64),
    /// Forward
    Forward,
    /// Backward
    Backward,
}
873
/// Quick access functions for common patterns.
///
/// A unit struct used purely as a namespace: every associated function
/// returns a pre-configured, still empty `FluentPipelineBuilder`.
pub struct PipelinePresets;
876
877impl PipelinePresets {
878    /// Create a basic classification pipeline
879    #[must_use]
880    pub fn classification() -> FluentPipelineBuilder<BuilderEmpty> {
881        FluentPipelineBuilder::data_science_preset()
882    }
883
884    /// Create a basic regression pipeline  
885    #[must_use]
886    pub fn regression() -> FluentPipelineBuilder<BuilderEmpty> {
887        FluentPipelineBuilder::data_science_preset()
888    }
889
890    /// Create a text processing pipeline
891    #[must_use]
892    pub fn text_processing() -> FluentPipelineBuilder<BuilderEmpty> {
893        FluentPipelineBuilder::new()
894            .memory_optimized()
895            .validation(ValidationConfiguration {
896                validate_input: true,
897                validate_output: true,
898                validate_structure: true,
899                level: ValidationLevel::Basic,
900            })
901    }
902
903    /// Create an image processing pipeline
904    #[must_use]
905    pub fn image_processing() -> FluentPipelineBuilder<BuilderEmpty> {
906        FluentPipelineBuilder::high_performance_preset().memory(MemoryConfiguration {
907            efficient_ops: true,
908            chunk_size: Some(1000),
909            memory_limit_mb: Some(4096),
910            gc_frequency: Some(50),
911        })
912    }
913
914    /// Create a time series pipeline
915    #[must_use]
916    pub fn time_series() -> FluentPipelineBuilder<BuilderEmpty> {
917        FluentPipelineBuilder::new().caching(CachingConfiguration {
918            enabled: true,
919            cache_dir: Some("/tmp/ts_cache".to_string()),
920            max_size_mb: Some(512),
921            ttl_sec: Some(1800),
922            strategy: CacheStrategy::TimeExpire,
923        })
924    }
925}
926
impl Default for FluentPipelineBuilder<BuilderEmpty> {
    /// Equivalent to [`FluentPipelineBuilder::new`]: an empty builder with the
    /// default configuration.
    fn default() -> Self {
        Self::new()
    }
}
932
impl Default for FeatureUnionBuilder {
    /// Equivalent to [`FeatureUnionBuilder::new`]: no transformers, weights or
    /// job count.
    fn default() -> Self {
        Self::new()
    }
}
938
// Unit tests for the fluent builder: they inspect private builder fields
// directly, which is possible because this module is a child of the one
// under test.
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh builder holds no steps and no estimator.
    #[test]
    fn test_fluent_builder_creation() {
        let builder = FluentPipelineBuilder::new();
        assert!(builder.steps.is_empty());
        assert!(builder.estimator.is_none());
    }

    // The data science preset enables parallel + SIMD and records its name.
    #[test]
    fn test_preset_application() {
        let builder = FluentPipelineBuilder::data_science_preset();
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.simd.is_some());
        assert!(builder.presets.contains(&"data_science".to_string()));
    }

    // The high-performance preset disables validation and records its name.
    #[test]
    fn test_high_performance_preset() {
        let builder = FluentPipelineBuilder::high_performance_preset();
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.simd.is_some());
        assert_eq!(builder.config.validation.level, ValidationLevel::None);
        assert!(builder.presets.contains(&"high_performance".to_string()));
    }

    // The development preset turns on debug logging and strict validation.
    #[test]
    fn test_development_preset() {
        let builder = FluentPipelineBuilder::development_preset();
        assert!(builder.config.debug.enabled);
        assert_eq!(builder.config.debug.log_level, LogLevel::Debug);
        assert_eq!(builder.config.validation.level, ValidationLevel::Strict);
    }

    // Convenience setters can be chained and each leaves its mark on the config.
    #[test]
    fn test_method_chaining() {
        let builder = FluentPipelineBuilder::new()
            .memory_optimized()
            .high_performance()
            .development_mode();

        assert!(builder.config.memory_config.efficient_ops);
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.debug.enabled);
    }

    // Each preprocessing call queues exactly one pending step.
    #[test]
    fn test_preprocessing_chain() {
        let chain = FluentPipelineBuilder::new()
            .preprocessing()
            .standard_scaler()
            .min_max_scaler((0.0, 1.0))
            .impute_missing(ImputationStrategy::Mean);

        assert_eq!(chain.preprocessing_steps.len(), 3);
    }

    // Each feature engineering call queues exactly one pending step.
    #[test]
    fn test_feature_engineering_chain() {
        let chain = FluentPipelineBuilder::new()
            .feature_engineering()
            .polynomial_features(2, true)
            .feature_selection(100)
            .pca(Some(50));

        assert_eq!(chain.feature_steps.len(), 3);
    }

    // The union builder stores added transformers and the job count.
    #[test]
    fn test_feature_union_builder() {
        let union_builder = FeatureUnionBuilder::new()
            .add_transformer("scaler", Box::new(crate::MockTransformer::new()))
            .add_transformer("pca", Box::new(crate::MockTransformer::new()))
            .n_jobs(2);

        assert_eq!(union_builder.transformers.len(), 2);
        assert_eq!(union_builder.n_jobs, Some(2));
    }

    // The default configuration matches the documented section defaults.
    #[test]
    fn test_configuration_defaults() {
        let config = PipelineConfiguration::default();
        assert!(config.memory_config.efficient_ops);
        assert_eq!(config.caching.strategy, CacheStrategy::LRU);
        assert_eq!(config.validation.level, ValidationLevel::Basic);
        assert_eq!(config.debug.log_level, LogLevel::Info);
    }

    // Every quick-access preset yields a builder without any steps.
    #[test]
    fn test_pipeline_presets() {
        let classification = PipelinePresets::classification();
        let regression = PipelinePresets::regression();
        let text = PipelinePresets::text_processing();
        let image = PipelinePresets::image_processing();
        let ts = PipelinePresets::time_series();

        // All presets should create valid builders
        assert!(classification.steps.is_empty());
        assert!(regression.steps.is_empty());
        assert!(text.steps.is_empty());
        assert!(image.steps.is_empty());
        assert!(ts.steps.is_empty());
    }

    // A hand-built memory configuration keeps the values it was given.
    #[test]
    fn test_memory_configuration() {
        let memory_config = MemoryConfiguration {
            efficient_ops: true,
            chunk_size: Some(5000),
            memory_limit_mb: Some(1024),
            gc_frequency: Some(100),
        };

        assert!(memory_config.efficient_ops);
        assert_eq!(memory_config.chunk_size, Some(5000));
        assert_eq!(memory_config.memory_limit_mb, Some(1024));
    }

    // A hand-built caching configuration keeps the values it was given.
    #[test]
    fn test_caching_configuration() {
        let cache_config = CachingConfiguration {
            enabled: true,
            cache_dir: Some("/tmp/test".to_string()),
            max_size_mb: Some(512),
            ttl_sec: Some(1800),
            strategy: CacheStrategy::LFU,
        };

        assert!(cache_config.enabled);
        assert_eq!(cache_config.strategy, CacheStrategy::LFU);
        assert_eq!(cache_config.max_size_mb, Some(512));
    }
}