1use scirs2_core::ndarray::{ArrayView1, ArrayView2};
7use sklears_core::{
8 error::Result as SklResult,
9 prelude::Fit,
10 traits::{Estimator, Untrained},
11 types::Float,
12};
13use std::collections::HashMap;
14use std::marker::PhantomData;
15
16use crate::{
17 ParallelConfig, ParallelExecutionStrategy, Pipeline, PipelinePredictor, PipelineStep,
18 SimdConfig,
19};
20
21fn create_high_performance_simd_config() -> SimdConfig {
23 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
24 {
25 SimdConfig {
27 use_avx2: true,
28 use_avx512: is_x86_feature_detected!("avx512f"),
29 use_fma: true,
30 vector_width: if is_x86_feature_detected!("avx512f") {
31 16
32 } else {
33 8
34 },
35 alignment: 64,
36 simd_threshold: 32,
37 }
38 }
39 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
40 {
41 SimdConfig {
43 use_avx2: false,
44 use_avx512: false,
45 use_fma: false,
46 vector_width: 4,
47 alignment: 64,
48 simd_threshold: 32,
49 }
50 }
51}
52
/// Type-state builder that collects named steps, an optional estimator and
/// configuration before a `Pipeline` is assembled.
///
/// `State` is a marker type (`BuilderEmpty`, `BuilderWithSteps`,
/// `BuilderWithEstimator`, `BuilderComplete`) that selects which methods are
/// available; it is never stored as a value.
#[derive(Debug)]
pub struct FluentPipelineBuilder<State = BuilderEmpty> {
    /// Zero-sized marker carrying the current builder state.
    state: PhantomData<State>,
    /// Named pipeline steps in the order they were added.
    steps: Vec<(String, Box<dyn PipelineStep>)>,
    /// Final predictor, once one has been attached.
    estimator: Option<Box<dyn PipelinePredictor>>,
    /// Configuration accumulated through presets and fluent setters.
    config: PipelineConfiguration,
    /// Rules executed by `build` before the pipeline is assembled.
    validators: Vec<ValidationRule>,
    /// Names of the presets this builder was created from.
    presets: Vec<String>,
}
69
/// Marker state of a freshly created builder: no step has been added yet.
/// This is the default `State` of `FluentPipelineBuilder`.
#[derive(Debug)]
pub struct BuilderEmpty;
73
/// Marker state reached after `step` or a chain's `done`; more steps or an
/// estimator can be attached from here.
#[derive(Debug)]
pub struct BuilderWithSteps;
76
/// Marker state reached after `estimator`; the builder can be finalized.
#[derive(Debug)]
pub struct BuilderWithEstimator;
79
/// Marker state reached after `finalize`; `build` and `build_and_fit` are
/// available in this state.
#[derive(Debug)]
pub struct BuilderComplete;
82
/// Aggregated settings carried by a `FluentPipelineBuilder`.
///
/// The derived `Default` leaves the three optional sections as `None` and
/// uses the `Default` implementation of each nested configuration.
#[derive(Debug, Clone, Default)]
pub struct PipelineConfiguration {
    /// Parallel-execution settings; `None` when not configured.
    pub parallel: Option<ParallelConfig>,
    /// SIMD settings; `None` when not configured.
    pub simd: Option<SimdConfig>,
    /// Parallel execution strategy; `None` when not configured.
    pub execution_strategy: Option<ParallelExecutionStrategy>,
    /// Memory-related settings.
    pub memory_config: MemoryConfiguration,
    /// Caching settings.
    pub caching: CachingConfiguration,
    /// Validation settings.
    pub validation: ValidationConfiguration,
    /// Debugging settings.
    pub debug: DebugConfiguration,
}
101
/// Memory-related settings stored in `PipelineConfiguration`.
///
/// This file only stores these values; how they are applied is decided by
/// whatever consumes the configuration.
#[derive(Debug, Clone)]
pub struct MemoryConfiguration {
    /// Whether memory-efficient operations are requested.
    pub efficient_ops: bool,
    /// Optional chunk size (presumably a number of samples — TODO confirm).
    pub chunk_size: Option<usize>,
    /// Optional memory limit; the `_mb` suffix suggests megabytes.
    pub memory_limit_mb: Option<usize>,
    /// Optional garbage-collection frequency; the unit is not defined in this
    /// file — TODO confirm against the consumer.
    pub gc_frequency: Option<usize>,
}
114
/// Caching settings stored in `PipelineConfiguration`.
#[derive(Debug, Clone)]
pub struct CachingConfiguration {
    /// Whether caching is turned on.
    pub enabled: bool,
    /// Optional cache directory, kept as a plain string path.
    pub cache_dir: Option<String>,
    /// Optional maximum cache size; the `_mb` suffix suggests megabytes.
    pub max_size_mb: Option<usize>,
    /// Optional time-to-live; the `_sec` suffix suggests seconds.
    pub ttl_sec: Option<usize>,
    /// Strategy selector for the cache.
    pub strategy: CacheStrategy,
}
129
/// Selector for the cache strategy.
///
/// The variants are only labels here; no eviction logic is implemented in
/// this file.
// NOTE(review): `LRU`/`LFU` are all-caps acronyms (clippy's
// `upper_case_acronyms` style); renaming would break callers, so they are
// left unchanged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CacheStrategy {
    /// Presumably least-recently-used eviction.
    LRU,
    /// Presumably least-frequently-used eviction.
    LFU,
    /// Presumably expiry based on entry age.
    TimeExpire,
    /// Presumably eviction based on cache size.
    SizeBased,
}
142
/// Validation settings stored in `PipelineConfiguration`.
#[derive(Debug, Clone)]
pub struct ValidationConfiguration {
    /// Whether input validation is requested.
    pub validate_input: bool,
    /// Whether output validation is requested.
    pub validate_output: bool,
    /// Whether structural validation is requested.
    pub validate_structure: bool,
    /// Requested validation level.
    pub level: ValidationLevel,
}
155
/// Validation level selector.
///
/// The presets in this file use `None` for high performance,
/// `Comprehensive` for data science and `Strict` for development; `Basic`
/// is the default.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationLevel {
    /// No validation.
    None,
    /// Basic validation (the default level).
    Basic,
    /// Comprehensive validation.
    Comprehensive,
    /// Strict validation.
    Strict,
}
168
/// Debugging settings stored in `PipelineConfiguration`.
#[derive(Debug, Clone)]
pub struct DebugConfiguration {
    /// Whether debug mode is turned on.
    pub enabled: bool,
    /// Requested log level.
    pub log_level: LogLevel,
    /// Whether profiling is requested.
    pub profiling: bool,
    /// Whether tracing is requested.
    pub tracing: bool,
}
181
/// Log level selector used by `DebugConfiguration`.
///
/// No ordering is derived, so levels can only be compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogLevel {
    /// Error level.
    Error,
    /// Warning level.
    Warn,
    /// Informational level (the default).
    Info,
    /// Debug level.
    Debug,
    /// Trace level.
    Trace,
}
196
/// A named check that is run against a completed builder before the pipeline
/// is assembled.
///
/// `Debug` cannot be derived because of the boxed closure, so it is
/// implemented by hand.
pub struct ValidationRule {
    /// Short identifier of the rule.
    pub name: String,
    /// Human-readable explanation of what the rule checks.
    pub description: String,
    /// The check itself; returning `Err` aborts `build` with that error.
    pub validator: Box<dyn Fn(&FluentPipelineBuilder<BuilderComplete>) -> SklResult<()>>,
}
206
207impl std::fmt::Debug for ValidationRule {
208 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
209 f.debug_struct("ValidationRule")
210 .field("name", &self.name)
211 .field("description", &self.description)
212 .field("validator", &"<function>")
213 .finish()
214 }
215}
216
/// A named, reusable bundle of configuration and step descriptions.
///
/// `FluentPipelineBuilder::with_preset` currently uses only `name` and
/// `config`; `description` and `default_steps` are carried but not applied
/// there.
#[derive(Debug, Clone)]
pub struct ConfigurationPreset {
    /// Preset name; recorded in the builder's preset list.
    pub name: String,
    /// Human-readable description of the preset.
    pub description: String,
    /// Configuration installed on the builder.
    pub config: PipelineConfiguration,
    /// Descriptions of the steps the preset suggests.
    pub default_steps: Vec<PresetStep>,
}
229
/// Declarative description of a step inside a `ConfigurationPreset`.
#[derive(Debug, Clone)]
pub struct PresetStep {
    /// Name of the step.
    pub name: String,
    /// Kind of step, given as a free-form string.
    pub step_type: String,
    /// Parameters for the step, keyed by parameter name.
    pub parameters: HashMap<String, PresetParameter>,
}
240
/// Dynamically typed parameter value used by `PresetStep`.
#[derive(Debug, Clone)]
pub enum PresetParameter {
    /// Floating-point value.
    Float(f64),
    /// Signed integer value.
    Int(i64),
    /// Boolean value.
    Bool(bool),
    /// Text value.
    String(String),
    /// List of nested parameter values.
    Array(Vec<PresetParameter>),
}
255
256impl Default for MemoryConfiguration {
257 fn default() -> Self {
258 Self {
259 efficient_ops: true,
260 chunk_size: Some(10000),
261 memory_limit_mb: None,
262 gc_frequency: Some(100),
263 }
264 }
265}
266
267impl Default for CachingConfiguration {
268 fn default() -> Self {
269 Self {
270 enabled: false,
271 cache_dir: None,
272 max_size_mb: Some(1024),
273 ttl_sec: Some(3600),
274 strategy: CacheStrategy::LRU,
275 }
276 }
277}
278
279impl Default for ValidationConfiguration {
280 fn default() -> Self {
281 Self {
282 validate_input: true,
283 validate_output: true,
284 validate_structure: true,
285 level: ValidationLevel::Basic,
286 }
287 }
288}
289
290impl Default for DebugConfiguration {
291 fn default() -> Self {
292 Self {
293 enabled: false,
294 log_level: LogLevel::Info,
295 profiling: false,
296 tracing: false,
297 }
298 }
299}
300
301impl FluentPipelineBuilder<BuilderEmpty> {
302 #[must_use]
304 pub fn new() -> Self {
305 Self {
306 state: PhantomData,
307 steps: Vec::new(),
308 estimator: None,
309 config: PipelineConfiguration::default(),
310 validators: Vec::new(),
311 presets: Vec::new(),
312 }
313 }
314
315 #[must_use]
317 pub fn with_preset(preset: ConfigurationPreset) -> Self {
318 let mut builder = Self::new();
319 builder.config = preset.config;
320 builder.presets.push(preset.name);
321 builder
322 }
323
324 #[must_use]
326 pub fn data_science_preset() -> Self {
327 let config = PipelineConfiguration {
328 parallel: Some(ParallelConfig::default()),
329 simd: Some(SimdConfig::default()),
330 execution_strategy: Some(ParallelExecutionStrategy::DataParallel { chunk_size: 5000 }),
331 memory_config: MemoryConfiguration {
332 efficient_ops: true,
333 chunk_size: Some(5000),
334 memory_limit_mb: Some(2048),
335 gc_frequency: Some(50),
336 },
337 caching: CachingConfiguration {
338 enabled: true,
339 cache_dir: Some("/tmp/sklearn_cache".to_string()),
340 max_size_mb: Some(512),
341 ttl_sec: Some(1800),
342 strategy: CacheStrategy::LRU,
343 },
344 validation: ValidationConfiguration {
345 validate_input: true,
346 validate_output: true,
347 validate_structure: true,
348 level: ValidationLevel::Comprehensive,
349 },
350 debug: DebugConfiguration {
351 enabled: true,
352 log_level: LogLevel::Info,
353 profiling: false,
354 tracing: false,
355 },
356 };
357
358 let mut builder = Self::new();
359 builder.config = config;
360 builder.presets.push("data_science".to_string());
361 builder
362 }
363
364 #[must_use]
366 pub fn high_performance_preset() -> Self {
367 let config = PipelineConfiguration {
368 parallel: Some(ParallelConfig {
369 num_workers: num_cpus::get() * 2,
370 work_stealing: true,
371 ..ParallelConfig::default()
372 }),
373 simd: Some(create_high_performance_simd_config()),
374 execution_strategy: Some(ParallelExecutionStrategy::FullParallel),
375 memory_config: MemoryConfiguration {
376 efficient_ops: true,
377 chunk_size: Some(100_000),
378 memory_limit_mb: Some(8192),
379 gc_frequency: Some(200),
380 },
381 caching: CachingConfiguration {
382 enabled: true,
383 cache_dir: Some("/tmp/hpc_cache".to_string()),
384 max_size_mb: Some(2048),
385 ttl_sec: Some(7200),
386 strategy: CacheStrategy::SizeBased,
387 },
388 validation: ValidationConfiguration {
389 validate_input: false,
390 validate_output: false,
391 validate_structure: false,
392 level: ValidationLevel::None,
393 },
394 debug: DebugConfiguration {
395 enabled: false,
396 log_level: LogLevel::Error,
397 profiling: true,
398 tracing: false,
399 },
400 };
401
402 let mut builder = Self::new();
403 builder.config = config;
404 builder.presets.push("high_performance".to_string());
405 builder
406 }
407
408 #[must_use]
410 pub fn development_preset() -> Self {
411 let config = PipelineConfiguration {
412 parallel: None,
413 simd: None,
414 execution_strategy: None,
415 memory_config: MemoryConfiguration {
416 efficient_ops: false,
417 chunk_size: Some(1000),
418 memory_limit_mb: Some(512),
419 gc_frequency: Some(10),
420 },
421 caching: CachingConfiguration {
422 enabled: false,
423 ..CachingConfiguration::default()
424 },
425 validation: ValidationConfiguration {
426 validate_input: true,
427 validate_output: true,
428 validate_structure: true,
429 level: ValidationLevel::Strict,
430 },
431 debug: DebugConfiguration {
432 enabled: true,
433 log_level: LogLevel::Debug,
434 profiling: true,
435 tracing: true,
436 },
437 };
438
439 let mut builder = Self::new();
440 builder.config = config;
441 builder.presets.push("development".to_string());
442 builder
443 }
444}
445
446impl<State> FluentPipelineBuilder<State> {
447 #[must_use]
449 pub fn parallel(mut self, config: ParallelConfig) -> Self {
450 self.config.parallel = Some(config);
451 self
452 }
453
454 #[must_use]
456 pub fn simd(mut self, config: SimdConfig) -> Self {
457 self.config.simd = Some(config);
458 self
459 }
460
461 #[must_use]
463 pub fn execution_strategy(mut self, strategy: ParallelExecutionStrategy) -> Self {
464 self.config.execution_strategy = Some(strategy);
465 self
466 }
467
468 #[must_use]
470 pub fn memory(mut self, config: MemoryConfiguration) -> Self {
471 self.config.memory_config = config;
472 self
473 }
474
475 #[must_use]
477 pub fn caching(mut self, config: CachingConfiguration) -> Self {
478 self.config.caching = config;
479 self
480 }
481
482 #[must_use]
484 pub fn validation(mut self, config: ValidationConfiguration) -> Self {
485 self.config.validation = config;
486 self
487 }
488
489 #[must_use]
491 pub fn debug(mut self, config: DebugConfiguration) -> Self {
492 self.config.debug = config;
493 self
494 }
495
496 #[must_use]
498 pub fn with_validation_rule(mut self, rule: ValidationRule) -> Self {
499 self.validators.push(rule);
500 self
501 }
502
503 #[must_use]
505 pub fn memory_optimized(mut self) -> Self {
506 self.config.memory_config.efficient_ops = true;
507 self.config.memory_config.chunk_size = Some(50000);
508 self
509 }
510
511 #[must_use]
513 pub fn high_performance(mut self) -> Self {
514 self.config.parallel = Some(ParallelConfig::default());
515 self.config.simd = Some(SimdConfig::default());
516 self.config.execution_strategy = Some(ParallelExecutionStrategy::FullParallel);
517 self
518 }
519
520 #[must_use]
522 pub fn development_mode(mut self) -> Self {
523 self.config.debug.enabled = true;
524 self.config.debug.log_level = LogLevel::Debug;
525 self.config.validation.level = ValidationLevel::Strict;
526 self
527 }
528}
529
530impl FluentPipelineBuilder<BuilderEmpty> {
531 pub fn step<S: Into<String>>(
533 mut self,
534 name: S,
535 step: Box<dyn PipelineStep>,
536 ) -> FluentPipelineBuilder<BuilderWithSteps> {
537 self.steps.push((name.into(), step));
538 FluentPipelineBuilder {
540 state: PhantomData,
541 steps: self.steps,
542 estimator: self.estimator,
543 config: self.config,
544 validators: self.validators,
545 presets: self.presets,
546 }
547 }
548
549 #[must_use]
551 pub fn preprocessing(self) -> PreprocessingChain {
552 PreprocessingChain {
554 builder: self,
555 preprocessing_steps: Vec::new(),
556 }
557 }
558
559 #[must_use]
561 pub fn feature_engineering(self) -> FeatureEngineeringChain {
562 FeatureEngineeringChain {
564 builder: self,
565 feature_steps: Vec::new(),
566 }
567 }
568}
569
570impl FluentPipelineBuilder<BuilderWithSteps> {
571 pub fn step<S: Into<String>>(mut self, name: S, step: Box<dyn PipelineStep>) -> Self {
573 self.steps.push((name.into(), step));
574 self
575 }
576
577 #[must_use]
579 pub fn estimator(
580 mut self,
581 estimator: Box<dyn PipelinePredictor>,
582 ) -> FluentPipelineBuilder<BuilderWithEstimator> {
583 self.estimator = Some(estimator);
584 FluentPipelineBuilder {
586 state: PhantomData,
587 steps: self.steps,
588 estimator: self.estimator,
589 config: self.config,
590 validators: self.validators,
591 presets: self.presets,
592 }
593 }
594
595 pub fn feature_union<F>(mut self, union_fn: F) -> Self
597 where
598 F: FnOnce(FeatureUnionBuilder) -> FeatureUnionBuilder,
599 {
600 let union_builder = FeatureUnionBuilder::new();
601 let union_builder = union_fn(union_builder);
602 let feature_union = union_builder.build();
603
604 self.steps
605 .push(("feature_union".to_string(), Box::new(feature_union)));
606 self
607 }
608
609 pub fn when<F>(mut self, condition: F, then_step: Box<dyn PipelineStep>) -> Self
611 where
612 F: Fn(&ArrayView2<Float>) -> bool + 'static,
613 {
614 self.steps.push(("conditional".to_string(), then_step));
617 self
618 }
619}
620
621impl FluentPipelineBuilder<BuilderWithEstimator> {
622 #[must_use]
624 pub fn finalize(self) -> FluentPipelineBuilder<BuilderComplete> {
625 FluentPipelineBuilder {
627 state: PhantomData,
628 steps: self.steps,
629 estimator: self.estimator,
630 config: self.config,
631 validators: self.validators,
632 presets: self.presets,
633 }
634 }
635}
636
637impl FluentPipelineBuilder<BuilderComplete> {
638 pub fn build(self) -> SklResult<Pipeline<Untrained>> {
640 for validator in &self.validators {
642 (validator.validator)(&self)?;
643 }
644
645 let mut pipeline_builder = Pipeline::builder();
647
648 for (name, step) in self.steps {
650 pipeline_builder = pipeline_builder.step(&name, step);
651 }
652
653 if let Some(estimator) = self.estimator {
655 pipeline_builder = pipeline_builder.estimator(estimator);
656 }
657
658 Ok(pipeline_builder.build())
660 }
661
662 pub fn build_and_fit(
664 self,
665 x: &ArrayView2<Float>,
666 y: &Option<&ArrayView1<Float>>,
667 ) -> SklResult<crate::Pipeline<crate::pipeline::PipelineTrained>> {
668 let pipeline = self.build()?;
669 pipeline.fit(x, y)
670 }
671}
672
/// Fluent sub-builder that collects preprocessing steps and hands them back
/// to the owning pipeline builder through `done`.
#[derive(Debug)]
pub struct PreprocessingChain {
    /// Builder this chain was started from.
    builder: FluentPipelineBuilder<BuilderEmpty>,
    /// Steps queued so far, in insertion order.
    preprocessing_steps: Vec<(String, Box<dyn PipelineStep>)>,
}
679
680impl PreprocessingChain {
681 #[must_use]
683 pub fn standard_scaler(mut self) -> Self {
684 self.preprocessing_steps.push((
686 "standard_scaler".to_string(),
687 Box::new(crate::MockTransformer::new()),
688 ));
689 self
690 }
691
692 #[must_use]
694 pub fn min_max_scaler(mut self, feature_range: (f64, f64)) -> Self {
695 self.preprocessing_steps.push((
696 "min_max_scaler".to_string(),
697 Box::new(crate::MockTransformer::new()),
698 ));
699 self
700 }
701
702 #[must_use]
704 pub fn robust_scaler(mut self) -> Self {
705 self.preprocessing_steps.push((
706 "robust_scaler".to_string(),
707 Box::new(crate::MockTransformer::new()),
708 ));
709 self
710 }
711
712 #[must_use]
714 pub fn impute_missing(mut self, strategy: ImputationStrategy) -> Self {
715 self.preprocessing_steps.push((
716 "imputer".to_string(),
717 Box::new(crate::MockTransformer::new()),
718 ));
719 self
720 }
721
722 #[must_use]
724 pub fn done(mut self) -> FluentPipelineBuilder<BuilderWithSteps> {
725 for (name, step) in self.preprocessing_steps {
726 self.builder.steps.push((name, step));
727 }
728
729 FluentPipelineBuilder {
731 state: PhantomData,
732 steps: self.builder.steps,
733 estimator: self.builder.estimator,
734 config: self.builder.config,
735 validators: self.builder.validators,
736 presets: self.builder.presets,
737 }
738 }
739}
740
/// Fluent sub-builder that collects feature-engineering steps and hands them
/// back to the owning pipeline builder through `done`.
#[derive(Debug)]
pub struct FeatureEngineeringChain {
    /// Builder this chain was started from.
    builder: FluentPipelineBuilder<BuilderEmpty>,
    /// Steps queued so far, in insertion order.
    feature_steps: Vec<(String, Box<dyn PipelineStep>)>,
}
747
748impl FeatureEngineeringChain {
749 #[must_use]
751 pub fn polynomial_features(mut self, degree: usize, include_bias: bool) -> Self {
752 self.feature_steps.push((
753 "polynomial_features".to_string(),
754 Box::new(crate::MockTransformer::new()),
755 ));
756 self
757 }
758
759 #[must_use]
761 pub fn feature_selection(mut self, k_best: usize) -> Self {
762 self.feature_steps.push((
763 "feature_selection".to_string(),
764 Box::new(crate::MockTransformer::new()),
765 ));
766 self
767 }
768
769 #[must_use]
771 pub fn pca(mut self, n_components: Option<usize>) -> Self {
772 self.feature_steps
773 .push(("pca".to_string(), Box::new(crate::MockTransformer::new())));
774 self
775 }
776
777 #[must_use]
779 pub fn text_vectorizer(mut self, max_features: Option<usize>) -> Self {
780 self.feature_steps.push((
781 "text_vectorizer".to_string(),
782 Box::new(crate::MockTransformer::new()),
783 ));
784 self
785 }
786
787 #[must_use]
789 pub fn done(mut self) -> FluentPipelineBuilder<BuilderWithSteps> {
790 for (name, step) in self.feature_steps {
791 self.builder.steps.push((name, step));
792 }
793
794 FluentPipelineBuilder {
796 state: PhantomData,
797 steps: self.builder.steps,
798 estimator: self.builder.estimator,
799 config: self.builder.config,
800 validators: self.builder.validators,
801 presets: self.builder.presets,
802 }
803 }
804}
805
/// Fluent builder that collects named transformers plus optional weights and
/// a job count for a feature union.
#[derive(Debug)]
pub struct FeatureUnionBuilder {
    /// Named transformers in the order they were added.
    transformers: Vec<(String, Box<dyn PipelineStep>)>,
    /// Optional weights (presumably keyed by transformer name — TODO
    /// confirm).
    weights: Option<HashMap<String, f64>>,
    /// Optional job count; the meaning of specific values is not defined in
    /// this file.
    n_jobs: Option<i32>,
}
813
814impl FeatureUnionBuilder {
815 #[must_use]
817 pub fn new() -> Self {
818 Self {
819 transformers: Vec::new(),
820 weights: None,
821 n_jobs: None,
822 }
823 }
824
825 pub fn add_transformer<S: Into<String>>(
827 mut self,
828 name: S,
829 transformer: Box<dyn PipelineStep>,
830 ) -> Self {
831 self.transformers.push((name.into(), transformer));
832 self
833 }
834
835 #[must_use]
837 pub fn weights(mut self, weights: HashMap<String, f64>) -> Self {
838 self.weights = Some(weights);
839 self
840 }
841
842 #[must_use]
844 pub fn n_jobs(mut self, n_jobs: i32) -> Self {
845 self.n_jobs = Some(n_jobs);
846 self
847 }
848
849 #[must_use]
851 pub fn build(self) -> crate::MockTransformer {
852 crate::MockTransformer::new()
854 }
855}
856
/// Strategy selector accepted by `PreprocessingChain::impute_missing`.
///
/// The imputation logic itself is not implemented in this file; the variant
/// descriptions below follow the variant names.
#[derive(Debug, Clone, PartialEq)]
pub enum ImputationStrategy {
    /// Presumably fills missing values with the mean.
    Mean,
    /// Presumably fills missing values with the median.
    Median,
    /// Presumably fills missing values with the most frequent value.
    MostFrequent,
    /// Carries a fixed value, presumably used as the fill value.
    Constant(f64),
    /// Presumably forward fill — TODO confirm.
    Forward,
    /// Presumably backward fill — TODO confirm.
    Backward,
}
873
/// Namespace type whose associated functions return pre-configured pipeline
/// builders for common workloads.
///
/// It derives `Debug` so that, like the other public types in this module,
/// it can be formatted with `{:?}`.
#[derive(Debug)]
pub struct PipelinePresets;
876
877impl PipelinePresets {
878 #[must_use]
880 pub fn classification() -> FluentPipelineBuilder<BuilderEmpty> {
881 FluentPipelineBuilder::data_science_preset()
882 }
883
884 #[must_use]
886 pub fn regression() -> FluentPipelineBuilder<BuilderEmpty> {
887 FluentPipelineBuilder::data_science_preset()
888 }
889
890 #[must_use]
892 pub fn text_processing() -> FluentPipelineBuilder<BuilderEmpty> {
893 FluentPipelineBuilder::new()
894 .memory_optimized()
895 .validation(ValidationConfiguration {
896 validate_input: true,
897 validate_output: true,
898 validate_structure: true,
899 level: ValidationLevel::Basic,
900 })
901 }
902
903 #[must_use]
905 pub fn image_processing() -> FluentPipelineBuilder<BuilderEmpty> {
906 FluentPipelineBuilder::high_performance_preset().memory(MemoryConfiguration {
907 efficient_ops: true,
908 chunk_size: Some(1000),
909 memory_limit_mb: Some(4096),
910 gc_frequency: Some(50),
911 })
912 }
913
914 #[must_use]
916 pub fn time_series() -> FluentPipelineBuilder<BuilderEmpty> {
917 FluentPipelineBuilder::new().caching(CachingConfiguration {
918 enabled: true,
919 cache_dir: Some("/tmp/ts_cache".to_string()),
920 max_size_mb: Some(512),
921 ttl_sec: Some(1800),
922 strategy: CacheStrategy::TimeExpire,
923 })
924 }
925}
926
impl Default for FluentPipelineBuilder<BuilderEmpty> {
    /// Equivalent to `FluentPipelineBuilder::new()`.
    fn default() -> Self {
        Self::new()
    }
}
932
impl Default for FeatureUnionBuilder {
    /// Equivalent to `FeatureUnionBuilder::new()`.
    fn default() -> Self {
        Self::new()
    }
}
938
// NOTE(review): no non-snake-case identifier appears in this module, so the
// `allow` below looks unnecessary — confirm before removing.
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;

    // A fresh builder starts with no steps and no estimator.
    #[test]
    fn test_fluent_builder_creation() {
        let builder = FluentPipelineBuilder::new();
        assert!(builder.steps.is_empty());
        assert!(builder.estimator.is_none());
    }

    // The data-science preset installs parallel and SIMD settings and records
    // its name.
    #[test]
    fn test_preset_application() {
        let builder = FluentPipelineBuilder::data_science_preset();
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.simd.is_some());
        assert!(builder.presets.contains(&"data_science".to_string()));
    }

    // The high-performance preset disables validation and records its name.
    #[test]
    fn test_high_performance_preset() {
        let builder = FluentPipelineBuilder::high_performance_preset();
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.simd.is_some());
        assert_eq!(builder.config.validation.level, ValidationLevel::None);
        assert!(builder.presets.contains(&"high_performance".to_string()));
    }

    // The development preset enables debugging and strict validation.
    #[test]
    fn test_development_preset() {
        let builder = FluentPipelineBuilder::development_preset();
        assert!(builder.config.debug.enabled);
        assert_eq!(builder.config.debug.log_level, LogLevel::Debug);
        assert_eq!(builder.config.validation.level, ValidationLevel::Strict);
    }

    // Convenience setters can be chained and each leaves its mark on the
    // configuration.
    #[test]
    fn test_method_chaining() {
        let builder = FluentPipelineBuilder::new()
            .memory_optimized()
            .high_performance()
            .development_mode();

        assert!(builder.config.memory_config.efficient_ops);
        assert!(builder.config.parallel.is_some());
        assert!(builder.config.debug.enabled);
    }

    // Each preprocessing call queues exactly one step.
    #[test]
    fn test_preprocessing_chain() {
        let chain = FluentPipelineBuilder::new()
            .preprocessing()
            .standard_scaler()
            .min_max_scaler((0.0, 1.0))
            .impute_missing(ImputationStrategy::Mean);

        assert_eq!(chain.preprocessing_steps.len(), 3);
    }

    // Each feature-engineering call queues exactly one step.
    #[test]
    fn test_feature_engineering_chain() {
        let chain = FluentPipelineBuilder::new()
            .feature_engineering()
            .polynomial_features(2, true)
            .feature_selection(100)
            .pca(Some(50));

        assert_eq!(chain.feature_steps.len(), 3);
    }

    // The union builder keeps every added transformer and the job count.
    #[test]
    fn test_feature_union_builder() {
        let union_builder = FeatureUnionBuilder::new()
            .add_transformer("scaler", Box::new(crate::MockTransformer::new()))
            .add_transformer("pca", Box::new(crate::MockTransformer::new()))
            .n_jobs(2);

        assert_eq!(union_builder.transformers.len(), 2);
        assert_eq!(union_builder.n_jobs, Some(2));
    }

    // Pins the documented defaults of the nested configurations.
    #[test]
    fn test_configuration_defaults() {
        let config = PipelineConfiguration::default();
        assert!(config.memory_config.efficient_ops);
        assert_eq!(config.caching.strategy, CacheStrategy::LRU);
        assert_eq!(config.validation.level, ValidationLevel::Basic);
        assert_eq!(config.debug.log_level, LogLevel::Info);
    }

    // Every domain preset returns a builder that has no steps yet.
    #[test]
    fn test_pipeline_presets() {
        let classification = PipelinePresets::classification();
        let regression = PipelinePresets::regression();
        let text = PipelinePresets::text_processing();
        let image = PipelinePresets::image_processing();
        let ts = PipelinePresets::time_series();

        assert!(classification.steps.is_empty());
        assert!(regression.steps.is_empty());
        assert!(text.steps.is_empty());
        assert!(image.steps.is_empty());
        assert!(ts.steps.is_empty());
    }

    // Struct-literal construction of the memory configuration round-trips.
    #[test]
    fn test_memory_configuration() {
        let memory_config = MemoryConfiguration {
            efficient_ops: true,
            chunk_size: Some(5000),
            memory_limit_mb: Some(1024),
            gc_frequency: Some(100),
        };

        assert!(memory_config.efficient_ops);
        assert_eq!(memory_config.chunk_size, Some(5000));
        assert_eq!(memory_config.memory_limit_mb, Some(1024));
    }

    // Struct-literal construction of the caching configuration round-trips.
    #[test]
    fn test_caching_configuration() {
        let cache_config = CachingConfiguration {
            enabled: true,
            cache_dir: Some("/tmp/test".to_string()),
            max_size_mb: Some(512),
            ttl_sec: Some(1800),
            strategy: CacheStrategy::LFU,
        };

        assert!(cache_config.enabled);
        assert_eq!(cache_config.strategy, CacheStrategy::LFU);
        assert_eq!(cache_config.max_size_mb, Some(512));
    }
}