sklears_preprocessing/
lib.rs

1#![allow(non_snake_case)]
2#![allow(missing_docs)]
3#![allow(deprecated)]
4//! Preprocessing utilities for sklears
5//!
6//! This crate provides data preprocessing utilities including:
7//! - Scaling (StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler, Normalizer)
8//! - Encoding (LabelEncoder, OneHotEncoder, OrdinalEncoder)
9//! - Imputation (SimpleImputer, KNNImputer, IterativeImputer, GAINImputer)
10//! - Feature engineering (PolynomialFeatures, SplineTransformer, PowerTransformer, FunctionTransformer)
11//! - Text processing (TfIdfVectorizer, TextTokenizer, NgramGenerator, TextSimilarity, BagOfWordsEmbedding)
12//! - Advanced pipelines (conditional steps, parallel branches, caching, dynamic construction)
13
14#![allow(dead_code)]
15#![allow(clippy::manual_clamp)]
16#![allow(clippy::single_char_add_str)]
17#![allow(clippy::let_and_return)]
18#![allow(clippy::map_clone)]
19#![allow(clippy::manual_slice_size_calculation)]
20#![allow(clippy::needless_range_loop)]
21#![allow(clippy::if_same_then_else)]
22#![allow(clippy::arc_with_non_send_sync)]
23#![allow(clippy::excessive_precision)]
24#![allow(clippy::type_complexity)]
25#![allow(clippy::too_many_arguments)]
26#![allow(clippy::only_used_in_recursion)]
27#![allow(clippy::new_without_default)]
28#![allow(clippy::derivable_impls)]
29#![allow(clippy::ptr_arg)]
30
31pub mod adaptive;
32pub mod automated_feature_engineering;
33pub mod binarization;
34pub mod column_transformer;
35pub mod dimensionality_reduction;
36pub mod encoding;
37pub mod feature_engineering;
38pub mod feature_union;
39pub mod functional;
40// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
41// pub mod gpu_acceleration;
42pub mod image_preprocessing;
43pub mod imputation;
44pub mod kernel_centerer;
45pub mod label_binarization;
46// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
47// pub mod lazy_evaluation;
48// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
49// pub mod memory_management;
50pub mod outlier_detection;
51pub mod outlier_transformation;
52pub mod pipeline;
53pub mod quantile_transformer;
54pub mod robust_preprocessing;
55pub mod scaling;
56pub mod simd_optimizations;
57pub mod sparse_optimizations;
58pub mod streaming;
59pub mod temporal;
60pub mod text;
61pub mod winsorization;
62
63pub use adaptive::{
64    AdaptationStrategy, AdaptiveConfig, AdaptiveParameterSelector, DataCharacteristics,
65    DistributionType, ImputationParameters, OutlierDetectionParameters, ParameterEvaluation,
66    ParameterRecommendations, ScalingParameters,
67    TransformationParameters as AdaptiveTransformationParameters,
68};
69pub use automated_feature_engineering::{
70    AutoFeatureConfig, AutoFeatureEngineer, AutoFeatureEngineerFitted, Domain, GenerationStrategy,
71    MathFunction, SelectionMethod, TransformationFunction, TransformationType,
72};
73pub use binarization::{
74    Binarizer, BinarizerConfig, DiscretizationStrategy, DiscretizerEncoding, KBinsDiscretizer,
75    KBinsDiscretizerConfig,
76};
77pub use column_transformer::{
78    ColumnSelector, ColumnTransformer, ColumnTransformerConfig, DataType, RemainderStrategy,
79    TransformerStep, TransformerWrapper,
80};
81pub use dimensionality_reduction::{
82    ICAConfig, ICAFitted, IcaAlgorithm, IcaFunction, LDAConfig, LDAFitted, LdaSolver, NMFConfig,
83    NMFFitted, NmfInit, NmfSolver, PCAConfig, PCAFitted, PcaSolver, ICA, LDA, NMF, PCA,
84};
85pub use encoding::{
86    BinaryEncoder, BinaryEncoderConfig, CategoricalEmbedding, CategoricalEmbeddingConfig,
87    FrequencyEncoder, FrequencyEncoderConfig, HashEncoder, HashEncoderConfig, LabelEncoder,
88    OneHotEncoder, OrdinalEncoder, RareStrategy, TargetEncoder,
89};
90pub use feature_engineering::{
91    ExtrapolationStrategy, FeatureOrder, KnotStrategy, PolynomialFeatures, PowerMethod,
92    PowerTransformer, PowerTransformerConfig, SplineTransformer, SplineTransformerConfig,
93};
94pub use feature_union::{FeatureUnion, FeatureUnionConfig, FeatureUnionStep};
95// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
96// pub use gpu_acceleration::{
97//     GpuBackend, GpuConfig, GpuContextManager, GpuMinMaxScaler, GpuMinMaxScalerFitted,
98//     GpuPerformanceStats, GpuStandardScaler, GpuStandardScalerFitted,
99// };
100pub use image_preprocessing::{
101    ColorSpace, ColorSpaceTransformer, EdgeDetectionMethod, EdgeDetector, ImageAugmenter,
102    ImageAugmenterConfig, ImageFeatureExtractor, ImageNormalizationStrategy, ImageNormalizer,
103    ImageNormalizerConfig, ImageNormalizerFitted, ImageResizer,
104    InterpolationMethod as ImageInterpolationMethod,
105};
106pub use imputation::{
107    BaseImputationMethod, DistanceMetric, FeatureMissingStats, GAINImputer, GAINImputerConfig,
108    ImputationStrategy, IterativeImputer, KNNImputer, MissingPattern, MissingValueAnalysis,
109    MissingnessType, MultipleImputationResult, MultipleImputer, MultipleImputerConfig,
110    OutlierAwareImputer, OutlierAwareImputerConfig, OutlierAwareStatistics, OutlierAwareStrategy,
111    OverallMissingStats, SimpleImputer,
112};
113pub use kernel_centerer::KernelCenterer;
114pub use label_binarization::{
115    LabelBinarizer, LabelBinarizerConfig, MultiLabelBinarizer, MultiLabelBinarizerConfig,
116};
117// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
118// pub use lazy_evaluation::{LazyConfig, LazyGraph, LazyNode, LazyOp, LazyPreprocessor};
119// TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
120// pub use memory_management::{
121//     AdvancedMemoryConfig, AdvancedMemoryPool, AdvancedMemoryStats, CacheAlignedAllocator,
122//     CompressedData, CopyOnWriteArray, MemoryCompressor, MemoryMappedDataset, MemoryPool,
123//     MemoryPoolConfig, MemoryStats, PrefetchPattern, StreamingMemoryTransformer,
124// };
125pub use outlier_detection::{
126    FeatureOutlierParams, OutlierDetectionMethod, OutlierDetectionResult, OutlierDetector,
127    OutlierDetectorConfig, OutlierStatistics, OutlierSummary,
128};
129pub use outlier_transformation::{
130    FeatureTransformationParams, GlobalTransformationParams, OutlierTransformationConfig,
131    OutlierTransformationMethod, OutlierTransformer, TransformationParameters,
132};
133pub use pipeline::{
134    AdvancedPipeline, AdvancedPipelineBuilder, AdvancedPipelineConfig, BranchCombinationStrategy,
135    CacheConfig, CacheStats, ConditionalStep, ConditionalStepConfig, DynamicPipeline,
136    ErrorHandlingStrategy, ParallelBranchConfig, ParallelBranches, PipelineStep,
137    TransformationCache,
138};
139pub use quantile_transformer::{QuantileOutput, QuantileTransformer, QuantileTransformerConfig};
140pub use robust_preprocessing::{
141    MissingValueStats, RobustPreprocessingStats, RobustPreprocessor, RobustPreprocessorConfig,
142    RobustStrategy, TransformationStats,
143};
144pub use scaling::{
145    FeatureWiseScaler, FeatureWiseScalerConfig, MaxAbsScaler, MinMaxScaler, NormType, Normalizer,
146    OutlierAwareScaler, OutlierAwareScalerConfig, OutlierAwareScalingStrategy, OutlierScalingStats,
147    RobustScaler, RobustStatistic, ScalingMethod, StandardScaler, UnitVectorScaler,
148    UnitVectorScalerConfig,
149};
150pub use simd_optimizations::{
151    add_scalar_f64_simd, add_vectors_f64_simd, mean_f64_simd, min_max_f64_simd,
152    mul_scalar_f64_simd, ndarray_ops, sub_vectors_f64_simd, variance_f64_simd, SimdConfig,
153};
154pub use sparse_optimizations::{
155    sparse_matvec, SparseConfig, SparseFormat, SparseMatrix, SparseStandardScaler,
156    SparseStandardScalerFitted,
157};
158pub use streaming::{
159    AdaptiveConfig as StreamingAdaptiveConfig, AdaptiveParameterManager,
160    AdaptiveStreamingMinMaxScaler, AdaptiveStreamingStandardScaler, IncrementalPCA,
161    IncrementalPCAStats, MiniBatchConfig, MiniBatchIterator, MiniBatchPipeline, MiniBatchStats,
162    MiniBatchStreamingTransformer, MiniBatchTransformer, MultiQuantileEstimator,
163    OnlineMADEstimator, OnlineMADStats, OnlineQuantileEstimator, OnlineQuantileStats,
164    ParameterUpdate, StreamCharacteristics, StreamingConfig, StreamingLabelEncoder,
165    StreamingMinMaxScaler, StreamingPipeline, StreamingRobustScaler, StreamingRobustScalerStats,
166    StreamingSimpleImputer, StreamingStandardScaler, StreamingStats, StreamingTransformer,
167};
168pub use temporal::{
169    ChangePointDetector, ChangePointDetectorConfig, ChangePointMethod, DateComponents, DateTime,
170    DecompositionMethod, FillMethod, FourierFeatureGenerator, FourierFeatureGeneratorConfig,
171    InterpolationMethod, LagFeatureGenerator, LagFeatureGeneratorConfig,
172    MultiVariateTimeSeriesAligner, ResamplingMethod, SeasonalDecomposer, SeasonalDecomposerConfig,
173    StationarityMethod, StationarityTransformer, StationarityTransformerConfig,
174    StationarityTransformerFitted, TemporalFeatureExtractor, TemporalFeatureExtractorConfig,
175    TimeSeriesInterpolator, TimeSeriesResampler, TrendDetector, TrendDetectorConfig, TrendMethod,
176};
177pub use text::{
178    BagOfWordsConfig, BagOfWordsEmbedding, NgramGenerator, NgramGeneratorConfig, NgramType,
179    NormalizationStrategy, SimilarityMetric, TextSimilarity, TextSimilarityConfig, TextTokenizer,
180    TextTokenizerConfig, TfIdfVectorizer, TfIdfVectorizerConfig, TokenizationStrategy,
181};
182pub use winsorization::{NanStrategy, WinsorizationStats, Winsorizer, WinsorizerConfig};
183
184// Re-export functional APIs (excluding complex transformations that are commented out)
185pub use functional::{
186    add_dummy_feature, binarize, label_binarize, maxabs_scale, minmax_scale, normalize,
187    robust_scale, scale,
188};
189
190/// Prelude module for convenient imports
191pub mod prelude {
192    pub use crate::adaptive::{
193        AdaptationStrategy, AdaptiveConfig, AdaptiveParameterSelector, DataCharacteristics,
194        DistributionType, ImputationParameters, OutlierDetectionParameters, ParameterEvaluation,
195        ParameterRecommendations, ScalingParameters,
196        TransformationParameters as AdaptiveTransformationParameters,
197    };
198    pub use crate::automated_feature_engineering::{
199        AutoFeatureConfig, AutoFeatureEngineer, AutoFeatureEngineerFitted, Domain,
200        GenerationStrategy, MathFunction, SelectionMethod, TransformationFunction,
201        TransformationType,
202    };
203    pub use crate::binarization::{
204        Binarizer, BinarizerConfig, DiscretizationStrategy, DiscretizerEncoding, KBinsDiscretizer,
205        KBinsDiscretizerConfig,
206    };
207    pub use crate::column_transformer::{
208        ColumnSelector, ColumnTransformer, ColumnTransformerConfig, DataType, RemainderStrategy,
209        TransformerStep, TransformerWrapper,
210    };
211    pub use crate::dimensionality_reduction::{
212        ICAConfig, ICAFitted, IcaAlgorithm, IcaFunction, LDAConfig, LDAFitted, LdaSolver,
213        NMFConfig, NMFFitted, NmfInit, NmfSolver, PCAConfig, PCAFitted, PcaSolver, ICA, LDA, NMF,
214        PCA,
215    };
216    pub use crate::encoding::{
217        BinaryEncoder, BinaryEncoderConfig, CategoricalEmbedding, CategoricalEmbeddingConfig,
218        FrequencyEncoder, FrequencyEncoderConfig, HashEncoder, HashEncoderConfig, LabelEncoder,
219        OneHotEncoder, OrdinalEncoder, RareStrategy, TargetEncoder,
220    };
221    pub use crate::feature_engineering::{
222        ExtrapolationStrategy, FeatureOrder, KnotStrategy, PolynomialFeatures, PowerMethod,
223        PowerTransformer, PowerTransformerConfig, SplineTransformer, SplineTransformerConfig,
224    };
225    pub use crate::feature_union::{FeatureUnion, FeatureUnionConfig, FeatureUnionStep};
226    // TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
227    // pub use crate::gpu_acceleration::{
228    //     GpuBackend, GpuConfig, GpuContextManager, GpuMinMaxScaler, GpuMinMaxScalerFitted,
229    //     GpuPerformanceStats, GpuStandardScaler, GpuStandardScalerFitted,
230    // };
231    pub use crate::image_preprocessing::{
232        ColorSpace, ColorSpaceTransformer, EdgeDetectionMethod, EdgeDetector, ImageAugmenter,
233        ImageAugmenterConfig, ImageFeatureExtractor, ImageNormalizationStrategy, ImageNormalizer,
234        ImageNormalizerConfig, ImageNormalizerFitted, ImageResizer,
235        InterpolationMethod as ImageInterpolationMethod,
236    };
237    pub use crate::imputation::{
238        BaseImputationMethod, DistanceMetric, FeatureMissingStats, GAINImputer, GAINImputerConfig,
239        ImputationStrategy, IterativeImputer, KNNImputer, MissingPattern, MissingValueAnalysis,
240        MissingnessType, MultipleImputationResult, MultipleImputer, MultipleImputerConfig,
241        OutlierAwareImputer, OutlierAwareImputerConfig, OutlierAwareStatistics,
242        OutlierAwareStrategy, OverallMissingStats, SimpleImputer,
243    };
244    pub use crate::kernel_centerer::KernelCenterer;
245    pub use crate::label_binarization::{
246        LabelBinarizer, LabelBinarizerConfig, MultiLabelBinarizer, MultiLabelBinarizerConfig,
247    };
248    // TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
249    // pub use crate::lazy_evaluation::{LazyConfig, LazyGraph, LazyNode, LazyOp, LazyPreprocessor};
250    // TODO: Depends on scirs2_core::memory::BufferPool which doesn't exist yet
251    // pub use crate::memory_management::{
252    //     AdvancedMemoryConfig, AdvancedMemoryPool, AdvancedMemoryStats, CacheAlignedAllocator,
253    //     CompressedData, CopyOnWriteArray, MemoryCompressor, MemoryMappedDataset, MemoryPool,
254    //     MemoryPoolConfig, MemoryStats, PrefetchPattern, StreamingMemoryTransformer,
255    // };
256    pub use crate::outlier_detection::{
257        FeatureOutlierParams, OutlierDetectionMethod, OutlierDetectionResult, OutlierDetector,
258        OutlierDetectorConfig, OutlierStatistics, OutlierSummary,
259    };
260    pub use crate::outlier_transformation::{
261        FeatureTransformationParams, GlobalTransformationParams, OutlierTransformationConfig,
262        OutlierTransformationMethod, OutlierTransformer,
263    };
264    pub use crate::pipeline::{
265        AdvancedPipeline, AdvancedPipelineBuilder, AdvancedPipelineConfig,
266        BranchCombinationStrategy, CacheConfig, CacheStats, ConditionalStep, ConditionalStepConfig,
267        DynamicPipeline, ErrorHandlingStrategy, ParallelBranchConfig, ParallelBranches,
268        PipelineStep, TransformationCache,
269    };
270    pub use crate::quantile_transformer::{
271        QuantileOutput, QuantileTransformer, QuantileTransformerConfig,
272    };
273    pub use crate::robust_preprocessing::{
274        MissingValueStats, RobustPreprocessingStats, RobustPreprocessor, RobustPreprocessorConfig,
275        RobustStrategy, TransformationStats,
276    };
277    pub use crate::scaling::{
278        FeatureWiseScaler, FeatureWiseScalerConfig, MaxAbsScaler, MinMaxScaler, NormType,
279        Normalizer, OutlierAwareScaler, OutlierAwareScalerConfig, OutlierAwareScalingStrategy,
280        OutlierScalingStats, RobustScaler, RobustStatistic, ScalingMethod, StandardScaler,
281        UnitVectorScaler, UnitVectorScalerConfig,
282    };
283    pub use crate::simd_optimizations::{
284        add_scalar_f64_simd, add_vectors_f64_simd, mean_f64_simd, min_max_f64_simd,
285        mul_scalar_f64_simd, ndarray_ops, sub_vectors_f64_simd, variance_f64_simd, SimdConfig,
286    };
287    pub use crate::sparse_optimizations::{
288        sparse_matvec, SparseConfig, SparseFormat, SparseMatrix, SparseStandardScaler,
289        SparseStandardScalerFitted,
290    };
291    pub use crate::streaming::{
292        AdaptiveConfig as StreamingAdaptiveConfig, AdaptiveParameterManager,
293        AdaptiveStreamingMinMaxScaler, AdaptiveStreamingStandardScaler, IncrementalPCA,
294        IncrementalPCAStats, MiniBatchConfig, MiniBatchIterator, MiniBatchPipeline, MiniBatchStats,
295        MiniBatchStreamingTransformer, MiniBatchTransformer, MultiQuantileEstimator,
296        OnlineMADEstimator, OnlineMADStats, OnlineQuantileEstimator, OnlineQuantileStats,
297        ParameterUpdate, StreamCharacteristics, StreamingConfig, StreamingLabelEncoder,
298        StreamingMinMaxScaler, StreamingPipeline, StreamingRobustScaler,
299        StreamingRobustScalerStats, StreamingSimpleImputer, StreamingStandardScaler,
300        StreamingStats, StreamingTransformer,
301    };
302    pub use crate::temporal::{
303        ChangePointDetector, ChangePointDetectorConfig, ChangePointMethod, DateComponents,
304        DateTime, DecompositionMethod, FillMethod, FourierFeatureGenerator,
305        FourierFeatureGeneratorConfig, InterpolationMethod, LagFeatureGenerator,
306        LagFeatureGeneratorConfig, MultiVariateTimeSeriesAligner, ResamplingMethod,
307        SeasonalDecomposer, SeasonalDecomposerConfig, StationarityMethod, StationarityTransformer,
308        StationarityTransformerConfig, StationarityTransformerFitted, TemporalFeatureExtractor,
309        TemporalFeatureExtractorConfig, TimeSeriesInterpolator, TimeSeriesResampler, TrendDetector,
310        TrendDetectorConfig, TrendMethod,
311    };
312    pub use crate::text::{
313        BagOfWordsConfig, BagOfWordsEmbedding, NgramGenerator, NgramGeneratorConfig, NgramType,
314        NormalizationStrategy, SimilarityMetric, TextSimilarity, TextSimilarityConfig,
315        TextTokenizer, TextTokenizerConfig, TfIdfVectorizer, TfIdfVectorizerConfig,
316        TokenizationStrategy,
317    };
318    pub use crate::winsorization::{NanStrategy, WinsorizationStats, Winsorizer, WinsorizerConfig};
319
320    // Re-export functional APIs (excluding complex transformations that are commented out)
321    pub use crate::functional::{
322        add_dummy_feature, binarize, label_binarize, maxabs_scale, minmax_scale, normalize,
323        robust_scale, scale,
324    };
325}