Skip to main content

scirs2_transform/
lib.rs

1#![allow(clippy::all)]
2//! # SciRS2 Transform - Data Transformation and Preprocessing
3//!
4//! **scirs2-transform** provides comprehensive data transformation utilities for machine learning:
5//! normalization, feature engineering, dimensionality reduction, encoding, imputation, and
6//! pipelines, plus optional SIMD acceleration (`simd` feature) and out-of-core processing for large datasets.
7//!
8//! ## 🎯 Key Features
9//!
10//! - **Normalization**: Min-max, Z-score, robust scaling, quantile normalization
11//! - **Feature Engineering**: Polynomial features, interaction terms, binning
12//! - **Dimensionality Reduction**: PCA, SVD, t-SNE, UMAP, LDA
13//! - **Encoding**: One-hot, label, ordinal, target encoding
14//! - **Imputation**: Mean, median, mode, KNN, iterative imputation
15//! - **Pipelines**: Chained transformations with fit/transform API
16//! - **Performance**: SIMD operations, streaming, out-of-core processing
17//!
18//! ## 📦 Module Overview
19//!
20//! | SciRS2 Module | scikit-learn Equivalent | Description |
21//! |---------------|-------------------------|-------------|
22//! | `normalize` | `sklearn.preprocessing.StandardScaler` | Data normalization/standardization |
23//! | `features` | `sklearn.preprocessing.PolynomialFeatures` | Feature engineering |
24//! | `reduction` | `sklearn.decomposition.PCA` | Dimensionality reduction |
25//! | `encoding` | `sklearn.preprocessing.OneHotEncoder` | Categorical encoding |
26//! | `impute` | `sklearn.impute.SimpleImputer` | Missing value imputation |
27//! | `pipeline` | `sklearn.pipeline.Pipeline` | Transformation pipelines |
28//!
29//! ## 🚀 Quick Start
30//!
31//! ```toml
32//! [dependencies]
33//! scirs2-transform = "0.1.5"
34//! ```
35//!
36//! ```rust,no_run
37//! use scirs2_transform::normalize::{normalize_array, NormalizationMethod};
38//! use scirs2_core::ndarray::Array2;
39//!
40//! // Standardize data (Z-score normalization)
41//! let data = Array2::<f64>::zeros((100, 5));
42//! let normalized = normalize_array(&data, NormalizationMethod::ZScore, 0).expect("should succeed");
43//! ```
44//!
45//! ## 🔒 Version: 0.1.5 (January 15, 2026)
46
47#![warn(missing_docs)]
48#![allow(clippy::too_many_arguments)]
49
50/// Error handling for the transformation module (`TransformError` and `Result`, re-exported at the crate root)
51pub mod error;
52
53/// Basic normalization methods for data (`normalize_array`, `normalize_vector`, `Normalizer`, `NormalizationMethod`)
54pub mod normalize;
55
56/// Feature engineering techniques (polynomial features, power/log transforms, binarization, discretization)
57pub mod features;
58
59/// Dimensionality reduction algorithms (e.g. `PCA`, `TruncatedSVD`, `TSNE`, `UMAP`, `LDA`, `Isomap`, `LLE`)
60pub mod reduction;
61
62/// Matrix decomposition techniques (`NMF`, `DictionaryLearning`)
63pub mod decomposition;
64
65/// Advanced scaling and transformation methods (`MaxAbsScaler`, `QuantileTransformer`)
66pub mod scaling;
67
68/// Missing value imputation utilities (`SimpleImputer`, `KNNImputer`, `IterativeImputer`, `MissingIndicator`)
69pub mod impute;
70
71/// Categorical data encoding utilities (one-hot, ordinal, target, binary, frequency and WOE encoders)
72pub mod encoding;
73
74/// Feature selection utilities (`VarianceThreshold`, `RecursiveFeatureElimination`, `MutualInfoSelector`)
75pub mod selection;
76
77/// Time series feature extraction (`FourierFeatures`, `LagFeatures`, `WaveletFeatures`, `TimeSeriesFeatures`)
78pub mod time_series;
79
80/// Pipeline API for chaining transformations (`Pipeline`, `ColumnTransformer`, `Transformer`)
81pub mod pipeline;
82
83/// SIMD-accelerated normalization operations (compiled only when the `simd` feature is enabled)
84#[cfg(feature = "simd")]
85pub mod normalize_simd;
86
87/// SIMD-accelerated feature engineering operations (compiled only when the `simd` feature is enabled)
88#[cfg(feature = "simd")]
89pub mod features_simd;
90
91/// SIMD-accelerated scaling operations (compiled only when the `simd` feature is enabled)
92#[cfg(feature = "simd")]
93pub mod scaling_simd;
94
95/// Out-of-core processing for large datasets (chunked array readers/writers, `OutOfCoreNormalizer`)
96pub mod out_of_core;
97
98/// Streaming transformations for continuous data (streaming scalers, `StreamingPCA`, quantile tracking, outlier detection)
99pub mod streaming;
100
101/// Text processing transformers (`CountVectorizer`, `TfidfVectorizer`, `HashingVectorizer`)
102pub mod text;
103
104/// Image processing transformers (`HOGDescriptor`, `PatchExtractor`, `ImageNormalizer`, resizing, grayscale conversion)
105pub mod image;
106
107/// Utility functions and helpers for data transformation (`StatUtils`, `ValidationUtils`, `DataChunker`, ...)
108pub mod utils;
109
110/// Signal transforms (DWT, CWT, WPT, STFT, MFCC, CQT, Chromagram), one submodule per transform family
111pub mod signal_transforms;
112
113/// Test module for advanced implementations (private; compiled only under `cfg(test)`)
114#[cfg(test)]
115mod advanced_test;
116/// Performance optimizations and enhanced implementations (`EnhancedPCA`, `EnhancedStandardScaler`)
117pub mod performance;
118
119/// Optimization configuration and auto-tuning system (`OptimizationConfig`, `AutoTuner`, `SystemMonitor`)
120pub mod optimization_config;
121
122/// Graph embedding transformers (`DeepWalk`, `Node2Vec`, `GraphAutoencoder`)
123pub mod graph;
124
125/// GPU-accelerated transformations (compiled only when the `gpu` feature is enabled)
126#[cfg(feature = "gpu")]
127pub mod gpu;
128
129/// Distributed processing for multi-node transformations (compiled only when the `distributed` feature is enabled)
130#[cfg(feature = "distributed")]
131pub mod distributed;
132
133/// Automated feature engineering with meta-learning
134pub mod auto_feature_engineering;
135
136/// Quantum-inspired optimization for data transformations
137pub mod quantum_optimization;
138
139/// Neuromorphic computing integration for real-time adaptation
140pub mod neuromorphic_adaptation;
141
142/// Production monitoring with drift detection (compiled only when the `monitoring` feature is enabled)
143#[cfg(feature = "monitoring")]
144pub mod monitoring;
145
146/// Kernel methods (Kernel PCA, Kernel Ridge Regression, kernel functions)
147pub mod kernel;
148
149// Re-export important types and functions at the crate root (e.g. `scirs2_transform::PCA` for `reduction::PCA`)
150pub use decomposition::{DictionaryLearning, NMF};
151pub use encoding::{
152    BinaryEncoder, EncodedOutput, FrequencyEncoder, OneHotEncoder, OrdinalEncoder, SparseMatrix,
153    TargetEncoder, WOEEncoder,
154};
155pub use error::{Result, TransformError};
156pub use features::{
157    binarize, discretize_equal_frequency, discretize_equal_width, log_transform, power_transform,
158    PolynomialFeatures, PowerTransformer,
159};
160pub use impute::{
161    DistanceMetric, ImputeStrategy, IterativeImputer, KNNImputer, MissingIndicator, SimpleImputer,
162    WeightingScheme,
163};
164pub use normalize::{normalize_array, normalize_vector, NormalizationMethod, Normalizer};
165pub use pipeline::{
166    make_column_transformer, make_pipeline, ColumnTransformer, Pipeline, RemainderOption,
167    Transformer,
168};
169pub use reduction::{
170    factor_analysis, scree_plot_data, trustworthiness, AffinityMethod, DiffusionMaps,
171    FactorAnalysis, FactorAnalysisResult, GraphMethod, Isomap, LaplacianEigenmaps, RotationMethod,
172    ScreePlotData, SpectralEmbedding, TruncatedSVD, LDA, LLE, PCA, TSNE, UMAP,
173};
174pub use scaling::{MaxAbsScaler, QuantileTransformer};
175pub use selection::{MutualInfoSelector, RecursiveFeatureElimination, VarianceThreshold};
176pub use time_series::{FourierFeatures, LagFeatures, TimeSeriesFeatures, WaveletFeatures};
177
178#[cfg(feature = "simd")]
179pub use normalize_simd::{
180    simd_l2_normalize_1d, simd_maxabs_normalize_1d, simd_minmax_normalize_1d,
181    simd_normalize_adaptive, simd_normalize_batch, simd_normalizearray, simd_zscore_normalize_1d,
182    AdaptiveBlockSizer,
183};
184
185#[cfg(feature = "simd")]
186pub use features_simd::{
187    simd_binarize, simd_polynomial_features_optimized, simd_power_transform, SimdPolynomialFeatures,
188};
189
190#[cfg(feature = "simd")]
191pub use scaling_simd::{SimdMaxAbsScaler, SimdRobustScaler, SimdStandardScaler};
192
193pub use graph::{
194    adjacency_to_edge_list, edge_list_to_adjacency, ActivationType, DeepWalk, GraphAutoencoder,
195    LaplacianType, Node2Vec,
196};
197pub use image::{
198    resize_images, rgb_to_grayscale, BlockNorm, HOGDescriptor, ImageNormMethod, ImageNormalizer,
199    PatchExtractor,
200};
201pub use optimization_config::{
202    AdaptiveParameterTuner, AdvancedConfigOptimizer, AutoTuner, ConfigurationPredictor,
203    DataCharacteristics, OptimizationConfig, OptimizationReport, PerformanceMetric, SystemMonitor,
204    SystemResources, TransformationRecommendation,
205};
206pub use out_of_core::{
207    csv_chunks, ChunkedArrayReader, ChunkedArrayWriter, OutOfCoreConfig, OutOfCoreNormalizer,
208    OutOfCoreTransformer,
209};
210pub use performance::{EnhancedPCA, EnhancedStandardScaler};
211pub use streaming::{
212    OutlierMethod, StreamingFeatureSelector, StreamingMinMaxScaler, StreamingOutlierDetector,
213    StreamingPCA, StreamingQuantileTracker, StreamingStandardScaler, StreamingTransformer,
214    WindowedStreamingTransformer,
215};
216pub use text::{CountVectorizer, HashingVectorizer, StreamingCountVectorizer, TfidfVectorizer};
217pub use utils::{
218    ArrayMemoryPool, DataChunker, PerfUtils, ProcessingStrategy, StatUtils, TypeConverter,
219    ValidationUtils,
220};
221
222// Signal transforms exports
223pub use signal_transforms::cqt::{CQTConfig, Chromagram, WindowFunction, CQT};
224pub use signal_transforms::cwt::{
225    ComplexMorletWavelet, ContinuousWavelet, GaussianWavelet, MexicanHatWavelet, MorletWavelet, CWT,
226};
227pub use signal_transforms::dwt::{
228    BoundaryMode, Dwt2dCoeffs, WaveletFilters, WaveletType, DWT, DWT2D, DWTN,
229};
230pub use signal_transforms::mfcc::{MFCCConfig, MelFilterbank, MFCC};
231pub use signal_transforms::stft::{
232    PaddingMode, STFTConfig, Spectrogram, SpectrogramScaling, WindowType, STFT,
233};
234pub use signal_transforms::wpt::{denoise_wpt, BestBasisCriterion, WaveletPacketNode, WPT};
235
236// GPU acceleration exports (available only with the `gpu` feature)
237#[cfg(feature = "gpu")]
238pub use gpu::{GpuMatrixOps, GpuPCA, GpuTSNE};
239
240// Distributed processing exports (available only with the `distributed` feature)
241#[cfg(feature = "distributed")]
242pub use distributed::{
243    AutoScalingConfig, CircuitBreaker, ClusterHealthSummary, DistributedConfig,
244    DistributedCoordinator, DistributedPCA, EnhancedDistributedCoordinator, NodeHealth, NodeInfo,
245    NodeStatus, PartitioningStrategy,
246};
247
248// Automated feature engineering exports
249pub use auto_feature_engineering::{
250    AdvancedMetaLearningSystem, AutoFeatureEngineer, DatasetMetaFeatures, EnhancedMetaFeatures,
251    MultiObjectiveRecommendation, TransformationConfig, TransformationType,
252};
253
254// Quantum optimization exports
255pub use quantum_optimization::{
256    AdvancedQuantumMetrics, AdvancedQuantumOptimizer, AdvancedQuantumParams,
257    QuantumHyperparameterTuner, QuantumInspiredOptimizer, QuantumParticle,
258    QuantumTransformationOptimizer,
259};
260
261// Neuromorphic computing exports
262pub use neuromorphic_adaptation::{
263    AdvancedNeuromorphicMetrics, AdvancedNeuromorphicProcessor, NeuromorphicAdaptationNetwork,
264    NeuromorphicMemorySystem, NeuromorphicTransformationSystem, SpikingNeuron, SystemState,
265    TransformationEpisode,
266};
267
268// Kernel methods exports
269pub use kernel::{
270    center_kernel_matrix, cross_gram_matrix, estimate_rbf_gamma, gram_matrix,
271    is_positive_semidefinite, kernel_alignment, kernel_diagonal, kernel_eval, KernelPCA,
272    KernelRidgeRegression, KernelType,
273};
274
275// Production monitoring exports (available only with the `monitoring` feature)
276#[cfg(feature = "monitoring")]
277pub use monitoring::{
278    AlertConfig, AlertType, DriftDetectionResult, DriftMethod, PerformanceMetrics,
279    TransformationMonitor,
280};