Skip to main content

scirs2_transform/
lib.rs

// NOTE(review): crate-wide blanket allow of the whole `clippy::all` lint group, with no recorded
// rationale — consider narrowing it to the specific lints that actually need suppressing.
1#![allow(clippy::all)]
2//! # SciRS2 Transform - Data Transformation and Preprocessing
3//!
4//! **scirs2-transform** provides comprehensive data transformation utilities for machine learning,
5//! offering normalization, feature engineering, dimensionality reduction, encoding, imputation,
6//! and pipelines with SIMD acceleration and out-of-core processing for large datasets.
7//!
8//! ## 🎯 Key Features
9//!
10//! - **Normalization**: Min-max, Z-score, robust scaling, quantile normalization
11//! - **Feature Engineering**: Polynomial features, interaction terms, binning
12//! - **Dimensionality Reduction**: PCA, SVD, t-SNE, UMAP, LDA
13//! - **Encoding**: One-hot, label, ordinal, target encoding
14//! - **Imputation**: Mean, median, mode, KNN, iterative imputation
15//! - **Pipelines**: Chained transformations with fit/transform API
16//! - **Performance**: SIMD operations, streaming, out-of-core processing
17//!
18//! ## 📦 Module Overview
19//!
20//! | SciRS2 Module | scikit-learn Equivalent | Description |
21//! |---------------|-------------------------|-------------|
22//! | `normalize` | `sklearn.preprocessing.StandardScaler` | Data normalization/standardization |
23//! | `features` | `sklearn.preprocessing.PolynomialFeatures` | Feature engineering |
24//! | `reduction` | `sklearn.decomposition.PCA` | Dimensionality reduction |
25//! | `encoding` | `sklearn.preprocessing.OneHotEncoder` | Categorical encoding |
26//! | `impute` | `sklearn.impute.SimpleImputer` | Missing value imputation |
27//! | `pipeline` | `sklearn.pipeline.Pipeline` | Transformation pipelines |
28//!
29//! ## 🚀 Quick Start
30//!
31//! ```toml
32//! [dependencies]
33//! scirs2-transform = "0.4.2"
34//! ```
35//!
36//! ```rust,no_run
37//! use scirs2_transform::normalize::{normalize_array, NormalizationMethod};
38//! use scirs2_core::ndarray::Array2;
39//!
40//! // Standardize data (Z-score normalization)
41//! let data = Array2::<f64>::zeros((100, 5));
42//! let normalized = normalize_array(&data, NormalizationMethod::ZScore, 0).expect("should succeed");
43//! ```
44//!
45//! ## 🔒 Version: 0.4.2 (March 27, 2026)
46
// Warn whenever a public item in this crate has no documentation.
47#![warn(missing_docs)]
// NOTE(review): `too_many_arguments` is part of the `clippy::all` group that is already allowed
// at the top of this file, so this attribute looks redundant — confirm before removing it.
48#![allow(clippy::too_many_arguments)]
49
50/// Error handling for the transformation module
51pub mod error;
52
53/// Basic normalization methods for data
54pub mod normalize;
55
56/// Feature engineering techniques
57pub mod features;
58
59/// Dimensionality reduction algorithms
60pub mod reduction;
61
62/// Matrix decomposition techniques
63pub mod decomposition;
64
65/// Advanced scaling and transformation methods
66pub mod scaling;
67
68/// Missing value imputation utilities
69pub mod impute;
70
71/// Categorical data encoding utilities
72pub mod encoding;
73
74/// Feature selection utilities
75pub mod selection;
76
77/// Time series feature extraction
78pub mod time_series;
79
80/// Pipeline API for chaining transformations
81pub mod pipeline;
82
83/// SIMD-accelerated normalization operations (compiled only with the `simd` feature)
84#[cfg(feature = "simd")]
85pub mod normalize_simd;
86
87/// SIMD-accelerated feature engineering operations (compiled only with the `simd` feature)
88#[cfg(feature = "simd")]
89pub mod features_simd;
90
91/// SIMD-accelerated scaling operations (compiled only with the `simd` feature)
92#[cfg(feature = "simd")]
93pub mod scaling_simd;
94
95/// Out-of-core processing for large datasets
96pub mod out_of_core;
97
98/// Streaming transformations for continuous data
99pub mod streaming;
100
101/// Text processing transformers
102pub mod text;
103
104/// Image processing transformers
105pub mod image;
106
107/// Utility functions and helpers for data transformation
108pub mod utils;
109
110/// Signal transforms (DWT, CWT, WPT, STFT, MFCC, CQT, Chromagram)
111pub mod signal_transforms;
112
113/// Test module for advanced implementations (private; compiled only under `cfg(test)`)
114#[cfg(test)]
115mod advanced_test;
116/// Performance optimizations and enhanced implementations
117pub mod performance;
118
119/// Optimization configuration and auto-tuning system
120pub mod optimization_config;
121
122/// Graph embedding transformers
123pub mod graph;
124
125/// GPU-accelerated transformations (compiled only with the `gpu` feature)
126#[cfg(feature = "gpu")]
127pub mod gpu;
128
129/// Distributed processing for multi-node transformations (compiled only with the `distributed` feature)
130#[cfg(feature = "distributed")]
131pub mod distributed;
132
133/// Automated feature engineering with meta-learning
134pub mod auto_feature_engineering;
135
136/// Quantum-inspired optimization for data transformations
137pub mod quantum_optimization;
138
139/// Neuromorphic computing integration for real-time adaptation
140pub mod neuromorphic_adaptation;
141
142/// Production monitoring with drift detection (compiled only with the `monitoring` feature)
143#[cfg(feature = "monitoring")]
144pub mod monitoring;
145
146/// Kernel methods (Kernel PCA, Kernel Ridge Regression, kernel functions)
147pub mod kernel;
148
149/// Alignment transforms (Procrustes)
150pub mod alignment;
151/// Drift detection and adaptation
152pub mod drift;
153/// Optimal transport
154pub mod ot;
155/// Topological data analysis (Alpha, Cubical, Zigzag)
156pub mod tda;
157/// Vietoris-Rips TDA
158pub mod tda_vr;
159/// Extended Vietoris-Rips TDA
160pub mod tda_vr_ext;
// NOTE(review): described as an alias, but declared as its own module file rather than a
// `pub use` of `tda_vr_ext` — confirm the file only re-exports `tda_vr_ext`.
161/// TDA extension module (alias for `tda_vr_ext`, used by doc tests)
162pub mod tda_ext;
163
164// Re-export important types and functions
165pub use decomposition::{DictionaryLearning, NMF};
166pub use encoding::{
167    BinaryEncoder, EncodedOutput, FrequencyEncoder, OneHotEncoder, OrdinalEncoder, SparseMatrix,
168    TargetEncoder, WOEEncoder,
169};
170pub use error::{Result, TransformError};
171pub use features::{
172    binarize, discretize_equal_frequency, discretize_equal_width, log_transform, power_transform,
173    PolynomialFeatures, PowerTransformer,
174};
175pub use impute::{
176    DistanceMetric, ImputeStrategy, IterativeImputer, KNNImputer, MissingIndicator, SimpleImputer,
177    WeightingScheme,
178};
179pub use normalize::{normalize_array, normalize_vector, NormalizationMethod, Normalizer};
180pub use pipeline::{
181    make_column_transformer, make_pipeline, ColumnTransformer, Pipeline, RemainderOption,
182    Transformer,
183};
184pub use reduction::{
185    factor_analysis, scree_plot_data, trustworthiness, AffinityMethod, DiffusionMaps,
186    FactorAnalysis, FactorAnalysisResult, GraphMethod, Isomap, LaplacianEigenmaps, RotationMethod,
187    ScreePlotData, SpectralEmbedding, TruncatedSVD, LDA, LLE, PCA, TSNE, UMAP,
188};
189pub use scaling::{MaxAbsScaler, QuantileTransformer};
190pub use selection::{MutualInfoSelector, RecursiveFeatureElimination, VarianceThreshold};
191pub use time_series::{FourierFeatures, LagFeatures, TimeSeriesFeatures, WaveletFeatures};
192
// Crate-root re-exports of the SIMD normalization helpers; present only with the `simd` feature.
// NOTE(review): `simd_normalizearray` drops the underscore its siblings use
// (`simd_normalize_adaptive`, `simd_normalize_batch`) and that the scalar `normalize_array` has;
// presumably meant to be `simd_normalize_array` — any rename has to start in `normalize_simd`.
193#[cfg(feature = "simd")]
194pub use normalize_simd::{
195    simd_l2_normalize_1d, simd_maxabs_normalize_1d, simd_minmax_normalize_1d,
196    simd_normalize_adaptive, simd_normalize_batch, simd_normalizearray, simd_zscore_normalize_1d,
197    AdaptiveBlockSizer,
198};
199
200#[cfg(feature = "simd")]
201pub use features_simd::{
202    simd_binarize, simd_polynomial_features_optimized, simd_power_transform, SimdPolynomialFeatures,
203};
204
205#[cfg(feature = "simd")]
206pub use scaling_simd::{SimdMaxAbsScaler, SimdRobustScaler, SimdStandardScaler};
207
208pub use graph::{
209    adjacency_to_edge_list, edge_list_to_adjacency, ActivationType, DeepWalk, GraphAutoencoder,
210    LaplacianType, Node2Vec,
211};
212pub use image::{
213    resize_images, rgb_to_grayscale, BlockNorm, HOGDescriptor, ImageNormMethod, ImageNormalizer,
214    PatchExtractor,
215};
216pub use optimization_config::{
217    AdaptiveParameterTuner, AdvancedConfigOptimizer, AutoTuner, ConfigurationPredictor,
218    DataCharacteristics, OptimizationConfig, OptimizationReport, PerformanceMetric, SystemMonitor,
219    SystemResources, TransformationRecommendation,
220};
221pub use out_of_core::{
222    csv_chunks, ChunkedArrayReader, ChunkedArrayWriter, OutOfCoreConfig, OutOfCoreNormalizer,
223    OutOfCoreTransformer,
224};
225pub use performance::{EnhancedPCA, EnhancedStandardScaler};
226pub use streaming::{
227    OutlierMethod, StreamingFeatureSelector, StreamingMinMaxScaler, StreamingOutlierDetector,
228    StreamingPCA, StreamingQuantileTracker, StreamingStandardScaler, StreamingTransformer,
229    WindowedStreamingTransformer,
230};
231pub use text::{CountVectorizer, HashingVectorizer, StreamingCountVectorizer, TfidfVectorizer};
232pub use utils::{
233    ArrayMemoryPool, DataChunker, PerfUtils, ProcessingStrategy, StatUtils, TypeConverter,
234    ValidationUtils,
235};
236
237// Signal transforms exports
238pub use signal_transforms::cqt::{CQTConfig, Chromagram, WindowFunction, CQT};
239pub use signal_transforms::cwt::{
240    ComplexMorletWavelet, ContinuousWavelet, GaussianWavelet, MexicanHatWavelet, MorletWavelet, CWT,
241};
242pub use signal_transforms::dwt::{
243    BoundaryMode, Dwt2dCoeffs, WaveletFilters, WaveletType, DWT, DWT2D, DWTN,
244};
245pub use signal_transforms::mfcc::{MFCCConfig, MelFilterbank, MFCC};
246pub use signal_transforms::stft::{
247    PaddingMode, STFTConfig, Spectrogram, SpectrogramScaling, WindowType, STFT,
248};
249pub use signal_transforms::wpt::{denoise_wpt, BestBasisCriterion, WaveletPacketNode, WPT};
250
251// GPU acceleration exports
252#[cfg(feature = "gpu")]
253pub use gpu::{GpuMatrixOps, GpuPCA, GpuTSNE};
254
255// Distributed processing exports
256#[cfg(feature = "distributed")]
257pub use distributed::{
258    AutoScalingConfig, CircuitBreaker, ClusterHealthSummary, DistributedConfig,
259    DistributedCoordinator, DistributedPCA, EnhancedDistributedCoordinator, NodeHealth, NodeInfo,
260    NodeStatus, PartitioningStrategy,
261};
262
263// Automated feature engineering exports
264pub use auto_feature_engineering::{
265    AdvancedMetaLearningSystem, AutoFeatureEngineer, DatasetMetaFeatures, EnhancedMetaFeatures,
266    MultiObjectiveRecommendation, TransformationConfig, TransformationType,
267};
268
269// Quantum optimization exports
270pub use quantum_optimization::{
271    AdvancedQuantumMetrics, AdvancedQuantumOptimizer, AdvancedQuantumParams,
272    QuantumHyperparameterTuner, QuantumInspiredOptimizer, QuantumParticle,
273    QuantumTransformationOptimizer,
274};
275
276// Neuromorphic computing exports
277pub use neuromorphic_adaptation::{
278    AdvancedNeuromorphicMetrics, AdvancedNeuromorphicProcessor, NeuromorphicAdaptationNetwork,
279    NeuromorphicMemorySystem, NeuromorphicTransformationSystem, SpikingNeuron, SystemState,
280    TransformationEpisode,
281};
282
283// Kernel methods exports
284pub use kernel::{
285    center_kernel_matrix, cross_gram_matrix, estimate_rbf_gamma, gram_matrix,
286    is_positive_semidefinite, kernel_alignment, kernel_diagonal, kernel_eval, KernelPCA,
287    KernelRidgeRegression, KernelType,
288};
289
290// Production monitoring exports
291#[cfg(feature = "monitoring")]
292pub use monitoring::{
293    AlertConfig, AlertType, DriftDetectionResult, DriftMethod, PerformanceMetrics,
294    TransformationMonitor,
295};