scirs2_transform/
lib.rs

1#![allow(deprecated)]
2#![allow(clippy::all)]
3//! # SciRS2 Transform - Data Transformation and Preprocessing
4//!
5//! **scirs2-transform** provides comprehensive data transformation utilities for machine learning:
6//! normalization, feature engineering, dimensionality reduction, encoding, imputation, and
7//! pipelines, with optional SIMD acceleration and out-of-core processing for large datasets.
8//!
9//! ## 🎯 Key Features
10//!
11//! - **Normalization**: Min-max, Z-score, robust scaling, quantile normalization
12//! - **Feature Engineering**: Polynomial features, interaction terms, binning
13//! - **Dimensionality Reduction**: PCA, SVD, t-SNE, UMAP, LDA
14//! - **Encoding**: One-hot, label, ordinal, target encoding
15//! - **Imputation**: Mean, median, mode, KNN, iterative imputation
16//! - **Pipelines**: Chained transformations with fit/transform API
17//! - **Performance**: SIMD operations, streaming, out-of-core processing
18//!
19//! ## 📦 Module Overview
20//!
21//! | SciRS2 Module | scikit-learn Equivalent | Description |
22//! |---------------|-------------------------|-------------|
23//! | `normalize` | `sklearn.preprocessing.StandardScaler` | Data normalization/standardization |
24//! | `features` | `sklearn.preprocessing.PolynomialFeatures` | Feature engineering |
25//! | `reduction` | `sklearn.decomposition.PCA` | Dimensionality reduction |
26//! | `encoding` | `sklearn.preprocessing.OneHotEncoder` | Categorical encoding |
27//! | `impute` | `sklearn.impute.SimpleImputer` | Missing value imputation |
28//! | `pipeline` | `sklearn.pipeline.Pipeline` | Transformation pipelines |
29//!
30//! ## 🚀 Quick Start
31//!
32//! ```toml
33//! [dependencies]
34//! scirs2-transform = "0.1.0-rc.1"
35//! ```
36//!
37//! ```rust,no_run
38//! use scirs2_transform::normalize::{normalize_array, NormalizationMethod};
39//! use scirs2_core::ndarray::Array2;
40//!
41//! // Standardize data (Z-score normalization)
42//! let data = Array2::<f64>::zeros((100, 5));
43//! let normalized = normalize_array(&data, NormalizationMethod::ZScore, 0).unwrap();
44//! ```
45//!
46//! ## 🔒 Version: 0.1.0-rc.1 (October 03, 2025)
47
48#![warn(missing_docs)]
49#![allow(clippy::too_many_arguments)]
50
51/// Error handling for the transformation module (`TransformError`, `Result`)
52pub mod error;
53
54/// Basic normalization methods for data (`normalize_array`, `normalize_vector`, `Normalizer`)
55pub mod normalize;
56
57/// Feature engineering techniques (`PolynomialFeatures`, `PowerTransformer`, `binarize`, ...)
58pub mod features;
59
60/// Dimensionality reduction algorithms (e.g. `PCA`, `TruncatedSVD`, `TSNE`, `UMAP`, `Isomap`)
61pub mod reduction;
62
63/// Matrix decomposition techniques (`NMF`, `DictionaryLearning`)
64pub mod decomposition;
65
66/// Advanced scaling and transformation methods (`MaxAbsScaler`, `QuantileTransformer`)
67pub mod scaling;
68
69/// Missing value imputation utilities (`SimpleImputer`, `KNNImputer`, `IterativeImputer`)
70pub mod impute;
71
72/// Categorical data encoding utilities (`OneHotEncoder`, `OrdinalEncoder`, `TargetEncoder`, ...)
73pub mod encoding;
74
75/// Feature selection utilities (`VarianceThreshold`, `RecursiveFeatureElimination`, ...)
76pub mod selection;
77
78/// Time series feature extraction (`FourierFeatures`, `LagFeatures`, `WaveletFeatures`)
79pub mod time_series;
80
81/// Pipeline API for chaining transformations (`Pipeline`, `ColumnTransformer`)
82pub mod pipeline;
83
84/// SIMD-accelerated normalization operations (compiled only with the `simd` feature)
85#[cfg(feature = "simd")]
86pub mod normalize_simd;
87
88/// SIMD-accelerated feature engineering operations (compiled only with the `simd` feature)
89#[cfg(feature = "simd")]
90pub mod features_simd;
91
92/// SIMD-accelerated scaling operations (compiled only with the `simd` feature)
93#[cfg(feature = "simd")]
94pub mod scaling_simd;
95
96/// Out-of-core processing for large datasets (`ChunkedArrayReader`, `OutOfCoreNormalizer`, ...)
97pub mod out_of_core;
98
99/// Streaming transformations for continuous data (`StreamingStandardScaler`, `StreamingPCA`, ...)
100pub mod streaming;
101
102/// Text processing transformers (`CountVectorizer`, `TfidfVectorizer`, `HashingVectorizer`)
103pub mod text;
104
105/// Image processing transformers (`HOGDescriptor`, `PatchExtractor`, `ImageNormalizer`)
106pub mod image;
107
108/// Utility functions and helpers for data transformation (`StatUtils`, `ValidationUtils`, ...)
109pub mod utils;
110
111/// Test module for advanced implementations (private; compiled only under `cfg(test)`)
112#[cfg(test)]
113mod advanced_test;
114/// Performance optimizations and enhanced implementations (`EnhancedPCA`, `EnhancedStandardScaler`)
115pub mod performance;
116
117/// Optimization configuration and auto-tuning system (`OptimizationConfig`, `AutoTuner`)
118pub mod optimization_config;
119
120/// Graph embedding transformers (`DeepWalk`, `Node2Vec`, `GraphAutoencoder`)
121pub mod graph;
122
123/// GPU-accelerated transformations (compiled only with the `gpu` feature)
124#[cfg(feature = "gpu")]
125pub mod gpu;
126
127/// Distributed processing for multi-node transformations (compiled only with the `distributed` feature)
128#[cfg(feature = "distributed")]
129pub mod distributed;
130
131/// Automated feature engineering with meta-learning (`AutoFeatureEngineer`, ...)
132pub mod auto_feature_engineering;
133
134/// Quantum-inspired optimization for data transformations (`QuantumInspiredOptimizer`, ...)
135pub mod quantum_optimization;
136
137/// Neuromorphic computing integration for real-time adaptation (`NeuromorphicAdaptationNetwork`, ...)
138pub mod neuromorphic_adaptation;
139
140/// Production monitoring with drift detection (compiled only with the `monitoring` feature)
141#[cfg(feature = "monitoring")]
142pub mod monitoring;
143
144// Re-export important types and functions
145pub use decomposition::{DictionaryLearning, NMF};
146pub use encoding::{
147    BinaryEncoder, EncodedOutput, FrequencyEncoder, OneHotEncoder, OrdinalEncoder, SparseMatrix,
148    TargetEncoder, WOEEncoder,
149};
150pub use error::{Result, TransformError};
151pub use features::{
152    binarize, discretize_equal_frequency, discretize_equal_width, log_transform, power_transform,
153    PolynomialFeatures, PowerTransformer,
154};
155pub use impute::{
156    DistanceMetric, ImputeStrategy, IterativeImputer, KNNImputer, MissingIndicator, SimpleImputer,
157    WeightingScheme,
158};
159pub use normalize::{normalize_array, normalize_vector, NormalizationMethod, Normalizer};
160pub use pipeline::{
161    make_column_transformer, make_pipeline, ColumnTransformer, Pipeline, RemainderOption,
162    Transformer,
163};
164pub use reduction::{
165    trustworthiness, AffinityMethod, Isomap, SpectralEmbedding, TruncatedSVD, LDA, LLE, PCA, TSNE,
166    UMAP,
167};
168pub use scaling::{MaxAbsScaler, QuantileTransformer};
169pub use selection::{MutualInfoSelector, RecursiveFeatureElimination, VarianceThreshold};
170pub use time_series::{FourierFeatures, LagFeatures, TimeSeriesFeatures, WaveletFeatures};
171
172#[cfg(feature = "simd")]
173pub use normalize_simd::{
174    simd_l2_normalize_1d, simd_maxabs_normalize_1d, simd_minmax_normalize_1d,
175    simd_normalize_adaptive, simd_normalize_batch, simd_normalizearray, simd_zscore_normalize_1d,
176    AdaptiveBlockSizer,
177};
178
179#[cfg(feature = "simd")]
180pub use features_simd::{
181    simd_binarize, simd_polynomial_features_optimized, simd_power_transform, SimdPolynomialFeatures,
182};
183
184#[cfg(feature = "simd")]
185pub use scaling_simd::{SimdMaxAbsScaler, SimdRobustScaler, SimdStandardScaler};
186
187pub use graph::{
188    adjacency_to_edge_list, edge_list_to_adjacency, ActivationType, DeepWalk, GraphAutoencoder,
189    LaplacianType, Node2Vec,
190};
191pub use image::{
192    resize_images, rgb_to_grayscale, BlockNorm, HOGDescriptor, ImageNormMethod, ImageNormalizer,
193    PatchExtractor,
194};
195pub use optimization_config::{
196    AdaptiveParameterTuner, AdvancedConfigOptimizer, AutoTuner, ConfigurationPredictor,
197    DataCharacteristics, OptimizationConfig, OptimizationReport, PerformanceMetric, SystemMonitor,
198    SystemResources, TransformationRecommendation,
199};
200pub use out_of_core::{
201    csv_chunks, ChunkedArrayReader, ChunkedArrayWriter, OutOfCoreConfig, OutOfCoreNormalizer,
202    OutOfCoreTransformer,
203};
204pub use performance::{EnhancedPCA, EnhancedStandardScaler};
205pub use streaming::{
206    OutlierMethod, StreamingFeatureSelector, StreamingMinMaxScaler, StreamingOutlierDetector,
207    StreamingPCA, StreamingQuantileTracker, StreamingStandardScaler, StreamingTransformer,
208    WindowedStreamingTransformer,
209};
210pub use text::{CountVectorizer, HashingVectorizer, StreamingCountVectorizer, TfidfVectorizer};
211pub use utils::{
212    ArrayMemoryPool, DataChunker, PerfUtils, ProcessingStrategy, StatUtils, TypeConverter,
213    ValidationUtils,
214};
215
216// GPU acceleration exports
217#[cfg(feature = "gpu")]
218pub use gpu::{GpuMatrixOps, GpuPCA, GpuTSNE};
219
220// Distributed processing exports
221#[cfg(feature = "distributed")]
222pub use distributed::{
223    AutoScalingConfig, CircuitBreaker, ClusterHealthSummary, DistributedConfig,
224    DistributedCoordinator, DistributedPCA, EnhancedDistributedCoordinator, NodeHealth, NodeInfo,
225    NodeStatus, PartitioningStrategy,
226};
227
228// Automated feature engineering exports
229pub use auto_feature_engineering::{
230    AdvancedMetaLearningSystem, AutoFeatureEngineer, DatasetMetaFeatures, EnhancedMetaFeatures,
231    MultiObjectiveRecommendation, TransformationConfig, TransformationType,
232};
233
234// Quantum optimization exports
235pub use quantum_optimization::{
236    AdvancedQuantumMetrics, AdvancedQuantumOptimizer, AdvancedQuantumParams,
237    QuantumHyperparameterTuner, QuantumInspiredOptimizer, QuantumParticle,
238    QuantumTransformationOptimizer,
239};
240
241// Neuromorphic computing exports
242pub use neuromorphic_adaptation::{
243    AdvancedNeuromorphicMetrics, AdvancedNeuromorphicProcessor, NeuromorphicAdaptationNetwork,
244    NeuromorphicMemorySystem, NeuromorphicTransformationSystem, SpikingNeuron, SystemState,
245    TransformationEpisode,
246};
247
248// Production monitoring exports
249#[cfg(feature = "monitoring")]
250pub use monitoring::{
251    AlertConfig, AlertType, DriftDetectionResult, DriftMethod, PerformanceMetrics,
252    TransformationMonitor,
253};