Skip to main content

anofox_ml_preprocessing/
lib.rs

1//! Feature preprocessing, scaling, and dimensionality reduction.
2//!
3//! This crate provides transformers for preparing data before model training,
4//! including [`StandardScaler`] (z-score normalization), [`MinMaxScaler`]
5//! (min-max normalization), [`Pca`] (principal component analysis),
6//! [`VarianceThreshold`] (low-variance feature removal), and
7//! [`MutualInformationSelector`] (feature selection by mutual information).
8//!
9//! All transformers follow the type-state pattern: call
10//! [`FitUnsupervised::fit`](anofox_ml_core::FitUnsupervised::fit) to learn
11//! parameters, then [`Transform::transform`](anofox_ml_core::Transform::transform)
12//! on the fitted result to apply the transformation.
13//!
14//! # Examples
15//!
16//! ```
17//! use ndarray::array;
18//! use anofox_ml_core::{FitUnsupervised, Transform};
19//! use anofox_ml_preprocessing::StandardScaler;
20//!
21//! let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
22//!
23//! let scaler = StandardScaler::new();
24//! let fitted = FitUnsupervised::<f64>::fit(&scaler, &x).unwrap();
25//! let x_scaled = fitted.transform(&x).unwrap();
26//!
27//! // Each column now has mean ~0 and std ~1
28//! let col0_mean: f64 = x_scaled.column(0).sum() / 3.0;
29//! assert!(col0_mean.abs() < 1e-10);
30//! ```
31
// Public submodules of the crate. Items from every module declared here are
// also re-exported at the crate root by the `pub use` lines further down.
32pub mod binarizer;
33pub mod cca;
34pub mod fast_ica;
35pub mod kbins_discretizer;
36pub mod kernel_pca;
37pub mod label_encoder;
38pub mod max_abs_scaler;
39pub mod minmax_scaler;
40pub mod mutual_information;
41pub mod nmf;
42pub mod normalizer;
43pub mod one_hot_encoder;
44pub mod ordinal_encoder;
45pub mod pca;
46pub mod pls;
47pub mod polynomial_features;
48pub mod power_transformer;
49pub mod quantile_transformer;
50pub mod rfe;
51pub mod robust_scaler;
52pub mod select_from_model;
53pub mod select_k_best;
54pub mod simple_imputer;
55pub mod standard_scaler;
56pub mod truncated_svd;
57pub mod variance_threshold;
58
// Crate-root re-exports, so callers can import e.g.
// `anofox_ml_preprocessing::StandardScaler` without naming the submodule.
// Each line exports a type together with its `Fitted*` counterpart; a few
// modules export additional names (`BinStrategy`, `EncodeStrategy`,
// `KpcaKernel`, `NormType`, `OutputDistribution`, `ImputeStrategy`) —
// presumably option types for the corresponding transformer; confirm in the
// respective module. `rfe` exports two selectors (`Rfe` and
// `SequentialFeatureSelector`) with their fitted forms.
59pub use binarizer::{Binarizer, FittedBinarizer};
60pub use cca::{Cca, FittedCca};
61pub use fast_ica::{FastIca, FittedFastIca};
62pub use kbins_discretizer::{
63    BinStrategy, EncodeStrategy, FittedKBinsDiscretizer, KBinsDiscretizer,
64};
65pub use kernel_pca::{FittedKernelPca, KernelPca, KpcaKernel};
66pub use label_encoder::{FittedLabelEncoder, LabelEncoder};
67pub use max_abs_scaler::{FittedMaxAbsScaler, MaxAbsScaler};
68pub use minmax_scaler::{FittedMinMaxScaler, MinMaxScaler};
69pub use mutual_information::{FittedMutualInformationSelector, MutualInformationSelector};
70pub use nmf::{FittedNmf, Nmf};
71pub use normalizer::{FittedNormalizer, NormType, Normalizer};
72pub use one_hot_encoder::{FittedOneHotEncoder, OneHotEncoder};
73pub use ordinal_encoder::{FittedOrdinalEncoder, OrdinalEncoder};
74pub use pca::{FittedPca, Pca};
75pub use pls::{FittedPlsRegression, PlsRegression};
76pub use polynomial_features::{FittedPolynomialFeatures, PolynomialFeatures};
77pub use power_transformer::{FittedPowerTransformer, PowerTransformer};
78pub use quantile_transformer::{
79    FittedQuantileTransformer, OutputDistribution, QuantileTransformer,
80};
81pub use rfe::{FittedRfe, FittedSequentialFeatureSelector, Rfe, SequentialFeatureSelector};
82pub use robust_scaler::{FittedRobustScaler, RobustScaler};
83pub use select_from_model::{FittedSelectFromModel, SelectFromModel};
84pub use select_k_best::{FittedSelectKBest, SelectKBest};
85pub use simple_imputer::{FittedSimpleImputer, ImputeStrategy, SimpleImputer};
86pub use standard_scaler::{FittedStandardScaler, StandardScaler};
87pub use truncated_svd::{FittedTruncatedSvd, TruncatedSvd};
88pub use variance_threshold::{FittedVarianceThreshold, VarianceThreshold};