Expand description
Feature preprocessing, scaling, and dimensionality reduction.
This crate provides transformers for preparing data before model training,
including StandardScaler (z-score normalization), MinMaxScaler
(min-max normalization), Pca (principal component analysis),
VarianceThreshold (low-variance feature removal), and
MutualInformationSelector (feature selection by mutual information).
All transformers follow the type-state pattern: call `FitUnsupervised::fit`
to learn parameters, then call `Transform::transform` on the fitted result
to apply the transformation.
§Examples
use ndarray::array;
use anofox_ml_core::{FitUnsupervised, Transform};
use anofox_ml_preprocessing::StandardScaler;
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let scaler = StandardScaler::new();
let fitted = FitUnsupervised::<f64>::fit(&scaler, &x).unwrap();
let x_scaled = fitted.transform(&x).unwrap();
// Each column now has mean ~0 and std ~1
let col0_mean: f64 = x_scaled.column(0).sum() / 3.0;
assert!(col0_mean.abs() < 1e-10);

Re-exports§
pub use binarizer::Binarizer;
pub use binarizer::FittedBinarizer;
pub use cca::Cca;
pub use cca::FittedCca;
pub use fast_ica::FastIca;
pub use fast_ica::FittedFastIca;
pub use kbins_discretizer::BinStrategy;
pub use kbins_discretizer::EncodeStrategy;
pub use kbins_discretizer::FittedKBinsDiscretizer;
pub use kbins_discretizer::KBinsDiscretizer;
pub use kernel_pca::FittedKernelPca;
pub use kernel_pca::KernelPca;
pub use kernel_pca::KpcaKernel;
pub use label_encoder::FittedLabelEncoder;
pub use label_encoder::LabelEncoder;
pub use max_abs_scaler::FittedMaxAbsScaler;
pub use max_abs_scaler::MaxAbsScaler;
pub use minmax_scaler::FittedMinMaxScaler;
pub use minmax_scaler::MinMaxScaler;
pub use mutual_information::FittedMutualInformationSelector;
pub use mutual_information::MutualInformationSelector;
pub use nmf::FittedNmf;
pub use nmf::Nmf;
pub use normalizer::FittedNormalizer;
pub use normalizer::NormType;
pub use normalizer::Normalizer;
pub use one_hot_encoder::FittedOneHotEncoder;
pub use one_hot_encoder::OneHotEncoder;
pub use ordinal_encoder::FittedOrdinalEncoder;
pub use ordinal_encoder::OrdinalEncoder;
pub use pca::FittedPca;
pub use pca::Pca;
pub use pls::FittedPlsRegression;
pub use pls::PlsRegression;
pub use polynomial_features::FittedPolynomialFeatures;
pub use polynomial_features::PolynomialFeatures;
pub use power_transformer::FittedPowerTransformer;
pub use power_transformer::PowerTransformer;
pub use quantile_transformer::FittedQuantileTransformer;
pub use quantile_transformer::OutputDistribution;
pub use quantile_transformer::QuantileTransformer;
pub use rfe::FittedRfe;
pub use rfe::FittedSequentialFeatureSelector;
pub use rfe::Rfe;
pub use rfe::SequentialFeatureSelector;
pub use robust_scaler::FittedRobustScaler;
pub use robust_scaler::RobustScaler;
pub use select_from_model::FittedSelectFromModel;
pub use select_from_model::SelectFromModel;
pub use select_k_best::FittedSelectKBest;
pub use select_k_best::SelectKBest;
pub use simple_imputer::FittedSimpleImputer;
pub use simple_imputer::ImputeStrategy;
pub use simple_imputer::SimpleImputer;
pub use standard_scaler::FittedStandardScaler;
pub use standard_scaler::StandardScaler;
pub use truncated_svd::FittedTruncatedSvd;
pub use truncated_svd::TruncatedSvd;
pub use variance_threshold::FittedVarianceThreshold;
pub use variance_threshold::VarianceThreshold;
Modules§
- binarizer
- cca: Canonical Correlation Analysis.
- fast_ica: FastICA — fixed-point Independent Component Analysis with deflation.
- kbins_discretizer
- kernel_pca: Kernel PCA.
- label_encoder
- max_abs_scaler
- minmax_scaler
- mutual_information
- nmf: Non-negative Matrix Factorisation.
- normalizer
- one_hot_encoder
- ordinal_encoder
- pca
- pls: Partial Least Squares Regression (PLS1).
- polynomial_features
- power_transformer
- quantile_transformer
- rfe: Recursive Feature Elimination (RFE).
- robust_scaler
- select_from_model
- select_k_best
- simple_imputer
- standard_scaler
- truncated_svd: Truncated SVD (a.k.a. LSA when applied to a term-document matrix).
- variance_threshold