impl RobustScaler {
    /// Creates an unfitted scaler with both centering and scaling enabled.
    ///
    /// The learned statistics start out empty; they are populated by `fit`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            with_scaling: true,
            with_centering: true,
            iqr: None,
            median: None,
        }
    }

    /// Builder-style switch controlling whether `transform` subtracts the
    /// per-feature median.
    #[must_use]
    pub fn with_centering(self, centering: bool) -> Self {
        Self {
            with_centering: centering,
            ..self
        }
    }

    /// Builder-style switch controlling whether `transform` divides by the
    /// per-feature interquartile range.
    #[must_use]
    pub fn with_scaling(self, scaling: bool) -> Self {
        Self {
            with_scaling: scaling,
            ..self
        }
    }

    /// Returns the per-feature medians learned during fitting.
    ///
    /// # Panics
    ///
    /// Panics if the scaler has not been fitted yet.
    #[must_use]
    pub fn median(&self) -> &[f32] {
        let fitted: Option<&[f32]> = self.median.as_deref();
        fitted.expect("Scaler not fitted. Call fit() first.")
    }

    /// Returns the per-feature interquartile ranges learned during fitting.
    ///
    /// # Panics
    ///
    /// Panics if the scaler has not been fitted yet.
    #[must_use]
    pub fn iqr(&self) -> &[f32] {
        let fitted: Option<&[f32]> = self.iqr.as_deref();
        fitted.expect("Scaler not fitted. Call fit() first.")
    }

    /// Reports whether fitted statistics are available.
    ///
    /// Only the stored median is inspected.
    #[must_use]
    pub fn is_fitted(&self) -> bool {
        self.median.is_some()
    }
}
/// Returns the linearly interpolated percentile of an ascending-sorted slice.
///
/// `p` is the desired fraction, where `0.0` selects the first element and
/// `1.0` the last. Values outside `[0, 1]` are clamped to that range so an
/// out-of-range fraction can never index past the end of the slice.
///
/// An empty slice yields `0.0`; a single-element slice yields that element.
/// The caller is responsible for passing data that is already sorted.
fn percentile(sorted: &[f32], p: f32) -> f32 {
    let last = match sorted.len().checked_sub(1) {
        None => return 0.0,
        Some(0) => return sorted[0],
        Some(last) => last,
    };

    // A NaN fraction stays NaN through `clamp`; the float-to-int casts below
    // turn it into index 0, so the first element is returned.
    let idx = p.clamp(0.0, 1.0) * last as f32;

    // `last as f32` may round upward for very long slices, so cap both
    // neighbours at the final valid index as well.
    let lo = (idx.floor() as usize).min(last);
    let hi = (idx.ceil() as usize).min(last);

    if lo == hi {
        sorted[lo]
    } else {
        let frac = idx - lo as f32;
        sorted[lo] * (1.0 - frac) + sorted[hi] * frac
    }
}
impl Transformer for RobustScaler {
    /// Learns the median and interquartile range (Q3 - Q1) of every column
    /// of `x` and stores them on the scaler.
    ///
    /// Columns are ordered with `f32::total_cmp`, so the result is
    /// deterministic even when a column contains NaN (NaNs are placed at the
    /// ends of the ordering according to their sign).
    ///
    /// # Errors
    ///
    /// Returns an error when `x` has zero rows.
    fn fit(&mut self, x: &Matrix<f32>) -> Result<()> {
        let (n_samples, n_features) = x.shape();
        if n_samples == 0 {
            return Err("Cannot fit with zero samples".into());
        }

        let mut medians = Vec::with_capacity(n_features);
        let mut iqrs = Vec::with_capacity(n_features);
        for j in 0..n_features {
            let mut col: Vec<f32> = (0..n_samples).map(|i| x.get(i, j)).collect();
            // A comparator built on `partial_cmp` is not a total order once
            // NaN is present, which the sort is allowed to reject. Under
            // `total_cmp`, equal keys are bit-identical, so an unstable sort
            // is indistinguishable from a stable one.
            col.sort_unstable_by(f32::total_cmp);

            let q1 = percentile(&col, 0.25);
            let q3 = percentile(&col, 0.75);
            medians.push(percentile(&col, 0.5));
            iqrs.push(q3 - q1);
        }

        self.median = Some(medians);
        self.iqr = Some(iqrs);
        Ok(())
    }

    /// Applies the fitted statistics to `x`, returning a new matrix of the
    /// same shape.
    ///
    /// Each value has the column median subtracted when centering is enabled
    /// and is divided by the column IQR when scaling is enabled. Columns whose
    /// IQR is not greater than `1e-10` are left unscaled to avoid dividing by
    /// (almost) zero.
    ///
    /// # Errors
    ///
    /// Returns an error when the scaler has not been fitted, when the number
    /// of columns in `x` differs from the number of fitted medians, or when
    /// the output matrix cannot be constructed.
    fn transform(&self, x: &Matrix<f32>) -> Result<Matrix<f32>> {
        let median = self
            .median
            .as_ref()
            .ok_or_else(|| AprenderError::from("Scaler not fitted"))?;
        let iqr = self
            .iqr
            .as_ref()
            .ok_or_else(|| AprenderError::from("Scaler not fitted"))?;

        let (n_samples, n_features) = x.shape();
        if n_features != median.len() {
            return Err("Feature dimension mismatch".into());
        }

        // Values are pushed in row-major order, matching the layout handed to
        // `Matrix::from_vec` below.
        let mut result = Vec::with_capacity(n_samples * n_features);
        for i in 0..n_samples {
            for j in 0..n_features {
                let mut val = x.get(i, j);
                if self.with_centering {
                    val -= median[j];
                }
                if self.with_scaling && iqr[j] > 1e-10 {
                    val /= iqr[j];
                }
                result.push(val);
            }
        }

        Matrix::from_vec(n_samples, n_features, result).map_err(Into::into)
    }
}
include!("pca.rs");
include!("tsne.rs");