use ferray::linalg::LinalgFloat;
use ferrolearn_core::error::FerroError;
use ferrolearn_core::traits::{Fit, FitTransform, Predict, Transform};
use ferrolearn_linear::{BayesianRidge, FittedBayesianRidge};
use ndarray::{Array1, Array2, ScalarOperand};
use num_traits::{Float, FromPrimitive};
pub trait ImputerFloat:
LinalgFloat + ScalarOperand + FromPrimitive + Send + Sync + 'static
{
}
impl<F> ImputerFloat for F where
F: LinalgFloat + ScalarOperand + FromPrimitive + Send + Sync + 'static
{
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitialStrategy {
Mean,
Median,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ImputationOrder {
Ascending,
Descending,
Roman,
Arabic,
}
#[must_use]
#[derive(Debug, Clone)]
pub struct IterativeImputer<F> {
max_iter: usize,
tol: F,
initial_strategy: InitialStrategy,
imputation_order: ImputationOrder,
min_value: F,
max_value: F,
}
impl<F: Float + Send + Sync + 'static> IterativeImputer<F> {
pub fn new(max_iter: usize, tol: F, initial_strategy: InitialStrategy) -> Self {
Self {
max_iter,
tol,
initial_strategy,
imputation_order: ImputationOrder::Ascending,
min_value: F::neg_infinity(),
max_value: F::infinity(),
}
}
pub fn with_imputation_order(mut self, order: ImputationOrder) -> Self {
self.imputation_order = order;
self
}
pub fn with_min_value(mut self, min_value: F) -> Self {
self.min_value = min_value;
self
}
pub fn with_max_value(mut self, max_value: F) -> Self {
self.max_value = max_value;
self
}
#[must_use]
pub fn max_iter(&self) -> usize {
self.max_iter
}
#[must_use]
pub fn tol(&self) -> F {
self.tol
}
#[must_use]
pub fn initial_strategy(&self) -> InitialStrategy {
self.initial_strategy
}
#[must_use]
pub fn imputation_order(&self) -> ImputationOrder {
self.imputation_order
}
#[must_use]
pub fn min_value(&self) -> F {
self.min_value
}
#[must_use]
pub fn max_value(&self) -> F {
self.max_value
}
}
impl<F: Float + Send + Sync + 'static> Default for IterativeImputer<F> {
fn default() -> Self {
Self::new(
10,
F::from(1e-3).unwrap_or_else(F::epsilon),
InitialStrategy::Mean,
)
}
}
#[derive(Debug, Clone)]
pub struct FittedIterativeImputer<F> {
initial_fill: Array1<F>,
imputation_sequence: Vec<ImputationStep<F>>,
n_iter: usize,
min_value: F,
max_value: F,
initial_strategy: InitialStrategy,
}
#[derive(Debug, Clone)]
struct ImputationStep<F> {
feat_idx: usize,
model: FittedBayesianRidge<F>,
}
impl<F: Float + Send + Sync + 'static> FittedIterativeImputer<F> {
#[must_use]
pub fn n_iter(&self) -> usize {
self.n_iter
}
#[must_use]
pub fn initial_fill(&self) -> &Array1<F> {
&self.initial_fill
}
#[must_use]
pub fn initial_strategy(&self) -> InitialStrategy {
self.initial_strategy
}
}
fn column_means_nan<F: Float>(x: &Array2<F>) -> Array1<F> {
let n_features = x.ncols();
let mut means = Array1::zeros(n_features);
for j in 0..n_features {
let col = x.column(j);
let mut sum = F::zero();
let mut count = 0usize;
for &v in col {
if !v.is_nan() {
sum = sum + v;
count += 1;
}
}
means[j] = if count > 0 {
sum / F::from(count).unwrap_or_else(F::one)
} else {
F::zero()
};
}
means
}
fn column_medians_nan<F: Float>(x: &Array2<F>) -> Array1<F> {
let n_features = x.ncols();
let mut medians = Array1::zeros(n_features);
for j in 0..n_features {
let col = x.column(j);
let mut vals: Vec<F> = col.iter().copied().filter(|v| !v.is_nan()).collect();
if vals.is_empty() {
medians[j] = F::zero();
} else {
vals.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = vals.len();
medians[j] = if n % 2 == 1 {
vals[n / 2]
} else {
(vals[n / 2 - 1] + vals[n / 2]) / (F::one() + F::one())
};
}
}
medians
}
fn initial_fill<F: Float>(x: &Array2<F>, fill: &Array1<F>) -> Array2<F> {
let mut out = x.to_owned();
for (mut col, &f) in out.columns_mut().into_iter().zip(fill.iter()) {
for v in &mut col {
if v.is_nan() {
*v = f;
}
}
}
out
}
fn ordered_feature_idx(missing_counts: &[usize], order: ImputationOrder) -> Vec<usize> {
let n_features = missing_counts.len();
match order {
ImputationOrder::Roman => (0..n_features).collect(),
ImputationOrder::Arabic => (0..n_features).rev().collect(),
ImputationOrder::Ascending => {
let mut idx: Vec<usize> = (0..n_features).collect();
idx.sort_by_key(|&j| missing_counts[j]);
idx
}
ImputationOrder::Descending => {
let mut idx: Vec<usize> = (0..n_features).collect();
idx.sort_by_key(|&j| missing_counts[j]);
idx.reverse();
idx
}
}
}
fn clip<F: Float>(v: F, min_value: F, max_value: F) -> F {
if v < min_value {
min_value
} else if v > max_value {
max_value
} else {
v
}
}
fn neighbor_feat_idx(n_features: usize, feat_idx: usize) -> Vec<usize> {
(0..n_features).filter(|&k| k != feat_idx).collect()
}
fn impute_one_feature<F>(
imputed: &mut Array2<F>,
mask: &Array2<bool>,
feat_idx: usize,
predictors: &[usize],
min_value: F,
max_value: F,
) -> Result<Option<FittedBayesianRidge<F>>, FerroError>
where
F: ImputerFloat,
{
let n_samples = imputed.nrows();
let n_predictors = predictors.len();
if n_predictors == 0 {
return Ok(None);
}
let observed_rows: Vec<usize> = (0..n_samples).filter(|&i| !mask[[i, feat_idx]]).collect();
if observed_rows.is_empty() {
return Ok(None);
}
let mut x_train = Array2::zeros((observed_rows.len(), n_predictors));
let mut y_train = Array1::zeros(observed_rows.len());
for (r, &i) in observed_rows.iter().enumerate() {
for (c, &k) in predictors.iter().enumerate() {
x_train[[r, c]] = imputed[[i, k]];
}
y_train[r] = imputed[[i, feat_idx]];
}
let model = BayesianRidge::<F>::new().fit(&x_train, &y_train)?;
let missing_rows: Vec<usize> = (0..n_samples).filter(|&i| mask[[i, feat_idx]]).collect();
if !missing_rows.is_empty() {
let mut x_test = Array2::zeros((missing_rows.len(), n_predictors));
for (r, &i) in missing_rows.iter().enumerate() {
for (c, &k) in predictors.iter().enumerate() {
x_test[[r, c]] = imputed[[i, k]];
}
}
let preds = model.predict(&x_test)?;
for (r, &i) in missing_rows.iter().enumerate() {
imputed[[i, feat_idx]] = clip(preds[r], min_value, max_value);
}
}
Ok(Some(model))
}
fn replay_one_feature<F>(
imputed: &mut Array2<F>,
mask: &Array2<bool>,
feat_idx: usize,
predictors: &[usize],
model: &FittedBayesianRidge<F>,
min_value: F,
max_value: F,
) -> Result<(), FerroError>
where
F: ImputerFloat,
{
let n_samples = imputed.nrows();
let missing_rows: Vec<usize> = (0..n_samples).filter(|&i| mask[[i, feat_idx]]).collect();
if missing_rows.is_empty() {
return Ok(());
}
let mut x_test = Array2::zeros((missing_rows.len(), predictors.len()));
for (r, &i) in missing_rows.iter().enumerate() {
for (c, &k) in predictors.iter().enumerate() {
x_test[[r, c]] = imputed[[i, k]];
}
}
let preds = model.predict(&x_test)?;
for (r, &i) in missing_rows.iter().enumerate() {
imputed[[i, feat_idx]] = clip(preds[r], min_value, max_value);
}
Ok(())
}
fn inf_norm_diff<F: Float>(a: &Array2<F>, b: &Array2<F>) -> F {
let mut m = F::zero();
for (&x, &y) in a.iter().zip(b.iter()) {
let d = (x - y).abs();
if d > m {
m = d;
}
}
m
}
fn max_abs_observed<F: Float>(x: &Array2<F>, mask: &Array2<bool>) -> F {
let mut m = F::zero();
for (&v, &is_missing) in x.iter().zip(mask.iter()) {
if !is_missing {
let a = v.abs();
if a > m {
m = a;
}
}
}
m
}
fn missing_mask_and_counts<F: Float>(x: &Array2<F>) -> (Array2<bool>, Vec<usize>) {
let (n_samples, n_features) = x.dim();
let mut mask = Array2::from_elem((n_samples, n_features), false);
let mut counts = vec![0usize; n_features];
for j in 0..n_features {
for i in 0..n_samples {
if x[[i, j]].is_nan() {
mask[[i, j]] = true;
counts[j] += 1;
}
}
}
(mask, counts)
}
impl<F: ImputerFloat> Fit<Array2<F>, ()> for IterativeImputer<F> {
type Fitted = FittedIterativeImputer<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedIterativeImputer<F>, FerroError> {
let n_samples = x.nrows();
if n_samples == 0 {
return Err(FerroError::InsufficientSamples {
required: 1,
actual: 0,
context: "IterativeImputer::fit".into(),
});
}
let n_features = x.ncols();
let fill_values = match self.initial_strategy {
InitialStrategy::Mean => column_means_nan(x),
InitialStrategy::Median => column_medians_nan(x),
};
let (mask, missing_counts) = missing_mask_and_counts(x);
let n_features_with_missing = missing_counts.iter().filter(|&&c| c > 0).count();
let mut imputed = initial_fill(x, &fill_values);
let mut imputation_sequence: Vec<ImputationStep<F>> = Vec::new();
let mut n_iter = 0usize;
if self.max_iter == 0 || n_features_with_missing == 0 || n_features <= 1 {
return Ok(FittedIterativeImputer {
initial_fill: fill_values,
imputation_sequence,
n_iter: 0,
min_value: self.min_value,
max_value: self.max_value,
initial_strategy: self.initial_strategy,
});
}
let normalized_tol = self.tol * max_abs_observed(x, &mask);
let order = ordered_feature_idx(&missing_counts, self.imputation_order);
let mut prev = imputed.clone();
for round in 1..=self.max_iter {
n_iter = round;
for &feat_idx in &order {
let predictors = neighbor_feat_idx(n_features, feat_idx);
if let Some(model) = impute_one_feature(
&mut imputed,
&mask,
feat_idx,
&predictors,
self.min_value,
self.max_value,
)? {
imputation_sequence.push(ImputationStep { feat_idx, model });
}
}
let inf_norm = inf_norm_diff(&imputed, &prev);
if inf_norm < normalized_tol {
break;
}
prev = imputed.clone();
}
Ok(FittedIterativeImputer {
initial_fill: fill_values,
imputation_sequence,
n_iter,
min_value: self.min_value,
max_value: self.max_value,
initial_strategy: self.initial_strategy,
})
}
}
impl<F: ImputerFloat> Transform<Array2<F>> for FittedIterativeImputer<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let n_features = self.initial_fill.len();
if x.ncols() != n_features {
return Err(FerroError::ShapeMismatch {
expected: vec![x.nrows(), n_features],
actual: vec![x.nrows(), x.ncols()],
context: "FittedIterativeImputer::transform".into(),
});
}
let mut imputed = initial_fill(x, &self.initial_fill);
if self.n_iter == 0 || self.imputation_sequence.is_empty() {
return Ok(imputed);
}
let (mask, _) = missing_mask_and_counts(x);
for step in &self.imputation_sequence {
let predictors = neighbor_feat_idx(n_features, step.feat_idx);
replay_one_feature(
&mut imputed,
&mask,
step.feat_idx,
&predictors,
&step.model,
self.min_value,
self.max_value,
)?;
}
Ok(imputed)
}
}
impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for IterativeImputer<F> {
type Output = Array2<F>;
type Error = FerroError;
fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
Err(FerroError::InvalidParameter {
name: "IterativeImputer".into(),
reason: "imputer must be fitted before calling transform; use fit() first".into(),
})
}
}
impl<F: ImputerFloat> FitTransform<Array2<F>> for IterativeImputer<F> {
type FitError = FerroError;
fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
let fitted = self.fit(x, &())?;
fitted.transform(x)
}
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::array;
#[test]
fn test_iterative_imputer_basic() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0], [3.0, f64::NAN], [f64::NAN, 6.0]];
let fitted = imputer.fit(&x, &()).unwrap();
let out = fitted.transform(&x).unwrap();
for v in &out {
assert!(!v.is_nan(), "Output contains NaN");
}
}
#[test]
fn test_iterative_imputer_no_missing() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
let fitted = imputer.fit(&x, &()).unwrap();
let out = fitted.transform(&x).unwrap();
for (a, b) in x.iter().zip(out.iter()) {
assert!((a - b).abs() < 1e-10);
}
}
#[test]
fn test_iterative_imputer_convergence() {
let imputer = IterativeImputer::<f64>::new(100, 1e-6, InitialStrategy::Mean);
let x = array![
[1.0, 2.0],
[2.0, 4.0],
[3.0, 6.0],
[4.0, f64::NAN],
[f64::NAN, 10.0]
];
let fitted = imputer.fit(&x, &()).unwrap();
let out = fitted.transform(&x).unwrap();
assert!(
(out[[3, 1]] - 8.0).abs() < 2.0,
"Expected ~8.0, got {}",
out[[3, 1]]
);
assert!(
(out[[4, 0]] - 5.0).abs() < 2.0,
"Expected ~5.0, got {}",
out[[4, 0]]
);
}
#[test]
fn test_iterative_imputer_median_strategy() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Median);
let x = array![[1.0, 10.0], [2.0, 20.0], [3.0, f64::NAN]];
let fitted = imputer.fit(&x, &()).unwrap();
let out = fitted.transform(&x).unwrap();
assert!(!out[[2, 1]].is_nan());
}
#[test]
fn test_iterative_imputer_fit_transform() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0], [3.0, f64::NAN], [f64::NAN, 6.0]];
let out = imputer.fit_transform(&x).unwrap();
for v in &out {
assert!(!v.is_nan());
}
}
#[test]
fn test_iterative_imputer_zero_rows_error() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x: Array2<f64> = Array2::zeros((0, 3));
assert!(imputer.fit(&x, &()).is_err());
}
#[test]
fn test_iterative_imputer_zero_max_iter_returns_initial_fill() {
let imputer = IterativeImputer::<f64>::new(0, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0], [f64::NAN, 3.0], [5.0, f64::NAN], [7.0, 8.0]];
let fit_res = imputer.fit(&x, &());
assert!(
fit_res.is_ok(),
"max_iter=0 must be accepted (sklearn parity), got {fit_res:?}"
);
let Ok(fitted) = fit_res else { return };
assert_eq!(fitted.n_iter(), 0);
let out_res = fitted.transform(&x);
assert!(
out_res.is_ok(),
"transform after max_iter=0 fit failed: {out_res:?}"
);
let Ok(out) = out_res else { return };
let mean_fill = 13.0 / 3.0;
let expected = array![[1.0, 2.0], [mean_fill, 3.0], [5.0, mean_fill], [7.0, 8.0]];
for (got, want) in out.iter().zip(expected.iter()) {
assert!(
(got - want).abs() < 1e-9,
"max_iter=0 output {got} != initial fill {want}"
);
}
}
#[test]
fn test_iterative_imputer_shape_mismatch_error() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x_train = array![[1.0, 2.0], [3.0, 4.0]];
let fitted = imputer.fit(&x_train, &()).unwrap();
let x_bad = array![[1.0, 2.0, 3.0]];
assert!(fitted.transform(&x_bad).is_err());
}
#[test]
fn test_iterative_imputer_unfitted_transform_error() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0]];
assert!(imputer.transform(&x).is_err());
}
#[test]
fn test_iterative_imputer_default() {
let imputer = IterativeImputer::<f64>::default();
assert_eq!(imputer.max_iter(), 10);
assert_eq!(imputer.initial_strategy(), InitialStrategy::Mean);
assert_eq!(imputer.imputation_order(), ImputationOrder::Ascending);
assert!(imputer.min_value().is_infinite() && imputer.min_value() < 0.0);
assert!(imputer.max_value().is_infinite() && imputer.max_value() > 0.0);
}
#[test]
fn test_iterative_imputer_n_iter_accessor() {
let imputer = IterativeImputer::<f64>::new(10, 1e-3, InitialStrategy::Mean);
let x = array![[1.0, 2.0], [3.0, f64::NAN], [5.0, 6.0]];
let fitted = imputer.fit(&x, &()).unwrap();
assert!(fitted.n_iter() > 0);
assert!(fitted.n_iter() <= 10);
}
#[test]
fn test_iterative_imputer_f32() {
let imputer = IterativeImputer::<f32>::new(10, 1e-3, InitialStrategy::Mean);
let x: Array2<f32> = array![[1.0f32, 2.0], [3.0, f32::NAN], [5.0, 6.0]];
let fitted = imputer.fit(&x, &()).unwrap();
let out = fitted.transform(&x).unwrap();
assert!(!out[[1, 1]].is_nan());
}
#[test]
fn test_ordered_feature_idx_ascending_stable() {
assert_eq!(
ordered_feature_idx(&[1, 2, 1], ImputationOrder::Ascending),
vec![0, 2, 1]
);
assert_eq!(
ordered_feature_idx(&[1, 2, 1], ImputationOrder::Roman),
vec![0, 1, 2]
);
assert_eq!(
ordered_feature_idx(&[1, 2, 1], ImputationOrder::Descending),
vec![1, 2, 0]
);
assert_eq!(
ordered_feature_idx(&[1, 2, 1], ImputationOrder::Arabic),
vec![2, 1, 0]
);
}
#[test]
fn test_clip_bounds() {
assert_eq!(clip(10.0, 0.0, 5.0), 5.0);
assert_eq!(clip(-2.0, 0.0, 5.0), 0.0);
assert_eq!(clip(3.0, 0.0, 5.0), 3.0);
assert_eq!(clip(3.0, f64::NEG_INFINITY, f64::INFINITY), 3.0);
}
}