use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::simd_ops::SimdUnifiedOps;
use crate::error::{Result, TransformError};
use crate::scaling::EPSILON;
/// Scaler that multiplies every feature by the reciprocal of the largest
/// absolute value observed for that feature during `fit`.
///
/// Both fields are `None` until `fit` has completed successfully.
pub struct SimdMaxAbsScaler<F: Float + NumCast + SimdUnifiedOps> {
/// Largest absolute value of each feature column, recorded by `fit`.
max_abs_: Option<Array1<F>>,
/// Per-feature multiplier used by `transform`: `1 / max_abs` when the
/// maximum exceeds `EPSILON`, otherwise `1`.
scale_: Option<Array1<F>>,
}
impl<F: Float + NumCast + SimdUnifiedOps> SimdMaxAbsScaler<F> {
pub fn new() -> Self {
SimdMaxAbsScaler {
max_abs_: None,
scale_: None,
}
}
pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if n_samples == 0 || n_features == 0 {
return Err(TransformError::InvalidInput("Empty input data".to_string()));
}
let mut max_abs = Array1::zeros(n_features);
for j in 0..n_features {
let col = x.column(j);
let col_array = col.to_owned();
let abs_col = F::simd_abs(&col_array.view());
max_abs[j] = F::simd_max_element(&abs_col.view());
}
let scale = max_abs.mapv(|max_abs_val| {
if max_abs_val > F::from(EPSILON).expect("Failed to convert to float") {
F::one() / max_abs_val
} else {
F::one()
}
});
self.max_abs_ = Some(max_abs);
self.scale_ = Some(scale);
Ok(())
}
pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if self.scale_.is_none() {
return Err(TransformError::TransformationError(
"Scaler has not been fitted".to_string(),
));
}
let scale = self.scale_.as_ref().expect("Operation failed");
if n_features != scale.len() {
return Err(TransformError::InvalidInput(format!(
"X has {} features, but scaler was fitted with {} features",
n_features,
scale.len()
)));
}
let mut result = Array2::zeros((n_samples, n_features));
for i in 0..n_samples {
let row = x.row(i);
let row_array = row.to_owned();
let scaled_row = F::simd_mul(&row_array.view(), &scale.view());
for j in 0..n_features {
result[[i, j]] = scaled_row[j];
}
}
Ok(result)
}
pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
self.fit(x)?;
self.transform(x)
}
}
/// Scaler that centers each feature on its median and scales it by the
/// reciprocal of its interquartile range.
///
/// All fields are `None` until `fit` has completed successfully.
pub struct SimdRobustScaler<F: Float + NumCast + SimdUnifiedOps> {
/// Median of each feature column, recorded by `fit`.
median_: Option<Array1<F>>,
/// Difference between the upper and lower quartile values that `fit`
/// picked from each sorted feature column.
iqr_: Option<Array1<F>>,
/// Per-feature multiplier used by `transform`: `1 / iqr` when the range
/// exceeds `EPSILON`, otherwise `1`.
scale_: Option<Array1<F>>,
}
impl<F: Float + NumCast + SimdUnifiedOps> SimdRobustScaler<F> {
pub fn new() -> Self {
SimdRobustScaler {
median_: None,
iqr_: None,
scale_: None,
}
}
pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if n_samples == 0 || n_features == 0 {
return Err(TransformError::InvalidInput("Empty input data".to_string()));
}
let mut median = Array1::zeros(n_features);
let mut iqr = Array1::zeros(n_features);
for j in 0..n_features {
let col = x.column(j);
let mut col_data: Vec<F> = col.to_vec();
col_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = col_data.len();
median[j] = if n % 2 == 0 {
(col_data[n / 2 - 1] + col_data[n / 2])
/ F::from(2.0).expect("Failed to convert constant to float")
} else {
col_data[n / 2]
};
let q1_idx = n / 4;
let q3_idx = 3 * n / 4;
let q1 = col_data[q1_idx];
let q3 = col_data[q3_idx];
iqr[j] = q3 - q1;
}
let scale = iqr.mapv(|iqr_val| {
if iqr_val > F::from(EPSILON).expect("Failed to convert to float") {
F::one() / iqr_val
} else {
F::one()
}
});
self.median_ = Some(median);
self.iqr_ = Some(iqr);
self.scale_ = Some(scale);
Ok(())
}
pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if self.median_.is_none() || self.scale_.is_none() {
return Err(TransformError::TransformationError(
"Scaler has not been fitted".to_string(),
));
}
let median = self.median_.as_ref().expect("Operation failed");
let scale = self.scale_.as_ref().expect("Operation failed");
if n_features != median.len() {
return Err(TransformError::InvalidInput(format!(
"X has {} features, but scaler was fitted with {} features",
n_features,
median.len()
)));
}
let mut result = Array2::zeros((n_samples, n_features));
for i in 0..n_samples {
let row = x.row(i);
let row_array = row.to_owned();
let centered = F::simd_sub(&row_array.view(), &median.view());
let scaled = F::simd_mul(¢ered.view(), &scale.view());
for j in 0..n_features {
result[[i, j]] = scaled[j];
}
}
Ok(result)
}
pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
self.fit(x)?;
self.transform(x)
}
}
/// Scaler that optionally subtracts a per-feature mean and optionally divides
/// by a per-feature standard deviation.
///
/// `mean_` and `std_` are `None` until `fit` has completed successfully.
pub struct SimdStandardScaler<F: Float + NumCast + SimdUnifiedOps> {
/// Per-feature mean recorded by `fit`; left at zero when `with_mean` is
/// `false`.
mean_: Option<Array1<F>>,
/// Per-feature divisor recorded by `fit`; left at one when `with_std` is
/// `false` or when the computed value does not exceed `EPSILON`.
std_: Option<Array1<F>>,
/// Whether `fit` computes the mean and `transform` subtracts it.
with_mean: bool,
/// Whether `fit` computes the divisor and `transform` divides by it.
with_std: bool,
}
impl<F: Float + NumCast + SimdUnifiedOps> SimdStandardScaler<F> {
pub fn new(with_mean: bool, with_std: bool) -> Self {
SimdStandardScaler {
mean_: None,
std_: None,
with_mean,
with_std,
}
}
pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if n_samples == 0 || n_features == 0 {
return Err(TransformError::InvalidInput("Empty input data".to_string()));
}
let n_samples_f = F::from(n_samples).expect("Failed to convert to float");
let mut mean = Array1::zeros(n_features);
let mut std = Array1::ones(n_features);
if self.with_mean {
for j in 0..n_features {
let col = x.column(j);
let col_array = col.to_owned();
mean[j] = F::simd_sum(&col_array.view()) / n_samples_f;
}
}
if self.with_std {
for j in 0..n_features {
let col = x.column(j);
let col_array = col.to_owned();
let m = if self.with_mean { mean[j] } else { F::zero() };
let mean_array = Array1::from_elem(n_samples, m);
let centered = F::simd_sub(&col_array.view(), &mean_array.view());
let squared = F::simd_mul(¢ered.view(), ¢ered.view());
let variance = F::simd_sum(&squared.view()) / n_samples_f;
std[j] = variance.sqrt();
if std[j] <= F::from(EPSILON).expect("Failed to convert to float") {
std[j] = F::one();
}
}
}
self.mean_ = Some(mean);
self.std_ = Some(std);
Ok(())
}
pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
let n_samples = x.shape()[0];
let n_features = x.shape()[1];
if self.mean_.is_none() || self.std_.is_none() {
return Err(TransformError::TransformationError(
"Scaler has not been fitted".to_string(),
));
}
let mean = self.mean_.as_ref().expect("Operation failed");
let std = self.std_.as_ref().expect("Operation failed");
if n_features != mean.len() {
return Err(TransformError::InvalidInput(format!(
"X has {} features, but scaler was fitted with {} features",
n_features,
mean.len()
)));
}
let mut result = Array2::zeros((n_samples, n_features));
for i in 0..n_samples {
let row = x.row(i);
let mut row_array = row.to_owned();
if self.with_mean {
row_array = F::simd_sub(&row_array.view(), &mean.view());
}
if self.with_std {
row_array = F::simd_div(&row_array.view(), &std.view());
}
for j in 0..n_features {
result[[i, j]] = row_array[j];
}
}
Ok(result)
}
pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<F>>
where
S: Data<Elem = F>,
{
self.fit(x)?;
self.transform(x)
}
}