use super::SklearnEstimator;
use crate::error::{MLError, Result};
use scirs2_core::ndarray::{Array1, Array2, Axis};
use std::collections::HashMap;
pub struct StandardScaler {
mean_: Option<Array1<f64>>,
scale_: Option<Array1<f64>>,
fitted: bool,
}
impl StandardScaler {
pub fn new() -> Self {
Self {
mean_: None,
scale_: None,
fitted: false,
}
}
}
impl Default for StandardScaler {
fn default() -> Self {
Self::new()
}
}
impl SklearnEstimator for StandardScaler {
#[allow(non_snake_case)]
fn fit(&mut self, X: &Array2<f64>, _y: Option<&Array1<f64>>) -> Result<()> {
let mean = X.mean_axis(Axis(0)).ok_or_else(|| {
MLError::InvalidInput("Cannot compute mean of empty array".to_string())
})?;
let std = X.std_axis(Axis(0), 0.0);
self.mean_ = Some(mean);
self.scale_ = Some(std);
self.fitted = true;
Ok(())
}
fn get_params(&self) -> HashMap<String, String> {
HashMap::new()
}
fn set_params(&mut self, _params: HashMap<String, String>) -> Result<()> {
Ok(())
}
fn is_fitted(&self) -> bool {
self.fitted
}
}
pub struct MinMaxScaler {
min: Option<Array1<f64>>,
max: Option<Array1<f64>>,
feature_range: (f64, f64),
fitted: bool,
}
impl MinMaxScaler {
pub fn new() -> Self {
Self {
min: None,
max: None,
feature_range: (0.0, 1.0),
fitted: false,
}
}
pub fn feature_range(mut self, min_val: f64, max_val: f64) -> Self {
self.feature_range = (min_val, max_val);
self
}
#[allow(non_snake_case)]
pub fn fit(&mut self, X: &Array2<f64>) -> Result<()> {
let n_features = X.ncols();
let mut min = Array1::zeros(n_features);
let mut max = Array1::zeros(n_features);
for j in 0..n_features {
let col = X.column(j);
min[j] = col.iter().cloned().fold(f64::INFINITY, f64::min);
max[j] = col.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
}
self.min = Some(min);
self.max = Some(max);
self.fitted = true;
Ok(())
}
#[allow(non_snake_case)]
pub fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
let min = self
.min
.as_ref()
.ok_or_else(|| MLError::ModelNotTrained("Scaler not fitted".to_string()))?;
let max = self
.max
.as_ref()
.ok_or_else(|| MLError::ModelNotTrained("Scaler not fitted".to_string()))?;
let (range_min, range_max) = self.feature_range;
let mut result = X.clone();
for j in 0..X.ncols() {
let scale = if (max[j] - min[j]).abs() > 1e-10 {
(range_max - range_min) / (max[j] - min[j])
} else {
1.0
};
for i in 0..X.nrows() {
result[[i, j]] = (X[[i, j]] - min[j]) * scale + range_min;
}
}
Ok(result)
}
#[allow(non_snake_case)]
pub fn fit_transform(&mut self, X: &Array2<f64>) -> Result<Array2<f64>> {
self.fit(X)?;
self.transform(X)
}
}
impl Default for MinMaxScaler {
fn default() -> Self {
Self::new()
}
}
impl SklearnEstimator for MinMaxScaler {
#[allow(non_snake_case)]
fn fit(&mut self, X: &Array2<f64>, _y: Option<&Array1<f64>>) -> Result<()> {
MinMaxScaler::fit(self, X)
}
fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert(
"feature_range_min".to_string(),
self.feature_range.0.to_string(),
);
params.insert(
"feature_range_max".to_string(),
self.feature_range.1.to_string(),
);
params
}
fn set_params(&mut self, params: HashMap<String, String>) -> Result<()> {
if let Some(min_str) = params.get("feature_range_min") {
if let Some(max_str) = params.get("feature_range_max") {
let min_val: f64 = min_str.parse().map_err(|_| {
MLError::InvalidConfiguration("Invalid feature_range_min".to_string())
})?;
let max_val: f64 = max_str.parse().map_err(|_| {
MLError::InvalidConfiguration("Invalid feature_range_max".to_string())
})?;
self.feature_range = (min_val, max_val);
}
}
Ok(())
}
fn is_fitted(&self) -> bool {
self.fitted
}
}
pub struct RobustScaler {
center: Option<Array1<f64>>,
scale: Option<Array1<f64>>,
with_centering: bool,
with_scaling: bool,
fitted: bool,
}
impl RobustScaler {
pub fn new() -> Self {
Self {
center: None,
scale: None,
with_centering: true,
with_scaling: true,
fitted: false,
}
}
#[allow(non_snake_case)]
pub fn fit(&mut self, X: &Array2<f64>) -> Result<()> {
let n_features = X.ncols();
let mut center = Array1::zeros(n_features);
let mut scale = Array1::zeros(n_features);
for j in 0..n_features {
let mut col: Vec<f64> = X.column(j).iter().cloned().collect();
col.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let n = col.len();
center[j] = col[n / 2];
let q1 = col[n / 4];
let q3 = col[3 * n / 4];
scale[j] = if (q3 - q1).abs() > 1e-10 {
q3 - q1
} else {
1.0
};
}
self.center = Some(center);
self.scale = Some(scale);
self.fitted = true;
Ok(())
}
#[allow(non_snake_case)]
pub fn transform(&self, X: &Array2<f64>) -> Result<Array2<f64>> {
let center = self
.center
.as_ref()
.ok_or_else(|| MLError::ModelNotTrained("Scaler not fitted".to_string()))?;
let scale = self
.scale
.as_ref()
.ok_or_else(|| MLError::ModelNotTrained("Scaler not fitted".to_string()))?;
let mut result = X.clone();
for j in 0..X.ncols() {
for i in 0..X.nrows() {
result[[i, j]] = if self.with_centering {
(X[[i, j]] - center[j]) / scale[j]
} else {
X[[i, j]] / scale[j]
};
}
}
Ok(result)
}
#[allow(non_snake_case)]
pub fn fit_transform(&mut self, X: &Array2<f64>) -> Result<Array2<f64>> {
self.fit(X)?;
self.transform(X)
}
}
impl Default for RobustScaler {
fn default() -> Self {
Self::new()
}
}
pub struct LabelEncoder {
classes: Vec<String>,
fitted: bool,
}
impl LabelEncoder {
pub fn new() -> Self {
Self {
classes: Vec::new(),
fitted: false,
}
}
pub fn fit(&mut self, y: &[String]) {
let mut classes: Vec<String> = y.iter().cloned().collect();
classes.sort();
classes.dedup();
self.classes = classes;
self.fitted = true;
}
pub fn transform(&self, y: &[String]) -> Result<Array1<i32>> {
if !self.fitted {
return Err(MLError::ModelNotTrained("Encoder not fitted".to_string()));
}
let encoded: Vec<i32> = y
.iter()
.map(|label| {
self.classes
.iter()
.position(|c| c == label)
.map(|p| p as i32)
.unwrap_or(-1)
})
.collect();
Ok(Array1::from_vec(encoded))
}
pub fn inverse_transform(&self, y: &Array1<i32>) -> Result<Vec<String>> {
if !self.fitted {
return Err(MLError::ModelNotTrained("Encoder not fitted".to_string()));
}
let decoded: Vec<String> = y
.iter()
.map(|&idx| {
if idx >= 0 && (idx as usize) < self.classes.len() {
self.classes[idx as usize].clone()
} else {
"unknown".to_string()
}
})
.collect();
Ok(decoded)
}
pub fn fit_transform(&mut self, y: &[String]) -> Result<Array1<i32>> {
self.fit(y);
self.transform(y)
}
pub fn classes(&self) -> &[String] {
&self.classes
}
pub fn is_fitted(&self) -> bool {
self.fitted
}
}
impl Default for LabelEncoder {
fn default() -> Self {
Self::new()
}
}
pub struct OneHotEncoder {
categories: Vec<Vec<String>>,
fitted: bool,
sparse: bool,
}
impl OneHotEncoder {
pub fn new() -> Self {
Self {
categories: Vec::new(),
fitted: false,
sparse: false,
}
}
pub fn sparse(mut self, sparse: bool) -> Self {
self.sparse = sparse;
self
}
#[allow(non_snake_case)]
pub fn fit(&mut self, X: &Array2<String>) {
self.categories = Vec::new();
for j in 0..X.ncols() {
let mut cats: Vec<String> = X.column(j).iter().cloned().collect();
cats.sort();
cats.dedup();
self.categories.push(cats);
}
self.fitted = true;
}
#[allow(non_snake_case)]
pub fn transform(&self, X: &Array2<String>) -> Result<Array2<f64>> {
if !self.fitted {
return Err(MLError::ModelNotTrained("Encoder not fitted".to_string()));
}
let total_cols: usize = self.categories.iter().map(|c| c.len()).sum();
let mut result = Array2::zeros((X.nrows(), total_cols));
let mut col_offset = 0;
for j in 0..X.ncols() {
let cats = &self.categories[j];
for i in 0..X.nrows() {
if let Some(idx) = cats.iter().position(|c| c == &X[[i, j]]) {
result[[i, col_offset + idx]] = 1.0;
}
}
col_offset += cats.len();
}
Ok(result)
}
#[allow(non_snake_case)]
pub fn fit_transform(&mut self, X: &Array2<String>) -> Result<Array2<f64>> {
self.fit(X);
self.transform(X)
}
}
impl Default for OneHotEncoder {
fn default() -> Self {
Self::new()
}
}
pub struct QuantumFeatureEncoder {
encoding_type: String,
normalization: String,
fitted: bool,
}
impl QuantumFeatureEncoder {
pub fn new(encoding_type: &str, normalization: &str) -> Self {
Self {
encoding_type: encoding_type.to_string(),
normalization: normalization.to_string(),
fitted: false,
}
}
}
impl SklearnEstimator for QuantumFeatureEncoder {
#[allow(non_snake_case)]
fn fit(&mut self, _X: &Array2<f64>, _y: Option<&Array1<f64>>) -> Result<()> {
self.fitted = true;
Ok(())
}
fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("encoding_type".to_string(), self.encoding_type.clone());
params.insert("normalization".to_string(), self.normalization.clone());
params
}
fn set_params(&mut self, params: HashMap<String, String>) -> Result<()> {
for (key, value) in params {
match key.as_str() {
"encoding_type" => {
self.encoding_type = value;
}
"normalization" => {
self.normalization = value;
}
_ => {}
}
}
Ok(())
}
fn is_fitted(&self) -> bool {
self.fitted
}
}