use ferrolearn_core::error::FerroError;
use ferrolearn_core::introspection::HasCoefficients;
use ferrolearn_core::pipeline::{FittedPipelineEstimator, PipelineEstimator};
use ferrolearn_core::traits::{Fit, PartialFit, Predict};
use ndarray::{Array1, Array2, ScalarOperand};
use num_traits::{Float, FromPrimitive, ToPrimitive};
use rand::SeedableRng;
use rand::seq::SliceRandom;
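/// A per-sample loss with its derivative with respect to the prediction.
///
/// `loss(y_true, y_pred)` returns the penalty for a single sample and
/// `gradient(y_true, y_pred)` its derivative d loss / d y_pred. The
/// classification losses below assume labels encoded as -1/+1 and act on the
/// margin `z = y_true * y_pred`; the regression losses act on the residual
/// `y_true - y_pred`.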
pub trait Loss<F: Float>: Clone + Send + Sync {
fn loss(&self, y_true: F, y_pred: F) -> F;
fn gradient(&self, y_true: F, y_pred: F) -> F;
}
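/// Hinge loss (linear SVM): `L(z) = max(0, 1 - z)` with margin
/// `z = y_true * y_pred`. The subgradient is `-y_true` inside the margin
/// (`z < 1`) and zero otherwise.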
#[derive(Debug, Clone, Copy)]
pub struct Hinge;
impl<F: Float> Loss<F> for Hinge {
fn loss(&self, y_true: F, y_pred: F) -> F {
let margin = y_true * y_pred;
if margin < F::one() {
F::one() - margin
} else {
F::zero()
}
}
fn gradient(&self, y_true: F, y_pred: F) -> F {
let margin = y_true * y_pred;
if margin < F::one() {
-y_true
} else {
F::zero()
}
}
}
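/// Logistic loss: `L(z) = ln(1 + exp(-z))` with margin `z = y_true * y_pred`.
/// The branches below avoid overflow and precision loss for `|z| > 18`, where
/// the exact value is numerically indistinguishable from its asymptote
/// (`exp(-z)` for large positive `z`, `-z` for large negative `z`).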
#[derive(Debug, Clone, Copy)]
pub struct LogLoss;
impl<F: Float> Loss<F> for LogLoss {
fn loss(&self, y_true: F, y_pred: F) -> F {
let z = y_true * y_pred;
if z > F::from(18.0).unwrap() {
(-z).exp()
} else if z < F::from(-18.0).unwrap() {
-z
} else {
(F::one() + (-z).exp()).ln()
}
}
    fn gradient(&self, y_true: F, y_pred: F) -> F {
        let z = y_true * y_pred;
        // exp(-z) overflows for very negative z, so cap it there; the ratio
        // below then saturates to 1 and the gradient to -y_true. (For large
        // positive z, (-z).exp() is already tiny and needs no special case.)
        let exp_nz = if z < F::from(-18.0).unwrap() {
            F::from(1e18).unwrap()
        } else {
            (-z).exp()
        };
        -y_true * exp_nz / (F::one() + exp_nz)
    }
}
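/// Squared error: `L = 0.5 * (y_true - y_pred)^2`, so the gradient with
/// respect to the prediction is simply the residual `y_pred - y_true`.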
#[derive(Debug, Clone, Copy)]
pub struct SquaredError;
impl<F: Float> Loss<F> for SquaredError {
fn loss(&self, y_true: F, y_pred: F) -> F {
let diff = y_true - y_pred;
F::from(0.5).unwrap() * diff * diff
}
fn gradient(&self, y_true: F, y_pred: F) -> F {
y_pred - y_true
}
}
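/// Modified Huber loss (smoothed hinge): quadratic `(1 - z)^2` for
/// `-1 <= z < 1`, zero for `z >= 1`, and linear `-4z` for `z < -1`, which
/// makes it more tolerant of outliers than the plain squared hinge.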
#[derive(Debug, Clone, Copy)]
pub struct ModifiedHuber;
impl<F: Float> Loss<F> for ModifiedHuber {
fn loss(&self, y_true: F, y_pred: F) -> F {
let z = y_true * y_pred;
if z >= -F::one() {
let margin = F::one() - z;
if margin > F::zero() {
margin * margin
} else {
F::zero()
}
} else {
-F::from(4.0).unwrap() * z
}
}
fn gradient(&self, y_true: F, y_pred: F) -> F {
let z = y_true * y_pred;
if z >= -F::one() {
if z < F::one() {
F::from(-2.0).unwrap() * y_true * (F::one() - z)
} else {
F::zero()
}
} else {
-F::from(4.0).unwrap() * y_true
}
}
}
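/// Huber loss for regression: quadratic for residuals with
/// `|y_true - y_pred| <= epsilon`, linear beyond that, giving squared-error
/// behavior near the fit and robustness to outliers.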
#[derive(Debug, Clone, Copy)]
pub struct Huber<F> {
pub epsilon: F,
}
impl<F: Float + Send + Sync> Loss<F> for Huber<F> {
fn loss(&self, y_true: F, y_pred: F) -> F {
let diff = y_true - y_pred;
let abs_diff = diff.abs();
if abs_diff <= self.epsilon {
F::from(0.5).unwrap() * diff * diff
} else {
self.epsilon * (abs_diff - F::from(0.5).unwrap() * self.epsilon)
}
}
fn gradient(&self, y_true: F, y_pred: F) -> F {
let diff = y_pred - y_true;
let abs_diff = diff.abs();
if abs_diff <= self.epsilon {
diff
} else if diff > F::zero() {
self.epsilon
} else {
-self.epsilon
}
}
}
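/// Epsilon-insensitive loss (as in SVR): residuals no larger than `epsilon`
/// cost nothing; beyond that the penalty grows linearly as
/// `|y_true - y_pred| - epsilon`.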
#[derive(Debug, Clone, Copy)]
pub struct EpsilonInsensitive<F> {
pub epsilon: F,
}
impl<F: Float + Send + Sync> Loss<F> for EpsilonInsensitive<F> {
fn loss(&self, y_true: F, y_pred: F) -> F {
let diff = (y_true - y_pred).abs();
if diff > self.epsilon {
diff - self.epsilon
} else {
F::zero()
}
}
fn gradient(&self, y_true: F, y_pred: F) -> F {
let diff = y_pred - y_true;
if diff > self.epsilon {
F::one()
} else if diff < -self.epsilon {
-F::one()
} else {
F::zero()
}
}
}
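/// Step-size schedule used by both estimators, with `t` the running sample
/// counter:
///
/// - `Constant`: `eta = eta0`
/// - `Optimal`: `eta = 1 / (alpha * t)`
/// - `InvScaling`: `eta = eta0 / t^power_t`
/// - `Adaptive`: starts at `eta0` and is halved after 5 epochs without
///   improvement (handled in the training loop, not in `compute_lr`)
///
/// The hidden `_Phantom` variant only keeps the `F` type parameter in use;
/// it is never constructed.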
#[derive(Debug, Clone, Copy)]
pub enum LearningRateSchedule<F> {
Constant,
Optimal,
InvScaling,
Adaptive,
#[doc(hidden)]
_Phantom(std::marker::PhantomData<F>),
}
fn compute_lr<F: Float>(
schedule: &LearningRateSchedule<F>,
eta0: F,
alpha: F,
power_t: F,
t: usize,
) -> F {
let t_f = F::from(t.max(1)).unwrap();
match schedule {
LearningRateSchedule::Constant => eta0,
        LearningRateSchedule::Optimal => {
            // alpha == 0 passes validation but would give an infinite step
            // size here; fall back to eta0 in that case.
            let denom = alpha * t_f;
            if denom > F::zero() {
                F::one() / denom
            } else {
                eta0
            }
        }
LearningRateSchedule::InvScaling => eta0 / t_f.powf(power_t),
LearningRateSchedule::Adaptive => eta0,
LearningRateSchedule::_Phantom(_) => unreachable!(),
}
}
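/// Loss selector for `SGDClassifier`; dispatched to the concrete `Loss`
/// implementations in `dispatch_train_binary`.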
#[derive(Debug, Clone, Copy)]
pub enum ClassifierLoss {
Hinge,
Log,
SquaredError,
ModifiedHuber,
}
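/// Loss selector for `SGDRegressor`; `Huber` and `EpsilonInsensitive` carry
/// their epsilon parameter.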
#[derive(Debug, Clone, Copy)]
pub enum RegressorLoss<F> {
SquaredError,
Huber(F),
EpsilonInsensitive(F),
}
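/// Linear classifier trained by stochastic gradient descent; multiclass
/// problems are handled one-vs-all.
///
/// A minimal usage sketch (the import path is illustrative and the example is
/// marked `ignore` so it is not compiled as a doctest):
///
/// ```ignore
/// use ndarray::array;
///
/// let x = array![[-2.0, -1.0], [-1.5, -1.0], [1.0, 2.0], [1.5, 2.0]];
/// let y = array![0_usize, 0, 1, 1];
/// let fitted = SGDClassifier::<f64>::new()
///     .with_loss(ClassifierLoss::Log)
///     .with_random_state(0)
///     .fit(&x, &y)
///     .unwrap();
/// let preds = fitted.predict(&x).unwrap();
/// ```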
#[derive(Debug, Clone)]
pub struct SGDClassifier<F> {
pub loss: ClassifierLoss,
pub learning_rate: LearningRateSchedule<F>,
pub eta0: F,
pub alpha: F,
pub max_iter: usize,
pub tol: F,
pub random_state: Option<u64>,
pub power_t: F,
}
impl<F: Float> SGDClassifier<F> {
#[must_use]
pub fn new() -> Self {
Self {
loss: ClassifierLoss::Hinge,
learning_rate: LearningRateSchedule::InvScaling,
eta0: F::from(0.01).unwrap(),
alpha: F::from(0.0001).unwrap(),
max_iter: 1000,
tol: F::from(1e-3).unwrap(),
random_state: None,
power_t: F::from(0.25).unwrap(),
}
}
#[must_use]
pub fn with_loss(mut self, loss: ClassifierLoss) -> Self {
self.loss = loss;
self
}
#[must_use]
pub fn with_learning_rate(mut self, lr: LearningRateSchedule<F>) -> Self {
self.learning_rate = lr;
self
}
#[must_use]
pub fn with_eta0(mut self, eta0: F) -> Self {
self.eta0 = eta0;
self
}
#[must_use]
pub fn with_alpha(mut self, alpha: F) -> Self {
self.alpha = alpha;
self
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_tol(mut self, tol: F) -> Self {
self.tol = tol;
self
}
#[must_use]
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
#[must_use]
pub fn with_power_t(mut self, power_t: F) -> Self {
self.power_t = power_t;
self
}
}
impl<F: Float> Default for SGDClassifier<F> {
fn default() -> Self {
Self::new()
}
}
fn clf_hyper<F: Float>(clf: &SGDClassifier<F>) -> SGDHyper<F> {
SGDHyper {
learning_rate: clf.learning_rate,
eta0: clf.eta0,
alpha: clf.alpha,
max_iter: clf.max_iter,
tol: clf.tol,
random_state: clf.random_state,
power_t: clf.power_t,
}
}
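/// Hyperparameters shared by the classifier and regressor training loops.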
#[derive(Debug, Clone)]
struct SGDHyper<F> {
learning_rate: LearningRateSchedule<F>,
eta0: F,
alpha: F,
max_iter: usize,
tol: F,
random_state: Option<u64>,
power_t: F,
}
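/// Core SGD loop over a single weight vector. For each sample `i` it takes the
/// step
///
/// ```text
/// w <- w - eta * (dL/dy_pred * x_i + alpha * w)   // L2 penalty folded in
/// b <- b - eta * dL/dy_pred
/// ```
///
/// and stops early once the mean epoch loss changes by less than `tol`, or,
/// under the `Adaptive` schedule, once the repeatedly halved step size drops
/// below 1e-6.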
fn train_binary_sgd<F, L>(
x: &Array2<F>,
y_binary: &Array1<F>,
weights: &mut Array1<F>,
intercept: &mut F,
loss_fn: &L,
hyper: &SGDHyper<F>,
initial_t: usize,
) -> (F, usize)
where
F: Float + ScalarOperand + Send + Sync + 'static,
L: Loss<F>,
{
let n_samples = x.nrows();
let n_features = x.ncols();
let mut t = initial_t;
let mut prev_loss = F::infinity();
let mut current_eta = hyper.eta0;
let mut no_improve_count: usize = 0;
let mut indices: Vec<usize> = (0..n_samples).collect();
let mut rng = match hyper.random_state {
Some(seed) => rand::rngs::StdRng::seed_from_u64(seed),
None => rand::rngs::StdRng::from_os_rng(),
};
let mut total_loss = F::zero();
for _epoch in 0..hyper.max_iter {
indices.shuffle(&mut rng);
let mut epoch_loss = F::zero();
for &i in &indices {
t += 1;
let eta = match hyper.learning_rate {
LearningRateSchedule::Adaptive => current_eta,
_ => compute_lr(
&hyper.learning_rate,
hyper.eta0,
hyper.alpha,
hyper.power_t,
t,
),
};
let mut y_pred = *intercept;
let xi = x.row(i);
for j in 0..n_features {
y_pred = y_pred + weights[j] * xi[j];
}
let grad = loss_fn.gradient(y_binary[i], y_pred);
epoch_loss = epoch_loss + loss_fn.loss(y_binary[i], y_pred);
for j in 0..n_features {
weights[j] = weights[j] - eta * (grad * xi[j] + hyper.alpha * weights[j]);
}
*intercept = *intercept - eta * grad;
}
epoch_loss = epoch_loss / F::from(n_samples).unwrap();
total_loss = epoch_loss;
if (prev_loss - epoch_loss).abs() < hyper.tol {
break;
}
if let LearningRateSchedule::Adaptive = hyper.learning_rate {
if epoch_loss >= prev_loss {
no_improve_count += 1;
if no_improve_count >= 5 {
current_eta = current_eta / F::from(2.0).unwrap();
no_improve_count = 0;
if current_eta < F::from(1e-6).unwrap() {
break;
}
}
} else {
no_improve_count = 0;
}
}
prev_loss = epoch_loss;
}
(total_loss, t)
}
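/// A trained `SGDClassifier`: one weight vector and intercept per class
/// (a single pair in the binary case), plus the hyperparameters and sample
/// counter `t` needed to resume training via `partial_fit`.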
#[derive(Debug, Clone)]
pub struct FittedSGDClassifier<F> {
weight_matrix: Vec<Array1<F>>,
intercepts: Vec<F>,
classes: Vec<usize>,
n_features: usize,
loss: ClassifierLoss,
hyper: SGDHyper<F>,
t: usize,
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> Fit<Array2<F>, Array1<usize>>
for SGDClassifier<F>
{
type Fitted = FittedSGDClassifier<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array1<usize>) -> Result<FittedSGDClassifier<F>, FerroError> {
validate_clf_params(x, y, self.eta0, self.alpha)?;
let n_features = x.ncols();
let mut classes: Vec<usize> = y.to_vec();
classes.sort_unstable();
classes.dedup();
if classes.len() < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: classes.len(),
context: "SGDClassifier requires at least 2 distinct classes".into(),
});
}
let hyper = clf_hyper(self);
let loss_enum = self.loss;
let (weight_matrix, intercepts, t) =
fit_ova(x, y, &classes, n_features, &loss_enum, &hyper, 0)?;
Ok(FittedSGDClassifier {
weight_matrix,
intercepts,
classes,
n_features,
loss: loss_enum,
hyper,
t,
})
}
}
fn validate_clf_params<F: Float>(
x: &Array2<F>,
y: &Array1<usize>,
eta0: F,
alpha: F,
) -> Result<(), FerroError> {
let n_samples = x.nrows();
if n_samples != y.len() {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples],
actual: vec![y.len()],
context: "y length must match number of samples in X".into(),
});
}
if n_samples == 0 {
return Err(FerroError::InsufficientSamples {
required: 1,
actual: 0,
context: "SGDClassifier requires at least one sample".into(),
});
}
if eta0 <= F::zero() {
return Err(FerroError::InvalidParameter {
name: "eta0".into(),
reason: "must be positive".into(),
});
}
if alpha < F::zero() {
return Err(FerroError::InvalidParameter {
name: "alpha".into(),
reason: "must be non-negative".into(),
});
}
Ok(())
}
type OvaResult<F> = (Vec<Array1<F>>, Vec<F>, usize);
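/// One-vs-all training: the binary case fits a single hyperplane separating
/// `classes[1]` (+1) from `classes[0]` (-1); otherwise one binary problem is
/// solved per class and prediction later picks the highest-scoring class.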
fn fit_ova<F: Float + Send + Sync + ScalarOperand + 'static>(
x: &Array2<F>,
y: &Array1<usize>,
classes: &[usize],
n_features: usize,
loss_enum: &ClassifierLoss,
hyper: &SGDHyper<F>,
initial_t: usize,
) -> Result<OvaResult<F>, FerroError> {
let n_classes = classes.len();
let mut weight_matrix: Vec<Array1<F>> = Vec::with_capacity(n_classes);
let mut intercepts: Vec<F> = Vec::with_capacity(n_classes);
let mut global_t = initial_t;
if n_classes == 2 {
let y_binary: Array1<F> = y.mapv(|label| {
if label == classes[1] {
F::one()
} else {
-F::one()
}
});
let mut w = Array1::<F>::zeros(n_features);
let mut b = F::zero();
let (_, t) =
dispatch_train_binary(x, &y_binary, &mut w, &mut b, loss_enum, hyper, global_t);
global_t = t;
weight_matrix.push(w);
intercepts.push(b);
} else {
for &cls in classes {
let y_binary: Array1<F> =
y.mapv(|label| if label == cls { F::one() } else { -F::one() });
let mut w = Array1::<F>::zeros(n_features);
let mut b = F::zero();
let (_, t) =
dispatch_train_binary(x, &y_binary, &mut w, &mut b, loss_enum, hyper, global_t);
global_t = t;
weight_matrix.push(w);
intercepts.push(b);
}
}
Ok((weight_matrix, intercepts, global_t))
}
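/// Incremental counterpart of `fit_ova`: updates the existing per-class
/// weights in place. Callers set `max_iter = 1` so each call performs exactly
/// one pass over the given minibatch.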
#[allow(clippy::too_many_arguments)]
fn partial_fit_ova<F: Float + Send + Sync + ScalarOperand + 'static>(
x: &Array2<F>,
y: &Array1<usize>,
classes: &[usize],
weight_matrix: &mut [Array1<F>],
intercepts: &mut [F],
loss_enum: &ClassifierLoss,
hyper: &SGDHyper<F>,
initial_t: usize,
) -> usize {
let n_classes = classes.len();
let mut global_t = initial_t;
if n_classes == 2 {
let y_binary: Array1<F> = y.mapv(|label| {
if label == classes[1] {
F::one()
} else {
-F::one()
}
});
let (_, t) = dispatch_train_binary(
x,
&y_binary,
&mut weight_matrix[0],
&mut intercepts[0],
loss_enum,
hyper,
global_t,
);
global_t = t;
} else {
for (idx, &cls) in classes.iter().enumerate() {
let y_binary: Array1<F> =
y.mapv(|label| if label == cls { F::one() } else { -F::one() });
let (_, t) = dispatch_train_binary(
x,
&y_binary,
&mut weight_matrix[idx],
&mut intercepts[idx],
loss_enum,
hyper,
global_t,
);
global_t = t;
}
}
global_t
}
fn dispatch_train_binary<F: Float + Send + Sync + ScalarOperand + 'static>(
x: &Array2<F>,
y_binary: &Array1<F>,
w: &mut Array1<F>,
b: &mut F,
loss_enum: &ClassifierLoss,
hyper: &SGDHyper<F>,
initial_t: usize,
) -> (F, usize) {
match loss_enum {
ClassifierLoss::Hinge => train_binary_sgd(x, y_binary, w, b, &Hinge, hyper, initial_t),
ClassifierLoss::Log => train_binary_sgd(x, y_binary, w, b, &LogLoss, hyper, initial_t),
ClassifierLoss::SquaredError => {
train_binary_sgd(x, y_binary, w, b, &SquaredError, hyper, initial_t)
}
ClassifierLoss::ModifiedHuber => {
train_binary_sgd(x, y_binary, w, b, &ModifiedHuber, hyper, initial_t)
}
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> Predict<Array2<F>>
for FittedSGDClassifier<F>
{
type Output = Array1<usize>;
type Error = FerroError;
fn predict(&self, x: &Array2<F>) -> Result<Array1<usize>, FerroError> {
let n_features = x.ncols();
if n_features != self.n_features {
return Err(FerroError::ShapeMismatch {
expected: vec![self.n_features],
actual: vec![n_features],
context: "number of features must match fitted model".into(),
});
}
let n_samples = x.nrows();
let mut predictions = Array1::<usize>::zeros(n_samples);
if self.classes.len() == 2 {
let scores = x.dot(&self.weight_matrix[0]) + self.intercepts[0];
for i in 0..n_samples {
predictions[i] = if scores[i] >= F::zero() {
self.classes[1]
} else {
self.classes[0]
};
}
} else {
for i in 0..n_samples {
let xi = x.row(i);
let mut best_class = 0;
let mut best_score = F::neg_infinity();
for (c, w) in self.weight_matrix.iter().enumerate() {
let score = xi.dot(w) + self.intercepts[c];
if score > best_score {
best_score = score;
best_class = c;
}
}
predictions[i] = self.classes[best_class];
}
}
Ok(predictions)
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> PartialFit<Array2<F>, Array1<usize>>
for FittedSGDClassifier<F>
{
type FitResult = FittedSGDClassifier<F>;
type Error = FerroError;
fn partial_fit(
mut self,
x: &Array2<F>,
y: &Array1<usize>,
) -> Result<FittedSGDClassifier<F>, FerroError> {
let n_samples = x.nrows();
if n_samples != y.len() {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples],
actual: vec![y.len()],
context: "y length must match number of samples in X".into(),
});
}
if x.ncols() != self.n_features {
return Err(FerroError::ShapeMismatch {
expected: vec![self.n_features],
actual: vec![x.ncols()],
context: "number of features must match fitted model".into(),
});
}
let mut hyper = self.hyper.clone();
hyper.max_iter = 1;
let t = partial_fit_ova(
x,
y,
&self.classes,
&mut self.weight_matrix,
&mut self.intercepts,
&self.loss,
&hyper,
self.t,
);
self.t = t;
Ok(self)
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> PartialFit<Array2<F>, Array1<usize>>
for SGDClassifier<F>
{
type FitResult = FittedSGDClassifier<F>;
type Error = FerroError;
fn partial_fit(
self,
x: &Array2<F>,
y: &Array1<usize>,
) -> Result<FittedSGDClassifier<F>, FerroError> {
validate_clf_params(x, y, self.eta0, self.alpha)?;
let n_features = x.ncols();
let mut classes: Vec<usize> = y.to_vec();
classes.sort_unstable();
classes.dedup();
if classes.len() < 2 {
return Err(FerroError::InsufficientSamples {
required: 2,
actual: classes.len(),
context: "SGDClassifier requires at least 2 distinct classes".into(),
});
}
let mut hyper = clf_hyper(&self);
hyper.max_iter = 1;
let loss_enum = self.loss;
let (weight_matrix, intercepts, t) =
fit_ova(x, y, &classes, n_features, &loss_enum, &hyper, 0)?;
Ok(FittedSGDClassifier {
weight_matrix,
intercepts,
classes,
n_features,
loss: loss_enum,
hyper: clf_hyper(&self),
t,
})
}
}
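// For binary problems this exposes the single separating hyperplane; for
// multiclass models it returns the first one-vs-all weight vector only.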
impl<F: Float + Send + Sync + ScalarOperand + 'static> HasCoefficients<F>
for FittedSGDClassifier<F>
{
fn coefficients(&self) -> &Array1<F> {
&self.weight_matrix[0]
}
fn intercept(&self) -> F {
self.intercepts[0]
}
}
impl<F> PipelineEstimator<F> for SGDClassifier<F>
where
F: Float + ToPrimitive + FromPrimitive + ScalarOperand + Send + Sync + 'static,
{
fn fit_pipeline(
&self,
x: &Array2<F>,
y: &Array1<F>,
) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
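        // Pipeline targets arrive as F; values that do not convert cleanly to
        // usize (negatives, NaN) fall back to class 0.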
let y_usize: Array1<usize> = y.mapv(|v| v.to_usize().unwrap_or(0));
let fitted = self.fit(x, &y_usize)?;
Ok(Box::new(FittedSGDClassifierPipeline(fitted)))
}
}
/// Adapter exposing a fitted classifier through the pipeline API, converting
/// class labels to and from `F`.
///
/// No manual `Send`/`Sync` impls are needed: every field is `Send + Sync`
/// whenever `F` is, so the auto traits apply.
struct FittedSGDClassifierPipeline<F>(FittedSGDClassifier<F>)
where
    F: Float + Send + Sync + 'static;
impl<F> FittedPipelineEstimator<F> for FittedSGDClassifierPipeline<F>
where
F: Float + ToPrimitive + FromPrimitive + ScalarOperand + Send + Sync + 'static,
{
fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
let preds = self.0.predict(x)?;
Ok(preds.mapv(|v| F::from_usize(v).unwrap_or_else(F::nan)))
}
}
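/// Linear regressor trained by stochastic gradient descent.
///
/// A minimal usage sketch (the import path is illustrative and the example is
/// marked `ignore` so it is not compiled as a doctest):
///
/// ```ignore
/// use ndarray::array;
///
/// let x = array![[1.0], [2.0], [3.0], [4.0]];
/// let y = array![2.0, 4.0, 6.0, 8.0];
/// let fitted = SGDRegressor::<f64>::new()
///     .with_random_state(0)
///     .with_alpha(0.0)
///     .fit(&x, &y)
///     .unwrap();
/// let preds = fitted.predict(&x).unwrap();
/// ```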
#[derive(Debug, Clone)]
pub struct SGDRegressor<F> {
pub loss: RegressorLoss<F>,
pub learning_rate: LearningRateSchedule<F>,
pub eta0: F,
pub alpha: F,
pub max_iter: usize,
pub tol: F,
pub random_state: Option<u64>,
pub power_t: F,
}
impl<F: Float> SGDRegressor<F> {
#[must_use]
pub fn new() -> Self {
Self {
loss: RegressorLoss::SquaredError,
learning_rate: LearningRateSchedule::InvScaling,
eta0: F::from(0.01).unwrap(),
alpha: F::from(0.0001).unwrap(),
max_iter: 1000,
tol: F::from(1e-3).unwrap(),
random_state: None,
power_t: F::from(0.25).unwrap(),
}
}
#[must_use]
pub fn with_loss(mut self, loss: RegressorLoss<F>) -> Self {
self.loss = loss;
self
}
#[must_use]
pub fn with_learning_rate(mut self, lr: LearningRateSchedule<F>) -> Self {
self.learning_rate = lr;
self
}
#[must_use]
pub fn with_eta0(mut self, eta0: F) -> Self {
self.eta0 = eta0;
self
}
#[must_use]
pub fn with_alpha(mut self, alpha: F) -> Self {
self.alpha = alpha;
self
}
#[must_use]
pub fn with_max_iter(mut self, max_iter: usize) -> Self {
self.max_iter = max_iter;
self
}
#[must_use]
pub fn with_tol(mut self, tol: F) -> Self {
self.tol = tol;
self
}
#[must_use]
pub fn with_random_state(mut self, seed: u64) -> Self {
self.random_state = Some(seed);
self
}
#[must_use]
pub fn with_power_t(mut self, power_t: F) -> Self {
self.power_t = power_t;
self
}
}
impl<F: Float> Default for SGDRegressor<F> {
fn default() -> Self {
Self::new()
}
}
fn reg_hyper<F: Float>(reg: &SGDRegressor<F>) -> SGDHyper<F> {
SGDHyper {
learning_rate: reg.learning_rate,
eta0: reg.eta0,
alpha: reg.alpha,
max_iter: reg.max_iter,
tol: reg.tol,
random_state: reg.random_state,
power_t: reg.power_t,
}
}
/// Regression uses exactly the same per-sample update as the classifier path
/// (only the loss differs), so this delegates to `train_binary_sgd` rather
/// than duplicating the training loop.
fn train_regressor_sgd<F, L>(
    x: &Array2<F>,
    y: &Array1<F>,
    weights: &mut Array1<F>,
    intercept: &mut F,
    loss_fn: &L,
    hyper: &SGDHyper<F>,
    initial_t: usize,
) -> (F, usize)
where
    F: Float + ScalarOperand + Send + Sync + 'static,
    L: Loss<F>,
{
    train_binary_sgd(x, y, weights, intercept, loss_fn, hyper, initial_t)
}
fn dispatch_train_regressor<F: Float + Send + Sync + ScalarOperand + 'static>(
x: &Array2<F>,
y: &Array1<F>,
w: &mut Array1<F>,
b: &mut F,
loss_enum: &RegressorLoss<F>,
hyper: &SGDHyper<F>,
initial_t: usize,
) -> (F, usize) {
match loss_enum {
RegressorLoss::SquaredError => {
train_regressor_sgd(x, y, w, b, &SquaredError, hyper, initial_t)
}
RegressorLoss::Huber(eps) => {
train_regressor_sgd(x, y, w, b, &Huber { epsilon: *eps }, hyper, initial_t)
}
RegressorLoss::EpsilonInsensitive(eps) => train_regressor_sgd(
x,
y,
w,
b,
&EpsilonInsensitive { epsilon: *eps },
hyper,
initial_t,
),
}
}
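/// A trained `SGDRegressor`: weight vector, intercept, and the state needed
/// to resume training via `partial_fit`.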
#[derive(Debug, Clone)]
pub struct FittedSGDRegressor<F> {
weights: Array1<F>,
intercept: F,
n_features: usize,
loss: RegressorLoss<F>,
hyper: SGDHyper<F>,
t: usize,
}
fn validate_reg_params<F: Float>(
x: &Array2<F>,
y: &Array1<F>,
eta0: F,
alpha: F,
) -> Result<(), FerroError> {
let n_samples = x.nrows();
if n_samples != y.len() {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples],
actual: vec![y.len()],
context: "y length must match number of samples in X".into(),
});
}
if n_samples == 0 {
return Err(FerroError::InsufficientSamples {
required: 1,
actual: 0,
context: "SGDRegressor requires at least one sample".into(),
});
}
if eta0 <= F::zero() {
return Err(FerroError::InvalidParameter {
name: "eta0".into(),
reason: "must be positive".into(),
});
}
if alpha < F::zero() {
return Err(FerroError::InvalidParameter {
name: "alpha".into(),
reason: "must be non-negative".into(),
});
}
Ok(())
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> Fit<Array2<F>, Array1<F>>
for SGDRegressor<F>
{
type Fitted = FittedSGDRegressor<F>;
type Error = FerroError;
fn fit(&self, x: &Array2<F>, y: &Array1<F>) -> Result<FittedSGDRegressor<F>, FerroError> {
validate_reg_params(x, y, self.eta0, self.alpha)?;
let n_features = x.ncols();
let hyper = reg_hyper(self);
let mut w = Array1::<F>::zeros(n_features);
let mut b = F::zero();
let (_, t) = dispatch_train_regressor(x, y, &mut w, &mut b, &self.loss, &hyper, 0);
Ok(FittedSGDRegressor {
weights: w,
intercept: b,
n_features,
loss: self.loss,
hyper,
t,
})
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> Predict<Array2<F>>
for FittedSGDRegressor<F>
{
type Output = Array1<F>;
type Error = FerroError;
fn predict(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
let n_features = x.ncols();
if n_features != self.n_features {
return Err(FerroError::ShapeMismatch {
expected: vec![self.n_features],
actual: vec![n_features],
context: "number of features must match fitted model".into(),
});
}
let preds = x.dot(&self.weights) + self.intercept;
Ok(preds)
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> PartialFit<Array2<F>, Array1<F>>
for FittedSGDRegressor<F>
{
type FitResult = FittedSGDRegressor<F>;
type Error = FerroError;
fn partial_fit(
mut self,
x: &Array2<F>,
y: &Array1<F>,
) -> Result<FittedSGDRegressor<F>, FerroError> {
let n_samples = x.nrows();
if n_samples != y.len() {
return Err(FerroError::ShapeMismatch {
expected: vec![n_samples],
actual: vec![y.len()],
context: "y length must match number of samples in X".into(),
});
}
if x.ncols() != self.n_features {
return Err(FerroError::ShapeMismatch {
expected: vec![self.n_features],
actual: vec![x.ncols()],
context: "number of features must match fitted model".into(),
});
}
let mut hyper = self.hyper.clone();
hyper.max_iter = 1;
let (_, t) = dispatch_train_regressor(
x,
y,
&mut self.weights,
&mut self.intercept,
&self.loss,
&hyper,
self.t,
);
self.t = t;
Ok(self)
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> PartialFit<Array2<F>, Array1<F>>
for SGDRegressor<F>
{
type FitResult = FittedSGDRegressor<F>;
type Error = FerroError;
fn partial_fit(
self,
x: &Array2<F>,
y: &Array1<F>,
) -> Result<FittedSGDRegressor<F>, FerroError> {
validate_reg_params(x, y, self.eta0, self.alpha)?;
let n_features = x.ncols();
let mut hyper = reg_hyper(&self);
hyper.max_iter = 1;
let mut w = Array1::<F>::zeros(n_features);
let mut b = F::zero();
let (_, t) = dispatch_train_regressor(x, y, &mut w, &mut b, &self.loss, &hyper, 0);
Ok(FittedSGDRegressor {
weights: w,
intercept: b,
n_features,
loss: self.loss,
hyper: reg_hyper(&self),
t,
})
}
}
impl<F: Float + Send + Sync + ScalarOperand + 'static> HasCoefficients<F>
for FittedSGDRegressor<F>
{
fn coefficients(&self) -> &Array1<F> {
&self.weights
}
fn intercept(&self) -> F {
self.intercept
}
}
impl<F> PipelineEstimator<F> for SGDRegressor<F>
where
F: Float + ScalarOperand + Send + Sync + 'static,
{
fn fit_pipeline(
&self,
x: &Array2<F>,
y: &Array1<F>,
) -> Result<Box<dyn FittedPipelineEstimator<F>>, FerroError> {
let fitted = self.fit(x, y)?;
Ok(Box::new(fitted))
}
}
impl<F> FittedPipelineEstimator<F> for FittedSGDRegressor<F>
where
F: Float + ScalarOperand + Send + Sync + 'static,
{
fn predict_pipeline(&self, x: &Array2<F>) -> Result<Array1<F>, FerroError> {
self.predict(x)
}
}
#[cfg(test)]
mod tests {
use super::*;
use ndarray::array;
#[test]
fn test_hinge_loss_correct_side() {
let h = Hinge;
assert!((Loss::<f64>::loss(&h, 1.0, 2.0) - 0.0).abs() < 1e-10);
assert!((Loss::<f64>::gradient(&h, 1.0, 2.0) - 0.0).abs() < 1e-10);
}
#[test]
fn test_hinge_loss_wrong_side() {
let h = Hinge;
assert!((Loss::<f64>::loss(&h, 1.0, -0.5) - 1.5).abs() < 1e-10);
assert!((Loss::<f64>::gradient(&h, 1.0, -0.5) - (-1.0)).abs() < 1e-10);
}
#[test]
fn test_log_loss_zero_pred() {
let l = LogLoss;
let loss = Loss::<f64>::loss(&l, 1.0, 0.0);
assert!((loss - 2.0_f64.ln()).abs() < 1e-10);
}
#[test]
fn test_log_loss_large_correct() {
let l = LogLoss;
let loss = Loss::<f64>::loss(&l, 1.0, 20.0);
assert!(loss < 1e-5);
}
#[test]
fn test_squared_error_loss() {
let s = SquaredError;
assert!((Loss::<f64>::loss(&s, 3.0, 1.0) - 2.0).abs() < 1e-10);
assert!((Loss::<f64>::gradient(&s, 3.0, 1.0) - (-2.0)).abs() < 1e-10);
}
#[test]
fn test_modified_huber_loss() {
let mh = ModifiedHuber;
assert!((Loss::<f64>::loss(&mh, 1.0, 2.0)).abs() < 1e-10);
assert!((Loss::<f64>::loss(&mh, 1.0, 0.5) - 0.25).abs() < 1e-10);
assert!((Loss::<f64>::loss(&mh, 1.0, -2.0) - 8.0).abs() < 1e-10);
}
#[test]
fn test_huber_loss_quadratic_region() {
let h = Huber { epsilon: 1.0_f64 };
assert!((Loss::<f64>::loss(&h, 1.0, 0.5) - 0.125).abs() < 1e-10);
}
#[test]
fn test_huber_loss_linear_region() {
let h = Huber { epsilon: 1.0_f64 };
assert!((Loss::<f64>::loss(&h, 3.0, 0.0) - 2.5).abs() < 1e-10);
}
#[test]
fn test_epsilon_insensitive_inside() {
let ei = EpsilonInsensitive { epsilon: 0.1_f64 };
assert!((Loss::<f64>::loss(&ei, 1.0, 0.95)).abs() < 1e-10);
}
#[test]
fn test_epsilon_insensitive_outside() {
let ei = EpsilonInsensitive { epsilon: 0.1_f64 };
assert!((Loss::<f64>::loss(&ei, 1.0, 0.5) - 0.4).abs() < 1e-10);
}
#[test]
fn test_constant_lr() {
let lr: LearningRateSchedule<f64> = LearningRateSchedule::Constant;
assert!((compute_lr(&lr, 0.1, 0.01, 0.25, 100) - 0.1).abs() < 1e-10);
}
#[test]
fn test_optimal_lr() {
let lr: LearningRateSchedule<f64> = LearningRateSchedule::Optimal;
assert!((compute_lr(&lr, 0.1, 0.01, 0.25, 10) - 10.0).abs() < 1e-10);
}
#[test]
fn test_invscaling_lr() {
let lr: LearningRateSchedule<f64> = LearningRateSchedule::InvScaling;
let result = compute_lr(&lr, 0.1, 0.01, 0.5, 10);
let expected = 0.1 / 10.0_f64.sqrt();
assert!((result - expected).abs() < 1e-10);
}
#[test]
fn test_adaptive_lr_returns_eta0() {
let lr: LearningRateSchedule<f64> = LearningRateSchedule::Adaptive;
assert!((compute_lr(&lr, 0.05, 0.01, 0.25, 100) - 0.05).abs() < 1e-10);
}
#[test]
fn test_sgd_classifier_binary() {
let x = Array2::from_shape_vec(
(8, 2),
vec![
-2.0, -2.0, -1.5, -2.0, -2.0, -1.5, -1.5, -1.5, 2.0, 2.0, 1.5, 2.0, 2.0, 1.5, 1.5,
1.5,
],
)
.unwrap();
let y = array![0, 0, 0, 0, 1, 1, 1, 1];
let clf = SGDClassifier::<f64>::new()
.with_loss(ClassifierLoss::Log)
.with_random_state(42)
.with_max_iter(1000)
.with_eta0(0.01);
let fitted = clf.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
assert!(correct >= 6, "expected >= 6 correct, got {correct}");
}
#[test]
fn test_sgd_classifier_log_loss() {
let x = Array2::from_shape_vec((6, 1), vec![-3.0, -2.0, -1.0, 1.0, 2.0, 3.0]).unwrap();
let y = array![0, 0, 0, 1, 1, 1];
let clf = SGDClassifier::<f64>::new()
.with_loss(ClassifierLoss::Log)
.with_random_state(42)
.with_max_iter(500);
let fitted = clf.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
assert!(correct >= 4, "expected >= 4 correct, got {correct}");
}
#[test]
fn test_sgd_classifier_multiclass() {
let x = Array2::from_shape_vec(
(9, 2),
vec![
0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 5.0, 0.0, 5.5, 0.0, 5.0, 0.5, 0.0, 5.0, 0.5, 5.0,
0.0, 5.5,
],
)
.unwrap();
let y = array![0, 0, 0, 1, 1, 1, 2, 2, 2];
let clf = SGDClassifier::<f64>::new()
.with_random_state(42)
.with_max_iter(1000)
.with_eta0(0.01);
let fitted = clf.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
let correct: usize = preds.iter().zip(y.iter()).filter(|(p, a)| p == a).count();
assert!(
correct >= 6,
"expected >= 6 correct for multiclass, got {correct}"
);
}
#[test]
fn test_sgd_classifier_shape_mismatch_fit() {
let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![0, 1];
        let clf = SGDClassifier::<f64>::new();
assert!(clf.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_shape_mismatch_predict() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let fitted = clf.fit(&x, &y).unwrap();
let x_bad = Array2::from_shape_vec((2, 3), vec![1.0; 6]).unwrap();
assert!(fitted.predict(&x_bad).is_err());
}
#[test]
fn test_sgd_classifier_single_class_error() {
let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
let y = array![0, 0, 0];
let clf = SGDClassifier::<f64>::new();
assert!(clf.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_invalid_eta0() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_eta0(0.0);
assert!(clf.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_invalid_alpha() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_alpha(-1.0);
assert!(clf.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_has_coefficients() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let fitted = clf.fit(&x, &y).unwrap();
assert_eq!(fitted.coefficients().len(), 2);
}
#[test]
fn test_sgd_classifier_partial_fit() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let fitted = clf.partial_fit(&x, &y).unwrap();
let fitted = fitted.partial_fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
assert_eq!(preds.len(), 4);
}
#[test]
fn test_sgd_classifier_partial_fit_chain() {
let x1 =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0]).unwrap();
let y1 = array![0, 0, 1, 1];
let x2 =
Array2::from_shape_vec((4, 2), vec![0.5, 0.5, 1.5, 1.5, 7.5, 7.5, 8.5, 8.5]).unwrap();
let y2 = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let preds = clf
.partial_fit(&x1, &y1)
.unwrap()
.partial_fit(&x2, &y2)
.unwrap()
.predict(&x1)
.unwrap();
assert_eq!(preds.len(), 4);
}
#[test]
fn test_sgd_classifier_partial_fit_shape_mismatch() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 8.0, 8.0, 9.0, 9.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let fitted = clf.partial_fit(&x, &y).unwrap();
let x_bad = Array2::from_shape_vec((2, 3), vec![1.0; 6]).unwrap();
let y_bad = array![0, 1];
assert!(fitted.partial_fit(&x_bad, &y_bad).is_err());
}
#[test]
fn test_sgd_classifier_modified_huber() {
let x = Array2::from_shape_vec((6, 1), vec![-3.0, -2.0, -1.0, 1.0, 2.0, 3.0]).unwrap();
let y = array![0, 0, 0, 1, 1, 1];
let clf = SGDClassifier::<f64>::new()
.with_loss(ClassifierLoss::ModifiedHuber)
.with_random_state(42)
.with_max_iter(500);
let fitted = clf.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
assert_eq!(preds.len(), 6);
}
#[test]
fn test_sgd_classifier_squared_error_loss() {
let x = Array2::from_shape_vec((6, 1), vec![-3.0, -2.0, -1.0, 1.0, 2.0, 3.0]).unwrap();
let y = array![0, 0, 0, 1, 1, 1];
let clf = SGDClassifier::<f64>::new()
.with_loss(ClassifierLoss::SquaredError)
.with_random_state(42)
.with_max_iter(500);
let fitted = clf.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
assert_eq!(preds.len(), 6);
}
#[test]
fn test_sgd_classifier_pipeline() {
let x = Array2::from_shape_vec((6, 1), vec![-3.0, -2.0, -1.0, 1.0, 2.0, 3.0]).unwrap();
let y = Array1::from_vec(vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0]);
let clf = SGDClassifier::<f64>::new().with_random_state(42);
let fitted = clf.fit_pipeline(&x, &y).unwrap();
let preds = fitted.predict_pipeline(&x).unwrap();
assert_eq!(preds.len(), 6);
}
#[test]
fn test_sgd_classifier_constant_lr() {
let x = Array2::from_shape_vec((4, 1), vec![-2.0, -1.0, 1.0, 2.0]).unwrap();
let y = array![0, 0, 1, 1];
let clf = SGDClassifier::<f64>::new()
.with_learning_rate(LearningRateSchedule::Constant)
.with_random_state(42)
.with_max_iter(200);
let fitted = clf.fit(&x, &y).unwrap();
assert_eq!(fitted.predict(&x).unwrap().len(), 4);
}
#[test]
fn test_sgd_classifier_f32() {
let x = Array2::from_shape_vec((4, 1), vec![-2.0f32, -1.0, 1.0, 2.0]).unwrap();
let y = array![0_usize, 0, 1, 1];
let clf = SGDClassifier::<f32>::new()
.with_random_state(42)
.with_max_iter(200);
let fitted = clf.fit(&x, &y).unwrap();
assert_eq!(fitted.predict(&x).unwrap().len(), 4);
}
#[test]
fn test_sgd_regressor_basic() {
let x = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
let y = array![3.0, 5.0, 7.0, 9.0, 11.0];
let model = SGDRegressor::<f64>::new()
.with_random_state(42)
.with_max_iter(2000)
.with_eta0(0.01)
.with_alpha(0.0);
let fitted = model.fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
for (p, &actual) in preds.iter().zip(y.iter()) {
assert!(
(*p - actual).abs() < 2.0,
"prediction {p} too far from {actual}"
);
}
}
#[test]
fn test_sgd_regressor_shape_mismatch() {
let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
        let y = array![1.0, 2.0];
        let model = SGDRegressor::<f64>::new();
assert!(model.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_regressor_predict_shape_mismatch() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0]).unwrap();
let y = array![1.0, 2.0, 3.0, 4.0];
let model = SGDRegressor::<f64>::new().with_random_state(42);
let fitted = model.fit(&x, &y).unwrap();
let x_bad = Array2::from_shape_vec((2, 3), vec![1.0; 6]).unwrap();
assert!(fitted.predict(&x_bad).is_err());
}
#[test]
fn test_sgd_regressor_invalid_eta0() {
let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
let y = array![1.0, 2.0, 3.0];
let model = SGDRegressor::<f64>::new().with_eta0(-0.1);
assert!(model.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_regressor_has_coefficients() {
let x =
Array2::from_shape_vec((4, 2), vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0]).unwrap();
let y = array![1.0, 2.0, 3.0, 4.0];
let model = SGDRegressor::<f64>::new().with_random_state(42);
let fitted = model.fit(&x, &y).unwrap();
assert_eq!(fitted.coefficients().len(), 2);
}
#[test]
fn test_sgd_regressor_partial_fit() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = array![2.0, 4.0, 6.0, 8.0];
let model = SGDRegressor::<f64>::new().with_random_state(42);
let fitted = model.partial_fit(&x, &y).unwrap();
let fitted = fitted.partial_fit(&x, &y).unwrap();
let preds = fitted.predict(&x).unwrap();
assert_eq!(preds.len(), 4);
}
#[test]
fn test_sgd_regressor_partial_fit_chain() {
let x1 = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap();
let y1 = array![2.0, 4.0, 6.0];
let x2 = Array2::from_shape_vec((3, 1), vec![4.0, 5.0, 6.0]).unwrap();
let y2 = array![8.0, 10.0, 12.0];
let model = SGDRegressor::<f64>::new().with_random_state(42);
let preds = model
.partial_fit(&x1, &y1)
.unwrap()
.partial_fit(&x2, &y2)
.unwrap()
.predict(&x1)
.unwrap();
assert_eq!(preds.len(), 3);
}
#[test]
fn test_sgd_regressor_partial_fit_shape_mismatch() {
let x = Array2::from_shape_vec((3, 2), vec![1.0, 1.0, 2.0, 2.0, 3.0, 3.0]).unwrap();
let y = array![1.0, 2.0, 3.0];
let model = SGDRegressor::<f64>::new().with_random_state(42);
let fitted = model.partial_fit(&x, &y).unwrap();
let x_bad = Array2::from_shape_vec((2, 3), vec![1.0; 6]).unwrap();
let y_bad = array![1.0, 2.0];
assert!(fitted.partial_fit(&x_bad, &y_bad).is_err());
}
#[test]
fn test_sgd_regressor_huber_loss() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = array![2.0, 4.0, 6.0, 8.0];
let model = SGDRegressor::<f64>::new()
.with_loss(RegressorLoss::Huber(1.35))
.with_random_state(42)
.with_max_iter(500);
let fitted = model.fit(&x, &y).unwrap();
assert_eq!(fitted.predict(&x).unwrap().len(), 4);
}
#[test]
fn test_sgd_regressor_epsilon_insensitive() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = array![2.0, 4.0, 6.0, 8.0];
let model = SGDRegressor::<f64>::new()
.with_loss(RegressorLoss::EpsilonInsensitive(0.1))
.with_random_state(42)
.with_max_iter(500);
let fitted = model.fit(&x, &y).unwrap();
assert_eq!(fitted.predict(&x).unwrap().len(), 4);
}
#[test]
fn test_sgd_regressor_pipeline() {
let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
let y = Array1::from_vec(vec![2.0, 4.0, 6.0, 8.0]);
let model = SGDRegressor::<f64>::new().with_random_state(42);
let fitted = model.fit_pipeline(&x, &y).unwrap();
let preds = fitted.predict_pipeline(&x).unwrap();
assert_eq!(preds.len(), 4);
}
#[test]
fn test_sgd_regressor_f32() {
let x = Array2::from_shape_vec((4, 1), vec![1.0f32, 2.0, 3.0, 4.0]).unwrap();
let y = Array1::from_vec(vec![2.0f32, 4.0, 6.0, 8.0]);
let model = SGDRegressor::<f32>::new().with_random_state(42);
let fitted = model.fit(&x, &y).unwrap();
assert_eq!(fitted.predict(&x).unwrap().len(), 4);
}
#[test]
fn test_sgd_regressor_empty_data() {
let x = Array2::<f64>::zeros((0, 2));
let y = Array1::<f64>::zeros(0);
let model = SGDRegressor::<f64>::new();
assert!(model.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_empty_data() {
let x = Array2::<f64>::zeros((0, 2));
let y = Array1::<usize>::zeros(0);
let clf = SGDClassifier::<f64>::new();
assert!(clf.fit(&x, &y).is_err());
}
#[test]
fn test_sgd_classifier_default() {
let clf = SGDClassifier::<f64>::default();
assert!(clf.eta0 > 0.0);
assert!(clf.alpha >= 0.0);
}
#[test]
fn test_sgd_regressor_default() {
let model = SGDRegressor::<f64>::default();
assert!(model.eta0 > 0.0);
assert!(model.alpha >= 0.0);
}
}