use crate::bench::classification_metrics::ClassificationMetrics;
use crate::builders::logistic_regression::LogisticRegressionBuilder;
use crate::core::activations::activation::Activation;
use crate::core::activations::activation_functions::ActivationFn;
use crate::core::activations::leaky_relu::LeakyReLU;
use crate::core::activations::relu::ReLU;
use crate::core::activations::sigmoid::Sigmoid;
use crate::core::activations::tanh::Tanh;
use crate::core::error::ModelError;
use crate::core::types::{Matrix, Vector};
use crate::model::core::base::{BaseModel, OptimizableModel};
use crate::model::core::classification_model::ClassificationModel;
use crate::model::core::param_collection::{GradientCollection, ParamCollection};
use ndarray::{ArrayView, ArrayViewMut, Dimension, IxDyn};
/// Binary classifier of the form `activation(wᵀx + b)`, thresholded to 0/1.
///
/// Holds the learnable parameters, the most recently computed gradients
/// (filled in by `backward`), and the decision threshold used by `predict`.
#[derive(Debug)]
pub struct LogisticRegression {
    /// Weight vector, one entry per input feature.
    pub weights: Vector,
    /// Bias term, stored as a length-1 vector so it can broadcast over samples.
    pub bias: Vector,
    /// Activation applied to the linear score z = wᵀx + b.
    pub activation_fn: ActivationFn,
    // Gradient of the cost w.r.t. `weights` (written by `backward`).
    dw: Vector,
    // Gradient of the cost w.r.t. `bias` (written by `backward`).
    db: Vector,
    // Decision threshold in [0, 1] used to binarize probabilities in `predict`.
    threshold: f64,
}
impl LogisticRegression {
    /// Creates a model with zero-initialized weights, bias, and gradients.
    ///
    /// # Panics
    /// Panics if `threshold` is outside `[0, 1]`.
    pub fn new(n_features: usize, activation_fn: ActivationFn, threshold: f64) -> Self {
        // Validate before allocating any buffers so the panic path does no work.
        if !(0.0..=1.0).contains(&threshold) {
            panic!("Threshold must be between 0 and 1");
        }
        Self {
            weights: Vector::zeros(n_features),
            bias: Vector::from_elem(1, 0.0),
            activation_fn,
            threshold,
            dw: Vector::zeros(n_features),
            db: Vector::from_elem(1, 0.0),
        }
    }

    /// Returns a builder for configuring a `LogisticRegression`.
    pub fn builder() -> LogisticRegressionBuilder {
        LogisticRegressionBuilder::new()
    }

    /// Applies the configured activation elementwise to `z`.
    fn compute_activation(&self, z: &Vector) -> Result<Vector, ModelError> {
        match self.activation_fn {
            ActivationFn::Sigmoid => Ok(Sigmoid::activate(z)),
            ActivationFn::ReLU => Ok(ReLU::activate(z)),
            ActivationFn::Tanh => Ok(Tanh::activate(z)),
            ActivationFn::LeakyReLU => Ok(LeakyReLU::activate(z)),
        }
    }

    /// Computes the linear score z = wᵀX + b for each column (sample) of `x`.
    fn compute_z(&self, x: &Matrix) -> Result<Vector, ModelError> {
        // `x` is (n_features, m); the length-1 bias broadcasts across all m samples.
        let z = self.weights.t().dot(x) + &self.bias;
        Ok(z)
    }

    /// Applies the derivative of the configured activation elementwise to `z`.
    fn compute_derivative(&self, z: &Vector) -> Result<Vector, ModelError> {
        match self.activation_fn {
            ActivationFn::Sigmoid => Ok(Sigmoid::derivative(z)),
            ActivationFn::ReLU => Ok(ReLU::derivative(z)),
            ActivationFn::Tanh => Ok(Tanh::derivative(z)),
            ActivationFn::LeakyReLU => Ok(LeakyReLU::derivative(z)),
        }
    }
}
impl ParamCollection for LogisticRegression {
    /// Immutable view of the parameter stored under `key` ("weights" or "bias").
    fn get<D: Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        // Select the parameter first, then do the dimensionality conversion once.
        let view = match key {
            "weights" => self.weights.view(),
            "bias" => self.bias.view(),
            _ => return Err(ModelError::KeyError(key.to_string())),
        };
        Ok(view.into_dimensionality::<D>()?)
    }

    /// Mutable view of the parameter stored under `key` ("weights" or "bias").
    fn get_mut<D: Dimension>(&mut self, key: &str) -> Result<ArrayViewMut<f64, D>, ModelError> {
        let view = match key {
            "weights" => self.weights.view_mut(),
            "bias" => self.bias.view_mut(),
            _ => return Err(ModelError::KeyError(key.to_string())),
        };
        Ok(view.into_dimensionality::<D>()?)
    }

    /// Overwrites the parameter under `key`, reshaping `value` to the target's shape.
    fn set<D: Dimension>(&mut self, key: &str, value: ArrayView<f64, D>) -> Result<(), ModelError> {
        let target = match key {
            "weights" => &mut self.weights,
            "bias" => &mut self.bias,
            _ => return Err(ModelError::KeyError(key.to_string())),
        };
        let shape = target.shape().to_vec();
        let reshaped = value.to_shape(&shape[..])?;
        target.assign(&reshaped);
        Ok(())
    }

    /// Lists all parameters as (name, dynamic-dimensional view) pairs.
    fn param_iter(&self) -> Vec<(&str, ArrayView<f64, IxDyn>)> {
        let mut params = Vec::with_capacity(2);
        params.push(("weights", self.weights.view().into_dyn()));
        params.push(("bias", self.bias.view().into_dyn()));
        params
    }
}
impl GradientCollection for LogisticRegression {
    /// Immutable view of the stored gradient for `key` ("weights" or "bias").
    fn get_gradient<D: Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.dw.view().into_dimensionality::<D>()?),
            "bias" => Ok(self.db.view().into_dimensionality::<D>()?),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    /// Overwrites the stored gradient for `key`, reshaping `value` to fit.
    fn set_gradient<D: Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                // Reshape against the assignment target itself (`dw`), not
                // `weights` — they share a shape today, but `dw` is the buffer
                // actually being written.
                self.dw.assign(&value.to_shape(self.dw.shape())?);
                Ok(())
            }
            "bias" => {
                self.db.assign(&value.to_shape(self.db.shape())?);
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }
}
impl OptimizableModel<Matrix, Vector> for LogisticRegression {
    /// Forward pass: activation(wᵀX + b), clamped away from 0 and 1.
    ///
    /// The clamp keeps downstream log-loss terms finite (no ln(0)).
    fn forward(&self, input: &Matrix) -> Result<Vector, ModelError> {
        let z = self.compute_z(input)?;
        let a = self.compute_activation(&z)?;
        let epsilon = 1e-15;
        let a_safe = a.mapv(|val| val.max(epsilon).min(1.0 - epsilon));
        Ok(a_safe)
    }

    /// Accumulates parameter gradients from the per-sample output gradient `dz`.
    ///
    /// `input` is (n_features, m); gradients are averaged over the m samples.
    fn backward(&mut self, input: &Matrix, dz: &Vector) -> Result<(), ModelError> {
        let m = input.shape()[1] as f64;
        let dw = input.dot(dz) / m;
        let db = dz.sum() / m;
        self.set_gradient("weights", dw.view())?;
        self.set_gradient("bias", ArrayView::from(&[db]))?;
        Ok(())
    }

    /// Gradient of the binary cross-entropy w.r.t. z, via the chain rule.
    fn compute_output_gradient(&self, x: &Matrix, y: &Vector) -> Result<Vector, ModelError> {
        let z = self.compute_z(x)?;
        let y_hat = self.compute_activation(&z)?;
        // Clamp exactly like `forward` so the divisions below cannot hit zero
        // when the activation saturates to exactly 0 or 1.
        let epsilon = 1e-15;
        let y_hat = y_hat.mapv(|val| val.max(epsilon).min(1.0 - epsilon));
        let g_prime_of_z = self.compute_derivative(&z)?;
        // dL/dŷ for BCE, then chain through the activation derivative.
        let dy = (1.0 - y) / (1.0 - &y_hat) - y / &y_hat;
        let dz = dy * g_prime_of_z;
        Ok(dz)
    }
}
#[cfg(test)]
mod optimizable_model_tests {
    use ndarray::{ArrayView1, arr1, arr2};
    use crate::builders::builder::Builder;
    use crate::core::activations::activation::Activation;
    use crate::core::activations::activation_functions::ActivationFn;
    use crate::core::activations::sigmoid::Sigmoid;
    use crate::core::types::{Matrix, Scalar};
    use crate::model::core::base::OptimizableModel;
    use crate::model::core::param_collection::GradientCollection;
    use crate::model::logistic_regression::LogisticRegression;

    /// `forward` must equal sigmoid(wᵀX + b) computed by hand.
    #[test]
    fn test_logistic_regression_forward_sigmoid() {
        let mut model = LogisticRegression::builder()
            .n_features(3)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        let weights = arr1(&[0.5, -0.2, 0.1]);
        let bias = Scalar::from_elem((), 0.2);
        model.weights.assign(&weights);
        model.bias.assign(&bias);
        // All-zero input: z collapses to the bias for every sample.
        let input = Matrix::zeros((3, 3));
        let z = model.weights.t().dot(&input) + bias;
        let a = Sigmoid::activate(&z);
        let expected_output = a;
        let output = model.forward(&input).unwrap();
        assert_eq!(output.shape(), [3]);
        assert_eq!(output, expected_output);
    }

    /// For sigmoid + BCE, the output gradient simplifies to ŷ - y.
    #[test]
    fn test_compute_output_gradient() {
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        let weights = arr1(&[0.5, -0.3]);
        let bias = Scalar::from_elem((), 0.1);
        model.weights.assign(&weights);
        model.bias.assign(&bias);
        let x = arr2(&[[0.2, 0.7], [0.3, 0.5]]);
        let y = arr1(&[0.5, 1.0]);
        let y_hat = model.forward(&x).unwrap();
        let expected_dz = &y_hat - &y;
        let dz = model.compute_output_gradient(&x, &y).unwrap();
        assert_eq!(dz.shape(), expected_dz.shape());
        // Elementwise comparison with a small tolerance for float rounding.
        for (a, b) in dz.iter().zip(expected_dz.iter()) {
            assert!((a - b).abs() < 1e-5, "Expected {}, got {}", b, a);
        }
    }

    /// `backward` must store dw = X·dz / m and db = Σdz / m.
    #[test]
    fn test_backward() {
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        let weights = arr1(&[0.5, -0.3]);
        let bias = Scalar::from_elem((), 0.1);
        model.weights.assign(&weights);
        model.bias.assign(&bias);
        let x = arr2(&[[0.2, 0.7], [0.3, 0.5]]);
        let dz = arr1(&[0.1, -0.2]);
        println!("x.shape(): {:?}", x.shape());
        println!("dz.shape(): {:?}", dz.shape());
        model.backward(&x, &dz).unwrap();
        // m = number of samples (columns of x).
        let m = x.shape()[1] as f64;
        let expected_dw = x.dot(&dz) / m;
        let expected_db = dz.sum() / m;
        let actual_dw: ArrayView1<f64> = model.get_gradient("weights").unwrap();
        let actual_db: ArrayView1<f64> = model.get_gradient("bias").unwrap();
        let actual_db_value = actual_db[0];
        assert!(
            (actual_db_value - expected_db).abs() < 1e-5,
            "Expected {}, got {}",
            expected_db,
            actual_db_value
        );
        for (a, b) in actual_dw.iter().zip(expected_dw.iter()) {
            assert!((a - b).abs() < 1e-5, "Expected {}, got {}", b, a);
        }
    }
}
impl BaseModel<Matrix, Vector> for LogisticRegression {
    /// Predicts hard 0/1 labels by thresholding the activated scores.
    fn predict(&self, x: &Matrix) -> Result<Vector, ModelError> {
        // Reuse the shared linear-score helper instead of re-deriving
        // wᵀX + b with a second, slightly different formulation.
        let z = self.compute_z(x)?;
        let a = self.compute_activation(&z)?;
        let y_hat = a.mapv(|p| if p >= self.threshold { 1.0 } else { 0.0 });
        Ok(y_hat)
    }

    /// Mean binary cross-entropy over the samples in `x`.
    fn compute_cost(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let m = y.len() as f64;
        // `forward` clamps ŷ into (0, 1), so both log terms stay finite.
        let y_hat = self.forward(x)?;
        let loss = -(y * y_hat.ln() + (1.0 - y) * (1.0 - &y_hat).ln());
        let cost = loss.sum() / m;
        Ok(cost)
    }
}
#[cfg(test)]
mod base_model_tests {
    use super::*;
    use crate::builders::builder::Builder;
    use crate::model::core::base::BaseModel;
    use ndarray::{arr1, arr2};

    /// `predict` binarizes the activated scores at the model threshold.
    #[test]
    fn test_predict() {
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        model.weights = arr1(&[0.5, -0.5]);
        model.bias = arr1(&[0.1]);
        let x = arr2(&[[0.2, 0.8], [0.9, 0.1]]);
        let predictions = model.predict(&x).unwrap();
        let expected = arr1(&[0.0, 1.0]);
        assert_eq!(predictions.len(), 2);
        assert_eq!(predictions, expected);
    }

    /// `compute_cost` must match the hand-computed mean cross-entropy.
    #[test]
    fn test_compute_cost() {
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        model.weights = arr1(&[1.0, 1.0]);
        model.bias = arr1(&[0.0]);
        // Extreme inputs drive the sigmoid near 0 and 1 with labels flipped,
        // so the cost is large and exercises the clamping in `forward`.
        let x = arr2(&[[10.0, -10.0], [10.0, -10.0]]); let y = arr1(&[0.0, 1.0]);
        let y_hat = model.forward(&x).unwrap();
        let loss = -(&y * &y_hat.ln() + (1.0 - &y) * (1.0 - &y_hat).ln());
        println!("loss: {:?}", loss);
        let expected_cost = loss.sum() / 2.0;
        let cost = model.compute_cost(&x, &y).unwrap();
        assert!(
            (cost - expected_cost).abs() < 1e-5,
            "Expected cost {}, got {}",
            expected_cost,
            cost
        );
    }

    /// With opposing weights, one sample lands in each class.
    #[test]
    fn test_predict_all_classes() {
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        model.weights = arr1(&[2.0, -2.0]);
        model.bias = arr1(&[0.0]);
        let x = arr2(&[
            [1.0, 0.1], [0.1, 1.0], ]);
        let predictions = model.predict(&x).unwrap();
        assert_eq!(predictions[0], 1.0);
        assert_eq!(predictions[1], 0.0);
    }
}
impl LogisticRegression {
    /// Counts (true positives, false positives, false negatives) between hard
    /// 0/1 predictions and 0/1 labels, treating values > 0.5 as positive.
    fn confusion_counts(y_pred: &Vector, y: &Vector) -> (f64, f64, f64) {
        let mut tp = 0.0;
        let mut fp = 0.0;
        let mut fn_ = 0.0;
        for (pred, actual) in y_pred.iter().zip(y.iter()) {
            match (*pred > 0.5, *actual > 0.5) {
                (true, true) => tp += 1.0,
                (true, false) => fp += 1.0,
                (false, true) => fn_ += 1.0,
                (false, false) => {}
            }
        }
        (tp, fp, fn_)
    }
}

impl ClassificationModel<Matrix, Vector> for LogisticRegression {
    /// Fraction of samples whose hard prediction matches the label.
    fn accuracy(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        // `predict` already returns hard 0/1 labels; re-thresholding them
        // against `self.threshold` (as before) was a no-op.
        let y_pred = self.predict(x)?;
        let correct = y_pred
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| (pred - actual).abs() < f64::EPSILON)
            .count();
        Ok(correct as f64 / y.len() as f64)
    }

    /// Mean binary cross-entropy of the model's probabilities against `y`.
    fn loss(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        // Bug fix: the log-loss must be computed on the probabilistic output
        // (`forward`, already clamped into (0, 1)), not on the hard 0/1
        // predictions — those made every misclassified sample cost a constant
        // -ln(1e-15) and every correct one cost ~0, which is not cross-entropy.
        let y_prob = self.forward(x)?;
        let loss = y
            .iter()
            .zip(y_prob.iter())
            .map(|(y_i, p_i)| -y_i * p_i.ln() - (1.0 - y_i) * (1.0 - p_i).ln())
            .sum::<f64>()
            / y.len() as f64;
        Ok(loss)
    }

    /// Recall = TP / (TP + FN); 0.0 when there are no actual positives.
    fn recall(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_pred = self.predict(x)?;
        let (tp, _, fn_) = Self::confusion_counts(&y_pred, y);
        // TP + FN equals the number of actual positives.
        if tp + fn_ == 0.0 {
            return Ok(0.0);
        }
        Ok(tp / (tp + fn_))
    }

    /// Harmonic mean of precision and recall; 0.0 when both are zero.
    fn f1_score(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_pred = self.predict(x)?;
        let (tp, fp, fn_) = Self::confusion_counts(&y_pred, y);
        let precision = if tp + fp == 0.0 { 0.0 } else { tp / (tp + fp) };
        let recall = if tp + fn_ == 0.0 { 0.0 } else { tp / (tp + fn_) };
        if precision + recall == 0.0 {
            return Ok(0.0);
        }
        Ok(2.0 * precision * recall / (precision + recall))
    }

    /// Bundles accuracy, loss, precision, recall, and F1 in one pass.
    fn compute_metrics(&self, x: &Matrix, y: &Vector) -> Result<ClassificationMetrics, ModelError> {
        let accuracy = self.accuracy(x, y)?;
        let loss = self.loss(x, y)?;
        let recall = self.recall(x, y)?;
        let f1 = self.f1_score(x, y)?;
        // Precision is not a trait method, so derive it from the shared counts
        // instead of repeating the filter/zip chains a fourth time.
        let y_pred = self.predict(x)?;
        let (tp, fp, _) = Self::confusion_counts(&y_pred, y);
        let precision = if tp + fp == 0.0 { 0.0 } else { tp / (tp + fp) };
        Ok(ClassificationMetrics {
            accuracy,
            loss,
            precision,
            recall,
            f1_score: f1,
        })
    }
}