use crate::ActivationFunction;
use nalgebra::{DMatrix, DVector};
/// Input data for a logistic regression: a design matrix of features and a
/// binary (0/1) response vector.
#[allow(clippy::module_name_repetitions)]
#[derive(Clone, Debug)]
pub struct LogisticRegressionInput<T> {
    /// The design matrix: one row per sample, one column per feature.
    pub x: DMatrix<T>,
    /// The response vector: one element (0 or 1) per sample.
    pub y: DVector<T>,
}
/// Output of a fitted logistic regression: the estimated coefficients and
/// the number of iterations the fitting algorithm needed.
#[allow(clippy::module_name_repetitions)]
#[derive(Clone, Debug)]
pub struct LogisticRegressionOutput<T> {
    /// The estimated coefficients, with the intercept as the first element.
    pub coefficients: DVector<T>,
    /// The number of iterations used to fit the model.
    pub iterations: usize,
}
/// The algorithm used to fit the logistic regression model.
#[allow(clippy::module_name_repetitions)]
#[derive(Copy, Clone)]
pub enum LogisticRegressionAlgorithm {
    /// Maximum Likelihood Estimation (currently unimplemented).
    MLE,
    /// Iteratively Reweighted Least Squares: repeated Newton steps on the
    /// log-likelihood, each solving a weighted least-squares problem.
    IRLS,
}
/// Result of preparing the input: the augmented design matrix, its
/// transpose, and the response vector.
type PrepareInputResult = Result<(DMatrix<f64>, DMatrix<f64>, DVector<f64>), &'static str>;
impl LogisticRegressionInput<f64> {
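    /// Creates a new `LogisticRegressionInput` from a design matrix `x` and
    /// a binary (0/1) response vector `y`.
    ///
    /// # Panics
    ///
    /// Panics if the number of rows in `x` does not match the length of `y`.
    ///
    /// # Example
    ///
    /// A minimal construction sketch (the data below is made up for
    /// illustration, and the import path of this type is assumed):
    ///
    /// ```ignore
    /// use nalgebra::{DMatrix, DVector};
    ///
    /// let x = DMatrix::from_row_slice(3, 2, &[0.1, 0.2, 0.3, 0.4, 0.5, 0.6]);
    /// let y = DVector::from_row_slice(&[0., 1., 1.]);
    /// let input = LogisticRegressionInput::new(x, y);
    /// ```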
    #[must_use]
    pub fn new(x: DMatrix<f64>, y: DVector<f64>) -> Self {
        assert_eq!(
            x.nrows(),
            y.len(),
            "The number of rows in `x` must match the length of `y`."
        );
        Self { x, y }
    }
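    /// Validates the input data and prepends a column of ones to the design
    /// matrix for the intercept term, returning the augmented design matrix,
    /// its transpose, and the response vector.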
    fn prepare_input(&self) -> PrepareInputResult {
        // Reject any response value that is neither (approximately) 0 nor 1.
        if self
            .y
            .iter()
            .any(|&v| v.abs() > f64::EPSILON && (v - 1.0).abs() > f64::EPSILON)
        {
            return Err("The elements of the response vector should be either 0 or 1.");
        }
let (n_rows, _) = self.x.shape();
if n_rows != self.y.len() {
return Err("The number of rows in the design matrix should match the length of the response vector.");
}
        if self.x.iter().any(|&v| !v.is_finite()) || self.y.iter().any(|&v| !v.is_finite()) {
            return Err("The input data should be finite.");
        }

        // Prepend a column of ones to the design matrix for the intercept.
        let x = self.x.clone().insert_column(0, 1.0);
        let x_t = x.transpose();

        Ok((x, x_t, self.y.clone()))
}
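    /// Computes a heuristic starting point for the optimisation: an initial
    /// coefficient vector proportional to the difference of the
    /// (class-proportion scaled) feature means of the two classes, with a
    /// bias that places the decision boundary between them.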
    fn prepare_output(&self) -> LogisticRegressionOutput<f64> {
        let (_n_samples, n_feat) = self.x.shape();
        let ones = DVector::from_element(n_feat, 1.).transpose();

        // Row mask that keeps the class-1 samples (rows where y == 1).
        let mask_class1 = &self.y * &ones;
        // Feature means of the class-1 samples, scaled by the class proportion.
        let x1_mean = self.x.component_mul(&mask_class1).row_mean();

        // Row mask that keeps the class-0 samples (rows where y == 0).
        let mask_class0 = (-&mask_class1).add_scalar(1.);
        // Feature means of the class-0 samples, scaled by the class proportion.
        let x0_mean = self.x.component_mul(&mask_class0).row_mean();

        // Direction separating the two (scaled) class means.
        let delta = &x0_mean - &x1_mean;
        let y_mean = self.y.mean();
        // Weighted midpoint between the two class means.
        let mid = x0_mean * y_mean + x1_mean * (1. - y_mean);
        // Project the midpoint onto the separating direction to scale it.
        let scaler = mid.dot(&delta) / delta.magnitude_squared();
        let dir = delta * scaler;
        // Bias placing the decision boundary near the midpoint.
        let bias = -dir.magnitude_squared();
        // Prepend the bias to form the full coefficient (row) vector.
        let coef = dir.insert_column(0, bias);

        LogisticRegressionOutput {
            coefficients: coef.transpose(),
            iterations: 0,
        }
    }
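    /// Fits the logistic regression model to the input data using the chosen
    /// algorithm, iterating until the coefficient update falls below
    /// `tolerance`.
    ///
    /// # Errors
    ///
    /// Returns an error if the input data fails validation (non-binary
    /// response, mismatched dimensions, or non-finite values), or if the
    /// IRLS Hessian becomes singular.
    ///
    /// # Example
    ///
    /// A minimal usage sketch (illustrative data; import paths assumed):
    ///
    /// ```ignore
    /// use nalgebra::{DMatrix, DVector};
    ///
    /// let x = DMatrix::from_row_slice(4, 1, &[-1.0, -0.5, 0.5, 1.0]);
    /// let y = DVector::from_row_slice(&[0., 0., 1., 1.]);
    ///
    /// let input = LogisticRegressionInput::new(x, y);
    /// let output = input.fit(LogisticRegressionAlgorithm::IRLS, 1e-8)?;
    /// println!("{:?}", output.coefficients);
    /// ```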
    pub fn fit(
        &self,
        method: LogisticRegressionAlgorithm,
        tolerance: f64,
    ) -> Result<LogisticRegressionOutput<f64>, &'static str> {
        // Validate the input and augment the design matrix with an intercept.
        let (x, x_t, y) = self.prepare_input()?;
        // Heuristic initial guess for the coefficients.
        let mut output = self.prepare_output();

        let (n_rows, n_cols) = x.shape();
        let ones_samples = DVector::from_element(n_rows, 1.);
        let ones_features = DVector::from_element(n_cols, 1.);
        let mut coefs = DVector::zeros(n_cols);
match method {
LogisticRegressionAlgorithm::MLE => unimplemented!(),
LogisticRegressionAlgorithm::IRLS => {
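                // Iteratively Reweighted Least Squares: each iteration is a
                // Newton step on the log-likelihood. With fitted probabilities
                // mu = logistic(X * beta) and weights W = diag(mu * (1 - mu)),
                // the step solves the weighted least-squares system
                //
                //     (X^T W X) * delta = X^T (y - mu),
                //     beta <- beta + delta.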
                let mut eta: DVector<f64>;
                let mut mu: DVector<f64>;

                // Iterate until the coefficient update is below the tolerance.
                while (&coefs - &output.coefficients).norm() >= tolerance {
                    // Linear predictor and fitted probabilities.
                    eta = &x * &output.coefficients;
                    mu = ActivationFunction::logistic(&eta);

                    // Diagonal of the weight matrix W = diag(mu * (1 - mu)).
                    let diag_entries = &mu.component_mul(&(&ones_samples - &mu));

                    // Stop early once the residuals are negligible.
                    if (&y - &mu).abs().max() < tolerance {
                        break;
                    }

                    // Form X^T * W without materialising the diagonal matrix:
                    // repeat the weights across the rows of X^T.
                    let diag_repeated = &ones_features * diag_entries.transpose();
                    let x_t_w = x_t.component_mul(&diag_repeated);
                    let hessian = &x_t_w * &x;

                    // Right-hand side of the Newton system: X^T * (y - mu).
                    let z = &x_t * (&y - &mu);

                    // Solve for the coefficient update.
                    let delta_coefs = hessian
                        .lu()
                        .solve(&z)
                        .ok_or("IRLS: the Hessian is singular; cannot solve for the update.")?;

                    coefs = &output.coefficients + delta_coefs;
                    output.iterations += 1;
                    std::mem::swap(&mut output.coefficients, &mut coefs);
                }
}
}
Ok(output)
}
}
impl LogisticRegressionOutput<f64> {
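    /// Predicts the class (0 or 1) of each input sample by thresholding the
    /// fitted probabilities at 0.5.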
#[must_use]
pub fn predict(&self, input: &DMatrix<f64>) -> DVector<f64> {
let probabilities = self.predict_proba(input);
probabilities.map(|p| if p > 0.5 { 1. } else { 0. })
}
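    /// Predicts the probability of class 1 for each input sample via
    /// `logistic(input * weights + bias)`, where the bias is the first
    /// element of the fitted coefficient vector.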
#[must_use]
pub fn predict_proba(&self, input: &DMatrix<f64>) -> DVector<f64> {
        let coef = self.coefficients.clone();
        // The first element is the intercept; the rest are feature weights.
        let bias = coef[0];
        let weights = coef.remove_row(0);
        let eta = (input * weights).add_scalar(bias);
ActivationFunction::logistic(&eta)
}
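    /// Computes the misclassification rate: the fraction of predicted labels
    /// `y_hat` that disagree with the observed labels `y`.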
#[must_use]
pub fn score_misclassification(&self, y: &DVector<f64>, y_hat: &DVector<f64>) -> f64 {
assert_eq!(y.shape(), y_hat.shape());
        let (n_samples, _) = y.shape();
        // For 0/1 labels, |y - y_hat| is 1 exactly on the misclassified samples.
        (y - y_hat).abs().sum() / n_samples as f64
}
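    /// Computes the average cross-entropy (log-loss) of the predicted
    /// probabilities `p_hat` against the observed labels `y`:
    ///
    /// `CE = -mean(y * ln(p_hat) + (1 - y) * ln(1 - p_hat))`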
#[must_use]
pub fn score_cross_entropy(&self, y: &DVector<f64>, p_hat: &DVector<f64>) -> f64 {
assert_eq!(y.shape(), p_hat.shape());
let y_complement = (-y).add_scalar(1.);
let p_complement = (-p_hat).add_scalar(1.);
let log_p = p_hat.map(f64::ln);
let log_p_complement = p_complement.map(f64::ln);
        // Cross-entropy is the negated mean log-likelihood.
        -(y.component_mul(&log_p) + y_complement.component_mul(&log_p_complement)).mean()
}
}
#[cfg(test)]
mod tests_logistic_regression {
use super::*;
use std::time::Instant;
use RustQuant_math::distributions::DistributionClass;
#[test]
fn test_logistic_regression() {
#[rustfmt::skip]
let x_train = DMatrix::from_row_slice(
4, 3, &[-1.207_065_7, 0.429_124_7, -0.564_452_0,
0.277_429_2, 0.506_055_9, -0.890_037_8,
1.084_441_2, -0.574_740_0, -0.477_192_7,
-2.345_697_7, -0.546_631_9, -0.998_386_4],
);
#[rustfmt::skip]
let _x_test = DMatrix::from_row_slice(
4, 3, &[-0.776_253_89, -0.511_009_5, 0.134_088_2,
0.064_458_82, -0.911_195_4, -0.490_685_9,
0.959_494_06, -0.837_171_7, -0.440_547_9,
-0.110_285_49, 2.415_835_2, 0.459_589_4],
);
let response = DVector::from_row_slice(&[0., 1., 1., 1.]);
let input = LogisticRegressionInput {
x: x_train,
y: response,
};
let start_none = Instant::now();
let output = input.fit(LogisticRegressionAlgorithm::IRLS, f64::EPSILON.sqrt());
let elapsed_none = start_none.elapsed();
match output {
Ok(output) => {
println!("Iterations: \t{}", output.iterations);
println!("Time taken: \t{:?}", elapsed_none);
println!("Coefficients: \t{:?}", output.coefficients);
}
Err(err) => {
panic!("Failed to fit logistic regression model: {}", err);
}
}
}
#[test]
fn test_logistic_regression_stochastic() {
use RustQuant_math::distributions::{Bernoulli, Distribution, Gaussian, Uniform};
        let N_train = 200;
        let N_test = 40;
        let K = 10;
let distr_normal = Gaussian::default();
let distr_uniform_bias = Uniform::new(-0.7, 0.7, DistributionClass::Continuous);
let distr_uniform_steepness = Uniform::new(0.5, 5., DistributionClass::Continuous);
let bias = distr_uniform_bias.sample(1).unwrap()[0];
let steepness = distr_uniform_steepness.sample(1).unwrap()[0];
let coefs = DVector::from_vec(distr_normal.sample(K).unwrap())
.normalize()
.insert_row(0, bias)
.scale(steepness);
let logistic_regression = LogisticRegressionOutput {
coefficients: coefs,
iterations: 0,
};
let distr_uniform_features = Uniform::new(-0.5, 0.5, DistributionClass::Continuous);
let x_train = DMatrix::<f64>::from_vec(
N_train,
K,
distr_uniform_features.sample(N_train * K).unwrap(),
);
let x_test = DMatrix::from_vec(
N_test,
K,
distr_uniform_features.sample(N_test * K).unwrap(),
);
let probs_train = logistic_regression.predict_proba(&x_train);
let probs_test = logistic_regression.predict_proba(&x_test);
let y_train = probs_train.map(|p| Bernoulli::new(p).sample(1).unwrap()[0]);
let y_test = probs_test.map(|p| Bernoulli::new(p).sample(1).unwrap()[0]);
let input = LogisticRegressionInput {
x: x_train,
y: y_train,
};
let start_none = Instant::now();
let output = input.fit(LogisticRegressionAlgorithm::IRLS, f64::EPSILON.sqrt());
let elapsed_none = start_none.elapsed();
match output {
Ok(output) => {
let y_hat = output.predict(&x_test);
let misclassification_rate = output.score_misclassification(&y_test, &y_hat);
let p_hat = output.predict_proba(&x_test);
let crossentropy = output.score_cross_entropy(&y_test, &p_hat);
println!(
"Number of samples N_train={}, N_test={}, number of Features K={}",
N_train, N_test, K
);
println!(
"Misclassification_rate(out of sample): \t{}",
misclassification_rate
);
println!("Avg crossentropy(out of sample): \t{}", crossentropy);
println!("Iterations: \t{}", output.iterations);
println!("Time taken: \t{:?}", elapsed_none);
println!("Coefficients found by IRLS:\n{:?}", &output.coefficients);
println!(
"Coefficients used for the generation of the training data:\n{:?}",
&logistic_regression.coefficients
);
}
Err(err) => {
panic!("Failed to fit logistic regression model: {}", err);
}
}
}
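    #[test]
    fn test_logistic_regression_rejects_non_binary_response() {
        // `fit` should surface a validation error when the response vector
        // contains values other than 0 and 1. (Illustrative data.)
        let x = DMatrix::from_row_slice(3, 1, &[0.1, 0.2, 0.3]);
        let y = DVector::from_row_slice(&[0., 0.5, 1.]);
        let input = LogisticRegressionInput { x, y };
        assert!(input
            .fit(LogisticRegressionAlgorithm::IRLS, f64::EPSILON.sqrt())
            .is_err());
    }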
}