//! This file provides multiclass logistic regression.
//!
//! It is used in examples with MNIST data.
//! It implements traits defined in types.rs with 2 dimensional arrays, which is
//! more natural to solve a multiclass problem.
//!
use ndarray::prelude::*;
#[allow(unused_imports)]
use log::Level::*;
#[allow(unused_imports)]
use log::*;
// use log::{debug, info, warn, trace, log_enabled};
use crate::types::*;
// The data dimension has been augmented by one for the intercept term.
// The coefficients of the last class are assumed to be 0 to account for the
// identifiability constraint (cf. "Less Than a Single Pass: SCSG", Lei-Jordan, or
// "Machine Learning", Murphy, par. 9.2.2.1-2).
//
pub struct LogisticRegression {
    // Number of classes K. The coefficients of class K-1 are implicitly 0
    // (identifiability constraint), so only K-1 coefficient rows are ever used.
    nbclass: usize,
    // (augmented observation, class label) pairs.
    // Each array has length = observation length + 1, with the value 1. in slot 0
    // for the intercept term.
    observations: Vec<(Array1<f64>, usize)>,
}
impl LogisticRegression {
    /// Builds a multiclass logistic regression problem.
    ///
    /// * `nbclass`: number of classes (at least 2). The coefficients of the last
    ///   class are implicitly 0 (identifiability constraint).
    /// * `observations`: (augmented observation, class label) pairs; each array
    ///   carries the value 1. in slot 0 for the intercept term.
    ///
    /// # Panics
    /// Panics if `nbclass < 2`: the trait implementations compute
    /// `self.nbclass - 1` throughout, so `nbclass == 0` would underflow and a
    /// single class makes the problem degenerate. In debug builds, also checks
    /// that every class label is in `0..nbclass`.
    pub fn new(nbclass: usize, observations: Vec<(Array1<f64>, usize)>) -> LogisticRegression {
        assert!(nbclass >= 2, "LogisticRegression requires at least 2 classes");
        // every label must index a valid class (checked in debug builds only)
        debug_assert!(observations.iter().all(|&(_, class)| class < nbclass));
        LogisticRegression {
            nbclass,
            observations,
        }
    } // end of new
} // end of impl LogisticRegression
/// We implement the trait Summation using 2 dimensional Arrays.
/// We use the variable coefficients : Array2\<f64\> so that
/// a row is coefficient array corresponding to (1 augmented) observations and a column is coefficients by class.
/// Recall that ndarray is by default with C storage (row oriented)
///
impl Summation<Ix2> for LogisticRegression {
    /// One term per observation.
    fn terms(&self) -> usize {
        self.observations.len()
    }
    //
    /// Returns the value of term `term` at `coefficients`:
    /// ln(1 + Σ_k exp(x·w_k)) − x·w_class, where the sum runs over the
    /// nbclass-1 free classes (row k of `coefficients` holds w_k) and the
    /// linear part x·w_class is 0 when the observation belongs to the last,
    /// pinned class.
    fn term_value(&self, coefficients: &Array2<f64>, term: usize) -> f64 {
        // extract vector `term` of data and its class
        let (ref x, term_class) = self.observations[term];
        // dimension check hoisted out of the loop: every row of `coefficients`
        // has ncols() entries and must match the (augmented) observation length
        assert_eq!(x.len(), coefficients.ncols());
        // x·w_i for each of the nbclass-1 free classes
        let mut dot_xi = Array1::<f64>::zeros(self.nbclass - 1);
        // starts at 1. for the pinned last class: exp(x·0) = 1
        let mut log_arg = 1.0f64;
        for i in 0..self.nbclass - 1 {
            dot_xi[i] = x.dot(&coefficients.index_axis(Axis(0), i));
            log_arg += dot_xi[i].exp();
        }
        // linear term of the observation's own class; 0 for the pinned last class
        let linear_term = if term_class < self.nbclass - 1 {
            dot_xi[term_class]
        } else {
            0.
        };
        log_arg.ln() - linear_term
    } // end of term_value
}
impl SummationC1<Ix2> for LogisticRegression {
    /// Writes the gradient of term `term` at `w` into `gradient`.
    /// Row k of `gradient` holds the partials with respect to the coefficients
    /// of class k; there is one column per slot of the (augmented) observation.
    fn term_gradient(&self, w: &Array2<f64>, term: &usize, gradient: &mut Array2<f64>) {
        // observation and class of this term
        let (ref x, term_class) = self.observations[*term];
        let nfree = self.nbclass - 1;
        // exp(x·w_k) for the nbclass-1 free classes, and the softmax denominator;
        // the initial 1. stands for the pinned last class: exp(x·0) = 1
        let mut exp_dot = Array1::<f64>::zeros(nfree);
        let mut denom = 1.0f64;
        for (k, e) in exp_dot.iter_mut().enumerate() {
            *e = x.dot(&w.slice(s![k, ..])).exp();
            denom += *e;
        }
        // gradient[[k, j]] = x_j * softmax_k − x_j * 1{term_class == k}
        for k in 0..nfree {
            for (j, &xj) in x.iter().enumerate() {
                let mut g = xj * exp_dot[k] / denom;
                if term_class == k {
                    g -= xj;
                }
                gradient[[k, j]] = g;
            }
        }
    } // end of term_gradient
}