use crate::mat_util::*;
use const_default::ConstDefault;
use itertools::Itertools;
use rand::prelude::*;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use std::f32::{INFINITY, NEG_INFINITY};
use std::ops::Deref;
#[derive(Eq, PartialEq, Copy, Clone, Debug, Serialize, Deserialize)]
pub enum LossType {
Log,
Hinge,
}
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
pub struct HyperParam {
pub loss_type: LossType,
pub eps: f32,
pub c: f32,
pub weight_threshold: f32,
pub max_iter: u32,
}
impl ConstDefault for HyperParam {
const DEFAULT: Self = Self {
loss_type: LossType::Hinge,
eps: 0.1,
c: 1.,
weight_threshold: 0.1,
max_iter: 20,
};
}
impl Default for HyperParam {
fn default() -> Self {
<Self as ConstDefault>::DEFAULT
}
}
impl HyperParam {
pub fn validate(&self) -> Result<(), String> {
if self.eps <= 0. {
Err(format!("eps must be positive, but is {}", self.eps))
} else if self.c <= 0. {
Err(format!("c must be positive, but is {}", self.c))
} else if self.weight_threshold < 0. {
Err(format!(
"weight_threshold must be non-negative, but is {}",
self.weight_threshold
))
} else if self.max_iter == 0 {
Err(format!(
"max_iter must be positive, but is {}",
self.max_iter
))
} else {
Ok(())
}
}
pub(crate) fn adapt_to_sample_size(
&self,
n_curr_examples: usize,
n_total_examples: usize,
) -> Self {
match self.loss_type {
LossType::Hinge => *self,
LossType::Log => Self {
c: self.c * n_total_examples as f32 / n_curr_examples as f32,
..*self
},
}
}
pub(crate) fn train<Indices: Deref<Target = [usize]> + Sync>(
&self,
feature_matrix: &SparseMatView,
label_to_example_indices: &[Indices],
) -> WeightMat {
self.validate().unwrap();
assert!(feature_matrix.is_csr());
let n_features = feature_matrix.inner_dims();
let (feature_matrix, index_to_feature) = feature_matrix.to_owned().shrink_inner_indices();
let solver = match self.loss_type {
LossType::Hinge => solve_l2r_l2_svc,
LossType::Log => solve_l2r_lr_dual,
};
let weights = label_to_example_indices
.par_iter()
.map(|indices| {
let mut labels = vec![false; feature_matrix.rows()];
let mut n_pos = 0;
for &i in indices.iter() {
labels[i] = true;
n_pos += 1;
}
assert_ne!(n_pos, 0);
let (indices, data) = solver(
&feature_matrix.view(),
&labels,
self.eps,
self.c,
self.c,
self.max_iter,
)
.indexed_iter()
.filter_map(|(index, &value)| {
if value.abs() <= self.weight_threshold {
None
} else {
Some((index_to_feature[index], value))
}
})
.unzip();
SparseVec::new(n_features, indices, data)
})
.collect::<Vec<_>>();
WeightMat::from_rows(&weights)
}
}
pub(crate) fn predict(
weights: &WeightMat,
loss_type: LossType,
feature_vec: &SparseVec,
) -> DenseVec {
let scores = weights.t_dot_vec(feature_vec.view());
match loss_type {
LossType::Log => scores.mapv(|score| -(-score).exp().ln_1p()),
LossType::Hinge => scores.mapv(|score| -(1. - score).max(0.).powi(2)),
}
}
#[allow(clippy::many_single_char_names)]
fn solve_l2r_l2_svc(
x: &SparseMatView,
y: &[bool],
eps: f32,
cp: f32,
cn: f32,
max_iter: u32,
) -> DenseVec {
assert!(x.is_csr());
assert_eq!(x.rows(), y.len());
let l = x.rows();
let w_size = x.cols();
let mut w = DenseVec::zeros(w_size);
let mut active_size = l;
let mut pg: f32;
let mut pgmax_old = INFINITY;
let mut pgmax_new: f32;
let mut pgmin_new: f32;
let diag: [f32; 2] = [0.5 / cn, 0.5 / cp];
let mut alpha = vec![0.; l];
let mut index = (0..l).collect_vec();
let qd = x
.outer_iterator()
.zip(y.iter())
.map(|(xi, &yi)| diag[yi as usize] + csvec_dot_self(&xi))
.collect_vec();
let mut iter = 0;
let mut rng = thread_rng();
while iter < max_iter {
pgmax_new = NEG_INFINITY;
pgmin_new = INFINITY;
index.shuffle(&mut rng);
let mut s = 0;
while s < active_size {
let i = index[s];
let yi = y[i];
let yi_sign = if yi { 1. } else { -1. };
let xi = x.outer_view(i).unwrap_or_else(|| {
panic!(
"Failed to take {}-th outer view for matrix x of shape {:?}",
i,
x.shape()
)
});
let alpha_i = &mut alpha[i];
let g = yi_sign * xi.dot_dense(w.view()) - 1. + *alpha_i * diag[yi as usize];
pg = 0.;
if *alpha_i == 0. {
if g > pgmax_old {
active_size -= 1;
index.swap(s, active_size);
continue;
} else if g < 0. {
pg = g;
}
} else {
pg = g;
}
pgmax_new = pgmax_new.max(pg);
pgmin_new = pgmin_new.min(pg);
if pg.abs() > 1e-12 {
let alpha_old = *alpha_i;
*alpha_i = (*alpha_i - g / qd[i]).max(0.);
let d = (*alpha_i - alpha_old) * yi_sign;
dense_add_assign_csvec_mul_scalar(w.view_mut(), xi, d);
}
s += 1;
}
iter += 1;
if pgmax_new - pgmin_new <= eps {
if active_size == l {
break;
} else {
active_size = l;
pgmax_old = INFINITY;
continue;
}
}
pgmax_old = pgmax_new;
if pgmax_old <= 0. {
pgmax_old = INFINITY;
}
}
w
}
#[allow(clippy::many_single_char_names)]
fn solve_l2r_lr_dual(
x: &SparseMatView,
y: &[bool],
eps: f32,
cp: f32,
cn: f32,
max_iter: u32,
) -> DenseVec {
assert!(x.is_csr());
assert_eq!(x.rows(), y.len());
let l = x.rows();
let w_size = x.cols();
let max_inner_iter = 100; let mut innereps = 1e-2;
let innereps_min = eps.min(1e-8);
let upper_bound = [cn, cp];
let mut alpha = y
.iter()
.flat_map(|&yi| {
let c = upper_bound[yi as usize];
let alpha = (0.001 * c).min(1e-8);
vec![alpha, c - alpha]
})
.collect_vec();
let xtx = x
.outer_iterator()
.map(|xi| csvec_dot_self(&xi))
.collect_vec();
let mut w = DenseVec::zeros(w_size);
for (i, (xi, &yi)) in x.outer_iterator().zip(y.iter()).enumerate() {
let yi_sign = if yi { 1. } else { -1. };
dense_add_assign_csvec_mul_scalar(w.view_mut(), xi, yi_sign * alpha[2 * i]);
}
let mut index = (0..l).collect_vec();
let mut iter = 0;
let mut rng = thread_rng();
while iter < max_iter {
index.shuffle(&mut rng);
let mut newton_iter = 0;
let mut gmax = 0f32;
for &i in &index {
let yi = y[i];
let yi_sign = if yi { 1. } else { -1. };
let c = upper_bound[yi as usize];
let xi = x.outer_view(i).unwrap_or_else(|| {
panic!(
"Failed to take {}-th outer view for matrix x of shape {:?}",
i,
x.shape()
)
});
let a = xtx[i];
let b = yi_sign * xi.dot_dense(w.view());
let (ind1, ind2, sign) = if 0.5 * a * (alpha[2 * i + 1] - alpha[2 * i]) + b < 0. {
(2 * i + 1, 2 * i, -1.)
} else {
(2 * i, 2 * i + 1, 1.)
};
let alpha_old = alpha[ind1];
let mut z = if c - alpha_old < 0.5 * c {
0.1 * alpha_old
} else {
alpha_old
};
let mut gp = a * (z - alpha_old) + sign * b + (z / (c - z)).ln();
gmax = gmax.max(gp.abs());
let eta = 0.1; let mut inner_iter = 0;
while inner_iter <= max_inner_iter {
if gp.abs() < innereps {
break;
}
let gpp = a + c / (c - z) / z;
let tmpz = z - gp / gpp;
if tmpz <= 0. {
z *= eta;
} else {
z = tmpz;
}
gp = a * (z - alpha_old) + sign * b + (z / (c - z)).ln();
newton_iter += 1;
inner_iter += 1;
}
if inner_iter > 0 {
alpha[ind1] = z;
alpha[ind2] = c - z;
dense_add_assign_csvec_mul_scalar(
w.view_mut(),
xi,
sign * (z - alpha_old) * yi_sign,
);
}
}
iter += 1;
if gmax < eps {
break;
}
if newton_iter <= l / 10 {
innereps = innereps_min.max(0.1 * innereps);
}
}
w
}