use super::permutable_kernel::Permutable;
use super::{ExitReason, Float, Svm};
use ndarray::{Array1, Array2, ArrayView2, Axis};
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};
use std::marker::PhantomData;
#[derive(Clone, Debug, PartialEq)]
pub struct SolverParams<F: Float> {
pub eps: F,
pub shrinking: bool,
}
#[derive(Clone, Debug, PartialEq)]
struct Alpha<F: Float> {
value: F,
upper_bound: F,
}
impl<F: Float> Alpha<F> {
pub fn from(value: F, upper_bound: F) -> Alpha<F> {
Alpha { value, upper_bound }
}
pub fn reached_upper(&self) -> bool {
self.value >= self.upper_bound
}
pub fn free_floating(&self) -> bool {
self.value < self.upper_bound && self.value > F::zero()
}
pub fn reached_lower(&self) -> bool {
self.value == F::zero()
}
pub fn val(&self) -> F {
self.value
}
}
#[cfg_attr(
feature = "serde",
derive(Serialize, Deserialize),
serde(crate = "serde_crate")
)]
#[derive(Clone, Debug, PartialEq)]
pub enum SeparatingHyperplane<F: Float> {
Linear(Array1<F>),
WeightedCombination(Array2<F>),
}
#[derive(Clone, Debug, PartialEq)]
pub struct SolverState<'a, F: Float, K: Permutable<F>> {
gradient: Vec<F>,
gradient_fixed: Vec<F>,
alpha: Vec<Alpha<F>>,
active_set: Vec<usize>,
nactive: usize,
unshrink: bool,
nu_constraint: bool,
r: F,
dataset: ArrayView2<'a, F>,
kernel: K,
p: Vec<F>,
targets: Vec<bool>,
bounds: Vec<F>,
params: SolverParams<F>,
phantom: PhantomData<&'a K>,
}
#[allow(clippy::needless_range_loop, clippy::too_many_arguments)]
impl<'a, F: Float, K: 'a + Permutable<F>> SolverState<'a, F, K> {
pub fn new(
alpha: Vec<F>,
p: Vec<F>,
targets: Vec<bool>,
dataset: ArrayView2<'a, F>,
kernel: K,
bounds: Vec<F>,
params: SolverParams<F>,
nu_constraint: bool,
) -> SolverState<'a, F, K> {
let alpha = alpha
.into_iter()
.enumerate()
.map(|(i, alpha)| Alpha::from(alpha, bounds[i]))
.collect::<Vec<_>>();
let active_set = (0..alpha.len()).collect::<Vec<_>>();
let mut gradient = p.clone();
let mut gradient_fixed = vec![F::zero(); alpha.len()];
for i in 0..alpha.len() {
if !alpha[i].reached_lower() {
let dist_i = kernel.distances(i, alpha.len());
let alpha_i = alpha[i].val();
for j in 0..alpha.len() {
gradient[j] += alpha_i * dist_i[j];
}
if alpha[i].reached_upper() {
for j in 0..alpha.len() {
gradient_fixed[j] += bounds[i] * dist_i[j];
}
}
}
}
SolverState {
gradient,
gradient_fixed,
alpha,
p,
nactive: active_set.len(),
unshrink: false,
active_set,
dataset,
kernel,
targets,
bounds,
params,
nu_constraint,
r: F::zero(),
phantom: PhantomData,
}
}
pub fn nactive(&self) -> usize {
self.nactive
}
pub fn ntotal(&self) -> usize {
self.alpha.len()
}
pub fn target(&self, idx: usize) -> F {
if self.targets[idx] {
F::one()
} else {
-F::one()
}
}
pub fn bound(&self, idx: usize) -> F {
self.bounds[idx]
}
pub fn swap(&mut self, i: usize, j: usize) {
self.gradient.swap(i, j);
self.gradient_fixed.swap(i, j);
self.alpha.swap(i, j);
self.p.swap(i, j);
self.active_set.swap(i, j);
self.kernel.swap_indices(i, j);
self.targets.swap(i, j);
}
fn reconstruct_gradient(&mut self) {
if self.nactive() == self.ntotal() {
return;
}
for j in self.nactive()..self.ntotal() {
self.gradient[j] = self.gradient_fixed[j] + self.p[j];
}
let nfree: usize = (0..self.nactive())
.filter(|x| self.alpha[*x].free_floating())
.count();
if nfree * self.ntotal() > 2 * self.nactive() * (self.ntotal() - self.nactive()) {
for i in self.nactive()..self.ntotal() {
let dist_i = self.kernel.distances(i, self.nactive());
for j in 0..self.nactive() {
if self.alpha[i].free_floating() {
self.gradient[i] += self.alpha[j].val() * dist_i[j];
}
}
}
} else {
for i in 0..self.nactive() {
if self.alpha[i].free_floating() {
let dist_i = self.kernel.distances(i, self.ntotal());
let alpha_i = self.alpha[i].val();
for j in self.nactive()..self.ntotal() {
self.gradient[j] += alpha_i * dist_i[j];
}
}
}
}
}
pub fn update(&mut self, working_set: (usize, usize)) {
let (i, j) = working_set;
let dist_i = self.kernel.distances(i, self.nactive());
let dist_j = self.kernel.distances(j, self.nactive());
let bound_i = self.bound(i);
let bound_j = self.bound(j);
let old_alpha_i = self.alpha[i].val();
let old_alpha_j = self.alpha[j].val();
if self.targets[i] != self.targets[j] {
let mut quad_coef = self.kernel.self_distance(i)
+ self.kernel.self_distance(j)
+ (F::one() + F::one()) * dist_i[j];
if quad_coef <= F::zero() {
quad_coef = F::cast(1e-10);
}
let delta = -(self.gradient[i] + self.gradient[j]) / quad_coef;
let diff = self.alpha[i].val() - self.alpha[j].val();
self.alpha[i].value += delta;
self.alpha[j].value += delta;
if diff > F::zero() {
if self.alpha[j].val() < F::zero() {
self.alpha[j].value = F::zero();
self.alpha[i].value = diff;
}
} else if self.alpha[i].val() < F::zero() {
self.alpha[i].value = F::zero();
self.alpha[j].value = -diff;
}
if diff > bound_i - bound_j {
if self.alpha[i].val() > bound_i {
self.alpha[i].value = bound_i;
self.alpha[j].value = bound_i - diff;
}
} else if self.alpha[j].val() > bound_j {
self.alpha[j].value = bound_j;
self.alpha[i].value = bound_j + diff;
}
} else {
let mut quad_coef = self.kernel.self_distance(i) + self.kernel.self_distance(j)
- F::cast(2.0) * dist_i[j];
if quad_coef <= F::zero() {
quad_coef = F::cast(1e-10);
}
let delta = (self.gradient[i] - self.gradient[j]) / quad_coef;
let sum = self.alpha[i].val() + self.alpha[j].val();
self.alpha[i].value -= delta;
self.alpha[j].value += delta;
if sum > bound_i {
if self.alpha[i].val() > bound_i {
self.alpha[i].value = bound_i;
self.alpha[j].value = sum - bound_i;
}
} else if self.alpha[j].val() < F::zero() {
self.alpha[j].value = F::zero();
self.alpha[i].value = sum;
}
if sum > bound_j {
if self.alpha[j].val() > bound_j {
self.alpha[j].value = bound_j;
self.alpha[i].value = sum - bound_j;
}
} else if self.alpha[i].val() < F::zero() {
self.alpha[i].value = F::zero();
self.alpha[j].value = sum;
}
}
let delta_alpha_i = self.alpha[i].val() - old_alpha_i;
let delta_alpha_j = self.alpha[j].val() - old_alpha_j;
for k in 0..self.nactive() {
self.gradient[k] += dist_i[k] * delta_alpha_i + dist_j[k] * delta_alpha_j;
}
let ui = self.alpha[i].reached_upper();
let uj = self.alpha[j].reached_upper();
self.alpha[i] = Alpha::from(self.alpha[i].val(), self.bound(i));
self.alpha[j] = Alpha::from(self.alpha[j].val(), self.bound(j));
if ui != self.alpha[i].reached_upper() {
let dist_i = self.kernel.distances(i, self.ntotal());
let bound_i = self.bound(i);
if ui {
for k in 0..self.ntotal() {
self.gradient_fixed[k] -= bound_i * dist_i[k];
}
} else {
for k in 0..self.ntotal() {
self.gradient_fixed[k] += bound_i * dist_i[k];
}
}
}
if uj != self.alpha[j].reached_upper() {
let dist_j = self.kernel.distances(j, self.ntotal());
let bound_j = self.bound(j);
if uj {
for k in 0..self.nactive() {
self.gradient_fixed[k] -= bound_j * dist_j[k];
}
} else {
for k in 0..self.nactive() {
self.gradient_fixed[k] += bound_j * dist_j[k];
}
}
}
}
pub fn max_violating_pair(&self) -> ((F, isize), (F, isize)) {
let mut gmax1 = (-F::infinity(), -1);
let mut gmax2 = (-F::infinity(), -1);
for i in 0..self.nactive() {
if self.targets[i] {
if !self.alpha[i].reached_upper() && -self.gradient[i] >= gmax1.0 {
gmax1 = (-self.gradient[i], i as isize);
}
if !self.alpha[i].reached_lower() && self.gradient[i] >= gmax2.0 {
gmax2 = (self.gradient[i], i as isize);
}
} else {
if !self.alpha[i].reached_upper() && -self.gradient[i] >= gmax2.0 {
gmax2 = (-self.gradient[i], i as isize);
}
if !self.alpha[i].reached_lower() && self.gradient[i] >= gmax1.0 {
gmax1 = (self.gradient[i], i as isize);
}
}
}
(gmax1, gmax2)
}
#[allow(clippy::type_complexity)]
pub fn max_violating_pair_nu(&self) -> ((F, isize), (F, isize), (F, isize), (F, isize)) {
let mut gmax1 = (-F::infinity(), -1);
let mut gmax2 = (-F::infinity(), -1);
let mut gmax3 = (-F::infinity(), -1);
let mut gmax4 = (-F::infinity(), -1);
for i in 0..self.nactive() {
if self.targets[i] {
if !self.alpha[i].reached_upper() && -self.gradient[i] > gmax1.0 {
gmax1 = (-self.gradient[i], i as isize);
}
if !self.alpha[i].reached_lower() && self.gradient[i] > gmax3.0 {
gmax3 = (self.gradient[i], i as isize);
}
} else {
if !self.alpha[i].reached_upper() && -self.gradient[i] > gmax4.0 {
gmax4 = (-self.gradient[i], i as isize);
}
if !self.alpha[i].reached_lower() && self.gradient[i] > gmax2.0 {
gmax2 = (self.gradient[i], i as isize);
}
}
}
(gmax1, gmax2, gmax3, gmax4)
}
pub fn select_working_set(&self) -> (usize, usize, bool) {
if self.nu_constraint {
return self.select_working_set_nu();
}
let (gmax, gmax2) = self.max_violating_pair();
let mut obj_diff_min = (F::infinity(), -1);
if gmax.1 != -1 {
let dist_i = self.kernel.distances(gmax.1 as usize, self.ntotal());
for (j, dist_ij) in dist_i.into_iter().enumerate().take(self.nactive()) {
if self.targets[j] {
if !self.alpha[j].reached_lower() {
let grad_diff = gmax.0 + self.gradient[j];
if grad_diff > F::zero() {
let i = gmax.1 as usize;
let quad_coef = self.kernel.self_distance(i)
+ self.kernel.self_distance(j)
- F::cast(2.0) * self.target(i) * dist_ij;
let obj_diff = if quad_coef > F::zero() {
-(grad_diff * grad_diff) / quad_coef
} else {
-(grad_diff * grad_diff) / F::cast(1e-10)
};
if obj_diff <= obj_diff_min.0 {
obj_diff_min = (obj_diff, j as isize);
}
}
}
} else if !self.alpha[j].reached_upper() {
let grad_diff = gmax.0 - self.gradient[j];
if grad_diff > F::zero() {
let i = gmax.1 as usize;
let quad_coef = self.kernel.self_distance(i)
+ self.kernel.self_distance(j)
+ F::cast(2.0) * self.target(i) * dist_ij;
let obj_diff = if quad_coef > F::zero() {
-(grad_diff * grad_diff) / quad_coef
} else {
-(grad_diff * grad_diff) / F::cast(1e-10)
};
if obj_diff <= obj_diff_min.0 {
obj_diff_min = (obj_diff, j as isize);
}
}
}
}
}
if gmax.0 + gmax2.0 < self.params.eps || obj_diff_min.1 == -1 {
(0, 0, true)
} else {
(gmax.1 as usize, obj_diff_min.1 as usize, false)
}
}
pub fn select_working_set_nu(&self) -> (usize, usize, bool) {
let (gmaxp1, gmaxn1, gmaxp2, gmaxn2) = self.max_violating_pair_nu();
let mut obj_diff_min = (F::infinity(), -1);
let dist_i_p = if gmaxp1.1 != -1 {
Some(self.kernel.distances(gmaxp1.1 as usize, self.ntotal()))
} else {
None
};
let dist_i_n = if gmaxn1.1 != -1 {
Some(self.kernel.distances(gmaxn1.1 as usize, self.ntotal()))
} else {
None
};
for j in 0..self.nactive() {
if self.targets[j] {
if !self.alpha[j].reached_lower() {
let grad_diff = gmaxp1.0 + self.gradient[j];
if grad_diff > F::zero() {
let dist_i_p = match dist_i_p {
Some(ref x) => x,
None => continue,
};
let i = gmaxp1.1 as usize;
let quad_coef = self.kernel.self_distance(i) + self.kernel.self_distance(j)
- F::cast(2.0) * dist_i_p[j];
let obj_diff = if quad_coef > F::zero() {
-(grad_diff * grad_diff) / quad_coef
} else {
-(grad_diff * grad_diff) / F::cast(1e-10)
};
if obj_diff <= obj_diff_min.0 {
obj_diff_min = (obj_diff, j as isize);
}
}
}
} else if !self.alpha[j].reached_upper() {
let grad_diff = gmaxn1.0 - self.gradient[j];
if grad_diff > F::zero() {
let dist_i_n = match dist_i_n {
Some(ref x) => x,
None => continue,
};
let i = gmaxn1.1 as usize;
let quad_coef = self.kernel.self_distance(i) + self.kernel.self_distance(j)
- F::cast(2.0) * dist_i_n[j];
let obj_diff = if quad_coef > F::zero() {
-(grad_diff * grad_diff) / quad_coef
} else {
-(grad_diff * grad_diff) / F::cast(1e-10)
};
if obj_diff <= obj_diff_min.0 {
obj_diff_min = (obj_diff, j as isize);
}
}
}
}
if F::max(gmaxp1.0 + gmaxp2.0, gmaxn1.0 + gmaxn2.0) < self.params.eps
|| obj_diff_min.1 == -1
{
(0, 0, true)
} else {
let out_j = obj_diff_min.1 as usize;
let out_i = if self.targets[out_j] {
gmaxp1.1 as usize
} else {
gmaxn1.1 as usize
};
(out_i, out_j, false)
}
}
pub fn should_shrunk(&self, i: usize, gmax1: F, gmax2: F) -> bool {
if self.alpha[i].reached_upper() {
if self.targets[i] {
-self.gradient[i] > gmax1
} else {
-self.gradient[i] > gmax2
}
} else if self.alpha[i].reached_lower() {
if self.targets[i] {
self.gradient[i] > gmax2
} else {
-self.gradient[i] > gmax1
}
} else {
false
}
}
pub fn should_shrunk_nu(&self, i: usize, gmax1: F, gmax2: F, gmax3: F, gmax4: F) -> bool {
if self.alpha[i].reached_upper() {
if self.targets[i] {
-self.gradient[i] > gmax1
} else {
-self.gradient[i] > gmax4
}
} else if self.alpha[i].reached_lower() {
if self.targets[i] {
self.gradient[i] > gmax2
} else {
self.gradient[i] > gmax3
}
} else {
false
}
}
pub fn do_shrinking(&mut self) {
if self.nu_constraint {
self.do_shrinking_nu();
return;
}
let (gmax1, gmax2) = self.max_violating_pair();
let (gmax1, gmax2) = (gmax1.0, gmax2.0);
if !self.unshrink && gmax1 + gmax2 <= self.params.eps * F::cast(10.0) {
self.unshrink = true;
self.reconstruct_gradient();
self.nactive = self.ntotal();
}
for i in 0..self.nactive() {
if self.should_shrunk(i, gmax1, gmax2) {
self.nactive -= 1;
while self.nactive > i {
if !self.should_shrunk(self.nactive(), gmax1, gmax2) {
self.swap(i, self.nactive());
break;
}
self.nactive -= 1;
}
}
}
}
pub fn do_shrinking_nu(&mut self) {
let (gmax1, gmax2, gmax3, gmax4) = self.max_violating_pair_nu();
let (gmax1, gmax2, gmax3, gmax4) = (gmax1.0, gmax2.0, gmax3.0, gmax4.0);
if !self.unshrink && F::max(gmax1 + gmax2, gmax3 + gmax4) <= self.params.eps * F::cast(10.0)
{
self.unshrink = true;
self.reconstruct_gradient();
self.nactive = self.ntotal();
}
for i in 0..self.nactive() {
if self.should_shrunk_nu(i, gmax1, gmax2, gmax3, gmax4) {
self.nactive -= 1;
while self.nactive > i {
if !self.should_shrunk_nu(self.nactive(), gmax1, gmax2, gmax3, gmax4) {
self.swap(i, self.nactive());
break;
}
self.nactive -= 1;
}
}
}
}
pub fn calculate_rho(&mut self) -> F {
if self.nu_constraint {
return self.calculate_rho_nu();
}
let mut nfree = 0;
let mut sum_free = F::zero();
let mut ub = F::infinity();
let mut lb = -F::infinity();
for i in 0..self.nactive() {
let yg = self.target(i) * self.gradient[i];
if self.alpha[i].reached_upper() {
if self.targets[i] {
lb = F::max(lb, yg);
} else {
ub = F::min(ub, yg);
}
} else if self.alpha[i].reached_lower() {
if self.targets[i] {
ub = F::min(ub, yg);
} else {
lb = F::max(lb, yg);
}
} else {
nfree += 1;
sum_free += yg;
}
}
if nfree > 0 {
sum_free / F::cast(nfree)
} else {
(ub + lb) / F::cast(2.0)
}
}
pub fn calculate_rho_nu(&mut self) -> F {
let (mut nfree1, mut nfree2) = (0, 0);
let (mut sum_free1, mut sum_free2) = (F::zero(), F::zero());
let (mut ub1, mut ub2) = (F::infinity(), F::infinity());
let (mut lb1, mut lb2) = (-F::infinity(), -F::infinity());
for i in 0..self.nactive() {
if self.targets[i] {
if self.alpha[i].reached_upper() {
lb1 = F::max(lb1, self.gradient[i]);
} else if self.alpha[i].reached_lower() {
ub1 = F::max(ub1, self.gradient[i]);
} else {
nfree1 += 1;
sum_free1 += self.gradient[i];
}
}
if !self.targets[i] {
if self.alpha[i].reached_upper() {
lb2 = F::max(lb2, self.gradient[i]);
} else if self.alpha[i].reached_lower() {
ub2 = F::max(ub2, self.gradient[i]);
} else {
nfree2 += 1;
sum_free2 += self.gradient[i];
}
}
}
let r1 = if nfree1 > 0 {
sum_free1 / F::cast(nfree1)
} else {
(ub1 + lb1) / F::cast(2.0)
};
let r2 = if nfree2 > 0 {
sum_free2 / F::cast(nfree2)
} else {
(ub2 + lb2) / F::cast(2.0)
};
self.r = (r1 + r2) / F::cast(2.0);
(r1 - r2) / F::cast(2.0)
}
pub fn solve(mut self) -> Svm<F, F> {
let mut iter = 0;
let max_iter = if self.targets.len() > std::usize::MAX / 100 {
std::usize::MAX
} else {
100 * self.targets.len()
};
let max_iter = usize::max(10_000_000, max_iter);
let mut counter = usize::min(self.targets.len(), 1000) + 1;
while iter < max_iter {
counter -= 1;
if counter == 0 {
counter = usize::min(self.ntotal(), 1000);
if self.params.shrinking {
self.do_shrinking();
}
}
let (mut i, mut j, is_optimal) = self.select_working_set();
if is_optimal {
self.reconstruct_gradient();
let (i2, j2, is_optimal) = self.select_working_set();
if is_optimal {
break;
} else {
counter = 1;
i = i2;
j = j2;
}
}
iter += 1;
self.update((i, j));
}
if iter >= max_iter && self.nactive() < self.targets.len() {
self.reconstruct_gradient();
self.nactive = self.ntotal();
}
let rho = self.calculate_rho();
let r = if self.nu_constraint {
Some(self.r)
} else {
None
};
let mut v = F::zero();
for i in 0..self.targets.len() {
v += self.alpha[i].val() * (self.gradient[i] + self.p[i]);
}
let obj = v / F::cast(2.0);
let exit_reason = if max_iter == iter {
ExitReason::ReachedIterations
} else {
ExitReason::ReachedThreshold
};
let mut alpha: Vec<F> = (0..self.ntotal())
.map(|i| self.alpha[self.active_set[i]].val())
.collect();
if self.ntotal() > self.dataset.len_of(Axis(0)) {
for i in 0..self.dataset.len_of(Axis(0)) {
let tmp = alpha[i + self.dataset.len_of(Axis(0))];
alpha[i] -= tmp;
}
alpha.truncate(self.dataset.len_of(Axis(0)));
}
let alpha = alpha;
let sep_hyperplane = if self.kernel.inner().is_linear() {
let mut tmp = Array1::zeros(self.dataset.len_of(Axis(1)));
for (i, elm) in self.dataset.outer_iter().enumerate() {
tmp.scaled_add(self.target(i) * alpha[i], &elm);
}
SeparatingHyperplane::Linear(tmp)
} else {
let support_vectors = self.dataset.select(
Axis(0),
&alpha
.iter()
.enumerate()
.filter(|(_, a)| a.abs() > F::cast(100.) * F::epsilon())
.map(|(i, _)| i)
.collect::<Vec<_>>(),
);
SeparatingHyperplane::WeightedCombination(support_vectors)
};
Svm {
alpha,
rho,
r,
exit_reason,
obj,
iterations: iter,
sep_hyperplane,
kernel_method: self.kernel.into_inner().method,
probability_coeffs: None,
phantom: PhantomData,
}
}
}
#[cfg(test)]
mod tests {
use super::{SolverParams, SolverState};
use crate::permutable_kernel::PermutableKernel;
use crate::SeparatingHyperplane;
#[test]
fn autotraits() {
fn has_autotraits<T: Send + Sync + Sized + Unpin>() {}
has_autotraits::<SolverState<f64, PermutableKernel<f64>>>();
has_autotraits::<SolverParams<f64>>();
has_autotraits::<SeparatingHyperplane<f64>>();
}
}