use rayon::prelude::*;
use crate::constants::SVM_KERNEL_PAR_THRESHOLD;
use crate::dataset::Dataset;
use crate::error::{Result, ScryLearnError};
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum Kernel {
Linear,
RBF {
gamma: f64,
},
Polynomial {
degree: usize,
coef0: f64,
},
}
impl Default for Kernel {
fn default() -> Self {
Self::RBF { gamma: 1.0 }
}
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum Gamma {
Scale,
Auto,
Value(f64),
}
impl Gamma {
pub(crate) fn resolve(&self, n_features: usize, feature_variance: f64) -> f64 {
match self {
Gamma::Scale => {
let denom = n_features as f64 * feature_variance;
if denom > f64::EPSILON {
1.0 / denom
} else {
1.0
}
}
Gamma::Auto => {
if n_features > 0 {
1.0 / n_features as f64
} else {
1.0
}
}
Gamma::Value(v) => *v,
}
}
}
impl Kernel {
#[inline]
pub(crate) fn eval(&self, a: &[f64], b: &[f64]) -> f64 {
match self {
Kernel::Linear => dot(a, b),
Kernel::RBF { gamma } => {
let sq: f64 = a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum();
(-gamma * sq).exp()
}
#[allow(clippy::cast_possible_wrap)]
Kernel::Polynomial { degree, coef0 } => (dot(a, b) + coef0).powi(*degree as i32),
}
}
}
#[inline]
pub(crate) fn dot(a: &[f64], b: &[f64]) -> f64 {
a.iter().zip(b.iter()).map(|(x, y)| x * y).sum()
}
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct KernelSVC {
kernel: Kernel,
c: f64,
tol: f64,
max_iter: usize,
gamma_strategy: Option<Gamma>,
probability: bool,
models: Vec<BinarySMO>,
platt_params: Vec<(f64, f64)>,
n_classes: usize,
fitted: bool,
#[cfg_attr(feature = "serde", serde(default))]
_schema_version: u32,
}
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub(crate) struct BinarySMO {
pub(crate) alphas: Vec<f64>,
pub(crate) b: f64,
pub(crate) support_vectors: Vec<Vec<f64>>,
pub(crate) labels: Vec<f64>,
}
impl KernelSVC {
pub fn new() -> Self {
Self {
kernel: Kernel::default(),
c: 1.0,
tol: crate::constants::SMO_TOL,
max_iter: 1000,
gamma_strategy: Some(Gamma::Scale),
probability: false,
models: Vec::new(),
platt_params: Vec::new(),
n_classes: 0,
fitted: false,
_schema_version: crate::version::SCHEMA_VERSION,
}
}
pub fn kernel(mut self, k: Kernel) -> Self {
if !matches!(k, Kernel::RBF { .. }) {
self.gamma_strategy = None;
}
self.kernel = k;
self
}
pub fn c(mut self, c: f64) -> Self {
self.c = c;
self
}
pub fn tol(mut self, t: f64) -> Self {
self.tol = t;
self
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn gamma(mut self, g: Gamma) -> Self {
self.gamma_strategy = Some(g);
self
}
pub fn probability(mut self, enable: bool) -> Self {
self.probability = enable;
self
}
pub fn fit(&mut self, data: &Dataset) -> Result<()> {
data.validate_finite()?;
let n = data.n_samples();
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
if self.c <= 0.0 || !self.c.is_finite() {
return Err(ScryLearnError::InvalidParameter(
"C must be finite and positive".into(),
));
}
if let Some(ref gs) = self.gamma_strategy {
let m = data.n_features();
let rows = data.feature_matrix();
let variance = compute_feature_variance(&rows, m);
let g = gs.resolve(m, variance);
self.kernel = Kernel::RBF { gamma: g };
}
self.n_classes = data.n_classes();
let rows = data.feature_matrix();
if self.n_classes > 2 {
let results: Vec<(BinarySMO, (f64, f64))> = (0..self.n_classes)
.into_par_iter()
.map(|cls| {
let binary_y: Vec<f64> = data
.target
.iter()
.map(|&t| if t as usize == cls { 1.0 } else { -1.0 })
.collect();
let model = smo_train(
&rows,
&binary_y,
&self.kernel,
self.c,
self.tol,
self.max_iter,
);
let ab = if self.probability {
let dvals: Vec<f64> = rows
.iter()
.map(|x| smo_decision(&model, x, &self.kernel))
.collect();
platt_fit(&dvals, &binary_y)
} else {
(0.0, 0.0)
};
(model, ab)
})
.collect();
self.models = Vec::with_capacity(self.n_classes);
self.platt_params = Vec::with_capacity(self.n_classes);
for (model, ab) in results {
self.models.push(model);
self.platt_params.push(ab);
}
} else {
self.models = Vec::with_capacity(self.n_classes);
self.platt_params = Vec::with_capacity(self.n_classes);
for cls in 0..self.n_classes {
let binary_y: Vec<f64> = data
.target
.iter()
.map(|&t| if t as usize == cls { 1.0 } else { -1.0 })
.collect();
let model = smo_train(
&rows,
&binary_y,
&self.kernel,
self.c,
self.tol,
self.max_iter,
);
let ab = if self.probability {
let dvals: Vec<f64> = rows
.iter()
.map(|x| smo_decision(&model, x, &self.kernel))
.collect();
platt_fit(&dvals, &binary_y)
} else {
(0.0, 0.0)
};
self.platt_params.push(ab);
self.models.push(model);
}
}
self.fitted = true;
Ok(())
}
pub fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>> {
crate::version::check_schema_version(self._schema_version)?;
let scores = self.decision_function(features)?;
Ok(scores
.into_iter()
.map(|row| {
row.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map_or(0.0, |(idx, _)| idx as f64)
})
.collect())
}
pub fn decision_function(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
Ok(features
.iter()
.map(|x| {
self.models
.iter()
.map(|model| smo_decision(model, x, &self.kernel))
.collect()
})
.collect())
}
pub fn predict_proba(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
if !self.probability {
return Err(ScryLearnError::InvalidParameter(
"call .probability(true) before fit to enable predict_proba".into(),
));
}
Ok(features
.iter()
.map(|x| {
let raw: Vec<f64> = self
.models
.iter()
.zip(self.platt_params.iter())
.map(|(model, &(a, b))| {
let dv = smo_decision(model, x, &self.kernel);
platt_predict(dv, a, b)
})
.collect();
let sum: f64 = raw.iter().sum();
if sum > f64::EPSILON {
raw.iter().map(|&p| p / sum).collect()
} else {
vec![1.0 / raw.len() as f64; raw.len()]
}
})
.collect())
}
}
impl Default for KernelSVC {
fn default() -> Self {
Self::new()
}
}
pub(crate) fn smo_train(
x: &[Vec<f64>], y: &[f64], kernel: &Kernel,
c: f64,
tol: f64,
max_passes: usize,
) -> BinarySMO {
let n = x.len();
if n <= 1 {
return BinarySMO {
alphas: if n == 1 { vec![c.min(1.0)] } else { Vec::new() },
b: 0.0,
support_vectors: x.to_vec(),
labels: y.to_vec(),
};
}
let mut alphas = vec![0.0; n];
let mut b = 0.0_f64;
let mut k_matrix = vec![vec![0.0; n]; n];
if n * n >= SVM_KERNEL_PAR_THRESHOLD {
k_matrix.par_iter_mut().enumerate().for_each(|(i, row)| {
for j in 0..n {
row[j] = kernel.eval(&x[i], &x[j]);
}
});
} else {
for i in 0..n {
for j in i..n {
let val = kernel.eval(&x[i], &x[j]);
k_matrix[i][j] = val;
k_matrix[j][i] = val;
}
}
}
let mut passes = 0_usize;
let mut total_iter = 0_usize;
let hard_cap = max_passes * n;
while passes < max_passes && total_iter < hard_cap {
let mut num_changed = 0_usize;
total_iter += 1;
for i in 0..n {
let e_i = smo_predict_raw(&alphas, y, &k_matrix[i], b) - y[i];
if (y[i] * e_i < -tol && alphas[i] < c) || (y[i] * e_i > tol && alphas[i] > 0.0) {
let j = (i + 1 + (passes % (n - 1))) % n;
let e_j = smo_predict_raw(&alphas, y, &k_matrix[j], b) - y[j];
let alpha_i_old = alphas[i];
let alpha_j_old = alphas[j];
let (l, h) = if (y[i] - y[j]).abs() > f64::EPSILON {
(
f64::max(0.0, alphas[j] - alphas[i]),
f64::min(c, c + alphas[j] - alphas[i]),
)
} else {
(
f64::max(0.0, alphas[i] + alphas[j] - c),
f64::min(c, alphas[i] + alphas[j]),
)
};
if (l - h).abs() < crate::constants::SMO_BOUNDS_EQ {
continue;
}
let eta = 2.0 * k_matrix[i][j] - k_matrix[i][i] - k_matrix[j][j];
if eta >= 0.0 {
continue;
}
alphas[j] -= y[j] * (e_i - e_j) / eta;
alphas[j] = alphas[j].clamp(l, h);
if (alphas[j] - alpha_j_old).abs() < crate::constants::SMO_ALPHA_CHANGE_THRESH {
continue;
}
alphas[i] += y[i] * y[j] * (alpha_j_old - alphas[j]);
let b1 = b
- e_i
- y[i] * (alphas[i] - alpha_i_old) * k_matrix[i][i]
- y[j] * (alphas[j] - alpha_j_old) * k_matrix[i][j];
let b2 = b
- e_j
- y[i] * (alphas[i] - alpha_i_old) * k_matrix[i][j]
- y[j] * (alphas[j] - alpha_j_old) * k_matrix[j][j];
b = if alphas[i] > 0.0 && alphas[i] < c {
b1
} else if alphas[j] > 0.0 && alphas[j] < c {
b2
} else {
(b1 + b2) / 2.0
};
num_changed += 1;
}
}
if num_changed == 0 {
passes += 1;
} else {
passes = 0;
}
}
let mut sv_list = Vec::new();
let mut sv_labels = Vec::new();
let mut sv_alphas = Vec::new();
for i in 0..n {
if alphas[i] > crate::constants::SV_ALPHA_THRESH {
sv_alphas.push(alphas[i]);
sv_labels.push(y[i]);
sv_list.push(x[i].clone());
}
}
BinarySMO {
alphas: sv_alphas,
b,
support_vectors: sv_list,
labels: sv_labels,
}
}
#[inline]
fn smo_predict_raw(alphas: &[f64], y: &[f64], k_row: &[f64], b: f64) -> f64 {
let mut sum = b;
for ((&a, &yi), &ki) in alphas.iter().zip(y.iter()).zip(k_row.iter()) {
sum += a * yi * ki;
}
sum
}
pub(crate) fn smo_decision(model: &BinarySMO, x: &[f64], kernel: &Kernel) -> f64 {
let mut sum = model.b;
for i in 0..model.alphas.len() {
sum += model.alphas[i] * model.labels[i] * kernel.eval(&model.support_vectors[i], x);
}
sum
}
fn platt_fit(decision_values: &[f64], labels: &[f64]) -> (f64, f64) {
let n = decision_values.len();
if n == 0 {
return (0.0, 0.0);
}
let n_pos = labels.iter().filter(|&&y| y > 0.0).count() as f64;
let n_neg = n as f64 - n_pos;
let t_pos = (n_pos + 1.0) / (n_pos + 2.0);
let t_neg = 1.0 / (n_neg + 2.0);
let targets: Vec<f64> = labels
.iter()
.map(|&y| if y > 0.0 { t_pos } else { t_neg })
.collect();
let mut a = 0.0_f64;
let mut b = ((n_neg + 1.0) / (n_pos + 1.0)).ln();
let max_iter = 100;
let min_step = crate::constants::PLATT_MIN_STEP;
let sigma = crate::constants::PLATT_HESSIAN_REG;
for _ in 0..max_iter {
let mut g1 = 0.0_f64; let mut g2 = 0.0_f64; let mut h11 = sigma; let mut h22 = sigma;
let mut h21 = 0.0_f64;
for i in 0..n {
let fval = decision_values[i] * a + b;
let p = 1.0 / (1.0 + (-fval).exp());
let d = p - targets[i];
let s = p * (1.0 - p);
g1 += d * decision_values[i];
g2 += d;
h11 += s * decision_values[i] * decision_values[i];
h22 += s;
h21 += s * decision_values[i];
}
let det = h11 * h22 - h21 * h21;
if det.abs() < crate::constants::PLATT_SINGULAR_DET {
break;
}
let da = -(h22 * g1 - h21 * g2) / det;
let db = -(h11 * g2 - h21 * g1) / det;
if da.abs() < min_step && db.abs() < min_step {
break;
}
a += da;
b += db;
}
(a, b)
}
#[inline]
fn platt_predict(dv: f64, a: f64, b: f64) -> f64 {
1.0 / (1.0 + (a * dv + b).exp())
}
fn compute_feature_variance(rows: &[Vec<f64>], n_features: usize) -> f64 {
let n = rows.len() as f64;
if n <= 1.0 || n_features == 0 {
return 1.0;
}
let mut total_var = 0.0;
for j in 0..n_features {
let mean = rows.iter().map(|r| r[j]).sum::<f64>() / n;
let var = rows.iter().map(|r| (r[j] - mean).powi(2)).sum::<f64>() / n;
total_var += var;
}
total_var / n_features as f64
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_kernel_svc_linear() {
let features = vec![
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
];
let target = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new().kernel(Kernel::Linear).c(1.0);
svc.fit(&data).unwrap();
let preds = svc.predict(&[vec![1.0, 1.0], vec![9.0, 9.0]]).unwrap();
assert_eq!(preds[0] as usize, 0);
assert_eq!(preds[1] as usize, 1);
}
#[test]
fn test_kernel_svc_rbf_xor() {
let features = vec![vec![0.0, 1.0, 0.0, 1.0], vec![0.0, 0.0, 1.0, 1.0]];
let target = vec![0.0, 1.0, 1.0, 0.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new()
.kernel(Kernel::RBF { gamma: 5.0 })
.c(10.0)
.max_iter(500);
svc.fit(&data).unwrap();
let preds = svc
.predict(&[
vec![0.0, 0.0],
vec![1.0, 0.0],
vec![0.0, 1.0],
vec![1.0, 1.0],
])
.unwrap();
let correct = preds
.iter()
.zip([0.0, 1.0, 1.0, 0.0].iter())
.filter(|(p, t)| (**p - **t).abs() < 0.5)
.count();
assert!(
correct >= 3,
"RBF should solve XOR (got {correct}/4 correct)"
);
}
#[test]
fn test_kernel_svc_not_fitted() {
let svc = KernelSVC::new();
assert!(svc.predict(&[vec![1.0]]).is_err());
}
#[test]
fn test_kernel_svc_decision_function() {
let features = vec![vec![0.0, 0.0, 10.0, 10.0], vec![0.0, 0.0, 10.0, 10.0]];
let target = vec![0.0, 0.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new().kernel(Kernel::Linear);
svc.fit(&data).unwrap();
let scores = svc.decision_function(&[vec![1.0, 1.0]]).unwrap();
assert_eq!(scores[0].len(), 2);
}
#[test]
fn test_kernel_svc_predict_proba() {
let features = vec![
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
];
let target = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new()
.kernel(Kernel::Linear)
.c(1.0)
.probability(true);
svc.fit(&data).unwrap();
let proba = svc
.predict_proba(&[vec![1.0, 1.0], vec![9.0, 9.0]])
.unwrap();
for row in &proba {
let sum: f64 = row.iter().sum();
assert!(
(sum - 1.0).abs() < 1e-6,
"probabilities should sum to 1, got {sum}"
);
for &p in row {
assert!(p >= 0.0 && p <= 1.0, "probability out of range: {p}");
}
}
}
#[test]
fn test_kernel_svc_predict_proba_not_enabled() {
let features = vec![vec![0.0, 0.0, 10.0, 10.0], vec![0.0, 0.0, 10.0, 10.0]];
let target = vec![0.0, 0.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new().kernel(Kernel::Linear);
svc.fit(&data).unwrap();
assert!(svc.predict_proba(&[vec![1.0, 1.0]]).is_err());
}
#[test]
fn test_gamma_auto() {
let features = vec![
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
];
let target = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let data = Dataset::new(
features,
target.clone(),
vec!["x".into(), "y".into(), "z".into()],
"class",
);
let mut svc = KernelSVC::new().gamma(Gamma::Auto).c(1.0);
svc.fit(&data).unwrap();
match &svc.kernel {
Kernel::RBF { gamma } => {
assert!(
(*gamma - 1.0 / 3.0).abs() < 1e-10,
"Gamma::Auto should give 1/n_features, got {gamma}",
);
}
other => panic!("expected RBF kernel, got {:?}", other),
}
}
#[test]
fn test_gamma_scale() {
let features = vec![vec![1.0, 2.0, 3.0, 4.0], vec![2.0, 3.0, 4.0, 5.0]];
let target = vec![0.0, 0.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["a".into(), "b".into()], "class");
let mut svc = KernelSVC::new().gamma(Gamma::Scale).c(1.0);
svc.fit(&data).unwrap();
match &svc.kernel {
Kernel::RBF { gamma } => {
assert!(*gamma > 0.0, "gamma should be positive, got {gamma}");
}
other => panic!("expected RBF kernel, got {:?}", other),
}
}
#[test]
fn test_kernel_svc_single_sample() {
let features = vec![vec![1.0], vec![2.0]];
let target = vec![0.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = KernelSVC::new().kernel(Kernel::Linear).c(1.0);
svc.fit(&data).unwrap();
let preds = svc.predict(&[vec![1.0, 2.0]]).unwrap();
assert_eq!(preds.len(), 1);
}
}