use crate::dataset::Dataset;
use crate::error::{Result, ScryLearnError};
use crate::sparse::{CscMatrix, CsrMatrix};
use crate::weights::{compute_sample_weights, ClassWeight};
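/// Linear support vector classifier trained one-vs-rest.
///
/// Each class receives a weight vector plus bias, fitted by a Pegasos-style
/// subgradient solver on the weighted hinge loss. Optional probability
/// estimates come from Platt scaling of the per-class decision values.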
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct LinearSVC {
c: f64,
max_iter: usize,
tol: f64,
class_weight: ClassWeight,
probability: bool,
weights: Vec<Vec<f64>>,
platt_params: Vec<(f64, f64)>,
n_classes: usize,
fitted: bool,
#[cfg_attr(feature = "serde", serde(default))]
_schema_version: u32,
}
impl LinearSVC {
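/// Creates a classifier with defaults: `C = 1.0`, `max_iter = 1000`, the
/// crate-wide default tolerance, uniform class weights, and probability
/// estimation disabled.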
pub fn new() -> Self {
Self {
c: 1.0,
max_iter: 1000,
tol: crate::constants::DEFAULT_TOL,
class_weight: ClassWeight::Uniform,
probability: false,
weights: Vec::new(),
platt_params: Vec::new(),
n_classes: 0,
fitted: false,
_schema_version: crate::version::SCHEMA_VERSION,
}
}
pub fn c(mut self, c: f64) -> Self {
self.c = c;
self
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn tol(mut self, t: f64) -> Self {
self.tol = t;
self
}
pub fn class_weight(mut self, cw: ClassWeight) -> Self {
self.class_weight = cw;
self
}
pub fn probability(mut self, enable: bool) -> Self {
self.probability = enable;
self
}
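/// Fits one binary hinge-loss model per class (one-vs-rest), dispatching to
/// the sparse path when the dataset carries a CSC matrix. With
/// `probability(true)` set beforehand, a Platt sigmoid is also fitted to
/// each class's decision values.
///
/// A minimal sketch, mirroring the unit tests below; `Dataset` features are
/// column-major, one inner `Vec` per feature (imports are illustrative):
///
/// ```ignore
/// let features = vec![vec![0.0, 0.0, 10.0, 10.0], vec![0.0, 0.0, 10.0, 10.0]];
/// let target = vec![0.0, 0.0, 1.0, 1.0];
/// let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
/// let mut svc = LinearSVC::new().c(1.0).max_iter(500);
/// svc.fit(&data)?;
/// let preds = svc.predict(&[vec![1.0, 1.0], vec![9.0, 9.0]])?;
/// ```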
pub fn fit(&mut self, data: &Dataset) -> Result<()> {
data.validate_finite()?;
if let Some(csc) = data.sparse_csc() {
return self.fit_sparse(csc, &data.target);
}
let n = data.n_samples();
let m = data.n_features();
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
if self.c <= 0.0 || !self.c.is_finite() {
return Err(ScryLearnError::InvalidParameter(
"C must be finite and positive".into(),
));
}
self.n_classes = data.n_classes();
let sample_weights = compute_sample_weights(&data.target, &self.class_weight);
self.weights = Vec::with_capacity(self.n_classes);
self.platt_params = Vec::with_capacity(self.n_classes);
for cls in 0..self.n_classes {
let binary_target: Vec<f64> = data
.target
.iter()
.map(|&t| if t as usize == cls { 1.0 } else { -1.0 })
.collect();
let w = pegasos_train(
&data.features,
&binary_target,
&sample_weights,
m,
n,
self.c,
self.max_iter,
self.tol,
);
let ab = if self.probability {
let dvals: Vec<f64> = (0..n)
.map(|i| {
let mut score = w[m]; // w[m] holds the bias term
for (j, feat_col) in data.features.iter().enumerate().take(m) {
score += w[j] * feat_col[i];
}
score
})
.collect();
platt_fit(&dvals, &binary_target)
} else {
(0.0, 0.0)
};
self.platt_params.push(ab);
self.weights.push(w);
}
self.fitted = true;
Ok(())
}
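/// Sparse counterpart of [`fit`](Self::fit). The CSC matrix is converted to
/// CSR for row-wise access, and the class count is inferred as
/// `max(label) + 1` since no `Dataset` metadata is available here.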
fn fit_sparse(&mut self, csc: &CscMatrix, target: &[f64]) -> Result<()> {
let csr = csc.to_csr();
let n = csr.n_rows();
let m = csc.n_cols();
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
if self.c <= 0.0 || !self.c.is_finite() {
return Err(ScryLearnError::InvalidParameter(
"C must be finite and positive".into(),
));
}
self.n_classes = {
let mut max_class = 0usize;
for &t in target {
let c = t as usize;
if c > max_class {
max_class = c;
}
}
max_class + 1
};
let sample_weights = compute_sample_weights(target, &self.class_weight);
self.weights = Vec::with_capacity(self.n_classes);
self.platt_params = Vec::with_capacity(self.n_classes);
for cls in 0..self.n_classes {
let binary_target: Vec<f64> = target
.iter()
.map(|&t| if t as usize == cls { 1.0 } else { -1.0 })
.collect();
let w = pegasos_train_sparse(
&csr,
&binary_target,
&sample_weights,
m,
n,
self.c,
self.max_iter,
self.tol,
);
let ab = if self.probability {
let dvals: Vec<f64> = (0..n)
.map(|i| {
let row = csr.row(i);
let mut score = w[m];
for (col, val) in row.iter() {
score += w[col] * val;
}
score
})
.collect();
platt_fit(&dvals, &binary_target)
} else {
(0.0, 0.0)
};
self.platt_params.push(ab);
self.weights.push(w);
}
self.fitted = true;
Ok(())
}
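/// Predicts class labels for CSR rows by taking, per row, the class with
/// the largest linear score `w . x + bias`; columns beyond the trained
/// feature count are ignored.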
pub fn predict_sparse(&self, csr: &CsrMatrix) -> Result<Vec<f64>> {
crate::version::check_schema_version(self._schema_version)?;
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
let n = csr.n_rows();
let mut preds = Vec::with_capacity(n);
for i in 0..n {
let row = csr.row(i);
let mut best_cls = 0usize;
let mut best_score = f64::NEG_INFINITY;
for (cls, w) in self.weights.iter().enumerate() {
let m = w.len() - 1;
let mut score = w[m];
for (col, val) in row.iter() {
// Ignore columns beyond the trained feature count.
if col < m {
score += w[col] * val;
}
}
if score > best_score {
best_score = score;
best_cls = cls;
}
}
preds.push(best_cls as f64);
}
Ok(preds)
}
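/// Predicts class labels for dense, row-major samples as the argmax of
/// [`decision_function`](Self::decision_function).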
pub fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>> {
crate::version::check_schema_version(self._schema_version)?;
let scores = self.decision_function(features)?;
Ok(scores
.into_iter()
.map(|row| {
row.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map_or(0.0, |(idx, _)| idx as f64)
})
.collect())
}
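/// Returns one raw score per class for each dense, row-major sample:
/// `w_c . x + b_c` for every class `c`.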
pub fn decision_function(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
Ok(features
.iter()
.map(|row| {
self.weights
.iter()
.map(|w| {
let m = w.len() - 1;
let mut score = w[m];
for (j, &x) in row.iter().enumerate().take(m) {
score += w[j] * x;
}
score
})
.collect()
})
.collect())
}
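/// Returns per-class probabilities: each class's decision value is passed
/// through its fitted Platt sigmoid and the row is renormalised to sum to
/// one. Requires `probability(true)` to have been set before `fit`.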
pub fn predict_proba(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
if !self.probability {
return Err(ScryLearnError::InvalidParameter(
"call .probability(true) before fit to enable predict_proba".into(),
));
}
let scores = self.decision_function(features)?;
Ok(scores
.into_iter()
.map(|row| {
let raw: Vec<f64> = row
.iter()
.zip(self.platt_params.iter())
.map(|(&dv, &(a, b))| platt_predict(dv, a, b))
.collect();
let sum: f64 = raw.iter().sum();
if sum > f64::EPSILON {
raw.iter().map(|&p| p / sum).collect()
} else {
vec![1.0 / raw.len() as f64; raw.len()]
}
})
.collect())
}
}
impl Default for LinearSVC {
fn default() -> Self {
Self::new()
}
}
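/// Linear support vector regressor with the epsilon-insensitive loss,
/// trained by stochastic subgradient descent using the Pegasos step size
/// `eta = 1 / (lambda * t)` with `lambda = 1 / (C * n)`.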
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct LinearSVR {
c: f64,
epsilon: f64,
max_iter: usize,
tol: f64,
weights: Vec<f64>,
fitted: bool,
#[cfg_attr(feature = "serde", serde(default))]
_schema_version: u32,
}
impl LinearSVR {
pub fn new() -> Self {
Self {
c: 1.0,
epsilon: 0.1,
max_iter: 1000,
tol: crate::constants::DEFAULT_TOL,
weights: Vec::new(),
fitted: false,
_schema_version: crate::version::SCHEMA_VERSION,
}
}
pub fn c(mut self, c: f64) -> Self {
self.c = c;
self
}
pub fn epsilon(mut self, e: f64) -> Self {
self.epsilon = e;
self
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn tol(mut self, t: f64) -> Self {
self.tol = t;
self
}
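/// Fits the regressor. Samples inside the epsilon-tube
/// (`|prediction - y| <= epsilon`) contribute only weight decay; outside it,
/// the subgradient is the sign of the residual times the feature vector.
///
/// A minimal sketch, mirroring the unit tests below (imports illustrative):
///
/// ```ignore
/// let data = Dataset::new(
///     vec![vec![1.0, 2.0, 3.0, 4.0]],
///     vec![2.0, 4.0, 6.0, 8.0],
///     vec!["x".into()],
///     "y",
/// );
/// let mut svr = LinearSVR::new().c(10.0).epsilon(0.1);
/// svr.fit(&data)?;
/// let preds = svr.predict(&[vec![3.0]])?;
/// ```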
pub fn fit(&mut self, data: &Dataset) -> Result<()> {
data.validate_finite()?;
if let Some(csc) = data.sparse_csc() {
return self.fit_sparse(csc, &data.target);
}
let n = data.n_samples();
let m = data.n_features();
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
if self.c <= 0.0 || !self.c.is_finite() {
return Err(ScryLearnError::InvalidParameter(
"C must be finite and positive".into(),
));
}
let lambda = 1.0 / (self.c * n as f64);
let mut w = vec![0.0; m + 1];
let mut t = 1.0_f64;
let mut prev_w = w.clone();
for _epoch in 0..self.max_iter {
for i in 0..n {
let eta = 1.0 / (lambda * t);
t += 1.0;
let mut pred = w[m]; // w[m] holds the bias term
// `zip` truncates to the m feature columns, so the bias entry of `w`
// is never paired with a feature.
for (wj, feat_col) in w.iter().zip(data.features.iter()) {
pred += wj * feat_col[i];
}
let residual = pred - data.target[i];
let sign = if residual > self.epsilon {
1.0
} else if residual < -self.epsilon {
-1.0
} else {
0.0
};
for (wj, feat_col) in w.iter_mut().zip(data.features.iter()) {
*wj = (1.0 - eta * lambda) * *wj - eta * sign * feat_col[i];
}
w[m] -= eta * sign;
}
let max_delta = w
.iter()
.zip(prev_w.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
if max_delta < self.tol {
break;
}
prev_w.copy_from_slice(&w);
}
self.weights = w;
self.fitted = true;
Ok(())
}
fn fit_sparse(&mut self, csc: &CscMatrix, target: &[f64]) -> Result<()> {
let csr = csc.to_csr();
let n = csr.n_rows();
let m = csc.n_cols();
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
if self.c <= 0.0 || !self.c.is_finite() {
return Err(ScryLearnError::InvalidParameter(
"C must be finite and positive".into(),
));
}
let lambda = 1.0 / (self.c * n as f64);
let mut w = vec![0.0; m + 1];
let mut t = 1.0_f64;
let mut prev_w = w.clone();
for _epoch in 0..self.max_iter {
for i in 0..n {
let eta = 1.0 / (lambda * t);
t += 1.0;
let row = csr.row(i);
let mut pred = w[m];
for (col, val) in row.iter() {
pred += w[col] * val;
}
let residual = pred - target[i];
let sign = if residual > self.epsilon {
1.0
} else if residual < -self.epsilon {
-1.0
} else {
0.0
};
let decay = 1.0 - eta * lambda;
for wj in w.iter_mut().take(m) {
*wj *= decay;
}
for (col, val) in row.iter() {
w[col] -= eta * sign * val;
}
w[m] -= eta * sign;
}
let max_delta = w
.iter()
.zip(prev_w.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
if max_delta < self.tol {
break;
}
prev_w.copy_from_slice(&w);
}
self.weights = w;
self.fitted = true;
Ok(())
}
pub fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>> {
crate::version::check_schema_version(self._schema_version)?;
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
let m = self.weights.len() - 1;
Ok(features
.iter()
.map(|row| {
let mut pred = self.weights[m];
for (j, &x) in row.iter().enumerate().take(m) {
pred += self.weights[j] * x;
}
pred
})
.collect())
}
pub fn predict_sparse(&self, csr: &CsrMatrix) -> Result<Vec<f64>> {
crate::version::check_schema_version(self._schema_version)?;
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
let m = self.weights.len() - 1;
let n = csr.n_rows();
let mut preds = Vec::with_capacity(n);
for i in 0..n {
let row = csr.row(i);
let mut pred = self.weights[m];
for (col, val) in row.iter() {
if col < m {
pred += self.weights[col] * val;
}
}
preds.push(pred);
}
Ok(preds)
}
}
impl Default for LinearSVR {
fn default() -> Self {
Self::new()
}
}
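/// Full-batch subgradient descent on the weighted hinge loss with L2
/// regularisation (`lambda = 1 / (C * n)`); the bias `w[m]` is left
/// unregularised. Returns the iterate with the lowest observed objective
/// rather than the final one.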
#[allow(clippy::too_many_arguments)]
fn pegasos_train(
features: &[Vec<f64>],
binary_target: &[f64],
sample_weights: &[f64],
m: usize,
n: usize,
c: f64,
max_iter: usize,
tol: f64,
) -> Vec<f64> {
let lambda = 1.0 / (c * n as f64);
let mut w = vec![0.0; m + 1];
let mut best_w = w.clone();
let mut best_loss = f64::INFINITY;
let mut prev_w = w.clone();
for epoch in 0..max_iter {
let eta = 1.0 / (1.0 + crate::constants::PEGASOS_LR_DECAY * epoch as f64);
let mut grad = vec![0.0; m + 1];
let mut hinge_loss = 0.0_f64;
for i in 0..n {
let mut score = w[m];
for j in 0..m {
score += w[j] * features[j][i];
}
let y = binary_target[i];
let sw = sample_weights[i];
let margin = y * score;
if margin < 1.0 {
let loss_contrib = sw * (1.0 - margin);
hinge_loss += loss_contrib;
for j in 0..m {
grad[j] -= sw * y * features[j][i];
}
grad[m] -= sw * y;
}
}
for j in 0..m {
grad[j] = grad[j] / n as f64 + lambda * w[j];
}
grad[m] /= n as f64;
for j in 0..=m {
w[j] -= eta * grad[j];
}
let total_loss =
hinge_loss / n as f64 + 0.5 * lambda * w.iter().take(m).map(|x| x * x).sum::<f64>();
if total_loss < best_loss {
best_loss = total_loss;
best_w.copy_from_slice(&w);
}
let max_delta = w
.iter()
.zip(prev_w.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
if max_delta < tol {
break;
}
prev_w.copy_from_slice(&w);
}
best_w
}
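/// Sparse (CSR) variant of [`pegasos_train`]: gradient contributions are
/// accumulated only over each row's non-zero entries.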
#[allow(clippy::too_many_arguments)]
fn pegasos_train_sparse(
csr: &CsrMatrix,
binary_target: &[f64],
sample_weights: &[f64],
m: usize,
n: usize,
c: f64,
max_iter: usize,
tol: f64,
) -> Vec<f64> {
let lambda = 1.0 / (c * n as f64);
let mut w = vec![0.0; m + 1];
let mut best_w = w.clone();
let mut best_loss = f64::INFINITY;
let mut prev_w = w.clone();
for epoch in 0..max_iter {
let eta = 1.0 / (1.0 + crate::constants::PEGASOS_LR_DECAY * epoch as f64);
let mut grad = vec![0.0; m + 1];
let mut hinge_loss = 0.0_f64;
for i in 0..n {
let row = csr.row(i);
let mut score = w[m];
for (col, val) in row.iter() {
score += w[col] * val;
}
let y = binary_target[i];
let sw = sample_weights[i];
let margin = y * score;
if margin < 1.0 {
hinge_loss += sw * (1.0 - margin);
for (col, val) in row.iter() {
grad[col] -= sw * y * val;
}
grad[m] -= sw * y;
}
}
for j in 0..m {
grad[j] = grad[j] / n as f64 + lambda * w[j];
}
grad[m] /= n as f64;
for j in 0..=m {
w[j] -= eta * grad[j];
}
let total_loss =
hinge_loss / n as f64 + 0.5 * lambda * w.iter().take(m).map(|x| x * x).sum::<f64>();
if total_loss < best_loss {
best_loss = total_loss;
best_w.copy_from_slice(&w);
}
let max_delta = w
.iter()
.zip(prev_w.iter())
.map(|(a, b)| (a - b).abs())
.fold(0.0_f64, f64::max);
if max_delta < tol {
break;
}
prev_w.copy_from_slice(&w);
}
best_w
}
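/// Fits Platt scaling parameters `(a, b)` for the model
/// `P(y = 1 | f) = 1 / (1 + exp(a*f + b))` by Newton's method (at most 100
/// full steps, without the line search of the reference implementation),
/// using the regularised soft targets `(n_pos + 1) / (n_pos + 2)` and
/// `1 / (n_neg + 2)` from Platt (1999).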
fn platt_fit(decision_values: &[f64], labels: &[f64]) -> (f64, f64) {
let n = decision_values.len();
if n == 0 {
return (0.0, 0.0);
}
let n_pos = labels.iter().filter(|&&y| y > 0.0).count() as f64;
let n_neg = n as f64 - n_pos;
let t_pos = (n_pos + 1.0) / (n_pos + 2.0);
let t_neg = 1.0 / (n_neg + 2.0);
let targets: Vec<f64> = labels
.iter()
.map(|&y| if y > 0.0 { t_pos } else { t_neg })
.collect();
let mut a = 0.0_f64;
let mut b = ((n_neg + 1.0) / (n_pos + 1.0)).ln();
let sigma = crate::constants::PLATT_HESSIAN_REG;
for _ in 0..100 {
let mut g1 = 0.0_f64;
let mut g2 = 0.0_f64;
let mut h11 = sigma;
let mut h22 = sigma;
let mut h21 = 0.0_f64;
for i in 0..n {
let fval = decision_values[i] * a + b;
// Model: P(y = 1 | f) = 1 / (1 + exp(a*f + b)), consistent with
// `platt_predict` and with the prior used to initialise `b`, so the
// Newton residual is d = t - p.
let p = 1.0 / (1.0 + fval.exp());
let d = targets[i] - p;
let s = p * (1.0 - p);
g1 += d * decision_values[i];
g2 += d;
h11 += s * decision_values[i] * decision_values[i];
h22 += s;
h21 += s * decision_values[i];
}
let det = h11 * h22 - h21 * h21;
if det.abs() < crate::constants::PLATT_SINGULAR_DET {
break;
}
let da = -(h22 * g1 - h21 * g2) / det;
let db = -(h11 * g2 - h21 * g1) / det;
if da.abs() < crate::constants::PLATT_CONVERGENCE
&& db.abs() < crate::constants::PLATT_CONVERGENCE
{
break;
}
a += da;
b += db;
}
(a, b)
}
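/// Platt sigmoid: `P(y = 1 | dv) = 1 / (1 + exp(a * dv + b))`.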
#[inline]
fn platt_predict(dv: f64, a: f64, b: f64) -> f64 {
1.0 / (1.0 + (a * dv + b).exp())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_linear_svc_binary() {
let features = vec![
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
];
let target = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = LinearSVC::new().c(1.0).max_iter(500);
svc.fit(&data).unwrap();
let preds = svc.predict(&[vec![1.0, 1.0], vec![9.0, 9.0]]).unwrap();
assert_eq!(preds[0] as usize, 0);
assert_eq!(preds[1] as usize, 1);
}
#[test]
fn test_linear_svc_decision_function() {
let features = vec![vec![0.0, 0.0, 10.0, 10.0], vec![0.0, 0.0, 10.0, 10.0]];
let target = vec![0.0, 0.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = LinearSVC::new();
svc.fit(&data).unwrap();
let scores = svc.decision_function(&[vec![1.0, 1.0]]).unwrap();
assert_eq!(scores[0].len(), 2);
}
#[test]
fn test_linear_svc_not_fitted() {
let svc = LinearSVC::new();
assert!(svc.predict(&[vec![1.0]]).is_err());
assert!(svc.decision_function(&[vec![1.0]]).is_err());
}
#[test]
fn test_linear_svc_invalid_c() {
let features = vec![vec![1.0]];
let target = vec![0.0];
let data = Dataset::new(features, target, vec!["x".into()], "class");
let mut svc = LinearSVC::new().c(-1.0);
assert!(svc.fit(&data).is_err());
}
#[test]
fn test_linear_svr_simple() {
let features = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]];
let target = vec![2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0];
let data = Dataset::new(features, target, vec!["x".into()], "y");
let mut svr = LinearSVR::new().c(10.0).epsilon(0.1).max_iter(2000);
svr.fit(&data).unwrap();
let preds = svr.predict(&[vec![3.0], vec![5.0]]).unwrap();
assert!(
(preds[0] - 6.0).abs() < 2.0,
"Expected ~6.0, got {}",
preds[0]
);
assert!(
(preds[1] - 10.0).abs() < 2.0,
"Expected ~10.0, got {}",
preds[1]
);
}
#[test]
fn test_linear_svr_not_fitted() {
let svr = LinearSVR::new();
assert!(svr.predict(&[vec![1.0]]).is_err());
}
#[test]
fn test_linear_svc_predict_proba() {
let features = vec![
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
vec![0.0, 0.0, 0.0, 10.0, 10.0, 10.0],
];
let target = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = LinearSVC::new().c(1.0).max_iter(500).probability(true);
svc.fit(&data).unwrap();
let proba = svc
.predict_proba(&[vec![1.0, 1.0], vec![9.0, 9.0]])
.unwrap();
for row in &proba {
let sum: f64 = row.iter().sum();
assert!(
(sum - 1.0).abs() < 1e-6,
"probabilities should sum to 1, got {sum}"
);
for &p in row {
assert!((0.0..=1.0).contains(&p), "probability out of range: {p}");
}
}
}
#[test]
fn test_linear_svc_predict_proba_not_enabled() {
let features = vec![vec![0.0, 0.0, 10.0, 10.0], vec![0.0, 0.0, 10.0, 10.0]];
let target = vec![0.0, 0.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut svc = LinearSVC::new();
svc.fit(&data).unwrap();
assert!(svc.predict_proba(&[vec![1.0, 1.0]]).is_err());
}
}