use crate::error::{Result, ScryLearnError};
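/// Platt scaling: fits a sigmoid `p = sigmoid(a * f + b)` mapping raw
/// decision values `f` to calibrated probabilities, following Platt (1999)
/// with the Newton-method formulation of Lin, Weng & Keerthi (2007).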
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct PlattScaling {
a: f64,
b: f64,
max_iter: usize,
fitted: bool,
#[cfg_attr(feature = "serde", serde(default))]
_schema_version: u32,
}
impl PlattScaling {
pub fn new() -> Self {
Self {
a: 0.0,
b: 0.0,
max_iter: 100,
fitted: false,
_schema_version: crate::version::SCHEMA_VERSION,
}
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn a(&self) -> f64 {
self.a
}
pub fn b(&self) -> f64 {
self.b
}
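    /// Fits the sigmoid by minimizing the negative log-likelihood of the
    /// smoothed targets with damped Newton iterations.
    ///
    /// Errors if the slices differ in length, are empty, contain only one
    /// class, or if all decision values are identical.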
pub fn fit(&mut self, decision_values: &[f64], labels: &[f64]) -> Result<()> {
let n = decision_values.len();
if n != labels.len() {
return Err(ScryLearnError::InvalidParameter(
"decision_values and labels must have the same length".into(),
));
}
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
let n_pos = labels.iter().filter(|&&y| y > 0.5).count();
let n_neg = n - n_pos;
if n_pos == 0 || n_neg == 0 {
return Err(ScryLearnError::InvalidParameter(
"labels must contain both positive and negative samples".into(),
));
}
let dv_min = decision_values
.iter()
.copied()
.fold(f64::INFINITY, f64::min);
let dv_max = decision_values
.iter()
.copied()
.fold(f64::NEG_INFINITY, f64::max);
if (dv_max - dv_min).abs() < f64::EPSILON {
return Err(ScryLearnError::InvalidData(
"all decision values are identical — Platt scaling cannot calibrate".into(),
));
}
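        // Smoothed targets per Platt (1999); the +1/+2 offsets keep the
        // targets strictly inside (0, 1).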
let t_pos = (n_pos as f64 + 1.0) / (n_pos as f64 + 2.0);
let t_neg = 1.0 / (n_neg as f64 + 2.0);
let t: Vec<f64> = labels
.iter()
.map(|&y| if y > 0.5 { t_pos } else { t_neg })
.collect();
let mut a = 0.0_f64;
        // Initialize so that sigmoid(b) equals the smoothed positive prior.
        // Note the sign relative to LIBSVM's B0 = ln((n_neg+1)/(n_pos+1)):
        // this implementation parameterizes p = sigmoid(a*f + b), not
        // p = 1 / (1 + exp(A*f + B)).
        let mut b = ((n_pos as f64 + 1.0) / (n_neg as f64 + 1.0)).ln();
let min_step = crate::constants::PLATT_MIN_STEP;
let sigma = crate::constants::PLATT_HESSIAN_REG;
for _ in 0..self.max_iter {
            // Gradient (g1, g2) and regularized Hessian entries of the NLL.
            let mut g1 = 0.0_f64;
            let mut g2 = 0.0_f64;
            let mut h11 = sigma;
            let mut h22 = sigma;
            let mut h21 = 0.0_f64;
for i in 0..n {
let fval = decision_values[i] * a + b;
let p = sigmoid(fval);
let d = p - t[i];
                // Clamp the curvature weight p * (1 - p) away from zero so
                // the Hessian stays positive definite when p saturates.
                let w = (p * (1.0 - p)).max(crate::constants::SINGULAR_THRESHOLD);
let fi = decision_values[i];
g1 += fi * d;
g2 += d;
h11 += fi * fi * w;
h22 += w;
h21 += fi * w;
}
let det = h11 * h22 - h21 * h21;
if det.abs() < crate::constants::PLATT_SINGULAR_DET {
return Err(ScryLearnError::ConvergenceFailure {
iterations: self.max_iter,
tolerance: crate::constants::PLATT_SINGULAR_DET,
});
}
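            // Newton step: solve the 2x2 system H * [da, db]^T = -g
            // by Cramer's rule.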
let da = -(h22 * g1 - h21 * g2) / det;
let db = -(h11 * g2 - h21 * g1) / det;
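            // Backtracking line search with an Armijo sufficient-decrease
            // test; if the step shrinks below `min_step`, take the tiny
            // step anyway and exit the search.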
let mut step = 1.0;
let old_nll = neg_log_likelihood(decision_values, &t, a, b);
loop {
let new_a = a + step * da;
let new_b = b + step * db;
let new_nll = neg_log_likelihood(decision_values, &t, new_a, new_b);
if new_nll < old_nll + crate::constants::ARMIJO_C * step * (g1 * da + g2 * db) {
a = new_a;
b = new_b;
break;
}
step *= 0.5;
if step < min_step {
a += step * da;
b += step * db;
break;
}
}
if (da * step).abs() < crate::constants::PLATT_CONVERGENCE
&& (db * step).abs() < crate::constants::PLATT_CONVERGENCE
{
break;
}
}
self.a = a;
self.b = b;
self.fitted = true;
Ok(())
}
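    /// Maps decision values through the fitted sigmoid. Called before
    /// `fit`, this returns 0.5 for every input (a = b = 0).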
pub fn predict(&self, decision_values: &[f64]) -> Vec<f64> {
decision_values
.iter()
.map(|&f| sigmoid(self.a * f + self.b))
.collect()
}
}
impl Default for PlattScaling {
fn default() -> Self {
Self::new()
}
}
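/// Numerically stable logistic function; the two branches avoid overflow
/// in `exp` for inputs of large magnitude.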
fn sigmoid(x: f64) -> f64 {
if x >= 0.0 {
1.0 / (1.0 + (-x).exp())
} else {
let ex = x.exp();
ex / (1.0 + ex)
}
}
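/// Negative log-likelihood of targets `t` under `p = sigmoid(a * f + b)`,
/// with probabilities clamped away from 0 and 1 so `ln` stays finite.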
fn neg_log_likelihood(f: &[f64], t: &[f64], a: f64, b: f64) -> f64 {
let mut nll = 0.0;
for i in 0..f.len() {
let p = sigmoid(a * f[i] + b);
let p_clamped = p.clamp(
crate::constants::NEAR_ZERO,
1.0 - crate::constants::NEAR_ZERO,
);
nll -= t[i] * p_clamped.ln() + (1.0 - t[i]) * (1.0 - p_clamped).ln();
}
nll
}
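/// Isotonic regression: fits a non-decreasing step function to the data
/// with the pool-adjacent-violators algorithm (PAVA) and predicts by
/// linear interpolation between block means.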
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub struct IsotonicRegression {
xs: Vec<f64>,
ys: Vec<f64>,
fitted: bool,
#[cfg_attr(feature = "serde", serde(default))]
_schema_version: u32,
}
impl IsotonicRegression {
pub fn new() -> Self {
Self {
xs: Vec::new(),
ys: Vec::new(),
fitted: false,
_schema_version: crate::version::SCHEMA_VERSION,
}
}
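    /// Sorts samples by `x`, then repeatedly merges adjacent blocks whose
    /// means violate monotonicity until block means are non-decreasing.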
pub fn fit(&mut self, x: &[f64], y: &[f64]) -> Result<()> {
let n = x.len();
if n != y.len() {
return Err(ScryLearnError::InvalidParameter(
"x and y must have the same length".into(),
));
}
if n == 0 {
return Err(ScryLearnError::EmptyDataset);
}
let mut indices: Vec<usize> = (0..n).collect();
        indices.sort_by(|&a, &b| x[a].total_cmp(&x[b]));
let sorted_x: Vec<f64> = indices.iter().map(|&i| x[i]).collect();
let sorted_y: Vec<f64> = indices.iter().map(|&i| y[i]).collect();
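        // Each block is (sum of y, sample count, start index in sorted order).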
let mut blocks: Vec<(f64, usize, usize)> = Vec::with_capacity(n);
for i in 0..n {
blocks.push((sorted_y[i], 1, i));
while blocks.len() >= 2 {
let len = blocks.len();
let mean_last = blocks[len - 1].0 / blocks[len - 1].1 as f64;
let mean_prev = blocks[len - 2].0 / blocks[len - 2].1 as f64;
if mean_prev <= mean_last {
break;
}
let last = blocks.pop().expect("loop guard: blocks.len() >= 2");
let prev = blocks.last_mut().expect("loop guard: blocks.len() >= 2");
prev.0 += last.0;
prev.1 += last.1;
}
}
let mut fit_x = Vec::with_capacity(blocks.len());
let mut fit_y = Vec::with_capacity(blocks.len());
let mut idx = 0;
for &(sum_y, count, _) in &blocks {
let mean_y = sum_y / count as f64;
let block_start = idx;
let block_end = idx + count;
let mean_x: f64 = sorted_x[block_start..block_end].iter().sum::<f64>() / count as f64;
fit_x.push(mean_x);
fit_y.push(mean_y);
idx = block_end;
}
self.xs = fit_x;
self.ys = fit_y;
self.fitted = true;
Ok(())
}
pub fn predict(&self, x: &[f64]) -> Vec<f64> {
x.iter().map(|&v| self.interpolate(v)).collect()
}
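    /// Piecewise-linear interpolation between fitted block means, clamped
    /// to the endpoint values outside the fitted range.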
fn interpolate(&self, x: f64) -> f64 {
if self.xs.is_empty() {
return 0.5;
}
if self.xs.len() == 1 {
return self.ys[0];
}
if x <= self.xs[0] {
return self.ys[0];
}
if x >= self.xs[self.xs.len() - 1] {
return self.ys[self.ys.len() - 1];
}
let mut lo = 0;
let mut hi = self.xs.len() - 1;
while lo + 1 < hi {
let mid = usize::midpoint(lo, hi);
if self.xs[mid] <= x {
lo = mid;
} else {
hi = mid;
}
}
let dx = self.xs[hi] - self.xs[lo];
if dx.abs() < crate::constants::NEAR_ZERO {
return f64::midpoint(self.ys[lo], self.ys[hi]);
}
let t = (x - self.xs[lo]) / dx;
self.ys[lo] + t * (self.ys[hi] - self.ys[lo])
}
}
impl Default for IsotonicRegression {
fn default() -> Self {
Self::new()
}
}
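/// Calibration curve family: `Sigmoid` fits a [`PlattScaling`] sigmoid,
/// `Isotonic` fits an [`IsotonicRegression`] step function.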
#[derive(Clone, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum CalibrationMethod {
#[default]
Sigmoid,
Isotonic,
}
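/// Probability calibration with cross-validation: per-class calibrators
/// are fit on out-of-fold predictions of the base classifier, which is
/// then refit on the full dataset.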
#[non_exhaustive]
pub struct CalibratedClassifierCV {
base: Box<dyn CalibrableClassifier>,
method: CalibrationMethod,
n_folds: usize,
calibrators: Vec<CalibratorKind>,
fitted: bool,
}
enum CalibratorKind {
Platt(PlattScaling),
Isotonic(IsotonicRegression),
}
impl CalibratorKind {
fn predict(&self, values: &[f64]) -> Vec<f64> {
match self {
Self::Platt(p) => p.predict(values),
Self::Isotonic(iso) => iso.predict(values),
}
}
}
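/// Minimal classifier interface required for calibration; `clone_box`
/// lets the boxed trait object be duplicated for per-fold refits.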
pub trait CalibrableClassifier {
fn fit(&mut self, data: &crate::dataset::Dataset) -> Result<()>;
fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>>;
fn predict_proba(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>>;
fn clone_box(&self) -> Box<dyn CalibrableClassifier>;
}
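// Forwards the trait methods to each type's inherent methods of the same
// name (inherent methods take precedence during resolution, so the calls
// below do not recurse).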
macro_rules! impl_calibrable {
($($ty:ty),* $(,)?) => {
$(
impl CalibrableClassifier for $ty {
fn fit(&mut self, data: &crate::dataset::Dataset) -> Result<()> {
self.fit(data)
}
fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>> {
self.predict(features)
}
fn predict_proba(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
self.predict_proba(features)
}
fn clone_box(&self) -> Box<dyn CalibrableClassifier> {
Box::new(self.clone())
}
}
)*
};
}
impl_calibrable! {
crate::tree::DecisionTreeClassifier,
crate::tree::RandomForestClassifier,
crate::tree::GradientBoostingClassifier,
crate::tree::HistGradientBoostingClassifier,
crate::linear::LogisticRegression,
crate::naive_bayes::GaussianNb,
crate::naive_bayes::BernoulliNB,
crate::naive_bayes::MultinomialNB,
crate::svm::LinearSVC,
crate::neighbors::KnnClassifier,
}
#[cfg(feature = "experimental")]
impl_calibrable! {
crate::svm::KernelSVC,
}
impl CalibratedClassifierCV {
pub fn new<C: CalibrableClassifier + 'static>(
classifier: C,
method: CalibrationMethod,
) -> Self {
Self {
base: Box::new(classifier),
method,
n_folds: 5,
calibrators: Vec::new(),
fitted: false,
}
}
pub fn n_folds(mut self, n: usize) -> Self {
self.n_folds = n;
self
}
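    /// Fits per-class calibrators on out-of-fold probabilities gathered
    /// over `n_folds` cross-validation splits, then refits the base
    /// classifier on the full dataset.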
pub fn fit(&mut self, data: &crate::dataset::Dataset) -> Result<()> {
        let n = data.n_samples();
        if self.n_folds < 2 {
            return Err(ScryLearnError::InvalidParameter(format!(
                "n_folds ({}) must be at least 2",
                self.n_folds
            )));
        }
        if n < self.n_folds {
            return Err(ScryLearnError::InvalidParameter(format!(
                "n_folds ({}) must be ≤ n_samples ({n})",
                self.n_folds
            )));
        }
        let features = data.feature_matrix();
        let targets = &data.target;
let n_classes = {
let mut max_class = 0usize;
for &t in targets {
let c = t as usize;
if c > max_class {
max_class = c;
}
}
max_class + 1
};
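        // Out-of-fold probabilities: each sample is predicted by the fold
        // model that did not see it during training.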
let mut oof_proba: Vec<Vec<f64>> = vec![vec![0.0; n_classes]; n];
let fold_indices = k_fold_indices(n, self.n_folds);
for fold in 0..self.n_folds {
let val_mask = &fold_indices[fold];
let train_indices: Vec<usize> = (0..n).filter(|i| !val_mask.contains(i)).collect();
let val_indices: Vec<usize> = val_mask.clone();
let train_features: Vec<Vec<f64>> = data
.features
.iter()
.map(|col| train_indices.iter().map(|&i| col[i]).collect())
.collect();
let train_target: Vec<f64> = train_indices.iter().map(|&i| targets[i]).collect();
let train_data = crate::dataset::Dataset::new(
train_features,
train_target,
data.feature_names.clone(),
&data.target_name,
);
let mut clf = self.base.clone_box();
clf.fit(&train_data)?;
let val_features: Vec<Vec<f64>> =
val_indices.iter().map(|&i| features[i].clone()).collect();
let proba = clf.predict_proba(&val_features)?;
for (j, &val_idx) in val_indices.iter().enumerate() {
if j < proba.len() {
for c in 0..n_classes.min(proba[j].len()) {
oof_proba[val_idx][c] = proba[j][c];
}
}
}
}
self.calibrators = Vec::with_capacity(n_classes);
for c in 0..n_classes {
let proba_c: Vec<f64> = oof_proba.iter().map(|p| p[c]).collect();
let labels_c: Vec<f64> = targets
.iter()
.map(|&t| if (t as usize) == c { 1.0 } else { 0.0 })
.collect();
let cal = match &self.method {
CalibrationMethod::Sigmoid => {
let mut platt = PlattScaling::new();
platt.fit(&proba_c, &labels_c)?;
CalibratorKind::Platt(platt)
}
CalibrationMethod::Isotonic => {
let mut iso = IsotonicRegression::new();
iso.fit(&proba_c, &labels_c)?;
CalibratorKind::Isotonic(iso)
}
};
self.calibrators.push(cal);
}
self.base.fit(data)?;
self.fitted = true;
Ok(())
}
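    /// Calibrates each class's raw probability independently, then
    /// renormalizes each row to sum to 1 (uniform fallback if all
    /// calibrated values are zero).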
pub fn predict_proba(&self, features: &[Vec<f64>]) -> Result<Vec<Vec<f64>>> {
if !self.fitted {
return Err(ScryLearnError::NotFitted);
}
let raw = self.base.predict_proba(features)?;
let n_classes = self.calibrators.len();
let mut result = Vec::with_capacity(raw.len());
for row in &raw {
let mut calibrated = Vec::with_capacity(n_classes);
for c in 0..n_classes {
let raw_p = if c < row.len() { row[c] } else { 0.0 };
let cal_p = self.calibrators[c].predict(&[raw_p])[0];
calibrated.push(cal_p.max(0.0));
}
let sum: f64 = calibrated.iter().sum();
if sum > 0.0 {
for p in &mut calibrated {
*p /= sum;
}
} else {
let uniform = 1.0 / n_classes as f64;
calibrated.fill(uniform);
}
result.push(calibrated);
}
Ok(result)
}
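    /// Predicts the class index with the highest calibrated probability.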
pub fn predict(&self, features: &[Vec<f64>]) -> Result<Vec<f64>> {
let proba = self.predict_proba(features)?;
Ok(proba
.iter()
.map(|row| {
row.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
.map_or(0.0, |(i, _)| i as f64)
})
.collect())
}
}
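/// Assigns sample `i` to fold `i % k`; folds are strided, not shuffled,
/// so shuffle beforehand if sample order carries signal.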
fn k_fold_indices(n: usize, k: usize) -> Vec<Vec<usize>> {
let mut folds: Vec<Vec<usize>> = (0..k).map(|_| Vec::new()).collect();
for i in 0..n {
folds[i % k].push(i);
}
folds
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_platt_basic_separation() {
let mut platt = PlattScaling::new();
let dv = vec![3.0, 2.0, 1.0, 0.5, -0.5, -1.0, -2.0, -3.0];
let labels = vec![1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0];
platt.fit(&dv, &labels).unwrap();
let probs = platt.predict(&[2.0, -2.0]);
assert!(
probs[0] > 0.7,
"positive should have high prob: {}",
probs[0]
);
assert!(
probs[1] < 0.3,
"negative should have low prob: {}",
probs[1]
);
}
#[test]
fn test_platt_monotone() {
let mut platt = PlattScaling::new();
let dv: Vec<f64> = (-10..=10).map(|x| x as f64).collect();
let labels: Vec<f64> = dv
.iter()
.map(|&x| if x >= 0.0 { 1.0 } else { 0.0 })
.collect();
platt.fit(&dv, &labels).unwrap();
let test_vals = vec![-5.0, -2.0, 0.0, 2.0, 5.0];
let probs = platt.predict(&test_vals);
for w in probs.windows(2) {
            assert!(
                w[1] >= w[0] - 1e-6,
                "probabilities should be non-decreasing, got {} then {}",
                w[0],
                w[1]
            );
}
}
#[test]
fn test_isotonic_monotone_output() {
let mut iso = IsotonicRegression::new();
let x = vec![0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9];
let y = vec![0.0, 0.0, 0.3, 0.2, 0.5, 0.4, 0.8, 0.9, 1.0];
iso.fit(&x, &y).unwrap();
let pred = iso.predict(&x);
for w in pred.windows(2) {
assert!(
w[1] >= w[0] - 1e-10,
"isotonic output must be non-decreasing: {} > {}",
w[0],
w[1]
);
}
}
#[test]
fn test_isotonic_perfect_data() {
let mut iso = IsotonicRegression::new();
let x = vec![0.0, 0.25, 0.5, 0.75, 1.0];
let y = vec![0.0, 0.25, 0.5, 0.75, 1.0];
iso.fit(&x, &y).unwrap();
let pred = iso.predict(&[0.0, 0.5, 1.0]);
assert!((pred[0] - 0.0).abs() < 0.05);
assert!((pred[1] - 0.5).abs() < 0.05);
assert!((pred[2] - 1.0).abs() < 0.05);
}
#[test]
fn test_isotonic_clamp_extrapolation() {
let mut iso = IsotonicRegression::new();
iso.fit(&[0.2, 0.5, 0.8], &[0.1, 0.5, 0.9]).unwrap();
let pred = iso.predict(&[0.0, 1.0]);
assert!((pred[0] - 0.1).abs() < 1e-6);
assert!((pred[1] - 0.9).abs() < 1e-6);
}
#[test]
fn test_calibrated_classifier_cv_smoke() {
use crate::dataset::Dataset;
use crate::tree::DecisionTreeClassifier;
let features = vec![
vec![0.0, 0.5, 1.0, 1.5, 5.0, 5.5, 6.0, 6.5],
vec![0.0, 0.5, 1.0, 1.5, 5.0, 5.5, 6.0, 6.5],
];
let target = vec![0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut cal =
CalibratedClassifierCV::new(DecisionTreeClassifier::new(), CalibrationMethod::Isotonic)
.n_folds(2);
cal.fit(&data).unwrap();
let proba = cal.predict_proba(&data.feature_matrix()).unwrap();
assert_eq!(proba.len(), 8);
for row in &proba {
assert_eq!(row.len(), 2);
let sum: f64 = row.iter().sum();
assert!(
(sum - 1.0).abs() < 1e-6,
"probabilities should sum to 1, got {sum}"
);
}
let preds = cal.predict(&data.feature_matrix()).unwrap();
assert_eq!(preds.len(), 8);
}
#[test]
fn test_calibrated_classifier_cv_sigmoid() {
use crate::dataset::Dataset;
use crate::tree::DecisionTreeClassifier;
let features = vec![
vec![0.0, 0.5, 1.0, 1.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5],
vec![0.0, 0.5, 1.0, 1.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5],
];
let target = vec![0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0];
let data = Dataset::new(features, target, vec!["x".into(), "y".into()], "class");
let mut cal =
CalibratedClassifierCV::new(DecisionTreeClassifier::new(), CalibrationMethod::Sigmoid)
.n_folds(2);
cal.fit(&data).unwrap();
let proba = cal.predict_proba(&data.feature_matrix()).unwrap();
for row in &proba {
for &p in row {
assert!((0.0..=1.0).contains(&p), "prob out of range: {p}");
}
}
}
#[test]
fn test_platt_constant_decision_values() {
let mut platt = PlattScaling::new();
let dv = vec![1.0; 10];
let labels = vec![1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0];
let result = platt.fit(&dv, &labels);
assert!(result.is_err(), "constant decision values should fail");
let err = result.unwrap_err();
assert!(
matches!(err, ScryLearnError::InvalidData(_)),
"expected InvalidData, got {err:?}"
);
}
#[test]
fn test_platt_near_singular() {
let mut platt = PlattScaling::new().max_iter(5);
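        // Offsets of 1e-18 are below f64 machine epsilon, so every value
        // rounds to exactly 1.0 and either error variant below is acceptable.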
let dv = vec![1.0, 1.0 + 1e-18, 1.0 - 1e-18, 1.0, 1.0 + 1e-18, 1.0 - 1e-18];
let labels = vec![1.0, 1.0, 1.0, 0.0, 0.0, 0.0];
let result = platt.fit(&dv, &labels);
assert!(result.is_err(), "near-singular should fail");
let err = result.unwrap_err();
assert!(
matches!(
err,
ScryLearnError::InvalidData(_) | ScryLearnError::ConvergenceFailure { .. }
),
"expected InvalidData or ConvergenceFailure, got {err:?}"
);
}
}