use super::*;
use crate::primitives::Matrix;
#[test]
fn falsify_logreg_001_binary_predictions() {
    // LOGREG-001: on a binary problem every predicted label must be 0 or 1.
    // Labels are unsigned, so `p <= 1` is exactly the {0, 1} membership test.
    let x = Matrix::from_vec(
        6,
        2,
        vec![0.0, 0.0, 0.5, 0.5, 1.0, 0.0, 5.0, 5.0, 5.5, 5.5, 6.0, 5.0],
    )
    .expect("valid");
    let y = vec![0_usize, 0, 0, 1, 1, 1];
    let mut model = LogisticRegression::new().with_max_iter(1000);
    model.fit(&x, &y).expect("fit");
    let preds = model.predict(&x);
    // Scan for the first out-of-range label and report it, if any.
    if let Some((i, &p)) = preds.iter().enumerate().find(|(_, &p)| p > 1) {
        panic!("FALSIFIED LOGREG-001: prediction[{i}] = {p}, not in {{0, 1}}");
    }
}
#[test]
fn falsify_logreg_002_prediction_count() {
    // LOGREG-002: predict() must yield exactly one label per input row.
    let x = Matrix::from_vec(4, 2, vec![0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0]).expect("valid");
    let y = vec![0_usize, 0, 1, 1];
    let mut model = LogisticRegression::new().with_max_iter(1000);
    model.fit(&x, &y).expect("fit");
    let n_preds = model.predict(&x).len();
    assert_eq!(
        n_preds, 4,
        "FALSIFIED LOGREG-002: {} predictions for 4 inputs",
        n_preds
    );
}
#[test]
fn falsify_logreg_003_probabilities_bounded() {
    // LOGREG-003: every value from predict_proba must lie in [0, 1].
    let x = Matrix::from_vec(4, 2, vec![0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0]).expect("valid");
    let y = vec![0_usize, 0, 1, 1];
    let mut model = LogisticRegression::new().with_max_iter(1000);
    model.fit(&x, &y).expect("fit");
    for (i, &p) in model.predict_proba(&x).iter().enumerate() {
        assert!(
            (0.0..=1.0).contains(&p),
            "FALSIFIED LOGREG-003: proba[{i}] = {} not in [0, 1]",
            p
        );
    }
}
#[test]
fn falsify_logreg_004_deterministic() {
    // LOGREG-004: two predict() calls on the same fitted model must agree.
    let x = Matrix::from_vec(4, 2, vec![0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0]).expect("valid");
    let y = vec![0_usize, 0, 1, 1];
    let mut model = LogisticRegression::new().with_max_iter(1000);
    model.fit(&x, &y).expect("fit");
    let first = model.predict(&x);
    let second = model.predict(&x);
    assert_eq!(
        first, second,
        "FALSIFIED LOGREG-004: predictions differ on same input"
    );
}
#[test]
fn falsify_logreg_005_probabilities_sum_to_one() {
    // LOGREG-005: for a binary model, P(y=1) + P(y=0) must equal 1.
    //
    // NOTE(review): predict_proba returns only P(y=1) and P(y=0) is derived
    // here as `1 - p`, so `p + (1.0 - p)` is tautologically 1 for any
    // finite p — the sum assertion alone can only ever fail on NaN. Assert
    // finiteness and the [0, 1] bound explicitly so a degenerate
    // probability fails with a clear message instead of slipping through.
    let x = Matrix::from_vec(
        6,
        2,
        vec![0.0, 0.0, 0.5, 0.5, 1.0, 0.0, 5.0, 5.0, 5.5, 5.5, 6.0, 5.0],
    )
    .expect("valid");
    let y = vec![0_usize, 0, 0, 1, 1, 1];
    let mut lr = LogisticRegression::new().with_max_iter(1000);
    lr.fit(&x, &y).expect("fit");
    let probas = lr.predict_proba(&x);
    for (i, &p) in probas.iter().enumerate() {
        assert!(
            p.is_finite() && (0.0..=1.0).contains(&p),
            "FALSIFIED LOGREG-005: P(y=1)[{i}]={p} is not a valid probability"
        );
        let sum = p + (1.0 - p);
        assert!(
            (sum - 1.0_f32).abs() < 1e-6,
            "FALSIFIED LOGREG-005: P(y=1)[{i}]={p}, P(y=0)={}, sum={sum} != 1.0",
            1.0 - p,
        );
    }
}
#[test]
fn falsify_logreg_006_balanced_class_weight() {
    // LOGREG-006: balanced class weights must not reduce minority-class
    // recall on a 90/10 imbalanced, linearly separable dataset.
    let n0 = 90; // majority class (label 0), clustered near the origin
    let n1 = 10; // minority class (label 1), clustered near (5, 5)
    let n = n0 + n1;
    let mut x_data = Vec::with_capacity(n * 2);
    let mut y_data = Vec::with_capacity(n);
    for i in 0..n0 {
        x_data.push(i as f32 * 0.01);
        x_data.push(i as f32 * 0.005);
        y_data.push(0);
    }
    for i in 0..n1 {
        x_data.push(5.0 + i as f32 * 0.1);
        x_data.push(5.0 + i as f32 * 0.05);
        y_data.push(1);
    }
    let x = Matrix::from_vec(n, 2, x_data).expect("valid");
    // Recall on class 1 = true positives / actual positives. Shared by
    // both fits below instead of copy-pasting the computation.
    let recall_class1 = |preds: &[_]| -> f32 {
        let tp = preds
            .iter()
            .zip(y_data.iter())
            .filter(|(p, y)| **p == 1 && **y == 1)
            .count();
        tp as f32 / n1 as f32
    };
    let mut lr_uniform = LogisticRegression::new().with_max_iter(1000);
    lr_uniform.fit(&x, &y_data).expect("fit");
    let recall_uniform = recall_class1(&lr_uniform.predict(&x));
    let mut lr_balanced = LogisticRegression::new()
        .with_max_iter(1000)
        .with_class_weight(ClassWeight::Balanced);
    lr_balanced.fit(&x, &y_data).expect("fit");
    let recall_balanced = recall_class1(&lr_balanced.predict(&x));
    assert!(
        recall_balanced >= recall_uniform,
        "FALSIFIED LOGREG-006: balanced recall {recall_balanced} < uniform recall {recall_uniform}"
    );
}
#[test]
fn falsify_logreg_007_l2_reduces_coefficients() {
    // LOGREG-007: an L2 penalty must shrink the coefficient L2 norm
    // relative to the unregularized fit on the same data.
    let x = Matrix::from_vec(
        6,
        2,
        vec![0.0, 0.0, 0.5, 0.5, 1.0, 0.0, 5.0, 5.0, 5.5, 5.5, 6.0, 5.0],
    )
    .expect("valid");
    let y = vec![0_usize, 0, 0, 1, 1, 1];
    let mut lr_no_reg = LogisticRegression::new().with_max_iter(1000);
    lr_no_reg.fit(&x, &y).expect("fit");
    // Iterator chain instead of a bounds-checked index loop.
    let norm_no_reg: f32 = lr_no_reg
        .coefficients()
        .iter()
        .map(|c| c.powi(2))
        .sum::<f32>()
        .sqrt();
    let mut lr_reg = LogisticRegression::new()
        .with_max_iter(1000)
        .with_l2_penalty(0.1);
    lr_reg.fit(&x, &y).expect("fit");
    let norm_reg: f32 = lr_reg
        .coefficients()
        .iter()
        .map(|c| c.powi(2))
        .sum::<f32>()
        .sqrt();
    assert!(
        norm_reg < norm_no_reg,
        "FALSIFIED LOGREG-007: L2 norm {norm_reg} >= unregularized {norm_no_reg}"
    );
}
#[test]
fn falsify_logreg_008_manual_class_weight() {
    // LOGREG-008: a 5x manual weight on class 1 must classify all three
    // class-1 samples correctly on this separable dataset.
    let x = Matrix::from_vec(
        6,
        2,
        vec![0.0, 0.0, 0.5, 0.5, 1.0, 0.0, 5.0, 5.0, 5.5, 5.5, 6.0, 5.0],
    )
    .expect("valid");
    let y = vec![0_usize, 0, 0, 1, 1, 1];
    let mut model = LogisticRegression::new()
        .with_max_iter(1000)
        .with_class_weight(ClassWeight::Manual(vec![1.0, 5.0]));
    model.fit(&x, &y).expect("fit");
    let preds = model.predict(&x);
    // Count class-1 samples whose prediction matches the label.
    let class1_correct = y
        .iter()
        .zip(preds.iter())
        .filter(|(truth, pred)| **truth == 1 && pred == truth)
        .count();
    assert_eq!(
        class1_correct, 3,
        "FALSIFIED LOGREG-008: only {class1_correct}/3 class-1 samples correct with 5x weight"
    );
}
#[test]
fn falsify_logreg_009_backward_compatible() {
    // LOGREG-009: default settings must solve a trivially separable problem.
    let x = Matrix::from_vec(4, 2, vec![0.0, 0.0, 1.0, 1.0, 5.0, 5.0, 6.0, 6.0]).expect("valid");
    let y = vec![0_usize, 0, 1, 1];
    let mut model = LogisticRegression::new().with_max_iter(1000);
    model.fit(&x, &y).expect("fit");
    let expected = vec![0, 0, 1, 1];
    assert_eq!(
        model.predict(&x),
        expected,
        "FALSIFIED LOGREG-009: default model fails on linearly separable data"
    );
}
mod logreg_proptest_falsify {
use super::*;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_logreg_003_prop_probabilities_bounded(
seed in 0..500u32,
) {
let n = 20;
let x_data: Vec<f32> = (0..n).flat_map(|i| {
let class = if i < n / 2 { 0.0 } else { 5.0 };
let offset = ((i as f32 + seed as f32) * 0.37).sin() * 0.5;
vec![class + offset, class + offset * 0.3]
}).collect();
let y_data: Vec<usize> = (0..n).map(|i| if i < n / 2 { 0 } else { 1 }).collect();
let x = Matrix::from_vec(n, 2, x_data).expect("valid");
let mut lr = LogisticRegression::new().with_max_iter(500);
lr.fit(&x, &y_data).expect("fit");
let probas = lr.predict_proba(&x);
for i in 0..probas.len() {
let p = probas[i];
prop_assert!(
(0.0..=1.0_f32).contains(&p),
"FALSIFIED LOGREG-003-prop: proba[{}]={} not in [0,1]",
i, p
);
}
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_logreg_004_prop_deterministic(
seed in 0..500u32,
) {
let n = 20;
let x_data: Vec<f32> = (0..n).flat_map(|i| {
let class = if i < n / 2 { 0.0 } else { 5.0 };
let offset = ((i as f32 + seed as f32) * 0.37).sin() * 0.5;
vec![class + offset, class + offset * 0.3]
}).collect();
let y_data: Vec<usize> = (0..n).map(|i| if i < n / 2 { 0 } else { 1 }).collect();
let x = Matrix::from_vec(n, 2, x_data).expect("valid");
let mut lr = LogisticRegression::new().with_max_iter(500);
lr.fit(&x, &y_data).expect("fit");
let p1 = lr.predict(&x);
let p2 = lr.predict(&x);
prop_assert_eq!(
p1, p2,
"FALSIFIED LOGREG-004-prop: predictions differ on same input"
);
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(30))]
#[test]
fn falsify_logreg_005_prop_probabilities_sum_to_one(
seed in 0..500u32,
) {
let n = 20;
let x_data: Vec<f32> = (0..n).flat_map(|i| {
let class = if i < n / 2 { 0.0 } else { 5.0 };
let offset = ((i as f32 + seed as f32) * 0.37).sin() * 0.5;
vec![class + offset, class + offset * 0.3]
}).collect();
let y_data: Vec<usize> = (0..n).map(|i| if i < n / 2 { 0 } else { 1 }).collect();
let x = Matrix::from_vec(n, 2, x_data).expect("valid");
let mut lr = LogisticRegression::new().with_max_iter(500);
lr.fit(&x, &y_data).expect("fit");
let probas = lr.predict_proba(&x);
for i in 0..probas.len() {
let p = probas[i];
let sum = p + (1.0 - p);
prop_assert!(
(sum - 1.0_f32).abs() < 1e-6,
"FALSIFIED LOGREG-005-prop: sum={} != 1.0 at index {}",
sum, i
);
}
}
}
}