use super::{Rng, StreamGenerator, TaskType};
/// Stream generator that labels random feature vectors by which side of a
/// weighted hyperplane they fall on.
///
/// A configurable number of leading weights is shifted by a fixed step on
/// every generated sample, so the decision boundary moves gradually over time.
#[derive(Debug, Clone)]
pub struct Hyperplane {
/// Random source used for the initial weight jitter, the feature values and
/// the label noise.
rng: Rng,
/// Number of features in every generated sample (also the number of weights).
n_feat: usize,
/// How many leading weights are shifted on each sample; the constructor caps
/// this at `n_feat`.
n_drifting: usize,
/// Step applied to each drifting weight per sample; drifting only happens
/// when this is greater than zero.
magnitude: f64,
/// Second argument passed to `Rng::normal` when drawing the noise that is
/// added to the dot product before thresholding.
noise_std: f64,
/// Current hyperplane weights, one per feature.
weights: Vec<f64>,
/// Number of samples generated so far; incremented at the end of each
/// `next_sample` call.
sample_idx: usize,
/// Whether the most recent `next_sample` call raised the drift signal.
drift_flag: bool,
}
impl Hyperplane {
    /// Creates a generator seeded with `seed`.
    ///
    /// * `n_features` - length of every feature vector and of the weight vector.
    /// * `n_drifting` - how many leading weights drift; values larger than
    ///   `n_features` are capped to `n_features`.
    /// * `magnitude` - per-sample step applied to each drifting weight.
    /// * `noise` - stored as `noise_std` and later handed to `Rng::normal`
    ///   when the label noise is drawn.
    ///
    /// Weight `i` starts at `(i + 1) / n_features` plus one
    /// `Rng::uniform_range(-0.1, 0.1)` draw, taken in index order.
    pub fn new(
        seed: u64,
        n_features: usize,
        n_drifting: usize,
        magnitude: f64,
        noise: f64,
    ) -> Self {
        let mut rng = Rng::new(seed);

        // One jitter draw per weight, in index order, so a given seed always
        // yields the same starting hyperplane.
        let mut weights: Vec<f64> = Vec::with_capacity(n_features);
        for idx in 0..n_features {
            let base = (idx as f64 + 1.0) / n_features as f64;
            weights.push(base + rng.uniform_range(-0.1, 0.1));
        }

        Self {
            rng,
            n_feat: n_features,
            // Never drift more weights than exist.
            n_drifting: n_drifting.min(n_features),
            magnitude,
            noise_std: noise,
            weights,
            sample_idx: 0,
            drift_flag: false,
        }
    }
}
impl StreamGenerator for Hyperplane {
    /// Produces the next `(features, label)` pair.
    ///
    /// Steps, in order:
    /// 1. If `magnitude > 0` and `n_drifting > 0`, each of the first
    ///    `n_drifting` weights moves by `magnitude` (even positions up, odd
    ///    positions down) and is clamped to `[0.01, 10.0]`.
    /// 2. The drift flag is set only when drifting is active and the number of
    ///    previously generated samples is a non-zero multiple of 100;
    ///    otherwise it is cleared.
    /// 3. One `Rng::uniform()` value is drawn per feature.
    /// 4. The label is `1.0` when the weighted sum of the features plus one
    ///    `Rng::normal(0.0, noise_std)` draw exceeds half the sum of the
    ///    weights, and `0.0` otherwise.
    fn next_sample(&mut self) -> (Vec<f64>, f64) {
        // Bounds every drifting weight is clamped to after its step.
        const WEIGHT_MIN: f64 = 0.01;
        const WEIGHT_MAX: f64 = 10.0;
        // The drift flag is raised on sample counts that are multiples of this.
        const DRIFT_SIGNAL_PERIOD: usize = 100;

        let drift_active = self.magnitude > 0.0 && self.n_drifting > 0;
        if drift_active {
            let step = self.magnitude;
            let count = self.n_drifting;
            for (pos, weight) in self.weights[..count].iter_mut().enumerate() {
                // Neighbouring weights move in opposite directions.
                let delta = if pos % 2 == 0 { step } else { -step };
                *weight = (*weight + delta).clamp(WEIGHT_MIN, WEIGHT_MAX);
            }
        }
        self.drift_flag = drift_active
            && self.sample_idx > 0
            && self.sample_idx % DRIFT_SIGNAL_PERIOD == 0;

        // Features are drawn before the noise term; the order of RNG calls
        // determines the output for a given seed.
        let mut features: Vec<f64> = Vec::with_capacity(self.n_feat);
        for _ in 0..self.n_feat {
            features.push(self.rng.uniform());
        }

        let half_weight_sum: f64 = self.weights.iter().sum::<f64>() * 0.5;
        let projection: f64 = features
            .iter()
            .zip(&self.weights)
            .map(|(value, weight)| value * weight)
            .sum();
        let noise = self.rng.normal(0.0, self.noise_std);
        let above_plane = projection + noise > half_weight_sum;

        self.sample_idx += 1;
        (features, if above_plane { 1.0 } else { 0.0 })
    }

    /// Returns the number of features in each generated sample.
    fn n_features(&self) -> usize {
        self.n_feat
    }

    /// Always reports binary classification.
    fn task_type(&self) -> TaskType {
        TaskType::BinaryClassification
    }

    /// Returns whether the most recent `next_sample` call raised the drift
    /// signal.
    fn drift_occurred(&self) -> bool {
        self.drift_flag
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A generator built with 10 features emits vectors of length 10.
    #[test]
    fn hyperplane_produces_correct_n_features() {
        let mut stream = Hyperplane::new(42, 10, 3, 0.01, 0.05);
        let (x, _) = stream.next_sample();
        let width = x.len();
        assert_eq!(
            width, 10,
            "Hyperplane should produce 10 features, got {}",
            width
        );
    }

    /// The feature count follows the constructor argument.
    #[test]
    fn hyperplane_custom_dimensionality() {
        let mut stream = Hyperplane::new(42, 25, 5, 0.01, 0.0);
        let (x, _) = stream.next_sample();
        assert_eq!(x.len(), 25, "Hyperplane with d=25 should produce 25 features");
    }

    /// The generator reports itself as a binary classification task.
    #[test]
    fn hyperplane_task_type_is_binary() {
        let stream = Hyperplane::new(42, 10, 3, 0.01, 0.05);
        let kind = stream.task_type();
        assert_eq!(kind, TaskType::BinaryClassification);
    }

    /// Neither features nor targets become NaN or infinite over 2000 samples.
    #[test]
    fn hyperplane_produces_finite_values() {
        let mut stream = Hyperplane::new(123, 10, 3, 0.01, 0.05);
        for sample_no in 0..2000 {
            let (x, y) = stream.next_sample();
            // Report the first offending feature, if any.
            if let Some(col) = x.iter().position(|value| !value.is_finite()) {
                panic!("feature {} at sample {} is not finite", col, sample_no);
            }
            assert!(y.is_finite(), "target at sample {} is not finite", sample_no);
        }
    }

    /// Every feature of every sample lies in the half-open interval [0, 1).
    #[test]
    fn hyperplane_features_in_unit_range() {
        let mut stream = Hyperplane::new(42, 10, 3, 0.01, 0.0);
        for _ in 0..500 {
            let (x, _) = stream.next_sample();
            for (col, value) in x.into_iter().enumerate() {
                assert!(
                    (0.0..1.0).contains(&value),
                    "feature {} should be in [0, 1), got {}",
                    col,
                    value
                );
            }
        }
    }

    /// With a positive magnitude the drift signal fires repeatedly over 1000
    /// samples.
    #[test]
    fn hyperplane_gradual_drift_signals() {
        let mut stream = Hyperplane::new(42, 10, 3, 0.01, 0.05);
        let signals = (0..1000)
            .filter(|_| {
                stream.next_sample();
                stream.drift_occurred()
            })
            .count();
        assert!(
            signals >= 5,
            "expected multiple gradual drift signals, got {}",
            signals
        );
    }

    /// A zero magnitude never raises the drift signal.
    #[test]
    fn hyperplane_no_drift_when_magnitude_zero() {
        let mut stream = Hyperplane::new(42, 10, 3, 0.0, 0.05);
        for _ in 0..500 {
            stream.next_sample();
            let flagged = stream.drift_occurred();
            assert!(!flagged, "no drift should occur when magnitude is 0");
        }
    }

    /// Two generators built from the same arguments emit identical streams.
    #[test]
    fn hyperplane_deterministic_with_same_seed() {
        let build = || Hyperplane::new(42, 10, 3, 0.01, 0.05);
        let mut first = build();
        let mut second = build();
        for _ in 0..200 {
            let (x_first, y_first) = first.next_sample();
            let (x_second, y_second) = second.next_sample();
            assert_eq!(
                x_first, x_second,
                "same seed should produce identical features"
            );
            assert_eq!(
                y_first, y_second,
                "same seed should produce identical targets"
            );
        }
    }
}