Skip to main content

oxilean_std/machine_learning/
types.rs

1//! Auto-generated module
2//!
3//! 🤖 Generated with [SplitRS](https://github.com/cool-japan/splitrs)
4use super::functions::*;
5
6pub struct NeuralNetwork {
7    pub layers: Vec<Layer>,
8}
9impl NeuralNetwork {
10    pub fn new(layers: Vec<Layer>) -> Self {
11        NeuralNetwork { layers }
12    }
13    pub fn forward(&self, input: &[f64]) -> Vec<f64> {
14        let mut current: Vec<f64> = input.to_vec();
15        for layer in &self.layers {
16            current = layer.forward(&current);
17        }
18        current
19    }
20    pub fn forward_cached(&self, input: &[f64]) -> (Vec<Vec<f64>>, Vec<Vec<f64>>, Vec<Vec<f64>>) {
21        let mut activations = vec![input.to_vec()];
22        let mut z_cache = Vec::new();
23        let mut a_cache = Vec::new();
24        let mut current = input.to_vec();
25        for layer in &self.layers {
26            let (z, a) = layer.forward_with_cache(&current);
27            z_cache.push(z);
28            a_cache.push(a.clone());
29            activations.push(a.clone());
30            current = a;
31        }
32        (activations, z_cache, a_cache)
33    }
34    pub fn n_params(&self) -> usize {
35        self.layers.iter().map(|l| l.n_params()).sum()
36    }
37    pub fn predict_class(&self, input: &[f64]) -> usize {
38        let out = self.forward(input);
39        out.iter()
40            .enumerate()
41            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
42            .map(|(i, _)| i)
43            .unwrap_or(0)
44    }
45    pub fn depth(&self) -> usize {
46        self.layers.len()
47    }
48}
/// Strategy used by `UncertaintySampler` to rank how uncertain a predicted
/// probability distribution is (higher score = more uncertain).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum UncertaintyStrategy {
    /// 1 - max probability: low confidence in the top class.
    LeastConfident,
    /// 1 - (top1 - top2): small margin between the two best classes.
    MarginSampling,
    /// Shannon entropy of the whole distribution.
    Entropy,
}
/// Plain gradient descent with a fixed learning rate, an epoch budget and
/// an early-stop tolerance on the step size.
pub struct GradientDescent {
    pub learning_rate: f64,
    pub max_epochs: u32,
    pub tolerance: f64,
}
impl GradientDescent {
    /// Create an optimizer; the convergence tolerance defaults to 1e-8.
    pub fn new(lr: f64, max_epochs: u32) -> Self {
        GradientDescent {
            learning_rate: lr,
            max_epochs,
            tolerance: 1e-8,
        }
    }
    /// Minimize f(x) = a*x^2 + b*x from `x0` using the exact gradient
    /// 2a*x + b.
    pub fn minimize_quadratic(&self, a: f64, b: f64, x0: f64) -> f64 {
        self.descend(x0, |x| 2.0 * a * x + b)
    }
    /// Minimize an arbitrary scalar function via a central-difference
    /// numerical gradient with step h = 1e-7.
    pub fn minimize_numerical<F: Fn(f64) -> f64>(&self, f: &F, x0: f64) -> f64 {
        let h = 1e-7;
        self.descend(x0, |x| (f(x + h) - f(x - h)) / (2.0 * h))
    }
    /// Shared descent loop: follow `grad_fn` downhill until a step shrinks
    /// below `tolerance` or the epoch budget is exhausted.
    fn descend<G: Fn(f64) -> f64>(&self, x0: f64, grad_fn: G) -> f64 {
        let mut x = x0;
        for _ in 0..self.max_epochs {
            let next = x - self.learning_rate * grad_fn(x);
            if (next - x).abs() < self.tolerance {
                return next;
            }
            x = next;
        }
        x
    }
}
/// Polynomial model y = c0 + c1*x + ... + c_degree*x^degree fitted by
/// full-batch gradient descent on the mean squared error.
pub struct PolynomialRegression {
    pub coefficients: Vec<f64>,
    pub degree: usize,
}
impl PolynomialRegression {
    /// Build the monomial feature vector [1, x, x^2, ..., x^degree].
    fn make_features(x: f64, degree: usize) -> Vec<f64> {
        let mut features = Vec::with_capacity(degree + 1);
        let mut power = 1.0;
        for _ in 0..=degree {
            features.push(power);
            power *= x;
        }
        features
    }
    /// Fit coefficients by gradient descent; the data is truncated to the
    /// shorter of `x_data`/`y_data`. Coefficients start at zero.
    pub fn fit(x_data: &[f64], y_data: &[f64], degree: usize, lr: f64, epochs: u32) -> Self {
        let n = x_data.len().min(y_data.len());
        let mut coeffs = vec![0.0f64; degree + 1];
        for _ in 0..epochs {
            // Accumulate d(MSE)/d(coeff_j) over the whole batch.
            let mut grads = vec![0.0f64; degree + 1];
            for (&xi, &yi) in x_data.iter().zip(y_data.iter()) {
                let features = Self::make_features(xi, degree);
                let pred: f64 = features.iter().zip(coeffs.iter()).map(|(f, c)| f * c).sum();
                let err = pred - yi;
                for (g, feat) in grads.iter_mut().zip(features.iter()) {
                    *g += 2.0 * err * feat / n as f64;
                }
            }
            // One descent step on every coefficient.
            for (c, g) in coeffs.iter_mut().zip(grads.iter()) {
                *c -= lr * g;
            }
        }
        PolynomialRegression {
            coefficients: coeffs,
            degree,
        }
    }
    /// Evaluate the fitted polynomial at `x`.
    pub fn predict(&self, x: f64) -> f64 {
        Self::make_features(x, self.degree)
            .into_iter()
            .zip(self.coefficients.iter())
            .map(|(f, c)| f * c)
            .sum()
    }
}
/// Elastic Weight Consolidation (continual-learning regularizer): keeps a
/// snapshot of "important" parameters and penalizes drifting away from it.
#[derive(Debug, Clone)]
pub struct ElasticWeightConsolidation {
    pub lambda: f64,
    pub fisher_diagonal: Vec<f64>,
    pub theta_star: Vec<f64>,
}
impl ElasticWeightConsolidation {
    /// New regularizer with strength `lambda` and no consolidated task yet.
    pub fn new(lambda: f64) -> Self {
        ElasticWeightConsolidation {
            lambda,
            fisher_diagonal: Vec::new(),
            theta_star: Vec::new(),
        }
    }
    /// Store the current parameters and a diagonal Fisher approximation
    /// computed from `gradients` (one gradient vector per data point):
    /// F_i = mean over data points of grad_i^2.
    pub fn consolidate(&mut self, params: &[f64], gradients: &[Vec<f64>]) {
        self.theta_star = params.to_vec();
        let n = gradients.len();
        if n == 0 {
            // No data: importance is zero everywhere.
            self.fisher_diagonal = vec![0.0; params.len()];
            return;
        }
        let mut fisher = vec![0.0f64; params.len()];
        for grad in gradients {
            // zip truncates, so gradient vectors longer than `params` are
            // silently clipped (same effect as the bounds check it replaces).
            for (slot, &g) in fisher.iter_mut().zip(grad.iter()) {
                *slot += g * g;
            }
        }
        for f in fisher.iter_mut() {
            *f /= n as f64;
        }
        self.fisher_diagonal = fisher;
    }
    /// EWC penalty: lambda/2 * sum_i F_i * (theta_i - theta*_i)^2.
    /// Zero before any task has been consolidated.
    pub fn penalty(&self, params: &[f64]) -> f64 {
        if self.fisher_diagonal.is_empty() || self.theta_star.is_empty() {
            return 0.0;
        }
        let mut sum = 0.0;
        for ((p, s), f) in params
            .iter()
            .zip(self.theta_star.iter())
            .zip(self.fisher_diagonal.iter())
        {
            sum += f * (p - s).powi(2);
        }
        0.5 * self.lambda * sum
    }
    /// Gradient of the EWC penalty w.r.t. params:
    /// lambda * F_i * (theta_i - theta*_i). All-zero before consolidation.
    pub fn penalty_gradient(&self, params: &[f64]) -> Vec<f64> {
        if self.fisher_diagonal.is_empty() || self.theta_star.is_empty() {
            return vec![0.0; params.len()];
        }
        let mut out = Vec::with_capacity(params.len());
        for ((p, s), f) in params
            .iter()
            .zip(self.theta_star.iter())
            .zip(self.fisher_diagonal.iter())
        {
            out.push(self.lambda * f * (p - s));
        }
        out
    }
}
/// Monte-Carlo Shapley-value explainer for a black-box scalar model.
#[derive(Debug, Clone)]
pub struct ShapleyExplainer {
    // Number of features to attribute (truncated to the input length).
    pub n_features: usize,
    // Number of random feature permutations sampled per explanation.
    pub n_samples: usize,
    // Background rows used to "hide" features outside the coalition.
    pub background: Vec<Vec<f64>>,
}
impl ShapleyExplainer {
    pub fn new(n_features: usize, n_samples: usize, background: Vec<Vec<f64>>) -> Self {
        ShapleyExplainer {
            n_features,
            n_samples,
            background,
        }
    }
    /// Estimate Shapley values for `x` using the given model function via
    /// the random-permutation sampling approach.
    ///
    /// Each round: shuffle the feature order (Fisher-Yates), pick one
    /// background row, then reveal features one at a time, crediting each
    /// feature with the change in model output it causes. The credits are
    /// averaged over `n_samples` rounds. Randomness comes from a
    /// fixed-seed LCG, so repeated calls return identical values.
    /// NOTE(review): `n_samples == 0` divides by zero below and yields
    /// NaNs for non-empty inputs — confirm callers never pass 0.
    pub fn explain<F: Fn(&[f64]) -> f64>(&self, x: &[f64], model: &F) -> Vec<f64> {
        let d = self.n_features.min(x.len());
        if d == 0 || self.background.is_empty() {
            return vec![0.0; d];
        }
        let mut phi = vec![0.0f64; d];
        let n_bg = self.background.len();
        // Fixed-seed 64-bit LCG; only the upper bits are used because the
        // low bits of an LCG have short periods.
        let mut state: u64 = 0x5851f42d4c957f2d_u64;
        let lcg = |s: &mut u64| -> usize {
            *s = s
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            (*s >> 33) as usize
        };
        for _ in 0..self.n_samples {
            // Fisher-Yates shuffle of the feature indices.
            let mut perm: Vec<usize> = (0..d).collect();
            for i in (1..d).rev() {
                let j = lcg(&mut state) % (i + 1);
                perm.swap(i, j);
            }
            // One background row per round serves as the "absent" values.
            let bg_idx = lcg(&mut state) % n_bg;
            let bg = &self.background[bg_idx];
            // coalition[j] == true once feature j has been revealed from x.
            let mut coalition: Vec<bool> = vec![false; d];
            // Baseline: model on the pure background row (short rows are
            // padded with 0.0).
            let mut prev_val = {
                let inp: Vec<f64> = (0..d)
                    .map(|j| if j < bg.len() { bg[j] } else { 0.0 })
                    .collect();
                model(&inp)
            };
            for &feat in &perm {
                coalition[feat] = true;
                // Blend input: revealed features from x, hidden ones from
                // the background row, 0.0 where either is too short.
                let inp: Vec<f64> = (0..d)
                    .map(|j| {
                        if coalition[j] {
                            if j < x.len() {
                                x[j]
                            } else {
                                0.0
                            }
                        } else if j < bg.len() {
                            bg[j]
                        } else {
                            0.0
                        }
                    })
                    .collect();
                let new_val = model(&inp);
                // Marginal contribution of `feat` under this permutation.
                phi[feat] += new_val - prev_val;
                prev_val = new_val;
            }
        }
        // Average the accumulated contributions over all rounds.
        for p in &mut phi {
            *p /= self.n_samples as f64;
        }
        phi
    }
}
/// Lloyd's-algorithm k-means clustering.
pub struct KMeans {
    /// Number of clusters requested.
    pub k: usize,
    /// Cluster centers; empty until `fit` is called.
    pub centroids: Vec<Vec<f64>>,
    /// Upper bound on Lloyd iterations.
    pub max_iter: u32,
}
impl KMeans {
    /// Create an unfitted model with `k` clusters and an iteration cap.
    pub fn new(k: usize, max_iter: u32) -> Self {
        KMeans {
            k,
            centroids: Vec::new(),
            max_iter,
        }
    }
    /// Fit with Lloyd's algorithm and return each point's cluster index.
    ///
    /// Centroids are seeded deterministically with the first `k` points;
    /// `_seed` is accepted for interface compatibility but unused.
    ///
    /// Bug fix: the previous version initialized `assignments` to all
    /// zeros and broke out *before* updating the centroids whenever the
    /// first assignment pass was also all zeros (always the case for
    /// k == 1), so the centroids stayed at their seed values instead of
    /// moving to the cluster means. The sentinel is now an empty vector,
    /// which can never equal a real assignment, guaranteeing at least one
    /// centroid update; iteration stops only once an assignment pass
    /// reproduces the previous one (i.e. the clustering is stable).
    pub fn fit(&mut self, data: &[Vec<f64>], _seed: u64) -> Vec<usize> {
        if data.is_empty() || self.k == 0 {
            return vec![];
        }
        let k = self.k.min(data.len());
        self.centroids = data[..k].to_vec();
        // Empty sentinel: never equal to a real (non-empty) assignment.
        let mut assignments: Vec<usize> = Vec::new();
        for _ in 0..self.max_iter {
            let new_assignments = self.assign_clusters(data);
            if new_assignments == assignments {
                // Converged: every point already sits in its nearest cluster.
                break;
            }
            self.update_centroids(data, &new_assignments);
            assignments = new_assignments;
        }
        if assignments.len() != data.len() {
            // max_iter == 0: still report the nearest-seed assignment.
            assignments = self.assign_clusters(data);
        }
        assignments
    }
    /// Index of the centroid nearest to `point` (squared-L2 distance);
    /// 0 when no centroids exist yet.
    pub fn predict(&self, point: &[f64]) -> usize {
        self.centroids
            .iter()
            .enumerate()
            .min_by(|(_, a), (_, b)| {
                let da = Self::sq_dist(a, point);
                let db = Self::sq_dist(b, point);
                da.partial_cmp(&db).unwrap_or(std::cmp::Ordering::Equal)
            })
            .map(|(i, _)| i)
            .unwrap_or(0)
    }
    /// Sum over all points of the squared distance to the closest centroid.
    pub fn inertia(&self, data: &[Vec<f64>]) -> f64 {
        data.iter()
            .map(|p| {
                self.centroids
                    .iter()
                    .map(|c| Self::sq_dist(c, p))
                    .fold(f64::INFINITY, f64::min)
            })
            .sum()
    }
    /// Squared Euclidean distance over the shorter of the two slices.
    fn sq_dist(a: &[f64], b: &[f64]) -> f64 {
        a.iter().zip(b.iter()).map(|(x, y)| (x - y).powi(2)).sum()
    }
    /// Nearest-centroid label for every data point.
    fn assign_clusters(&self, data: &[Vec<f64>]) -> Vec<usize> {
        data.iter().map(|p| self.predict(p)).collect()
    }
    /// Move each centroid to the mean of its assigned points; centroids
    /// with no assigned points are left unchanged.
    fn update_centroids(&mut self, data: &[Vec<f64>], assignments: &[usize]) {
        let dim = if data.is_empty() { 0 } else { data[0].len() };
        let k = self.centroids.len();
        let mut sums = vec![vec![0.0f64; dim]; k];
        let mut counts = vec![0usize; k];
        for (point, &cluster) in data.iter().zip(assignments.iter()) {
            if cluster < k {
                for (s, v) in sums[cluster].iter_mut().zip(point.iter()) {
                    *s += v;
                }
                counts[cluster] += 1;
            }
        }
        for c in 0..k {
            if counts[c] > 0 {
                for d in 0..dim {
                    self.centroids[c][d] = sums[c][d] / counts[c] as f64;
                }
            }
        }
    }
}
365#[derive(Debug, Clone)]
366pub struct Layer {
367    pub weights: Vec<Vec<f64>>,
368    pub biases: Vec<f64>,
369    pub activation: Activation,
370}
371impl Layer {
372    pub fn new(n_in: usize, n_out: usize, activation: Activation) -> Self {
373        let mut state: u64 = (n_in as u64).wrapping_mul(6364136223846793005)
374            ^ (n_out as u64).wrapping_add(1442695040888963407);
375        let mut next = move || -> f64 {
376            state = state
377                .wrapping_mul(6364136223846793005)
378                .wrapping_add(1442695040888963407);
379            let bits = ((state >> 33) as u32) as f64;
380            bits / u32::MAX as f64 * 0.2 - 0.1
381        };
382        let weights = (0..n_out)
383            .map(|_| (0..n_in).map(|_| next()).collect())
384            .collect();
385        let biases = vec![0.0; n_out];
386        Layer {
387            weights,
388            biases,
389            activation,
390        }
391    }
392    pub fn from_weights(weights: Vec<Vec<f64>>, biases: Vec<f64>, activation: Activation) -> Self {
393        Layer {
394            weights,
395            biases,
396            activation,
397        }
398    }
399    pub fn forward(&self, input: &[f64]) -> Vec<f64> {
400        self.weights
401            .iter()
402            .enumerate()
403            .map(|(i, row)| {
404                let z: f64 = row
405                    .iter()
406                    .zip(input.iter())
407                    .map(|(w, x)| w * x)
408                    .sum::<f64>()
409                    + self.biases[i];
410                self.activation.apply(z)
411            })
412            .collect()
413    }
414    pub fn forward_with_cache(&self, input: &[f64]) -> (Vec<f64>, Vec<f64>) {
415        let mut z_vals = Vec::with_capacity(self.weights.len());
416        let mut a_vals = Vec::with_capacity(self.weights.len());
417        for (i, row) in self.weights.iter().enumerate() {
418            let z: f64 = row
419                .iter()
420                .zip(input.iter())
421                .map(|(w, x)| w * x)
422                .sum::<f64>()
423                + self.biases[i];
424            z_vals.push(z);
425            a_vals.push(self.activation.apply(z));
426        }
427        (z_vals, a_vals)
428    }
429    pub fn n_params(&self) -> usize {
430        let n_out = self.weights.len();
431        let n_in = if n_out > 0 { self.weights[0].len() } else { 0 };
432        n_out * n_in + n_out
433    }
434    pub fn n_inputs(&self) -> usize {
435        if self.weights.is_empty() {
436            0
437        } else {
438            self.weights[0].len()
439        }
440    }
441    pub fn n_outputs(&self) -> usize {
442        self.weights.len()
443    }
444}
/// Adam optimizer for 1-D objectives.
pub struct AdamOptimizer {
    pub learning_rate: f64,
    /// Exponential decay for the first-moment estimate.
    pub beta1: f64,
    /// Exponential decay for the second-moment estimate.
    pub beta2: f64,
    /// Small constant preventing division by zero.
    pub epsilon: f64,
    pub max_epochs: u32,
}
impl AdamOptimizer {
    /// Adam with the standard defaults: beta1=0.9, beta2=0.999, eps=1e-8.
    pub fn new(lr: f64, max_epochs: u32) -> Self {
        AdamOptimizer {
            learning_rate: lr,
            beta1: 0.9,
            beta2: 0.999,
            epsilon: 1e-8,
            max_epochs,
        }
    }
    /// Minimize f(x) = a*x^2 + b*x from `x0` using bias-corrected Adam
    /// moment estimates. Stops early once a step falls below 1e-10.
    pub fn minimize_quadratic(&self, a: f64, b: f64, x0: f64) -> f64 {
        let mut x = x0;
        let mut first_moment = 0.0;
        let mut second_moment = 0.0;
        for t in 1..=self.max_epochs {
            let grad = 2.0 * a * x + b;
            first_moment = self.beta1 * first_moment + (1.0 - self.beta1) * grad;
            second_moment = self.beta2 * second_moment + (1.0 - self.beta2) * grad * grad;
            // Correct the bias introduced by zero-initialized moments.
            let m_hat = first_moment / (1.0 - self.beta1.powi(t as i32));
            let v_hat = second_moment / (1.0 - self.beta2.powi(t as i32));
            let next = x - self.learning_rate * m_hat / (v_hat.sqrt() + self.epsilon);
            if (next - x).abs() < 1e-10 {
                return next;
            }
            x = next;
        }
        x
    }
}
/// PAC-Bayes generalization bounds for a posterior Q given a prior P.
#[derive(Debug, Clone)]
pub struct PACBayesBound {
    /// Failure probability of the bound.
    pub delta: f64,
    /// Training-sample size n.
    pub n_samples: usize,
}
impl PACBayesBound {
    pub fn new(delta: f64, n_samples: usize) -> Self {
        PACBayesBound { delta, n_samples }
    }
    /// McAllester 2003 bound:
    /// R(Q) <= R_emp(Q) + sqrt( (KL(Q||P) + ln(2*sqrt(n)/delta)) / (2*n) )
    pub fn mcallester(&self, empirical_risk: f64, kl_divergence: f64) -> f64 {
        let n = self.n_samples as f64;
        let log_term = (2.0 * n.sqrt() / self.delta).ln();
        // Clamp at zero so a negative argument can never reach sqrt.
        let slack = ((kl_divergence + log_term) / (2.0 * n)).max(0.0);
        empirical_risk + slack.sqrt()
    }
    /// Catoni 2007 bound (simplified, lambda-dependent):
    /// R(Q) <= (1 - e^{-lambda*R_emp(Q)*n}) / lambda  + KL(Q||P)/(lambda*n)
    pub fn catoni(&self, empirical_risk: f64, kl_divergence: f64, lambda: f64) -> f64 {
        let n = self.n_samples as f64;
        // Degenerate lambda: the bound collapses to the empirical risk.
        if lambda.abs() < 1e-12 {
            return empirical_risk;
        }
        let moment_term = (1.0 - (-lambda * empirical_risk * n).exp()) / lambda;
        moment_term + kl_divergence / (lambda * n)
    }
    /// KL divergence between two Bernoulli distributions (for {0,1} risks).
    pub fn kl_bernoulli(q: f64, p: f64) -> f64 {
        // Clamp away from {0, 1} so both logarithms stay finite.
        let eps = 1e-12;
        let q = q.clamp(eps, 1.0 - eps);
        let p = p.clamp(eps, 1.0 - eps);
        let success = q * (q / p).ln();
        let failure = (1.0 - q) * ((1.0 - q) / (1.0 - p)).ln();
        success + failure
    }
    /// KL divergence between two Gaussians with equal variance sigma^2.
    pub fn kl_gaussians(mu_q: f64, mu_p: f64, sigma: f64) -> f64 {
        let diff = mu_q - mu_p;
        diff * diff / (2.0 * sigma * sigma)
    }
}
/// Heavy-ball (momentum) gradient descent for 1-D objectives.
pub struct MomentumSGD {
    pub learning_rate: f64,
    pub momentum: f64,
    pub max_epochs: u32,
}
impl MomentumSGD {
    /// Configure learning rate, momentum factor and epoch budget.
    pub fn new(lr: f64, momentum: f64, max_epochs: u32) -> Self {
        MomentumSGD {
            learning_rate: lr,
            momentum,
            max_epochs,
        }
    }
    /// Minimize f(x) = a*x^2 + b*x from `x0` using momentum updates:
    /// v <- momentum*v - lr*grad; x <- x + v.
    /// Stops early once a step drops below 1e-10.
    pub fn minimize_quadratic(&self, a: f64, b: f64, x0: f64) -> f64 {
        let mut position = x0;
        let mut velocity = 0.0;
        for _ in 0..self.max_epochs {
            let gradient = 2.0 * a * position + b;
            velocity = self.momentum * velocity - self.learning_rate * gradient;
            let moved = position + velocity;
            if (moved - position).abs() < 1e-10 {
                return moved;
            }
            position = moved;
        }
        position
    }
}
/// Randomized-smoothing wrapper around an arbitrary base classifier:
/// predictions are majority votes under Gaussian input noise, which
/// admits a certified L2 robustness radius.
#[derive(Debug, Clone)]
pub struct RandomizedSmoothingClassifier {
    // Standard deviation of the isotropic Gaussian noise.
    pub sigma: f64,
    // Number of noisy samples drawn per prediction/certification.
    pub n_samples: usize,
    // Confidence level for the lower bound on the top-class probability.
    pub confidence: f64,
}
impl RandomizedSmoothingClassifier {
    pub fn new(sigma: f64, n_samples: usize, confidence: f64) -> Self {
        RandomizedSmoothingClassifier {
            sigma,
            n_samples,
            confidence,
        }
    }
    /// Smooth prediction: return class with highest vote count after adding
    /// Gaussian noise `n_samples` times.
    ///
    /// Noise comes from a fixed-seed LCG driving a Box-Muller transform
    /// (one pair of normals per two input coordinates), so the result is
    /// deterministic per input. Vote ties are broken by HashMap iteration
    /// order, which is NOT stable across processes.
    pub fn smooth_predict<F: Fn(&[f64]) -> usize>(&self, x: &[f64], base_classifier: &F) -> usize {
        let mut votes: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
        let mut state: u64 = 0xdeadbeefcafe1234_u64;
        // Uniform (0, 1) generator: 64-bit LCG, top 53 bits, clamped away
        // from 0 and 1 so ln(u) below stays finite.
        let mut lcg = || -> f64 {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let u = (state >> 11) as f64 / (1u64 << 53) as f64;
            u.clamp(1e-15, 1.0 - 1e-15)
        };
        for _ in 0..self.n_samples {
            // Box-Muller: each chunk of two coordinates gets an independent
            // pair of N(0, sigma^2) perturbations; an odd trailing
            // coordinate uses only the cosine branch.
            let noisy: Vec<f64> = x
                .chunks(2)
                .flat_map(|chunk| {
                    let u1 = lcg();
                    let u2 = lcg();
                    let r = (-2.0 * u1.ln()).sqrt();
                    let theta = std::f64::consts::TAU * u2;
                    let n1 = r * theta.cos() * self.sigma;
                    let n2 = r * theta.sin() * self.sigma;
                    if chunk.len() == 2 {
                        vec![chunk[0] + n1, chunk[1] + n2]
                    } else {
                        vec![chunk[0] + n1]
                    }
                })
                .take(x.len())
                .collect();
            let cls = base_classifier(&noisy);
            *votes.entry(cls).or_insert(0) += 1;
        }
        // Majority vote over all noisy evaluations.
        votes
            .into_iter()
            .max_by_key(|(_, v)| *v)
            .map(|(c, _)| c)
            .unwrap_or(0)
    }
    /// Certify: return the L2 radius for which the top-class prediction is
    /// guaranteed to hold.  Returns 0.0 if not certifiable.
    ///
    /// NOTE(review): `z` below is a concentration-style constant used in
    /// place of the exact normal quantile, and the same samples are used
    /// for both class selection and probability estimation, so the bound
    /// is approximate — confirm before relying on it as a formal
    /// certificate. Also, `n_samples == 0` divides by zero (NaN `p_hat`).
    pub fn certify<F: Fn(&[f64]) -> usize>(&self, x: &[f64], base_classifier: &F) -> (usize, f64) {
        let mut votes: std::collections::HashMap<usize, usize> = std::collections::HashMap::new();
        let mut state: u64 = 0x123456789abcdef0_u64;
        // Same LCG + Box-Muller scheme as `smooth_predict`, different seed.
        let mut lcg = || -> f64 {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let u = (state >> 11) as f64 / (1u64 << 53) as f64;
            u.clamp(1e-15, 1.0 - 1e-15)
        };
        for _ in 0..self.n_samples {
            let noisy: Vec<f64> = x
                .chunks(2)
                .flat_map(|chunk| {
                    let u1 = lcg();
                    let u2 = lcg();
                    let r = (-2.0 * u1.ln()).sqrt();
                    let theta = std::f64::consts::TAU * u2;
                    let n1 = r * theta.cos() * self.sigma;
                    let n2 = r * theta.sin() * self.sigma;
                    if chunk.len() == 2 {
                        vec![chunk[0] + n1, chunk[1] + n2]
                    } else {
                        vec![chunk[0] + n1]
                    }
                })
                .take(x.len())
                .collect();
            let cls = base_classifier(&noisy);
            *votes.entry(cls).or_insert(0) += 1;
        }
        // Most-voted class and its vote count ((0, 0) when no votes).
        let top = votes
            .iter()
            .max_by_key(|(_, v)| **v)
            .map(|(&c, &v)| (c, v))
            .unwrap_or((0, 0));
        let p_hat = top.1 as f64 / self.n_samples as f64;
        let z = (-((1.0 - self.confidence) / 2.0).ln() * 2.0).sqrt();
        // Normal-approximation lower confidence bound on the top-class
        // probability.
        let p_lower =
            (p_hat - z * (p_hat * (1.0 - p_hat) / self.n_samples as f64).sqrt()).clamp(0.0, 1.0);
        if p_lower > 0.5 {
            // Certified radius: sigma * Phi^{-1}(p_lower).
            let radius = self.sigma * Self::probit(p_lower);
            (top.0, radius)
        } else {
            // Majority not established with confidence: no certified radius.
            (top.0, 0.0)
        }
    }
    /// Inverse standard-normal CDF via the Abramowitz & Stegun 26.2.23
    /// rational approximation (the c/d constants below), mirrored for
    /// p < 0.5.
    fn probit(p: f64) -> f64 {
        let p = p.clamp(1e-15, 1.0 - 1e-15);
        // Auxiliary variable t = sqrt(-2 ln(tail probability)).
        let t = if p < 0.5 {
            (-2.0 * p.ln()).sqrt()
        } else {
            (-2.0 * (1.0 - p).ln()).sqrt()
        };
        let c = [2.515517, 0.802853, 0.010328];
        let d = [1.432788, 0.189269, 0.001308];
        let num = c[0] + c[1] * t + c[2] * t * t;
        let den = 1.0 + d[0] * t + d[1] * t * t + d[2] * t * t * t;
        let result = t - num / den;
        // The formula yields the upper-tail quantile; negate for p < 0.5.
        if p < 0.5 {
            -result
        } else {
            result
        }
    }
}
/// k-nearest-neighbors classifier (lazy learner: `fit` just memorizes).
pub struct KnnClassifier {
    pub k: usize,
    pub data: Vec<(Vec<f64>, usize)>,
}
impl KnnClassifier {
    /// Empty classifier that will vote among the `k` nearest neighbors.
    pub fn new(k: usize) -> Self {
        KnnClassifier {
            k,
            data: Vec::new(),
        }
    }
    /// Memorize the training set `(features, label)` pairs.
    pub fn fit(&mut self, data: Vec<(Vec<f64>, usize)>) {
        self.data = data;
    }
    /// Sort stored points by distance to `point` and count the labels of
    /// the k nearest; returns `(k_used, label -> count)`.
    fn neighbor_counts(&self, point: &[f64]) -> (usize, std::collections::HashMap<usize, usize>) {
        let mut by_dist: Vec<(f64, usize)> = self
            .data
            .iter()
            .map(|(features, label)| (Self::euclidean_distance(point, features), *label))
            .collect();
        by_dist.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
        let k = self.k.min(by_dist.len());
        let mut counts = std::collections::HashMap::new();
        for (_, label) in by_dist.iter().take(k) {
            *counts.entry(*label).or_insert(0) += 1;
        }
        (k, counts)
    }
    /// Majority vote among the k nearest neighbors; ties are broken by
    /// hash-map iteration order. Returns 0 when no data is stored.
    pub fn predict(&self, point: &[f64]) -> usize {
        let (_, votes) = self.neighbor_counts(point);
        votes
            .into_iter()
            .max_by_key(|(_, v)| *v)
            .map(|(l, _)| l)
            .unwrap_or(0)
    }
    /// Per-label vote fraction among the k nearest neighbors.
    pub fn predict_proba(&self, point: &[f64]) -> std::collections::HashMap<usize, f64> {
        let (k, counts) = self.neighbor_counts(point);
        let total = k as f64;
        counts
            .into_iter()
            .map(|(label, count)| (label, count as f64 / total))
            .collect()
    }
    /// Standard L2 distance over the shorter of the two slices.
    fn euclidean_distance(a: &[f64], b: &[f64]) -> f64 {
        a.iter()
            .zip(b.iter())
            .map(|(x, y)| (x - y).powi(2))
            .sum::<f64>()
            .sqrt()
    }
}
/// Linear model y = w . x + bias. Note: `fit_least_squares` only fits the
/// FIRST feature (simple regression); remaining weights stay zero.
pub struct LinearRegression {
    pub weights: Vec<f64>,
    pub bias: f64,
}
impl LinearRegression {
    /// Zero-initialized model with `n_features` weights.
    pub fn new(n_features: usize) -> Self {
        LinearRegression {
            weights: vec![0.0; n_features],
            bias: 0.0,
        }
    }
    /// Dot product (over the shorter of weights/x) plus bias.
    pub fn predict(&self, x: &[f64]) -> f64 {
        let dot: f64 = self
            .weights
            .iter()
            .zip(x.iter())
            .map(|(w, xi)| w * xi)
            .sum();
        dot + self.bias
    }
    /// Closed-form simple (univariate) least squares on the first feature:
    /// w0 = cov(x0, y) / var(x0), bias = mean(y) - w0 * mean(x0).
    /// Returns a 1-feature zero model when either input is empty; a
    /// constant feature (zero variance) yields slope 0.
    pub fn fit_least_squares(x_data: &[Vec<f64>], y_data: &[f64]) -> Self {
        if x_data.is_empty() || y_data.is_empty() {
            return LinearRegression::new(1);
        }
        let n_features = x_data[0].len();
        let n = x_data.len().min(y_data.len()) as f64;
        let y_mean = y_data.iter().sum::<f64>() / n;
        let x_mean = if n_features > 0 && !x_data.is_empty() {
            x_data.iter().map(|x| x[0]).sum::<f64>() / n
        } else {
            0.0
        };
        // Accumulate covariance and variance around the means.
        let mut cov_xy = 0.0f64;
        let mut var_xx = 0.0f64;
        for (x, y) in x_data.iter().zip(y_data.iter()) {
            let xi = x.first().copied().unwrap_or(0.0);
            cov_xy += (xi - x_mean) * (y - y_mean);
            var_xx += (xi - x_mean).powi(2);
        }
        let slope = if var_xx.abs() > 1e-12 {
            cov_xy / var_xx
        } else {
            0.0
        };
        let intercept = y_mean - slope * x_mean;
        let mut weights = vec![0.0f64; n_features];
        if let Some(w0) = weights.first_mut() {
            *w0 = slope;
        }
        LinearRegression {
            weights,
            bias: intercept,
        }
    }
    /// Coefficient of determination; 1.0 when the targets are constant,
    /// 0.0 for empty targets.
    pub fn r_squared(&self, x_data: &[Vec<f64>], y_data: &[f64]) -> f64 {
        if y_data.is_empty() {
            return 0.0;
        }
        let y_mean = y_data.iter().sum::<f64>() / y_data.len() as f64;
        let ss_tot: f64 = y_data.iter().map(|y| (y - y_mean).powi(2)).sum();
        let ss_res: f64 = x_data
            .iter()
            .zip(y_data.iter())
            .map(|(x, y)| (y - self.predict(x)).powi(2))
            .sum();
        if ss_tot < 1e-12 {
            1.0
        } else {
            1.0 - ss_res / ss_tot
        }
    }
    /// Mean squared prediction error; 0.0 for empty input.
    pub fn mse(&self, x_data: &[Vec<f64>], y_data: &[f64]) -> f64 {
        let n = x_data.len().min(y_data.len());
        if n == 0 {
            return 0.0;
        }
        let total: f64 = x_data
            .iter()
            .zip(y_data.iter())
            .map(|(x, y)| (y - self.predict(x)).powi(2))
            .sum();
        total / n as f64
    }
}
807#[derive(Debug, Clone)]
808pub struct UncertaintySampler {
809    pub strategy: UncertaintyStrategy,
810}
811impl UncertaintySampler {
812    pub fn new(strategy: UncertaintyStrategy) -> Self {
813        UncertaintySampler { strategy }
814    }
815    /// Score a probability distribution (softmax output).
816    /// Higher score = more uncertain.
817    pub fn score(&self, probs: &[f64]) -> f64 {
818        match self.strategy {
819            UncertaintyStrategy::LeastConfident => {
820                let max_p = probs.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
821                1.0 - max_p
822            }
823            UncertaintyStrategy::MarginSampling => {
824                if probs.len() < 2 {
825                    return 0.0;
826                }
827                let mut sorted = probs.to_vec();
828                sorted.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
829                1.0 - (sorted[0] - sorted[1])
830            }
831            UncertaintyStrategy::Entropy => {
832                let eps = 1e-15;
833                -probs
834                    .iter()
835                    .filter(|&&p| p > 0.0)
836                    .map(|&p| p * (p + eps).ln())
837                    .sum::<f64>()
838            }
839        }
840    }
841    /// Given a batch of probability distributions (one per candidate),
842    /// return the index of the most uncertain candidate.
843    pub fn select_query(&self, candidates: &[Vec<f64>]) -> usize {
844        candidates
845            .iter()
846            .enumerate()
847            .max_by(|(_, a), (_, b)| {
848                self.score(a)
849                    .partial_cmp(&self.score(b))
850                    .unwrap_or(std::cmp::Ordering::Equal)
851            })
852            .map(|(i, _)| i)
853            .unwrap_or(0)
854    }
855}
/// One-level decision tree: thresholds a single feature, with a polarity
/// that flips which side is labeled 1.
pub struct DecisionStump {
    pub feature_idx: usize,
    pub threshold: f64,
    pub polarity: i32,
}
impl DecisionStump {
    /// Build a stump from its three parameters.
    pub fn new(feature_idx: usize, threshold: f64, polarity: i32) -> Self {
        DecisionStump {
            feature_idx,
            threshold,
            polarity,
        }
    }
    /// Binary prediction: with positive polarity, 1 iff the feature value
    /// is >= threshold; with non-positive polarity, 1 iff it is below.
    /// A missing feature is treated as 0.0.
    pub fn predict(&self, x: &[f64]) -> usize {
        let val = x.get(self.feature_idx).copied().unwrap_or(0.0);
        let fires = if self.polarity > 0 {
            val >= self.threshold
        } else {
            val < self.threshold
        };
        usize::from(fires)
    }
    /// Exhaustively search features, thresholds (the observed values) and
    /// both polarities for the stump with minimal weighted error; the
    /// first stump attaining the minimum is kept.
    pub fn find_best(data: &[(Vec<f64>, usize)], weights: &[f64]) -> Self {
        let n_features = data.first().map_or(0, |(x, _)| x.len());
        let mut best = DecisionStump::new(0, 0.0, 1);
        let mut best_err = f64::INFINITY;
        for feat in 0..n_features {
            // Candidate thresholds: sorted, deduplicated feature values.
            let mut values: Vec<f64> = data.iter().map(|(x, _)| x[feat]).collect();
            values.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            values.dedup();
            for &thresh in &values {
                for &pol in &[1, -1] {
                    let candidate = DecisionStump::new(feat, thresh, pol);
                    // Weighted misclassification error of this candidate.
                    let mut err = 0.0;
                    for ((x, y), w) in data.iter().zip(weights.iter()) {
                        if candidate.predict(x) != *y {
                            err += *w;
                        }
                    }
                    if err < best_err {
                        best_err = err;
                        best = candidate;
                    }
                }
            }
        }
        best
    }
}
/// Element-wise activation functions. `Softmax` is treated as the
/// identity in the scalar methods because a real softmax needs the whole
/// vector — see [`Activation::apply_softmax`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Activation {
    ReLU,
    Sigmoid,
    Tanh,
    Linear,
    Softmax,
    LeakyReLU,
    ELU,
}
impl Activation {
    /// Apply the activation to a single pre-activation value.
    pub fn apply(&self, x: f64) -> f64 {
        match self {
            Activation::ReLU => x.max(0.0),
            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
            Activation::Tanh => x.tanh(),
            // Softmax is identity here; normalize with `apply_softmax`.
            Activation::Linear | Activation::Softmax => x,
            Activation::LeakyReLU => {
                if x > 0.0 {
                    x
                } else {
                    0.01 * x
                }
            }
            Activation::ELU => {
                if x > 0.0 {
                    x
                } else {
                    x.exp() - 1.0
                }
            }
        }
    }
    /// Derivative with respect to the pre-activation input `x`.
    pub fn derivative(&self, x: f64) -> f64 {
        match self {
            Activation::ReLU => {
                if x > 0.0 {
                    1.0
                } else {
                    0.0
                }
            }
            Activation::Sigmoid => {
                // s'(x) = s(x) * (1 - s(x)).
                let s = self.apply(x);
                s * (1.0 - s)
            }
            Activation::Tanh => 1.0 - x.tanh().powi(2),
            // Both are identity in the scalar view.
            Activation::Linear | Activation::Softmax => 1.0,
            Activation::LeakyReLU => {
                if x > 0.0 {
                    1.0
                } else {
                    0.01
                }
            }
            Activation::ELU => {
                if x > 0.0 {
                    1.0
                } else {
                    x.exp()
                }
            }
        }
    }
    /// Numerically-stable softmax over a whole vector: the maximum is
    /// subtracted before exponentiating. A degenerate (near-zero) sum
    /// falls back to the uniform distribution.
    pub fn apply_softmax(values: &[f64]) -> Vec<f64> {
        if values.is_empty() {
            return vec![];
        }
        let max_val = values.iter().copied().fold(f64::NEG_INFINITY, f64::max);
        let exps: Vec<f64> = values.iter().map(|&v| (v - max_val).exp()).collect();
        let total: f64 = exps.iter().sum();
        if total.abs() < 1e-15 {
            vec![1.0 / values.len() as f64; values.len()]
        } else {
            exps.iter().map(|&e| e / total).collect()
        }
    }
}