1use super::functions::*;
6pub struct GrowthFunction {
8 pub vc_dim: usize,
10}
11impl GrowthFunction {
12 pub fn new(vc_dim: usize) -> Self {
14 Self { vc_dim }
15 }
16 pub fn evaluate(&self, m: usize) -> usize {
18 VCDimension::new(self.vc_dim).sauer_shelah_bound(m)
19 }
20}
21#[allow(dead_code)]
23#[derive(Debug, Clone)]
24pub struct CrossValidation {
25 pub n_folds: usize,
26 pub n_samples: usize,
27 pub shuffle: bool,
28 pub stratified: bool,
29}
30#[allow(dead_code)]
31impl CrossValidation {
32 pub fn new(k: usize, n: usize) -> Self {
33 CrossValidation {
34 n_folds: k,
35 n_samples: n,
36 shuffle: true,
37 stratified: false,
38 }
39 }
40 pub fn k_fold_5(n: usize) -> Self {
41 CrossValidation::new(5, n)
42 }
43 pub fn loocv(n: usize) -> Self {
44 CrossValidation::new(n, n)
45 }
46 pub fn fold_size(&self) -> usize {
47 (self.n_samples + self.n_folds - 1) / self.n_folds
48 }
49 pub fn train_size(&self) -> usize {
50 self.n_samples - self.fold_size()
51 }
52 pub fn n_train_test_splits(&self) -> usize {
53 self.n_folds
54 }
55}
56pub struct EarlyStoppingReg {
58 pub max_iters: usize,
60 pub step_size: f64,
62}
63impl EarlyStoppingReg {
64 pub fn new(max_iters: usize, step_size: f64) -> Self {
66 Self {
67 max_iters,
68 step_size,
69 }
70 }
71 pub fn effective_lambda(&self) -> f64 {
73 1.0 / (self.step_size * self.max_iters as f64)
74 }
75}
76pub struct AdaBoost {
78 pub rounds: usize,
80 pub alphas: Vec<f64>,
82 pub weak_accuracies: Vec<f64>,
84}
85impl AdaBoost {
86 pub fn new(rounds: usize) -> Self {
88 Self {
89 rounds,
90 alphas: Vec::new(),
91 weak_accuracies: Vec::new(),
92 }
93 }
94 pub fn compute_alpha(weak_error: f64) -> f64 {
96 0.5 * ((1.0 - weak_error) / weak_error).ln()
97 }
98 pub fn training_error_bound(gammas: &[f64]) -> f64 {
100 let sum_gamma_sq: f64 = gammas.iter().map(|g| g * g).sum();
101 (-2.0 * sum_gamma_sq).exp()
102 }
103 pub fn add_round(&mut self, weak_accuracy: f64) {
105 let weak_error = 1.0 - weak_accuracy;
106 let alpha = Self::compute_alpha(weak_error);
107 self.alphas.push(alpha);
108 self.weak_accuracies.push(weak_accuracy);
109 }
110}
111pub struct OnlineGradientDescent {
113 pub weights: Vec<f64>,
115 pub eta: f64,
117 pub d: f64,
119 pub g: f64,
121 pub t: usize,
123}
124impl OnlineGradientDescent {
125 pub fn new(dim: usize, eta: f64, d: f64, g: f64) -> Self {
127 Self {
128 weights: vec![0.0; dim],
129 eta,
130 d,
131 g,
132 t: 0,
133 }
134 }
135 pub fn update(&mut self, gradient: &[f64]) {
137 for (wi, &gi) in self.weights.iter_mut().zip(gradient.iter()) {
138 *wi -= self.eta * gi;
139 }
140 let norm: f64 = self.weights.iter().map(|wi| wi * wi).sum::<f64>().sqrt();
141 if norm > self.d {
142 let scale = self.d / norm;
143 for wi in self.weights.iter_mut() {
144 *wi *= scale;
145 }
146 }
147 self.t += 1;
148 }
149 pub fn regret_bound(&self) -> f64 {
151 self.d * self.g * (2.0 * self.t as f64).sqrt()
152 }
153}
154#[allow(dead_code)]
156#[derive(Debug, Clone)]
157pub struct SVMClassifier {
158 pub kernel_type: SVMKernel,
159 pub c_regularization: f64,
160 pub n_support_vectors: usize,
161}
162#[allow(dead_code)]
163impl SVMClassifier {
164 pub fn new(kernel: SVMKernel, c: f64) -> Self {
165 SVMClassifier {
166 kernel_type: kernel,
167 c_regularization: c,
168 n_support_vectors: 0,
169 }
170 }
171 pub fn linear(c: f64) -> Self {
172 SVMClassifier::new(SVMKernel::Linear, c)
173 }
174 pub fn rbf(gamma: f64, c: f64) -> Self {
175 SVMClassifier::new(SVMKernel::RBF(gamma), c)
176 }
177 pub fn kernel_value(&self, x: &[f64], xp: &[f64]) -> f64 {
178 match &self.kernel_type {
179 SVMKernel::Linear => dot(x, xp),
180 SVMKernel::Polynomial(d) => (dot(x, xp) + 1.0).powi(*d as i32),
181 SVMKernel::RBF(gamma) => {
182 let sq_dist = x
183 .iter()
184 .zip(xp.iter())
185 .map(|(a, b)| (a - b).powi(2))
186 .sum::<f64>();
187 (-gamma * sq_dist).exp()
188 }
189 SVMKernel::Sigmoid => (dot(x, xp)).tanh(),
190 }
191 }
192 pub fn sparsity_ratio(&self, n_training: usize) -> f64 {
193 if n_training == 0 {
194 return 0.0;
195 }
196 self.n_support_vectors as f64 / n_training as f64
197 }
198}
199pub struct FisherInformation {
201 pub theta: f64,
203}
204impl FisherInformation {
205 pub fn new(theta: f64) -> Self {
207 Self { theta }
208 }
209 pub fn estimate(log_density: impl Fn(f64, f64) -> f64, theta: f64, samples: &[f64]) -> f64 {
213 let h = 1e-5;
214 let n = samples.len() as f64;
215 samples
216 .iter()
217 .map(|&x| {
218 let score = (log_density(x, theta + h) - log_density(x, theta - h)) / (2.0 * h);
219 score * score
220 })
221 .sum::<f64>()
222 / n
223 }
224 pub fn cramer_rao_bound(&self, fisher_val: f64) -> f64 {
226 if fisher_val > 0.0 {
227 1.0 / fisher_val
228 } else {
229 f64::INFINITY
230 }
231 }
232}
233pub struct RademacherComplexity {
238 pub n: usize,
240 pub class_size: usize,
242}
243impl RademacherComplexity {
244 pub fn new(n: usize, class_size: usize) -> Self {
246 Self { n, class_size }
247 }
248 pub fn upper_bound(&self) -> f64 {
250 (2.0 * (self.class_size as f64).ln() / self.n as f64).sqrt()
251 }
252 pub fn generalization_bound(&self, empirical_loss: f64, delta: f64) -> f64 {
254 let rn = self.upper_bound();
255 let confidence_term = ((2.0 / delta).ln() / (2.0 * self.n as f64)).sqrt();
256 empirical_loss + 2.0 * rn + confidence_term
257 }
258}
259pub struct UniformConvergence {
261 pub eps: f64,
263 pub delta: f64,
265}
266impl UniformConvergence {
267 pub fn new(eps: f64, delta: f64) -> Self {
269 Self { eps, delta }
270 }
271 pub fn required_samples(&self, class_size: usize) -> usize {
273 let log_h = (class_size as f64).ln();
274 let log_delta = (1.0 / self.delta).ln();
275 ((2.0 * (log_h + log_delta)) / (self.eps * self.eps)).ceil() as usize
276 }
277}
278pub struct BiasVarianceTradeoff {
280 pub bias_squared: f64,
282 pub variance: f64,
284 pub noise: f64,
286}
287impl BiasVarianceTradeoff {
288 pub fn new(bias_squared: f64, variance: f64, noise: f64) -> Self {
290 Self {
291 bias_squared,
292 variance,
293 noise,
294 }
295 }
296 pub fn total_mse(&self) -> f64 {
298 self.bias_squared + self.variance + self.noise
299 }
300}
301pub struct MutualInformation;
303impl MutualInformation {
304 pub fn compute(joint: &[Vec<f64>]) -> f64 {
308 if joint.is_empty() {
309 return 0.0;
310 }
311 let _n_rows = joint.len();
312 let n_cols = joint[0].len();
313 let px: Vec<f64> = joint.iter().map(|row| row.iter().sum::<f64>()).collect();
314 let py: Vec<f64> = (0..n_cols)
315 .map(|j| {
316 joint
317 .iter()
318 .map(|row| row.get(j).copied().unwrap_or(0.0))
319 .sum()
320 })
321 .collect();
322 let h_x: f64 = px.iter().filter(|&&p| p > 0.0).map(|&p| -p * p.ln()).sum();
323 let h_y: f64 = py.iter().filter(|&&p| p > 0.0).map(|&p| -p * p.ln()).sum();
324 let h_xy: f64 = joint
325 .iter()
326 .flat_map(|row| row.iter())
327 .filter(|&&p| p > 0.0)
328 .map(|&p| -p * p.ln())
329 .sum();
330 (h_x + h_y - h_xy).max(0.0)
331 }
332 pub fn data_processing_inequality(joint_xy: &[Vec<f64>], joint_xz: &[Vec<f64>]) -> bool {
336 let i_xy = Self::compute(joint_xy);
337 let i_xz = Self::compute(joint_xz);
338 i_xz <= i_xy + 1e-9
339 }
340}
341#[allow(dead_code)]
343#[derive(Debug, Clone)]
344pub struct GradientBoosting {
345 pub n_estimators: usize,
346 pub learning_rate: f64,
347 pub max_depth: usize,
348 pub loss: GBLoss,
349}
350#[allow(dead_code)]
351impl GradientBoosting {
352 pub fn new(n: usize, lr: f64, depth: usize, loss: GBLoss) -> Self {
353 GradientBoosting {
354 n_estimators: n,
355 learning_rate: lr,
356 max_depth: depth,
357 loss,
358 }
359 }
360 pub fn xgboost_style(n: usize) -> Self {
361 GradientBoosting::new(n, 0.1, 6, GBLoss::MSE)
362 }
363 pub fn effective_shrinkage(&self) -> f64 {
364 self.learning_rate
365 }
366 pub fn n_leaves_upper_bound(&self) -> usize {
367 2usize.pow(self.max_depth as u32)
368 }
369 pub fn is_regularized(&self) -> bool {
370 self.learning_rate < 1.0
371 }
372}
373pub struct KernelMatrix {
375 pub entries: Vec<Vec<f64>>,
377 pub n: usize,
379}
380impl KernelMatrix {
381 pub fn compute(kernel: &KernelFunction, data: &[Vec<f64>]) -> Self {
383 let n = data.len();
384 let entries: Vec<Vec<f64>> = (0..n)
385 .map(|i| {
386 (0..n)
387 .map(|j| kernel.evaluate(&data[i], &data[j]))
388 .collect()
389 })
390 .collect();
391 Self { entries, n }
392 }
393 pub fn trace(&self) -> f64 {
395 (0..self.n).map(|i| self.entries[i][i]).sum()
396 }
397}
398#[allow(dead_code)]
399#[derive(Debug, Clone, PartialEq)]
400pub enum SVMKernel {
401 Linear,
402 Polynomial(u32),
403 RBF(f64),
404 Sigmoid,
405}
406pub struct KLDivergence;
408impl KLDivergence {
409 pub fn compute(p: &[f64], q: &[f64]) -> f64 {
411 p.iter()
412 .zip(q.iter())
413 .filter(|(&pi, _)| pi > 0.0)
414 .map(|(&pi, &qi)| {
415 if qi == 0.0 {
416 f64::INFINITY
417 } else {
418 pi * (pi / qi).ln()
419 }
420 })
421 .sum()
422 }
423 pub fn is_nonneg(p: &[f64], q: &[f64]) -> bool {
425 Self::compute(p, q) >= -1e-9
426 }
427}
428pub struct VCDimension {
430 pub dim: usize,
432}
433impl VCDimension {
434 pub fn new(dim: usize) -> Self {
436 Self { dim }
437 }
438 pub fn sauer_shelah_bound(&self, m: usize) -> usize {
440 let d = self.dim;
441 let mut bound = 0usize;
442 let mut binom = 1usize;
443 for i in 0..=d.min(m) {
444 if i > 0 {
445 binom = binom
446 .saturating_mul(m - i + 1)
447 .checked_div(i)
448 .unwrap_or(binom);
449 }
450 bound = bound.saturating_add(binom);
451 }
452 bound
453 }
454 pub fn can_shatter(&self, m: usize) -> bool {
459 m <= self.dim
460 }
461 pub fn fundamental_theorem_pac(&self) -> bool {
463 self.dim < usize::MAX
464 }
465}
466pub enum KernelType {
468 Linear,
470 Polynomial { degree: u32, offset: f64 },
472 Rbf { sigma: f64 },
474 Laplace { sigma: f64 },
476}
477pub struct RegretBound {
479 pub t: usize,
481 pub d: f64,
483 pub g: f64,
485}
486impl RegretBound {
487 pub fn new(t: usize, d: f64, g: f64) -> Self {
489 Self { t, d, g }
490 }
491 pub fn ogd_bound(&self) -> f64 {
493 self.d * self.g * (2.0 * self.t as f64).sqrt()
494 }
495}
496pub struct SampleComplexity {
501 pub eps: f64,
503 pub delta: f64,
505 pub vc_dim: usize,
507}
508impl SampleComplexity {
509 pub fn new(eps: f64, delta: f64, vc_dim: usize) -> Self {
511 Self { eps, delta, vc_dim }
512 }
513 pub fn compute(&self) -> usize {
515 let d = self.vc_dim as f64;
516 let numerator = d * (d / self.eps + 1.0).ln() + (2.0 / self.delta).ln();
517 (numerator / self.eps).ceil() as usize
518 }
519}
520pub struct KernelSVMTrainer {
524 pub n: usize,
526 pub alphas: Vec<f64>,
528 pub labels: Vec<f64>,
530 pub bias: f64,
532 pub c: f64,
534}
535impl KernelSVMTrainer {
536 pub fn new(n: usize, labels: Vec<f64>, c: f64) -> Self {
538 Self {
539 n,
540 alphas: vec![0.0; n],
541 labels,
542 bias: 0.0,
543 c,
544 }
545 }
546 pub fn decision(&self, kernel_row: &[f64]) -> f64 {
548 self.alphas
549 .iter()
550 .zip(self.labels.iter())
551 .zip(kernel_row.iter())
552 .map(|((a, y), k)| a * y * k)
553 .sum::<f64>()
554 + self.bias
555 }
556 pub fn smo_step(&mut self, i: usize, j: usize, k: &[Vec<f64>]) -> bool {
560 if i == j {
561 return false;
562 }
563 let yi = self.labels[i];
564 let yj = self.labels[j];
565 let fi = self.decision(&k[i]);
566 let fj = self.decision(&k[j]);
567 let ei = fi - yi;
568 let ej = fj - yj;
569 let eta = k[i][i] + k[j][j] - 2.0 * k[i][j];
570 if eta <= 0.0 {
571 return false;
572 }
573 let alpha_j_old = self.alphas[j];
574 let alpha_i_old = self.alphas[i];
575 let (l, h) = if (yi - yj).abs() < 1e-9 {
576 let sum = alpha_i_old + alpha_j_old;
577 ((sum - self.c).max(0.0), sum.min(self.c))
578 } else {
579 let diff = alpha_j_old - alpha_i_old;
580 ((-diff).max(0.0), (self.c - diff).min(self.c))
581 };
582 let alpha_j_new = (alpha_j_old + yj * (ei - ej) / eta).clamp(l, h);
583 if (alpha_j_new - alpha_j_old).abs() < 1e-5 {
584 return false;
585 }
586 let alpha_i_new = alpha_i_old + yi * yj * (alpha_j_old - alpha_j_new);
587 let b1 = self.bias
588 - ei
589 - yi * (alpha_i_new - alpha_i_old) * k[i][i]
590 - yj * (alpha_j_new - alpha_j_old) * k[i][j];
591 let b2 = self.bias
592 - ej
593 - yi * (alpha_i_new - alpha_i_old) * k[i][j]
594 - yj * (alpha_j_new - alpha_j_old) * k[j][j];
595 self.bias = (b1 + b2) / 2.0;
596 self.alphas[i] = alpha_i_new;
597 self.alphas[j] = alpha_j_new;
598 true
599 }
600 pub fn generalization_bound(radius: f64, margin: f64, n: usize) -> f64 {
602 (radius * radius) / (margin * margin * n as f64)
603 }
604}
605pub struct PACLearner {
607 pub eps: f64,
609 pub delta: f64,
611}
612impl PACLearner {
613 pub fn new(eps: f64, delta: f64) -> Self {
615 Self { eps, delta }
616 }
617 pub fn required_samples(&self, vc_dim: usize) -> usize {
619 SampleComplexity::new(self.eps, self.delta, vc_dim).compute()
620 }
621}
622pub struct ELBO {
626 pub kl_term: f64,
628 pub reconstruction_term: f64,
630}
631impl ELBO {
632 pub fn new(reconstruction_term: f64, kl_term: f64) -> Self {
634 Self {
635 kl_term,
636 reconstruction_term,
637 }
638 }
639 pub fn value(&self) -> f64 {
641 self.reconstruction_term - self.kl_term
642 }
643 pub fn compute(q: &[f64], p_joint: &[f64]) -> f64 {
647 q.iter()
648 .zip(p_joint.iter())
649 .filter(|(&qi, _)| qi > 0.0)
650 .map(|(&qi, &pi)| {
651 if pi == 0.0 {
652 f64::NEG_INFINITY
653 } else {
654 qi * (pi / qi).ln()
655 }
656 })
657 .sum()
658 }
659}
660pub struct TikhonovReg {
662 pub lambda: f64,
664}
665impl TikhonovReg {
666 pub fn new(lambda: f64) -> Self {
668 Self { lambda }
669 }
670 pub fn solve(&self, x: &[Vec<f64>], y: &[f64]) -> Vec<f64> {
673 let d = if x.is_empty() { 0 } else { x[0].len() };
674 let n = x.len();
675 let mut xtx = vec![vec![0.0f64; d]; d];
676 for i in 0..d {
677 xtx[i][i] = self.lambda;
678 }
679 for row in x {
680 for i in 0..d {
681 for j in 0..d {
682 xtx[i][j] += row[i] * row[j];
683 }
684 }
685 }
686 let mut xty = vec![0.0f64; d];
687 for (row, &yi) in x.iter().zip(y.iter()) {
688 for j in 0..d {
689 xty[j] += row[j] * yi;
690 }
691 }
692 gauss_solve(&xtx, &xty, d, n)
693 }
694 pub fn penalty(&self, w: &[f64]) -> f64 {
696 self.lambda * w.iter().map(|wi| wi * wi).sum::<f64>()
697 }
698}
699pub struct CausalBackdoor {
704 pub cond_probs: Vec<f64>,
706 pub stratum_probs: Vec<f64>,
708}
709impl CausalBackdoor {
710 pub fn new(cond_probs: Vec<f64>, stratum_probs: Vec<f64>) -> Self {
712 Self {
713 cond_probs,
714 stratum_probs,
715 }
716 }
717 pub fn adjust(&self) -> f64 {
721 self.cond_probs
722 .iter()
723 .zip(self.stratum_probs.iter())
724 .map(|(p, q)| p * q)
725 .sum()
726 }
727 pub fn confounding_bias(&self, observational: f64) -> f64 {
729 (observational - self.adjust()).abs()
730 }
731 pub fn is_valid(&self) -> bool {
733 let sum: f64 = self.stratum_probs.iter().sum();
734 (sum - 1.0).abs() < 1e-6
735 }
736}
737pub struct DoubleRademacher {
739 pub rademacher: RademacherComplexity,
741}
742impl DoubleRademacher {
743 pub fn new(n: usize, class_size: usize) -> Self {
745 Self {
746 rademacher: RademacherComplexity::new(n, class_size),
747 }
748 }
749 pub fn two_sided_bound(&self) -> f64 {
751 2.0 * self.rademacher.upper_bound()
752 }
753}
754pub struct Perceptron {
756 pub weights: Vec<f64>,
758 pub bias: f64,
760 pub mistakes: usize,
762}
763impl Perceptron {
764 pub fn new(dim: usize) -> Self {
766 Self {
767 weights: vec![0.0; dim],
768 bias: 0.0,
769 mistakes: 0,
770 }
771 }
772 pub fn predict(&self, x: &[f64]) -> f64 {
774 let score = dot(&self.weights, x) + self.bias;
775 if score >= 0.0 {
776 1.0
777 } else {
778 -1.0
779 }
780 }
781 pub fn update(&mut self, x: &[f64], label: f64) -> bool {
783 let pred = self.predict(x);
784 if (pred * label) <= 0.0 {
785 for (wi, &xi) in self.weights.iter_mut().zip(x.iter()) {
786 *wi += label * xi;
787 }
788 self.bias += label;
789 self.mistakes += 1;
790 true
791 } else {
792 false
793 }
794 }
795 pub fn mistake_bound(radius: f64, margin: f64) -> usize {
797 ((radius / margin).powi(2)).ceil() as usize
798 }
799}
800pub struct GaussianComplexity {
802 pub n: usize,
804 pub class_size: usize,
806}
807impl GaussianComplexity {
808 pub fn new(n: usize, class_size: usize) -> Self {
810 Self { n, class_size }
811 }
812 pub fn upper_bound(&self) -> f64 {
814 (2.0 * (self.class_size as f64).ln() / self.n as f64).sqrt()
815 }
816}
817#[allow(dead_code)]
818#[derive(Debug, Clone, PartialEq)]
819pub enum GBLoss {
820 MSE,
821 MAE,
822 LogLoss,
823 Huber(f64),
824}
825pub struct LassoReg {
827 pub lambda: f64,
829}
830impl LassoReg {
831 pub fn new(lambda: f64) -> Self {
833 Self { lambda }
834 }
835 pub fn soft_threshold(&self, w: &[f64]) -> Vec<f64> {
837 w.iter()
838 .map(|&wi| {
839 let abs_wi = wi.abs();
840 if abs_wi <= self.lambda {
841 0.0
842 } else {
843 wi.signum() * (abs_wi - self.lambda)
844 }
845 })
846 .collect()
847 }
848 pub fn penalty(&self, w: &[f64]) -> f64 {
850 self.lambda * w.iter().map(|wi| wi.abs()).sum::<f64>()
851 }
852}
853pub struct UCBBandit {
857 pub n: usize,
859 pub counts: Vec<usize>,
861 pub means: Vec<f64>,
863 pub t: usize,
865}
866impl UCBBandit {
867 pub fn new(n: usize) -> Self {
869 Self {
870 n,
871 counts: vec![0; n],
872 means: vec![0.0; n],
873 t: 0,
874 }
875 }
876 pub fn select(&self) -> usize {
881 if let Some(i) = self.counts.iter().position(|&c| c == 0) {
882 return i;
883 }
884 let ln_t = (self.t as f64).ln();
885 (0..self.n)
886 .max_by(|&i, &j| {
887 let ucb_i = self.means[i] + (2.0 * ln_t / self.counts[i] as f64).sqrt();
888 let ucb_j = self.means[j] + (2.0 * ln_t / self.counts[j] as f64).sqrt();
889 ucb_i
890 .partial_cmp(&ucb_j)
891 .unwrap_or(std::cmp::Ordering::Equal)
892 })
893 .unwrap_or(0)
894 }
895 pub fn update(&mut self, arm: usize, reward: f64) {
897 self.counts[arm] += 1;
898 let n = self.counts[arm] as f64;
899 self.means[arm] += (reward - self.means[arm]) / n;
900 self.t += 1;
901 }
902 pub fn regret_bound_upper(&self) -> f64 {
904 let t = self.t as f64;
905 let n = self.n as f64;
906 (n * t * t.ln()).sqrt()
907 }
908}
909pub struct PACBayesGeneralization {
913 pub kl_qp: f64,
915 pub n: usize,
917 pub delta: f64,
919}
920impl PACBayesGeneralization {
921 pub fn new(kl_qp: f64, n: usize, delta: f64) -> Self {
923 Self { kl_qp, n, delta }
924 }
925 pub fn mcallester_bound(&self, empirical_loss: f64) -> f64 {
927 let penalty =
928 ((self.kl_qp + (self.n as f64 / self.delta).ln()) / (2.0 * self.n as f64)).sqrt();
929 empirical_loss + penalty
930 }
931 pub fn catoni_bound(&self, empirical_loss: f64, lambda: f64) -> f64 {
934 let scale = 1.0 / (1.0 - lambda / 2.0);
935 let penalty = self.kl_qp / (2.0 * lambda * self.n as f64);
936 scale * (empirical_loss + penalty)
937 }
938 pub fn optimal_lambda(&self, empirical_loss: f64) -> f64 {
940 let ratio = self.n as f64 * empirical_loss / (self.kl_qp.max(1e-9));
941 1.0 / (1.0 + ratio).sqrt()
942 }
943}
944pub struct ExponentialWeightsAlgorithm {
949 pub n: usize,
951 pub eta: f64,
953 pub weights: Vec<f64>,
955 pub rounds: usize,
957}
958impl ExponentialWeightsAlgorithm {
959 pub fn new(n: usize, eta: f64) -> Self {
961 Self {
962 n,
963 eta,
964 weights: vec![1.0; n],
965 rounds: 0,
966 }
967 }
968 pub fn distribution(&self) -> Vec<f64> {
970 let total: f64 = self.weights.iter().sum();
971 self.weights.iter().map(|w| w / total).collect()
972 }
973 pub fn update(&mut self, losses: &[f64]) {
975 for (w, &l) in self.weights.iter_mut().zip(losses.iter()) {
976 *w *= (-self.eta * l).exp();
977 }
978 self.rounds += 1;
979 }
980 pub fn regret_bound(&self) -> f64 {
982 let t = self.rounds as f64;
983 (self.n as f64).ln() / self.eta + self.eta * t / 2.0
984 }
985 pub fn optimal_eta(n: usize, t: usize) -> f64 {
987 (2.0 * (n as f64).ln() / t as f64).sqrt()
988 }
989}
990pub struct FeatureMap {
992 pub feature_dim: usize,
994}
995impl FeatureMap {
996 pub fn new(feature_dim: usize) -> Self {
998 Self { feature_dim }
999 }
1000 pub fn inner_product(&self, x: &[f64], y: &[f64]) -> f64 {
1003 dot(x, y)
1004 }
1005}
1006pub struct KernelSVM {
1008 pub alphas: Vec<f64>,
1010 pub labels: Vec<f64>,
1012 pub bias: f64,
1014 pub c: f64,
1016}
1017impl KernelSVM {
1018 pub fn new(n: usize, c: f64) -> Self {
1020 Self {
1021 alphas: vec![0.0; n],
1022 labels: vec![1.0; n],
1023 bias: 0.0,
1024 c,
1025 }
1026 }
1027 pub fn predict(&self, kernel_vals: &[f64]) -> f64 {
1029 let sum: f64 = self
1030 .alphas
1031 .iter()
1032 .zip(self.labels.iter())
1033 .zip(kernel_vals.iter())
1034 .map(|((a, y), k)| a * y * k)
1035 .sum();
1036 sum + self.bias
1037 }
1038}
1039#[allow(dead_code)]
1041#[derive(Debug, Clone)]
1042pub struct GaussianProcess {
1043 pub mean: f64,
1044 pub length_scale: f64,
1045 pub signal_variance: f64,
1046 pub noise_variance: f64,
1047}
1048#[allow(dead_code)]
1049impl GaussianProcess {
1050 pub fn new(mean: f64, length_scale: f64, signal_var: f64, noise_var: f64) -> Self {
1051 GaussianProcess {
1052 mean,
1053 length_scale,
1054 signal_variance: signal_var,
1055 noise_variance: noise_var,
1056 }
1057 }
1058 pub fn default_rbf() -> Self {
1059 GaussianProcess::new(0.0, 1.0, 1.0, 0.01)
1060 }
1061 pub fn rbf_kernel(&self, x: f64, xp: f64) -> f64 {
1063 let d = x - xp;
1064 self.signal_variance * (-d * d / (2.0 * self.length_scale.powi(2))).exp()
1065 }
1066 pub fn predictive_variance(&self, x: f64, train_x: &[f64]) -> f64 {
1068 let k_star_star = self.rbf_kernel(x, x);
1069 let k_noise: Vec<f64> = train_x.iter().map(|&xi| self.rbf_kernel(x, xi)).collect();
1070 let contrib: f64 = k_noise.iter().map(|&k| k * k).sum::<f64>()
1071 / (self.signal_variance + self.noise_variance).max(1e-10);
1072 (k_star_star - contrib).max(self.noise_variance)
1073 }
1074 pub fn log_marginal_likelihood_approx(&self, n: usize) -> f64 {
1075 -(n as f64) / 2.0 * (2.0 * std::f64::consts::PI * self.signal_variance).ln()
1076 }
1077}
1078pub struct KernelFunction {
1080 pub kernel_type: KernelType,
1082}
1083impl KernelFunction {
1084 pub fn new(kernel_type: KernelType) -> Self {
1086 Self { kernel_type }
1087 }
1088 pub fn evaluate(&self, x: &[f64], y: &[f64]) -> f64 {
1090 match &self.kernel_type {
1091 KernelType::Linear => dot(x, y),
1092 KernelType::Polynomial { degree, offset } => (dot(x, y) + offset).powi(*degree as i32),
1093 KernelType::Rbf { sigma } => {
1094 let diff_sq: f64 = x.iter().zip(y).map(|(a, b)| (a - b).powi(2)).sum();
1095 (-diff_sq / (2.0 * sigma * sigma)).exp()
1096 }
1097 KernelType::Laplace { sigma } => {
1098 let diff_norm: f64 = x.iter().zip(y).map(|(a, b)| (a - b).abs()).sum();
1099 (-diff_norm / sigma).exp()
1100 }
1101 }
1102 }
1103 pub fn is_positive_definite(&self, points: &[Vec<f64>]) -> bool {
1106 let n = points.len();
1107 let mut k: Vec<Vec<f64>> = (0..n)
1108 .map(|i| {
1109 (0..n)
1110 .map(|j| self.evaluate(&points[i], &points[j]))
1111 .collect()
1112 })
1113 .collect();
1114 for i in 0..n {
1115 for j in 0..i {
1116 let mut sum = k[i][j];
1117 for l in 0..j {
1118 sum -= k[i][l] * k[j][l];
1119 }
1120 k[i][j] = sum / k[j][j];
1121 }
1122 let mut diag = k[i][i];
1123 for l in 0..i {
1124 diag -= k[i][l] * k[i][l];
1125 }
1126 if diag < -1e-9 {
1127 return false;
1128 }
1129 k[i][i] = diag.max(0.0).sqrt();
1130 }
1131 true
1132 }
1133}