//! Support Vector Machine implementations
//! (ghostflow_ml/svm.rs)

use ghostflow_core::Tensor;
use rayon::prelude::*;
/// Support Vector Classifier using SMO (Sequential Minimal Optimization)
pub struct SVC {
    /// Regularization strength; upper bound on each dual coefficient.
    pub c: f32,
    /// Kernel used to compare samples.
    pub kernel: Kernel,
    /// Kernel coefficient (used by Poly/RBF/Sigmoid kernels).
    pub gamma: f32,
    /// Polynomial degree (Poly kernel only).
    pub degree: i32,
    /// Independent term (Poly/Sigmoid kernels).
    pub coef0: f32,
    /// KKT-violation tolerance used by the SMO stopping test.
    pub tol: f32,
    /// Maximum number of SMO passes over the training set.
    pub max_iter: usize,
    // Fitted state — all `None`/zero until `fit` is called:
    support_vectors_: Option<Vec<Vec<f32>>>, // rows of X whose alpha > eps
    support_: Option<Vec<usize>>,            // training indices of the support vectors
    dual_coef_: Option<Vec<f32>>,            // alpha_i * y_i per support vector
    intercept_: Option<f32>,                 // bias term b
    y_train_: Option<Vec<f32>>,              // training labels mapped to {-1, +1}
    n_features_: usize,                      // feature count seen at fit time
}
/// Kernel functions available to the SVM models.
#[derive(Clone, Copy, Debug)]
pub enum Kernel {
    /// K(x, z) = x · z
    Linear,
    /// K(x, z) = (gamma * x · z + coef0)^degree
    Poly,
    /// K(x, z) = exp(-gamma * ||x - z||²)
    RBF,
    /// K(x, z) = tanh(gamma * x · z + coef0)
    Sigmoid,
}
31impl SVC {
32    pub fn new() -> Self {
33        SVC {
34            c: 1.0,
35            kernel: Kernel::RBF,
36            gamma: 1.0,
37            degree: 3,
38            coef0: 0.0,
39            tol: 1e-3,
40            max_iter: 1000,
41            support_vectors_: None,
42            support_: None,
43            dual_coef_: None,
44            intercept_: None,
45            y_train_: None,
46            n_features_: 0,
47        }
48    }
49
50    pub fn c(mut self, c: f32) -> Self {
51        self.c = c;
52        self
53    }
54
55    pub fn kernel(mut self, kernel: Kernel) -> Self {
56        self.kernel = kernel;
57        self
58    }
59
60    pub fn gamma(mut self, gamma: f32) -> Self {
61        self.gamma = gamma;
62        self
63    }
64
65    pub fn max_iter(mut self, n: usize) -> Self {
66        self.max_iter = n;
67        self
68    }
69
70
71    fn kernel_function(&self, xi: &[f32], xj: &[f32]) -> f32 {
72        match self.kernel {
73            Kernel::Linear => {
74                xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum()
75            }
76            Kernel::Poly => {
77                let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
78                (self.gamma * dot + self.coef0).powi(self.degree)
79            }
80            Kernel::RBF => {
81                let dist_sq: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| (a - b).powi(2)).sum();
82                (-self.gamma * dist_sq).exp()
83            }
84            Kernel::Sigmoid => {
85                let dot: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum();
86                (self.gamma * dot + self.coef0).tanh()
87            }
88        }
89    }
90
91    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
92        let x_data = x.data_f32();
93        let y_data = y.data_f32();
94        let n_samples = x.dims()[0];
95        let n_features = x.dims()[1];
96        
97        self.n_features_ = n_features;
98        
99        // Convert labels to -1/+1
100        let y_binary: Vec<f32> = y_data.iter()
101            .map(|&label| if label > 0.5 { 1.0 } else { -1.0 })
102            .collect();
103        
104        // Initialize alphas and bias
105        let mut alpha = vec![0.0f32; n_samples];
106        let mut b = 0.0f32;
107        
108        // Precompute kernel matrix
109        let mut kernel_matrix = vec![vec![0.0f32; n_samples]; n_samples];
110        for i in 0..n_samples {
111            for j in i..n_samples {
112                let xi = &x_data[i * n_features..(i + 1) * n_features];
113                let xj = &x_data[j * n_features..(j + 1) * n_features];
114                let k = self.kernel_function(xi, xj);
115                kernel_matrix[i][j] = k;
116                kernel_matrix[j][i] = k;
117            }
118        }
119        
120        // Simplified SMO
121        let eps = 1e-5;
122        for _iter in 0..self.max_iter {
123            let mut num_changed = 0;
124            
125            for i in 0..n_samples {
126                // Compute error for i
127                let mut f_i = -b;
128                for j in 0..n_samples {
129                    f_i += alpha[j] * y_binary[j] * kernel_matrix[i][j];
130                }
131                let e_i = f_i - y_binary[i];
132                
133                // Check KKT conditions
134                if (y_binary[i] * e_i < -self.tol && alpha[i] < self.c) ||
135                   (y_binary[i] * e_i > self.tol && alpha[i] > 0.0) {
136                    
137                    // Select j randomly (simplified)
138                    let j = (i + 1) % n_samples;
139                    
140                    // Compute error for j
141                    let mut f_j = -b;
142                    for k in 0..n_samples {
143                        f_j += alpha[k] * y_binary[k] * kernel_matrix[j][k];
144                    }
145                    let e_j = f_j - y_binary[j];
146                    
147                    let alpha_i_old = alpha[i];
148                    let alpha_j_old = alpha[j];
149                    
150                    // Compute bounds
151                    let (l, h) = if y_binary[i] != y_binary[j] {
152                        ((alpha[j] - alpha[i]).max(0.0), self.c.min(self.c + alpha[j] - alpha[i]))
153                    } else {
154                        ((alpha[i] + alpha[j] - self.c).max(0.0), self.c.min(alpha[i] + alpha[j]))
155                    };
156                    
157                    if l >= h { continue; }
158                    
159                    let eta = 2.0 * kernel_matrix[i][j] - kernel_matrix[i][i] - kernel_matrix[j][j];
160                    if eta >= 0.0 { continue; }
161                    
162                    // Update alpha_j
163                    alpha[j] = alpha_j_old - y_binary[j] * (e_i - e_j) / eta;
164                    alpha[j] = alpha[j].clamp(l, h);
165                    
166                    if (alpha[j] - alpha_j_old).abs() < eps { continue; }
167                    
168                    // Update alpha_i
169                    alpha[i] = alpha_i_old + y_binary[i] * y_binary[j] * (alpha_j_old - alpha[j]);
170                    
171                    // Update bias
172                    let b1 = b - e_i - y_binary[i] * (alpha[i] - alpha_i_old) * kernel_matrix[i][i]
173                                     - y_binary[j] * (alpha[j] - alpha_j_old) * kernel_matrix[i][j];
174                    let b2 = b - e_j - y_binary[i] * (alpha[i] - alpha_i_old) * kernel_matrix[i][j]
175                                     - y_binary[j] * (alpha[j] - alpha_j_old) * kernel_matrix[j][j];
176                    
177                    b = if alpha[i] > 0.0 && alpha[i] < self.c {
178                        b1
179                    } else if alpha[j] > 0.0 && alpha[j] < self.c {
180                        b2
181                    } else {
182                        (b1 + b2) / 2.0
183                    };
184                    
185                    num_changed += 1;
186                }
187            }
188            
189            if num_changed == 0 { break; }
190        }
191        
192        // Extract support vectors
193        let support_indices: Vec<usize> = alpha.iter()
194            .enumerate()
195            .filter(|(_, &a)| a > eps)
196            .map(|(i, _)| i)
197            .collect();
198        
199        let support_vectors: Vec<Vec<f32>> = support_indices.iter()
200            .map(|&i| x_data[i * n_features..(i + 1) * n_features].to_vec())
201            .collect();
202        
203        let dual_coef: Vec<f32> = support_indices.iter()
204            .map(|&i| alpha[i] * y_binary[i])
205            .collect();
206        
207        self.support_vectors_ = Some(support_vectors);
208        self.support_ = Some(support_indices.clone());
209        self.dual_coef_ = Some(dual_coef);
210        self.intercept_ = Some(b);
211        self.y_train_ = Some(y_binary);
212    }
213
214
215    pub fn predict(&self, x: &Tensor) -> Tensor {
216        let x_data = x.data_f32();
217        let n_samples = x.dims()[0];
218        let n_features = x.dims()[1];
219        
220        let support_vectors = self.support_vectors_.as_ref().expect("Model not fitted");
221        let dual_coef = self.dual_coef_.as_ref().expect("Model not fitted");
222        let b = self.intercept_.unwrap_or(0.0);
223        
224        let predictions: Vec<f32> = (0..n_samples)
225            .into_par_iter()
226            .map(|i| {
227                let xi = &x_data[i * n_features..(i + 1) * n_features];
228                let mut decision = -b;
229                
230                for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
231                    decision += coef * self.kernel_function(xi, sv);
232                }
233                
234                if decision >= 0.0 { 1.0 } else { 0.0 }
235            })
236            .collect();
237        
238        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
239    }
240
241    pub fn decision_function(&self, x: &Tensor) -> Tensor {
242        let x_data = x.data_f32();
243        let n_samples = x.dims()[0];
244        let n_features = x.dims()[1];
245        
246        let support_vectors = self.support_vectors_.as_ref().expect("Model not fitted");
247        let dual_coef = self.dual_coef_.as_ref().expect("Model not fitted");
248        let b = self.intercept_.unwrap_or(0.0);
249        
250        let decisions: Vec<f32> = (0..n_samples)
251            .into_par_iter()
252            .map(|i| {
253                let xi = &x_data[i * n_features..(i + 1) * n_features];
254                let mut decision = -b;
255                
256                for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
257                    decision += coef * self.kernel_function(xi, sv);
258                }
259                
260                decision
261            })
262            .collect();
263        
264        Tensor::from_slice(&decisions, &[n_samples]).unwrap()
265    }
266
267    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
268        let predictions = self.predict(x);
269        let pred_data = predictions.data_f32();
270        let y_data = y.data_f32();
271        
272        let correct: usize = pred_data.iter()
273            .zip(y_data.iter())
274            .filter(|(&p, &y)| (p - y).abs() < 0.5)
275            .count();
276        
277        correct as f32 / y_data.len() as f32
278    }
279}
280
281impl Default for SVC {
282    fn default() -> Self {
283        Self::new()
284    }
285}
286
/// Support Vector Regressor
pub struct SVR {
    /// Regularization strength; caps each dual variable.
    pub c: f32,
    /// Kernel used to compare samples.
    /// NOTE(review): SVR has no `degree`/`coef0` fields, so `kernel_function`
    /// falls back to a linear kernel for Poly/Sigmoid — confirm intended.
    pub kernel: Kernel,
    /// Kernel coefficient for the RBF kernel.
    pub gamma: f32,
    /// Half-width of the epsilon-insensitive loss tube.
    pub epsilon: f32,
    /// Number of coordinate-update passes over the training set.
    pub max_iter: usize,
    // Fitted state — all `None`/zero until `fit` is called:
    support_vectors_: Option<Vec<Vec<f32>>>, // rows of X with |alpha - alpha*| > eps
    dual_coef_: Option<Vec<f32>>,            // alpha_i - alpha*_i per support vector
    intercept_: Option<f32>,                 // bias term b
    n_features_: usize,                      // feature count seen at fit time
}
300impl SVR {
301    pub fn new() -> Self {
302        SVR {
303            c: 1.0,
304            kernel: Kernel::RBF,
305            gamma: 1.0,
306            epsilon: 0.1,
307            max_iter: 1000,
308            support_vectors_: None,
309            dual_coef_: None,
310            intercept_: None,
311            n_features_: 0,
312        }
313    }
314
315    pub fn c(mut self, c: f32) -> Self {
316        self.c = c;
317        self
318    }
319
320    pub fn kernel(mut self, kernel: Kernel) -> Self {
321        self.kernel = kernel;
322        self
323    }
324
325    pub fn epsilon(mut self, epsilon: f32) -> Self {
326        self.epsilon = epsilon;
327        self
328    }
329
330    fn kernel_function(&self, xi: &[f32], xj: &[f32]) -> f32 {
331        match self.kernel {
332            Kernel::Linear => {
333                xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum()
334            }
335            Kernel::RBF => {
336                let dist_sq: f32 = xi.iter().zip(xj.iter()).map(|(&a, &b)| (a - b).powi(2)).sum();
337                (-self.gamma * dist_sq).exp()
338            }
339            _ => {
340                xi.iter().zip(xj.iter()).map(|(&a, &b)| a * b).sum()
341            }
342        }
343    }
344
345    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
346        let x_data = x.data_f32();
347        let y_data = y.data_f32();
348        let n_samples = x.dims()[0];
349        let n_features = x.dims()[1];
350        
351        self.n_features_ = n_features;
352        
353        // Simplified SVR using gradient descent on dual
354        let mut alpha = vec![0.0f32; n_samples];
355        let mut alpha_star = vec![0.0f32; n_samples];
356        
357        let lr = 0.01;
358        
359        for _iter in 0..self.max_iter {
360            for i in 0..n_samples {
361                let xi = &x_data[i * n_features..(i + 1) * n_features];
362                
363                // Compute prediction
364                let mut pred = 0.0f32;
365                for j in 0..n_samples {
366                    let xj = &x_data[j * n_features..(j + 1) * n_features];
367                    pred += (alpha[j] - alpha_star[j]) * self.kernel_function(xi, xj);
368                }
369                
370                let error = pred - y_data[i];
371                
372                // Update alphas
373                if error > self.epsilon {
374                    alpha_star[i] = (alpha_star[i] + lr).min(self.c);
375                } else if error < -self.epsilon {
376                    alpha[i] = (alpha[i] + lr).min(self.c);
377                }
378            }
379        }
380        
381        // Extract support vectors
382        let eps = 1e-5;
383        let mut support_vectors = Vec::new();
384        let mut dual_coef = Vec::new();
385        
386        for i in 0..n_samples {
387            let coef = alpha[i] - alpha_star[i];
388            if coef.abs() > eps {
389                support_vectors.push(x_data[i * n_features..(i + 1) * n_features].to_vec());
390                dual_coef.push(coef);
391            }
392        }
393        
394        // Compute intercept
395        let mut b = 0.0f32;
396        let mut count = 0;
397        for i in 0..n_samples {
398            if alpha[i] > eps && alpha[i] < self.c - eps {
399                let xi = &x_data[i * n_features..(i + 1) * n_features];
400                let mut pred = 0.0f32;
401                for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
402                    pred += coef * self.kernel_function(xi, sv);
403                }
404                b += y_data[i] - pred - self.epsilon;
405                count += 1;
406            }
407        }
408        if count > 0 {
409            b /= count as f32;
410        }
411        
412        self.support_vectors_ = Some(support_vectors);
413        self.dual_coef_ = Some(dual_coef);
414        self.intercept_ = Some(b);
415    }
416
417    pub fn predict(&self, x: &Tensor) -> Tensor {
418        let x_data = x.data_f32();
419        let n_samples = x.dims()[0];
420        let n_features = x.dims()[1];
421        
422        let support_vectors = self.support_vectors_.as_ref().expect("Model not fitted");
423        let dual_coef = self.dual_coef_.as_ref().expect("Model not fitted");
424        let b = self.intercept_.unwrap_or(0.0);
425        
426        let predictions: Vec<f32> = (0..n_samples)
427            .map(|i| {
428                let xi = &x_data[i * n_features..(i + 1) * n_features];
429                let mut pred = b;
430                
431                for (sv, &coef) in support_vectors.iter().zip(dual_coef.iter()) {
432                    pred += coef * self.kernel_function(xi, sv);
433                }
434                
435                pred
436            })
437            .collect();
438        
439        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
440    }
441
442    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
443        let predictions = self.predict(x);
444        let pred_data = predictions.data_f32();
445        let y_data = y.data_f32();
446        
447        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
448        let ss_res: f32 = pred_data.iter()
449            .zip(y_data.iter())
450            .map(|(&p, &y)| (y - p).powi(2))
451            .sum();
452        let ss_tot: f32 = y_data.iter()
453            .map(|&y| (y - y_mean).powi(2))
454            .sum();
455        
456        1.0 - ss_res / ss_tot.max(1e-10)
457    }
458}
459
460impl Default for SVR {
461    fn default() -> Self {
462        Self::new()
463    }
464}
465
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: fit on a tiny 4-sample AND-like problem and check that
    /// the prediction tensor has one entry per input row.
    #[test]
    fn test_svc() {
        let features = Tensor::from_slice(
            &[0.0f32, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
            &[4, 2],
        )
        .unwrap();
        let labels = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut model = SVC::new().kernel(Kernel::RBF).c(1.0);
        model.fit(&features, &labels);

        let predictions = model.predict(&features);
        assert_eq!(predictions.dims(), &[4]);
    }

    /// Smoke test: fit on the linear relation y = 2x and check output shape.
    #[test]
    fn test_svr() {
        let features = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let targets = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut model = SVR::new().kernel(Kernel::Linear);
        model.fit(&features, &targets);

        let predictions = model.predict(&features);
        assert_eq!(predictions.dims(), &[5]);
    }
}