1use ghostflow_core::Tensor;
4use rayon::prelude::*;
5
/// k-nearest-neighbors classifier: stores the training set verbatim at
/// `fit` time and classifies each query by a (possibly distance-weighted)
/// vote among its `n_neighbors` closest training points.
pub struct KNeighborsClassifier {
    /// Number of neighbors consulted per prediction.
    pub n_neighbors: usize,
    /// Voting scheme: uniform or inverse-distance weighting.
    pub weights: Weights,
    /// Distance metric used to rank training points.
    pub metric: Metric,
    /// Exponent for the Minkowski metric (2.0 is equivalent to Euclidean).
    pub p: f32,
    // Row-major flattened training features (n_samples_ * n_features_), set by `fit`.
    x_train_: Option<Vec<f32>>,
    // Training labels stored as f32 class indices, set by `fit`.
    y_train_: Option<Vec<f32>>,
    // Cached training-set dimensions; 0 until `fit` is called.
    n_samples_: usize,
    n_features_: usize,
    // Number of distinct classes, derived in `fit` as max(label) + 1.
    n_classes_: usize,
}
18
/// How neighbor votes are weighted when aggregating predictions.
#[derive(Clone, Copy, Debug)]
pub enum Weights {
    /// Every neighbor contributes equally.
    Uniform,
    /// Each neighbor contributes with weight 1 / distance.
    Distance,
}
24
/// Distance metric used to compare feature vectors.
#[derive(Clone, Copy, Debug)]
pub enum Metric {
    /// L2 distance.
    Euclidean,
    /// L1 distance.
    Manhattan,
    /// Lp distance; the exponent comes from the model's `p` field.
    Minkowski,
    /// 1 - cosine similarity.
    Cosine,
}
32
33impl KNeighborsClassifier {
34 pub fn new(n_neighbors: usize) -> Self {
35 KNeighborsClassifier {
36 n_neighbors,
37 weights: Weights::Uniform,
38 metric: Metric::Euclidean,
39 p: 2.0,
40 x_train_: None,
41 y_train_: None,
42 n_samples_: 0,
43 n_features_: 0,
44 n_classes_: 0,
45 }
46 }
47
48 pub fn weights(mut self, weights: Weights) -> Self {
49 self.weights = weights;
50 self
51 }
52
53 pub fn metric(mut self, metric: Metric) -> Self {
54 self.metric = metric;
55 self
56 }
57
58 fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
59 match self.metric {
60 Metric::Euclidean => {
61 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).powi(2)).sum::<f32>().sqrt()
62 }
63 Metric::Manhattan => {
64 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).abs()).sum()
65 }
66 Metric::Minkowski => {
67 a.iter().zip(b.iter())
68 .map(|(&ai, &bi)| (ai - bi).abs().powf(self.p))
69 .sum::<f32>()
70 .powf(1.0 / self.p)
71 }
72 Metric::Cosine => {
73 let dot: f32 = a.iter().zip(b.iter()).map(|(&ai, &bi)| ai * bi).sum();
74 let norm_a: f32 = a.iter().map(|&x| x * x).sum::<f32>().sqrt();
75 let norm_b: f32 = b.iter().map(|&x| x * x).sum::<f32>().sqrt();
76 1.0 - dot / (norm_a * norm_b + 1e-10)
77 }
78 }
79 }
80
81 fn find_k_nearest(&self, query: &[f32]) -> Vec<(usize, f32)> {
82 let x_train = self.x_train_.as_ref().unwrap();
83
84 let mut distances: Vec<(usize, f32)> = (0..self.n_samples_)
85 .map(|i| {
86 let train_point = &x_train[i * self.n_features_..(i + 1) * self.n_features_];
87 let dist = self.distance(query, train_point);
88 (i, dist)
89 })
90 .collect();
91
92 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
93 distances.truncate(self.n_neighbors);
94 distances
95 }
96
97 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
98 let x_data = x.data_f32();
99 let y_data = y.data_f32();
100
101 self.n_samples_ = x.dims()[0];
102 self.n_features_ = x.dims()[1];
103 self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;
104
105 self.x_train_ = Some(x_data);
106 self.y_train_ = Some(y_data);
107 }
108
109 pub fn predict(&self, x: &Tensor) -> Tensor {
110 let x_data = x.data_f32();
111 let n_samples = x.dims()[0];
112 let n_features = x.dims()[1];
113
114 let y_train = self.y_train_.as_ref().expect("Model not fitted");
115
116 let predictions: Vec<f32> = (0..n_samples)
117 .into_par_iter()
118 .map(|i| {
119 let query = &x_data[i * n_features..(i + 1) * n_features];
120 let neighbors = self.find_k_nearest(query);
121
122 match self.weights {
123 Weights::Uniform => {
124 let mut class_counts = vec![0usize; self.n_classes_];
125 for (neighbor_idx, _) in neighbors {
126 let class = y_train[neighbor_idx] as usize;
127 if class < self.n_classes_ {
128 class_counts[class] += 1;
129 }
130 }
131 class_counts.iter()
132 .enumerate()
133 .max_by_key(|(_, &count)| count)
134 .map(|(class, _)| class as f32)
135 .unwrap_or(0.0)
136 }
137 Weights::Distance => {
138 let mut class_weights = vec![0.0f32; self.n_classes_];
139 for (neighbor_idx, dist) in neighbors {
140 let class = y_train[neighbor_idx] as usize;
141 if class < self.n_classes_ {
142 let weight = if dist < 1e-10 { 1e10 } else { 1.0 / dist };
143 class_weights[class] += weight;
144 }
145 }
146 class_weights.iter()
147 .enumerate()
148 .max_by(|(_, &a), (_, &b)| a.partial_cmp(&b).unwrap())
149 .map(|(class, _)| class as f32)
150 .unwrap_or(0.0)
151 }
152 }
153 })
154 .collect();
155
156 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
157 }
158
159 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
160 let predictions = self.predict(x);
161 let pred_data = predictions.data_f32();
162 let y_data = y.data_f32();
163
164 let correct: usize = pred_data.iter()
165 .zip(y_data.iter())
166 .filter(|(&p, &y)| (p - y).abs() < 0.5)
167 .count();
168
169 correct as f32 / y_data.len() as f32
170 }
171}
172
173
/// k-nearest-neighbors regressor: stores the training set verbatim at
/// `fit` time and predicts the (possibly distance-weighted) mean target of
/// the `n_neighbors` closest training points.
pub struct KNeighborsRegressor {
    /// Number of neighbors averaged per prediction.
    pub n_neighbors: usize,
    /// Averaging scheme: uniform or inverse-distance weighting.
    pub weights: Weights,
    /// Distance metric used to rank training points.
    pub metric: Metric,
    /// Exponent for the Minkowski metric (2.0 is equivalent to Euclidean).
    pub p: f32,
    // Row-major flattened training features (n_samples_ * n_features_), set by `fit`.
    x_train_: Option<Vec<f32>>,
    // Training targets, set by `fit`.
    y_train_: Option<Vec<f32>>,
    // Cached training-set dimensions; 0 until `fit` is called.
    n_samples_: usize,
    n_features_: usize,
}
185
186impl KNeighborsRegressor {
187 pub fn new(n_neighbors: usize) -> Self {
188 KNeighborsRegressor {
189 n_neighbors,
190 weights: Weights::Uniform,
191 metric: Metric::Euclidean,
192 p: 2.0,
193 x_train_: None,
194 y_train_: None,
195 n_samples_: 0,
196 n_features_: 0,
197 }
198 }
199
200 pub fn weights(mut self, weights: Weights) -> Self {
201 self.weights = weights;
202 self
203 }
204
205 fn distance(&self, a: &[f32], b: &[f32]) -> f32 {
206 match self.metric {
207 Metric::Euclidean => {
208 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).powi(2)).sum::<f32>().sqrt()
209 }
210 Metric::Manhattan => {
211 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).abs()).sum()
212 }
213 _ => {
214 a.iter().zip(b.iter()).map(|(&ai, &bi)| (ai - bi).powi(2)).sum::<f32>().sqrt()
215 }
216 }
217 }
218
219 fn find_k_nearest(&self, query: &[f32]) -> Vec<(usize, f32)> {
220 let x_train = self.x_train_.as_ref().unwrap();
221
222 let mut distances: Vec<(usize, f32)> = (0..self.n_samples_)
223 .map(|i| {
224 let train_point = &x_train[i * self.n_features_..(i + 1) * self.n_features_];
225 let dist = self.distance(query, train_point);
226 (i, dist)
227 })
228 .collect();
229
230 distances.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
231 distances.truncate(self.n_neighbors);
232 distances
233 }
234
235 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
236 self.n_samples_ = x.dims()[0];
237 self.n_features_ = x.dims()[1];
238 self.x_train_ = Some(x.data_f32());
239 self.y_train_ = Some(y.data_f32());
240 }
241
242 pub fn predict(&self, x: &Tensor) -> Tensor {
243 let x_data = x.data_f32();
244 let n_samples = x.dims()[0];
245 let n_features = x.dims()[1];
246
247 let y_train = self.y_train_.as_ref().expect("Model not fitted");
248
249 let predictions: Vec<f32> = (0..n_samples)
250 .into_par_iter()
251 .map(|i| {
252 let query = &x_data[i * n_features..(i + 1) * n_features];
253 let neighbors = self.find_k_nearest(query);
254
255 match self.weights {
256 Weights::Uniform => {
257 let sum: f32 = neighbors.iter()
258 .map(|(idx, _)| y_train[*idx])
259 .sum();
260 sum / neighbors.len() as f32
261 }
262 Weights::Distance => {
263 let mut weighted_sum = 0.0f32;
264 let mut total_weight = 0.0f32;
265 for (idx, dist) in neighbors {
266 let weight = if dist < 1e-10 { 1e10 } else { 1.0 / dist };
267 weighted_sum += weight * y_train[idx];
268 total_weight += weight;
269 }
270 weighted_sum / total_weight.max(1e-10)
271 }
272 }
273 })
274 .collect();
275
276 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
277 }
278
279 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
280 let predictions = self.predict(x);
281 let pred_data = predictions.data_f32();
282 let y_data = y.data_f32();
283
284 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
285 let ss_res: f32 = pred_data.iter()
286 .zip(y_data.iter())
287 .map(|(&p, &y)| (y - p).powi(2))
288 .sum();
289 let ss_tot: f32 = y_data.iter()
290 .map(|&y| (y - y_mean).powi(2))
291 .sum();
292
293 1.0 - ss_res / ss_tot.max(1e-10)
294 }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// Two tight, well-separated clusters; 3-NN should score well above chance.
    #[test]
    fn test_knn_classifier() {
        let features = [0.0f32, 0.0, 0.1, 0.1, 1.0, 1.0, 1.1, 1.1];
        let labels = [0.0f32, 0.0, 1.0, 1.0];
        let x = Tensor::from_slice(&features, &[4, 2]).unwrap();
        let y = Tensor::from_slice(&labels, &[4]).unwrap();

        let mut model = KNeighborsClassifier::new(3);
        model.fit(&x, &y);

        assert!(model.score(&x, &y) >= 0.5);
    }

    /// Targets follow y = 2x; neighbor averaging should give a decent R^2.
    #[test]
    fn test_knn_regressor() {
        let inputs = [1.0f32, 2.0, 3.0, 4.0, 5.0];
        let targets = [2.0f32, 4.0, 6.0, 8.0, 10.0];
        let x = Tensor::from_slice(&inputs, &[5, 1]).unwrap();
        let y = Tensor::from_slice(&targets, &[5]).unwrap();

        let mut model = KNeighborsRegressor::new(3);
        model.fit(&x, &y);

        assert!(model.score(&x, &y) >= 0.5);
    }
}
330
331