use std::collections::{HashMap, HashSet};
use crate::metrics::{accuracy, r2_score};
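/// Euclidean (L2) distance between two equal-length feature vectors.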
fn euclidean_distance(x1: &[f64], x2: &[f64]) -> f64 {
    x1.iter()
        .zip(x2.iter())
        .map(|(a, b)| (a - b).powi(2))
        .sum::<f64>()
        .sqrt()
}
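/// Minkowski (L^p) distance; `p = 1` is Manhattan and `p = 2` is Euclidean.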
fn minkowski_distance(x1: &[f64], x2: &[f64], p: i32) -> f64 {
    x1.iter()
        .zip(x2.iter())
        .map(|(a, b)| (a - b).abs().powi(p))
        .sum::<f64>()
        .powf(1.0 / p as f64)
}
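/// Cosine distance: 1 minus the cosine similarity, so smaller means closer.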
fn cosine(x1: &[f64], x2: &[f64]) -> f64 {
    let mut dot = 0.0;
    let mut mag1 = 0.0;
    let mut mag2 = 0.0;
    for i in 0..x1.len() {
        dot += x1[i] * x2[i];
        mag1 += x1[i].powi(2);
        mag2 += x2[i].powi(2);
    }
    // Return a *distance* (1 - similarity) rather than the raw similarity;
    // otherwise the ascending sort in the callers would rank the most
    // similar points as the farthest neighbors.
    1.0 - dot / (mag1.sqrt() * mag2.sqrt())
}
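/// Manhattan (L1) distance between two equal-length feature vectors.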
fn manhattan(x1: &[f64], x2: &[f64]) -> f64 {
    x1.iter().zip(x2.iter()).map(|(a, b)| (a - b).abs()).sum()
}
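/// Dispatches to the distance function named by `metric`. `p` is only
/// consulted for "minkowski" and must be `Some` in that case.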
fn get_dist(x1: &[f64], x2: &[f64], metric: &str, p: Option<i32>) -> f64 {
    match metric {
        "euclidean" => euclidean_distance(x1, x2),
        "minkowski" => {
            minkowski_distance(x1, x2, p.expect("p must be specified for minkowski metric"))
        }
        "cosine" => cosine(x1, x2),
        "manhattan" => manhattan(x1, x2),
        _ => panic!("metric must be one of 'euclidean', 'minkowski', 'cosine', 'manhattan'"),
    }
}
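/// K-nearest-neighbors classifier.
///
/// `fit` memorizes the training set; `predict` labels each query by a
/// majority vote among its `n_neighbors` closest training points under the
/// configured distance metric.
///
/// Illustrative usage (`x_train`, `y_train`, and `x_test` are placeholders;
/// the unit tests at the bottom of this file show concrete data):
///
/// ```ignore
/// let mut knn = KNNeighborsClassifier::new(3, "euclidean".to_string(), None);
/// knn.fit(&x_train, &y_train);
/// let y_pred = knn.predict(&x_test);
/// ```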
pub struct KNNeighborsClassifier {
pub n_neighbors: usize,
pub metric: String,
pub p: Option<i32>,
x_train: Vec<Vec<f64>>,
y_train: Vec<f64>,
}
impl KNNeighborsClassifier {
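    /// Builds an unfitted classifier. Panics if the metric name is not
    /// recognized, if `metric` is "minkowski" and `p` is `None`, or if
    /// `n_neighbors` is 0.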
    pub fn new(n_neighbors: usize, metric: String, p: Option<i32>) -> KNNeighborsClassifier {
        if n_neighbors == 0 {
            panic!("n_neighbors must be at least 1");
        }
        if metric != "euclidean" && metric != "minkowski" && metric != "cosine" && metric != "manhattan" {
            panic!("metric must be one of 'euclidean', 'minkowski', 'cosine', 'manhattan'");
        }
        if metric == "minkowski" && p.is_none() {
            panic!("p must be specified for minkowski metric");
        }
KNNeighborsClassifier {
n_neighbors,
metric,
p,
x_train: Vec::new(),
y_train: Vec::new(),
}
}
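    /// Memorizes the training data verbatim; all real work happens at
    /// prediction time. Panics if `x_train` and `y_train` differ in length.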
pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>) {
if x_train.len() != y_train.len() {
panic!("x_train and y_train must have the same length");
}
self.x_train = x_train.clone();
self.y_train = y_train.clone();
}
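    /// Returns the indices (into the training set) of the `n_neighbors`
    /// training points closest to `x`, ordered nearest first.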
    pub fn knearest_neighbors(&self, x: &Vec<f64>) -> Vec<i32> {
        if self.x_train.is_empty() {
            panic!("fit must be called before querying neighbors");
        }
        if x.len() != self.x_train[0].len() {
            panic!("x must have the same number of features as the training data");
        }
        let mut distances = Vec::new();
        for i in 0..self.x_train.len() {
            let distance = get_dist(x, &self.x_train[i], &self.metric, self.p);
            distances.push((distance, i as i32));
        }
        // Sort ascending by distance and keep the k closest indices.
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        distances.iter().take(self.n_neighbors).map(|&(_, i)| i).collect()
    }
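    /// Like `knearest_neighbors`, but returns the *labels* of the closest
    /// training points rather than their indices.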
    fn knearest(&self, x: &Vec<f64>) -> Vec<f64> {
        if self.x_train.is_empty() {
            panic!("fit must be called before predicting");
        }
        if x.len() != self.x_train[0].len() {
            panic!("x must have the same number of features as the training data");
        }
        let mut distances = Vec::new();
        for i in 0..self.x_train.len() {
            let distance = get_dist(x, &self.x_train[i], &self.metric, self.p);
            distances.push((distance, self.y_train[i]));
        }
        // Sort ascending by distance and keep the labels of the k closest.
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        distances.iter().take(self.n_neighbors).map(|&(_, y)| y).collect()
    }
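    /// Predicts a class label for each row of `x_test` by majority vote
    /// among its `n_neighbors` nearest training points.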
    pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
        let mut y_pred = Vec::new();
        for x in x_test {
            let neighbors = self.knearest(x);
            let mut counts = HashMap::new();
            for label in &neighbors {
                *counts.entry(*label as i32).or_insert(0) += 1;
            }
            let max_count = *counts.values().max().unwrap();
            // Majority vote; ties are broken in favor of the class of the
            // nearest neighbor, since `neighbors` is sorted by distance.
            let winner = neighbors
                .iter()
                .map(|label| *label as i32)
                .find(|label| counts[label] == max_count)
                .unwrap();
            y_pred.push(winner as f64);
        }
        y_pred
    }
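    /// Returns, for each row of `x_test`, the fraction of its `n_neighbors`
    /// nearest training points belonging to each class. Columns are ordered
    /// by ascending class label.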
    pub fn predict_proba(&self, x_test: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        // Sort the class labels so the column order of the output is
        // deterministic instead of depending on HashSet iteration order.
        let mut classes: Vec<i64> = self
            .y_train
            .iter()
            .map(|y| *y as i64)
            .collect::<HashSet<_>>()
            .into_iter()
            .collect();
        classes.sort();
        let mut y_pred = Vec::new();
        for x in x_test {
            let neighbors = self.knearest(x);
            let mut counts: HashMap<i64, usize> = classes.iter().map(|c| (*c, 0)).collect();
            for label in &neighbors {
                *counts.get_mut(&(*label as i64)).unwrap() += 1;
            }
            // Each probability is the share of neighbors in that class.
            let probs = classes
                .iter()
                .map(|c| counts[c] as f64 / self.n_neighbors as f64)
                .collect();
            y_pred.push(probs);
        }
        y_pred
    }
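    /// Classification accuracy metrics of the model on a labeled test set,
    /// as computed by `crate::metrics::accuracy`.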
    pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> HashMap<String, f64> {
        let y_pred: Vec<i32> = self.predict(x_test).into_iter().map(|y| y as i32).collect();
        let y_true: Vec<i32> = y_test.iter().map(|y| *y as i32).collect();
        accuracy(&y_pred, &y_true)
    }
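    /// Returns the model's hyperparameters as strings, mirroring
    /// scikit-learn's `get_params`.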
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("n_neighbors".to_string(), self.n_neighbors.to_string());
params.insert("metric".to_string(), self.metric.clone());
if self.p.is_some() {
params.insert("p".to_string(), self.p.unwrap().to_string());
}
params
}
}
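/// K-nearest-neighbors regressor.
///
/// Predicts the unweighted mean of the targets of the `n_neighbors`
/// training points closest to each query.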
pub struct KNNeighborsRegressor {
pub n_neighbors: usize,
pub metric: String,
pub p: Option<i32>,
x_train: Vec<Vec<f64>>,
y_train: Vec<f64>,
}
impl KNNeighborsRegressor {
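    /// Builds an unfitted regressor. Panics on an unrecognized metric name,
    /// a missing `p` for "minkowski", or `n_neighbors == 0`.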
    pub fn new(n_neighbors: usize, metric: String, p: Option<i32>) -> KNNeighborsRegressor {
        if n_neighbors == 0 {
            panic!("n_neighbors must be at least 1");
        }
        if metric != "euclidean" && metric != "minkowski" && metric != "cosine" && metric != "manhattan" {
            panic!("metric must be one of 'euclidean', 'minkowski', 'cosine', 'manhattan'");
        }
        if metric == "minkowski" && p.is_none() {
            panic!("p must be specified for minkowski metric");
        }
KNNeighborsRegressor {
n_neighbors,
metric,
p,
x_train: Vec::new(),
y_train: Vec::new(),
}
}
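    /// Memorizes the training data verbatim; panics if `x_train` and
    /// `y_train` differ in length.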
pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>) {
if x_train.len() != y_train.len() {
panic!("x_train and y_train must have the same length");
}
self.x_train = x_train.clone();
self.y_train = y_train.clone();
}
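    /// Predicts each query's target as the unweighted mean of the targets
    /// of its `n_neighbors` nearest training points.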
    pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
        if self.x_train.is_empty() {
            panic!("fit must be called before predicting");
        }
        if x_test[0].len() != self.x_train[0].len() {
            panic!("x_test must have the same number of features as the training data");
        }
        let mut y_pred = Vec::new();
        for x in x_test {
            let mut distances = Vec::new();
            for j in 0..self.x_train.len() {
                let distance = get_dist(x, &self.x_train[j], &self.metric, self.p);
                distances.push((distance, self.y_train[j]));
            }
            distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
            // Unweighted mean of the targets of the k nearest points.
            let sum: f64 = distances.iter().take(self.n_neighbors).map(|&(_, y)| y).sum();
            y_pred.push(sum / self.n_neighbors as f64);
        }
        y_pred
    }
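    /// Returns the indices (into the training set) of the `n_neighbors`
    /// training points closest to `x`, ordered nearest first.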
    pub fn knearest_neighbors(&self, x: &Vec<f64>) -> Vec<i32> {
        if self.x_train.is_empty() {
            panic!("fit must be called before querying neighbors");
        }
        if x.len() != self.x_train[0].len() {
            panic!("x must have the same number of features as the training data");
        }
        let mut distances = Vec::new();
        for i in 0..self.x_train.len() {
            let distance = get_dist(x, &self.x_train[i], &self.metric, self.p);
            distances.push((distance, i as i32));
        }
        distances.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap());
        distances.iter().take(self.n_neighbors).map(|&(_, i)| i).collect()
    }
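    /// Returns the R² (coefficient of determination) of the predictions on
    /// the test set, via `crate::metrics::r2_score`.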
pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> f64 {
if x_test[0].len() != self.x_train[0].len() {
panic!("x_test must have the same number of features as the training data");
}
if x_test.len() != y_test.len() {
panic!("x_test and y_test must have the same length");
}
let y_pred = self.predict(x_test);
r2_score(&y_pred, y_test)
}
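    /// Returns the model's hyperparameters as strings.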
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("n_neighbors".to_string(), self.n_neighbors.to_string());
params.insert("metric".to_string(), self.metric.clone());
if self.p.is_some() {
params.insert("p".to_string(), self.p.unwrap().to_string());
}
params
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_knn_new() {
let knn = KNNeighborsClassifier::new(3, "euclidean".to_string(), None);
assert_eq!(knn.n_neighbors, 3);
assert_eq!(knn.metric, "euclidean");
assert_eq!(knn.p, None);
}
#[test]
fn test_knn_fit() {
let mut knn = KNNeighborsClassifier::new(3, "euclidean".to_string(), None);
let x_train = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
let y_train = vec![0.0, 1.0, 0.0];
knn.fit(&x_train, &y_train);
assert_eq!(knn.x_train, x_train);
assert_eq!(knn.y_train, y_train);
}
#[test]
fn test_knn_knearest() {
let mut knn = KNNeighborsClassifier::new(2, "euclidean".to_string(), None);
let x_train = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
let y_train = vec![0.0, 1.0, 0.0];
knn.fit(&x_train, &y_train);
let x = vec![1.0, 2.0];
let y = knn.knearest(&x);
assert_eq!(y, vec![0.0, 1.0]);
}
#[test]
fn test_knn_predict() {
let mut knn = KNNeighborsClassifier::new(2, "euclidean".to_string(), None);
let x_train = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
let y_train = vec![0.0, 1.0, 0.0];
knn.fit(&x_train, &y_train);
let x_test = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let y_pred = knn.predict(&x_test);
assert_eq!(y_pred, vec![0.0, 1.0]);
}
#[test]
fn test_knn_predict_proba() {
let mut knn = KNNeighborsClassifier::new(2, "euclidean".to_string(), None);
let x_train = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
let y_train = vec![0.0, 1.0, 0.0];
knn.fit(&x_train, &y_train);
let x_test = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
let y_pred = knn.predict_proba(&x_test);
assert_eq!(y_pred, vec![vec![0.5, 0.5], vec![0.5, 0.5]]);
}
#[test]
    fn test_knn_regressor() {
let mut knn = KNNeighborsRegressor::new(2, "euclidean".to_string(), None);
let x_train = vec![vec![1.0, 2.0], vec![3.0, 4.0], vec![5.0, 6.0]];
let y_train = vec![0.0, 9.0, 2.0];
knn.fit(&x_train, &y_train);
let x_test = vec![vec![0.0, 2.0], vec![8.0, 10.0]];
let y_pred = knn.predict(&x_test);
        // Nearest two to [0,2] have targets 0 and 9 (mean 4.5); nearest two
        // to [8,10] have targets 2 and 9 (mean 5.5).
        assert_eq!(y_pred, vec![4.5, 5.5]);
}
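    // Two extra sanity checks, added for illustration; the expected values
    // follow from hand-computed distances on the toy data.
    #[test]
    fn test_knn_manhattan() {
        let mut knn = KNNeighborsClassifier::new(1, "manhattan".to_string(), None);
        let x_train = vec![vec![0.0, 0.0], vec![10.0, 10.0]];
        let y_train = vec![0.0, 1.0];
        knn.fit(&x_train, &y_train);
        // [1,1] is 2 away from class 0 and 18 from class 1; [9,9] is the reverse.
        let y_pred = knn.predict(&vec![vec![1.0, 1.0], vec![9.0, 9.0]]);
        assert_eq!(y_pred, vec![0.0, 1.0]);
    }
    #[test]
    #[should_panic(expected = "p must be specified")]
    fn test_minkowski_requires_p() {
        KNNeighborsClassifier::new(3, "minkowski".to_string(), None);
    }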
}