use mathru::algebra::linear::{Vector, Matrix};
use crate::model::UnsupervisedLearn;
use rand::Rng;
pub enum KMeansInitializer{
Random,
}
pub struct KMeans
{
k: usize,
iter: usize,
centroids: Matrix<f64>,
initializer: KMeansInitializer
}
impl KMeans
{
pub fn new(k: usize, n: usize, init: KMeansInitializer) -> KMeans
{
KMeans
{
k: k,
iter: n,
centroids: Matrix::zero(0, 0),
initializer: init,
}
}
fn update_centroids<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
let (_input_m, input_n): (usize, usize) = input.dim();
let (classes, _distances): (Vector<usize>, Vector<f64>)= self.find_closest_centroids(input);
let mut new_centroids: Matrix<f64>= self.centroids.clone();
let mut row_indices: Vec<Vec<usize>> = vec![Vec::new(); self.k];
for i in 0..self.k
{
let centroid_index: usize = *classes.get(&i);
row_indices.get_mut(centroid_index).map(|f| f.push(i));
}
for (i, vec_i) in row_indices.iter().enumerate()
{
let mut sum: Matrix<f64> = Matrix::zero(1, input_n);
for v in vec_i.iter()
{
let row: Matrix<f64> = input.get_slice(*v, *v,0, input_n -1);
sum = &sum + &row;
}
let num_points: usize = vec_i.len();
if num_points != 0
{
sum = sum.apply(&|x| {x / (num_points as f64)});
new_centroids = new_centroids.set_slice(&sum, i, 0);
}
}
self.centroids = new_centroids;
}
fn find_closest_centroids(self: &Self, input: &Matrix<f64>) -> (Vector<usize>, Vector<f64>)
{
let (input_m, _input_n): (usize, usize) = input.dim();
let mut index: Vector<usize> = Vector::zero(input_m);
let mut dist: Vector<f64> = Vector::zero(input_m);
for i in 0..input_m
{
let input_i: Vector<f64> = input.get_row(&i);
let mut dist_i: Vector<f64> = Vector::zero(self.k);
for c in 0..self.k
{
let centroid: Vector<f64> = self.centroids.get_row(&c);
let diff: Vector<f64> = (¢roid - &input_i).transpose();
*dist_i.get_mut(&c) = diff.dotp(&diff);
}
let min_index: usize = dist_i.argmin();
let min_distance: f64 = *dist_i.get(&min_index);
*dist.get_mut(&i) = min_distance;
*index.get_mut(&i) = min_index;
}
(index, dist)
}
fn random_init_centroids<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
let (input_m, input_n) : (usize, usize) = input.dim();
let mut rng = rand::thread_rng();
let centroids_indices: Vec<usize> = (0..self.k).map(|_| {
rng.gen_range(0, input_m)
}).collect();
let mut centroids = Matrix::zero(self.k, input_n);
for (idx, value) in centroids_indices.iter().enumerate()
{
let row: Matrix<f64> = input.get_slice(*value, *value, 0, input_n - 1);
centroids = centroids.set_slice(&row, idx, 0);
}
self.centroids = centroids;
}
pub fn centroids(&self) -> &Matrix<f64>
{
return &self.centroids;
}
}
impl UnsupervisedLearn<Matrix<f64>, Vector<usize>> for KMeans
{
fn train<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
match self.initializer {
KMeansInitializer::Random => self.random_init_centroids(input)
}
for _i in 0..self.iter
{
self.update_centroids(input);
}
}
fn predict<'a, 'b>(self: &'a Self, input: &'b Matrix<f64>) -> Vector<usize>
{
let (indices, _centroids): (Vector<usize>, Vector<f64>) = self.find_closest_centroids(input);
return indices;
}
}