use mathru::algebra::linear::{Vector, Matrix};
use crate::model::UnsupervisedLearn;
pub enum KMeansInitializer{
Random,
}
pub struct KMeans
{
k: usize,
iter: usize,
centroids: Matrix<f64>,
initializer: KMeansInitializer
}
impl KMeans
{
pub fn new(k: usize, n: usize, init: KMeansInitializer) -> KMeans
{
KMeans
{
k: k,
iter: n,
centroids: Matrix::zero(0, 0),
initializer: init,
}
}
fn update_centroids<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
let (_input_m, input_n): (usize, usize) = input.dim();
let (classes, _distances): (Vector<f64>, Vector<f64>)= self.find_closest_centroids(input);
let mut new_centroids: Matrix<f64>= self.centroids.clone();
let mut row_indices: Vec<Vec<usize>> = vec![Vec::new(); self.k];
for i in 0..self.k
{
let centroid_index: f64 = *classes.get(i);
row_indices.get_mut(centroid_index as usize).map(|f| f.push(i));
}
for (i, vec_i) in row_indices.iter().enumerate()
{
let mut sum: Matrix<f64> = Matrix::zero(1, input_n);
for v in vec_i.iter()
{
let row: Matrix<f64> = input.get_slice(*v, *v,0, input_n -1);
sum = &sum + &row;
}
let num_points: usize = vec_i.len();
if num_points != 0
{
sum = sum.apply(&|x| {x / (num_points as f64)});
new_centroids = new_centroids.set_slice(&sum, i, 0);
}
}
self.centroids = new_centroids;
}
fn find_closest_centroids(self: &Self, input: &Matrix<f64>) -> (Vector<f64>, Vector<f64>)
{
let (input_m, _input_n): (usize, usize) = input.dim();
let mut index: Vector<f64> = Vector::zero(input_m);
let mut dist: Vector<f64> = Vector::zero(input_m);
for i in 0..input_m
{
let input_i: Vector<f64> = input.get_row(i);
let mut dist_i: Vector<f64> = Vector::zero(self.k);
for c in 0..self.k
{
let centroid: Vector<f64> = self.centroids.get_row(c);
let diff: Vector<f64> = (¢roid - &input_i).transpose();
*dist_i.get_mut(c) = diff.dotp(&diff);
}
let min_index: usize = dist_i.argmin();
let min_distance: f64 = *dist_i.get(min_index);
*dist.get_mut(i) = min_distance;
*index.get_mut(i) = min_index as f64;
}
(index, dist)
}
fn random_init_centroids<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
let (_input_m, input_n) : (usize, usize) = input.dim();
let centroids_indices: Vec<usize> = (0..self.k).map(|i| {
i
}).collect();
let mut centroids = Matrix::zero(self.k, input_n);
for (idx, value) in centroids_indices.iter().enumerate()
{
let row: Matrix<f64> = input.get_slice(*value, *value, 0, input_n - 1);
centroids = centroids.set_slice(&row, idx, 0);
}
self.centroids = centroids;
}
pub fn centroids(&self) -> &Matrix<f64>
{
return &self.centroids;
}
}
impl UnsupervisedLearn<Matrix<f64>, Vector<f64>> for KMeans
{
fn train<'a, 'b>(self: &'a mut Self, input: &'b Matrix<f64>)
{
match self.initializer {
KMeansInitializer::Random => self.random_init_centroids(input)
}
for _i in 0..self.iter
{
self.update_centroids(input);
}
}
fn predict<'a, 'b>(self: &'a Self, input: &'b Matrix<f64>) -> Vector<f64>
{
let (indices, _centroids): (Vector<f64>, Vector<f64>) = self.find_closest_centroids(input);
return indices;
}
}