use math::round;
use rand::*;
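/// Holds the shape of one fully connected layer: how many inputs it
/// receives and how many neurons it contains.
///
/// A minimal usage sketch (the shapes and the choice of ReLU are purely
/// illustrative, and it assumes the helper functions below are in scope):
///
/// ```
/// let layer = LayerDetails { n_inputs: 2, n_neurons: 3 };
/// let weights = layer.create_weights(); // 2x3, random values in (-1, 1)
/// let mut bias = layer.create_bias(); // [0.0, 0.0, 0.0]
/// let input = vec![vec![0.5, -0.2]]; // one sample with two features
/// let out = layer.output_of_layer(&input, &weights, &mut bias, activation_relu);
/// assert_eq!(out[0].len(), 3); // one activated value per neuron
/// ```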
pub struct LayerDetails {
    pub n_inputs: usize,
    pub n_neurons: i32,
}
impl LayerDetails {
    pub fn create_weights(&self) -> Vec<Vec<f64>> {
        let mut rng = rand::thread_rng();
        // n_inputs x n_neurons matrix of random values in (-1, 1), rounded to 3 decimals
        let mut weights: Vec<Vec<f64>> = vec![];
        for _ in 0..self.n_inputs {
            weights.push(
                (0..self.n_neurons)
                    .map(|_| round::ceil(rng.gen_range(-1., 1.), 3))
                    .collect(),
            );
        }
        weights
    }
    pub fn create_bias(&self) -> Vec<f64> {
        // one zero-initialized bias per neuron
        vec![0.; self.n_neurons as usize]
    }
    pub fn output_of_layer(
        &self,
        input: &Vec<Vec<f64>>,
        weights: &Vec<Vec<f64>>,
        bias: &mut Vec<f64>,
        f: fn(input: &Vec<f64>) -> Vec<f64>,
    ) -> Vec<Vec<f64>> {
        // (samples x n_inputs) . (n_inputs x n_neurons) -> samples x n_neurons
        let mut mat_mul = matrix_multiplication(&input, &weights);
        // add each neuron's bias to its column, row by row
        let mut output: Vec<Vec<f64>> = vec![];
        for i in &mut mat_mul {
            output.push(vector_addition(i, bias));
        }
        // apply the activation function to every row
        let mut activated_output = vec![];
        for i in output {
            activated_output.push(f(&i));
        }
        activated_output
    }
}
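/// Element-wise ReLU: values above zero pass through, everything else
/// becomes zero.
///
/// A small sketch with hand-picked values:
///
/// ```
/// let v = vec![-1.5, 0.0, 2.0];
/// assert_eq!(activation_relu(&v), vec![0.0, 0.0, 2.0]);
/// ```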
pub fn activation_relu<T>(input: &Vec<T>) -> Vec<T>
where
    T: Copy + std::cmp::PartialOrd + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // "0" is parsed into T so the same code works for integer and float types
    let zero = "0".parse::<T>().unwrap();
    input
        .iter()
        .map(|x| if *x > zero { *x } else { zero })
        .collect()
}
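/// Element-wise leaky ReLU: negative values are scaled by `alpha`
/// instead of being zeroed out.
///
/// A small sketch (float input, since `alpha` must be representable in T):
///
/// ```
/// let v = vec![-2.0, 3.0];
/// assert_eq!(activation_leaky_relu(&v, 0.1), vec![-0.2, 3.0]);
/// ```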
pub fn activation_leaky_relu<T>(input: &Vec<T>, alpha: f64) -> Vec<T>
where
    T: Copy + std::cmp::PartialOrd + std::ops::Mul<Output = T> + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // alpha is round-tripped through a string, so it must be representable in T
    let zero = "0".parse::<T>().unwrap();
    let a = format!("{}", alpha).parse::<T>().unwrap();
    input
        .iter()
        .map(|x| if *x > zero { *x } else { a * *x })
        .collect()
}
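/// Element-wise sigmoid, always returning `f64`.
///
/// A small sketch; sigmoid(0) is exactly 0.5:
///
/// ```
/// assert_eq!(activation_sigmoid(&vec![0.]), vec![0.5]);
/// ```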
pub fn activation_sigmoid<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::str::FromStr + std::fmt::Debug,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // 1 / (1 + e^(-x)); the Debug-format/parse round trip turns any numeric T into f64
    input
        .iter()
        .map(|x| {
            let x: f64 = format!("{:?}", x).parse().unwrap();
            1. / (1. + (-x).exp())
        })
        .collect()
}
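/// Element-wise hyperbolic tangent, always returning `f64`.
///
/// A small sketch; tanh(0) is exactly 0:
///
/// ```
/// assert_eq!(activation_tanh(&vec![0.]), vec![0.]);
/// ```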
pub fn activation_tanh<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::str::FromStr + std::fmt::Debug,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // (e^x - e^(-x)) / (e^x + e^(-x)), parsing each value to f64 once
    input
        .iter()
        .map(|x| {
            let x: f64 = format!("{:?}", x).parse().unwrap();
            (x.exp() - (-x).exp()) / (x.exp() + (-x).exp())
        })
        .collect()
}
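/// Arithmetic mean, computed in `T` and returned as `f64`.
///
/// A small sketch (note that integer inputs are averaged with integer
/// division before the conversion):
///
/// ```
/// assert_eq!(mean(&vec![1., 2., 3., 4.]), 2.5);
/// ```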
pub fn mean<T>(list: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + Copy
        + std::string::ToString
        + std::ops::Add<T, Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // sum / length, computed in T (integer inputs therefore use integer division)
    let zero: T = "0".parse().unwrap();
    let length: T = list.len().to_string().parse().unwrap();
    (list.iter().fold(zero, |acc, x| acc + *x) / length)
        .to_string()
        .parse()
        .unwrap()
}
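/// Population variance (division by n, matching `covariance` below).
///
/// A small sketch:
///
/// ```
/// assert_eq!(variance(&vec![1., 2., 3., 4.]), 1.25);
/// ```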
pub fn variance<T>(list: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::marker::Copy
        + std::fmt::Display
        + std::ops::Sub<T, Output = T>
        + std::ops::Add<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::fmt::Debug
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // population variance: mean of the squared deviations from the mean
    let zero: T = "0".parse().unwrap();
    let mu = mean(list);
    let length: T = list.len().to_string().parse().unwrap();
    let squared_deviations: Vec<_> = list
        .iter()
        .map(|x| (*x - mu.to_string().parse().unwrap()) * (*x - mu.to_string().parse().unwrap()))
        .collect();
    let sum = squared_deviations.iter().fold(zero, |a, b| a + *b);
    (sum / length).to_string().parse().unwrap()
}
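/// Population covariance of two equally long lists.
///
/// A small sketch with y = 2x:
///
/// ```
/// let x = vec![1., 2., 3., 4.];
/// let y = vec![2., 4., 6., 8.];
/// assert_eq!(covariance(&x, &y), 2.5);
/// ```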
pub fn covariance<T>(list1: &Vec<T>, list2: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // population covariance: mean of the products of paired deviations
    let mu1 = mean(list1);
    let mu2 = mean(list2);
    let zero: T = "0".parse().unwrap();
    let length: T = list1.len().to_string().parse().unwrap();
    let tupled: Vec<_> = list1.iter().zip(list2).collect();
    let sum = tupled.iter().fold(zero, |a, b| {
        a + ((*b.0 - mu1.to_string().parse().unwrap()) * (*b.1 - mu2.to_string().parse().unwrap()))
    });
    (sum / length).to_string().parse().unwrap()
}
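/// Least-squares intercept and slope `(b0, b1)` for y = b0 + b1 * x.
///
/// A small sketch with y = 2x, so the intercept is 0 and the slope is 2:
///
/// ```
/// let x = vec![1., 2., 3., 4.];
/// let y = vec![2., 4., 6., 8.];
/// assert_eq!(coefficient(&x, &y), (0., 2.));
/// ```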
pub fn coefficient<T>(list1: &Vec<T>, list2: &Vec<T>) -> (f64, f64)
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // b1 = cov(x, y) / var(x), b0 = mean(y) - b1 * mean(x)
    let b1 = covariance(list1, list2) / variance(list1);
    let b0 = mean(list2) - (b1 * mean(list1));
    (b0, b1)
}
pub fn simple_linear_regression_prediction<T>(train: &Vec<(T, T)>, test: &Vec<(T, T)>) -> Vec<T>
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // fit y = b0 + b1 * x on the (x, y) training pairs
    let train_features = &train.iter().map(|a| a.0).collect();
    let train_target = &train.iter().map(|a| a.1).collect();
    let (offset, slope) = coefficient(train_features, train_target);
    let b0: T = offset.to_string().parse().unwrap();
    let b1: T = slope.to_string().parse().unwrap();
    // predict on the test features and report the RMSE against the test targets
    let predicted_output = test.iter().map(|a| b0 + b1 * a.0).collect();
    let original_output: Vec<_> = test.iter().map(|a| a.1).collect();
    println!(
        "RMSE: {:?}",
        root_mean_square(&predicted_output, &original_output)
    );
    predicted_output
}
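/// Root mean squared error between two equally long lists.
///
/// A small sketch; identical lists give an RMSE of 0:
///
/// ```
/// let a = vec![1., 2., 3.];
/// assert_eq!(root_mean_square(&a, &a), 0.);
/// ```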
pub fn root_mean_square<T>(list1: &Vec<T>, list2: &Vec<T>) -> f64
where
    T: std::ops::Sub<T, Output = T>
        + Copy
        + std::ops::Mul<T, Output = T>
        + std::ops::Add<T, Output = T>
        + std::ops::Div<Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    println!("========================================================================================================================================================");
    // sqrt of the mean of the squared pairwise differences
    let zero: T = "0".parse().unwrap();
    let tupled: Vec<_> = list1.iter().zip(list2).collect();
    let length: T = list1.len().to_string().parse().unwrap();
    let mean_square_error = tupled
        .iter()
        .fold(zero, |b, a| b + ((*a.1 - *a.0) * (*a.1 - *a.0)))
        / length;
    let mse: f64 = mean_square_error.to_string().parse().unwrap();
    mse.sqrt()
}
use std::collections::HashMap;
use std::fs;
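/// Reads a comma-separated file into a header row and a table of
/// string-valued cells.
///
/// A sketch with a hypothetical path (the file must exist at runtime):
///
/// ```
/// let (columns, values) = read_csv("data.csv".to_string());
/// println!("{} columns, {} rows", columns.len(), values.len());
/// ```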
pub fn read_csv(path: String) -> (Vec<String>, Vec<Vec<String>>) {
    println!("========================================================================================================================================================");
    println!("Reading the file ...");
    let file = fs::read_to_string(&path).unwrap();
    // split into non-empty lines; the first line is the header
    let splitted: Vec<&str> = file.split('\n').filter(|a| *a != "").collect();
    println!("Number of rows = {}", splitted.len() - 1);
    let table: Vec<Vec<_>> = splitted.iter().map(|a| a.split(',').collect()).collect();
    let values = table[1..]
        .iter()
        .map(|a| a.iter().map(|b| b.to_string()).collect())
        .collect();
    let columns: Vec<String> = table[0].iter().map(|a| a.to_string()).collect();
    (columns, values)
}
use std::io::Error;
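/// Parses a vector of strings into `U` (only `f64` and `i32` are
/// supported), substituting `impute_with` for empty entries and
/// reporting their positions.
///
/// A small sketch imputing 99. for the missing value:
///
/// ```
/// let raw = vec!["1.5".to_string(), "".to_string(), "2.5".to_string()];
/// let (converted, missing) = convert_and_impute(&raw, 0., 99.);
/// assert_eq!(converted.unwrap(), vec![1.5, 99., 2.5]);
/// assert_eq!(missing, vec![1]);
/// ```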
pub fn convert_and_impute<U>(
    list: &Vec<String>,
    to: U,
    impute_with: U,
) -> (Result<Vec<U>, Error>, Vec<usize>)
where
    U: std::cmp::PartialEq + Copy + std::string::ToString + std::str::FromStr,
    <U as std::str::FromStr>::Err: std::fmt::Debug,
{
    println!("========================================================================================================================================================");
    // parse every entry into U, substituting impute_with where the entry is empty
    let mut output: Vec<_> = vec![];
    let mut missing = vec![];
    match type_of(to) {
        "f64" => {
            for (n, i) in list.iter().enumerate() {
                if *i != "" {
                    let x = i.parse::<U>().unwrap();
                    output.push(x);
                } else {
                    output.push(impute_with);
                    missing.push(n);
                    println!("Missing value found in {}th position of the vector", n);
                }
            }
        }
        "i32" => {
            for (n, i) in list.iter().enumerate() {
                if *i != "" {
                    // keep only the integer part of values like "12.5"
                    let string_splitted: Vec<_> = i.split('.').collect();
                    let ones_digit = string_splitted[0].parse::<U>().unwrap();
                    output.push(ones_digit);
                } else {
                    output.push(impute_with);
                    missing.push(n);
                    println!("Missing value found in {}th position of the vector", n);
                }
            }
        }
        _ => println!("This type conversion can't be done, choose either int or float type\nIn case of string conversion, use impute_string"),
    }
    (Ok(output), missing)
}
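/// Replaces empty strings with `impute_with`, borrowing every other
/// entry unchanged.
///
/// A small sketch:
///
/// ```
/// let mut v = vec!["a".to_string(), "".to_string(), "c".to_string()];
/// assert_eq!(impute_string(&mut v, "b"), vec!["a", "b", "c"]);
/// ```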
pub fn impute_string<'a>(list: &'a mut Vec<String>, impute_with: &'a str) -> Vec<&'a str> {
    println!("========================================================================================================================================================");
    // substitute impute_with wherever an entry is empty
    list.iter()
        .enumerate()
        .map(|(n, a)| {
            if *a == String::from("") {
                println!("Missing value found in {}th position of the vector", n);
                impute_with
            } else {
                &a[..]
            }
        })
        .collect()
}
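/// Replaces every distinct value with a class number 1., 2., ... in
/// order of first appearance.
///
/// A small sketch:
///
/// ```
/// let v = vec!["a", "b", "a"];
/// assert_eq!(convert_string_categorical(&v, false), vec![1., 2., 1.]);
/// ```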
pub fn convert_string_categorical<T>(list: &Vec<T>, extra_class: bool) -> Vec<f64>
where
    T: std::cmp::PartialEq + std::cmp::Eq + std::hash::Hash + Copy,
{
    println!("========================================================================================================================================================");
    let values = unique_values(&list);
    if extra_class && values.len() > 10 {
        println!("The number of classes will be more than 10");
    }
    // number the distinct values 1., 2., ... in order of first appearance
    let mut map: HashMap<&T, f64> = HashMap::new();
    for (n, i) in values.iter().enumerate() {
        map.insert(i, n as f64 + 1.);
    }
    list.iter().map(|a| map[a]).collect()
}
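/// Min-max scales the values into [0, 1].
///
/// A small sketch:
///
/// ```
/// assert_eq!(normalize_vector_f(&vec![0., 5., 10.]), vec![0., 0.5, 1.]);
/// ```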
pub fn normalize_vector_f(list: &Vec<f64>) -> Vec<f64> {
    // min-max scaling into [0, 1]
    let (minimum, maximum) = min_max_f(&list);
    let range: f64 = maximum - minimum;
    list.iter().map(|a| 1. - ((maximum - a) / range)).collect()
}
pub fn logistic_function_f(matrix: &Vec<Vec<f64>>, beta: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
    println!("========================================================================================================================================================");
    println!("logistic function");
    println!(
        "{:?}x{:?}\n{:?}x{:?}",
        matrix.len(),
        matrix[0].len(),
        beta.len(),
        beta[0].len()
    );
    // sigmoid applied element-wise to matrix . beta
    matrix_multiplication(matrix, beta)
        .iter()
        .map(|a| a.iter().map(|b| 1. / (1. + ((b * -1.).exp()))).collect())
        .collect()
}
pub fn log_gradient_f(
    matrix1: &Vec<Vec<f64>>,
    beta: &Vec<Vec<f64>>,
    matrix2: &Vec<f64>,
) -> Vec<Vec<f64>> {
    println!("========================================================================================================================================================");
    println!("Log gradient_f");
    // residuals: sigmoid(X . beta) - y
    let mut first_calc = vec![];
    for (n, i) in logistic_function_f(matrix1, beta).iter().enumerate() {
        let mut row = vec![];
        for j in i.iter() {
            row.push(j - matrix2[n]);
        }
        first_calc.push(row);
    }
    let first_calc_t = transpose(&first_calc);
    let x_t = transpose(&matrix1);
    // gradient: residuals^T . X, reshaped back to the shape of beta
    let mut final_calc = vec![];
    for i in first_calc_t.iter() {
        for j in x_t.iter() {
            final_calc.push(dot_product(&i, &j))
        }
    }
    shape_changer(&final_calc, matrix1[0].len(), matrix1.len())
}
pub fn cost_function_f(matrix1: &Vec<Vec<f64>>, beta: &Vec<Vec<f64>>, matrix2: &Vec<f64>) -> f64 {
    println!("========================================================================================================================================================");
    println!(
        "shape\ninput: {:?},{:?}\nbeta: {:?},{:?}\ntarget: {:?}",
        matrix1[0].len(),
        matrix1.len(),
        beta[0].len(),
        beta.len(),
        matrix2.len()
    );
    println!("Calculating cost function ...");
    // cross-entropy: -(y * ln(h) + (1 - y) * ln(1 - h)), with h = sigmoid(X . beta)
    let logistic_func_v = logistic_function_f(&transpose(&matrix1), &beta);
    let log_logistic: Vec<Vec<f64>> = logistic_func_v
        .iter()
        .map(|a| a.iter().map(|a| a.ln()).collect())
        .collect();
    // step1: y * ln(h)
    let mut step1 = vec![];
    for i in log_logistic.iter() {
        let mut row = vec![];
        for (n, j) in i.iter().enumerate() {
            for (m, k) in matrix2.iter().enumerate() {
                if n == m {
                    row.push(j * k);
                }
            }
        }
        step1.push(row);
    }
    // step2: (1 - y) * ln(1 - h)
    let one_minus_matrix2: Vec<f64> = matrix2.iter().map(|b| 1. - b).collect();
    let one_minus_log_logistic: Vec<Vec<f64>> = logistic_func_v
        .iter()
        .map(|a| a.iter().map(|b| (1. - b).ln()).collect())
        .collect();
    let minus_step1: Vec<Vec<f64>> = step1
        .iter()
        .map(|a| a.iter().map(|b| *b * -1.).collect())
        .collect();
    let mut step2 = vec![];
    for i in one_minus_log_logistic.iter() {
        step2.push(element_wise_operation(i, &one_minus_matrix2, "Mul"));
    }
    // -step1 - step2 = -(step1 + step2)
    let output = element_wise_matrix_operation(&minus_step1, &step2, "Sub");
    let sum = output
        .iter()
        .fold(0., |a, b| a + b.iter().fold(0., |a, b| a + b));
    sum / (beta.len() * beta.len()) as f64
}
pub fn gradient_descent(
    matrix1: &Vec<Vec<f64>>,
    beta: &mut Vec<Vec<f64>>,
    matrix2: &Vec<f64>,
    learning_rate: f64,
    coverage_rate: f64,
) -> (Vec<Vec<f64>>, i32) {
    let mut cost = cost_function_f(matrix1, beta, matrix2);
    println!("Gradient descent ...");
    let mut iterations = 1;
    let mut change_cost = 1.;
    // iterate until the improvement in cost falls below coverage_rate
    while change_cost > coverage_rate {
        let old_cost = cost;
        println!("{:?}x{:?}", beta.len(), beta[0].len());
        // beta = beta - learning_rate * gradient
        *beta = element_wise_matrix_operation(
            beta,
            &log_gradient_f(matrix1, beta, matrix2)
                .iter()
                .map(|a| a.iter().map(|b| b * learning_rate).collect())
                .collect(),
            "Sub",
        );
        cost = cost_function_f(matrix1, &beta, matrix2);
        change_cost = old_cost - cost;
        iterations += 1;
    }
    (beta.clone(), iterations)
}
pub fn logistic_predict(matrix1: &Vec<Vec<f64>>, beta: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
    // threshold the predicted probabilities at 0.5
    let prediction_probability = logistic_function_f(matrix1, beta);
    prediction_probability
        .iter()
        .map(|a| a.iter().map(|b| if *b >= 0.5 { 1. } else { 0. }).collect())
        .collect()
}
pub fn randomize(rows: &Vec<f64>) -> Vec<f64> {
    use rand::seq::SliceRandom;
    use rand::thread_rng;
    // shuffle the full range of indices, then read the values back in that order
    let mut order: Vec<usize> = (0..rows.len()).collect();
    let slice: &mut [usize] = &mut order;
    let mut rng = thread_rng();
    slice.shuffle(&mut rng);
    let mut output = vec![];
    for i in order.iter() {
        output.push(rows[*i]);
    }
    output
}
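/// Shuffles the values, then splits off the first `percentage` of them
/// as the test set.
///
/// A sketch; the split is random, so only the sizes are asserted:
///
/// ```
/// let data: Vec<f64> = (0..10).map(|a| a as f64).collect();
/// let (train, test) = train_test_split(&data, 0.2);
/// assert_eq!(train.len(), 8);
/// assert_eq!(test.len(), 2);
/// ```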
pub fn train_test_split(input: &Vec<f64>, percentage: f64) -> (Vec<f64>, Vec<f64>) {
    // shuffle, then reserve the first `percentage` of the values as the test set
    let data = randomize(input);
    let test_count = (data.len() as f64 * percentage) as usize;
    let test = data[0..test_count].to_vec();
    let train = data[test_count..].to_vec();
    (train, test)
}
pub fn binary_logistic_regression(
    path: String,
    target_name: String,
    test_percentage: f64,
    learning_rate: f64,
    coverage_rate: f64,
) -> (Vec<Vec<f64>>, i32) {
    let (columns, values) = read_csv(path);
    // build a column-name -> normalized-values map
    let mut df: HashMap<String, Vec<f64>> = HashMap::new();
    for (n, i) in columns.iter().enumerate() {
        let mut v = vec![];
        for j in values.iter() {
            for (m, k) in j.iter().enumerate() {
                if n == m {
                    v.push(k.parse().unwrap());
                }
            }
        }
        v = normalize_vector_f(&v);
        df.insert(i.to_string(), v);
    }
    // note: train_test_split reshuffles per call, so each column is shuffled independently
    let mut test_features: HashMap<String, Vec<f64>> = HashMap::new();
    let mut train_features: HashMap<String, Vec<f64>> = HashMap::new();
    let mut test_target: HashMap<String, Vec<f64>> = HashMap::new();
    let mut train_target: HashMap<String, Vec<f64>> = HashMap::new();
    for (k, v) in df.iter() {
        let (train_part, test_part) = train_test_split(v, test_percentage);
        if *k != target_name {
            train_features.insert(k.clone(), train_part);
            test_features.insert(k.clone(), test_part);
        } else {
            train_target.insert(k.clone(), train_part);
            test_target.insert(k.clone(), test_part);
        }
    }
    let feature_vector: Vec<_> = train_features.values().cloned().collect();
    let target_vector: Vec<_> = train_target.values().cloned().collect();
    let mut beta = vec![vec![0.; train_features.keys().len()]];
    println!("BETA: {:?}", beta);
    let (new_beta, iteration_count) = gradient_descent(
        &feature_vector,
        &mut beta,
        &target_vector[0],
        learning_rate,
        coverage_rate,
    );
    (new_beta, iteration_count)
}
pub struct MultivariantLinearRegression {
    pub header: Vec<String>,
    pub data: Vec<Vec<String>>,
    pub split_ratio: f64,
    pub alpha_learning_rate: f64,
    pub iterations: i32,
}
use std::collections::BTreeMap;
impl MultivariantLinearRegression {
    pub fn multivariant_linear_regression(&self) {
        println!(
            "Before removing missing values, number of rows : {:?}",
            self.data.len()
        );
        // keep only the rows that have a value for every column
        let df_na_removed: Vec<_> = self
            .data
            .iter()
            .filter(|a| a.len() == self.header.len())
            .collect();
        println!(
            "After removing missing values, number of rows : {:?}",
            df_na_removed.len()
        );
        println!(
            "The target here is header named: {:?}",
            self.header[self.header.len() - 1]
        );
        let df_f: Vec<Vec<f64>> = df_na_removed
            .iter()
            .map(|a| a.iter().map(|b| b.parse::<f64>().unwrap()).collect())
            .collect();
        println!("Values are now converted to f64");
        let (train, test) = MultivariantLinearRegression::train_test_split(&df_f, self.split_ratio);
        println!("Train size: {}\nTest size : {:?}", train.len(), test.len());
        // the last header is the target column; everything else is a feature
        let mut train_feature = BTreeMap::new();
        let mut test_feature = BTreeMap::new();
        let mut train_target = BTreeMap::new();
        let mut test_target = BTreeMap::new();
        for (n, j) in self.header.iter().enumerate() {
            let mut row = vec![];
            for i in train.iter() {
                row.push(i[n]);
            }
            if *j != self.header[self.header.len() - 1] {
                train_feature.entry(j.to_string()).or_insert(row);
            } else {
                train_target.entry(j.to_string()).or_insert(row);
            }
        }
        for (n, j) in self.header.iter().enumerate() {
            let mut row = vec![];
            for i in test.iter() {
                row.push(i[n]);
            }
            if *j != self.header[self.header.len() - 1] {
                test_feature.entry(j.to_string()).or_insert(row);
            } else {
                test_target.entry(j.to_string()).or_insert(row);
            }
        }
        // min-max normalize every column
        let mut norm_test_features = BTreeMap::new();
        let mut norm_train_features = BTreeMap::new();
        let mut norm_test_target = BTreeMap::new();
        let mut norm_train_target = BTreeMap::new();
        for (k, _) in test_feature.iter() {
            norm_test_features
                .entry(k.clone())
                .or_insert(normalize_vector_f(&test_feature[k]));
        }
        for (k, _) in train_feature.iter() {
            norm_train_features
                .entry(k.clone())
                .or_insert(normalize_vector_f(&train_feature[k]));
        }
        for (k, _) in test_target.iter() {
            norm_test_target
                .entry(k.clone())
                .or_insert(normalize_vector_f(&test_target[k]));
        }
        for (k, _) in train_target.iter() {
            norm_train_target
                .entry(k.clone())
                .or_insert(normalize_vector_f(&train_target[k]));
        }
        // fit the coefficients with batch gradient descent
        let coefficients = vec![0.; train[0].len() - 1];
        let target: Vec<_> = norm_train_target.values().cloned().collect();
        let (coefficients, _) = MultivariantLinearRegression::batch_gradient_descent(
            &MultivariantLinearRegression::hash_to_table(&norm_train_features),
            &target[0],
            &coefficients,
            self.alpha_learning_rate,
            self.iterations,
        );
        println!("The weights of the inputs are {:?}", coefficients);
        // prediction for a row is the sum of its coefficient-weighted features
        let pv: Vec<_> = MultivariantLinearRegression::hash_to_table(&norm_test_features)
            .iter()
            .map(|a| element_wise_operation(a, &coefficients, "Mul"))
            .collect();
        let mut predicted_values = vec![];
        for i in pv.iter() {
            predicted_values.push(i.iter().fold(0., |a, b| a + b))
        }
        let a = &MultivariantLinearRegression::hash_to_table(&norm_test_target);
        let mut actual = vec![];
        for i in a.iter() {
            actual.push(i[0]);
        }
        println!(
            "The r2 of this model is : {:?}",
            MultivariantLinearRegression::generate_score(&predicted_values, &actual)
        );
    }
    fn train_test_split(input: &Vec<Vec<f64>>, percentage: f64) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
        // shuffle the rows, then reserve the first `percentage` of them as the test set
        let data = MultivariantLinearRegression::randomize(input);
        let test_count = (data.len() as f64 * percentage) as usize;
        let test = data[0..test_count].to_vec();
        let train = data[test_count..].to_vec();
        (train, test)
    }
    fn randomize(rows: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
        use rand::seq::SliceRandom;
        use rand::thread_rng;
        // shuffle the full range of row indices, then collect the rows in that order
        let mut order: Vec<usize> = (0..rows.len()).collect();
        let slice: &mut [usize] = &mut order;
        let mut rng = thread_rng();
        slice.shuffle(&mut rng);
        let mut output = vec![];
        for i in order.iter() {
            output.push(rows[*i].clone());
        }
        output
    }
    fn generate_score(predicted: &Vec<f64>, actual: &Vec<f64>) -> f64 {
        // r2 = 1 - (sum of squared errors / total sum of squares)
        let mu = actual.iter().fold(0., |a, b| a + b) / (actual.len() as f64);
        let sst = actual.iter().fold(0., |a, b| a + ((b - mu) * (b - mu)));
        let sse = predicted
            .iter()
            .zip(actual.iter())
            .fold(0., |a, b| a + ((b.0 - b.1) * (b.0 - b.1)));
        1. - (sse / sst)
    }
    fn mse_cost_function(features: &Vec<Vec<f64>>, target: &Vec<f64>, theta: &Vec<f64>) -> f64 {
        // (1 / 2m) * sum((X . theta - y)^2)
        let rows = target.len();
        let prod = matrix_vector_product_f(&features, theta);
        let numerator: Vec<_> = element_wise_operation(&prod, target, "Sub")
            .iter()
            .map(|a| *a * *a)
            .collect();
        numerator.iter().fold(0., |a, b| a + b) / (2. * rows as f64)
    }
    pub fn batch_gradient_descent(
        features: &Vec<Vec<f64>>,
        target: &Vec<f64>,
        theta: &Vec<f64>,
        alpha_lr: f64,
        max_iter: i32,
    ) -> (Vec<f64>, Vec<f64>) {
        let mut new_theta = theta.clone();
        let mut cost_history = vec![];
        let rows = target.len();
        for _ in 0..max_iter {
            // hypothesis: X . theta
            let hypothesis_value = matrix_vector_product_f(features, &new_theta);
            // loss: hypothesis - y
            let loss: Vec<f64> = hypothesis_value
                .iter()
                .zip(target)
                .map(|(a, b)| a - b)
                .collect();
            // gradient: (X^T . loss) / m
            let gradient: Vec<f64> = matrix_vector_product_f(&transpose(features), &loss)
                .iter()
                .map(|a| a / rows as f64)
                .collect();
            // update: theta - alpha * gradient
            new_theta = element_wise_operation(
                &new_theta,
                &gradient.iter().map(|a| alpha_lr * a).collect(),
                "Sub",
            );
            cost_history.push(MultivariantLinearRegression::mse_cost_function(
                features, target, &new_theta,
            ));
        }
        println!();
        (new_theta, cost_history)
    }
    pub fn hash_to_table<T: Copy + std::fmt::Debug>(d: &BTreeMap<String, Vec<T>>) -> Vec<Vec<T>> {
        // turn a map of columns into a row-major table
        let mut vector = vec![];
        for (_, v) in d.iter() {
            vector.push(v.clone());
        }
        let mut original = vec![];
        for i in 0..vector[0].len() {
            let mut row = vec![];
            for j in vector.iter() {
                row.push(j[i]);
            }
            original.push(row);
        }
        original
    }
}
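/// A float matrix wrapper with determinant and inverse helpers.
///
/// A small sketch on a 2x2 matrix (usable like this from within this
/// module, since the `matrix` field is private):
///
/// ```
/// let m = MatrixF { matrix: vec![vec![1., 2.], vec![3., 4.]] };
/// assert_eq!(m.determinant_f(), -2.); // 1*4 - 2*3
/// ```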
#[derive(Debug)]
pub struct MatrixF {
    matrix: Vec<Vec<f64>>,
}
impl MatrixF {
    pub fn determinant_f(&self) -> f64 {
        if MatrixF::is_square_matrix(&self.matrix) {
            println!("Calculating Determinant...");
            match self.matrix.len() {
                1 => self.matrix[0][0],
                2 => MatrixF::determinant_2(&self),
                3..=100 => MatrixF::determinant_3plus(&self),
                _ => {
                    println!("Can't find determinant for size more than {}", 100);
                    0.
                }
            }
        } else {
            panic!("The input should be a square matrix");
        }
    }
    fn determinant_2(&self) -> f64 {
        // ad - bc
        (self.matrix[0][0] * self.matrix[1][1]) - (self.matrix[0][1] * self.matrix[1][0])
    }
    fn determinant_3plus(&self) -> f64 {
        // Gaussian elimination: reduce to an upper triangular matrix,
        // then multiply the diagonal
        let length = self.matrix.len() - 1;
        let mut new_matrix: Vec<Vec<f64>> = self
            .matrix
            .iter()
            .map(|a| a.iter().map(|a| MatrixF::round_off_f(*a, 3)).collect())
            .collect();
        for diagonal in 0..=length {
            for i in diagonal + 1..=length {
                // nudge zero pivots to avoid dividing by zero
                if new_matrix[diagonal][diagonal] == 0.0 {
                    new_matrix[diagonal][diagonal] = 0.001;
                }
                let scalar = new_matrix[i][diagonal] / new_matrix[diagonal][diagonal];
                for j in 0..=length {
                    new_matrix[i][j] = new_matrix[i][j] - (scalar * new_matrix[diagonal][j]);
                }
            }
        }
        let mut product = 1.;
        for i in 0..=length {
            product *= new_matrix[i][i]
        }
        product
    }
    pub fn is_square_matrix<T>(matrix: &Vec<Vec<T>>) -> bool {
        matrix.len() == matrix[0].len()
    }
    pub fn round_off_f(value: f64, decimals: i32) -> f64 {
        ((value * 10.0f64.powi(decimals)).round()) / 10.0f64.powi(decimals)
    }
    pub fn inverse_f(&self) -> Vec<Vec<f64>> {
        // Gauss-Jordan elimination: apply the same row operations to an
        // identity matrix until the input becomes the identity
        let mut input = self.matrix.clone();
        let length = self.matrix.len();
        let mut identity = MatrixF::identity_matrix(length);
        let index: Vec<usize> = (0..length).collect();
        for diagonal in 0..length {
            // scale the pivot row so the pivot becomes 1
            let diagonal_scalar = 1. / (input[diagonal][diagonal]);
            for column in 0..length {
                input[diagonal][column] *= diagonal_scalar;
                identity[diagonal][column] *= diagonal_scalar;
            }
            // eliminate the pivot column from every other row
            let except_diagonal: Vec<usize> = index[0..diagonal]
                .iter()
                .copied()
                .chain(index[diagonal + 1..].iter().copied())
                .collect();
            for i in except_diagonal {
                let row_scalar = input[i][diagonal];
                for j in 0..length {
                    input[i][j] = input[i][j] - (row_scalar * input[diagonal][j]);
                    identity[i][j] = identity[i][j] - (row_scalar * identity[diagonal][j])
                }
            }
        }
        identity
    }
    fn identity_matrix(size: usize) -> Vec<Vec<f64>> {
        let mut output: Vec<Vec<f64>> = MatrixF::zero_matrix(size);
        for i in 0..size {
            output[i][i] = 1.;
        }
        output
    }
    fn zero_matrix(size: usize) -> Vec<Vec<f64>> {
        let mut output: Vec<Vec<f64>> = vec![];
        for _ in 0..size {
            output.push(vec![0.; size]);
        }
        output
    }
}
pub fn print_a_matrix<T: std::fmt::Debug>(string: &str, matrix: &Vec<Vec<T>>) {
    println!("{}", string);
    for i in matrix.iter() {
        println!("{:?}", i);
    }
    println!("\n");
}
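/// Reshapes a flat, row-major list into `rows` rows of `columns` values.
///
/// A small sketch:
///
/// ```
/// let flat = vec![1, 2, 3, 4, 5, 6];
/// assert_eq!(shape_changer(&flat, 3, 2), vec![vec![1, 2, 3], vec![4, 5, 6]]);
/// ```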
pub fn shape_changer<T>(list: &Vec<T>, columns: usize, rows: usize) -> Vec<Vec<T>>
where
    T: std::clone::Clone,
{
    // peel `columns` values off the front of the list, `rows` times
    let mut l = list.clone();
    let mut output = vec![vec![]; rows];
    if columns * rows == list.len() {
        for i in 0..rows {
            output[i] = l[..columns].iter().cloned().collect();
            l = l[columns..].iter().cloned().collect();
        }
        output
    } else {
        panic!("!!! The shape transformation is not possible, check the values entered !!!");
    }
}
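/// Transposes a matrix: rows become columns.
///
/// A small sketch:
///
/// ```
/// let m = vec![vec![1, 2, 3], vec![4, 5, 6]];
/// assert_eq!(transpose(&m), vec![vec![1, 4], vec![2, 5], vec![3, 6]]);
/// ```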
pub fn transpose<T: std::clone::Clone + Copy>(matrix: &Vec<Vec<T>>) -> Vec<Vec<T>> {
    // collect the matrix column by column, then reshape into rows
    let mut output = vec![];
    for j in 0..matrix[0].len() {
        for i in 0..matrix.len() {
            output.push(matrix[i][j]);
        }
    }
    let x = matrix[0].len();
    shape_changer(&output, matrix.len(), x)
}
pub fn vector_addition<T>(a: &mut Vec<T>, b: &mut Vec<T>) -> Vec<T>
where
    T: std::ops::Add<Output = T> + Copy + std::fmt::Debug + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // if the lengths differ, zero-pad the shorter vector before adding
    let mut output = vec![];
    if a.len() == b.len() {
        for i in 0..a.len() {
            output.push(a[i] + b[i]);
        }
        output
    } else if a.len() < b.len() {
        let new_a = pad_with_zero(a, b.len() - a.len());
        println!("The changed vector is {:?}", new_a);
        for i in 0..b.len() {
            output.push(new_a[i] + b[i]);
        }
        output
    } else {
        let new_b = pad_with_zero(b, a.len() - b.len());
        println!("The changed vector is {:?}", new_b);
        for i in 0..a.len() {
            output.push(a[i] + new_b[i]);
        }
        output
    }
}
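/// Standard matrix product of an m x k and a k x n matrix.
///
/// A small sketch: a 1x2 times a 2x1 matrix gives a 1x1 result:
///
/// ```
/// let a = vec![vec![1., 2.]];
/// let b = vec![vec![3.], vec![4.]];
/// assert_eq!(matrix_multiplication(&a, &b), vec![vec![11.]]); // 1*3 + 2*4
/// ```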
pub fn matrix_multiplication<T>(input: &Vec<Vec<T>>, weights: &Vec<Vec<T>>) -> Vec<Vec<T>>
where
    T: Copy + std::iter::Sum + std::ops::Mul<Output = T>,
{
    // (m x k) . (k x n): dot each input row with each column of weights
    let weights_t = transpose(&weights);
    let mut output: Vec<T> = vec![];
    if input[0].len() == weights.len() {
        for i in input.iter() {
            for j in weights_t.iter() {
                output.push(dot_product(&i, &j));
            }
        }
        // the flat products are row-major: m rows of n values each
        shape_changer(&output, weights_t.len(), input.len())
    } else {
        panic!("Dimension mismatch")
    }
}
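/// Sum of the pairwise products of two equally long vectors.
///
/// A small sketch:
///
/// ```
/// assert_eq!(dot_product(&vec![1, 2, 3], &vec![4, 5, 6]), 32);
/// ```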
pub fn dot_product<T>(a: &Vec<T>, b: &Vec<T>) -> T
where
    T: std::ops::Mul<Output = T> + std::iter::Sum + Copy,
{
    a.iter().zip(b.iter()).map(|(x, y)| *x * *y).sum()
}
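/// Applies "Add", "Sub", "Mul" or "Div" pairwise to two equally long
/// vectors.
///
/// A small sketch:
///
/// ```
/// let a = vec![4., 6.];
/// let b = vec![2., 3.];
/// assert_eq!(element_wise_operation(&a, &b, "Div"), vec![2., 2.]);
/// ```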
pub fn element_wise_operation<T>(a: &Vec<T>, b: &Vec<T>, operation: &str) -> Vec<T>
where
    T: Copy
        + std::fmt::Debug
        + std::ops::Mul<Output = T>
        + std::ops::Add<Output = T>
        + std::ops::Sub<Output = T>
        + std::ops::Div<Output = T>
        + std::cmp::PartialEq
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    if a.len() == b.len() {
        a.iter()
            .zip(b.iter())
            .map(|(x, y)| match operation {
                "Mul" => *x * *y,
                "Add" => *x + *y,
                "Sub" => *x - *y,
                "Div" => *x / *y,
                _ => panic!("Operation unsuccessful!\nEnter any of the following(case sensitive):\n> Add\n> Sub\n> Mul\n> Div"),
            })
            .collect()
    } else {
        panic!("Dimension mismatch")
    }
}
pub fn pad_with_zero<T>(vector: &mut Vec<T>, count: usize) -> Vec<T>
where
    T: Copy + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // return a copy of the vector with `count` parsed zeros appended
    let mut output = vector.clone();
    let zero = "0".parse::<T>().unwrap();
    for _ in 0..count {
        output.push(zero);
    }
    output
}
pub fn make_matrix_float<T>(input: &Vec<Vec<T>>) -> Vec<Vec<f64>>
where
    T: std::fmt::Display + Copy,
{
    println!("========================================================================================================================================================");
    input
        .iter()
        .map(|a| {
            a.iter()
                .map(|b| {
                    if is_numerical(*b) {
                        format!("{}", b).parse().unwrap()
                    } else {
                        panic!("Non-numerical value present in the input");
                    }
                })
                .collect()
        })
        .collect()
}
pub fn make_vector_float<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::fmt::Display + Copy,
{
    println!("========================================================================================================================================================");
    input
        .iter()
        .map(|b| {
            if is_numerical(*b) {
                format!("{}", b).parse().unwrap()
            } else {
                panic!("Non-numerical value present in the input");
            }
        })
        .collect()
}
pub fn round_off_f(value: f64, decimals: i32) -> f64 {
    println!("========================================================================================================================================================");
    ((value * 10.0f64.powi(decimals)).round()) / 10.0f64.powi(decimals)
}
pub fn min_max_f(list: &Vec<f64>) -> (f64, f64) {
    // smallest and largest value of a non-empty list
    let mut sorted = list.clone();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    (sorted[0], sorted[sorted.len() - 1])
}
pub fn is_numerical<T>(value: T) -> bool {
    matches!(
        type_of(&value),
        "&i32" | "&i8" | "&i16" | "&i64" | "&i128"
            | "&f64" | "&f32"
            | "&u32" | "&u8" | "&u16" | "&u64" | "&u128"
            | "&usize" | "&isize"
    )
}
pub fn value_counts<T>(list: &Vec<T>) -> HashMap<T, u32>
where
    T: std::cmp::PartialEq + std::cmp::Eq + std::hash::Hash + Copy,
{
    println!("========================================================================================================================================================");
    // frequency of every distinct value
    let mut count: HashMap<T, u32> = HashMap::new();
    for i in list {
        *count.entry(*i).or_insert(0) += 1;
    }
    count
}
use std::any::type_name;
pub fn type_of<T>(_: T) -> &'static str {
    type_name::<T>()
}
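/// Keeps the first occurrence of every distinct value, preserving order.
///
/// A small sketch:
///
/// ```
/// assert_eq!(unique_values(&vec![1, 2, 2, 3, 1]), vec![1, 2, 3]);
/// ```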
pub fn unique_values<T>(list: &Vec<T>) -> Vec<T>
where
    T: std::cmp::PartialEq + Copy,
{
    let mut output = vec![];
    for i in list.iter() {
        if !output.contains(i) {
            output.push(*i);
        }
    }
    output
}
pub fn element_wise_matrix_operation<T>(
matrix1: &Vec<Vec<T>>,
matrix2: &Vec<Vec<T>>,
operation: &str,
) -> Vec<Vec<T>>
where
T: Copy
+ std::fmt::Debug
+ std::ops::Mul<Output = T>
+ std::ops::Add<Output = T>
+ std::ops::Sub<Output = T>
+ std::ops::Div<Output = T>
+ std::cmp::PartialEq
+ std::str::FromStr,
<T as std::str::FromStr>::Err: std::fmt::Debug,
{
if matrix1.len() == matrix2.len() && matrix1[0].len() == matrix2[0].len() {
matrix1
.iter()
.zip(matrix2.iter())
.map(|(x, y)| {
x.iter()
.zip(y.iter())
.map(|a| match operation {
"Mul" => *a.0 * *a.1,
"Add" => *a.0 + *a.1,
"Sub" => *a.0 - *a.1,
"Div" => *a.0 / *a.1,
_ => panic!("Operation unsuccessful!\nEnter any of the following(case sensitive):\n> Add\n> Sub\n> Mul\n> Div"),
})
.collect()
})
.collect()
} else {
panic!("Dimension mismatch")
}
}
pub fn matrix_vector_product_f(matrix: &Vec<Vec<f64>>, vector: &Vec<f64>) -> Vec<f64> {
    // dot every row of the matrix with the vector
    let mut output: Vec<_> = vec![];
    for i in matrix.iter() {
        output.push(dot_product(i, vector));
    }
    output
}
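/// Cuts a vector into `parts` equally sized chunks (panicking when the
/// length is not divisible).
///
/// A small sketch:
///
/// ```
/// let v = vec![1, 2, 3, 4, 5, 6];
/// assert_eq!(split_vector(&v, 2), vec![vec![1, 2, 3], vec![4, 5, 6]]);
/// ```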
pub fn split_vector<T: std::clone::Clone>(vector: &Vec<T>, parts: i32) -> Vec<Vec<T>> {
    if vector.len() % parts as usize == 0 {
        let mut output = vec![];
        let size = vector.len() / parts as usize;
        let mut from = 0;
        let mut to = from + size;
        while to <= vector.len() {
            output.push(vector[from..to].to_vec());
            from = from + size;
            to = from + size;
        }
        output
    } else {
        panic!("This partition is not possible, check the number of partitions passed")
    }
}
pub fn split_vector_at<T>(vector: &Vec<T>, at: T) -> Vec<Vec<T>>
where
    T: std::cmp::PartialEq + Copy + std::clone::Clone,
{
    // split on every occurrence of `at`; each chunk after the first starts with `at`
    if vector.contains(&at) {
        let mut output = vec![];
        let copy = vector.clone();
        let mut from = 0;
        for (n, i) in vector.iter().enumerate() {
            if i == &at {
                output.push(copy[from..n].to_vec());
                from = n;
            }
        }
        output.push(copy[from..].to_vec());
        output
    } else {
        panic!("The value is not in the vector, please check");
    }
}