use math::round;
use rand::*;
/// Shape description of one fully-connected neural-network layer.
pub struct LayerDetails {
    // Number of input features feeding into this layer.
    pub n_inputs: usize,
    // Number of neurons in this layer.
    // NOTE(review): i32 while n_inputs is usize — it is cast to usize in
    // create_bias; a negative value would make that cast wrap. Confirm
    // callers only pass non-negative values.
    pub n_neurons: i32,
}
impl LayerDetails {
    /// Random weight matrix of shape `n_inputs x n_neurons`; each weight is
    /// drawn uniformly from [-1, 1) and rounded up to 3 decimal places.
    pub fn create_weights(&self) -> Vec<Vec<f64>> {
        let mut rng = rand::thread_rng();
        let mut weight: Vec<Vec<f64>> = vec![];
        for _ in 0..self.n_inputs {
            weight.push(
                (0..self.n_neurons)
                    .map(|_| round::ceil(rng.gen_range(-1., 1.), 3))
                    .collect(),
            );
        }
        weight
    }
    /// Bias vector of length `n_neurons`, every entry set to `value`.
    pub fn create_bias(&self, value: f64) -> Vec<f64> {
        vec![value; self.n_neurons as usize]
    }
    /// Forward pass: (input x weights) + bias, passed through activation `f`
    /// ("relu" | "leaky relu" | "sigmoid" | "tanh"). `alpha` is only used by
    /// "leaky relu". Panics on any other activation name.
    pub fn output_of_layer(
        &self,
        input: &Vec<Vec<f64>>,
        weights: &Vec<Vec<f64>>,
        bias: &mut Vec<f64>,
        f: &str,
        alpha: f64,
    ) -> Vec<Vec<f64>> {
        let mut mat_mul = transpose(&matrix_multiplication(&input, &weights));
        // Add the bias vector to every (transposed) row.
        let mut output: Vec<Vec<f64>> = vec![];
        for i in &mut mat_mul {
            output.push(vector_addition(i, bias));
        }
        // FIX: iterate by reference instead of cloning `output` in every arm.
        let mut activated_output = vec![];
        match f {
            "relu" => {
                println!("Alpha is for 'leaky relu' only, it is not taken into account here");
                for i in &output {
                    activated_output.push(activation_relu(i));
                }
            }
            "leaky relu" => {
                for i in &output {
                    activated_output.push(activation_leaky_relu(i, alpha));
                }
            }
            "sigmoid" => {
                println!("Alpha is for 'leaky relu' only, it is not taken into account here");
                for i in &output {
                    activated_output.push(activation_sigmoid(i));
                }
            }
            "tanh" => {
                println!("Alpha is for 'leaky relu' only, it is not taken into account here");
                for i in &output {
                    activated_output.push(activation_tanh(i));
                }
            }
            _ => panic!("Select from either 'tanh','sigmoid','relu','leaky relu'"),
        }
        activated_output
    }
}
/// Element-wise ReLU: keeps entries greater than zero, replaces the rest
/// with zero. `zero` is obtained by parsing "0" so the function stays
/// generic over any string-parsable numeric type.
/// FIX: previously produced zero via `*x - *x`, which propagates NaN for
/// float inputs and obscures intent; now pushes the parsed zero directly.
pub fn activation_relu<T>(input: &Vec<T>) -> Vec<T>
where
    T: Copy + std::cmp::PartialOrd + std::ops::Sub<Output = T> + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let zero = "0".parse::<T>().unwrap();
    input
        .iter()
        .map(|x| if *x > zero { *x } else { zero })
        .collect()
}
/// Element-wise leaky ReLU: x for x > 0, alpha * x otherwise.
/// `alpha` is round-tripped through a string so it can be converted into
/// the generic element type `T`.
pub fn activation_leaky_relu<T>(input: &Vec<T>, alpha: f64) -> Vec<T>
where
    T: Copy + std::cmp::PartialOrd + std::ops::Mul<Output = T> + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let threshold = "0".parse::<T>().unwrap();
    let slope = format!("{}", alpha).parse::<T>().unwrap();
    let mut activated = Vec::with_capacity(input.len());
    for &value in input.iter() {
        if value > threshold {
            activated.push(value);
        } else {
            activated.push(slope * value);
        }
    }
    activated
}
/// Element-wise logistic sigmoid: 1 / (1 + e^(-x)).
/// Each value is round-tripped through its Debug formatting to convert the
/// generic `T` into f64.
/// BUG FIX: the exponent sign was flipped (1 / (1 + e^x)), which computed
/// sigmoid(-x) — decreasing instead of increasing in x.
pub fn activation_sigmoid<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::str::FromStr + std::fmt::Debug,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    input
        .iter()
        .map(|x| {
            let v = format!("{:?}", x).parse::<f64>().unwrap();
            1. / (1. + (-v).exp())
        })
        .collect()
}
/// Element-wise hyperbolic tangent: (e^x - e^-x) / (e^x + e^-x).
/// Values are converted to f64 via their Debug formatting, so `T` only
/// needs to print as a plain number.
pub fn activation_tanh<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::str::FromStr + std::fmt::Debug,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    input
        .iter()
        .map(|x| {
            let v: f64 = format!("{:?}", x).parse().unwrap();
            let pos = v.exp();
            let neg = (v * (-1.)).exp();
            (pos - neg) / (pos + neg)
        })
        .collect()
}
/// Configuration and data holder for a multivariate linear regression run.
pub struct MultivariantLinearRegression {
    // Column names; the LAST column is treated as the regression target.
    pub header: Vec<String>,
    // Raw rows as strings (parsed to f64 inside the pipeline).
    pub data: Vec<Vec<String>>,
    // Fraction of rows used for the TEST split (0..1).
    pub split_ratio: f64,
    // Gradient-descent learning rate.
    pub alpha_learning_rate: f64,
    // Number of gradient-descent iterations.
    pub iterations: i32,
}
use std::collections::BTreeMap;
impl MultivariantLinearRegression {
pub fn multivariant_linear_regression(&self)
{
println!(
"Before removing missing values, number of rows : {:?}",
self.data.len()
);
let df_na_removed: Vec<_> = self
.data
.iter()
.filter(|a| a.len() == self.header.len())
.collect();
println!(
"After removing missing values, number of rows : {:?}",
df_na_removed.len()
);
println!(
"The target here is header named: {:?}",
self.header[self.header.len() - 1]
);
let df_f: Vec<Vec<f64>> = df_na_removed
.iter()
.map(|a| a.iter().map(|b| b.parse::<f64>().unwrap()).collect())
.collect();
println!("Values are now converted to f64");
let (train, test) = MultivariantLinearRegression::train_test_split(&df_f, self.split_ratio);
println!("Train size: {}\nTest size : {:?}", train.len(), test.len());
let mut train_feature = BTreeMap::new();
let mut test_feature = BTreeMap::new();
let mut train_target = BTreeMap::new();
let mut test_target = BTreeMap::new();
let mut coefficients = vec![];
for (n, j) in self.header.iter().enumerate() {
if *j != self.header[self.header.len() - 1] {
let mut row = vec![];
for i in train.iter() {
row.push(i[n]);
}
train_feature.entry(j.to_string()).or_insert(row);
} else {
let mut row = vec![];
for i in train.iter() {
row.push(i[n]);
}
train_target.entry(j.to_string()).or_insert(row);
}
}
for (n, j) in self.header.iter().enumerate() {
if *j != self.header[self.header.len() - 1] {
{
let mut row = vec![];
for i in test.iter() {
row.push(i[n]);
}
test_feature.entry(j.to_string()).or_insert(row);
}
} else {
let mut row = vec![];
for i in test.iter() {
row.push(i[n]);
}
test_target.entry(j.to_string()).or_insert(row);
}
}
let mut norm_test_features = BTreeMap::new();
let mut norm_train_features = BTreeMap::new();
let mut norm_test_target = BTreeMap::new();
let mut norm_train_target = BTreeMap::new();
for (k, _) in test_feature.iter() {
norm_test_features
.entry(k.clone())
.or_insert(normalize_vector_f(&test_feature[k]));
}
for (k, _) in train_feature.iter() {
norm_train_features
.entry(k.clone())
.or_insert(normalize_vector_f(&train_feature[k]));
}
for (k, _) in test_target.iter() {
norm_test_target
.entry(k.clone())
.or_insert(normalize_vector_f(&test_target[k]));
}
for (k, _) in train_target.iter() {
norm_train_target
.entry(k.clone())
.or_insert(normalize_vector_f(&train_target[k]));
}
coefficients = vec![0.; train[0].len() - 1];
let target: Vec<_> = norm_train_target.values().cloned().collect();
let (coefficeints, _) = MultivariantLinearRegression::batch_gradient_descent(
&MultivariantLinearRegression::hash_to_table(&norm_train_features),
&target[0],
&coefficients,
self.alpha_learning_rate,
self.iterations,
);
println!("The weights of the inputs are {:?}", coefficeints);
let mut pv: Vec<_> = MultivariantLinearRegression::hash_to_table(&norm_test_features)
.iter()
.map(|a| element_wise_operation(a, &coefficeints, "mul"))
.collect();
let mut predicted_values = vec![];
for i in pv.iter() {
predicted_values.push(i.iter().fold(0., |a, b| a + b))
}
let a = &MultivariantLinearRegression::hash_to_table(&norm_test_target);
let mut actual = vec![];
for i in a.iter() {
actual.push(i[0]);
}
println!(
"The r2 of this model is : {:?}",
MultivariantLinearRegression::generate_score(&predicted_values, &actual)
);
}
fn train_test_split(input: &Vec<Vec<f64>>, percentage: f64) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
let data = MultivariantLinearRegression::randomize(input);
let test_count = (data.len() as f64 * percentage) as usize;
let test = data[0..test_count].to_vec();
let train = data[test_count..].to_vec();
(train, test)
}
fn randomize(rows: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
use rand::seq::SliceRandom;
use rand::{thread_rng, Rng};
let mut order: Vec<usize> = (0..rows.len() - 1 as usize).collect();
let slice: &mut [usize] = &mut order;
let mut rng = thread_rng();
slice.shuffle(&mut rng);
let mut output = vec![];
for i in order.iter() {
output.push(rows[*i].clone());
}
output
}
fn generate_score(predicted: &Vec<f64>, actual: &Vec<f64>) -> f64 {
let sst: Vec<_> = actual
.iter()
.map(|a| {
(a - (actual.iter().fold(0., |a, b| a + b) / (actual.len() as f64))
* (a - (actual.iter().fold(0., |a, b| a + b) / (actual.len() as f64))))
})
.collect();
let ssr = predicted
.iter()
.zip(actual.iter())
.fold(0., |a, b| a + (b.0 - b.1));
let r2 = 1. - (ssr / (sst.iter().fold(0., |a, b| a + b)));
r2
}
fn mse_cost_function(features: &Vec<Vec<f64>>, target: &Vec<f64>, theta: &Vec<f64>) -> f64 {
let rows = target.len();
let prod = matrix_vector_product_f(&features, theta);
let numerator: Vec<_> = element_wise_operation(&prod, target, "sub")
.iter()
.map(|a| *a * *a)
.collect();
numerator.iter().fold(0., |a, b| a + b) / (2. * rows as f64)
}
pub fn batch_gradient_descent(
features: &Vec<Vec<f64>>,
target: &Vec<f64>,
theta: &Vec<f64>,
alpha_lr: f64,
max_iter: i32,
) -> (Vec<f64>, Vec<f64>) {
let mut new_theta = theta.clone();
let mut hypothesis_value = vec![];
let mut cost_history = vec![];
let mut loss = vec![];
let mut gradient = vec![];
let rows = target.len();
for _ in 0..max_iter {
hypothesis_value = matrix_vector_product_f(features, &new_theta);
loss = hypothesis_value
.iter()
.zip(target)
.map(|(a, b)| a - b)
.collect();
gradient = matrix_vector_product_f(&transpose(features), &loss)
.iter()
.map(|a| a / rows as f64)
.collect();
new_theta = element_wise_operation(
&new_theta,
&gradient.iter().map(|a| alpha_lr * a).collect(),
"sub",
)
.clone();
cost_history.push(MultivariantLinearRegression::mse_cost_function(
features, target, &new_theta,
));
}
println!("");
(new_theta.clone(), cost_history)
}
pub fn hash_to_table<T: Copy + std::fmt::Debug>(d: &BTreeMap<String, Vec<T>>) -> Vec<Vec<T>> {
let mut vector = vec![];
for (_, v) in d.iter() {
vector.push(v.clone());
}
let mut original = vec![];
for i in 0..vector[0].len() {
let mut row = vec![];
for j in vector.iter() {
row.push(j[i]);
}
original.push(row);
}
original
}
}
/// Arithmetic mean of `list`, generic over string-parsable numeric types.
/// The sum and the division happen inside `T`; the final value is
/// round-tripped through a string into f64.
/// NOTE(review): for integer element types the internal division truncates
/// before the conversion to f64 — confirm callers only pass floats.
/// FIX: removed the duplicated `std::str::FromStr` bound.
pub fn mean<T>(list: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + Copy
        + std::string::ToString
        + std::ops::Add<T, Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // "0".parse / len.to_string().parse keep the arithmetic generic in T.
    let zero: T = "0".parse().unwrap();
    let len_str = list.len().to_string();
    let length: T = len_str.parse().unwrap();
    (list.iter().fold(zero, |acc, x| acc + *x) / length)
        .to_string()
        .parse()
        .unwrap()
}
/// Sum of squared deviations from the mean.
/// NOTE: despite the name this is NOT divided by n — `coefficient` relies
/// on this (slope = covariance_sum / variance_sum forms the OLS estimate),
/// so the behavior is kept as-is.
/// FIX: removed the dead `_len_str` local that was parsed and never used.
pub fn variance<T>(list: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::marker::Copy
        + std::fmt::Display
        + std::ops::Sub<T, Output = T>
        + std::ops::Add<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::fmt::Debug
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let zero: T = "0".parse().unwrap();
    // Mean comes back as f64 and is converted into T via a string round-trip.
    let mu = mean(list);
    let output: Vec<_> = list
        .iter()
        .map(|x| (*x - mu.to_string().parse().unwrap()) * (*x - mu.to_string().parse().unwrap()))
        .collect();
    let variance = output.iter().fold(zero, |a, b| a + *b);
    variance.to_string().parse().unwrap()
}
/// Sum of products of paired deviations: sum((x - mx) * (y - my)).
/// NOTE: like `variance`, this is NOT divided by n; `coefficient` and
/// `correlation` both compensate for that, so the behavior is kept.
/// FIX: removed the dead `len_str` local that was parsed and never used.
pub fn covariance<T>(list1: &Vec<T>, list2: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let mu1 = mean(list1);
    let mu2 = mean(list2);
    let zero: T = "0".parse().unwrap();
    let tupled: Vec<_> = list1.iter().zip(list2).collect();
    let output = tupled.iter().fold(zero, |a, b| {
        a + ((*b.0 - mu1.to_string().parse().unwrap()) * (*b.1 - mu2.to_string().parse().unwrap()))
    });
    let numerator: f64 = output.to_string().parse().unwrap();
    numerator
}
/// Simple linear-regression coefficients for y = b0 + b1 * x.
/// Returns (intercept b0, slope b1); the un-normalized sums returned by
/// `covariance` and `variance` cancel to give the usual OLS slope.
pub fn coefficient<T>(list1: &Vec<T>, list2: &Vec<T>) -> (f64, f64)
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let slope = covariance(list1, list2) / variance(list1);
    let intercept = mean(list2) - (slope * mean(list1));
    (intercept.to_string().parse().unwrap(), slope)
}
/// Fits y = b0 + b1*x and returns predictions for the test x-values,
/// printing the coefficients and the RMSE against the test x-values.
/// NOTE(review): the fit uses train `.0` values against test `.1` values,
/// and RMSE compares predictions with test `.0` — this train/test mixing
/// looks unintentional; confirm against callers before relying on it.
pub fn simple_linear_regression_prediction<T>(train: &Vec<(T, T)>, test: &Vec<(T, T)>) -> Vec<T>
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let train_features = &train.iter().map(|a| a.0).collect();
    let test_features = &test.iter().map(|a| a.1).collect();
    let (offset, slope) = coefficient(train_features, test_features);
    // Round-trip through strings converts the f64 coefficients back into T.
    let b0: T = offset.to_string().parse().unwrap();
    let b1: T = slope.to_string().parse().unwrap();
    let predicted_output = test.iter().map(|a| b0 + b1 * a.0).collect();
    let original_output: Vec<_> = test.iter().map(|a| a.0).collect();
    println!("========================================================================================================================================================");
    println!("b0 = {:?} and b1= {:?}", b0, b1);
    println!(
        "RMSE: {:?}",
        root_mean_square(&predicted_output, &original_output)
    );
    predicted_output
}
/// Root-mean-square error between two equally long vectors:
/// sqrt(mean((list2[i] - list1[i])^2)).
pub fn root_mean_square<T>(list1: &Vec<T>, list2: &Vec<T>) -> f64
where
    T: std::ops::Sub<T, Output = T>
        + Copy
        + std::ops::Mul<T, Output = T>
        + std::ops::Add<T, Output = T>
        + std::ops::Div<Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // Arithmetic stays inside T; length and zero come from string parses.
    let mut sum_of_squares: T = "0".parse().unwrap();
    let length: T = list1.len().to_string().parse().unwrap();
    for (first, second) in list1.iter().zip(list2.iter()) {
        let diff = *second - *first;
        sum_of_squares = sum_of_squares + diff * diff;
    }
    let mse: f64 = (sum_of_squares / length).to_string().parse().unwrap();
    mse.powf(0.5)
}
use std::collections::HashMap;
use std::fs;
/// Reads the CSV at `path` into (header columns, data rows as strings).
/// Splits purely on '\n' and ',' — no quoting/escaping support. Panics if
/// the file cannot be read.
pub fn read_csv<'a>(path: String) -> (Vec<String>, Vec<Vec<String>>) {
    println!("Reading the file ...");
    let file = fs::read_to_string(&path).unwrap();
    let splitted: Vec<&str> = file.split("\n").collect();
    // `rows` already excludes the header line.
    let rows: i32 = (splitted.len() - 1) as i32;
    // NOTE(review): printing rows - 1 subtracts the header twice — looks
    // off by one; confirm the intended count.
    println!("Number of rows = {}", rows - 1);
    let table: Vec<Vec<_>> = splitted.iter().map(|a| a.split(",").collect()).collect();
    // First line is the header; everything after becomes data.
    // NOTE(review): a trailing newline yields a final empty data row —
    // callers like multivariant_linear_regression drop short rows, but
    // verify other callers handle it.
    let values = table[1..]
        .iter()
        .map(|a| a.iter().map(|b| b.to_string()).collect())
        .collect();
    let columns: Vec<String> = table[0].iter().map(|a| a.to_string()).collect();
    (columns, values)
}
use std::io::Error;
/// Parses a vector of strings into numbers of type `U`, dispatching on the
/// runtime type name of `to` ("f64" or "i32" only). Empty strings are
/// replaced with `impute_with` and their indices are returned alongside.
/// For "i32" input like "3.7", only the part before the '.' is parsed.
/// NOTE(review): unsupported types only print a message and still return
/// `Ok` with an empty vector — callers cannot distinguish that from an
/// empty input.
pub fn convert_and_impute<U>(
    list: &Vec<String>,
    to: U,
    impute_with: U,
) -> (Result<Vec<U>, Error>, Vec<usize>)
where
    U: std::cmp::PartialEq + Copy + std::marker::Copy + std::string::ToString + std::str::FromStr,
    <U as std::str::FromStr>::Err: std::fmt::Debug,
{
    println!("========================================================================================================================================================");
    let mut output: Vec<_> = vec![];
    let mut missing = vec![];
    // Dispatch on the concrete type name of U.
    match type_of(to) {
        "f64" => {
            for (n, i) in list.iter().enumerate() {
                if *i != "" {
                    // Panics if a non-empty cell is not parseable as U.
                    let x = i.parse::<U>().unwrap();
                    output.push(x);
                } else {
                    // Missing value: impute and remember the position.
                    output.push(impute_with);
                    missing.push(n);
                    println!("Error found in {}th position of the vector", n);
                }
            }
        }
        "i32" => {
            for (n, i) in list.iter().enumerate() {
                if *i != "" {
                    // Keep only the integer part before any decimal point.
                    let string_splitted: Vec<_> = i.split(".").collect();
                    let ones_digit = string_splitted[0].parse::<U>().unwrap();
                    output.push(ones_digit);
                } else {
                    output.push(impute_with);
                    missing.push(n);
                    println!("Error found in {}th position of the vector", n);
                }
            }
        }
        _ => println!("This type conversion cant be done, choose either int or float type\n Incase of string conversion, use impute_string"),
    }
    (Ok(output), missing)
}
/// Replaces empty strings with `impute_with`, returning borrowed slices of
/// the originals for non-empty entries. The input vector itself is not
/// modified (the &mut is kept only for signature compatibility).
pub fn impute_string<'a>(list: &'a mut Vec<String>, impute_with: &'a str) -> Vec<&'a str> {
    let mut imputed = Vec::with_capacity(list.len());
    for (n, item) in list.iter().enumerate() {
        if item.is_empty() {
            println!("Missing value found in {}th position of the vector", n);
            imputed.push(impute_with);
        } else {
            imputed.push(&item[..]);
        }
    }
    imputed
}
/// Label-encodes categorical values: each distinct value is assigned a
/// number starting from 1.0 (in first-occurrence order) and every element
/// is mapped to its class number. Warns when `extra_class` is set and more
/// than 10 classes exist.
pub fn convert_string_categorical<T>(list: &Vec<T>, extra_class: bool) -> Vec<f64>
where
    T: std::cmp::PartialEq + std::cmp::Eq + std::hash::Hash + Copy,
{
    println!("========================================================================================================================================================");
    let classes = unique_values(&list);
    if extra_class && classes.len() > 10 {
        println!("The number of classes will be more than 10");
    }
    let mut encoding: HashMap<&T, f64> = HashMap::new();
    for (index, class) in classes.iter().enumerate() {
        encoding.insert(class, index as f64 + 1.);
    }
    list.iter().map(|value| encoding[value]).collect()
}
/// Min-max scales `list` into [0, 1]: min maps to 0, max maps to 1.
pub fn normalize_vector_f(list: &Vec<f64>) -> Vec<f64> {
    let (minimum, maximum) = min_max_f(&list);
    let spread: f64 = maximum - minimum;
    let mut scaled = Vec::with_capacity(list.len());
    for value in list.iter() {
        scaled.push(1. - ((maximum - value) / spread));
    }
    scaled
}
/// Logistic hypothesis: sigmoid of every element of (matrix x beta).
/// Prints the operand shapes before multiplying.
pub fn logistic_function_f(matrix: &Vec<Vec<f64>>, beta: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
    println!("========================================================================================================================================================");
    println!("logistic function");
    println!(
        "{:?}x{:?}\n{:?}x{:?}",
        matrix.len(),
        matrix[0].len(),
        beta.len(),
        beta[0].len()
    );
    let mut probabilities = vec![];
    for row in matrix_multiplication(matrix, beta).iter() {
        let mut sigmoid_row = vec![];
        for b in row.iter() {
            sigmoid_row.push(1. / (1. + ((b * -1.).exp())));
        }
        probabilities.push(sigmoid_row);
    }
    probabilities
}
/// Gradient of the logistic loss: X^T . (sigmoid(X.beta) - y), reshaped to
/// matrix1[0].len() columns by matrix1.len() rows.
pub fn log_gradient_f(
    matrix1: &Vec<Vec<f64>>,
    beta: &Vec<Vec<f64>>,
    matrix2: &Vec<f64>,
) -> Vec<Vec<f64>> {
    println!("========================================================================================================================================================");
    println!("Log gradient_f");
    // Residuals: predicted probability minus target, row by row.
    let mut residual = vec![];
    for (n, probabilities) in logistic_function_f(matrix1, beta).iter().enumerate() {
        let mut row = vec![];
        for p in probabilities.iter() {
            row.push(p - matrix2[n]);
        }
        residual.push(row);
    }
    let residual_t = transpose(&residual);
    // Manual transpose of matrix1: columns become rows.
    let mut features_t = vec![];
    for j in 0..matrix1[0].len() {
        let mut column = vec![];
        for row in matrix1.iter() {
            column.push(row[j]);
        }
        features_t.push(column);
    }
    // All pairwise dot products, flattened, then reshaped.
    let mut flattened = vec![];
    for r in residual_t.iter() {
        for c in features_t.iter() {
            flattened.push(dot_product(&r, &c))
        }
    }
    shape_changer(&flattened, matrix1[0].len(), matrix1.len())
}
/// Hard class prediction: logistic probability thresholded at 0.5
/// (>= 0.5 becomes 1.0, everything else 0.0).
pub fn logistic_predict(matrix1: &Vec<Vec<f64>>, beta: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
    logistic_function_f(matrix1, beta)
        .iter()
        .map(|row| {
            row.iter()
                .map(|p| if *p >= 0.5 { 1. } else { 0. })
                .collect()
        })
        .collect()
}
pub fn randomize_vector_f(rows: &Vec<f64>) -> Vec<f64> {
use rand::seq::SliceRandom;
use rand::{thread_rng, Rng};
let mut order: Vec<usize> = (0..rows.len() as usize).collect();
let slice: &mut [usize] = &mut order;
let mut rng = thread_rng();
slice.shuffle(&mut rng);
let mut output = vec![];
for i in order.iter() {
output.push(rows[*i].clone());
}
output
}
pub fn randomize_f(rows: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
use rand::seq::SliceRandom;
use rand::{thread_rng, Rng};
let mut order: Vec<usize> = (0..rows.len() as usize).collect();
let slice: &mut [usize] = &mut order;
let mut rng = thread_rng();
slice.shuffle(&mut rng);
let mut output = vec![];
for i in order.iter() {
output.push(rows[*i].clone());
}
output
}
/// Shuffles `input` and splits it into (train, test); `percentage` is the
/// TEST fraction of the data.
pub fn train_test_split_vector_f(input: &Vec<f64>, percentage: f64) -> (Vec<f64>, Vec<f64>) {
    let shuffled = randomize_vector_f(input);
    let boundary = (shuffled.len() as f64 * percentage) as usize;
    let test = shuffled[..boundary].to_vec();
    let train = shuffled[boundary..].to_vec();
    (train, test)
}
/// Shuffles the rows of `input` and splits them into (train, test);
/// `percentage` is the TEST fraction of the data.
pub fn train_test_split_f(
    input: &Vec<Vec<f64>>,
    percentage: f64,
) -> (Vec<Vec<f64>>, Vec<Vec<f64>>) {
    let shuffled = randomize_f(input);
    let boundary = (shuffled.len() as f64 * percentage) as usize;
    let test = shuffled[..boundary].to_vec();
    let train = shuffled[boundary..].to_vec();
    (train, test)
}
/// Correlation between two equally long samples.
/// `name` selects the method: "p" = Pearson, "s" = Spearman; anything else
/// panics. Prints a qualitative interpretation before returning the value.
pub fn correlation<T>(list1: &Vec<T>, list2: &Vec<T>, name: &str) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::cmp::PartialOrd
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let cov = covariance(list1, list2);
    let output = match name {
        // `covariance` returns an un-normalized sum, so the division by n
        // here completes the standard Pearson r = cov / (sd1 * sd2).
        "p" => (cov / (std_dev(list1) * std_dev(list2))) / list1.len() as f64,
        "s" => {
            // Spearman: rank both samples, then 1 - 6*sum(d^2)/(n(n^2-1)).
            let ranked_list1 = spearman_rank(list1);
            let ranked_list2 = spearman_rank(list2);
            let len = list1.len() as f64;
            // Look up the rank of each original element.
            // NOTE(review): this nested scan is O(n^2) and pushes one rank
            // per match — duplicated values inflate rl1/rl2; confirm inputs
            // are distinct or accept the approximation.
            let mut rl1 = vec![];
            for k in list1.iter() {
                for (i, j) in ranked_list1.iter() {
                    if k == i {
                        rl1.push(j);
                    }
                }
            }
            let mut rl2 = vec![];
            for k in list2.iter() {
                for (i, j) in ranked_list2.iter() {
                    if k == i {
                        rl2.push(j);
                    }
                }
            }
            // Sum of squared rank differences.
            let combined: Vec<_> = rl1.iter().zip(rl2.iter()).collect();
            let sum_of_square_of_difference = combined
                .iter()
                .map(|(a, b)| (***a - ***b) * (***a - ***b))
                .fold(0., |a, b| a + b);
            1. - ((6. * sum_of_square_of_difference) / (len * ((len * len) - 1.)))
        }
        _ => panic!("Either `p`: Pearson or `s`:Spearman has to be the name. Please retry!"),
    };
    // Qualitative read-out of the coefficient's strength.
    match output {
        x if x < 0.2 && x > -0.2 => println!("There is a weak correlation between the two :"),
        x if x > 0.6 => println!("There is a strong positive correlation between the two :"),
        x if x < -0.6 => println!("There is a strong negative correlation between the two :"),
        _ => (),
    }
    output
}
/// Population standard deviation: sqrt(mean((x - mu)^2)).
pub fn std_dev<T>(list1: &Vec<T>) -> f64
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::marker::Copy
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // Mean comes back as f64; the string round-trip turns it back into T.
    let mu: T = mean(list1).to_string().parse().unwrap();
    let squared_deviations: Vec<T> = list1.iter().map(|a| (*a - mu) * (*a - mu)).collect();
    mean(&squared_deviations).sqrt()
}
/// Average ranks for Spearman correlation: each sorted value is paired with
/// the mean of the positions it occupies (ties share an averaged rank),
/// converted to 1-based ranks.
/// FIX: removed a `map` vector that was built and never read, and the
/// needless `mut` on the rank vector.
pub fn spearman_rank<T>(list1: &Vec<T>) -> Vec<(T, f64)>
where
    T: std::iter::Sum<T>
        + std::ops::Div<Output = T>
        + std::fmt::Debug
        + std::fmt::Display
        + std::ops::Add
        + std::marker::Copy
        + std::cmp::PartialOrd
        + std::ops::Add<T, Output = T>
        + std::ops::Sub<T, Output = T>
        + std::ops::Mul<T, Output = T>
        + std::string::ToString
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let mut sorted = list1.clone();
    // Panics on NaN-like values that have no total order.
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    // For every sorted element collect all positions where it occurs, so
    // tied values can share the mean of their positions.
    let mut repeats: Vec<_> = vec![];
    for (n, i) in sorted.iter().enumerate() {
        if how_many_and_where_vector(&sorted, *i).len() > 1 {
            repeats.push((*i, how_many_and_where_vector(&sorted, *i)));
        } else {
            repeats.push((*i, vec![n]));
        }
    }
    // Average the 0-based positions, then shift to 1-based ranks.
    let rank: Vec<_> = repeats
        .iter()
        .map(|(a, b)| (a, b.iter().fold(0., |a, b| a + *b as f64) / b.len() as f64))
        .collect();
    rank.iter().map(|(a, b)| (**a, b + 1.)).collect()
}
/// All indices at which `number` occurs in `list` (empty when absent).
pub fn how_many_and_where_vector<T>(list: &Vec<T>, number: T) -> Vec<usize>
where
    T: std::cmp::PartialEq + std::fmt::Debug + Copy,
{
    let mut positions = vec![];
    for (index, value) in list.iter().enumerate() {
        if *value == number {
            positions.push(index);
        }
    }
    positions
}
/// Dense f64 matrix wrapper providing determinant and inverse helpers.
#[derive(Debug)]
pub struct MatrixF {
    // Row-major matrix data; helper methods assume it is square.
    pub matrix: Vec<Vec<f64>>,
}
impl MatrixF {
    /// Determinant of the (square) matrix. Panics if the matrix is not
    /// square. Sizes above 100 are refused: a message is printed and 100.0
    /// is returned (legacy behavior kept for compatibility).
    pub fn determinant_f(&self) -> f64 {
        if MatrixF::is_square_matrix(&self.matrix) == true {
            println!("Calculating Determinant...");
            match self.matrix.len() {
                1 => self.matrix[0][0],
                2 => MatrixF::determinant_2(&self),
                3..=100 => MatrixF::determinant_3plus(&self),
                _ => {
                    println!("Cant find determinant for size more than {}", 100);
                    "100".parse().unwrap()
                }
            }
        } else {
            panic!("The input should be a square matrix");
        }
    }
    /// 2x2 determinant: a*d - b*c.
    fn determinant_2(&self) -> f64 {
        // BUG FIX: previously multiplied matrix[1][0] by itself instead of
        // the off-diagonal pair matrix[0][1] * matrix[1][0].
        (self.matrix[0][0] * self.matrix[1][1]) - (self.matrix[0][1] * self.matrix[1][0])
    }
    /// Determinant via Gaussian elimination (product of the pivots after
    /// forward elimination). Entries are pre-rounded to 3 decimals and zero
    /// pivots are nudged to 0.001, so the result is approximate for
    /// singular or near-singular matrices.
    fn determinant_3plus(&self) -> f64 {
        let length = self.matrix.len() - 1;
        let mut new_matrix = self.matrix.clone();
        new_matrix = new_matrix
            .iter()
            .map(|a| a.iter().map(|a| MatrixF::round_off_f(*a, 3)).collect())
            .collect();
        for diagonal in 0..=length {
            for i in diagonal + 1..=length {
                // Avoid a division by zero on a vanishing pivot.
                if new_matrix[diagonal][diagonal] == 0.0 {
                    new_matrix[diagonal][diagonal] = 0.001;
                }
                let scalar = new_matrix[i][diagonal] / new_matrix[diagonal][diagonal];
                for j in 0..=length {
                    new_matrix[i][j] = new_matrix[i][j] - (scalar * new_matrix[diagonal][j]);
                }
            }
        }
        let mut product = 1.;
        for i in 0..=length {
            product *= new_matrix[i][i]
        }
        product
    }
    /// True when the matrix has as many rows as row 0 has columns.
    pub fn is_square_matrix<T>(matrix: &Vec<Vec<T>>) -> bool {
        matrix.len() == matrix[0].len()
    }
    /// Rounds `value` to `decimals` decimal places (half away from zero).
    fn round_off_f(value: f64, decimals: i32) -> f64 {
        ((value * 10.0f64.powi(decimals)).round()) / 10.0f64.powi(decimals)
    }
    /// Matrix inverse via Gauss-Jordan elimination against an identity
    /// matrix. NOTE(review): unlike determinant_3plus there is no guard for
    /// a zero pivot — a singular matrix produces inf/NaN entries; confirm
    /// callers only pass invertible matrices.
    pub fn inverse_f(&self) -> Vec<Vec<f64>> {
        let mut input = self.matrix.clone();
        let length = self.matrix.len();
        let mut identity = MatrixF::identity_matrix(length);
        let index: Vec<usize> = (0..length).collect();
        for diagonal in 0..length {
            // Scale the pivot row so the pivot becomes 1.
            let diagonal_scalar = 1. / (input[diagonal][diagonal]);
            for column_loop in 0..length {
                input[diagonal][column_loop] *= diagonal_scalar;
                identity[diagonal][column_loop] *= diagonal_scalar;
            }
            // Eliminate the pivot column from every other row.
            let except_diagonal: Vec<usize> = index[0..diagonal]
                .iter()
                .copied()
                .chain(index[diagonal + 1..].iter().copied())
                .collect();
            for i in except_diagonal {
                let row_scalar = input[i][diagonal];
                for j in 0..length {
                    input[i][j] = input[i][j] - (row_scalar * input[diagonal][j]);
                    identity[i][j] = identity[i][j] - (row_scalar * identity[diagonal][j])
                }
            }
        }
        identity
    }
    /// size x size identity matrix.
    fn identity_matrix(size: usize) -> Vec<Vec<f64>> {
        let mut output: Vec<Vec<f64>> = MatrixF::zero_matrix(size);
        for i in 0..=(size - 1) {
            for j in 0..=(size - 1) {
                if i == j {
                    output[i][j] = 1.;
                } else {
                    output[i][j] = 0.;
                }
            }
        }
        output
    }
    /// size x size matrix of zeros.
    fn zero_matrix(size: usize) -> Vec<Vec<f64>> {
        let mut output: Vec<Vec<f64>> = vec![];
        for _ in 0..=(size - 1) {
            output.push(vec![0.; size]);
        }
        output
    }
}
/// Pretty-prints `matrix` under a `string` heading: one Debug-formatted row
/// per line, followed by two blank lines.
pub fn print_a_matrix<T: std::fmt::Debug>(string: &str, matrix: &Vec<Vec<T>>) {
    println!("{}", string);
    matrix.iter().for_each(|row| println!("{:?}", row));
    println!("");
    println!("");
}
/// Reshapes the flat `list` into `rows` rows of `columns` elements each.
/// Panics when columns * rows does not equal the list length.
pub fn shape_changer<T>(list: &Vec<T>, columns: usize, rows: usize) -> Vec<Vec<T>>
where
    T: std::clone::Clone,
{
    if columns * rows == list.len() {
        let mut remaining = list.clone();
        let mut reshaped = vec![vec![]; rows];
        for row in reshaped.iter_mut() {
            // Peel `columns` elements off the front for this row.
            *row = remaining[..columns].to_vec();
            remaining = remaining[columns..].to_vec();
        }
        reshaped
    } else {
        panic!("!!! The shape transformation is not possible, check the values entered !!!");
    }
}
/// Transposes a row-major matrix: element [i][j] becomes [j][i].
/// Panics if `matrix` is empty or any row is shorter than row 0.
pub fn transpose<T: std::clone::Clone + Copy>(matrix: &Vec<Vec<T>>) -> Vec<Vec<T>> {
    let mut transposed = Vec::with_capacity(matrix[0].len());
    for j in 0..matrix[0].len() {
        let mut column = Vec::with_capacity(matrix.len());
        for row in matrix.iter() {
            column.push(row[j]);
        }
        transposed.push(column);
    }
    transposed
}
/// Element-wise sum of `a` and `b`. If the lengths differ, the shorter
/// vector is zero-padded at the end so the result always has
/// max(a.len(), b.len()) elements.
/// BUG FIX: the padded copy used to be computed and then ignored — the
/// original, shorter vectors were still indexed, which truncated the result
/// when `a` was shorter and panicked out of bounds when `b` was shorter.
pub fn vector_addition<T>(a: &mut Vec<T>, b: &mut Vec<T>) -> Vec<T>
where
    T: std::ops::Add<Output = T> + Copy + std::fmt::Debug + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    let mut output = vec![];
    if a.len() == b.len() {
        for i in 0..a.len() {
            output.push(a[i] + b[i]);
        }
        output
    } else if a.len() < b.len() {
        let new_a = pad_with_zero(a, b.len() - a.len(), "post");
        println!("The changed vector is {:?}", new_a);
        for i in 0..b.len() {
            output.push(new_a[i] + b[i]);
        }
        output
    } else {
        let new_b = pad_with_zero(b, a.len() - b.len(), "post");
        println!("The changed vector is {:?}", new_b);
        for i in 0..a.len() {
            output.push(a[i] + new_b[i]);
        }
        output
    }
}
/// Matrix product: input (m x n) times weights (n x p) -> m x p.
/// Prints the operand and result shapes, panics on a dimension mismatch.
pub fn matrix_multiplication<T>(input: &Vec<Vec<T>>, weights: &Vec<Vec<T>>) -> Vec<Vec<T>>
where
    T: Copy + std::iter::Sum + std::ops::Mul<Output = T>,
{
    println!(
        "Multiplication of {}x{} and {}x{}",
        input.len(),
        input[0].len(),
        weights.len(),
        weights[0].len()
    );
    println!("Output will be {}x{}", input.len(), weights[0].len());
    let weights_t = transpose(&weights);
    let mut output: Vec<T> = vec![];
    if input[0].len() == weights.len() {
        for i in input.iter() {
            for j in weights_t.iter() {
                output.push(dot_product(&i, &j));
            }
        }
        // BUG FIX: the flat buffer is row-major with input.len() rows of
        // weights[0].len() elements; the previous reshape swapped the two
        // dimensions, which scrambled every non-square result (and
        // contradicted the "Output will be" shape printed above).
        shape_changer(&output, weights_t.len(), input.len())
    } else {
        panic!("Dimension mismatch")
    }
}
/// Inner product of two vectors. `zip` stops at the shorter of the two, so
/// extra elements in the longer vector are silently ignored.
pub fn dot_product<T>(a: &Vec<T>, b: &Vec<T>) -> T
where
    T: std::ops::Mul<Output = T> + std::iter::Sum + Copy,
{
    let pairwise_products = a.iter().zip(b.iter()).map(|(x, y)| *x * *y);
    pairwise_products.sum()
}
/// Element-wise "mul" | "add" | "sub" | "div" over two equally long
/// vectors; panics on a length mismatch or an unknown operation name.
pub fn element_wise_operation<T>(a: &Vec<T>, b: &Vec<T>, operation: &str) -> Vec<T>
where
    T: Copy
        + std::fmt::Debug
        + std::ops::Mul<Output = T>
        + std::ops::Add<Output = T>
        + std::ops::Sub<Output = T>
        + std::ops::Div<Output = T>
        + std::cmp::PartialEq
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    if a.len() != b.len() {
        panic!("Dimension mismatch")
    }
    let mut result = Vec::with_capacity(a.len());
    for (x, y) in a.iter().zip(b.iter()) {
        result.push(match operation {
            "mul" => *x * *y,
            "add" => *x + *y,
            "sub" => *x - *y,
            "div" => *x / *y,
            _ => panic!("Operation unsuccessful!\nEnter any of the following(case sensitive):\n> Add\n> Sub\n> Mul\n> Div"),
        });
    }
    result
}
/// Returns a copy of `vector` extended with `count` zeros — appended when
/// `position` is "post", prepended when it is "pre"; anything else panics.
/// The input is never modified (the &mut is kept only so existing callers
/// keep compiling).
pub fn pad_with_zero<T>(vector: &mut Vec<T>, count: usize, position: &str) -> Vec<T>
where
    T: Copy + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    // Zero is obtained by parsing "0" to stay generic over T.
    let zero = "0".parse::<T>().unwrap();
    let padding = vec![zero; count];
    match position {
        "post" => [&vector[..], &padding[..]].concat(),
        "pre" => [&padding[..], &vector[..]].concat(),
        _ => panic!("Position can either be `post` or `pre`"),
    }
}
/// Converts every cell of a matrix to f64 via its Display formatting.
/// Panics if any cell's type is not a primitive numeric type.
pub fn make_matrix_float<T>(input: &Vec<Vec<T>>) -> Vec<Vec<f64>>
where
    T: std::fmt::Display + Copy,
{
    let mut float_matrix = vec![];
    for row in input.iter() {
        let mut float_row = vec![];
        for cell in row.iter() {
            if is_numerical(*cell) {
                float_row.push(format!("{}", cell).parse().unwrap());
            } else {
                panic!("Non numerical value present in the intput");
            }
        }
        float_matrix.push(float_row);
    }
    float_matrix
}
/// Converts every element of a vector to f64 via its Display formatting.
/// Panics if the element type is not a primitive numeric type.
pub fn make_vector_float<T>(input: &Vec<T>) -> Vec<f64>
where
    T: std::fmt::Display + Copy,
{
    let mut float_vector = Vec::with_capacity(input.len());
    for value in input.iter() {
        if is_numerical(*value) {
            float_vector.push(format!("{}", value).parse().unwrap());
        } else {
            panic!("Non numerical value present in the intput");
        }
    }
    float_vector
}
/// Rounds `value` to `decimals` decimal places (half away from zero).
pub fn round_off_f(value: f64, decimals: i32) -> f64 {
    let scale = 10.0f64.powi(decimals);
    (value * scale).round() / scale
}
/// Returns (minimum, maximum) of `list`. Panics on an empty list.
/// BUG FIX: the previous implementation partitioned the values into
/// positive and negative buckets and always indexed into the positive one,
/// panicking whenever the list contained only negative values. It also ran
/// a runtime type check that is statically always true for &Vec<f64>.
pub fn min_max_f(list: &Vec<f64>) -> (f64, f64) {
    // Single pass; indexing list[0] preserves the original panic on empty.
    let mut minimum = list[0];
    let mut maximum = list[0];
    for value in list.iter() {
        if *value < minimum {
            minimum = *value;
        }
        if *value > maximum {
            maximum = *value;
        }
    }
    (minimum, maximum)
}
pub fn is_numerical<T>(value: T) -> bool {
if type_of(&value) == "&i32"
|| type_of(&value) == "&i8"
|| type_of(&value) == "&i16"
|| type_of(&value) == "&i64"
|| type_of(&value) == "&i128"
|| type_of(&value) == "&f64"
|| type_of(&value) == "&f32"
|| type_of(&value) == "&u32"
|| type_of(&value) == "&u8"
|| type_of(&value) == "&u16"
|| type_of(&value) == "&u64"
|| type_of(&value) == "&u128"
|| type_of(&value) == "&usize"
|| type_of(&value) == "&isize"
{
true
} else {
false
}
}
/// Occurrence count of every distinct element, keyed and ordered by value.
pub fn value_counts<T: std::cmp::Ord>(list: &Vec<T>) -> BTreeMap<T, u32>
where
    T: std::cmp::PartialEq + std::cmp::Eq + std::hash::Hash + Copy,
{
    let mut counts: BTreeMap<T, u32> = BTreeMap::new();
    for item in list {
        // Entry API: insert 0 on first sight, then bump.
        *counts.entry(*item).or_insert(0) += 1;
    }
    counts
}
use std::any::type_name;
/// Compile-time type name of the argument. Consumes the value; pass a
/// reference (e.g. `type_of(&x)`) to inspect without moving.
pub fn type_of<T>(_: T) -> &'static str {
    std::any::type_name::<T>()
}
/// Distinct elements of `list` in first-occurrence order.
pub fn unique_values<T>(list: &Vec<T>) -> Vec<T>
where
    T: std::cmp::PartialEq + Copy,
{
    let mut seen = vec![];
    for item in list.iter() {
        if !seen.contains(item) {
            seen.push(*item);
        }
    }
    seen
}
/// Element-wise "mul" | "add" | "sub" | "div" over two matrices of the same
/// shape; panics on a shape mismatch or an unknown operation name.
pub fn element_wise_matrix_operation<T>(
    matrix1: &Vec<Vec<T>>,
    matrix2: &Vec<Vec<T>>,
    operation: &str,
) -> Vec<Vec<T>>
where
    T: Copy
        + std::fmt::Debug
        + std::ops::Mul<Output = T>
        + std::ops::Add<Output = T>
        + std::ops::Sub<Output = T>
        + std::ops::Div<Output = T>
        + std::cmp::PartialEq
        + std::str::FromStr,
    <T as std::str::FromStr>::Err: std::fmt::Debug,
{
    if matrix1.len() != matrix2.len() || matrix1[0].len() != matrix2[0].len() {
        panic!("Dimension mismatch")
    }
    let mut result = vec![];
    for (row1, row2) in matrix1.iter().zip(matrix2.iter()) {
        let mut row = vec![];
        for (x, y) in row1.iter().zip(row2.iter()) {
            row.push(match operation {
                "mul" => *x * *y,
                "add" => *x + *y,
                "sub" => *x - *y,
                "div" => *x / *y,
                _ => panic!("Operation unsuccessful!\nEnter any of the following(case sensitive):\n> Add\n> Sub\n> Mul\n> Div"),
            });
        }
        result.push(row);
    }
    result
}
/// Matrix-vector product: dot product of every matrix row with `vector`.
/// Panics when row length and vector length differ.
pub fn matrix_vector_product_f(matrix: &Vec<Vec<f64>>, vector: &Vec<f64>) -> Vec<f64> {
    if matrix[0].len() != vector.len() {
        panic!("The lengths do not match, please check");
    }
    matrix.iter().map(|row| dot_product(row, vector)).collect()
}
/// Splits `vector` into `parts` equally sized pieces; panics when the
/// length is not divisible by `parts`.
pub fn split_vector<T: std::clone::Clone>(vector: &Vec<T>, parts: i32) -> Vec<Vec<T>> {
    if vector.len() % parts as usize == 0 {
        let size = vector.len() / parts as usize;
        let mut pieces = vec![];
        let mut start = 0;
        let mut stop = size;
        while stop <= vector.len() {
            pieces.push(vector[start..stop].to_vec());
            start += size;
            stop = start + size;
        }
        pieces
    } else {
        panic!("This partition is not possible, check the number of partiotions passed")
    }
}
/// Splits `vector` at every occurrence of `at`, keeping each `at` as the
/// first element of the chunk that follows it. The leading chunk holds
/// whatever precedes the first occurrence (empty if `at` starts the vector).
///
/// # Panics
/// When `at` does not occur in `vector`.
pub fn split_vector_at<T>(vector: &Vec<T>, at: T) -> Vec<Vec<T>>
where
    T: std::cmp::PartialEq + Copy + std::clone::Clone,
{
    if !vector.contains(&at) {
        panic!("The value is not in the vector, please check");
    }
    let mut pieces: Vec<Vec<T>> = vec![];
    let mut start = 0;
    for (idx, value) in vector.iter().enumerate() {
        if *value == at {
            // Close the current piece just before this delimiter; the
            // delimiter itself opens the next piece.
            pieces.push(vector[start..idx].to_vec());
            start = idx;
        }
    }
    pieces.push(vector[start..].to_vec());
    pieces
}
/// A pair of strings to be compared / fuzzy-matched against each other.
pub struct StringToMatch {
    // First string of the pair.
    pub string1: String,
    // Second string of the pair.
    pub string2: String,
}
// Similarity / fuzzy-matching helpers over the pair (`string1`, `string2`).
impl StringToMatch {
    /// Blends the character-presence score and the positional score into one
    /// figure: each sub-score is scaled to 0-100, multiplied by its
    /// weightage, summed, then divided by 2.
    ///
    /// NOTE(review): the result is a true 0-100 percentage only when the two
    /// weightages sum to 2 (e.g. 1.0 and 1.0) — confirm intended calibration.
    pub fn compare_percentage(
        &self,
        weightage_for_position: f64,
        weightage_for_presence: f64,
    ) -> f64 {
        ((StringToMatch::compare_chars(&self) * weightage_for_presence * 100.)
            + (StringToMatch::compare_position(&self) * weightage_for_position * 100.))
            / 2.
    }

    /// Lowercases `s1` and keeps only bytes for ASCII digits (48-57),
    /// lowercase ASCII letters (97-122), and bytes 128-200.
    ///
    /// NOTE(review): the filter works on raw UTF-8 bytes, so a multi-byte
    /// character can be partially retained, in which case `from_utf8` below
    /// panics — appears safe for ASCII input only; confirm callers.
    pub fn clean_string(s1: String) -> String {
        let this = s1.to_lowercase();
        let this_byte: Vec<_> = this
            .as_bytes()
            .iter()
            .filter(|a| {
                (**a > 47 && **a < 58) || (**a > 96 && **a < 123) || (**a > 127 && **a < 201)
            })
            .map(|a| *a)
            .collect();
        let new_this = std::str::from_utf8(&this_byte[..]).unwrap();
        new_this.to_string()
    }

    /// The cleaned characters of `string1` as a vector (see `clean_string`).
    fn char_vector(string1: String) -> Vec<char> {
        let string1 = StringToMatch::clean_string(string1.clone());
        string1.chars().collect()
    }

    /// Normalizes a raw match count by the length of the longer of the two
    /// char vectors, yielding a 0-1 ratio.
    fn calculate(actual: f64, v1: &Vec<char>, v2: &Vec<char>) -> f64 {
        let larger = if v1.len() > v2.len() {
            v1.len()
        } else {
            v2.len()
        };
        (actual / larger as f64)
    }

    /// Ratio (0-1) of `string1`'s characters found anywhere in `string2`.
    /// Duplicates in `string1` are each counted, so the ratio can reach 1
    /// even for unequal strings (e.g. "aa" vs "ab").
    pub fn compare_chars(&self) -> f64 {
        let mut output = 0.;
        let vec1 = StringToMatch::char_vector(self.string1.clone());
        let vec2 = StringToMatch::char_vector(self.string2.clone());
        for i in vec1.iter() {
            if vec2.contains(i) {
                output += 1.;
            }
        }
        StringToMatch::calculate(output, &vec1, &vec2)
    }

    /// Ratio (0-1) of positions where both strings hold the same character.
    /// Only the overlapping prefix is compared (`zip` stops at the shorter
    /// string), but the count is still normalized by the longer length.
    pub fn compare_position(&self) -> f64 {
        let mut output = 0.;
        let vec1 = StringToMatch::char_vector(self.string1.clone());
        let vec2 = StringToMatch::char_vector(self.string2.clone());
        let combined: Vec<_> = vec1.iter().zip(vec2.iter()).collect();
        for (i, j) in combined.iter() {
            if i == j {
                output += 1.;
            }
        }
        StringToMatch::calculate(output, &vec1, &vec2)
    }

    /// Percentage (0-100) expressing how much the shorter cleaned string is
    /// contained in the longer one: 100 on a direct substring hit, otherwise
    /// the share of the shorter string's n-grams found among the longer
    /// string's n-grams. Prints the subset/superset pair as a side effect.
    pub fn fuzzy_subset(&self, n_gram: usize) -> f64 {
        let match_percentage;
        let vec1 = StringToMatch::clean_string(self.string1.clone());
        let vec2 = StringToMatch::clean_string(self.string2.clone());
        // Assume string1 is the superset until the lengths prove otherwise.
        let mut subset = vec2.clone();
        let mut superset = vec1.clone();
        if vec1.len() < vec2.len() {
            subset = vec1;
            superset = vec2;
        }
        let mut chunck_match_count = 0.;
        if superset.contains(&subset) {
            match_percentage = 100.
        } else {
            let superset_n = StringToMatch::n_gram(&superset, n_gram);
            let subset_n = StringToMatch::n_gram(&subset, n_gram);
            for i in subset_n.iter() {
                if superset_n.contains(i) {
                    chunck_match_count += 1.;
                }
            }
            // Normalize by the smaller n-gram set so the ratio can reach 1.
            let smaller = if superset_n.len() < subset_n.len() {
                superset_n.len()
            } else {
                subset_n.len()
            };
            match_percentage = (chunck_match_count / smaller as f64) * 100.
        }
        println!("{:?} in {:?}", subset, superset);
        match_percentage
    }

    /// Unique `window_size`-byte slices of `string`, collected from every
    /// start offset and then strided by `window_size`.
    ///
    /// NOTE(review): slicing is by BYTE index while the enumeration counts
    /// chars; the `< string.len() - 1` guard skips grams touching the final
    /// bytes; it panics on non-ASCII input (slice not on a char boundary)
    /// and on an empty string (`len() - 1` underflows) — TODO confirm all of
    /// this is intended.
    fn n_gram<'a>(string: &'a str, window_size: usize) -> Vec<&'a str> {
        let vector: Vec<_> = string.chars().collect();
        let mut output = vec![];
        for (mut n, _) in vector.iter().enumerate() {
            while n + window_size < string.len() - 1 {
                output.push(&string[n..n + window_size]);
                n = n + window_size;
            }
        }
        unique_values(&output)
    }

    /// Splits `string` into (digits, letters-and-spaces), judged byte-wise;
    /// bytes 128-200 are kept with the alphabetic part. Prints the raw byte
    /// vector as a side effect.
    pub fn split_alpha_numericals(string: String) -> (String, String) {
        let bytes: Vec<_> = string.as_bytes().to_vec();
        // ASCII '0'..='9'
        let numbers: Vec<_> = bytes.iter().filter(|a| **a < 58 && **a > 47).collect();
        println!("{:?}", bytes);
        // ASCII 'A'..='Z', 'a'..='z', space, plus bytes 128-200
        let aplhabets: Vec<_> = bytes
            .iter()
            .filter(|a| {
                (**a > 64 && **a < 91)
                    || (**a > 96 && **a < 123)
                    || (**a > 127 && **a < 201)
                    || (**a == 32)
            })
            .collect();
        (
            String::from_utf8(numbers.iter().map(|a| **a).collect()).unwrap(),
            String::from_utf8(aplhabets.iter().map(|a| **a).collect()).unwrap(),
        )
    }

    /// Occurrence count of each character of the lowercased `string`,
    /// implemented by collecting a vector of 1s per key and then summing.
    pub fn char_count(string: String) -> BTreeMap<char, u32> {
        let mut count: BTreeMap<char, Vec<i32>> = BTreeMap::new();
        let vector: Vec<_> = string.to_lowercase().chars().collect();
        for i in vector.iter() {
            count.insert(*i, vec![]);
        }
        let mut new_count: BTreeMap<char, Vec<i32>> = BTreeMap::new();
        for (k, _) in count.iter() {
            let mut values = vec![];
            for i in vector.iter() {
                if i == k {
                    values.push(1);
                }
            }
            new_count.insert(*k, values);
        }
        let mut output = BTreeMap::new();
        for (k, v) in new_count.iter() {
            output.insert(*k, v.iter().fold(0, |a, b| a as u32 + *b as u32));
        }
        output
    }

    /// Most frequent character of the lowercased `string`. Ties go to the
    /// smallest character (strict `>` plus ascending BTreeMap iteration);
    /// an empty string yields the placeholder '-'.
    pub fn frequent_char(string: String) -> char {
        let dict = StringToMatch::char_count(string);
        let mut value = 0;
        let mut key = '-';
        for (k, _) in dict.iter() {
            key = match dict.get_key_value(k) {
                Some((x, y)) => {
                    if *y > value {
                        value = *y;
                        *x
                    } else {
                        key
                    }
                }
                _ => panic!("Please check the input!!"),
            };
        }
        key
    }

    /// Replaces occurrences of `find` with the string `replace`; `operation`
    /// is `"all"` (every occurrence) or `"first"` (first occurrence only).
    /// Panics when `find` is absent or `operation` is unrecognized.
    ///
    /// NOTE(review): works on bytes and uses only the FIRST byte of `find`,
    /// so multi-byte (non-ASCII) characters are not handled safely — confirm
    /// callers pass ASCII.
    pub fn char_replace(string: String, find: char, replace: String, operation: &str) -> String {
        if string.contains(find) {
            let string_utf8 = string.as_bytes().to_vec();
            let find_utf8 = find.to_string().as_bytes().to_vec();
            let replace_utf8 = replace.as_bytes().to_vec();
            // Every chunk after the first starts with the `find` byte.
            let split = split_vector_at(&string_utf8, find_utf8[0]);
            let split_vec: Vec<_> = split
                .iter()
                .map(|a| String::from_utf8(a.to_vec()).unwrap())
                .collect();
            let mut new_string_vec = vec![];
            if operation == "all" {
                for (n, _) in split_vec.iter().enumerate() {
                    if n > 0 {
                        // Drop the leading `find` byte, prepend the replacement.
                        let x = split_vec[n][1..].to_string();
                        new_string_vec.push(format!(
                            "{}{}",
                            String::from_utf8(replace_utf8.clone()).unwrap(),
                            x.clone()
                        ));
                    } else {
                        new_string_vec.push(split_vec[n].clone());
                    }
                }
            } else {
                if operation == "first" {
                    for (n, _) in split_vec.iter().enumerate() {
                        // Chunk 1 is the first one opened by `find`; all
                        // others are copied through unchanged.
                        if n == 1 {
                            let x = split_vec[n][1..].to_string();
                            new_string_vec.push(format!(
                                "{}{}",
                                String::from_utf8(replace_utf8.clone()).unwrap(),
                                x.clone()
                            ));
                        } else {
                            new_string_vec.push(split_vec[n].clone());
                        }
                    }
                } else {
                    panic!("Either pass operation as `all` or `first`");
                }
            }
            new_string_vec.concat()
        } else {
            panic!("The character to replace does not exist in the string passed, please check!")
        }
    }
}