use crate::metrics::{accuracy, r2_score};
use itertools::Itertools;
use std::collections::HashMap;
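/// Ordinary least-squares linear regression, trained with batch gradient
/// descent on the mean squared error.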
pub struct LinearRegression {
pub weights: Vec<f64>,
pub intercept: f64,
}
impl LinearRegression {
pub fn new() -> LinearRegression {
LinearRegression {
weights: Vec::new(),
intercept: 0.0,
}
}
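    /// Runs `n_iter` epochs of batch gradient descent. For the MSE loss
    /// L = 1/(2n) * sum_i (y_hat_i - y_i)^2, the updates computed by the
    /// loops below are
    ///   w_j <- w_j - lr * (1/n) * sum_i (y_hat_i - y_i) * x_ij
    ///   b   <- b   - lr * (1/n) * sum_i (y_hat_i - y_i)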
pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>, lr: f64, n_iter: i32) {
let n_samples = x_train.len();
let n_features = x_train[0].len();
self.weights = vec![1.0; n_features];
if n_samples != y_train.len() {
panic!("Number of samples in training data does not match the number of samples in target values");
}
for _ in 0..n_iter {
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_train[i][j];
}
y_pred[i] += self.intercept;
}
let mut dw = vec![0.0; n_features];
let mut di = 0.0;
for i in 0..n_samples {
for j in 0..n_features {
dw[j] += (y_pred[i] - y_train[i]) * x_train[i][j];
}
di += y_pred[i] - y_train[i];
}
self.intercept -= lr * di / n_samples as f64;
for i in 0..n_features {
self.weights[i] -= lr * dw[i] / n_samples as f64;
}
}
}
pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
let n_samples = x_test.len();
let n_features = x_test[0].len();
if n_features != self.weights.len() {
panic!("Number of features in test data does not match the number of features in training data");
}
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_test[i][j];
}
y_pred[i] += self.intercept;
}
y_pred
}
pub fn get_weights(&self) -> Vec<f64> {
self.weights.clone()
}
pub fn get_intercept(&self) -> f64 {
self.intercept
}
pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> f64 {
let y_pred = self.predict(x_test);
r2_score(y_test, &y_pred)
}
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("intercept".to_string(), self.intercept.to_string());
for i in 0..self.weights.len() {
params.insert(format!("weight_{}", i), self.weights[i].to_string());
}
params
}
}
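/// Polynomial regression: expands the inputs into polynomial features up to
/// `degree`, then fits a linear model on the expanded features with batch
/// gradient descent.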
pub struct PolynomialRegression {
pub weights: Vec<f64>,
pub intercept: f64,
pub degree: usize,
}
impl PolynomialRegression {
pub fn new(degree: usize) -> PolynomialRegression {
PolynomialRegression {
weights: Vec::new(),
intercept: 0.0,
            degree,
}
}
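    /// Maps each row to its polynomial features: a bias term, the original
    /// features, and every monomial of degree 2..=`degree` built from
    /// combinations with replacement. For example, `[a, b]` with degree 2
    /// expands to `[1, a, b, a*a, a*b, b*b]`.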
pub fn expand_features(&self, x: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
let mut x_poly = Vec::new();
for row in x {
let mut row_poly = vec![1.0];
            row_poly.extend_from_slice(row);
for d in 2..=self.degree {
let mut combis = Vec::new();
for combo in (0..row.len()).combinations_with_replacement(d) {
let mut product = 1.0;
for &i in &combo {
product *= row[i];
}
combis.push(product);
}
row_poly.extend(combis);
}
x_poly.push(row_poly);
}
x_poly
}
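    /// Same batch gradient descent as `LinearRegression::fit`, run on the
    /// polynomial feature expansion of `x_train`.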
pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>, lr: f64, n_iter: i32) {
let n_samples = x_train.len();
if n_samples != y_train.len() {
panic!("Number of samples in training data does not match the number of samples in target values");
}
let x_poly = self.expand_features(x_train);
let n_features_poly = x_poly[0].len();
self.weights = vec![1.0; n_features_poly];
for _ in 0..n_iter {
let mut y_pred = vec![0.0; n_samples];
            for i in 0..n_samples {
                for j in 0..n_features_poly {
                    y_pred[i] += self.weights[j] * x_poly[i][j];
                }
                // Add the intercept during training as well, keeping fit
                // consistent with `predict`, which also applies it.
                y_pred[i] += self.intercept;
            }
let mut dw = vec![0.0; n_features_poly];
let mut di = 0.0;
for i in 0..n_samples {
for j in 0..n_features_poly {
dw[j] += (y_pred[i] - y_train[i]) * x_poly[i][j];
}
di += y_pred[i] - y_train[i];
}
self.intercept -= lr * di / n_samples as f64;
for i in 0..n_features_poly {
self.weights[i] -= lr * dw[i] / n_samples as f64;
}
}
}
pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
let n_samples = x_test.len();
        let x_poly = self.expand_features(x_test);
if x_poly[0].len() != self.weights.len() {
panic!("Number of features in test data does not match the number of features in training data");
}
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..x_poly[0].len() {
y_pred[i] += self.weights[j] * x_poly[i][j];
}
y_pred[i] += self.intercept;
}
y_pred
}
pub fn get_weights(&self) -> Vec<f64> {
self.weights.clone()
}
pub fn get_intercept(&self) -> f64 {
self.intercept
}
pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> f64 {
        let y_pred = self.predict(x_test);
        r2_score(y_test, &y_pred)
}
}
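/// Ridge regression: linear regression with an L2 penalty alpha * ||w||^2 on
/// the weights (the intercept is left unpenalized).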
pub struct RidgeRegression {
pub weights: Vec<f64>,
pub intercept: f64,
pub alpha: f64,
}
impl RidgeRegression {
pub fn new(alpha: f64) -> RidgeRegression {
RidgeRegression {
weights: Vec::new(),
intercept: 0.0,
            alpha,
}
}
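    /// Batch gradient descent on MSE plus the L2 penalty. The penalty term
    /// alpha * w_j^2 contributes 2 * alpha * w_j to the gradient, so each
    /// weight update is
    ///   w_j <- w_j - lr * ((1/n) * sum_i (y_hat_i - y_i) * x_ij + 2 * alpha * w_j)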
pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>, lr: f64, n_iter: i32) {
let n_samples = x_train.len();
let n_features = x_train[0].len();
self.weights = vec![1.0; n_features];
if n_samples != y_train.len() {
panic!("Number of samples in training data does not match the number of samples in target values");
}
for _ in 0..n_iter {
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_train[i][j];
}
y_pred[i] += self.intercept;
}
let mut dw = vec![0.0; n_features];
let mut di = 0.0;
for i in 0..n_samples {
for j in 0..n_features {
dw[j] += (y_pred[i] - y_train[i]) * x_train[i][j];
}
di += y_pred[i] - y_train[i];
}
self.intercept -= lr * di / n_samples as f64;
            for i in 0..n_features {
                // The gradient of the L2 penalty alpha * w^2 is 2 * alpha * w;
                // using alpha * w^2 here would mis-scale the step and lose the
                // sign of the weight.
                self.weights[i] -=
                    lr * (dw[i] / n_samples as f64 + 2.0 * self.alpha * self.weights[i]);
            }
}
}
pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
let n_samples = x_test.len();
let n_features = x_test[0].len();
if n_features != self.weights.len() {
panic!("Number of features in test data does not match the number of features in training data");
}
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_test[i][j];
}
y_pred[i] += self.intercept;
}
y_pred
}
pub fn get_weights(&self) -> Vec<f64> {
self.weights.clone()
}
pub fn get_intercept(&self) -> f64 {
self.intercept
}
pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> f64 {
let y_pred = self.predict(x_test);
r2_score(y_test, &y_pred)
}
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("intercept".to_string(), self.intercept.to_string());
for i in 0..self.weights.len() {
params.insert(format!("weight_{}", i), self.weights[i].to_string());
}
params.insert("alpha".to_string(), self.alpha.to_string());
params
}
}
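/// Lasso regression: linear regression with an L1 penalty alpha * ||w||_1,
/// whose subgradient alpha * sign(w_j) pushes small weights toward zero.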
pub struct LassoRegression {
pub weights: Vec<f64>,
pub intercept: f64,
pub alpha: f64,
}
impl LassoRegression {
pub fn new(alpha: f64) -> LassoRegression {
LassoRegression {
weights: Vec::new(),
intercept: 0.0,
            alpha,
}
}
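    /// Batch (sub)gradient descent on MSE plus the L1 penalty; the penalty
    /// contributes alpha * sign(w_j) to each weight's update.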
    pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>, lr: f64, n_iter: i32) {
let n_samples = x_train.len();
let n_features = x_train[0].len();
self.weights = vec![1.0; n_features];
if n_samples != y_train.len() {
panic!("Number of samples in training data does not match the number of samples in target values");
}
        for _ in 0..n_iter {
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_train[i][j];
}
y_pred[i] += self.intercept;
}
let mut dw = vec![0.0; n_features];
let mut di = 0.0;
for i in 0..n_samples {
for j in 0..n_features {
dw[j] += (y_pred[i] - y_train[i]) * x_train[i][j];
}
di += y_pred[i] - y_train[i];
}
self.intercept -= lr * di / n_samples as f64;
for i in 0..n_features {
self.weights[i] -=
lr * (dw[i] / n_samples as f64 + self.alpha * self.weights[i].signum());
}
}
}
pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<f64> {
let n_samples = x_test.len();
let n_features = x_test[0].len();
if n_features != self.weights.len() {
panic!("Number of features in test data does not match the number of features in training data");
}
let mut y_pred = vec![0.0; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i] += self.weights[j] * x_test[i][j];
}
y_pred[i] += self.intercept;
}
y_pred
}
    pub fn get_weights(&self) -> Vec<f64> {
        self.weights.clone()
    }
    pub fn get_intercept(&self) -> f64 {
        self.intercept
    }
pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> f64 {
let y_pred = self.predict(x_test);
r2_score(y_test, &y_pred)
}
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("intercept".to_string(), self.intercept.to_string());
for i in 0..self.weights.len() {
params.insert(format!("weight_{}", i), self.weights[i].to_string());
}
params.insert("alpha".to_string(), self.alpha.to_string());
params
}
}
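/// Binary logistic regression: a linear model passed through the sigmoid,
/// trained with batch gradient descent on the log-loss.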
pub struct LogisticRegression {
pub weights: Vec<f64>,
pub intercept: f64,
}
impl LogisticRegression {
    pub fn new() -> LogisticRegression {
        LogisticRegression {
weights: Vec::new(),
intercept: 0.0,
}
}
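    /// Logistic (sigmoid) link: maps any real score into (0, 1).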
    fn sigmoid(x: f64) -> f64 {
        1.0 / (1.0 + (-x).exp())
}
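    /// Batch gradient descent on the log-loss. With sigmoid outputs the
    /// gradient takes the same form as the MSE gradient of linear regression:
    /// (1/n) * sum_i (y_hat_i - y_i) * x_ij, where y_hat_i = sigmoid(w.x_i + b).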
    pub fn fit(&mut self, x_train: &Vec<Vec<f64>>, y_train: &Vec<f64>, lr: f64, n_iter: i32) {
let n_samples = x_train.len();
let n_features = x_train[0].len();
self.weights = vec![1.0; n_features];
if n_samples != y_train.len() {
panic!("Number of samples in training data does not match the number of samples in target values");
}
        for _ in 0..n_iter {
            let mut y_pred = vec![0.0; n_samples];
            for i in 0..n_samples {
                for j in 0..n_features {
y_pred[i] += self.weights[j] * x_train[i][j];
}
y_pred[i] += self.intercept;
y_pred[i] = LogisticRegression::sigmoid(y_pred[i]);
}
let mut dw = vec![0.0; n_features];
let mut di = 0.0;
            for i in 0..n_samples {
                for j in 0..n_features {
dw[j] += (y_pred[i] - y_train[i]) * x_train[i][j];
}
di += y_pred[i] - y_train[i];
}
self.intercept -= lr * di / n_samples as f64;
            for i in 0..n_features {
self.weights[i] -= lr * dw[i] / n_samples as f64;
}
}
}
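    /// Returns one `[P(class 0), P(class 1)]` pair per test row, where
    /// P(class 1) = sigmoid(w.x + b).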
    pub fn predict_proba(&self, x_test: &Vec<Vec<f64>>) -> Vec<Vec<f64>> {
let n_samples = x_test.len();
let n_features = x_test[0].len();
if n_features != self.weights.len() {
panic!("Number of features in test data does not match the number of features in training data");
}
        let mut y_pred = vec![vec![0.0; 2]; n_samples];
for i in 0..n_samples {
for j in 0..n_features {
y_pred[i][1] += self.weights[j] * x_test[i][j];
}
y_pred[i][1] += self.intercept;
y_pred[i][1] = LogisticRegression::sigmoid(y_pred[i][1]);
y_pred[i][0] = 1.0 - y_pred[i][1];
}
y_pred
}
    pub fn predict(&self, x_test: &Vec<Vec<f64>>) -> Vec<i32> {
        let probs = self.predict_proba(x_test);
        let mut y_pred = vec![0; probs.len()];
        for i in 0..probs.len() {
            if probs[i][1] > probs[i][0] {
y_pred[i] = 1;
}
}
y_pred
}
    pub fn get_weights(&self) -> Vec<f64> {
self.weights.clone()
}
    pub fn get_intercept(&self) -> f64 {
self.intercept
}
    pub fn score(&self, x_test: &Vec<Vec<f64>>, y_test: &Vec<f64>) -> HashMap<String, f64> {
        let y_pred = self.predict(x_test);
        // `predict` already returns Vec<i32>, so only the float labels need casting.
        let y_test: Vec<i32> = y_test.iter().map(|&x| x as i32).collect();
        accuracy(&y_pred, &y_test)
}
pub fn get_params(&self) -> HashMap<String, String> {
let mut params = HashMap::new();
params.insert("intercept".to_string(), self.intercept.to_string());
for i in 0..self.weights.len() {
params.insert(format!("weight_{}", i), self.weights[i].to_string());
}
params
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_linear_regression() {
let mut model = LinearRegression::new();
let x_train = vec![vec![1.0, 2.0], vec![2.0, 3.0], vec![3.0, 4.0]];
let y_train = vec![3.0, 4.0, 5.0];
model.fit(&x_train, &y_train, 0.01, 1000);
let x_test = vec![vec![4.0, 5.0], vec![5.0, 6.0]];
        let y_pred = model.predict(&x_test);
        assert_eq!(y_pred.len(), x_test.len());
        let _score = model.score(&x_test, &vec![6.0, 7.0]);
        let _coefficients = model.get_weights();
        let _intercept = model.get_intercept();
}
#[test]
fn test_polynomial_regression() {
let mut model = PolynomialRegression::new(2);
let x_train = vec![vec![1.0], vec![2.0], vec![3.0]];
let y_train = vec![1.0, 4.0, 9.0];
model.fit(&x_train, &y_train, 0.1, 1000);
let x_test = vec![vec![4.0], vec![5.0]];
        let y_pred = model.predict(&x_test);
        assert_eq!(y_pred.len(), x_test.len());
        let _score = model.score(&x_test, &vec![16.0, 25.0]);
        let _coefficients = model.get_weights();
        let _intercept = model.get_intercept();
}
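    #[test]
    fn test_expand_features() {
        // Sanity check for the degree-2 expansion of a single row [2, 3]: it
        // should contain the bias, both linear terms, and the three degree-2
        // monomials 4, 6, 9. The quadratic tail is sorted before comparing so
        // the test does not depend on itertools' combination ordering.
        let model = PolynomialRegression::new(2);
        let x_poly = model.expand_features(&vec![vec![2.0, 3.0]]);
        assert_eq!(x_poly[0].len(), 6);
        assert_eq!(x_poly[0][..3], [1.0, 2.0, 3.0]);
        let mut quadratic = x_poly[0][3..].to_vec();
        quadratic.sort_by(|a, b| a.partial_cmp(b).unwrap());
        assert_eq!(quadratic, vec![4.0, 6.0, 9.0]);
    }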
#[test]
fn test_ridge_regression() {
let mut model = RidgeRegression::new(0.01);
let x_train = vec![vec![1.0, 2.0], vec![2.0, 3.0], vec![3.0, 4.0]];
let y_train = vec![3.0, 4.0, 5.0];
model.fit(&x_train, &y_train, 0.01, 1000);
let x_test = vec![vec![4.0, 5.0], vec![5.0, 6.0]];
        let y_pred = model.predict(&x_test);
        assert_eq!(y_pred.len(), x_test.len());
        let _score = model.score(&x_test, &vec![6.0, 7.0]);
        let _coefficients = model.get_weights();
        let _intercept = model.get_intercept();
}
#[test]
fn test_lasso_regression() {
let mut model = LassoRegression::new(0.01);
let x_train = vec![vec![1.0, 2.0], vec![1.0, 3.0], vec![1.0, 4.0]];
let y_train = vec![3.0, 4.0, 5.0];
        model.fit(&x_train, &y_train, 0.1, 1000);
let x_test = vec![vec![1.0, 5.0], vec![1.0, 6.0]];
        let y_pred = model.predict(&x_test);
        assert_eq!(y_pred.len(), x_test.len());
        let _score = model.score(&x_test, &vec![6.0, 7.0]);
        let _coefficients = model.get_weights();
        let _intercept = model.get_intercept();
}
#[test]
    fn test_logistic_regression() {
let x_train = vec![
vec![1.0, 2.0],
vec![2.0, 3.0],
vec![3.0, 4.0],
vec![4.0, 5.0],
vec![5.0, 6.0],
vec![6.0, 7.0],
];
let y_train = vec![0.0, 0.0, 0.0, 1.0, 1.0, 1.0];
let x_test = vec![vec![-1.0, 0.0], vec![4.0, 5.0], vec![7.0, 7.0], vec![1.0, 9.0]];
let mut model = LogisticRegression::new();
model.fit(&x_train, &y_train, 0.01, 1000);
        let preds = model.predict(&x_test);
        assert_eq!(preds.len(), x_test.len());
        let _score = model.score(&x_test, &vec![0.0, 1.0, 1.0, 0.0]);
        let _weights = model.get_weights();
        let _intercept = model.get_intercept();
}
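    #[test]
    fn test_sigmoid() {
        // sigmoid(0) is exactly 0.5 and the output always lies strictly in
        // (0, 1); the private helper is reachable here because `tests` is a
        // child module.
        assert!((LogisticRegression::sigmoid(0.0) - 0.5).abs() < 1e-12);
        assert!(LogisticRegression::sigmoid(25.0) < 1.0);
        assert!(LogisticRegression::sigmoid(-25.0) > 0.0);
    }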
}