use ghostflow_core::Tensor;
pub struct LinearRegression {
pub coef_: Option<Vec<f32>>,
pub intercept_: Option<f32>,
pub fit_intercept: bool,
pub solver: LinearSolver,
pub learning_rate: f32,
pub max_iter: usize,
pub tol: f32,
}
#[derive(Clone, Copy, Debug)]
pub enum LinearSolver {
ClosedForm,
GradientDescent,
SGD,
}
impl LinearRegression {
pub fn new() -> Self {
LinearRegression {
coef_: None,
intercept_: None,
fit_intercept: true,
solver: LinearSolver::ClosedForm,
learning_rate: 0.01,
max_iter: 1000,
tol: 1e-4,
}
}
pub fn fit_intercept(mut self, fit: bool) -> Self {
self.fit_intercept = fit;
self
}
pub fn solver(mut self, solver: LinearSolver) -> Self {
self.solver = solver;
self
}
pub fn learning_rate(mut self, lr: f32) -> Self {
self.learning_rate = lr;
self
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
match self.solver {
LinearSolver::ClosedForm => self.fit_closed_form(x, y),
LinearSolver::GradientDescent => self.fit_gradient_descent(x, y),
LinearSolver::SGD => self.fit_sgd(x, y),
}
}
fn fit_closed_form(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let n_cols = if self.fit_intercept { n_features + 1 } else { n_features };
let mut x_aug = Vec::with_capacity(n_samples * n_cols);
for i in 0..n_samples {
if self.fit_intercept {
x_aug.push(1.0); }
for j in 0..n_features {
x_aug.push(x_data[i * n_features + j]);
}
}
let mut xtx = vec![0.0f32; n_cols * n_cols];
for i in 0..n_cols {
for j in 0..n_cols {
let mut sum = 0.0f32;
for k in 0..n_samples {
sum += x_aug[k * n_cols + i] * x_aug[k * n_cols + j];
}
xtx[i * n_cols + j] = sum;
}
}
let mut xty = vec![0.0f32; n_cols];
for i in 0..n_cols {
let mut sum = 0.0f32;
for k in 0..n_samples {
sum += x_aug[k * n_cols + i] * y_data[k];
}
xty[i] = sum;
}
let weights = self.solve_linear_system(&xtx, &xty, n_cols);
if self.fit_intercept {
self.intercept_ = Some(weights[0]);
self.coef_ = Some(weights[1..].to_vec());
} else {
self.intercept_ = Some(0.0);
self.coef_ = Some(weights);
}
}
fn solve_linear_system(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
let mut a_reg = a.to_vec();
for i in 0..n {
a_reg[i * n + i] += 1e-8;
}
let mut l = vec![0.0f32; n * n];
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0f32;
if i == j {
for k in 0..j {
sum += l[j * n + k] * l[j * n + k];
}
let val = a_reg[j * n + j] - sum;
l[j * n + j] = if val > 0.0 { val.sqrt() } else { 1e-10 };
} else {
for k in 0..j {
sum += l[i * n + k] * l[j * n + k];
}
l[i * n + j] = (a_reg[i * n + j] - sum) / l[j * n + j].max(1e-10);
}
}
}
let mut y = vec![0.0f32; n];
for i in 0..n {
let mut sum = 0.0f32;
for j in 0..i {
sum += l[i * n + j] * y[j];
}
y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
}
let mut x = vec![0.0f32; n];
for i in (0..n).rev() {
let mut sum = 0.0f32;
for j in (i + 1)..n {
sum += l[j * n + i] * x[j];
}
x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
}
x
}
fn fit_gradient_descent(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mut weights = vec![0.0f32; n_features];
let mut bias = 0.0f32;
for _iter in 0..self.max_iter {
let mut predictions = vec![0.0f32; n_samples];
for i in 0..n_samples {
let mut pred = bias;
for j in 0..n_features {
pred += weights[j] * x_data[i * n_features + j];
}
predictions[i] = pred;
}
let mut grad_w = vec![0.0f32; n_features];
let mut grad_b = 0.0f32;
for i in 0..n_samples {
let error = predictions[i] - y_data[i];
grad_b += error;
for j in 0..n_features {
grad_w[j] += error * x_data[i * n_features + j];
}
}
let scale = 1.0 / n_samples as f32;
grad_b *= scale;
for g in &mut grad_w {
*g *= scale;
}
if self.fit_intercept {
bias -= self.learning_rate * grad_b;
}
for j in 0..n_features {
weights[j] -= self.learning_rate * grad_w[j];
}
let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>() + grad_b * grad_b;
if grad_norm.sqrt() < self.tol {
break;
}
}
self.coef_ = Some(weights);
self.intercept_ = Some(bias);
}
fn fit_sgd(&mut self, x: &Tensor, y: &Tensor) {
use rand::seq::SliceRandom;
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mut weights = vec![0.0f32; n_features];
let mut bias = 0.0f32;
let mut indices: Vec<usize> = (0..n_samples).collect();
for _epoch in 0..self.max_iter {
indices.shuffle(&mut rand::thread_rng());
for &i in &indices {
let mut pred = bias;
for j in 0..n_features {
pred += weights[j] * x_data[i * n_features + j];
}
let error = pred - y_data[i];
if self.fit_intercept {
bias -= self.learning_rate * error;
}
for j in 0..n_features {
weights[j] -= self.learning_rate * error * x_data[i * n_features + j];
}
}
}
self.coef_ = Some(weights);
self.intercept_ = Some(bias);
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let coef = self.coef_.as_ref().expect("Model not fitted");
let intercept = self.intercept_.unwrap_or(0.0);
let predictions: Vec<f32> = (0..n_samples)
.map(|i| {
let mut pred = intercept;
for j in 0..n_features {
pred += coef[j] * x_data[i * n_features + j];
}
pred
})
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
let predictions = self.predict(x);
let pred_data = predictions.data_f32();
let y_data = y.data_f32();
let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
let ss_res: f32 = pred_data.iter()
.zip(y_data.iter())
.map(|(&p, &y)| (y - p).powi(2))
.sum();
let ss_tot: f32 = y_data.iter()
.map(|&y| (y - y_mean).powi(2))
.sum();
1.0 - ss_res / ss_tot.max(1e-10)
}
}
impl Default for LinearRegression {
fn default() -> Self {
Self::new()
}
}
pub struct Ridge {
pub coef_: Option<Vec<f32>>,
pub intercept_: Option<f32>,
pub alpha: f32,
pub fit_intercept: bool,
pub max_iter: usize,
pub tol: f32,
}
impl Ridge {
pub fn new(alpha: f32) -> Self {
Ridge {
coef_: None,
intercept_: None,
alpha,
fit_intercept: true,
max_iter: 1000,
tol: 1e-4,
}
}
pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
let x_mean: Vec<f32> = (0..n_features)
.map(|j| {
(0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32
})
.collect();
let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;
let x_centered: Vec<f32> = (0..n_samples)
.flat_map(|i| {
(0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>()
})
.collect();
let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
(x_centered, y_centered, x_mean, y_mean)
} else {
(x_data.to_vec(), y_data.to_vec(), vec![0.0; n_features], 0.0)
};
let mut xtx = vec![0.0f32; n_features * n_features];
for i in 0..n_features {
for j in 0..n_features {
let mut sum = 0.0f32;
for k in 0..n_samples {
sum += x_centered[k * n_features + i] * x_centered[k * n_features + j];
}
xtx[i * n_features + j] = sum;
if i == j {
xtx[i * n_features + j] += self.alpha;
}
}
}
let mut xty = vec![0.0f32; n_features];
for i in 0..n_features {
let mut sum = 0.0f32;
for k in 0..n_samples {
sum += x_centered[k * n_features + i] * y_centered[k];
}
xty[i] = sum;
}
let weights = self.solve_cholesky(&xtx, &xty, n_features);
self.coef_ = Some(weights.clone());
if self.fit_intercept {
let mut intercept = y_mean;
for j in 0..n_features {
intercept -= weights[j] * x_mean[j];
}
self.intercept_ = Some(intercept);
} else {
self.intercept_ = Some(0.0);
}
}
fn solve_cholesky(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
let mut l = vec![0.0f32; n * n];
for i in 0..n {
for j in 0..=i {
let mut sum = 0.0f32;
if i == j {
for k in 0..j {
sum += l[j * n + k] * l[j * n + k];
}
l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
} else {
for k in 0..j {
sum += l[i * n + k] * l[j * n + k];
}
l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
}
}
}
let mut y = vec![0.0f32; n];
for i in 0..n {
let mut sum = 0.0f32;
for j in 0..i {
sum += l[i * n + j] * y[j];
}
y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
}
let mut x = vec![0.0f32; n];
for i in (0..n).rev() {
let mut sum = 0.0f32;
for j in (i + 1)..n {
sum += l[j * n + i] * x[j];
}
x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
}
x
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let coef = self.coef_.as_ref().expect("Model not fitted");
let intercept = self.intercept_.unwrap_or(0.0);
let predictions: Vec<f32> = (0..n_samples)
.map(|i| {
let mut pred = intercept;
for j in 0..n_features {
pred += coef[j] * x_data[i * n_features + j];
}
pred
})
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
let predictions = self.predict(x);
let pred_data = predictions.data_f32();
let y_data = y.data_f32();
let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
1.0 - ss_res / ss_tot.max(1e-10)
}
}
pub struct Lasso {
pub coef_: Option<Vec<f32>>,
pub intercept_: Option<f32>,
pub alpha: f32,
pub fit_intercept: bool,
pub max_iter: usize,
pub tol: f32,
}
impl Lasso {
pub fn new(alpha: f32) -> Self {
Lasso {
coef_: None,
intercept_: None,
alpha,
fit_intercept: true,
max_iter: 1000,
tol: 1e-4,
}
}
fn soft_threshold(x: f32, lambda: f32) -> f32 {
if x > lambda {
x - lambda
} else if x < -lambda {
x + lambda
} else {
0.0
}
}
pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let x_mean: Vec<f32> = (0..n_features)
.map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
.collect();
let y_mean = if self.fit_intercept {
y_data.iter().sum::<f32>() / n_samples as f32
} else {
0.0
};
let x_centered: Vec<f32> = (0..n_samples)
.flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
.collect();
let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
let mut x_sq_sum = vec![0.0f32; n_features];
for j in 0..n_features {
for i in 0..n_samples {
x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
}
}
let mut weights = vec![0.0f32; n_features];
let mut residuals = y_centered.clone();
for _iter in 0..self.max_iter {
let weights_old = weights.clone();
for j in 0..n_features {
for i in 0..n_samples {
residuals[i] += weights[j] * x_centered[i * n_features + j];
}
let mut rho = 0.0f32;
for i in 0..n_samples {
rho += x_centered[i * n_features + j] * residuals[i];
}
let lambda = self.alpha * n_samples as f32;
weights[j] = Self::soft_threshold(rho, lambda) / x_sq_sum[j].max(1e-10);
for i in 0..n_samples {
residuals[i] -= weights[j] * x_centered[i * n_features + j];
}
}
let max_change: f32 = weights.iter()
.zip(weights_old.iter())
.map(|(&w, &w_old)| (w - w_old).abs())
.fold(0.0f32, f32::max);
if max_change < self.tol {
break;
}
}
self.coef_ = Some(weights.clone());
if self.fit_intercept {
let mut intercept = y_mean;
for j in 0..n_features {
intercept -= weights[j] * x_mean[j];
}
self.intercept_ = Some(intercept);
} else {
self.intercept_ = Some(0.0);
}
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let coef = self.coef_.as_ref().expect("Model not fitted");
let intercept = self.intercept_.unwrap_or(0.0);
let predictions: Vec<f32> = (0..n_samples)
.map(|i| {
let mut pred = intercept;
for j in 0..n_features {
pred += coef[j] * x_data[i * n_features + j];
}
pred
})
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
let predictions = self.predict(x);
let pred_data = predictions.data_f32();
let y_data = y.data_f32();
let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
1.0 - ss_res / ss_tot.max(1e-10)
}
}
pub struct ElasticNet {
pub coef_: Option<Vec<f32>>,
pub intercept_: Option<f32>,
pub alpha: f32,
pub l1_ratio: f32,
pub fit_intercept: bool,
pub max_iter: usize,
pub tol: f32,
}
impl ElasticNet {
pub fn new(alpha: f32, l1_ratio: f32) -> Self {
ElasticNet {
coef_: None,
intercept_: None,
alpha,
l1_ratio: l1_ratio.clamp(0.0, 1.0),
fit_intercept: true,
max_iter: 1000,
tol: 1e-4,
}
}
fn soft_threshold(x: f32, lambda: f32) -> f32 {
if x > lambda {
x - lambda
} else if x < -lambda {
x + lambda
} else {
0.0
}
}
pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let l1_reg = self.alpha * self.l1_ratio * n_samples as f32;
let l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples as f32;
let x_mean: Vec<f32> = (0..n_features)
.map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
.collect();
let y_mean = if self.fit_intercept {
y_data.iter().sum::<f32>() / n_samples as f32
} else {
0.0
};
let x_centered: Vec<f32> = (0..n_samples)
.flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
.collect();
let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
let mut x_sq_sum = vec![0.0f32; n_features];
for j in 0..n_features {
for i in 0..n_samples {
x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
}
}
let mut weights = vec![0.0f32; n_features];
let mut residuals = y_centered.clone();
for _iter in 0..self.max_iter {
let weights_old = weights.clone();
for j in 0..n_features {
for i in 0..n_samples {
residuals[i] += weights[j] * x_centered[i * n_features + j];
}
let mut rho = 0.0f32;
for i in 0..n_samples {
rho += x_centered[i * n_features + j] * residuals[i];
}
let denom = x_sq_sum[j] + l2_reg;
weights[j] = Self::soft_threshold(rho, l1_reg) / denom.max(1e-10);
for i in 0..n_samples {
residuals[i] -= weights[j] * x_centered[i * n_features + j];
}
}
let max_change: f32 = weights.iter()
.zip(weights_old.iter())
.map(|(&w, &w_old)| (w - w_old).abs())
.fold(0.0f32, f32::max);
if max_change < self.tol {
break;
}
}
self.coef_ = Some(weights.clone());
if self.fit_intercept {
let mut intercept = y_mean;
for j in 0..n_features {
intercept -= weights[j] * x_mean[j];
}
self.intercept_ = Some(intercept);
} else {
self.intercept_ = Some(0.0);
}
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let coef = self.coef_.as_ref().expect("Model not fitted");
let intercept = self.intercept_.unwrap_or(0.0);
let predictions: Vec<f32> = (0..n_samples)
.map(|i| {
let mut pred = intercept;
for j in 0..n_features {
pred += coef[j] * x_data[i * n_features + j];
}
pred
})
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
let predictions = self.predict(x);
let pred_data = predictions.data_f32();
let y_data = y.data_f32();
let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
1.0 - ss_res / ss_tot.max(1e-10)
}
}
pub struct LogisticRegression {
pub coef_: Option<Vec<Vec<f32>>>,
pub intercept_: Option<Vec<f32>>,
pub penalty: Penalty,
pub c: f32,
pub fit_intercept: bool,
pub max_iter: usize,
pub tol: f32,
pub multi_class: MultiClass,
n_classes: usize,
}
#[derive(Clone, Copy, Debug)]
pub enum Penalty {
None,
L1,
L2,
ElasticNet(f32),
}
#[derive(Clone, Copy, Debug)]
pub enum MultiClass {
OvR,
Multinomial,
}
impl LogisticRegression {
pub fn new() -> Self {
LogisticRegression {
coef_: None,
intercept_: None,
penalty: Penalty::L2,
c: 1.0,
fit_intercept: true,
max_iter: 100,
tol: 1e-4,
multi_class: MultiClass::OvR,
n_classes: 0,
}
}
pub fn penalty(mut self, penalty: Penalty) -> Self {
self.penalty = penalty;
self
}
pub fn c(mut self, c: f32) -> Self {
self.c = c;
self
}
pub fn max_iter(mut self, n: usize) -> Self {
self.max_iter = n;
self
}
pub fn multi_class(mut self, mc: MultiClass) -> Self {
self.multi_class = mc;
self
}
fn sigmoid(x: f32) -> f32 {
if x >= 0.0 {
1.0 / (1.0 + (-x).exp())
} else {
let exp_x = x.exp();
exp_x / (1.0 + exp_x)
}
}
pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
let y_data = y.data_f32();
self.n_classes = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;
if self.n_classes == 2 {
self.fit_binary(x, y);
} else {
match self.multi_class {
MultiClass::OvR => self.fit_ovr(x, y),
MultiClass::Multinomial => self.fit_multinomial(x, y),
}
}
}
fn fit_binary(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mut weights = vec![0.0f32; n_features];
let mut bias = 0.0f32;
let reg = 1.0 / self.c;
for _iter in 0..self.max_iter {
let mut grad_w = vec![0.0f32; n_features];
let mut grad_b = 0.0f32;
for i in 0..n_samples {
let mut z = bias;
for j in 0..n_features {
z += weights[j] * x_data[i * n_features + j];
}
let p = Self::sigmoid(z);
let error = p - y_data[i];
grad_b += error;
for j in 0..n_features {
grad_w[j] += error * x_data[i * n_features + j];
}
}
match self.penalty {
Penalty::L2 => {
for j in 0..n_features {
grad_w[j] += reg * weights[j];
}
}
Penalty::L1 => {
for j in 0..n_features {
grad_w[j] += reg * weights[j].signum();
}
}
_ => {}
}
let scale = 1.0 / n_samples as f32;
grad_b *= scale;
for g in &mut grad_w {
*g *= scale;
}
let lr = 0.1;
if self.fit_intercept {
bias -= lr * grad_b;
}
for j in 0..n_features {
weights[j] -= lr * grad_w[j];
}
let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>().sqrt();
if grad_norm < self.tol {
break;
}
}
self.coef_ = Some(vec![weights]);
self.intercept_ = Some(vec![bias]);
}
fn fit_ovr(&mut self, x: &Tensor, y: &Tensor) {
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let _n_features = x.dims()[1];
let mut all_coef = Vec::with_capacity(self.n_classes);
let mut all_intercept = Vec::with_capacity(self.n_classes);
for c in 0..self.n_classes {
let y_binary: Vec<f32> = y_data.iter()
.map(|&yi| if yi as usize == c { 1.0 } else { 0.0 })
.collect();
let y_tensor = Tensor::from_slice(&y_binary, &[n_samples]).unwrap();
let mut binary_lr = LogisticRegression::new()
.penalty(self.penalty)
.c(self.c)
.max_iter(self.max_iter);
binary_lr.n_classes = 2;
binary_lr.fit_binary(x, &y_tensor);
all_coef.push(binary_lr.coef_.unwrap().into_iter().next().unwrap());
all_intercept.push(binary_lr.intercept_.unwrap().into_iter().next().unwrap());
}
self.coef_ = Some(all_coef);
self.intercept_ = Some(all_intercept);
}
fn fit_multinomial(&mut self, x: &Tensor, y: &Tensor) {
let x_data = x.data_f32();
let y_data = y.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let mut weights = vec![vec![0.0f32; n_features]; self.n_classes];
let mut biases = vec![0.0f32; self.n_classes];
let reg = 1.0 / self.c;
let lr = 0.1;
for _iter in 0..self.max_iter {
let mut probs = vec![vec![0.0f32; self.n_classes]; n_samples];
for i in 0..n_samples {
let mut max_z = f32::NEG_INFINITY;
let mut z = vec![0.0f32; self.n_classes];
for c in 0..self.n_classes {
z[c] = biases[c];
for j in 0..n_features {
z[c] += weights[c][j] * x_data[i * n_features + j];
}
max_z = max_z.max(z[c]);
}
let mut sum_exp = 0.0f32;
for c in 0..self.n_classes {
probs[i][c] = (z[c] - max_z).exp();
sum_exp += probs[i][c];
}
for c in 0..self.n_classes {
probs[i][c] /= sum_exp;
}
}
let mut grad_w = vec![vec![0.0f32; n_features]; self.n_classes];
let mut grad_b = vec![0.0f32; self.n_classes];
for i in 0..n_samples {
let true_class = y_data[i] as usize;
for c in 0..self.n_classes {
let target = if c == true_class { 1.0 } else { 0.0 };
let error = probs[i][c] - target;
grad_b[c] += error;
for j in 0..n_features {
grad_w[c][j] += error * x_data[i * n_features + j];
}
}
}
let scale = 1.0 / n_samples as f32;
for c in 0..self.n_classes {
grad_b[c] *= scale;
for j in 0..n_features {
grad_w[c][j] = grad_w[c][j] * scale + reg * weights[c][j];
}
}
for c in 0..self.n_classes {
if self.fit_intercept {
biases[c] -= lr * grad_b[c];
}
for j in 0..n_features {
weights[c][j] -= lr * grad_w[c][j];
}
}
}
self.coef_ = Some(weights);
self.intercept_ = Some(biases);
}
pub fn predict(&self, x: &Tensor) -> Tensor {
let proba = self.predict_proba(x);
let proba_data = proba.data_f32();
let n_samples = x.dims()[0];
let predictions: Vec<f32> = (0..n_samples)
.map(|i| {
let start = i * self.n_classes;
let sample_probs = &proba_data[start..start + self.n_classes];
sample_probs.iter()
.enumerate()
.max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
.map(|(idx, _)| idx as f32)
.unwrap_or(0.0)
})
.collect();
Tensor::from_slice(&predictions, &[n_samples]).unwrap()
}
pub fn predict_proba(&self, x: &Tensor) -> Tensor {
let x_data = x.data_f32();
let n_samples = x.dims()[0];
let n_features = x.dims()[1];
let coef = self.coef_.as_ref().expect("Model not fitted");
let intercept = self.intercept_.as_ref().expect("Model not fitted");
let mut probs = Vec::with_capacity(n_samples * self.n_classes);
for i in 0..n_samples {
if self.n_classes == 2 {
let mut z = intercept[0];
for j in 0..n_features {
z += coef[0][j] * x_data[i * n_features + j];
}
let p = Self::sigmoid(z);
probs.push(1.0 - p);
probs.push(p);
} else {
let mut z = vec![0.0f32; self.n_classes];
let mut max_z = f32::NEG_INFINITY;
for c in 0..self.n_classes {
z[c] = intercept[c];
for j in 0..n_features {
z[c] += coef[c][j] * x_data[i * n_features + j];
}
max_z = max_z.max(z[c]);
}
let mut sum_exp = 0.0f32;
for c in 0..self.n_classes {
z[c] = (z[c] - max_z).exp();
sum_exp += z[c];
}
for c in 0..self.n_classes {
probs.push(z[c] / sum_exp);
}
}
}
Tensor::from_slice(&probs, &[n_samples, self.n_classes]).unwrap()
}
pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
let predictions = self.predict(x);
let pred_data = predictions.data_f32();
let y_data = y.data_f32();
let correct: usize = pred_data.iter()
.zip(y_data.iter())
.filter(|(&p, &y)| (p - y).abs() < 0.5)
.count();
correct as f32 / y_data.len() as f32
}
}
impl Default for LogisticRegression {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_linear_regression() {
let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
let y = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();
let mut lr = LinearRegression::new();
lr.fit(&x, &y);
let _predictions = lr.predict(&x);
let score = lr.score(&x, &y);
assert!(score > 0.99, "R² should be close to 1.0");
}
#[test]
fn test_ridge() {
let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
let y = Tensor::from_slice(&[2.1f32, 3.9, 6.2, 7.8, 10.1], &[5]).unwrap();
let mut ridge = Ridge::new(1.0);
ridge.fit(&x, &y);
let score = ridge.score(&x, &y);
assert!(score > 0.95);
}
#[test]
fn test_logistic_regression() {
let x = Tensor::from_slice(&[0.0f32, 0.0,
0.0, 1.0,
1.0, 0.0,
1.0, 1.0,
], &[4, 2]).unwrap();
let y = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();
let mut lr = LogisticRegression::new().max_iter(1000);
lr.fit(&x, &y);
let predictions = lr.predict(&x);
assert_eq!(predictions.dims(), &[4]);
}
}