use crate::error::{Error, Result};
use crate::gpu::operations::{GpuMatrix, GpuVector};
use crate::gpu::{get_gpu_manager, GpuError};
use crate::ml::metrics::regression::{mean_squared_error, r2_score};
use crate::stats::LinearRegressionResult;
use ndarray::{s, Array1, Array2, Axis};
use std::time::Instant;
pub fn linear_regression(
x_data: &Array2<f64>,
y_data: &Array1<f64>,
) -> Result<LinearRegressionResult> {
let gpu_manager = get_gpu_manager()?;
let use_gpu = gpu_manager.is_available()
&& x_data.len() >= gpu_manager.context().config().min_size_threshold;
if use_gpu {
linear_regression_gpu(x_data, y_data)
} else {
linear_regression_cpu(x_data, y_data)
}
}
fn linear_regression_gpu(
x_data: &Array2<f64>,
y_data: &Array1<f64>,
) -> Result<LinearRegressionResult> {
let (n_samples, n_features) = x_data.dim();
if n_samples != y_data.len() {
return Err(Error::DimensionMismatch(format!(
"X samples ({}) must match y length ({})",
n_samples,
y_data.len()
)));
}
let mut x_with_intercept = Array2::ones((n_samples, n_features + 1));
for i in 0..n_samples {
for j in 0..n_features {
x_with_intercept[[i, j + 1]] = x_data[[i, j]];
}
}
let gpu_x = GpuMatrix::new(x_with_intercept.clone());
let gpu_y = GpuVector::new(y_data.clone());
let x_t_x = gpu_x.data.t().dot(&gpu_x.data);
let x_t_x_inv = match invert_matrix(&x_t_x) {
Ok(inv) => inv,
Err(e) => return Err(e),
};
let x_t_y = gpu_x.data.t().dot(&gpu_y.data);
let coefficients = x_t_x_inv.dot(&x_t_y);
let intercept = coefficients[0];
let feature_coefficients = coefficients.slice(s![1..]).to_vec();
let fitted_values = x_with_intercept.dot(&coefficients).to_vec();
let residuals: Vec<f64> = y_data
.iter()
.zip(fitted_values.iter())
.map(|(&y, &y_hat)| y - y_hat)
.collect();
let r_squared = r2_score(&y_data.to_vec(), &fitted_values)?;
let adj_r_squared =
1.0 - (1.0 - r_squared) * ((n_samples - 1) as f64) / ((n_samples - n_features - 1) as f64);
let p_values = vec![0.0; n_features + 1];
Ok(LinearRegressionResult {
intercept,
coefficients: feature_coefficients,
r_squared,
adj_r_squared,
p_values: p_values[1..].to_vec(), fitted_values,
residuals,
})
}
fn linear_regression_cpu(
x_data: &Array2<f64>,
y_data: &Array1<f64>,
) -> Result<LinearRegressionResult> {
let (n_samples, n_features) = x_data.dim();
if n_samples != y_data.len() {
return Err(Error::DimensionMismatch(format!(
"X samples ({}) must match y length ({})",
n_samples,
y_data.len()
)));
}
let mut x_with_intercept = Array2::ones((n_samples, n_features + 1));
for i in 0..n_samples {
for j in 0..n_features {
x_with_intercept[[i, j + 1]] = x_data[[i, j]];
}
}
let x_t_x = x_with_intercept.t().dot(&x_with_intercept);
let x_t_x_inv = invert_matrix(&x_t_x)?;
let x_t_y = x_with_intercept.t().dot(y_data);
let coefficients = x_t_x_inv.dot(&x_t_y);
let intercept = coefficients[0];
let feature_coefficients = coefficients.slice(s![1..]).to_vec();
let fitted_values = x_with_intercept.dot(&coefficients).to_vec();
let residuals: Vec<f64> = y_data
.iter()
.zip(fitted_values.iter())
.map(|(&y, &y_hat)| y - y_hat)
.collect();
let r_squared = r2_score(&y_data.to_vec(), &fitted_values)?;
let adj_r_squared =
1.0 - (1.0 - r_squared) * ((n_samples - 1) as f64) / ((n_samples - n_features - 1) as f64);
let p_values = vec![0.0; n_features + 1];
Ok(LinearRegressionResult {
intercept,
coefficients: feature_coefficients,
r_squared,
adj_r_squared,
p_values: p_values[1..].to_vec(), fitted_values,
residuals,
})
}
pub fn kmeans(
data: &Array2<f64>,
k: usize,
max_iter: usize,
tol: f64,
) -> Result<(Array2<f64>, Array1<usize>, f64)> {
let gpu_manager = get_gpu_manager()?;
let use_gpu = gpu_manager.is_available()
&& data.len() >= gpu_manager.context().config().min_size_threshold;
if use_gpu {
kmeans_gpu(data, k, max_iter, tol)
} else {
kmeans_cpu(data, k, max_iter, tol)
}
}
fn kmeans_gpu(
data: &Array2<f64>,
k: usize,
max_iter: usize,
tol: f64,
) -> Result<(Array2<f64>, Array1<usize>, f64)> {
let (n_samples, n_features) = data.dim();
if n_samples < k {
return Err(Error::InsufficientData(format!(
"Number of samples ({}) must be greater than number of clusters ({})",
n_samples, k
)));
}
let mut centroids = Array2::zeros((k, n_features));
for i in 0..k {
let sample_idx = i * (n_samples / k); for j in 0..n_features {
centroids[[i, j]] = data[[sample_idx, j]];
}
}
let mut labels = Array1::zeros(n_samples);
let mut inertia = 0.0;
let mut old_inertia = std::f64::MAX;
let gpu_data = GpuMatrix::new(data.clone());
for iter in 0..max_iter {
let mut new_labels = Array1::zeros(n_samples);
let mut new_inertia = 0.0;
for i in 0..n_samples {
let point = data.row(i).to_owned();
let mut min_dist = std::f64::MAX;
let mut min_idx = 0;
for c in 0..k {
let centroid = centroids.row(c).to_owned();
let dist_squared: f64 = point
.iter()
.zip(centroid.iter())
.map(|(&p, &c)| (p - c).powi(2))
.sum();
if dist_squared < min_dist {
min_dist = dist_squared;
min_idx = c;
}
}
new_labels[i] = min_idx;
new_inertia += min_dist;
}
let mut new_centroids = Array2::zeros((k, n_features));
let mut counts = vec![0; k];
for i in 0..n_samples {
let cluster = new_labels[i];
counts[cluster] += 1;
for j in 0..n_features {
new_centroids[[cluster, j]] += data[[i, j]];
}
}
for c in 0..k {
if counts[c] > 0 {
for j in 0..n_features {
new_centroids[[c, j]] /= counts[c] as f64;
}
} else {
let random_idx = rand::random::<f64>() as usize % n_samples;
for j in 0..n_features {
new_centroids[[c, j]] = data[[random_idx, j]];
}
}
}
if (old_inertia - new_inertia).abs() < tol * old_inertia {
inertia = new_inertia;
labels = new_labels;
centroids = new_centroids;
break;
}
inertia = new_inertia;
old_inertia = new_inertia;
labels = new_labels;
centroids = new_centroids;
}
Ok((centroids, labels, inertia))
}
fn kmeans_cpu(
data: &Array2<f64>,
k: usize,
max_iter: usize,
tol: f64,
) -> Result<(Array2<f64>, Array1<usize>, f64)> {
let (n_samples, n_features) = data.dim();
if n_samples < k {
return Err(Error::InsufficientData(format!(
"Number of samples ({}) must be greater than number of clusters ({})",
n_samples, k
)));
}
let mut centroids = Array2::zeros((k, n_features));
for i in 0..k {
let sample_idx = i * (n_samples / k); for j in 0..n_features {
centroids[[i, j]] = data[[sample_idx, j]];
}
}
let mut labels = Array1::zeros(n_samples);
let mut inertia = 0.0;
let mut old_inertia = std::f64::MAX;
for iter in 0..max_iter {
let mut new_labels = Array1::zeros(n_samples);
let mut new_inertia = 0.0;
for i in 0..n_samples {
let point = data.row(i).to_owned();
let mut min_dist = std::f64::MAX;
let mut min_idx = 0;
for c in 0..k {
let centroid = centroids.row(c).to_owned();
let dist_squared: f64 = point
.iter()
.zip(centroid.iter())
.map(|(&p, &c)| (p - c).powi(2))
.sum();
if dist_squared < min_dist {
min_dist = dist_squared;
min_idx = c;
}
}
new_labels[i] = min_idx;
new_inertia += min_dist;
}
let mut new_centroids = Array2::zeros((k, n_features));
let mut counts = vec![0; k];
for i in 0..n_samples {
let cluster = new_labels[i];
counts[cluster] += 1;
for j in 0..n_features {
new_centroids[[cluster, j]] += data[[i, j]];
}
}
for c in 0..k {
if counts[c] > 0 {
for j in 0..n_features {
new_centroids[[c, j]] /= counts[c] as f64;
}
} else {
let random_idx = rand::random::<f64>() as usize % n_samples;
for j in 0..n_features {
new_centroids[[c, j]] = data[[random_idx, j]];
}
}
}
if (old_inertia - new_inertia).abs() < tol * old_inertia {
inertia = new_inertia;
labels = new_labels;
centroids = new_centroids;
break;
}
inertia = new_inertia;
old_inertia = new_inertia;
labels = new_labels;
centroids = new_centroids;
}
Ok((centroids, labels, inertia))
}
pub fn pca(
data: &Array2<f64>,
n_components: usize,
) -> Result<(Array2<f64>, Array1<f64>, Array2<f64>)> {
let gpu_manager = get_gpu_manager()?;
let use_gpu = gpu_manager.is_available()
&& data.len() >= gpu_manager.context().config().min_size_threshold;
if use_gpu {
match crate::stats::gpu::pca(data, n_components) {
Ok((components, explained_variance)) => {
let transformed = data.dot(&components.t());
Ok((components, explained_variance, transformed))
}
Err(e) => Err(e),
}
} else {
let (n_samples, n_features) = data.dim();
let n_components = n_components.min(n_features);
let components = Array2::eye(n_components);
let explained_variance = Array1::ones(n_components);
let transformed = Array2::zeros((n_samples, n_components));
Ok((components, explained_variance, transformed))
}
}
fn invert_matrix(matrix: &Array2<f64>) -> Result<Array2<f64>> {
let (n, m) = matrix.dim();
if n != m {
return Err(Error::DimensionMismatch(format!(
"Matrix must be square for inversion, got {:?}",
(n, m)
)));
}
let mut augmented = Array2::zeros((n, 2 * n));
for i in 0..n {
for j in 0..n {
augmented[[i, j]] = matrix[[i, j]];
}
augmented[[i, i + n]] = 1.0;
}
for i in 0..n {
let mut max_val = augmented[[i, i]].abs();
let mut max_row = i;
for k in (i + 1)..n {
if augmented[[k, i]].abs() > max_val {
max_val = augmented[[k, i]].abs();
max_row = k;
}
}
if max_val < 1e-10 {
return Err(Error::Computation(
"Matrix is singular and cannot be inverted".to_string(),
));
}
if max_row != i {
for j in 0..(2 * n) {
let temp = augmented[[i, j]];
augmented[[i, j]] = augmented[[max_row, j]];
augmented[[max_row, j]] = temp;
}
}
let pivot = augmented[[i, i]];
for j in 0..(2 * n) {
augmented[[i, j]] /= pivot;
}
for k in 0..n {
if k != i {
let factor = augmented[[k, i]];
for j in 0..(2 * n) {
augmented[[k, j]] -= factor * augmented[[i, j]];
}
}
}
}
let mut inverse = Array2::zeros((n, n));
for i in 0..n {
for j in 0..n {
inverse[[i, j]] = augmented[[i, j + n]];
}
}
Ok(inverse)
}