sciforge-lib 0.0.4

Scientific computing library — mathematics, physics, chemistry, biology, astronomy, geology, meteorology.
Documentation
pub fn covariance_matrix(data: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let n = data.len();
    if n == 0 {
        return Vec::new();
    }
    let d = data[0].len();
    let means: Vec<f64> = (0..d)
        .map(|j| data.iter().map(|row| row[j]).sum::<f64>() / n as f64)
        .collect();
    let mut cov = vec![vec![0.0; d]; d];
    for row in data {
        for i in 0..d {
            for j in 0..d {
                cov[i][j] += (row[i] - means[i]) * (row[j] - means[j]);
            }
        }
    }
    let denom = (n - 1) as f64;
    for row in cov.iter_mut() {
        for v in row.iter_mut() {
            *v /= denom;
        }
    }
    cov
}

pub fn pca(data: &[Vec<f64>], n_components: usize, max_iter: usize, tol: f64) -> Vec<Vec<f64>> {
    let n = data.len();
    if n == 0 {
        return Vec::new();
    }
    let d = data[0].len();
    let means: Vec<f64> = (0..d)
        .map(|j| data.iter().map(|row| row[j]).sum::<f64>() / n as f64)
        .collect();
    let centered: Vec<Vec<f64>> = data
        .iter()
        .map(|row| row.iter().zip(means.iter()).map(|(x, m)| x - m).collect())
        .collect();
    let cov = covariance_matrix(&centered);
    let components = n_components.min(d);
    let mut result = Vec::with_capacity(components);
    let mut deflated = cov.clone();
    for _ in 0..components {
        let (_, eigvec) = power_iteration_local(&deflated, max_iter, tol);
        let ev_norm = eigvec.iter().map(|x| x * x).sum::<f64>().sqrt();
        let ev: Vec<f64> = eigvec.iter().map(|x| x / ev_norm.max(1e-30)).collect();
        let eigenval: f64 = (0..d)
            .map(|i| {
                ev[i]
                    * deflated[i]
                        .iter()
                        .zip(ev.iter())
                        .map(|(a, b)| a * b)
                        .sum::<f64>()
            })
            .sum();
        for i in 0..d {
            for j in 0..d {
                deflated[i][j] -= eigenval * ev[i] * ev[j];
            }
        }
        result.push(ev);
    }
    result
}

fn power_iteration_local(a: &[Vec<f64>], max_iter: usize, tol: f64) -> (f64, Vec<f64>) {
    let n = a.len();
    let mut v = vec![1.0 / (n as f64).sqrt(); n];
    let mut eigenvalue = 0.0;
    for _ in 0..max_iter {
        let mut av = vec![0.0; n];
        for i in 0..n {
            for (j, &aij) in a[i].iter().enumerate() {
                av[i] += aij * v[j];
            }
        }
        let new_eigenvalue: f64 = av.iter().zip(v.iter()).map(|(a, b)| a * b).sum();
        let norm = av.iter().map(|x| x * x).sum::<f64>().sqrt();
        if norm < 1e-30 {
            break;
        }
        for i in 0..n {
            v[i] = av[i] / norm;
        }
        if (new_eigenvalue - eigenvalue).abs() < tol {
            break;
        }
        eigenvalue = new_eigenvalue;
    }
    (eigenvalue, v)
}

pub fn project_pca(data: &[Vec<f64>], components: &[Vec<f64>]) -> Vec<Vec<f64>> {
    let d = if data.is_empty() { 0 } else { data[0].len() };
    let means: Vec<f64> = (0..d)
        .map(|j| data.iter().map(|row| row[j]).sum::<f64>() / data.len().max(1) as f64)
        .collect();
    data.iter()
        .map(|row| {
            let centered: Vec<f64> = row.iter().zip(means.iter()).map(|(x, m)| x - m).collect();
            components
                .iter()
                .map(|comp| centered.iter().zip(comp.iter()).map(|(x, c)| x * c).sum())
                .collect()
        })
        .collect()
}

pub fn kmeans(
    data: &[Vec<f64>],
    k: usize,
    max_iter: usize,
    seed: u64,
) -> (Vec<usize>, Vec<Vec<f64>>) {
    let n = data.len();
    if n == 0 || k == 0 {
        return (Vec::new(), Vec::new());
    }
    let d = data[0].len();
    let mut rng = seed;
    let mut centroids: Vec<Vec<f64>> = (0..k.min(n))
        .map(|_| {
            rng = rng
                .wrapping_mul(6_364_136_223_846_793_005)
                .wrapping_add(1_442_695_040_888_963_407);
            let idx = ((rng >> 33) as usize) % n;
            data[idx].clone()
        })
        .collect();
    let mut assignments = vec![0usize; n];
    for _ in 0..max_iter {
        let mut changed = false;
        for (i, point) in data.iter().enumerate() {
            let best = centroids
                .iter()
                .enumerate()
                .map(|(ci, c)| {
                    let dist: f64 = point
                        .iter()
                        .zip(c.iter())
                        .map(|(x, cx)| (x - cx).powi(2))
                        .sum();
                    (ci, dist)
                })
                .fold(
                    (0, f64::INFINITY),
                    |(bi, bv), (ci, v)| {
                        if v < bv { (ci, v) } else { (bi, bv) }
                    },
                )
                .0;
            if assignments[i] != best {
                assignments[i] = best;
                changed = true;
            }
        }
        if !changed {
            break;
        }
        let mut sums = vec![vec![0.0; d]; k];
        let mut counts = vec![0usize; k];
        for (i, point) in data.iter().enumerate() {
            let c = assignments[i];
            counts[c] += 1;
            for j in 0..d {
                sums[c][j] += point[j];
            }
        }
        for c in 0..k {
            if counts[c] > 0 {
                for j in 0..d {
                    centroids[c][j] = sums[c][j] / counts[c] as f64;
                }
            }
        }
    }
    (assignments, centroids)
}

pub fn silhouette_score(data: &[Vec<f64>], assignments: &[usize], k: usize) -> f64 {
    let n = data.len();
    if n < 2 {
        return 0.0;
    }
    let mut total = 0.0;
    for i in 0..n {
        let ci = assignments[i];
        let same: Vec<usize> = (0..n).filter(|&j| j != i && assignments[j] == ci).collect();
        let a = if same.is_empty() {
            0.0
        } else {
            same.iter()
                .map(|&j| euclidean_dist(&data[i], &data[j]))
                .sum::<f64>()
                / same.len() as f64
        };
        let b = (0..k)
            .filter(|&c| c != ci)
            .map(|c| {
                let other: Vec<usize> = (0..n).filter(|&j| assignments[j] == c).collect();
                if other.is_empty() {
                    f64::INFINITY
                } else {
                    other
                        .iter()
                        .map(|&j| euclidean_dist(&data[i], &data[j]))
                        .sum::<f64>()
                        / other.len() as f64
                }
            })
            .fold(f64::INFINITY, f64::min);
        let s = (b - a) / a.max(b).max(1e-30);
        total += s;
    }
    total / n as f64
}

fn euclidean_dist(a: &[f64], b: &[f64]) -> f64 {
    a.iter()
        .zip(b.iter())
        .map(|(x, y)| (x - y).powi(2))
        .sum::<f64>()
        .sqrt()
}

pub fn dbscan(data: &[Vec<f64>], eps: f64, min_pts: usize) -> Vec<Option<usize>> {
    let n = data.len();
    let mut labels: Vec<Option<usize>> = vec![None; n];
    let mut visited = vec![false; n];
    let mut cluster_id = 0usize;
    for i in 0..n {
        if visited[i] {
            continue;
        }
        visited[i] = true;
        let neighbors = region_query(data, i, eps);
        if neighbors.len() < min_pts {
            continue;
        }
        let mut queue = neighbors.clone();
        labels[i] = Some(cluster_id);
        let mut qi = 0;
        while qi < queue.len() {
            let j = queue[qi];
            qi += 1;
            if !visited[j] {
                visited[j] = true;
                let n2 = region_query(data, j, eps);
                if n2.len() >= min_pts {
                    for &p in &n2 {
                        if !queue.contains(&p) {
                            queue.push(p);
                        }
                    }
                }
            }
            if labels[j].is_none() {
                labels[j] = Some(cluster_id);
            }
        }
        cluster_id += 1;
    }
    labels
}

fn region_query(data: &[Vec<f64>], idx: usize, eps: f64) -> Vec<usize> {
    data.iter()
        .enumerate()
        .filter(|(j, point)| *j != idx && euclidean_dist(&data[idx], point) <= eps)
        .map(|(j, _)| j)
        .collect()
}