use crate::Tensor;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SparseFormat {
COO,
CSR,
CSC,
}
#[derive(Debug, Clone)]
pub struct SparseCOO {
pub indices: Vec<Vec<usize>>,
pub values: Vec<f32>,
pub shape: Vec<usize>,
pub is_coalesced: bool,
}
impl SparseCOO {
pub fn new(indices: Vec<Vec<usize>>, values: Vec<f32>, shape: Vec<usize>) -> Self {
assert_eq!(
indices.len(),
shape.len(),
"indices dimensions must match shape"
);
if !indices.is_empty() {
let nnz = indices[0].len();
for idx in &indices {
assert_eq!(idx.len(), nnz, "all index arrays must have same length");
}
assert_eq!(
values.len(),
nnz,
"values length must match number of indices"
);
}
Self {
indices,
values,
shape,
is_coalesced: false,
}
}
pub fn from_coo_2d(coords: &[(usize, usize)], values: &[f32], shape: &[usize]) -> Self {
assert_eq!(shape.len(), 2, "shape must be 2D");
assert_eq!(
coords.len(),
values.len(),
"coords and values must have same length"
);
let rows: Vec<usize> = coords.iter().map(|(r, _)| *r).collect();
let cols: Vec<usize> = coords.iter().map(|(_, c)| *c).collect();
Self::new(vec![rows, cols], values.to_vec(), shape.to_vec())
}
pub fn nnz(&self) -> usize {
self.values.len()
}
pub fn density(&self) -> f32 {
let total: usize = self.shape.iter().product();
if total == 0 {
0.0
} else {
self.nnz() as f32 / total as f32
}
}
pub fn shape(&self) -> &[usize] {
&self.shape
}
pub fn coalesce(&mut self) {
if self.is_coalesced || self.nnz() == 0 {
self.is_coalesced = true;
return;
}
let mut entries: Vec<(Vec<usize>, f32)> = (0..self.nnz())
.map(|i| {
let idx: Vec<usize> = self.indices.iter().map(|dim| dim[i]).collect();
(idx, self.values[i])
})
.collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
let mut new_indices: Vec<Vec<usize>> = vec![Vec::new(); self.shape.len()];
let mut new_values = Vec::new();
let mut prev_idx: Option<Vec<usize>> = None;
for (idx, val) in entries {
if prev_idx.as_ref() == Some(&idx) {
if let Some(last) = new_values.last_mut() {
*last += val;
}
} else {
for (d, i) in idx.iter().enumerate() {
new_indices[d].push(*i);
}
new_values.push(val);
prev_idx = Some(idx);
}
}
self.indices = new_indices;
self.values = new_values;
self.is_coalesced = true;
}
pub fn to_dense(&self) -> Tensor<f32> {
let total: usize = self.shape.iter().product();
let mut data = vec![0.0f32; total];
for i in 0..self.nnz() {
let mut flat_idx = 0;
let mut stride = 1;
for d in (0..self.shape.len()).rev() {
flat_idx += self.indices[d][i] * stride;
stride *= self.shape[d];
}
data[flat_idx] += self.values[i];
}
Tensor::from_vec(data, &self.shape).expect("tensor creation failed")
}
pub fn to_csr(&self) -> SparseCSR {
assert_eq!(self.shape.len(), 2, "CSR only supports 2D tensors");
let mut coo = self.clone();
coo.coalesce();
let nrows = self.shape[0];
let nnz = coo.nnz();
let mut row_ptr = vec![0usize; nrows + 1];
let mut col_indices = Vec::with_capacity(nnz);
let mut values = Vec::with_capacity(nnz);
for &row in &coo.indices[0] {
row_ptr[row + 1] += 1;
}
for i in 1..=nrows {
row_ptr[i] += row_ptr[i - 1];
}
let mut entries: Vec<(usize, usize, f32)> = (0..nnz)
.map(|i| (coo.indices[0][i], coo.indices[1][i], coo.values[i]))
.collect();
entries.sort_by_key(|(r, c, _)| (*r, *c));
for (_, col, val) in entries {
col_indices.push(col);
values.push(val);
}
SparseCSR {
row_ptr,
col_indices,
values,
shape: self.shape.clone(),
}
}
}
#[derive(Debug, Clone)]
pub struct SparseCSR {
pub row_ptr: Vec<usize>,
pub col_indices: Vec<usize>,
pub values: Vec<f32>,
pub shape: Vec<usize>,
}
impl SparseCSR {
pub fn new(
row_ptr: Vec<usize>,
col_indices: Vec<usize>,
values: Vec<f32>,
shape: Vec<usize>,
) -> Self {
assert_eq!(shape.len(), 2, "CSR only supports 2D tensors");
assert_eq!(
row_ptr.len(),
shape[0] + 1,
"row_ptr length must be nrows + 1"
);
assert_eq!(
col_indices.len(),
values.len(),
"col_indices and values must match"
);
Self {
row_ptr,
col_indices,
values,
shape,
}
}
pub fn nnz(&self) -> usize {
self.values.len()
}
pub fn nrows(&self) -> usize {
self.shape[0]
}
pub fn ncols(&self) -> usize {
self.shape[1]
}
pub fn density(&self) -> f32 {
let total = self.nrows() * self.ncols();
if total == 0 {
0.0
} else {
self.nnz() as f32 / total as f32
}
}
pub fn row(&self, row_idx: usize) -> impl Iterator<Item = (usize, f32)> + '_ {
let start = self.row_ptr[row_idx];
let end = self.row_ptr[row_idx + 1];
(start..end).map(move |i| (self.col_indices[i], self.values[i]))
}
pub fn matvec(&self, x: &[f32]) -> Vec<f32> {
assert_eq!(x.len(), self.ncols(), "vector length must match ncols");
let mut result = vec![0.0f32; self.nrows()];
for row in 0..self.nrows() {
let start = self.row_ptr[row];
let end = self.row_ptr[row + 1];
for i in start..end {
let col = self.col_indices[i];
let val = self.values[i];
result[row] += val * x[col];
}
}
result
}
pub fn matmul_dense(&self, b: &Tensor<f32>) -> Tensor<f32> {
let b_shape = b.shape();
assert_eq!(b_shape[0], self.ncols(), "inner dimensions must match");
let m = self.nrows();
let n = b_shape[1];
let b_data = b.to_vec();
let mut result = vec![0.0f32; m * n];
for row in 0..m {
for (col, val) in self.row(row) {
for j in 0..n {
result[row * n + j] += val * b_data[col * n + j];
}
}
}
Tensor::from_vec(result, &[m, n]).expect("tensor creation failed")
}
pub fn to_dense(&self) -> Tensor<f32> {
let mut data = vec![0.0f32; self.nrows() * self.ncols()];
for row in 0..self.nrows() {
for (col, val) in self.row(row) {
data[row * self.ncols() + col] = val;
}
}
Tensor::from_vec(data, &self.shape).expect("tensor creation failed")
}
pub fn to_coo(&self) -> SparseCOO {
let mut rows = Vec::with_capacity(self.nnz());
let mut cols = Vec::with_capacity(self.nnz());
for row in 0..self.nrows() {
let start = self.row_ptr[row];
let end = self.row_ptr[row + 1];
for i in start..end {
rows.push(row);
cols.push(self.col_indices[i]);
}
}
SparseCOO {
indices: vec![rows, cols],
values: self.values.clone(),
shape: self.shape.clone(),
is_coalesced: true,
}
}
}
#[derive(Debug, Clone)]
pub enum SparseTensor {
COO(SparseCOO),
CSR(SparseCSR),
}
impl SparseTensor {
pub fn from_coo(indices: Vec<Vec<usize>>, values: Vec<f32>, shape: Vec<usize>) -> Self {
Self::COO(SparseCOO::new(indices, values, shape))
}
pub fn from_coords(coords: &[(usize, usize)], values: &[f32], shape: &[usize]) -> Self {
Self::COO(SparseCOO::from_coo_2d(coords, values, shape))
}
pub fn from_dense(tensor: &Tensor<f32>, threshold: f32) -> Self {
let data = tensor.to_vec();
let shape = tensor.shape().to_vec();
let mut indices: Vec<Vec<usize>> = vec![Vec::new(); shape.len()];
let mut values = Vec::new();
let strides: Vec<usize> = {
let mut s = vec![1; shape.len()];
for i in (0..shape.len() - 1).rev() {
s[i] = s[i + 1] * shape[i + 1];
}
s
};
for (flat_idx, &val) in data.iter().enumerate() {
if val.abs() > threshold {
let mut idx = flat_idx;
for (d, &stride) in strides.iter().enumerate() {
indices[d].push(idx / stride);
idx %= stride;
}
values.push(val);
}
}
Self::COO(SparseCOO::new(indices, values, shape))
}
pub fn eye(n: usize) -> Self {
let indices: Vec<usize> = (0..n).collect();
let values = vec![1.0f32; n];
Self::COO(SparseCOO::new(
vec![indices.clone(), indices],
values,
vec![n, n],
))
}
pub fn diag(values: &[f32]) -> Self {
let n = values.len();
let indices: Vec<usize> = (0..n).collect();
Self::COO(SparseCOO::new(
vec![indices.clone(), indices],
values.to_vec(),
vec![n, n],
))
}
pub fn nnz(&self) -> usize {
match self {
Self::COO(coo) => coo.nnz(),
Self::CSR(csr) => csr.nnz(),
}
}
pub fn shape(&self) -> &[usize] {
match self {
Self::COO(coo) => &coo.shape,
Self::CSR(csr) => &csr.shape,
}
}
pub fn density(&self) -> f32 {
match self {
Self::COO(coo) => coo.density(),
Self::CSR(csr) => csr.density(),
}
}
pub fn to_dense(&self) -> Tensor<f32> {
match self {
Self::COO(coo) => coo.to_dense(),
Self::CSR(csr) => csr.to_dense(),
}
}
pub fn to_csr(&self) -> SparseCSR {
match self {
Self::COO(coo) => coo.to_csr(),
Self::CSR(csr) => csr.clone(),
}
}
pub fn to_coo(&self) -> SparseCOO {
match self {
Self::COO(coo) => coo.clone(),
Self::CSR(csr) => csr.to_coo(),
}
}
pub fn matvec(&self, x: &[f32]) -> Vec<f32> {
match self {
Self::COO(coo) => coo.to_csr().matvec(x),
Self::CSR(csr) => csr.matvec(x),
}
}
pub fn matmul(&self, dense: &Tensor<f32>) -> Tensor<f32> {
match self {
Self::COO(coo) => coo.to_csr().matmul_dense(dense),
Self::CSR(csr) => csr.matmul_dense(dense),
}
}
pub fn mul_scalar(&self, scalar: f32) -> Self {
match self {
Self::COO(coo) => {
let values: Vec<f32> = coo.values.iter().map(|v| v * scalar).collect();
Self::COO(SparseCOO::new(
coo.indices.clone(),
values,
coo.shape.clone(),
))
}
Self::CSR(csr) => {
let values: Vec<f32> = csr.values.iter().map(|v| v * scalar).collect();
Self::CSR(SparseCSR::new(
csr.row_ptr.clone(),
csr.col_indices.clone(),
values,
csr.shape.clone(),
))
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_sparse_coo_creation() {
let indices = vec![vec![0, 1, 2], vec![1, 0, 2]];
let values = vec![1.0, 2.0, 3.0];
let sparse = SparseCOO::new(indices, values, vec![3, 3]);
assert_eq!(sparse.nnz(), 3);
assert_eq!(sparse.shape(), &[3, 3]);
}
#[test]
fn test_sparse_coo_to_dense() {
let coords = vec![(0, 1), (1, 0), (2, 2)];
let values = vec![1.0, 2.0, 3.0];
let sparse = SparseCOO::from_coo_2d(&coords, &values, &[3, 3]);
let dense = sparse.to_dense();
let data = dense.to_vec();
assert_eq!(data[0 * 3 + 1], 1.0); assert_eq!(data[1 * 3 + 0], 2.0); assert_eq!(data[2 * 3 + 2], 3.0); }
#[test]
fn test_sparse_coo_coalesce() {
let indices = vec![vec![0, 0, 1], vec![0, 0, 1]];
let values = vec![1.0, 2.0, 3.0];
let mut sparse = SparseCOO::new(indices, values, vec![2, 2]);
sparse.coalesce();
assert_eq!(sparse.nnz(), 2); let dense = sparse.to_dense();
assert_eq!(dense.to_vec()[0], 3.0); }
#[test]
fn test_sparse_csr_creation() {
let row_ptr = vec![0, 1, 2, 3];
let col_indices = vec![1, 0, 2];
let values = vec![1.0, 2.0, 3.0];
let csr = SparseCSR::new(row_ptr, col_indices, values, vec![3, 3]);
assert_eq!(csr.nnz(), 3);
assert_eq!(csr.nrows(), 3);
assert_eq!(csr.ncols(), 3);
}
#[test]
fn test_sparse_csr_matvec() {
let row_ptr = vec![0, 1, 2];
let col_indices = vec![0, 1];
let values = vec![1.0, 2.0];
let csr = SparseCSR::new(row_ptr, col_indices, values, vec![2, 2]);
let x = vec![1.0, 2.0];
let result = csr.matvec(&x);
assert_eq!(result, vec![1.0, 4.0]);
}
#[test]
fn test_sparse_coo_to_csr() {
let coords = vec![(0, 1), (1, 0), (2, 2)];
let values = vec![1.0, 2.0, 3.0];
let coo = SparseCOO::from_coo_2d(&coords, &values, &[3, 3]);
let csr = coo.to_csr();
assert_eq!(csr.nnz(), 3);
assert_eq!(csr.nrows(), 3);
}
#[test]
fn test_sparse_tensor_from_dense() {
let dense =
Tensor::from_vec(vec![0.0, 1.0, 0.0, 2.0], &[2, 2]).expect("tensor creation failed");
let sparse = SparseTensor::from_dense(&dense, 0.0);
assert_eq!(sparse.nnz(), 2);
}
#[test]
fn test_sparse_tensor_eye() {
let eye = SparseTensor::eye(3);
let dense = eye.to_dense();
let data = dense.to_vec();
assert_eq!(data[0], 1.0);
assert_eq!(data[4], 1.0);
assert_eq!(data[8], 1.0);
assert_eq!(data[1], 0.0);
}
#[test]
fn test_sparse_tensor_diag() {
let diag = SparseTensor::diag(&[1.0, 2.0, 3.0]);
let dense = diag.to_dense();
let data = dense.to_vec();
assert_eq!(data[0], 1.0);
assert_eq!(data[4], 2.0);
assert_eq!(data[8], 3.0);
}
#[test]
fn test_sparse_density() {
let coords = vec![(0, 0), (1, 1)];
let values = vec![1.0, 2.0];
let sparse = SparseTensor::from_coords(&coords, &values, &[4, 4]);
assert!((sparse.density() - 0.125).abs() < 1e-6); }
#[test]
fn test_sparse_mul_scalar() {
let coords = vec![(0, 0)];
let values = vec![2.0];
let sparse = SparseTensor::from_coords(&coords, &values, &[2, 2]);
let scaled = sparse.mul_scalar(3.0);
let dense = scaled.to_dense();
assert_eq!(dense.to_vec()[0], 6.0);
}
#[test]
fn test_sparse_matmul() {
let coords = vec![(0, 0), (1, 1)];
let values = vec![1.0, 2.0];
let sparse = SparseTensor::from_coords(&coords, &values, &[2, 2]);
let dense =
Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0], &[2, 2]).expect("tensor creation failed");
let result = sparse.matmul(&dense);
let data = result.to_vec();
assert_eq!(data, vec![1.0, 2.0, 6.0, 8.0]);
}
}