use faer::Mat;
use faer_traits::ComplexField;
use num_traits::{Float, Zero};
use rayon::prelude::*;
use std::ops::{Add, Mul};
#[derive(Clone)]
pub struct CoordinateList<T> {
pub row_indices: Vec<usize>,
pub col_indices: Vec<usize>,
pub values: Vec<T>,
pub n_samples: usize,
}
impl<T> CoordinateList<T>
where
T: Float,
{
pub fn to_edge_list(&self) -> Vec<(usize, usize, T)> {
self.row_indices
.iter()
.zip(&self.col_indices)
.zip(&self.values)
.map(|((&r, &c), &v)| (r, c, v))
.collect()
}
pub fn get_size(&self) -> usize {
self.row_indices.len()
}
}
#[derive(Debug, Clone)]
pub enum CompressedSparseFormat {
Csc,
Csr,
}
impl CompressedSparseFormat {
pub fn is_csc(&self) -> bool {
matches!(self, CompressedSparseFormat::Csc)
}
pub fn is_csr(&self) -> bool {
matches!(self, CompressedSparseFormat::Csr)
}
}
#[derive(Debug, Clone)]
pub struct CompressedSparseData<T>
where
T: Clone + Float + ComplexField,
{
pub data: Vec<T>,
pub indices: Vec<usize>,
pub indptr: Vec<usize>,
pub cs_type: CompressedSparseFormat,
pub shape: (usize, usize),
}
impl<T> CompressedSparseData<T>
where
T: Clone + Sync + Add + PartialEq + Mul + Float + ComplexField,
{
#[allow(dead_code)]
pub fn new_csc(data: &[T], indices: &[usize], indptr: &[usize], shape: (usize, usize)) -> Self {
Self {
data: data.to_vec(),
indices: indices.to_vec(),
indptr: indptr.to_vec(),
cs_type: CompressedSparseFormat::Csc,
shape,
}
}
pub fn new_csr(data: &[T], indices: &[usize], indptr: &[usize], shape: (usize, usize)) -> Self {
Self {
data: data.to_vec(),
indices: indices.to_vec(),
indptr: indptr.to_vec(), cs_type: CompressedSparseFormat::Csr,
shape,
}
}
pub fn transform(&self) -> Self {
match self.cs_type {
CompressedSparseFormat::Csc => csc_to_csr(self),
CompressedSparseFormat::Csr => csr_to_csc(self),
}
}
pub fn transpose(&self) -> Self {
match self.cs_type {
CompressedSparseFormat::Csr => {
let as_csc = CompressedSparseData {
data: self.data.clone(),
indices: self.indices.clone(),
indptr: self.indptr.clone(),
cs_type: CompressedSparseFormat::Csc,
shape: (self.shape.1, self.shape.0), };
csc_to_csr(&as_csc)
}
CompressedSparseFormat::Csc => {
let as_csr = CompressedSparseData {
data: self.data.clone(),
indices: self.indices.clone(),
indptr: self.indptr.clone(),
cs_type: CompressedSparseFormat::Csr,
shape: (self.shape.1, self.shape.0), };
csr_to_csc(&as_csr)
}
}
}
pub fn shape(&self) -> (usize, usize) {
self.shape
}
pub fn get_nnz(&self) -> usize {
self.data.len()
}
pub fn nrows(&self) -> usize {
self.shape.0
}
pub fn ncols(&self) -> usize {
self.shape.1
}
pub fn to_dense(&self) -> Mat<T> {
let (nrows, ncols) = self.shape;
let mut dense: Mat<T> = Mat::zeros(nrows, ncols);
match self.cs_type {
CompressedSparseFormat::Csr => {
for i in 0..nrows {
let start = self.indptr[i];
let end = self.indptr[i + 1];
for idx in start..end {
let j = self.indices[idx];
let val = self.data[idx];
dense[(i, j)] = val;
}
}
}
CompressedSparseFormat::Csc => {
for j in 0..ncols {
let start = self.indptr[j];
let end = self.indptr[j + 1];
for idx in start..end {
let i = self.indices[idx];
let val = self.data[idx];
dense[(i, j)] = val;
}
}
}
}
dense
}
}
pub fn csc_to_csr<T>(sparse_data: &CompressedSparseData<T>) -> CompressedSparseData<T>
where
T: Clone + Sync + Add + PartialEq + Mul + Float + ComplexField,
{
if sparse_data.cs_type.is_csr() {
return sparse_data.clone();
}
let (nrow, _) = sparse_data.shape();
let nnz = sparse_data.get_nnz();
let mut row_ptr = vec![0; nrow + 1];
for &r in &sparse_data.indices {
row_ptr[r + 1] += 1;
}
for i in 0..nrow {
row_ptr[i + 1] += row_ptr[i];
}
let mut csr_data = vec![T::zero(); nnz];
let mut csr_col_ind = vec![0; nnz];
let mut next = row_ptr[..nrow].to_vec();
for col in 0..(sparse_data.indptr.len() - 1) {
for idx in sparse_data.indptr[col]..sparse_data.indptr[col + 1] {
let row = sparse_data.indices[idx];
let pos = next[row];
csr_data[pos] = sparse_data.data[idx];
csr_col_ind[pos] = col;
next[row] += 1;
}
}
CompressedSparseData {
data: csr_data,
indices: csr_col_ind,
indptr: row_ptr,
cs_type: CompressedSparseFormat::Csr,
shape: sparse_data.shape(),
}
}
pub fn csr_to_csc<T>(sparse_data: &CompressedSparseData<T>) -> CompressedSparseData<T>
where
T: Clone + Sync + Add + PartialEq + Mul + Float + ComplexField,
{
if sparse_data.cs_type.is_csc() {
return sparse_data.clone();
}
let nnz = sparse_data.get_nnz();
let (_, ncol) = sparse_data.shape();
let mut col_ptr = vec![0; ncol + 1];
for &c in &sparse_data.indices {
col_ptr[c + 1] += 1;
}
for i in 0..ncol {
col_ptr[i + 1] += col_ptr[i];
}
let mut csc_data = vec![T::zero(); nnz];
let mut csc_row_ind = vec![0; nnz];
let mut next = col_ptr[..ncol].to_vec();
for row in 0..(sparse_data.indptr.len() - 1) {
for idx in sparse_data.indptr[row]..sparse_data.indptr[row + 1] {
let col = sparse_data.indices[idx];
let pos = next[col];
csc_data[pos] = sparse_data.data[idx];
csc_row_ind[pos] = row;
next[col] += 1;
}
}
CompressedSparseData {
data: csc_data,
indices: csc_row_ind,
indptr: col_ptr,
cs_type: CompressedSparseFormat::Csc,
shape: sparse_data.shape(),
}
}
pub fn to_dense<T>(sparse: &CompressedSparseData<T>) -> Mat<T>
where
T: Clone + Float + Zero + ComplexField,
{
let (nrows, ncols) = sparse.shape;
let mut dense = Mat::zeros(nrows, ncols);
match sparse.cs_type {
CompressedSparseFormat::Csr => {
for i in 0..nrows {
let start = sparse.indptr[i];
let end = sparse.indptr[i + 1];
for idx in start..end {
let j = sparse.indices[idx];
let val = sparse.data[idx];
dense[(i, j)] = val;
}
}
}
CompressedSparseFormat::Csc => {
for j in 0..ncols {
let start = sparse.indptr[j];
let end = sparse.indptr[j + 1];
for idx in start..end {
let i = sparse.indices[idx];
let val = sparse.data[idx];
dense[(i, j)] = val;
}
}
}
}
dense
}
pub struct SparseRow<T> {
pub indices: Vec<usize>,
pub data: Vec<T>,
}
pub fn coo_to_csr<T>(graph: &CoordinateList<T>) -> CompressedSparseData<T>
where
T: Float + Send + Sync + Default + ComplexField,
{
let n = graph.n_samples;
let nnz = graph.values.len();
let mut triplets: Vec<(usize, usize, T)> = (0..nnz)
.into_par_iter()
.map(|i| (graph.row_indices[i], graph.col_indices[i], graph.values[i]))
.collect();
triplets.par_sort_unstable_by(|(r1, c1, _), (r2, c2, _)| r1.cmp(r2).then(c1.cmp(c2)));
let mut data = Vec::with_capacity(nnz);
let mut indices = Vec::with_capacity(nnz);
for (_, c, v) in triplets.iter() {
data.push(*v);
indices.push(*c);
}
let mut indptr = vec![0; n + 1];
for (r, _, _) in triplets.iter() {
indptr[r + 1] += 1;
}
for i in 0..n {
indptr[i + 1] += indptr[i];
}
CompressedSparseData::new_csr(&data, &indices, &indptr, (n, n))
}
pub fn sparse_row_to_csr<T>(rows: &[SparseRow<T>], ncols: usize) -> CompressedSparseData<T>
where
T: Clone + Float + Send + Sync + ComplexField,
{
let nrows = rows.len();
let nnz: usize = rows.iter().map(|row| row.data.len()).sum();
let mut data = Vec::with_capacity(nnz);
let mut indices = Vec::with_capacity(nnz);
let mut indptr = Vec::with_capacity(nrows + 1);
indptr.push(0);
for row in rows {
data.extend_from_slice(&row.data);
indices.extend_from_slice(&row.indices);
indptr.push(data.len());
}
CompressedSparseData::new_csr(&data, &indices, &indptr, (nrows, ncols))
}
#[cfg(test)]
mod test_data_struct {
use super::*;
#[test]
fn test_sparse_graph_to_edge_list() {
let graph = CoordinateList {
row_indices: vec![0, 0, 1, 2],
col_indices: vec![1, 2, 2, 0],
values: vec![1.0, 2.0, 3.0, 4.0],
n_samples: 3,
};
let edges = graph.to_edge_list();
assert_eq!(edges.len(), 4);
assert_eq!(edges[0], (0, 1, 1.0));
assert_eq!(edges[1], (0, 2, 2.0));
assert_eq!(edges[2], (1, 2, 3.0));
assert_eq!(edges[3], (2, 0, 4.0));
}
#[test]
fn test_compressed_sparse_format() {
let csc = CompressedSparseFormat::Csc;
let csr = CompressedSparseFormat::Csr;
assert!(csc.is_csc());
assert!(!csc.is_csr());
assert!(csr.is_csr());
assert!(!csr.is_csc());
}
#[test]
fn test_csr_to_csc_conversion() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
let indices = vec![0, 2, 1, 0, 2]; let indptr = vec![0, 2, 3, 5];
let csr = CompressedSparseData::new_csr(&data, &indices, &indptr, (3, 3));
let csc = csr.transform();
assert!(csc.cs_type.is_csc());
assert_eq!(csc.shape(), (3, 3));
assert_eq!(csc.get_nnz(), 5);
assert_eq!(csc.indptr, vec![0, 2, 3, 5]);
}
#[test]
fn test_csc_to_csr_conversion() {
let data = vec![1.0, 4.0, 3.0, 2.0, 5.0];
let indices = vec![0, 2, 1, 0, 2]; let indptr = vec![0, 2, 3, 5];
let csc = CompressedSparseData::new_csc(&data, &indices, &indptr, (3, 3));
let csr = csc.transform();
assert!(csr.cs_type.is_csr());
assert_eq!(csr.shape(), (3, 3));
assert_eq!(csr.get_nnz(), 5);
}
#[test]
fn test_transform_roundtrip() {
let data = vec![1.0, 2.0, 3.0];
let indices = vec![0, 1, 2];
let indptr = vec![0, 1, 2, 3];
let csr = CompressedSparseData::new_csr(&data, &indices, &indptr, (3, 3));
let csc = csr.transform();
let csr_again = csc.transform();
assert!(csr_again.cs_type.is_csr());
assert_eq!(csr_again.get_nnz(), csr.get_nnz());
assert_eq!(csr_again.shape(), csr.shape());
}
#[test]
fn test_empty_sparse_matrix() {
let data: Vec<f64> = vec![];
let indices: Vec<usize> = vec![];
let indptr = vec![0, 0, 0];
let csr = CompressedSparseData::new_csr(&data, &indices, &indptr, (2, 2));
assert_eq!(csr.get_nnz(), 0);
let csc = csr.transform();
assert_eq!(csc.get_nnz(), 0);
}
#[test]
fn test_coo_to_csr_sorting_and_structure() {
let graph = CoordinateList {
row_indices: vec![0, 1, 0],
col_indices: vec![1, 2, 2],
values: vec![1.0, 3.0, 2.0],
n_samples: 3,
};
let csr = coo_to_csr(&graph);
assert!(csr.cs_type.is_csr());
assert_eq!(csr.shape(), (3, 3));
assert_eq!(csr.get_nnz(), 3);
assert_eq!(csr.data, vec![1.0, 2.0, 3.0]);
assert_eq!(csr.indices, vec![1, 2, 2]);
assert_eq!(csr.indptr, vec![0, 2, 3, 3]);
}
#[test]
fn test_coo_to_csr_empty_rows_and_gaps() {
let graph = CoordinateList {
row_indices: vec![0, 3],
col_indices: vec![1, 2],
values: vec![10.0, 20.0],
n_samples: 4,
};
let csr = coo_to_csr(&graph);
assert_eq!(csr.indptr, vec![0, 1, 1, 1, 2]);
assert_eq!(csr.data, vec![10.0, 20.0]);
assert_eq!(csr.indices, vec![1, 2]);
}
}