/// Crate version string, resolved from Cargo.toml at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Major version component.
// NOTE(review): the numeric components below are hard-coded and must be
// kept in sync with the `version` field in Cargo.toml (assumed 0.1.0 here —
// TODO confirm; consider deriving from `CARGO_PKG_VERSION_MAJOR` instead).
pub const VERSION_MAJOR: u32 = 0;
/// Minor version component (see note on `VERSION_MAJOR`).
pub const VERSION_MINOR: u32 = 1;
/// Patch version component (see note on `VERSION_MAJOR`).
pub const VERSION_PATCH: u32 = 0;
use std::collections::HashMap;
use torsh_core::{DType, DeviceType, Result, Shape, TorshError};
use torsh_tensor::Tensor;
/// Crate-wide result alias over `torsh_core::Result`.
pub type TorshResult<T> = Result<T>;
pub mod autograd;
pub mod bsr;
pub mod conversions;
pub mod coo;
pub mod csc;
pub mod csr;
pub mod custom_kernels;
pub mod dia;
pub mod dsr;
pub mod ell;
pub mod gpu;
pub mod hdf5_support;
pub mod hybrid;
pub mod layers;
pub mod linalg;
pub mod matlab_compat;
pub mod matrix_market;
pub mod memory_management;
pub mod nn;
pub mod ops;
pub mod optimizers;
pub mod pattern_analysis;
pub mod performance_tools;
pub mod rle;
pub mod scipy_sparse;
#[cfg(feature = "scirs2-integration")]
pub mod scirs2_sparse_integration;
pub mod symmetric;
pub mod unified_interface;
pub use bsr::BsrTensor;
pub use coo::CooTensor;
pub use csc::CscTensor;
pub use csr::CsrTensor;
pub use dia::DiaTensor;
pub use dsr::DsrTensor;
pub use ell::EllTensor;
pub use rle::RleTensor;
pub use symmetric::{SymmetricMode, SymmetricTensor};
pub use gpu::{CudaSparseOps, CudaSparseTensor, CudaSparseTensorFactory};
pub use autograd::{SparseAutogradTensor, SparseData, SparseGradFn, SparseGradientAccumulator};
#[cfg(feature = "scirs2-integration")]
pub use scirs2_sparse_integration::{
create_gpu_sparse_processor, create_nn_sparse_processor, create_sparse_processor,
SciRS2SparseProcessor, SparseConfig as ScirsSparseConfig,
};
pub use nn::{
Format,
GraphConvolution,
InitConfig,
LayerConfig,
SparseAdam,
SparseAttention,
SparseConv2d,
SparseConverter,
SparseEmbedding,
SparseEmbeddingStats,
SparseInitConfig,
SparseLayer,
SparseLayerConfig,
SparseLinear,
SparseMemoryStats,
SparseOptimizer,
SparsePatternAnalysis,
SparseSGD,
SparseStats,
SparseWeightGenerator,
};
pub use hybrid::{auto_select_format, HybridTensor, PartitionStrategy, SparsityPattern};
pub use pattern_analysis::{
AdvancedSparsityPattern, ClusteringAlgorithm, MatrixReorderer, PatternAnalyzer,
PatternStatistics, PatternVisualizer, ReorderingAlgorithm,
};
pub use performance_tools::{
AutoTuner, BenchmarkConfig, CachePerformanceResult, HardwareBenchmark, MemoryAnalysis,
OperationStatistics, PerformanceExporter, PerformanceMeasurement, PerformanceReport, PlotData,
SparseProfiler, SystemInfo, TensorBoardExporter, TrendAnalysis, TrendAnalyzer, TrendDirection,
};
pub use matrix_market::{
MatrixMarketField, MatrixMarketFormat, MatrixMarketHeader, MatrixMarketIO, MatrixMarketObject,
MatrixMarketSize, MatrixMarketSymmetry, MatrixMarketUtils,
};
pub use custom_kernels::{
ElementWiseKernels, FormatConversionKernels, KernelDispatcher, ReductionKernels,
SparseMatMulKernels,
};
pub use scipy_sparse::{ScipyFormat, ScipySparseData, ScipySparseIntegration};
pub use matlab_compat::{
export_to_matlab_script, matlab_sparse_from_triplets, MatlabSparseCompat, MatlabSparseMatrix,
};
pub use hdf5_support::{load_sparse_matrix, save_sparse_matrix, Hdf5SparseIO, Hdf5SparseMetadata};
pub use unified_interface::{
AccessPatterns, MemoryStats, OptimizationConfig, OptimizationFlags, OptimizationReport,
PerformanceHints, PerformanceSummary, TensorMetadata, UnifiedSparseTensor,
UnifiedSparseTensorFactory,
};
pub use memory_management::{
create_sparse_with_memory_management, MemoryAwareSparseBuilder, MemoryPoolConfig, MemoryReport,
MemoryStatistics, SparseMemoryHandle, SparseMemoryManager, SparseMemoryPool,
};
pub use conversions::{direct_conversions, optimization, patterns, validation, ConversionHints};
/// Identifier for each sparse storage format supported by this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SparseFormat {
    /// Coordinate list: (row, col, value) triplets.
    Coo,
    /// Compressed sparse row.
    Csr,
    /// Compressed sparse column.
    Csc,
    /// Block sparse row (see the `bsr` module).
    Bsr,
    /// Diagonal storage (see the `dia` module).
    Dia,
    /// DSR format (see the `dsr` module).
    Dsr,
    /// ELL/ELLPACK format (see the `ell` module).
    Ell,
    /// Run-length encoded storage.
    Rle,
    /// Symmetric storage keeping a single triangle.
    Symmetric,
}
/// Common interface implemented by every sparse tensor storage format.
pub trait SparseTensor {
    /// Storage format tag for this tensor.
    fn format(&self) -> SparseFormat;
    /// Logical (dense) shape of the tensor.
    fn shape(&self) -> &Shape;
    /// Element data type.
    fn dtype(&self) -> DType;
    /// Device the data lives on.
    fn device(&self) -> DeviceType;
    /// Number of explicitly stored (non-zero) entries.
    fn nnz(&self) -> usize;
    /// Materializes the tensor as a dense `Tensor`.
    fn to_dense(&self) -> TorshResult<Tensor>;
    /// Converts to coordinate (COO) format.
    fn to_coo(&self) -> TorshResult<CooTensor>;
    /// Converts to compressed sparse row (CSR) format.
    fn to_csr(&self) -> TorshResult<CsrTensor>;
    /// Converts to compressed sparse column (CSC) format.
    fn to_csc(&self) -> TorshResult<CscTensor>;
    /// Fraction of elements that are zero, in `[0, 1]`.
    /// A tensor with zero total elements reports a sparsity of `0.0`.
    fn sparsity(&self) -> f32 {
        let total_elements = self.shape().numel();
        if total_elements == 0 {
            0.0
        } else {
            1.0 - (self.nnz() as f32 / total_elements as f32)
        }
    }
    /// Escape hatch for downcasting to the concrete format type.
    fn as_any(&self) -> &dyn std::any::Any;
}
/// Converts a dense tensor into a sparse tensor stored in `format`.
///
/// `threshold` defaults to `0.0` when `None`; how values are compared
/// against it is defined by each format's `from_dense` constructor.
///
/// # Errors
/// Propagates any error raised by the underlying format constructor.
pub fn sparse_from_dense(
    dense: &Tensor,
    format: SparseFormat,
    threshold: Option<f32>,
) -> TorshResult<Box<dyn SparseTensor + Send + Sync>> {
    let eps = threshold.unwrap_or(0.0);
    let boxed: Box<dyn SparseTensor + Send + Sync> = match format {
        SparseFormat::Coo => Box::new(CooTensor::from_dense(dense, eps)?),
        SparseFormat::Csr => Box::new(CsrTensor::from_dense(dense, eps)?),
        SparseFormat::Csc => Box::new(CscTensor::from_dense(dense, eps)?),
        // BSR is built by round-tripping through COO with a fixed 2x2
        // block size (matches `convert_sparse_format`).
        SparseFormat::Bsr => {
            let coo = CooTensor::from_dense(dense, eps)?;
            Box::new(BsrTensor::from_coo(&coo, (2, 2))?)
        }
        SparseFormat::Dia => Box::new(DiaTensor::from_dense(dense, eps)?),
        SparseFormat::Dsr => Box::new(DsrTensor::from_dense(dense, eps)?),
        SparseFormat::Ell => Box::new(EllTensor::from_dense(dense, eps)?),
        SparseFormat::Rle => Box::new(RleTensor::from_dense(dense, eps)?),
        // Symmetric storage keeps the upper triangle by convention here.
        SparseFormat::Symmetric => Box::new(SymmetricTensor::from_dense(
            dense,
            SymmetricMode::Upper,
            eps,
        )?),
    };
    Ok(boxed)
}
/// Like [`sparse_from_dense`], but lets the library choose the storage
/// format by analyzing the tensor's sparsity pattern first.
///
/// # Errors
/// Propagates errors from format selection or the conversion itself.
pub fn sparse_auto_from_dense(
    dense: &Tensor,
    threshold: Option<f32>,
) -> TorshResult<Box<dyn SparseTensor + Send + Sync>> {
    let eps = threshold.unwrap_or(0.0);
    let chosen = hybrid::auto_select_format(dense, eps)?;
    sparse_from_dense(dense, chosen, Some(eps))
}
/// Builds a [`HybridTensor`] from a dense tensor using the given
/// partitioning `strategy`. `threshold` defaults to `0.0` when `None`.
///
/// # Errors
/// Propagates errors from the COO conversion or hybrid construction.
pub fn sparse_hybrid_from_dense(
    dense: &Tensor,
    strategy: PartitionStrategy,
    threshold: Option<f32>,
) -> TorshResult<HybridTensor> {
    let coo = CooTensor::from_dense(dense, threshold.unwrap_or(0.0))?;
    HybridTensor::from_sparse(coo, strategy)
}
/// Tunable knobs for dense→sparse conversion; consumed by
/// `sparse_from_dense_with_config`.
#[derive(Debug, Clone)]
pub struct FormatConfig {
    /// Magnitude threshold below which dense values are treated as zero.
    pub threshold: f32,
    /// Density cutoff in `[0.0, 1.0]` used by format-selection heuristics.
    pub dense_threshold: f32,
    /// Block dimensions for block-based formats/partitioning.
    pub block_size: (usize, usize),
    /// When true, build a hybrid tensor instead of a single-format one.
    pub enable_hybrid: bool,
    /// When true, run sparsity-pattern analysis to guide partitioning.
    pub analyze_patterns: bool,
}
impl Default for FormatConfig {
    /// Balanced defaults: drop exact zeros only, 10% density cutoff,
    /// 32x32 blocks, hybrid disabled, pattern analysis enabled.
    fn default() -> Self {
        Self {
            threshold: 0.0,
            dense_threshold: 0.1,
            block_size: (32, 32),
            enable_hybrid: false,
            analyze_patterns: true,
        }
    }
}
impl FormatConfig {
    /// Preset tuned to minimize memory footprint: tiny drop threshold,
    /// small blocks, hybrid partitioning and pattern analysis enabled.
    pub fn memory_optimized() -> Self {
        Self {
            threshold: 1e-12,
            dense_threshold: 0.05,
            block_size: (16, 16),
            enable_hybrid: true,
            analyze_patterns: true,
        }
    }
    /// Preset tuned for throughput: coarser drop threshold, larger blocks,
    /// and no hybrid-partitioning or pattern-analysis overhead.
    pub fn performance_optimized() -> Self {
        Self {
            threshold: 1e-8,
            dense_threshold: 0.2,
            block_size: (64, 64),
            enable_hybrid: false,
            analyze_patterns: false,
        }
    }
    /// Checks the configuration for internal consistency.
    ///
    /// # Errors
    /// Returns `TorshError::InvalidArgument` when `threshold` is negative or
    /// NaN, when `dense_threshold` lies outside `[0.0, 1.0]` or is NaN, or
    /// when either block dimension is zero.
    pub fn validate(&self) -> TorshResult<()> {
        // Check NaN explicitly: `threshold < 0.0` is false for NaN, so a
        // NaN threshold would otherwise pass validation silently.
        if self.threshold.is_nan() || self.threshold < 0.0 {
            return Err(TorshError::InvalidArgument(
                "Threshold must be non-negative".to_string(),
            ));
        }
        // `RangeInclusive::contains` returns false for NaN, so NaN is
        // rejected here as well (the old two-comparison form let it pass).
        if !(0.0..=1.0).contains(&self.dense_threshold) {
            return Err(TorshError::InvalidArgument(
                "Dense threshold must be between 0.0 and 1.0".to_string(),
            ));
        }
        if self.block_size.0 == 0 || self.block_size.1 == 0 {
            return Err(TorshError::InvalidArgument(
                "Block size dimensions must be positive".to_string(),
            ));
        }
        Ok(())
    }
    /// Builder-style setter for `threshold`.
    pub fn with_threshold(mut self, threshold: f32) -> Self {
        self.threshold = threshold;
        self
    }
    /// Builder-style setter for `block_size`.
    pub fn with_block_size(mut self, block_size: (usize, usize)) -> Self {
        self.block_size = block_size;
        self
    }
    /// Builder-style setter for `enable_hybrid`.
    pub fn with_hybrid(mut self, enable: bool) -> Self {
        self.enable_hybrid = enable;
        self
    }
}
/// Converts a dense tensor to sparse storage driven by a [`FormatConfig`].
///
/// With `enable_hybrid` set, a [`HybridTensor`] is built using either
/// pattern-based or block-based partitioning; otherwise the format is
/// auto-selected via [`sparse_auto_from_dense`].
///
/// # Errors
/// Fails when `config` is invalid or any conversion step fails.
pub fn sparse_from_dense_with_config(
    dense: &Tensor,
    config: FormatConfig,
) -> TorshResult<Box<dyn SparseTensor + Send + Sync>> {
    config.validate()?;
    // Non-hybrid path: delegate format selection entirely.
    if !config.enable_hybrid {
        return sparse_auto_from_dense(dense, Some(config.threshold));
    }
    let strategy = if config.analyze_patterns {
        PartitionStrategy::PatternBased
    } else {
        PartitionStrategy::BlockBased {
            block_size: config.block_size,
        }
    };
    let hybrid = sparse_hybrid_from_dense(dense, strategy, Some(config.threshold))?;
    Ok(Box::new(hybrid))
}
/// Converts a sparse tensor to `target_format`.
///
/// COO, CSR and CSC use the direct trait conversions; every other format
/// is reached by round-tripping through COO. BSR uses a fixed 2x2 block
/// size and Symmetric uses upper-triangle mode with a 1e-6 tolerance.
///
/// # Errors
/// Propagates any failure from the intermediate or final conversion.
pub fn convert_sparse_format(
    sparse: &dyn SparseTensor,
    target_format: SparseFormat,
) -> TorshResult<Box<dyn SparseTensor + Send + Sync>> {
    // Formats with a direct conversion on the trait return immediately.
    match target_format {
        SparseFormat::Coo => return Ok(Box::new(sparse.to_coo()?)),
        SparseFormat::Csr => return Ok(Box::new(sparse.to_csr()?)),
        SparseFormat::Csc => return Ok(Box::new(sparse.to_csc()?)),
        _ => {}
    }
    // Remaining formats are constructed from a COO intermediate.
    let coo = sparse.to_coo()?;
    Ok(match target_format {
        SparseFormat::Bsr => Box::new(BsrTensor::from_coo(&coo, (2, 2))?),
        SparseFormat::Dia => Box::new(DiaTensor::from_coo(&coo)?),
        SparseFormat::Dsr => Box::new(DsrTensor::from_coo(&coo)?),
        SparseFormat::Ell => Box::new(EllTensor::from_coo(&coo)?),
        SparseFormat::Rle => Box::new(RleTensor::from_coo(&coo)?),
        SparseFormat::Symmetric => {
            Box::new(SymmetricTensor::from_coo(&coo, SymmetricMode::Upper, 1e-6)?)
        }
        SparseFormat::Coo | SparseFormat::Csr | SparseFormat::Csc => {
            unreachable!("direct conversions handled above")
        }
    })
}
/// Summary produced by [`analyze_sparse_tensor`].
#[derive(Debug, Clone)]
pub struct SparseAnalysis {
    /// Format of the analyzed tensor.
    pub format: SparseFormat,
    /// Number of explicitly stored (non-zero) entries.
    pub nnz: usize,
    /// Fraction of zero elements, in `[0, 1]`.
    pub sparsity: f32,
    /// Format suggested by the pattern heuristics.
    pub recommended_format: SparseFormat,
    /// Detected structural sparsity pattern.
    pub pattern: SparsityPattern,
    /// Approximate bytes per stored non-zero for the current format.
    pub storage_efficiency: f32,
}
/// Result of [`compare_format_performance`]: per-format benchmarks plus
/// the winning recommendation.
#[derive(Debug, Clone)]
pub struct FormatPerformanceComparison {
    /// Structural analysis of the input tensor.
    pub tensor_info: SparseAnalysis,
    /// Benchmark results keyed by the format that was tested.
    pub format_results: HashMap<SparseFormat, FormatPerformanceResult>,
    /// Format with the lowest (best) performance score.
    pub recommended_format: SparseFormat,
    /// Ratio of the worst score to the best score (>= 1.0 in practice).
    pub improvement_factor: f32,
}
/// Benchmark measurements for a single sparse format.
#[derive(Debug, Clone)]
pub struct FormatPerformanceResult {
    /// Format that was benchmarked.
    pub format: SparseFormat,
    /// Estimated storage footprint in bytes (heuristic, not exact).
    pub memory_usage: usize,
    /// Time to create the tensor in this format, in nanoseconds.
    pub creation_time_ns: u64,
    /// Sparse matrix-vector multiply time, if it was measured.
    pub spmv_time_ns: Option<u64>,
    /// Time to convert from COO to this format, in nanoseconds.
    pub conversion_time_ns: u64,
    /// Weighted composite score; lower is better.
    pub performance_score: f32,
}
pub fn compare_format_performance(
sparse: &dyn SparseTensor,
include_operations: bool,
) -> TorshResult<FormatPerformanceComparison> {
let tensor_info = analyze_sparse_tensor(sparse)?;
let mut format_results = HashMap::new();
let coo = sparse.to_coo()?;
let formats_to_test = vec![
SparseFormat::Coo,
SparseFormat::Csr,
SparseFormat::Csc,
SparseFormat::Bsr,
SparseFormat::Dia,
SparseFormat::Dsr,
SparseFormat::Ell,
SparseFormat::Rle,
SparseFormat::Symmetric,
];
for format in formats_to_test {
let result = benchmark_format_performance(&coo, format, include_operations)?;
format_results.insert(format, result);
}
let recommended_format = format_results
.iter()
.min_by(|a, b| {
a.1.performance_score
.partial_cmp(&b.1.performance_score)
.unwrap_or(std::cmp::Ordering::Equal)
})
.map(|(format, _)| *format)
.unwrap_or(SparseFormat::Csr);
let best_score = format_results[&recommended_format].performance_score;
let worst_score = format_results
.values()
.map(|r| r.performance_score)
.fold(0.0f32, |a, b| a.max(b));
let improvement_factor = if best_score > 0.0 {
worst_score / best_score
} else {
1.0
};
Ok(FormatPerformanceComparison {
tensor_info,
format_results,
recommended_format,
improvement_factor,
})
}
/// Benchmarks a single format: converts `coo` into it, estimates memory,
/// optionally samples SpMV, and folds everything into a weighted score
/// (lower is better).
fn benchmark_format_performance(
    coo: &CooTensor,
    format: SparseFormat,
    include_operations: bool,
) -> TorshResult<FormatPerformanceResult> {
    use std::time::Instant;
    type Boxed = Box<dyn SparseTensor + Send + Sync>;
    let timer = Instant::now();
    let converted: Boxed = match format {
        SparseFormat::Coo => Box::new(coo.clone()),
        SparseFormat::Csr => Box::new(coo.to_csr()?),
        SparseFormat::Csc => Box::new(coo.to_csc()?),
        SparseFormat::Bsr => Box::new(BsrTensor::from_coo(coo, (2, 2))?),
        SparseFormat::Dia => Box::new(DiaTensor::from_coo(coo)?),
        SparseFormat::Dsr => Box::new(DsrTensor::from_coo(coo)?),
        SparseFormat::Ell => Box::new(EllTensor::from_coo(coo)?),
        SparseFormat::Rle => Box::new(RleTensor::from_coo(coo)?),
        SparseFormat::Symmetric => {
            Box::new(SymmetricTensor::from_coo(coo, SymmetricMode::Upper, 1e-6)?)
        }
    };
    let conversion_time_ns = timer.elapsed().as_nanos() as u64;
    let memory_usage = estimate_memory_usage(&*converted);
    // SpMV is only sampled on small matrices to keep the benchmark cheap;
    // a failed measurement is treated the same as "not measured".
    let spmv_time_ns = (include_operations && coo.shape().dims()[0] <= 1000)
        .then(|| measure_spmv_performance(&*converted).ok())
        .flatten();
    let memory_per_nnz = match converted.nnz() {
        0 => 0.0,
        nnz => memory_usage as f32 / nnz as f32,
    };
    let conversion_ms = conversion_time_ns as f32 / 1_000_000.0;
    // Base weights: 30% memory-per-nnz, 20% conversion time.
    let mut performance_score = memory_per_nnz * 0.3 + conversion_ms * 0.2;
    match spmv_time_ns {
        // With an SpMV sample, it contributes the remaining 50%.
        Some(spmv_ns) => performance_score += (spmv_ns as f32 / 1_000_000.0) * 0.5,
        // Without one, redistribute that weight to memory and conversion.
        None => {
            performance_score += memory_per_nnz * 0.25;
            performance_score += conversion_ms * 0.25;
        }
    }
    Ok(FormatPerformanceResult {
        format,
        memory_usage,
        // Creation is not timed separately from conversion today.
        creation_time_ns: conversion_time_ns,
        spmv_time_ns,
        conversion_time_ns,
        performance_score,
    })
}
/// Rough per-format storage estimate in bytes. The per-entry byte counts
/// are heuristics, not exact layouts; CSR/CSC also charge 4 bytes per
/// row/column for the pointer array.
fn estimate_memory_usage(tensor: &dyn SparseTensor) -> usize {
    let nnz = tensor.nnz();
    let dims = tensor.shape().dims();
    match tensor.format() {
        // COO: row index + column index + value per entry.
        SparseFormat::Coo => nnz * 12,
        SparseFormat::Csr => nnz * 8 + dims[0] * 4,
        SparseFormat::Csc => nnz * 8 + dims[1] * 4,
        SparseFormat::Bsr | SparseFormat::Dia | SparseFormat::Ell => nnz * 8,
        SparseFormat::Dsr => nnz * 16,
        SparseFormat::Rle | SparseFormat::Symmetric => nnz * 6,
    }
}
/// Times one sparse matrix-vector multiply against an all-ones vector,
/// in nanoseconds. Runs one untimed warm-up pass first.
fn measure_spmv_performance(tensor: &dyn SparseTensor) -> TorshResult<u64> {
    use std::time::Instant;
    use torsh_tensor::creation::ones;
    let vector = ones::<f32>(&[tensor.shape().dims()[1]])?;
    // Warm-up so first-touch allocation/caching doesn't skew the timing.
    let _ = crate::ops::spmm(tensor, &vector)?;
    let started = Instant::now();
    let _ = crate::ops::spmm(tensor, &vector)?;
    Ok(started.elapsed().as_nanos() as u64)
}
/// Analyzes a sparse tensor's structure and recommends a storage format.
///
/// The recommendation is driven by the detected [`SparsityPattern`]:
/// diagonal → DIA, banded → Symmetric/ELL, block-diagonal → BSR, and
/// random → RLE/COO/CSR depending on run structure and sparsity.
///
/// # Errors
/// Fails if the COO conversion or the pattern analysis fails.
pub fn analyze_sparse_tensor(sparse: &dyn SparseTensor) -> TorshResult<SparseAnalysis> {
    let format = sparse.format();
    let nnz = sparse.nnz();
    let sparsity = sparse.sparsity();
    let shape = sparse.shape();
    let coo = sparse.to_coo()?;
    let triplets = coo.triplets();
    let pattern = hybrid::HybridTensor::analyze_sparsity_pattern(&triplets, shape)?;
    let recommended_format = match pattern {
        SparsityPattern::Diagonal => SparseFormat::Dia,
        SparsityPattern::Banded { .. } => {
            if is_matrix_symmetric(&coo) {
                SparseFormat::Symmetric
            } else {
                SparseFormat::Ell
            }
        }
        SparsityPattern::BlockDiagonal { .. } => SparseFormat::Bsr,
        SparsityPattern::Random => {
            if has_consecutive_patterns(&coo) {
                SparseFormat::Rle
            } else if sparsity > 0.9 {
                SparseFormat::Coo
            } else {
                SparseFormat::Csr
            }
        }
    };
    // Approximate bytes per stored non-zero for the *current* format.
    // Guard nnz == 0: the CSR/CSC terms divide by nnz and would otherwise
    // produce +inf for an empty tensor.
    let storage_efficiency = if nnz == 0 {
        0.0
    } else {
        match format {
            SparseFormat::Coo => 12.0,
            SparseFormat::Csr => 8.0 + (4.0 * shape.dims()[0] as f32 / nnz as f32),
            SparseFormat::Csc => 8.0 + (4.0 * shape.dims()[1] as f32 / nnz as f32),
            SparseFormat::Bsr => 8.0,
            SparseFormat::Dia => 8.0,
            SparseFormat::Dsr => 16.0,
            SparseFormat::Ell => 8.0,
            SparseFormat::Rle => 6.0,
            SparseFormat::Symmetric => 6.0,
        }
    };
    Ok(SparseAnalysis {
        format,
        nnz,
        sparsity,
        recommended_format,
        pattern,
        storage_efficiency,
    })
}
/// Returns `true` when every off-diagonal entry `(i, j)` has a matching
/// `(j, i)` entry whose value agrees within an absolute tolerance of 1e-6.
/// A missing mirror entry makes the matrix asymmetric.
fn is_matrix_symmetric(coo: &CooTensor) -> bool {
    use std::collections::HashMap;
    let triplets = coo.triplets();
    // Index every entry by position for O(1) mirror lookups.
    let lookup: HashMap<(usize, usize), f32> = triplets
        .iter()
        .map(|&(row, col, value)| ((row, col), value))
        .collect();
    triplets.iter().all(|&(row, col, value)| {
        row == col
            || lookup
                .get(&(col, row))
                .map_or(false, |&mirror| (value - mirror).abs() <= 1e-6)
    })
}
/// Heuristic: `true` when more than 30% of same-row neighbour pairs sit at
/// consecutive column positions, which favours run-length encoding.
fn has_consecutive_patterns(coo: &CooTensor) -> bool {
    use std::collections::HashMap;
    let triplets = coo.triplets();
    // Group column indices by row.
    let mut columns_by_row: HashMap<usize, Vec<usize>> = HashMap::new();
    for (row, col, _) in &triplets {
        columns_by_row.entry(*row).or_default().push(*col);
    }
    let mut consecutive = 0usize;
    let mut total = 0usize;
    for cols in columns_by_row.values_mut() {
        cols.sort_unstable();
        total += cols.len();
        consecutive += cols.windows(2).filter(|pair| pair[1] == pair[0] + 1).count();
    }
    total != 0 && (consecutive as f32 / total as f32) > 0.3
}
/// Convenience re-exports of the most commonly used sparse types and
/// entry points, intended for `use torsh_sparse::prelude::*;`.
pub mod prelude {
    pub use crate::autograd::{
        SparseAutogradTensor, SparseData, SparseGradFn, SparseGradientAccumulator,
    };
    pub use crate::bsr::BsrTensor;
    pub use crate::coo::CooTensor;
    pub use crate::csc::CscTensor;
    pub use crate::csr::CsrTensor;
    pub use crate::dia::DiaTensor;
    pub use crate::dsr::DsrTensor;
    pub use crate::ell::EllTensor;
    pub use crate::gpu::{CudaSparseOps, CudaSparseTensor, CudaSparseTensorFactory};
    pub use crate::rle::RleTensor;
    pub use crate::symmetric::{SymmetricMode, SymmetricTensor};
    // Top-level helpers and the core trait/format enum.
    pub use crate::{
        analyze_sparse_tensor, compare_format_performance, sparse_from_dense, SparseFormat,
        SparseTensor,
    };
}
#[cfg(test)]
mod tests {
    use super::*;
    use torsh_tensor::creation::zeros;
    // An all-zero dense tensor should round-trip format and shape through COO.
    #[test]
    fn test_sparse_format() {
        let dense = zeros::<f32>(&[3, 4]).unwrap();
        let sparse = sparse_from_dense(&dense, SparseFormat::Coo, None).unwrap();
        assert_eq!(sparse.format(), SparseFormat::Coo);
        assert_eq!(sparse.shape(), &dense.shape());
    }
    // The comparison must cover every candidate format and always contain
    // the format it recommends; scores are non-negative by construction.
    #[test]
    fn test_format_performance_comparison() {
        let triplets = vec![(0, 0, 1.0f32), (1, 1, 2.0f32), (2, 2, 3.0f32)];
        let coo = CooTensor::from_triplets(triplets, (10, 10)).unwrap();
        let comparison = compare_format_performance(&coo, false).unwrap();
        assert!(!comparison.format_results.is_empty());
        assert!(comparison.improvement_factor >= 1.0);
        assert!(comparison.format_results.contains_key(&SparseFormat::Coo));
        assert!(comparison
            .format_results
            .contains_key(&comparison.recommended_format));
        for result in comparison.format_results.values() {
            assert!(result.performance_score >= 0.0);
            assert!(result.memory_usage > 0);
        }
    }
    // A pure diagonal matrix should be detected and steered towards DIA.
    #[test]
    fn test_sparse_analysis() {
        let triplets = vec![(0, 0, 1.0f32), (1, 1, 2.0f32), (2, 2, 3.0f32)];
        let coo = CooTensor::from_triplets(triplets, (3, 3)).unwrap();
        let analysis = analyze_sparse_tensor(&coo).unwrap();
        assert_eq!(analysis.format, SparseFormat::Coo);
        assert_eq!(analysis.nnz, 3);
        assert!(analysis.sparsity > 0.0 && analysis.sparsity <= 1.0);
        assert!(analysis.storage_efficiency > 0.0);
        assert_eq!(analysis.recommended_format, SparseFormat::Dia);
    }
}