// tensorlogic_scirs_backend/blocked_sparse.rs
1//! Blocked Sparse Row (BSR) format tensor operations.
2//!
3//! This module provides a Blocked Sparse Row (BSR) representation for tensors
4//! where non-zero data is organized in fixed-size dense blocks. BSR is particularly
5//! efficient when there is block-level sparsity — the matrix is sparse at the block
6//! granularity but dense within each stored block (e.g., neural network weight matrices
7//! pruned with structured/block sparsity).
8//!
9//! # Format Overview
10//!
11//! For a matrix of shape `(M, K)` with block size `(br, bc)`:
12//! - The logical matrix is divided into `(M/br) × (K/bc)` blocks.
13//! - Only blocks where at least one element exceeds a threshold are stored.
14//! - `block_row_ptr[i]` .. `block_row_ptr[i+1]` are the indices into `block_col_idx`
15//!   and `data` for block-row `i` (CSR-style indirection).
16//!
17//! # Example
18//!
19//! ```rust
20//! use scirs2_core::ndarray::Array2;
21//! use tensorlogic_scirs_backend::blocked_sparse::{BlockedSparseTensor, BlockSparsityStats};
22//!
23//! let dense = Array2::<f64>::eye(4);
24//! let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10).unwrap();
25//! println!("NNZ blocks: {}", bst.nnz_blocks());
26//! println!("Sparsity: {:.2}", bst.sparsity());
27//!
28//! let stats = BlockSparsityStats::compute(&bst);
29//! println!("Compression ratio: {:.2}", stats.compression_ratio);
30//! ```
31
32use scirs2_core::ndarray::{s, Array2, ArrayD};
33use std::fmt;
34
35// ─────────────────────────────────────────────────────────────────────────────
36// Error type
37// ─────────────────────────────────────────────────────────────────────────────
38
/// Errors that can arise from blocked sparse tensor operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BlockedSparseError {
    /// The matrix dimension is not evenly divisible by the requested block size.
    DimensionNotDivisibleByBlock { dim: usize, block: usize },
    /// A block index (block_row, block_col) is out of range for the tensor.
    BlockIndexOutOfBounds { row: usize, col: usize },
    /// Shape mismatch between two operands.
    ShapeMismatch {
        expected: (usize, usize),
        got: (usize, usize),
    },
    /// Incompatible inner dimensions for matrix multiplication.
    IncompatibleDimensions { lhs_cols: usize, rhs_rows: usize },
    /// The matrix has no rows or no columns.
    EmptyMatrix,
    /// Block size of zero was requested.
    ZeroBlockSize,
}

impl fmt::Display for BlockedSparseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Local alias keeps the match arms compact.
        use BlockedSparseError as E;
        match self {
            E::DimensionNotDivisibleByBlock { dim, block } => write!(
                f,
                "dimension {} is not divisible by block size {}",
                dim, block
            ),
            E::BlockIndexOutOfBounds { row, col } => {
                write!(f, "block index ({}, {}) is out of bounds", row, col)
            }
            E::ShapeMismatch { expected, got } => write!(
                f,
                "shape mismatch: expected ({}, {}), got ({}, {})",
                expected.0, expected.1, got.0, got.1
            ),
            E::IncompatibleDimensions { lhs_cols, rhs_rows } => write!(
                f,
                "incompatible dimensions for matmul: lhs has {} columns but rhs has {} rows",
                lhs_cols, rhs_rows
            ),
            E::EmptyMatrix => write!(f, "matrix has zero rows or zero columns"),
            E::ZeroBlockSize => write!(f, "block size must be greater than zero"),
        }
    }
}

impl std::error::Error for BlockedSparseError {}
88
89// ─────────────────────────────────────────────────────────────────────────────
90// Core BSR struct
91// ─────────────────────────────────────────────────────────────────────────────
92
/// Blocked Sparse Row (BSR) format tensor.
///
/// Stores non-zero blocks of fixed size `(block_rows × block_cols)`.
/// The indexing structure mirrors the Compressed Sparse Row (CSR) format,
/// but each "element" is a dense 2-D block instead of a scalar.
///
/// # Invariants
///
/// - `nrows % block_rows == 0`
/// - `ncols % block_cols == 0`
/// - `block_row_ptr.len() == num_block_rows() + 1`
/// - `block_col_idx.len() == data.len() == nnz_blocks()`
/// - Every block in `data` has shape `(block_rows, block_cols)`
/// - Column indices within each block-row are sorted in ascending order
///
/// All fields are public; mutating them directly can break the invariants
/// above — prefer `from_dense`/`empty` plus `set_block`.
#[derive(Debug, Clone)]
pub struct BlockedSparseTensor {
    /// Total number of rows in the logical matrix.
    pub nrows: usize,
    /// Total number of columns in the logical matrix.
    pub ncols: usize,
    /// Height of each block (in scalar elements).
    pub block_rows: usize,
    /// Width of each block (in scalar elements).
    pub block_cols: usize,
    /// CSR-style row pointer into `block_col_idx` / `data`.
    /// Length is `num_block_rows() + 1`.
    pub block_row_ptr: Vec<usize>,
    /// Block-column index for each stored block.
    pub block_col_idx: Vec<usize>,
    /// Dense data for each stored block; shape is `(block_rows, block_cols)`.
    pub data: Vec<Array2<f64>>,
}
125
126impl BlockedSparseTensor {
127    // ── Constructors ─────────────────────────────────────────────────────────
128
129    /// Create a BSR tensor from a dense 2-D array.
130    ///
131    /// Blocks whose maximum absolute value is ≤ `threshold` are treated as
132    /// zero and **not** stored.
133    pub fn from_dense(
134        dense: &Array2<f64>,
135        block_rows: usize,
136        block_cols: usize,
137        threshold: f64,
138    ) -> Result<Self, BlockedSparseError> {
139        if block_rows == 0 || block_cols == 0 {
140            return Err(BlockedSparseError::ZeroBlockSize);
141        }
142
143        let (nrows, ncols) = dense.dim();
144
145        if nrows == 0 || ncols == 0 {
146            return Err(BlockedSparseError::EmptyMatrix);
147        }
148
149        if !nrows.is_multiple_of(block_rows) {
150            return Err(BlockedSparseError::DimensionNotDivisibleByBlock {
151                dim: nrows,
152                block: block_rows,
153            });
154        }
155
156        if !ncols.is_multiple_of(block_cols) {
157            return Err(BlockedSparseError::DimensionNotDivisibleByBlock {
158                dim: ncols,
159                block: block_cols,
160            });
161        }
162
163        let nbr = nrows / block_rows;
164        let nbc = ncols / block_cols;
165
166        let mut block_row_ptr = vec![0usize; nbr + 1];
167        let mut block_col_idx: Vec<usize> = Vec::new();
168        let mut data: Vec<Array2<f64>> = Vec::new();
169
170        for br in 0..nbr {
171            let row_start = br * block_rows;
172            let row_end = row_start + block_rows;
173
174            for bc in 0..nbc {
175                let col_start = bc * block_cols;
176                let col_end = col_start + block_cols;
177
178                let block = dense.slice(s![row_start..row_end, col_start..col_end]);
179
180                // Check if the block is non-zero (any |entry| > threshold).
181                let is_nonzero = block.iter().any(|&v| v.abs() > threshold);
182                if is_nonzero {
183                    block_col_idx.push(bc);
184                    data.push(block.to_owned());
185                }
186            }
187
188            block_row_ptr[br + 1] = block_col_idx.len();
189        }
190
191        Ok(BlockedSparseTensor {
192            nrows,
193            ncols,
194            block_rows,
195            block_cols,
196            block_row_ptr,
197            block_col_idx,
198            data,
199        })
200    }
201
202    /// Create an empty BSR tensor with the given dimensions and block sizes.
203    ///
204    /// No blocks are stored initially; use `set_block` to populate.
205    pub fn empty(
206        nrows: usize,
207        ncols: usize,
208        block_rows: usize,
209        block_cols: usize,
210    ) -> Result<Self, BlockedSparseError> {
211        if block_rows == 0 || block_cols == 0 {
212            return Err(BlockedSparseError::ZeroBlockSize);
213        }
214        if nrows == 0 || ncols == 0 {
215            return Err(BlockedSparseError::EmptyMatrix);
216        }
217        if !nrows.is_multiple_of(block_rows) {
218            return Err(BlockedSparseError::DimensionNotDivisibleByBlock {
219                dim: nrows,
220                block: block_rows,
221            });
222        }
223        if !ncols.is_multiple_of(block_cols) {
224            return Err(BlockedSparseError::DimensionNotDivisibleByBlock {
225                dim: ncols,
226                block: block_cols,
227            });
228        }
229
230        let nbr = nrows / block_rows;
231        Ok(BlockedSparseTensor {
232            nrows,
233            ncols,
234            block_rows,
235            block_cols,
236            block_row_ptr: vec![0usize; nbr + 1],
237            block_col_idx: Vec::new(),
238            data: Vec::new(),
239        })
240    }
241
242    // ── Dimension helpers ────────────────────────────────────────────────────
243
244    /// Number of block-rows in the matrix.
245    #[inline]
246    pub fn num_block_rows(&self) -> usize {
247        self.nrows / self.block_rows
248    }
249
250    /// Number of block-columns in the matrix.
251    #[inline]
252    pub fn num_block_cols(&self) -> usize {
253        self.ncols / self.block_cols
254    }
255
256    /// Number of stored (non-zero) blocks.
257    #[inline]
258    pub fn nnz_blocks(&self) -> usize {
259        self.data.len()
260    }
261
262    /// Fraction of blocks that are **zero** (not stored).
263    ///
264    /// Returns `1.0` when no blocks are stored, `0.0` when all blocks are stored.
265    pub fn sparsity(&self) -> f64 {
266        let total = self.num_block_rows() * self.num_block_cols();
267        if total == 0 {
268            return 1.0;
269        }
270        let nnz = self.nnz_blocks();
271        1.0 - (nnz as f64 / total as f64)
272    }
273
274    /// Approximate memory usage in bytes (stack + heap).
275    ///
276    /// Counts:
277    /// - `block_row_ptr` vector elements
278    /// - `block_col_idx` vector elements
279    /// - all dense block data (`block_rows × block_cols × 8` bytes each)
280    pub fn memory_bytes(&self) -> usize {
281        let ptr_bytes = self.block_row_ptr.len() * std::mem::size_of::<usize>();
282        let idx_bytes = self.block_col_idx.len() * std::mem::size_of::<usize>();
283        let block_element_bytes = self.block_rows * self.block_cols * std::mem::size_of::<f64>();
284        let data_bytes = self.nnz_blocks() * block_element_bytes;
285        ptr_bytes + idx_bytes + data_bytes
286    }
287
288    // ── Block access ─────────────────────────────────────────────────────────
289
290    /// Retrieve a reference to the stored block at logical block position
291    /// `(block_row, block_col)`.
292    ///
293    /// Returns `None` if the block is not stored (i.e. it is a zero block).
294    pub fn get_block(&self, block_row: usize, block_col: usize) -> Option<&Array2<f64>> {
295        if block_row >= self.num_block_rows() || block_col >= self.num_block_cols() {
296            return None;
297        }
298        let start = self.block_row_ptr[block_row];
299        let end = self.block_row_ptr[block_row + 1];
300        // Column indices within a block-row are sorted; use binary search.
301        match self.block_col_idx[start..end].binary_search(&block_col) {
302            Ok(relative_pos) => Some(&self.data[start + relative_pos]),
303            Err(_) => None,
304        }
305    }
306
307    /// Insert or replace the block at `(block_row, block_col)`.
308    ///
309    /// If a block already exists at that position it is replaced in-place.
310    /// Otherwise the block is inserted while maintaining the sorted column-index
311    /// invariant within the block-row.
312    ///
313    /// The provided `block` must have shape `(block_rows, block_cols)`.
314    pub fn set_block(
315        &mut self,
316        block_row: usize,
317        block_col: usize,
318        block: Array2<f64>,
319    ) -> Result<(), BlockedSparseError> {
320        if block_row >= self.num_block_rows() || block_col >= self.num_block_cols() {
321            return Err(BlockedSparseError::BlockIndexOutOfBounds {
322                row: block_row,
323                col: block_col,
324            });
325        }
326
327        let block_shape = block.dim();
328        if block_shape != (self.block_rows, self.block_cols) {
329            return Err(BlockedSparseError::ShapeMismatch {
330                expected: (self.block_rows, self.block_cols),
331                got: block_shape,
332            });
333        }
334
335        let start = self.block_row_ptr[block_row];
336        let end = self.block_row_ptr[block_row + 1];
337
338        match self.block_col_idx[start..end].binary_search(&block_col) {
339            Ok(relative_pos) => {
340                // Replace existing block.
341                self.data[start + relative_pos] = block;
342            }
343            Err(insert_offset) => {
344                // Insert new block, maintaining sorted order.
345                let abs_pos = start + insert_offset;
346                self.block_col_idx.insert(abs_pos, block_col);
347                self.data.insert(abs_pos, block);
348                // Increment all row pointers beyond this block-row.
349                for ptr in self.block_row_ptr[block_row + 1..].iter_mut() {
350                    *ptr += 1;
351                }
352            }
353        }
354
355        Ok(())
356    }
357
358    // ── Dense conversion ─────────────────────────────────────────────────────
359
360    /// Convert the BSR tensor to a dense `Array2<f64>`.
361    pub fn to_dense(&self) -> Array2<f64> {
362        let mut dense = Array2::<f64>::zeros((self.nrows, self.ncols));
363
364        for br in 0..self.num_block_rows() {
365            let row_start = br * self.block_rows;
366            let row_end = row_start + self.block_rows;
367            let start = self.block_row_ptr[br];
368            let end = self.block_row_ptr[br + 1];
369
370            for idx in start..end {
371                let bc = self.block_col_idx[idx];
372                let col_start = bc * self.block_cols;
373                let col_end = col_start + self.block_cols;
374
375                dense
376                    .slice_mut(s![row_start..row_end, col_start..col_end])
377                    .assign(&self.data[idx]);
378            }
379        }
380
381        dense
382    }
383}
384
385// ─────────────────────────────────────────────────────────────────────────────
386// Arithmetic operations
387// ─────────────────────────────────────────────────────────────────────────────
388
389/// Multiply a blocked sparse matrix `A` (M × K) by a dense matrix `B` (K × N),
390/// producing a dense result `C` (M × N).
391///
392/// This routine iterates only over stored (non-zero) blocks of `A`, performing a
393/// GEMM-style product for each and accumulating into the output.
394pub fn blocked_sparse_dense_mm(
395    a: &BlockedSparseTensor,
396    b: &Array2<f64>,
397) -> Result<Array2<f64>, BlockedSparseError> {
398    let (b_rows, b_cols) = b.dim();
399
400    if a.ncols != b_rows {
401        return Err(BlockedSparseError::IncompatibleDimensions {
402            lhs_cols: a.ncols,
403            rhs_rows: b_rows,
404        });
405    }
406
407    let mut c = Array2::<f64>::zeros((a.nrows, b_cols));
408
409    for br in 0..a.num_block_rows() {
410        let row_start = br * a.block_rows;
411        let row_end = row_start + a.block_rows;
412        let ptr_start = a.block_row_ptr[br];
413        let ptr_end = a.block_row_ptr[br + 1];
414
415        for idx in ptr_start..ptr_end {
416            let bc = a.block_col_idx[idx];
417            let col_start = bc * a.block_cols;
418            let col_end = col_start + a.block_cols;
419
420            // a_block: (block_rows, block_cols)
421            // b_slice: (block_cols, b_cols)
422            let a_block = &a.data[idx];
423            let b_slice = b.slice(s![col_start..col_end, ..]);
424
425            // GEMM: c_slice += a_block @ b_slice
426            let product = a_block.dot(&b_slice);
427            c.slice_mut(s![row_start..row_end, ..])
428                .scaled_add(1.0, &product);
429        }
430    }
431
432    Ok(c)
433}
434
435/// Multiply two blocked sparse matrices `A` (M × K) and `B` (K × N),
436/// producing a blocked sparse result `C` (M × N).
437///
438/// Both operands must use the **same block sizes**. The output uses those same
439/// block sizes. Blocks in `C` that accumulate to all-zeros (below a very tight
440/// threshold of `f64::EPSILON`) are **not** stored.
441pub fn blocked_sparse_mm(
442    a: &BlockedSparseTensor,
443    b: &BlockedSparseTensor,
444) -> Result<BlockedSparseTensor, BlockedSparseError> {
445    if a.ncols != b.nrows {
446        return Err(BlockedSparseError::IncompatibleDimensions {
447            lhs_cols: a.ncols,
448            rhs_rows: b.nrows,
449        });
450    }
451    if a.block_rows != b.block_rows || a.block_cols != b.block_cols {
452        // Block sizes must match so that block products are conformable.
453        return Err(BlockedSparseError::ShapeMismatch {
454            expected: (a.block_rows, a.block_cols),
455            got: (b.block_rows, b.block_cols),
456        });
457    }
458
459    let nbr_a = a.num_block_rows();
460    let nbc_b = b.num_block_cols();
461    let _nbc_a = a.num_block_cols(); // == nbr_b (kept for documentation)
462
463    // We accumulate dense block results into a 2-D array of Option<Array2<f64>>.
464    // This avoids the complexity of building CSR incrementally in arbitrary order.
465    let mut acc: Vec<Vec<Option<Array2<f64>>>> = (0..nbr_a)
466        .map(|_| (0..nbc_b).map(|_| None).collect())
467        .collect();
468
469    // For each block-column in B that block-row br_a touches, accumulate.
470    // We need index access to both `a.block_row_ptr` and `acc`, so we use an
471    // index-based loop here; suppress clippy's needless_range_loop lint since
472    // we intentionally index multiple data structures by the same counter.
473    #[allow(clippy::needless_range_loop)]
474    for br_a in 0..nbr_a {
475        let a_start = a.block_row_ptr[br_a];
476        let a_end = a.block_row_ptr[br_a + 1];
477
478        for a_idx in a_start..a_end {
479            let bc_a = a.block_col_idx[a_idx]; // == br_b
480            let a_block = &a.data[a_idx];
481
482            // Walk the matching block-row of B (bc_a is the shared index).
483            if bc_a >= b.num_block_rows() {
484                continue;
485            }
486            let b_start = b.block_row_ptr[bc_a];
487            let b_end = b.block_row_ptr[bc_a + 1];
488
489            for b_idx in b_start..b_end {
490                let bc_b = b.block_col_idx[b_idx];
491                let b_block = &b.data[b_idx];
492
493                let product = a_block.dot(b_block);
494
495                match &mut acc[br_a][bc_b] {
496                    Some(existing) => {
497                        *existing = &*existing + &product;
498                    }
499                    slot @ None => {
500                        *slot = Some(product);
501                    }
502                }
503            }
504        }
505    }
506
507    // Build the output BSR from the accumulation.
508    let mut c = BlockedSparseTensor::empty(a.nrows, b.ncols, a.block_rows, a.block_cols)?;
509
510    #[allow(clippy::needless_range_loop)]
511    for br in 0..nbr_a {
512        for bc in 0..nbc_b {
513            if let Some(block) = acc[br][bc].take() {
514                // Only store the block if it has at least one significant entry.
515                let is_nonzero = block.iter().any(|&v| v.abs() > f64::EPSILON);
516                if is_nonzero {
517                    c.set_block(br, bc, block)?;
518                }
519            }
520        }
521    }
522
523    Ok(c)
524}
525
526/// Element-wise addition of two BSR tensors with identical shape and block sizes.
527///
528/// Blocks present in either operand are included. Blocks present in both are summed.
529pub fn blocked_sparse_add(
530    a: &BlockedSparseTensor,
531    b: &BlockedSparseTensor,
532) -> Result<BlockedSparseTensor, BlockedSparseError> {
533    if a.nrows != b.nrows || a.ncols != b.ncols {
534        return Err(BlockedSparseError::ShapeMismatch {
535            expected: (a.nrows, a.ncols),
536            got: (b.nrows, b.ncols),
537        });
538    }
539    if a.block_rows != b.block_rows || a.block_cols != b.block_cols {
540        return Err(BlockedSparseError::ShapeMismatch {
541            expected: (a.block_rows, a.block_cols),
542            got: (b.block_rows, b.block_cols),
543        });
544    }
545
546    let mut result = a.clone();
547
548    for br in 0..b.num_block_rows() {
549        let b_start = b.block_row_ptr[br];
550        let b_end = b.block_row_ptr[br + 1];
551
552        for idx in b_start..b_end {
553            let bc = b.block_col_idx[idx];
554            let b_block = &b.data[idx];
555
556            // Check if 'result' already has this block.
557            let r_start = result.block_row_ptr[br];
558            let r_end = result.block_row_ptr[br + 1];
559
560            match result.block_col_idx[r_start..r_end].binary_search(&bc) {
561                Ok(relative) => {
562                    // Add in-place.
563                    let abs = r_start + relative;
564                    result.data[abs] = &result.data[abs] + b_block;
565                }
566                Err(_) => {
567                    // Block only in B — insert it.
568                    result.set_block(br, bc, b_block.clone())?;
569                }
570            }
571        }
572    }
573
574    Ok(result)
575}
576
577/// Scale every stored element of a BSR tensor by a scalar factor, returning a new tensor.
578pub fn blocked_sparse_scale(tensor: &BlockedSparseTensor, scalar: f64) -> BlockedSparseTensor {
579    let scaled_data: Vec<Array2<f64>> = tensor.data.iter().map(|block| block * scalar).collect();
580
581    BlockedSparseTensor {
582        nrows: tensor.nrows,
583        ncols: tensor.ncols,
584        block_rows: tensor.block_rows,
585        block_cols: tensor.block_cols,
586        block_row_ptr: tensor.block_row_ptr.clone(),
587        block_col_idx: tensor.block_col_idx.clone(),
588        data: scaled_data,
589    }
590}
591
592// ─────────────────────────────────────────────────────────────────────────────
593// Statistics
594// ─────────────────────────────────────────────────────────────────────────────
595
/// Detailed statistics about the block-level sparsity of a BSR tensor.
///
/// Produced by [`BlockSparsityStats::compute`]; `sparsity` and `density`
/// always sum to 1.0.
#[derive(Debug, Clone)]
pub struct BlockSparsityStats {
    /// Total number of possible blocks.
    pub total_blocks: usize,
    /// Number of stored (non-zero) blocks.
    pub nnz_blocks: usize,
    /// Fraction of blocks that are zero (1 − density).
    pub sparsity: f64,
    /// Fraction of blocks that are non-zero (1 − sparsity).
    pub density: f64,
    /// Actual memory used by the BSR representation (bytes),
    /// as reported by `BlockedSparseTensor::memory_bytes`.
    pub memory_bytes: usize,
    /// Memory a fully dense `f64` matrix would require (bytes).
    pub theoretical_dense_bytes: usize,
    /// `theoretical_dense_bytes / memory_bytes`. Values > 1.0 indicate savings.
    pub compression_ratio: f64,
    /// Mean Frobenius norm of stored blocks (0.0 when no blocks are stored).
    pub avg_block_norm: f64,
    /// Maximum Frobenius norm among stored blocks (0.0 when no blocks are stored).
    pub max_block_norm: f64,
}
618
619impl BlockSparsityStats {
620    /// Compute statistics for the given BSR tensor.
621    pub fn compute(tensor: &BlockedSparseTensor) -> Self {
622        let total_blocks = tensor.num_block_rows() * tensor.num_block_cols();
623        let nnz_blocks = tensor.nnz_blocks();
624        let sparsity = tensor.sparsity();
625        let density = 1.0 - sparsity;
626        let memory_bytes = tensor.memory_bytes();
627        let theoretical_dense_bytes = tensor.nrows * tensor.ncols * std::mem::size_of::<f64>();
628
629        let compression_ratio = if memory_bytes == 0 {
630            f64::INFINITY
631        } else {
632            theoretical_dense_bytes as f64 / memory_bytes as f64
633        };
634
635        // Frobenius norms of stored blocks.
636        let block_norms: Vec<f64> = tensor
637            .data
638            .iter()
639            .map(|block| {
640                let sq_sum: f64 = block.iter().map(|&v| v * v).sum();
641                sq_sum.sqrt()
642            })
643            .collect();
644
645        let avg_block_norm = if block_norms.is_empty() {
646            0.0
647        } else {
648            block_norms.iter().sum::<f64>() / block_norms.len() as f64
649        };
650
651        let max_block_norm = block_norms.iter().cloned().fold(0.0_f64, f64::max);
652
653        BlockSparsityStats {
654            total_blocks,
655            nnz_blocks,
656            sparsity,
657            density,
658            memory_bytes,
659            theoretical_dense_bytes,
660            compression_ratio,
661            avg_block_norm,
662            max_block_norm,
663        }
664    }
665}
666
667// ─────────────────────────────────────────────────────────────────────────────
668// Sparsity pattern
669// ─────────────────────────────────────────────────────────────────────────────
670
/// Binary pattern showing which blocks of a BSR tensor are non-zero.
///
/// This is useful for structural analysis (symmetry detection, diagonal coverage,
/// bandwidth estimation) without loading the actual block data.
///
/// Construct with [`BlockSparsityPattern::from_tensor`].
#[derive(Debug, Clone)]
pub struct BlockSparsityPattern {
    /// Number of block-rows.
    pub nblock_rows: usize,
    /// Number of block-columns.
    pub nblock_cols: usize,
    /// `pattern[br][bc]` is `true` iff block `(br, bc)` is stored.
    /// Outer length is `nblock_rows`; each inner row has length `nblock_cols`.
    pub pattern: Vec<Vec<bool>>,
}
684
685impl BlockSparsityPattern {
686    /// Derive the sparsity pattern from a BSR tensor.
687    pub fn from_tensor(tensor: &BlockedSparseTensor) -> Self {
688        let nbr = tensor.num_block_rows();
689        let nbc = tensor.num_block_cols();
690
691        let mut pattern = vec![vec![false; nbc]; nbr];
692
693        // Index-based loop needed because we access tensor.block_row_ptr by index.
694        #[allow(clippy::needless_range_loop)]
695        for br in 0..nbr {
696            let start = tensor.block_row_ptr[br];
697            let end = tensor.block_row_ptr[br + 1];
698            for idx in start..end {
699                let bc = tensor.block_col_idx[idx];
700                if bc < nbc {
701                    pattern[br][bc] = true;
702                }
703            }
704        }
705
706        BlockSparsityPattern {
707            nblock_rows: nbr,
708            nblock_cols: nbc,
709            pattern,
710        }
711    }
712
713    /// Fraction of blocks that are non-zero.
714    pub fn density(&self) -> f64 {
715        let total = self.nblock_rows * self.nblock_cols;
716        if total == 0 {
717            return 0.0;
718        }
719        let nnz: usize = self
720            .pattern
721            .iter()
722            .map(|row| row.iter().filter(|&&v| v).count())
723            .sum();
724        nnz as f64 / total as f64
725    }
726
727    /// Returns `true` if the block pattern is square and symmetric, i.e.
728    /// `pattern[i][j] == pattern[j][i]` for all valid `(i, j)`.
729    pub fn is_symmetric(&self) -> bool {
730        if self.nblock_rows != self.nblock_cols {
731            return false;
732        }
733        let n = self.nblock_rows;
734        for i in 0..n {
735            for j in 0..n {
736                if self.pattern[i][j] != self.pattern[j][i] {
737                    return false;
738                }
739            }
740        }
741        true
742    }
743
744    /// Returns `true` if every diagonal block `(i, i)` is present.
745    ///
746    /// Only meaningful for square block patterns.
747    pub fn has_diagonal_blocks(&self) -> bool {
748        let n = self.nblock_rows.min(self.nblock_cols);
749        (0..n).all(|i| self.pattern[i][i])
750    }
751
752    /// Render the sparsity pattern as an ASCII string.
753    ///
754    /// `'#'` represents a non-zero block; `'.'` represents a zero block.
755    /// Each block-row occupies one text line.
756    pub fn to_ascii(&self) -> String {
757        let mut output = String::with_capacity(self.nblock_rows * (self.nblock_cols + 1));
758        for row in &self.pattern {
759            for &present in row {
760                output.push(if present { '#' } else { '.' });
761            }
762            output.push('\n');
763        }
764        output
765    }
766}
767
768// ─────────────────────────────────────────────────────────────────────────────
769// Re-exports for convenience
770// ─────────────────────────────────────────────────────────────────────────────
771
/// Convenience alias for `ndarray::ArrayD<f64>` (dynamic-dimensional tensor).
///
/// NOTE(review): this alias is not itself a blocked-sparse type; it appears to
/// exist purely so callers mixing dense dynamic tensors with BSR operations can
/// import both from this module — confirm against call sites.
pub type BlockedSparseDynTensor = ArrayD<f64>;
774
775// ─────────────────────────────────────────────────────────────────────────────
776// Tests
777// ─────────────────────────────────────────────────────────────────────────────
778
779#[cfg(test)]
780mod tests {
781    use super::*;
782    use scirs2_core::ndarray::Array2;
783
784    // ── Helpers ───────────────────────────────────────────────────────────────
785
786    /// Build a 4×4 matrix where every element equals its linear index as f64.
787    fn make_4x4() -> Array2<f64> {
788        Array2::from_shape_fn((4, 4), |(r, c)| (r * 4 + c) as f64 + 1.0)
789    }
790
791    /// Build a 4×4 identity matrix.
792    fn make_identity_4() -> Array2<f64> {
793        Array2::<f64>::eye(4)
794    }
795
796    // ── Construction tests ────────────────────────────────────────────────────
797
798    #[test]
799    fn test_from_dense_all_nonzero() {
800        let dense = make_4x4();
801        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
802            .expect("from_dense should succeed");
803        // 4×4 matrix with 2×2 blocks → 4 blocks total, all non-zero.
804        assert_eq!(bst.nnz_blocks(), 4, "all 4 blocks should be stored");
805    }
806
807    #[test]
808    fn test_from_dense_threshold_drops_blocks() {
809        // Create 4×4 where the bottom-right 2×2 block is near-zero.
810        let mut dense = make_4x4();
811        dense.slice_mut(s![2..4, 2..4]).fill(1e-15);
812
813        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
814            .expect("from_dense should succeed");
815        // Only 3 blocks should be stored (bottom-right dropped).
816        assert_eq!(bst.nnz_blocks(), 3, "near-zero block should be dropped");
817    }
818
819    #[test]
820    fn test_to_dense_roundtrip() {
821        let dense = make_4x4();
822        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
823            .expect("from_dense should succeed");
824        let recovered = bst.to_dense();
825
826        for ((r, c), &original_val) in dense.indexed_iter() {
827            let recovered_val = recovered[(r, c)];
828            assert!(
829                (original_val - recovered_val).abs() < 1e-12,
830                "mismatch at ({r},{c}): {original_val} vs {recovered_val}"
831            );
832        }
833    }
834
835    #[test]
836    fn test_get_block_exists() {
837        let dense = make_4x4();
838        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
839            .expect("from_dense should succeed");
840
841        let block = bst.get_block(0, 0).expect("block (0,0) must be stored");
842        // Top-left 2×2 of make_4x4() is [[1,2],[5,6]].
843        assert!((block[(0, 0)] - 1.0).abs() < 1e-12);
844        assert!((block[(0, 1)] - 2.0).abs() < 1e-12);
845        assert!((block[(1, 0)] - 5.0).abs() < 1e-12);
846        assert!((block[(1, 1)] - 6.0).abs() < 1e-12);
847    }
848
849    #[test]
850    fn test_get_block_missing() {
851        let mut dense = make_4x4();
852        dense.slice_mut(s![2..4, 2..4]).fill(1e-15);
853
854        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
855            .expect("from_dense should succeed");
856
857        // Block (1,1) is the bottom-right block — it was zeroed out.
858        let result = bst.get_block(1, 1);
859        assert!(result.is_none(), "dropped block should return None");
860    }
861
862    #[test]
863    fn test_set_block_inserts() {
864        let mut bst =
865            BlockedSparseTensor::empty(4, 4, 2, 2).expect("empty construction should succeed");
866        assert_eq!(bst.nnz_blocks(), 0);
867
868        let new_block = Array2::from_elem((2, 2), 7.0);
869        bst.set_block(0, 1, new_block.clone())
870            .expect("set_block should succeed");
871
872        assert_eq!(bst.nnz_blocks(), 1);
873        let retrieved = bst
874            .get_block(0, 1)
875            .expect("block must be present after set");
876        assert!((retrieved[(0, 0)] - 7.0).abs() < 1e-12);
877    }
878
879    #[test]
880    fn test_nnz_blocks() {
881        let identity = make_identity_4();
882        // 4×4 identity with 2×2 blocks → 2 diagonal blocks non-zero.
883        let bst = BlockedSparseTensor::from_dense(&identity, 2, 2, 1e-10)
884            .expect("from_dense should succeed");
885        assert_eq!(bst.nnz_blocks(), 2);
886    }
887
888    // ── Sparsity tests ────────────────────────────────────────────────────────
889
890    #[test]
891    fn test_sparsity_all_dense() {
892        let dense = make_4x4();
893        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10)
894            .expect("from_dense should succeed");
895        assert!(
896            bst.sparsity().abs() < 1e-12,
897            "sparsity should be 0 when all blocks stored"
898        );
899    }
900
901    #[test]
902    fn test_sparsity_all_sparse() {
903        // All near-zero → no blocks stored.
904        let tiny = Array2::<f64>::from_elem((4, 4), 1e-15);
905        let bst =
906            BlockedSparseTensor::from_dense(&tiny, 2, 2, 1e-10).expect("from_dense should succeed");
907        assert!(
908            (bst.sparsity() - 1.0).abs() < 1e-12,
909            "sparsity should be 1 when no blocks stored"
910        );
911    }
912
913    // ── Matrix multiply tests ─────────────────────────────────────────────────
914
915    #[test]
916    fn test_blocked_sparse_dense_mm_correctness() {
917        let a_dense = make_4x4();
918        let b_dense = Array2::from_shape_fn((4, 3), |(r, c)| (r * 3 + c) as f64 * 0.1);
919        let expected = a_dense.dot(&b_dense);
920
921        let a_bsr = BlockedSparseTensor::from_dense(&a_dense, 2, 2, 1e-10)
922            .expect("from_dense should succeed");
923        let result =
924            blocked_sparse_dense_mm(&a_bsr, &b_dense).expect("sparse-dense mm should succeed");
925
926        for ((r, c), &exp_val) in expected.indexed_iter() {
927            let got = result[(r, c)];
928            assert!(
929                (exp_val - got).abs() < 1e-9,
930                "sparse-dense mm mismatch at ({r},{c}): expected {exp_val}, got {got}"
931            );
932        }
933    }
934
935    #[test]
936    fn test_blocked_sparse_mm_correctness() {
937        let a_dense = make_4x4();
938        let b_dense = Array2::from_shape_fn((4, 4), |(r, c)| ((r + 1) * (c + 1)) as f64 * 0.5);
939        let expected = a_dense.dot(&b_dense);
940
941        let a_bsr = BlockedSparseTensor::from_dense(&a_dense, 2, 2, 1e-10).expect("from_dense A");
942        let b_bsr = BlockedSparseTensor::from_dense(&b_dense, 2, 2, 1e-10).expect("from_dense B");
943
944        let c_bsr = blocked_sparse_mm(&a_bsr, &b_bsr).expect("sparse-sparse mm should succeed");
945        let c_dense = c_bsr.to_dense();
946
947        for ((r, c), &exp_val) in expected.indexed_iter() {
948            let got = c_dense[(r, c)];
949            assert!(
950                (exp_val - got).abs() < 1e-9,
951                "sparse-sparse mm mismatch at ({r},{c}): expected {exp_val}, got {got}"
952            );
953        }
954    }
955
956    // ── Element-wise operation tests ──────────────────────────────────────────
957
958    #[test]
959    fn test_blocked_sparse_add() {
960        let a_dense = make_4x4();
961        let b_dense = Array2::from_shape_fn((4, 4), |(r, c)| (r + c) as f64);
962        let expected = &a_dense + &b_dense;
963
964        let a_bsr = BlockedSparseTensor::from_dense(&a_dense, 2, 2, 1e-10).expect("from_dense A");
965        let b_bsr = BlockedSparseTensor::from_dense(&b_dense, 2, 2, 1e-10).expect("from_dense B");
966
967        let c_bsr = blocked_sparse_add(&a_bsr, &b_bsr).expect("add should succeed");
968        let c_dense = c_bsr.to_dense();
969
970        for ((r, c), &exp_val) in expected.indexed_iter() {
971            let got = c_dense[(r, c)];
972            assert!(
973                (exp_val - got).abs() < 1e-12,
974                "add mismatch at ({r},{c}): expected {exp_val}, got {got}"
975            );
976        }
977    }
978
979    #[test]
980    fn test_blocked_sparse_scale() {
981        let a_dense = make_4x4();
982        let a_bsr = BlockedSparseTensor::from_dense(&a_dense, 2, 2, 1e-10).expect("from_dense");
983
984        let scaled_bsr = blocked_sparse_scale(&a_bsr, 2.0);
985        let scaled_dense = scaled_bsr.to_dense();
986
987        for ((r, c), &orig) in a_dense.indexed_iter() {
988            let got = scaled_dense[(r, c)];
989            assert!(
990                (got - orig * 2.0).abs() < 1e-12,
991                "scale mismatch at ({r},{c}): expected {}, got {got}",
992                orig * 2.0
993            );
994        }
995    }
996
997    // ── BlockSparsityStats tests ──────────────────────────────────────────────
998
999    #[test]
1000    fn test_block_sparsity_stats_density() {
1001        let dense = make_4x4();
1002        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10).expect("from_dense");
1003        let stats = BlockSparsityStats::compute(&bst);
1004        let sum = stats.density + stats.sparsity;
1005        assert!(
1006            (sum - 1.0).abs() < 1e-12,
1007            "density + sparsity must equal 1.0, got {sum}"
1008        );
1009    }
1010
1011    #[test]
1012    fn test_block_sparsity_stats_compression_ratio() {
1013        // Identity matrix: 2 out of 4 blocks are non-zero → sparse → compression > 1.
1014        let identity = make_identity_4();
1015        let bst = BlockedSparseTensor::from_dense(&identity, 2, 2, 1e-10).expect("from_dense");
1016        let stats = BlockSparsityStats::compute(&bst);
1017        assert!(
1018            stats.compression_ratio > 1.0,
1019            "compression_ratio should be > 1.0 for a sparse matrix, got {}",
1020            stats.compression_ratio
1021        );
1022    }
1023
1024    // ── BlockSparsityPattern tests ────────────────────────────────────────────
1025
1026    #[test]
1027    fn test_block_sparsity_pattern_from_tensor() {
1028        let identity = make_identity_4();
1029        let bst = BlockedSparseTensor::from_dense(&identity, 2, 2, 1e-10).expect("from_dense");
1030        let pattern = BlockSparsityPattern::from_tensor(&bst);
1031
1032        // density (pattern) == 1 - sparsity (tensor)
1033        let expected_density = 1.0 - bst.sparsity();
1034        assert!(
1035            (pattern.density() - expected_density).abs() < 1e-12,
1036            "pattern density {} should equal tensor density {}",
1037            pattern.density(),
1038            expected_density
1039        );
1040    }
1041
1042    #[test]
1043    fn test_block_sparsity_pattern_symmetric() {
1044        // Build a symmetric dense matrix.
1045        let base = make_4x4();
1046        let sym = &base + &base.t().to_owned();
1047        let bst = BlockedSparseTensor::from_dense(&sym, 2, 2, 1e-10).expect("from_dense");
1048        let pattern = BlockSparsityPattern::from_tensor(&bst);
1049        // All 4 blocks are non-zero → symmetric.
1050        assert!(
1051            pattern.is_symmetric(),
1052            "fully dense pattern must be symmetric"
1053        );
1054    }
1055
1056    #[test]
1057    fn test_block_sparsity_pattern_ascii_shape() {
1058        let dense = make_4x4();
1059        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10).expect("from_dense");
1060        let pattern = BlockSparsityPattern::from_tensor(&bst);
1061        let ascii = pattern.to_ascii();
1062
1063        // Should have exactly nblock_rows lines.
1064        let lines: Vec<&str> = ascii.lines().collect();
1065        assert_eq!(
1066            lines.len(),
1067            pattern.nblock_rows,
1068            "ASCII should have {} lines",
1069            pattern.nblock_rows
1070        );
1071        // Each line should have exactly nblock_cols characters.
1072        for line in &lines {
1073            assert_eq!(
1074                line.len(),
1075                pattern.nblock_cols,
1076                "each ASCII line should have {} characters",
1077                pattern.nblock_cols
1078            );
1079        }
1080    }
1081
1082    // ── Error condition tests ─────────────────────────────────────────────────
1083
1084    #[test]
1085    fn test_dimension_not_divisible_error() {
1086        // 5×4 matrix with block size 2 — row dimension not divisible.
1087        let odd = Array2::<f64>::zeros((5, 4));
1088        let result = BlockedSparseTensor::from_dense(&odd, 2, 2, 1e-10);
1089        match result {
1090            Err(BlockedSparseError::DimensionNotDivisibleByBlock { dim: 5, block: 2 }) => {}
1091            other => panic!("expected DimensionNotDivisibleByBlock, got {other:?}"),
1092        }
1093    }
1094
1095    #[test]
1096    fn test_memory_bytes_positive() {
1097        let dense = make_4x4();
1098        let bst = BlockedSparseTensor::from_dense(&dense, 2, 2, 1e-10).expect("from_dense");
1099        assert!(
1100            bst.memory_bytes() > 0,
1101            "memory_bytes must be positive for non-empty tensor"
1102        );
1103    }
1104}