//! Memory-efficient sparse operations: memory tracking, streaming and
//! out-of-core kernels, cache-aware routines, and chunked array utilities.

use crate::csr_array::CsrArray;
use crate::error::{SparseError, SparseResult};
use crate::sparray::SparseArray;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::{Float, SparseElement};
use std::collections::VecDeque;
use std::fmt::Debug;

use scirs2_core::simd_ops::SimdUnifiedOps;

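/// Tracks approximate memory usage against a fixed limit so chunked and
/// out-of-core routines can refuse work that would exceed their budget.
///
/// Illustrative sketch of the intended usage (marked `ignore` because the
/// import path for `MemoryTracker` depends on how this module is re-exported):
///
/// ```ignore
/// // Budget of 1 KiB; allocations are bookkeeping only, no real memory moves.
/// let mut tracker = MemoryTracker::new(1024);
/// assert!(tracker.can_allocate(512));
/// tracker.allocate(512)?;                   // ok: 512 <= 1024
/// assert_eq!(tracker.peak_usage(), 512);
/// tracker.deallocate(512);                  // usage back to 0, peak stays at 512
/// assert!(tracker.allocate(2048).is_err()); // exceeds the limit
/// ```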
#[derive(Debug, Clone)]
pub struct MemoryTracker {
    current_usage: usize,
    peak_usage: usize,
    _memorylimit: usize,
}

impl MemoryTracker {
    pub fn new(_memorylimit: usize) -> Self {
        Self {
            current_usage: 0,
            peak_usage: 0,
            _memorylimit,
        }
    }

    pub fn allocate(&mut self, size: usize) -> SparseResult<()> {
        self.current_usage += size;
        self.peak_usage = self.peak_usage.max(self.current_usage);

        if self.current_usage > self._memorylimit {
            Err(SparseError::ValueError("Memory limit exceeded".to_string()))
        } else {
            Ok(())
        }
    }

    pub fn deallocate(&mut self, size: usize) {
        self.current_usage = self.current_usage.saturating_sub(size);
    }

    pub fn current_usage(&self) -> usize {
        self.current_usage
    }

    pub fn peak_usage(&self) -> usize {
        self.peak_usage
    }

    pub fn can_allocate(&self, size: usize) -> bool {
        self.current_usage + size <= self._memorylimit
    }
}

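/// Streaming sparse matrix-vector product `y = A * x` that processes rows in
/// fixed-size chunks, optionally accounting each chunk against a
/// [`MemoryTracker`].
///
/// Minimal sketch of a call (marked `ignore` since it assumes the
/// `CsrArray::from_triplets` constructor used elsewhere in this crate):
///
/// ```ignore
/// let matrix = CsrArray::from_triplets(&[0, 1], &[0, 1], &[2.0, 3.0], (2, 2), false)?;
/// let x = Array1::from_vec(vec![1.0, 1.0]);
/// let mut tracker = MemoryTracker::new(1 << 20);
/// // Process one row per chunk, charging each chunk to the tracker.
/// let y = streaming_sparse_matvec(&matrix, &x.view(), 1, Some(&mut tracker))?;
/// assert_eq!(y.len(), 2);
/// ```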
#[allow(dead_code)]
pub fn streaming_sparse_matvec<T, S>(
    matrix: &S,
    x: &ArrayView1<T>,
    chunk_size: usize,
    mut memory_tracker: Option<&mut MemoryTracker>,
) -> SparseResult<Array1<T>>
where
    T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
    S: SparseArray<T>,
{
    let (rows, cols) = matrix.shape();

    if x.len() != cols {
        return Err(SparseError::DimensionMismatch {
            expected: cols,
            found: x.len(),
        });
    }

    let mut y = Array1::zeros(rows);
    let element_size = std::mem::size_of::<T>();

    let num_chunks = rows.div_ceil(chunk_size);

    for chunk_idx in 0..num_chunks {
        let row_start = chunk_idx * chunk_size;
        let row_end = std::cmp::min(row_start + chunk_size, rows);
        let current_chunk_size = row_end - row_start;

        // Estimate the working-set size of this chunk and check it against the limit.
        let chunk_memory = current_chunk_size * cols * element_size;

        if let Some(tracker) = memory_tracker.as_ref() {
            if !tracker.can_allocate(chunk_memory) {
                return Err(SparseError::ValueError(
                    "Insufficient memory for chunk processing".to_string(),
                ));
            }
        }

        if let Some(tracker) = memory_tracker.as_mut() {
            tracker.allocate(chunk_memory)?;
        }

        // Accumulate the partial products for the rows in this chunk.
        let (row_indices, col_indices, values) = matrix.find();
        let mut chunk_result = vec![T::sparse_zero(); current_chunk_size];

        for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
            if row >= row_start && row < row_end {
                let local_row = row - row_start;
                chunk_result[local_row] = chunk_result[local_row] + values[k] * x[col];
            }
        }

        for (i, &val) in chunk_result.iter().enumerate() {
            y[row_start + i] = val;
        }

        if let Some(tracker) = memory_tracker.as_mut() {
            tracker.deallocate(chunk_memory);
        }
    }

    Ok(y)
}

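/// Processor for sparse operations whose intermediates may not fit in memory:
/// work is split into row chunks sized from the configured memory limit.
///
/// Illustrative sketch (marked `ignore`; it assumes `CsrArray::from_triplets`
/// and the small 2x2 matrices used in this module's tests):
///
/// ```ignore
/// let a = CsrArray::from_triplets(&[0, 1, 1], &[0, 0, 1], &[2.0, 1.0, 3.0], (2, 2), false)?;
/// let b = CsrArray::from_triplets(&[0, 1], &[0, 1], &[1.0, 2.0], (2, 2), false)?;
/// // One-megabyte budget; the default chunk size is derived from it in `new`.
/// let mut processor = OutOfCoreProcessor::<f64>::new(1_000_000);
/// let c = processor.out_of_core_matmul(&a, &b)?;
/// assert_eq!(c.shape(), (2, 2));
/// ```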
pub struct OutOfCoreProcessor<T>
where
    T: Float + SparseElement + Debug + Copy + 'static,
{
    _memorylimit: usize,
    #[allow(dead_code)]
    chunk_size: usize,
    temp_storage: VecDeque<Vec<T>>,
}

impl<T> OutOfCoreProcessor<T>
where
    T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
{
    pub fn new(_memorylimit: usize) -> Self {
        // Heuristic: derive a default chunk size from the memory limit.
        let chunk_size = _memorylimit / (8 * std::mem::size_of::<T>());

        Self {
            _memorylimit,
            chunk_size,
            temp_storage: VecDeque::new(),
        }
    }

    pub fn out_of_core_matmul<S1, S2>(&mut self, a: &S1, b: &S2) -> SparseResult<CsrArray<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let (a_rows, a_cols) = a.shape();
        let (b_rows, b_cols) = b.shape();

        if a_cols != b_rows {
            return Err(SparseError::DimensionMismatch {
                expected: a_cols,
                found: b_rows,
            });
        }

        // Choose a chunk of rows of `a` that fits the memory limit (at least one row).
        let element_size = std::mem::size_of::<T>();
        let max_chunk_size = self._memorylimit / (4 * element_size * b_cols);
        let chunk_size = std::cmp::min(max_chunk_size, a_rows).max(1);

        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        for chunk_start in (0..a_rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, a_rows);

            // Gather the entries of `a` that fall inside this row chunk.
            let (a_row_indices, a_col_indices, a_values) = a.find();
            let mut chunk_a_data = Vec::new();

            for (k, (&row, &col)) in a_row_indices.iter().zip(a_col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    chunk_a_data.push((row - chunk_start, col, a_values[k]));
                }
            }

            let chunk_result =
                self.compute_chunk_product(&chunk_a_data, b, chunk_end - chunk_start, b_cols)?;

            for (local_row, col, val) in chunk_result {
                if !SparseElement::is_zero(&val) {
                    result_rows.push(chunk_start + local_row);
                    result_cols.push(col);
                    result_values.push(val);
                }
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (a_rows, b_cols),
            true,
        )
    }

    fn compute_chunk_product<S>(
        &self,
        chunk_a: &[(usize, usize, T)],
        b: &S,
        chunk_rows: usize,
        b_cols: usize,
    ) -> SparseResult<Vec<(usize, usize, T)>>
    where
        S: SparseArray<T>,
    {
        let mut result = Vec::new();
        let (b_row_indices, b_col_indices, b_values) = b.find();

        let mut b_by_row: std::collections::HashMap<usize, Vec<(usize, T)>> =
            std::collections::HashMap::new();
        for (k, (&row, &col)) in b_row_indices.iter().zip(b_col_indices.iter()).enumerate() {
            b_by_row.entry(row).or_default().push((col, b_values[k]));
        }

        for i in 0..chunk_rows {
            let mut a_row_entries = Vec::new();
            for &(row, col, val) in chunk_a {
                if row == i {
                    a_row_entries.push((col, val));
                }
            }

            for j in 0..b_cols {
                let mut dot_product = T::sparse_zero();

                for &(a_col, a_val) in &a_row_entries {
                    if let Some(b_row_data) = b_by_row.get(&a_col) {
                        for &(b_col, b_val) in b_row_data {
                            if b_col == j {
                                dot_product = dot_product + a_val * b_val;
                                break;
                            }
                        }
                    }
                }

                if !SparseElement::is_zero(&dot_product) {
                    result.push((i, j, dot_product));
                }
            }
        }

        Ok(result)
    }

    #[allow(dead_code)]
    fn process_chunk_matmul<S1, S2>(
        &mut self,
        _a: &S1,
        _b_csc: &S2,
        _row_start: usize,
        _row_end: usize,
        _b_cols: usize,
    ) -> SparseResult<ChunkResult<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        Err(SparseError::ValueError(
            "Process chunk matmul not implemented".to_string(),
        ))
    }

    #[allow(dead_code)]
    fn process_chunk_matmul_old<S1, S2>(
        &mut self,
        a: &S1,
        b_csc: &S2,
        row_start: usize,
        row_end: usize,
        b_cols: usize,
    ) -> SparseResult<ChunkResult<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let mut chunk_data = Vec::new();
        let mut chunk_indices = Vec::new();
        let mut chunk_indptr = vec![0];

        let (a_row_indices, a_col_indices, a_values) = a.find();
        // Column indices of `b` are not needed here; the CSC structure is walked
        // through `indptr` and the row indices directly.
        let (b_row_indices, _b_col_indices, b_values) = b_csc.find();
        let b_indptr = b_csc
            .get_indptr()
            .ok_or_else(|| SparseError::ValueError("CSC matrix must have indptr".to_string()))?;

        for i in row_start..row_end {
            let mut row_data = Vec::new();
            let mut row_indices = Vec::new();

            let mut a_entries = Vec::new();
            for (k, (&row, &col)) in a_row_indices.iter().zip(a_col_indices.iter()).enumerate() {
                if row == i {
                    a_entries.push((col, a_values[k]));
                }
            }

            for j in 0..b_cols {
                let mut sum = T::sparse_zero();
                let b_col_start = b_indptr[j];
                let b_col_end = b_indptr[j + 1];

                for &(a_col, a_val) in &a_entries {
                    for b_idx in b_col_start..b_col_end {
                        if b_row_indices[b_idx] == a_col {
                            sum = sum + a_val * b_values[b_idx];
                            break;
                        }
                    }
                }

                if !SparseElement::is_zero(&sum) {
                    row_data.push(sum);
                    row_indices.push(j);
                }
            }

            chunk_data.extend(row_data);
            chunk_indices.extend(row_indices);
            chunk_indptr.push(chunk_data.len());
        }

        Ok(ChunkResult {
            data: chunk_data,
            indices: chunk_indices,
            indptr: chunk_indptr,
        })
    }

    pub fn memory_stats(&self) -> (usize, usize) {
        let current_usage = self
            .temp_storage
            .iter()
            .map(|v| v.len() * std::mem::size_of::<T>())
            .sum();
        (current_usage, self._memorylimit)
    }
}

struct ChunkResult<T> {
    #[allow(dead_code)]
    data: Vec<T>,
    #[allow(dead_code)]
    indices: Vec<usize>,
    #[allow(dead_code)]
    indptr: Vec<usize>,
}

pub struct CacheAwareOps;

impl CacheAwareOps {
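    /// Sparse matrix-vector product that sorts the nonzeros by column and then
    /// applies them in cache-line-sized groups, so accesses into `x` stay local.
    ///
    /// Sketch of a call (marked `ignore`; assumes `CsrArray::from_triplets` as
    /// used in this module's tests, and a 64-byte cache line):
    ///
    /// ```ignore
    /// let m = CsrArray::from_triplets(&[0, 1], &[1, 0], &[2.0, 3.0], (2, 2), false)?;
    /// let x = Array1::from_vec(vec![1.0, 1.0]);
    /// let y = CacheAwareOps::cache_optimized_spmv(&m, &x.view(), 64)?;
    /// assert_eq!(y.len(), 2);
    /// ```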
    pub fn cache_optimized_spmv<T, S>(
        matrix: &S,
        x: &ArrayView1<T>,
        cache_line_size: usize,
    ) -> SparseResult<Array1<T>>
    where
        T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();

        if x.len() != cols {
            return Err(SparseError::DimensionMismatch {
                expected: cols,
                found: x.len(),
            });
        }

        let mut y = Array1::zeros(rows);
        let elements_per_cache_line = cache_line_size / std::mem::size_of::<T>();

        let (row_indices, col_indices, values) = matrix.find();

        // Sort operations by column so consecutive reads of `x` hit the same cache lines.
        let mut sorted_ops: Vec<(usize, usize, T)> = row_indices
            .iter()
            .zip(col_indices.iter())
            .zip(values.iter())
            .map(|((&row, &col), &val)| (row, col, val))
            .collect();

        sorted_ops.sort_by_key(|&(_, col, _)| col);

        for chunk in sorted_ops.chunks(elements_per_cache_line) {
            for &(row, col, val) in chunk {
                y[row] = y[row] + val * x[col];
            }
        }

        Ok(y)
    }

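    /// Transpose built by swapping (row, col) pairs in cache-line-sized batches
    /// and re-sorting by the new row index before assembling a CSR result.
    ///
    /// Sketch (marked `ignore`; assumes the same `CsrArray` constructor used in
    /// the tests below):
    ///
    /// ```ignore
    /// let m = CsrArray::from_triplets(&[0, 0], &[1, 2], &[5.0, 6.0], (2, 3), false)?;
    /// let mt = CacheAwareOps::cache_optimized_transpose(&m, 64)?;
    /// assert_eq!(mt.shape(), (3, 2)); // shape is flipped
    /// ```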
    pub fn cache_optimized_transpose<T, S>(
        matrix: &S,
        cache_line_size: usize,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let (row_indices, col_indices, values) = matrix.find();

        let elements_per_cache_line = cache_line_size / std::mem::size_of::<T>();

        let mut transposed_triplets = Vec::new();

        for chunk_start in (0..row_indices.len()).step_by(elements_per_cache_line) {
            let chunk_end = std::cmp::min(chunk_start + elements_per_cache_line, row_indices.len());

            for k in chunk_start..chunk_end {
                transposed_triplets.push((col_indices[k], row_indices[k], values[k]));
            }
        }

        transposed_triplets.sort_by_key(|&(new_row, _, _)| new_row);

        let new_rows: Vec<usize> = transposed_triplets
            .iter()
            .map(|&(new_row, _, _)| new_row)
            .collect();
        let new_cols: Vec<usize> = transposed_triplets
            .iter()
            .map(|&(_, new_col, _)| new_col)
            .collect();
        let new_values: Vec<T> = transposed_triplets.iter().map(|&(_, _, val)| val).collect();

        CsrArray::from_triplets(&new_rows, &new_cols, &new_values, (cols, rows), false)
    }
}

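/// Simple buffer pool that recycles `Vec<T>` allocations, retaining at most a
/// fixed number of idle buffers, to avoid repeated allocation in chunked kernels.
///
/// Sketch of the intended round trip (marked `ignore`; illustration only):
///
/// ```ignore
/// let mut pool = MemoryPool::<f64>::new(4); // keep at most 4 idle buffers
/// let buf = pool.allocate(256);             // fresh or recycled, zero-filled
/// assert_eq!(buf.len(), 256);
/// pool.deallocate(buf);                     // returned buffer becomes reusable
/// let (available, _) = pool.stats();
/// assert_eq!(available, 1);
/// ```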
pub struct MemoryPool<T>
where
    T: Float + SparseElement + Debug + Copy + 'static,
{
    available_buffers: Vec<Vec<T>>,
    allocated_buffers: Vec<Vec<T>>,
    _pool_sizelimit: usize,
}

impl<T> MemoryPool<T>
where
    T: Float + SparseElement + Debug + Copy + 'static,
{
    pub fn new(_pool_sizelimit: usize) -> Self {
        Self {
            available_buffers: Vec::new(),
            allocated_buffers: Vec::new(),
            _pool_sizelimit,
        }
    }

    pub fn allocate(&mut self, size: usize) -> Vec<T> {
        if let Some(mut buffer) = self.available_buffers.pop() {
            buffer.resize(size, T::sparse_zero());
            buffer
        } else {
            vec![T::sparse_zero(); size]
        }
    }

    pub fn deallocate(&mut self, mut buffer: Vec<T>) {
        if self.available_buffers.len() < self._pool_sizelimit {
            buffer.clear();
            self.available_buffers.push(buffer);
        }
    }

    pub fn stats(&self) -> (usize, usize) {
        (self.available_buffers.len(), self.allocated_buffers.len())
    }
}

pub struct ChunkedOperations;

impl ChunkedOperations {
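    /// Element-wise addition of two equally shaped sparse matrices, processed
    /// row-chunk by row-chunk with optional memory accounting.
    ///
    /// Sketch (marked `ignore`; mirrors the diagonal matrices used in the tests):
    ///
    /// ```ignore
    /// let a = CsrArray::from_triplets(&[0, 1], &[0, 1], &[1.0, 2.0], (2, 2), false)?;
    /// let b = CsrArray::from_triplets(&[0, 1], &[0, 1], &[3.0, 4.0], (2, 2), false)?;
    /// let sum = ChunkedOperations::chunked_sparse_add(&a, &b, 1, None)?;
    /// assert_eq!(sum.get(1, 1), 6.0);
    /// ```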
    pub fn chunked_sparse_add<T, S1, S2>(
        a: &S1,
        b: &S2,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float
            + SparseElement
            + Debug
            + Copy
            + 'static
            + SimdUnifiedOps
            + Send
            + Sync
            + std::ops::AddAssign,
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let (a_rows, a_cols) = a.shape();
        let (b_rows, b_cols) = b.shape();

        if (a_rows, a_cols) != (b_rows, b_cols) {
            return Err(SparseError::ShapeMismatch {
                expected: (a_rows, a_cols),
                found: (b_rows, b_cols),
            });
        }

        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        let element_size = std::mem::size_of::<T>();

        let (a_rowsidx, a_cols_idx, a_values) = a.find();
        let (b_rowsidx, b_cols_idx, b_values) = b.find();

        for chunk_start in (0..a_rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, a_rows);
            let current_chunk_size = chunk_end - chunk_start;

            // Two dense row blocks (one per operand) is the worst-case working set.
            let chunk_memory = current_chunk_size * a_cols * element_size * 2;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for chunked addition".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            // Accumulate entries from both operands into a (row, col) -> value map.
            let mut chunk_result: std::collections::HashMap<(usize, usize), T> =
                std::collections::HashMap::new();

            for (k, (&row, &col)) in a_rowsidx.iter().zip(a_cols_idx.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let local_row = row - chunk_start;
                    let key = (local_row, col);
                    if let Some(existing_val) = chunk_result.get_mut(&key) {
                        *existing_val += a_values[k];
                    } else {
                        chunk_result.insert(key, a_values[k]);
                    }
                }
            }

            for (k, (&row, &col)) in b_rowsidx.iter().zip(b_cols_idx.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let local_row = row - chunk_start;
                    let key = (local_row, col);
                    if let Some(existing_val) = chunk_result.get_mut(&key) {
                        *existing_val += b_values[k];
                    } else {
                        chunk_result.insert(key, b_values[k]);
                    }
                }
            }

            for ((local_row, col), val) in chunk_result {
                if !SparseElement::is_zero(&val) {
                    result_rows.push(chunk_start + local_row);
                    result_cols.push(col);
                    result_values.push(val);
                }
            }

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (a_rows, a_cols),
            false,
        )
    }

    pub fn chunked_sparse_scale<T, S>(
        matrix: &S,
        scalar: T,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        let element_size = std::mem::size_of::<T>();

        for chunk_start in (0..rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, rows);
            let current_chunk_size = chunk_end - chunk_start;

            let chunk_memory = current_chunk_size * cols * element_size;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for chunked scaling".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            let (row_indices, col_indices, values) = matrix.find();

            for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let scaled_value = values[k] * scalar;
                    if !SparseElement::is_zero(&scaled_value) {
                        result_rows.push(row);
                        result_cols.push(col);
                        result_values.push(scaled_value);
                    }
                }
            }

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (rows, cols),
            false,
        )
    }

    pub fn chunked_format_conversion<T, S>(
        matrix: &S,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let mut all_triplets = Vec::new();

        let element_size = std::mem::size_of::<T>();

        for chunk_start in (0..rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, rows);
            let current_chunk_size = chunk_end - chunk_start;

            let chunk_memory = current_chunk_size * cols * element_size;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for format conversion".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            let (row_indices, col_indices, values) = matrix.find();
            let mut chunk_triplets = Vec::new();

            for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end && !SparseElement::is_zero(&values[k]) {
                    chunk_triplets.push((row, col, values[k]));
                }
            }

            all_triplets.extend(chunk_triplets);

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        let result_rows: Vec<usize> = all_triplets.iter().map(|&(r, _, _)| r).collect();
        let result_cols: Vec<usize> = all_triplets.iter().map(|&(_, c, _)| c).collect();
        let result_values: Vec<T> = all_triplets.iter().map(|&(_, _, v)| v).collect();

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (rows, cols),
            false,
        )
    }

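    /// Bandwidth reduction via a breadth-first, reverse Cuthill-McKee style
    /// reordering: returns the new vertex ordering and the permuted matrix.
    ///
    /// Sketch (marked `ignore`; assumes a small square matrix with a symmetric
    /// sparsity pattern, as in the tests below):
    ///
    /// ```ignore
    /// let m = CsrArray::from_triplets(&[0, 3], &[3, 0], &[1.0, 1.0], (4, 4), false)?;
    /// let (ordering, reordered) = ChunkedOperations::bandwidth_reduction(&m, None)?;
    /// assert_eq!(ordering.len(), 4);
    /// assert_eq!(reordered.shape(), (4, 4));
    /// ```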
    pub fn bandwidth_reduction<T, S>(
        matrix: &S,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<(Vec<usize>, CsrArray<T>)>
    where
        T: Float + SparseElement + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();

        if rows != cols {
            return Err(SparseError::ValueError(
                "Bandwidth reduction requires square matrix".to_string(),
            ));
        }

        // Rough budget: ordering, visited flags, queue, and inverse permutation.
        let element_size = std::mem::size_of::<usize>();
        let memory_needed = rows * element_size * 4;

        if let Some(ref mut tracker) = memory_tracker {
            if !tracker.can_allocate(memory_needed) {
                return Err(SparseError::ValueError(
                    "Insufficient memory for bandwidth reduction".to_string(),
                ));
            }
            tracker.allocate(memory_needed)?;
        }

        // Build an undirected adjacency list from the sparsity pattern.
        let (row_indices, col_indices_, _) = matrix.find();
        let mut adj_list: Vec<Vec<usize>> = vec![Vec::new(); rows];

        for (&row, &col) in row_indices.iter().zip(col_indices_.iter()) {
            if row != col {
                adj_list[row].push(col);
                adj_list[col].push(row);
            }
        }

        for neighbors in &mut adj_list {
            neighbors.sort_unstable();
            neighbors.dedup();
        }

        // Start the breadth-first traversal from a minimum-degree vertex.
        let start_vertex = (0..rows).min_by_key(|&v| adj_list[v].len()).unwrap_or(0);

        let mut ordering = Vec::new();
        let mut visited = vec![false; rows];
        let mut queue = VecDeque::new();

        queue.push_back(start_vertex);
        visited[start_vertex] = true;

        while let Some(current) = queue.pop_front() {
            ordering.push(current);

            // Visit unvisited neighbors in order of increasing degree.
            let mut neighbors = adj_list[current]
                .iter()
                .filter(|&&v| !visited[v])
                .map(|&v| (adj_list[v].len(), v))
                .collect::<Vec<_>>();

            neighbors.sort_unstable();

            for (_, neighbor) in neighbors {
                if !visited[neighbor] {
                    visited[neighbor] = true;
                    queue.push_back(neighbor);
                }
            }
        }

        // Append vertices from any components the traversal did not reach.
        for (v, &is_visited) in visited.iter().enumerate().take(rows) {
            if !is_visited {
                ordering.push(v);
            }
        }

        // Reverse to obtain the reverse Cuthill-McKee ordering.
        ordering.reverse();

        let mut perm_rows = Vec::new();
        let mut perm_cols = Vec::new();
        let mut perm_values = Vec::new();

        let (orig_rows, orig_cols, orig_values) = matrix.find();

        let mut inv_perm = vec![0; rows];
        for (new_idx, &old_idx) in ordering.iter().enumerate() {
            inv_perm[old_idx] = new_idx;
        }

        for (k, (&row, &col)) in orig_rows.iter().zip(orig_cols.iter()).enumerate() {
            let new_row = inv_perm[row];
            let new_col = inv_perm[col];
            perm_rows.push(new_row);
            perm_cols.push(new_col);
            perm_values.push(orig_values[k]);
        }

        let reordered_matrix =
            CsrArray::from_triplets(&perm_rows, &perm_cols, &perm_values, (rows, cols), false)?;

        if let Some(ref mut tracker) = memory_tracker {
            tracker.deallocate(memory_needed);
        }

        Ok((ordering, reordered_matrix))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::csr_array::CsrArray;
    use approx::assert_relative_eq;

    #[test]
    #[ignore]
    fn test_memory_tracker() {
        let mut tracker = MemoryTracker::new(1000);

        assert!(tracker.allocate(500).is_ok());
        assert_eq!(tracker.current_usage(), 500);
        assert_eq!(tracker.peak_usage(), 500);

        assert!(tracker.allocate(600).is_err());

        tracker.deallocate(200);
        assert_eq!(tracker.current_usage(), 300);
        assert_eq!(tracker.peak_usage(), 500);

        assert!(tracker.can_allocate(700));
        assert!(!tracker.can_allocate(800));
    }

    #[test]
    fn test_streaming_sparse_matvec() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        let mut tracker = MemoryTracker::new(10000);
        let result = streaming_sparse_matvec(&matrix, &x.view(), 2, Some(&mut tracker)).unwrap();

        assert_relative_eq!(result[0], 7.0);
        assert_relative_eq!(result[1], 6.0);
        assert_relative_eq!(result[2], 19.0);

        assert!(tracker.peak_usage() > 0);
    }

    #[test]
    fn test_out_of_core_processor() {
        let mut processor = OutOfCoreProcessor::<f64>::new(1_000_000);

        let rowsa = vec![0, 1, 1];
        let cols_a = vec![0, 0, 1];
        let data_a = vec![2.0, 1.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rowsa, &cols_a, &data_a, (2, 2), false).unwrap();

        let rowsb = vec![0, 1];
        let cols_b = vec![0, 1];
        let data_b = vec![1.0, 2.0];
        let matrix_b = CsrArray::from_triplets(&rowsb, &cols_b, &data_b, (2, 2), false).unwrap();

        let result = processor.out_of_core_matmul(&matrix_a, &matrix_b).unwrap();

        assert_eq!(result.shape(), (2, 2));

        assert_relative_eq!(result.get(0, 0), 2.0);
        assert_relative_eq!(result.get(0, 1), 0.0);
        assert_relative_eq!(result.get(1, 0), 1.0);
        assert_relative_eq!(result.get(1, 1), 6.0);

        let (current, limit) = processor.memory_stats();
        assert!(current <= limit);
    }

    #[test]
    fn test_cache_aware_ops() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        let result = CacheAwareOps::cache_optimized_spmv(&matrix, &x.view(), 64).unwrap();
        assert_relative_eq!(result[0], 7.0);
        assert_relative_eq!(result[1], 6.0);
        assert_relative_eq!(result[2], 19.0);

        let transposed = CacheAwareOps::cache_optimized_transpose(&matrix, 64).unwrap();
        assert_eq!(transposed.shape(), (3, 3));

        assert_relative_eq!(transposed.get(0, 0), 1.0); // (0, 0) stays on the diagonal
        assert_relative_eq!(transposed.get(2, 0), 2.0); // original (0, 2) entry
        assert_relative_eq!(transposed.get(1, 1), 3.0); // (1, 1) stays on the diagonal
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::<f64>::new(5);

        let buffer1 = pool.allocate(100);
        assert_eq!(buffer1.len(), 100);

        pool.deallocate(buffer1);

        let (available, allocated) = pool.stats();
        assert_eq!(available, 1);
        assert_eq!(allocated, 0);

        let buffer2 = pool.allocate(50);
        assert_eq!(buffer2.len(), 50);

        pool.deallocate(buffer2);
    }

    #[test]
    fn test_streaming_memory_limit() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        // A 10-byte limit cannot hold even a single-row chunk.
        let mut tracker = MemoryTracker::new(10);
        let result = streaming_sparse_matvec(&matrix, &x.view(), 1, Some(&mut tracker));

        assert!(result.is_err());
    }

    #[test]
    fn test_chunked_sparse_add() {
        let rowsa = vec![0, 1, 2];
        let cols_a = vec![0, 1, 2];
        let data_a = vec![1.0, 2.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rowsa, &cols_a, &data_a, (3, 3), false).unwrap();

        let rowsb = vec![0, 1, 2];
        let cols_b = vec![0, 1, 2];
        let data_b = vec![4.0, 5.0, 6.0];
        let matrix_b = CsrArray::from_triplets(&rowsb, &cols_b, &data_b, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_sparse_add(&matrix_a, &matrix_b, 2, Some(&mut tracker))
                .unwrap();

        assert_eq!(result.shape(), (3, 3));

        assert_relative_eq!(result.get(0, 0), 5.0);
        assert_relative_eq!(result.get(1, 1), 7.0);
        assert_relative_eq!(result.get(2, 2), 9.0);
    }

    #[test]
    fn test_chunked_sparse_scale() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_sparse_scale(&matrix, 2.0, 2, Some(&mut tracker)).unwrap();

        assert_eq!(result.shape(), (3, 3));

        assert_relative_eq!(result.get(0, 0), 2.0);
        assert_relative_eq!(result.get(1, 1), 4.0);
        assert_relative_eq!(result.get(2, 2), 6.0);
    }

    #[test]
    fn test_chunked_format_conversion() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_format_conversion(&matrix, 2, Some(&mut tracker)).unwrap();

        assert_eq!(result.shape(), matrix.shape());
        assert_eq!(result.nnz(), matrix.nnz());

        assert_relative_eq!(result.get(0, 0), 1.0);
        assert_relative_eq!(result.get(1, 1), 2.0);
        assert_relative_eq!(result.get(2, 2), 3.0);
    }

    #[test]
    fn test_bandwidth_reduction() {
        let rows = vec![0, 0, 1, 1, 2, 2, 3, 3];
        let cols = vec![0, 3, 1, 2, 1, 2, 0, 3];
        let data = vec![1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 4.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap();

        let mut tracker = MemoryTracker::new(100000);
        let (ordering, reordered) =
            ChunkedOperations::bandwidth_reduction(&matrix, Some(&mut tracker)).unwrap();

        assert_eq!(ordering.len(), 4);

        assert_eq!(reordered.shape(), (4, 4));
        assert_eq!(reordered.nnz(), matrix.nnz());

        let mut sorted_ordering = ordering.clone();
        sorted_ordering.sort_unstable();
        assert_eq!(sorted_ordering, vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_chunked_operations_memory_limit() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        // A 10-byte limit is too small for any of the chunked operations.
        let mut tracker = MemoryTracker::new(10);

        assert!(
            ChunkedOperations::chunked_sparse_scale(&matrix, 2.0, 1, Some(&mut tracker)).is_err()
        );

        tracker = MemoryTracker::new(10);
        assert!(
            ChunkedOperations::chunked_format_conversion(&matrix, 1, Some(&mut tracker)).is_err()
        );

        tracker = MemoryTracker::new(10);
        assert!(ChunkedOperations::bandwidth_reduction(&matrix, Some(&mut tracker)).is_err());
    }

    #[test]
    fn test_chunked_add_different_sparsity_patterns() {
        let rowsa = vec![0, 2];
        let cols_a = vec![0, 2];
        let data_a = vec![1.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rowsa, &cols_a, &data_a, (3, 3), false).unwrap();

        let rowsb = vec![1, 2];
        let cols_b = vec![1, 0];
        let data_b = vec![2.0, 1.0];
        let matrix_b = CsrArray::from_triplets(&rowsb, &cols_b, &data_b, (3, 3), false).unwrap();

        let result = ChunkedOperations::chunked_sparse_add(&matrix_a, &matrix_b, 1, None).unwrap();

        assert_relative_eq!(result.get(0, 0), 1.0); // only in A
        assert_relative_eq!(result.get(1, 1), 2.0); // only in B
        assert_relative_eq!(result.get(2, 2), 3.0); // only in A
        assert_relative_eq!(result.get(2, 0), 1.0); // only in B
    }
}