use crate::csr_array::CsrArray;
use crate::error::{SparseError, SparseResult};
use crate::sparray::SparseArray;
use scirs2_core::ndarray::{Array1, ArrayView1};
use scirs2_core::numeric::Float;
use std::collections::VecDeque;
use std::fmt::Debug;

use scirs2_core::simd_ops::SimdUnifiedOps;

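/// Tracks an estimated number of bytes in use against a fixed limit and
/// records the peak usage observed. The tracker only does bookkeeping; the
/// caller reports allocation and deallocation sizes explicitly.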
#[derive(Debug, Clone)]
pub struct MemoryTracker {
    current_usage: usize,
    peak_usage: usize,
    _memorylimit: usize,
}

impl MemoryTracker {
    pub fn new(_memorylimit: usize) -> Self {
        Self {
            current_usage: 0,
            peak_usage: 0,
            _memorylimit,
        }
    }

    /// Records an allocation of `size` bytes. If the allocation would exceed
    /// the limit, an error is returned and the tracked usage is left unchanged.
    pub fn allocate(&mut self, size: usize) -> SparseResult<()> {
        if self.current_usage + size > self._memorylimit {
            return Err(SparseError::ValueError("Memory limit exceeded".to_string()));
        }

        self.current_usage += size;
        self.peak_usage = self.peak_usage.max(self.current_usage);
        Ok(())
    }

    /// Records a deallocation of `size` bytes, saturating at zero.
    pub fn deallocate(&mut self, size: usize) {
        self.current_usage = self.current_usage.saturating_sub(size);
    }

    pub fn current_usage(&self) -> usize {
        self.current_usage
    }

    pub fn peak_usage(&self) -> usize {
        self.peak_usage
    }

    /// Returns true if `size` additional bytes would still fit under the limit.
    pub fn can_allocate(&self, size: usize) -> bool {
        self.current_usage + size <= self._memorylimit
    }
}

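/// Sparse matrix-vector product computed over row chunks so that the working
/// set stays bounded; each chunk is optionally accounted for through a
/// [`MemoryTracker`].
///
/// A minimal usage sketch, mirroring the call pattern of the tests in this
/// module (the matrix, chunk size, and limit are illustrative only):
///
/// ```ignore
/// let matrix = CsrArray::from_triplets(&[0, 1], &[0, 1], &[1.0, 2.0], (2, 2), false)?;
/// let x = Array1::from_vec(vec![1.0, 1.0]);
/// let mut tracker = MemoryTracker::new(10_000);
/// let y = streaming_sparse_matvec(&matrix, &x.view(), 1, Some(&mut tracker))?;
/// ```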
#[allow(dead_code)]
pub fn streaming_sparse_matvec<T, S>(
    matrix: &S,
    x: &ArrayView1<T>,
    chunk_size: usize,
    mut memory_tracker: Option<&mut MemoryTracker>,
) -> SparseResult<Array1<T>>
where
    T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
    S: SparseArray<T>,
{
    let (rows, cols) = matrix.shape();

    if x.len() != cols {
        return Err(SparseError::DimensionMismatch {
            expected: cols,
            found: x.len(),
        });
    }

    let mut y = Array1::zeros(rows);
    let element_size = std::mem::size_of::<T>();

    // Extract the triplets once; each chunk below only scans the rows it owns.
    let (row_indices, col_indices, values) = matrix.find();

    let num_chunks = rows.div_ceil(chunk_size);

    for chunk_idx in 0..num_chunks {
        let row_start = chunk_idx * chunk_size;
        let row_end = std::cmp::min(row_start + chunk_size, rows);
        let current_chunk_size = row_end - row_start;

        // Conservative (dense) estimate of this chunk's working set.
        let chunk_memory = current_chunk_size * cols * element_size;

        if let Some(tracker) = memory_tracker.as_ref() {
            if !tracker.can_allocate(chunk_memory) {
                return Err(SparseError::ValueError(
                    "Insufficient memory for chunk processing".to_string(),
                ));
            }
        }

        if let Some(tracker) = memory_tracker.as_mut() {
            tracker.allocate(chunk_memory)?;
        }

        let mut chunk_result = vec![T::zero(); current_chunk_size];

        for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
            if row >= row_start && row < row_end {
                let local_row = row - row_start;
                chunk_result[local_row] = chunk_result[local_row] + values[k] * x[col];
            }
        }

        for (i, &val) in chunk_result.iter().enumerate() {
            y[row_start + i] = val;
        }

        if let Some(tracker) = memory_tracker.as_mut() {
            tracker.deallocate(chunk_memory);
        }
    }

    Ok(y)
}

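/// Drives sparse operations whose intermediates are processed in row chunks
/// sized from a fixed memory budget, so a full dense working set never has to
/// be materialized at once.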
pub struct OutOfCoreProcessor<T>
where
    T: Float + Debug + Copy + 'static,
{
    _memorylimit: usize,
    #[allow(dead_code)]
    chunk_size: usize,
    temp_storage: VecDeque<Vec<T>>,
}

impl<T> OutOfCoreProcessor<T>
where
    T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
{
    pub fn new(_memorylimit: usize) -> Self {
        // Heuristic default chunk size: one eighth of the budget, in elements.
        let chunk_size = _memorylimit / (8 * std::mem::size_of::<T>());

        Self {
            _memorylimit,
            chunk_size,
            temp_storage: VecDeque::new(),
        }
    }

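    /// Multiplies `a * b` by streaming row chunks of `a`, sizing each chunk so
    /// that a dense block of the result stays within the configured memory
    /// limit. The product is assembled as a `CsrArray`.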
    pub fn out_of_core_matmul<S1, S2>(&mut self, a: &S1, b: &S2) -> SparseResult<CsrArray<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let (a_rows, a_cols) = a.shape();
        let (b_rows, b_cols) = b.shape();

        if a_cols != b_rows {
            return Err(SparseError::DimensionMismatch {
                expected: a_cols,
                found: b_rows,
            });
        }

        let element_size = std::mem::size_of::<T>();
        // Size the row chunk so a dense chunk of the result fits within a
        // quarter of the memory budget, processing at least one row at a time.
        let max_chunk_size = self._memorylimit / (4 * element_size * b_cols);
        let chunk_size = std::cmp::min(max_chunk_size, a_rows).max(1);

        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        // Extract the triplets of `a` once; each chunk filters the rows it owns.
        let (a_row_indices, a_col_indices, a_values) = a.find();

        for chunk_start in (0..a_rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, a_rows);

            let mut chunk_a_data = Vec::new();
            for (k, (&row, &col)) in a_row_indices.iter().zip(a_col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    chunk_a_data.push((row - chunk_start, col, a_values[k]));
                }
            }

            let chunk_result =
                self.compute_chunk_product(&chunk_a_data, b, chunk_end - chunk_start, b_cols)?;

            for (local_row, col, val) in chunk_result {
                if !val.is_zero() {
                    result_rows.push(chunk_start + local_row);
                    result_cols.push(col);
                    result_values.push(val);
                }
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (a_rows, b_cols),
            true,
        )
    }

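    /// Multiplies one row chunk of `a` (given as `(local_row, col, value)`
    /// triplets) against `b`, returning the nonzero products in the same
    /// triplet form. Entries of `b` are grouped by row first so each lookup
    /// only scans the relevant row.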
    fn compute_chunk_product<S>(
        &self,
        chunk_a: &[(usize, usize, T)],
        b: &S,
        chunk_rows: usize,
        b_cols: usize,
    ) -> SparseResult<Vec<(usize, usize, T)>>
    where
        S: SparseArray<T>,
    {
        let mut result = Vec::new();
        let (b_row_indices, b_col_indices, b_values) = b.find();

        // Group the entries of `b` by row so that a row of `a` only touches
        // the rows of `b` it actually multiplies against.
        let mut b_by_row: std::collections::HashMap<usize, Vec<(usize, T)>> =
            std::collections::HashMap::new();
        for (k, (&row, &col)) in b_row_indices.iter().zip(b_col_indices.iter()).enumerate() {
            b_by_row.entry(row).or_default().push((col, b_values[k]));
        }

        for i in 0..chunk_rows {
            let mut a_row_entries = Vec::new();
            for &(row, col, val) in chunk_a {
                if row == i {
                    a_row_entries.push((col, val));
                }
            }

            for j in 0..b_cols {
                let mut dot_product = T::zero();

                for &(a_col, a_val) in &a_row_entries {
                    if let Some(b_row_data) = b_by_row.get(&a_col) {
                        for &(b_col, b_val) in b_row_data {
                            if b_col == j {
                                dot_product = dot_product + a_val * b_val;
                                break;
                            }
                        }
                    }
                }

                if !dot_product.is_zero() {
                    result.push((i, j, dot_product));
                }
            }
        }

        Ok(result)
    }

    #[allow(dead_code)]
    fn process_chunk_matmul<S1, S2>(
        &mut self,
        _a: &S1,
        _b_csc: &S2,
        _row_start: usize,
        _row_end: usize,
        _b_cols: usize,
    ) -> SparseResult<ChunkResult<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        Err(SparseError::ValueError(
            "Process chunk matmul not implemented".to_string(),
        ))
    }

    #[allow(dead_code)]
    fn process_chunk_matmul_old<S1, S2>(
        &mut self,
        a: &S1,
        b_csc: &S2,
        row_start: usize,
        row_end: usize,
        b_cols: usize,
    ) -> SparseResult<ChunkResult<T>>
    where
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let mut chunk_data = Vec::new();
        let mut chunk_indices = Vec::new();
        let mut chunk_indptr = vec![0];

        let (a_row_indices, a_col_indices, a_values) = a.find();
        let (b_row_indices, _b_col_indices, b_values) = b_csc.find();
        let b_indptr = b_csc
            .get_indptr()
            .ok_or_else(|| SparseError::ValueError("CSC matrix must have indptr".to_string()))?;

        for i in row_start..row_end {
            let mut row_data = Vec::new();
            let mut row_indices = Vec::new();

            // Collect the entries of row `i` of `a`.
            let mut a_entries = Vec::new();
            for (k, (&row, &col)) in a_row_indices.iter().zip(a_col_indices.iter()).enumerate() {
                if row == i {
                    a_entries.push((col, a_values[k]));
                }
            }

            for j in 0..b_cols {
                let mut sum = T::zero();
                let b_col_start = b_indptr[j];
                let b_col_end = b_indptr[j + 1];

                for &(a_col, a_val) in &a_entries {
                    for b_idx in b_col_start..b_col_end {
                        if b_row_indices[b_idx] == a_col {
                            sum = sum + a_val * b_values[b_idx];
                            break;
                        }
                    }
                }

                if !sum.is_zero() {
                    row_data.push(sum);
                    row_indices.push(j);
                }
            }

            chunk_data.extend(row_data);
            chunk_indices.extend(row_indices);
            chunk_indptr.push(chunk_data.len());
        }

        Ok(ChunkResult {
            data: chunk_data,
            indices: chunk_indices,
            indptr: chunk_indptr,
        })
    }

    /// Returns the estimated bytes currently held in temporary storage along
    /// with the configured memory limit.
    pub fn memory_stats(&self) -> (usize, usize) {
        let current_usage = self
            .temp_storage
            .iter()
            .map(|v| v.len() * std::mem::size_of::<T>())
            .sum();
        (current_usage, self._memorylimit)
    }
}

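/// CSR-style buffers (data, column indices, and row pointer) produced for a
/// single row chunk during out-of-core matrix multiplication.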
struct ChunkResult<T> {
    #[allow(dead_code)]
    data: Vec<T>,
    #[allow(dead_code)]
    indices: Vec<usize>,
    #[allow(dead_code)]
    indptr: Vec<usize>,
}

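/// Sparse kernels that process nonzero entries in cache-line-sized batches to
/// improve memory locality.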
pub struct CacheAwareOps;

impl CacheAwareOps {
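    /// Sparse matrix-vector product that sorts the nonzeros by column and
    /// processes them in batches of roughly one cache line, so accesses into
    /// `x` are grouped together.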
    pub fn cache_optimized_spmv<T, S>(
        matrix: &S,
        x: &ArrayView1<T>,
        cache_line_size: usize,
    ) -> SparseResult<Array1<T>>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();

        if x.len() != cols {
            return Err(SparseError::DimensionMismatch {
                expected: cols,
                found: x.len(),
            });
        }

        let mut y = Array1::zeros(rows);
        // Guard against a zero batch size when `T` is larger than a cache line.
        let elements_per_cache_line = (cache_line_size / std::mem::size_of::<T>()).max(1);

        let (row_indices, col_indices, values) = matrix.find();

        let mut sorted_ops: Vec<(usize, usize, T)> = row_indices
            .iter()
            .zip(col_indices.iter())
            .zip(values.iter())
            .map(|((&row, &col), &val)| (row, col, val))
            .collect();

        // Sort by column so consecutive operations reuse nearby entries of `x`.
        sorted_ops.sort_by_key(|&(_, col, _)| col);

        for chunk in sorted_ops.chunks(elements_per_cache_line) {
            for &(row, col, val) in chunk {
                y[row] = y[row] + val * x[col];
            }
        }

        Ok(y)
    }

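    /// Transposes a sparse matrix by swapping row and column indices in
    /// cache-line-sized batches, then rebuilding the result as a `CsrArray`
    /// sorted by the new row index.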
    pub fn cache_optimized_transpose<T, S>(
        matrix: &S,
        cache_line_size: usize,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let (row_indices, col_indices, values) = matrix.find();

        // Guard against a zero batch size when `T` is larger than a cache line.
        let elements_per_cache_line = (cache_line_size / std::mem::size_of::<T>()).max(1);

        let mut transposed_triplets = Vec::new();

        for chunk_start in (0..row_indices.len()).step_by(elements_per_cache_line) {
            let chunk_end = std::cmp::min(chunk_start + elements_per_cache_line, row_indices.len());

            for k in chunk_start..chunk_end {
                transposed_triplets.push((col_indices[k], row_indices[k], values[k]));
            }
        }

        transposed_triplets.sort_by_key(|&(new_row, _, _)| new_row);

        let new_rows: Vec<usize> = transposed_triplets
            .iter()
            .map(|&(new_row, _, _)| new_row)
            .collect();
        let new_cols: Vec<usize> = transposed_triplets
            .iter()
            .map(|&(_, new_col, _)| new_col)
            .collect();
        let new_values: Vec<T> = transposed_triplets.iter().map(|&(_, _, val)| val).collect();

        CsrArray::from_triplets(&new_rows, &new_cols, &new_values, (cols, rows), false)
    }
}

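/// A simple buffer pool that reuses previously returned `Vec<T>` allocations
/// up to a fixed number of pooled buffers.
///
/// A minimal usage sketch, mirroring the pool test in this module (the sizes
/// are illustrative only):
///
/// ```ignore
/// let mut pool = MemoryPool::<f64>::new(5);
/// let buffer = pool.allocate(100);
/// // ... use the buffer ...
/// pool.deallocate(buffer);
/// ```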
pub struct MemoryPool<T>
where
    T: Float + Debug + Copy + 'static,
{
    available_buffers: Vec<Vec<T>>,
    allocated_buffers: Vec<Vec<T>>,
    _pool_sizelimit: usize,
}

impl<T> MemoryPool<T>
where
    T: Float + Debug + Copy + 'static,
{
    pub fn new(_pool_sizelimit: usize) -> Self {
        Self {
            available_buffers: Vec::new(),
            allocated_buffers: Vec::new(),
            _pool_sizelimit,
        }
    }

    /// Returns a zero-filled buffer of `size` elements, reusing a pooled
    /// buffer when one is available.
    pub fn allocate(&mut self, size: usize) -> Vec<T> {
        if let Some(mut buffer) = self.available_buffers.pop() {
            buffer.resize(size, T::zero());
            buffer
        } else {
            vec![T::zero(); size]
        }
    }

    /// Returns a buffer to the pool; the buffer is dropped instead if the
    /// pool is already at its size limit.
    pub fn deallocate(&mut self, mut buffer: Vec<T>) {
        if self.available_buffers.len() < self._pool_sizelimit {
            buffer.clear();
            self.available_buffers.push(buffer);
        }
    }

    /// Returns the number of pooled (available) and outstanding (allocated)
    /// buffers currently tracked.
    pub fn stats(&self) -> (usize, usize) {
        (self.available_buffers.len(), self.allocated_buffers.len())
    }
}

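/// Chunked, memory-aware implementations of common sparse operations
/// (element-wise addition, scaling, format conversion, and bandwidth
/// reduction), each optionally accounted for through a [`MemoryTracker`].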
pub struct ChunkedOperations;

impl ChunkedOperations {
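    /// Adds two sparse matrices of identical shape by processing row chunks,
    /// accumulating entries from both operands in a per-chunk hash map before
    /// emitting the combined triplets.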
    pub fn chunked_sparse_add<T, S1, S2>(
        a: &S1,
        b: &S2,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync + std::ops::AddAssign,
        S1: SparseArray<T>,
        S2: SparseArray<T>,
    {
        let (a_rows, a_cols) = a.shape();
        let (b_rows, b_cols) = b.shape();

        if (a_rows, a_cols) != (b_rows, b_cols) {
            return Err(SparseError::ShapeMismatch {
                expected: (a_rows, a_cols),
                found: (b_rows, b_cols),
            });
        }

        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        let element_size = std::mem::size_of::<T>();

        let (a_rows_idx, a_cols_idx, a_values) = a.find();
        let (b_rows_idx, b_cols_idx, b_values) = b.find();

        for chunk_start in (0..a_rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, a_rows);
            let current_chunk_size = chunk_end - chunk_start;

            // Dense estimate for one chunk of each operand.
            let chunk_memory = current_chunk_size * a_cols * element_size * 2;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for chunked addition".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            let mut chunk_result: std::collections::HashMap<(usize, usize), T> =
                std::collections::HashMap::new();

            // Accumulate the entries of `a` that fall in this chunk.
            for (k, (&row, &col)) in a_rows_idx.iter().zip(a_cols_idx.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let key = (row - chunk_start, col);
                    *chunk_result.entry(key).or_insert_with(T::zero) += a_values[k];
                }
            }

            // Accumulate the entries of `b` that fall in this chunk.
            for (k, (&row, &col)) in b_rows_idx.iter().zip(b_cols_idx.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let key = (row - chunk_start, col);
                    *chunk_result.entry(key).or_insert_with(T::zero) += b_values[k];
                }
            }

            for ((local_row, col), val) in chunk_result {
                if !val.is_zero() {
                    result_rows.push(chunk_start + local_row);
                    result_cols.push(col);
                    result_values.push(val);
                }
            }

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (a_rows, a_cols),
            false,
        )
    }

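    /// Scales every entry of a sparse matrix by `scalar`, processing rows in
    /// chunks and skipping results that scale to zero.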
    pub fn chunked_sparse_scale<T, S>(
        matrix: &S,
        scalar: T,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let mut result_rows = Vec::new();
        let mut result_cols = Vec::new();
        let mut result_values = Vec::new();

        let element_size = std::mem::size_of::<T>();

        // Extract the triplets once; each chunk below only scans its own rows.
        let (row_indices, col_indices, values) = matrix.find();

        for chunk_start in (0..rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, rows);
            let current_chunk_size = chunk_end - chunk_start;

            let chunk_memory = current_chunk_size * cols * element_size;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for chunked scaling".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end {
                    let scaled_value = values[k] * scalar;
                    if !scaled_value.is_zero() {
                        result_rows.push(row);
                        result_cols.push(col);
                        result_values.push(scaled_value);
                    }
                }
            }

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (rows, cols),
            false,
        )
    }

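    /// Converts a sparse matrix to CSR form by collecting its nonzero
    /// triplets in row chunks, tracking the memory estimated for each chunk.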
    pub fn chunked_format_conversion<T, S>(
        matrix: &S,
        chunk_size: usize,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<CsrArray<T>>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();
        let mut all_triplets = Vec::new();

        let element_size = std::mem::size_of::<T>();

        // Extract the triplets once; each chunk below only scans its own rows.
        let (row_indices, col_indices, values) = matrix.find();

        for chunk_start in (0..rows).step_by(chunk_size) {
            let chunk_end = std::cmp::min(chunk_start + chunk_size, rows);
            let current_chunk_size = chunk_end - chunk_start;

            let chunk_memory = current_chunk_size * cols * element_size;

            if let Some(ref mut tracker) = memory_tracker {
                if !tracker.can_allocate(chunk_memory) {
                    return Err(SparseError::ValueError(
                        "Insufficient memory for format conversion".to_string(),
                    ));
                }
                tracker.allocate(chunk_memory)?;
            }

            let mut chunk_triplets = Vec::new();
            for (k, (&row, &col)) in row_indices.iter().zip(col_indices.iter()).enumerate() {
                if row >= chunk_start && row < chunk_end && !values[k].is_zero() {
                    chunk_triplets.push((row, col, values[k]));
                }
            }

            all_triplets.extend(chunk_triplets);

            if let Some(ref mut tracker) = memory_tracker {
                tracker.deallocate(chunk_memory);
            }
        }

        let result_rows: Vec<usize> = all_triplets.iter().map(|&(r, _, _)| r).collect();
        let result_cols: Vec<usize> = all_triplets.iter().map(|&(_, c, _)| c).collect();
        let result_values: Vec<T> = all_triplets.iter().map(|&(_, _, v)| v).collect();

        CsrArray::from_triplets(
            &result_rows,
            &result_cols,
            &result_values,
            (rows, cols),
            false,
        )
    }

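    /// Reorders a square sparse matrix with a reverse Cuthill-McKee style
    /// breadth-first traversal to reduce its bandwidth, returning the new
    /// vertex ordering together with the permuted matrix.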
    pub fn bandwidth_reduction<T, S>(
        matrix: &S,
        mut memory_tracker: Option<&mut MemoryTracker>,
    ) -> SparseResult<(Vec<usize>, CsrArray<T>)>
    where
        T: Float + Debug + Copy + 'static + SimdUnifiedOps + Send + Sync,
        S: SparseArray<T>,
    {
        let (rows, cols) = matrix.shape();

        if rows != cols {
            return Err(SparseError::ValueError(
                "Bandwidth reduction requires square matrix".to_string(),
            ));
        }

        let element_size = std::mem::size_of::<usize>();
        // Rough estimate for the adjacency list, ordering, and visitation state.
        let memory_needed = rows * element_size * 4;

        if let Some(ref mut tracker) = memory_tracker {
            if !tracker.can_allocate(memory_needed) {
                return Err(SparseError::ValueError(
                    "Insufficient memory for bandwidth reduction".to_string(),
                ));
            }
            tracker.allocate(memory_needed)?;
        }

        // Build an undirected adjacency list from the sparsity pattern.
        let (row_indices, col_indices, _) = matrix.find();
        let mut adj_list: Vec<Vec<usize>> = vec![Vec::new(); rows];

        for (&row, &col) in row_indices.iter().zip(col_indices.iter()) {
            if row != col {
                adj_list[row].push(col);
                adj_list[col].push(row);
            }
        }

        for neighbors in &mut adj_list {
            neighbors.sort_unstable();
            neighbors.dedup();
        }

        // Start the traversal from a vertex of minimum degree.
        let start_vertex = (0..rows).min_by_key(|&v| adj_list[v].len()).unwrap_or(0);

        let mut ordering = Vec::new();
        let mut visited = vec![false; rows];
        let mut queue = VecDeque::new();

        queue.push_back(start_vertex);
        visited[start_vertex] = true;

        while let Some(current) = queue.pop_front() {
            ordering.push(current);

            // Enqueue unvisited neighbors in order of increasing degree.
            let mut neighbors = adj_list[current]
                .iter()
                .filter(|&&v| !visited[v])
                .map(|&v| (adj_list[v].len(), v))
                .collect::<Vec<_>>();

            neighbors.sort_unstable();

            for (_, neighbor) in neighbors {
                if !visited[neighbor] {
                    visited[neighbor] = true;
                    queue.push_back(neighbor);
                }
            }
        }

        // Append any vertices disconnected from the starting component.
        for (v, &is_visited) in visited.iter().enumerate().take(rows) {
            if !is_visited {
                ordering.push(v);
            }
        }

        // Reversing the Cuthill-McKee order usually gives a smaller profile.
        ordering.reverse();

        let mut perm_rows = Vec::new();
        let mut perm_cols = Vec::new();
        let mut perm_values = Vec::new();

        let (orig_rows, orig_cols, orig_values) = matrix.find();

        // Map each old index to its position in the new ordering.
        let mut inv_perm = vec![0; rows];
        for (new_idx, &old_idx) in ordering.iter().enumerate() {
            inv_perm[old_idx] = new_idx;
        }

        for (k, (&row, &col)) in orig_rows.iter().zip(orig_cols.iter()).enumerate() {
            perm_rows.push(inv_perm[row]);
            perm_cols.push(inv_perm[col]);
            perm_values.push(orig_values[k]);
        }

        let reordered_matrix =
            CsrArray::from_triplets(&perm_rows, &perm_cols, &perm_values, (rows, cols), false)?;

        if let Some(ref mut tracker) = memory_tracker {
            tracker.deallocate(memory_needed);
        }

        Ok((ordering, reordered_matrix))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::csr_array::CsrArray;
    use approx::assert_relative_eq;

    #[test]
    fn test_memory_tracker() {
        let mut tracker = MemoryTracker::new(1000);

        assert!(tracker.allocate(500).is_ok());
        assert_eq!(tracker.current_usage(), 500);
        assert_eq!(tracker.peak_usage(), 500);

        // A failed allocation must not change the tracked usage.
        assert!(tracker.allocate(600).is_err());

        tracker.deallocate(200);
        assert_eq!(tracker.current_usage(), 300);
        assert_eq!(tracker.peak_usage(), 500);

        assert!(tracker.can_allocate(700));
        assert!(!tracker.can_allocate(800));
    }

    #[test]
    fn test_streaming_sparse_matvec() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        let mut tracker = MemoryTracker::new(10000);
        let result = streaming_sparse_matvec(&matrix, &x.view(), 2, Some(&mut tracker)).unwrap();

        assert_relative_eq!(result[0], 7.0);
        assert_relative_eq!(result[1], 6.0);
        assert_relative_eq!(result[2], 19.0);

        assert!(tracker.peak_usage() > 0);
    }

    #[test]
    fn test_out_of_core_processor() {
        let mut processor = OutOfCoreProcessor::<f64>::new(1_000_000);

        // A = [[2, 0], [1, 3]]
        let rows_a = vec![0, 1, 1];
        let cols_a = vec![0, 0, 1];
        let data_a = vec![2.0, 1.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rows_a, &cols_a, &data_a, (2, 2), false).unwrap();

        // B = [[1, 0], [0, 2]]
        let rows_b = vec![0, 1];
        let cols_b = vec![0, 1];
        let data_b = vec![1.0, 2.0];
        let matrix_b = CsrArray::from_triplets(&rows_b, &cols_b, &data_b, (2, 2), false).unwrap();

        let result = processor.out_of_core_matmul(&matrix_a, &matrix_b).unwrap();

        assert_eq!(result.shape(), (2, 2));

        // A * B = [[2, 0], [1, 6]]
        assert_relative_eq!(result.get(0, 0), 2.0);
        assert_relative_eq!(result.get(0, 1), 0.0);
        assert_relative_eq!(result.get(1, 0), 1.0);
        assert_relative_eq!(result.get(1, 1), 6.0);

        let (current, limit) = processor.memory_stats();
        assert!(current <= limit);
    }

    #[test]
    fn test_cache_aware_ops() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        let result = CacheAwareOps::cache_optimized_spmv(&matrix, &x.view(), 64).unwrap();
        assert_relative_eq!(result[0], 7.0);
        assert_relative_eq!(result[1], 6.0);
        assert_relative_eq!(result[2], 19.0);

        let transposed = CacheAwareOps::cache_optimized_transpose(&matrix, 64).unwrap();
        assert_eq!(transposed.shape(), (3, 3));

        // (0, 0) = 1 stays in place, (0, 2) = 2 moves to (2, 0), (1, 1) = 3 stays.
        assert_relative_eq!(transposed.get(0, 0), 1.0);
        assert_relative_eq!(transposed.get(2, 0), 2.0);
        assert_relative_eq!(transposed.get(1, 1), 3.0);
    }

    #[test]
    fn test_memory_pool() {
        let mut pool = MemoryPool::<f64>::new(5);

        let buffer1 = pool.allocate(100);
        assert_eq!(buffer1.len(), 100);

        pool.deallocate(buffer1);

        let (available, allocated) = pool.stats();
        assert_eq!(available, 1);
        assert_eq!(allocated, 0);

        let buffer2 = pool.allocate(50);
        assert_eq!(buffer2.len(), 50);

        pool.deallocate(buffer2);
    }

    #[test]
    fn test_streaming_memory_limit() {
        let rows = vec![0, 0, 1, 2, 2];
        let cols = vec![0, 2, 1, 0, 2];
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let x = Array1::from_vec(vec![1.0, 2.0, 3.0]);

        // A 10-byte limit cannot accommodate even a single-row chunk.
        let mut tracker = MemoryTracker::new(10);
        let result = streaming_sparse_matvec(&matrix, &x.view(), 1, Some(&mut tracker));

        assert!(result.is_err());
    }

    #[test]
    fn test_chunked_sparse_add() {
        let rows_a = vec![0, 1, 2];
        let cols_a = vec![0, 1, 2];
        let data_a = vec![1.0, 2.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rows_a, &cols_a, &data_a, (3, 3), false).unwrap();

        let rows_b = vec![0, 1, 2];
        let cols_b = vec![0, 1, 2];
        let data_b = vec![4.0, 5.0, 6.0];
        let matrix_b = CsrArray::from_triplets(&rows_b, &cols_b, &data_b, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_sparse_add(&matrix_a, &matrix_b, 2, Some(&mut tracker))
                .unwrap();

        assert_eq!(result.shape(), (3, 3));

        assert_relative_eq!(result.get(0, 0), 5.0);
        assert_relative_eq!(result.get(1, 1), 7.0);
        assert_relative_eq!(result.get(2, 2), 9.0);
    }

    #[test]
    fn test_chunked_sparse_scale() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_sparse_scale(&matrix, 2.0, 2, Some(&mut tracker)).unwrap();

        assert_eq!(result.shape(), (3, 3));

        assert_relative_eq!(result.get(0, 0), 2.0);
        assert_relative_eq!(result.get(1, 1), 4.0);
        assert_relative_eq!(result.get(2, 2), 6.0);
    }

    #[test]
    fn test_chunked_format_conversion() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        let mut tracker = MemoryTracker::new(10000);
        let result =
            ChunkedOperations::chunked_format_conversion(&matrix, 2, Some(&mut tracker)).unwrap();

        assert_eq!(result.shape(), matrix.shape());
        assert_eq!(result.nnz(), matrix.nnz());

        assert_relative_eq!(result.get(0, 0), 1.0);
        assert_relative_eq!(result.get(1, 1), 2.0);
        assert_relative_eq!(result.get(2, 2), 3.0);
    }

    #[test]
    fn test_bandwidth_reduction() {
        let rows = vec![0, 0, 1, 1, 2, 2, 3, 3];
        let cols = vec![0, 3, 1, 2, 1, 2, 0, 3];
        let data = vec![1.0, 1.0, 2.0, 1.0, 1.0, 3.0, 1.0, 4.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (4, 4), false).unwrap();

        let mut tracker = MemoryTracker::new(100000);
        let (ordering, reordered) =
            ChunkedOperations::bandwidth_reduction(&matrix, Some(&mut tracker)).unwrap();

        assert_eq!(ordering.len(), 4);

        assert_eq!(reordered.shape(), (4, 4));
        assert_eq!(reordered.nnz(), matrix.nnz());

        // The ordering must be a permutation of all vertex indices.
        let mut sorted_ordering = ordering.clone();
        sorted_ordering.sort_unstable();
        assert_eq!(sorted_ordering, vec![0, 1, 2, 3]);
    }

    #[test]
    fn test_chunked_operations_memory_limit() {
        let rows = vec![0, 1, 2];
        let cols = vec![0, 1, 2];
        let data = vec![1.0, 2.0, 3.0];
        let matrix = CsrArray::from_triplets(&rows, &cols, &data, (3, 3), false).unwrap();

        // A 10-byte limit is too small for any of the chunked operations.
        let mut tracker = MemoryTracker::new(10);
        assert!(
            ChunkedOperations::chunked_sparse_scale(&matrix, 2.0, 1, Some(&mut tracker)).is_err()
        );

        tracker = MemoryTracker::new(10);
        assert!(
            ChunkedOperations::chunked_format_conversion(&matrix, 1, Some(&mut tracker)).is_err()
        );

        tracker = MemoryTracker::new(10);
        assert!(ChunkedOperations::bandwidth_reduction(&matrix, Some(&mut tracker)).is_err());
    }

    #[test]
    fn test_chunked_add_different_sparsity_patterns() {
        let rows_a = vec![0, 2];
        let cols_a = vec![0, 2];
        let data_a = vec![1.0, 3.0];
        let matrix_a = CsrArray::from_triplets(&rows_a, &cols_a, &data_a, (3, 3), false).unwrap();

        let rows_b = vec![1, 2];
        let cols_b = vec![1, 0];
        let data_b = vec![2.0, 1.0];
        let matrix_b = CsrArray::from_triplets(&rows_b, &cols_b, &data_b, (3, 3), false).unwrap();

        let result = ChunkedOperations::chunked_sparse_add(&matrix_a, &matrix_b, 1, None).unwrap();

        // Only `a` contributes at (0, 0) and (2, 2); only `b` at (1, 1) and (2, 0).
        assert_relative_eq!(result.get(0, 0), 1.0);
        assert_relative_eq!(result.get(1, 1), 2.0);
        assert_relative_eq!(result.get(2, 2), 3.0);
        assert_relative_eq!(result.get(2, 0), 1.0);
    }
}