1use crate::error::{Result, TextError};
7use scirs2_core::ndarray::{Array1, Array2};
8use std::collections::HashMap;
9
10#[derive(Debug, Clone)]
12pub struct CsrMatrix {
13 values: Vec<f64>,
15 colindices: Vec<usize>,
17 row_ptrs: Vec<usize>,
19 n_rows: usize,
21 n_cols: usize,
23}
24
25impl CsrMatrix {
26 pub fn from_dense(dense: &Array2<f64>) -> Self {
28 let (n_rows, n_cols) = dense.dim();
29 let mut values = Vec::new();
30 let mut colindices = Vec::new();
31 let mut row_ptrs = vec![0];
32
33 for row in dense.rows() {
34 for (colidx, &value) in row.iter().enumerate() {
35 if value != 0.0 {
36 values.push(value);
37 colindices.push(colidx);
38 }
39 }
40 row_ptrs.push(values.len());
41 }
42
43 Self {
44 values,
45 colindices,
46 row_ptrs,
47 n_rows,
48 n_cols,
49 }
50 }
51
52 pub fn zeros(_n_rows: usize, ncols: usize) -> Self {
54 Self {
55 values: Vec::new(),
56 colindices: Vec::new(),
57 row_ptrs: vec![0; _n_rows + 1],
58 n_rows: _n_rows,
59 n_cols: ncols,
60 }
61 }
62
63 pub fn shape(&self) -> (usize, usize) {
65 (self.n_rows, self.n_cols)
66 }
67
68 pub fn nnz(&self) -> usize {
70 self.values.len()
71 }
72
73 pub fn memory_usage(&self) -> usize {
75 self.values.len() * std::mem::size_of::<f64>()
76 + self.colindices.len() * std::mem::size_of::<usize>()
77 + self.row_ptrs.len() * std::mem::size_of::<usize>()
78 }
79
80 pub fn to_dense(&self) -> Array2<f64> {
82 let mut dense = Array2::zeros((self.n_rows, self.n_cols));
83
84 for rowidx in 0..self.n_rows {
85 let start = self.row_ptrs[rowidx];
86 let end = self.row_ptrs[rowidx + 1];
87
88 for i in start..end {
89 let colidx = self.colindices[i];
90 let value = self.values[i];
91 dense[[rowidx, colidx]] = value;
92 }
93 }
94
95 dense
96 }
97
98 pub fn get_row(&self, rowidx: usize) -> Result<SparseVector> {
100 if rowidx >= self.n_rows {
101 return Err(TextError::InvalidInput(format!(
102 "Row index {} out of bounds for matrix with {} rows",
103 rowidx, self.n_rows
104 )));
105 }
106
107 let start = self.row_ptrs[rowidx];
108 let end = self.row_ptrs[rowidx + 1];
109
110 let indices: Vec<usize> = self.colindices[start..end].to_vec();
111 let values: Vec<f64> = self.values[start..end].to_vec();
112
113 Ok(SparseVector {
114 indices,
115 values,
116 size: self.n_cols,
117 })
118 }
119
120 pub fn dot(&self, vector: &Array1<f64>) -> Result<Array1<f64>> {
122 if vector.len() != self.n_cols {
123 return Err(TextError::InvalidInput(format!(
124 "Vector dimension {} doesn't match matrix columns {}",
125 vector.len(),
126 self.n_cols
127 )));
128 }
129
130 let mut result = Array1::zeros(self.n_rows);
131
132 for rowidx in 0..self.n_rows {
133 let start = self.row_ptrs[rowidx];
134 let end = self.row_ptrs[rowidx + 1];
135
136 let mut sum = 0.0;
137 for i in start..end {
138 let colidx = self.colindices[i];
139 sum += self.values[i] * vector[colidx];
140 }
141 result[rowidx] = sum;
142 }
143
144 Ok(result)
145 }
146}
147
148#[derive(Debug, Clone)]
150pub struct SparseVector {
151 indices: Vec<usize>,
153 values: Vec<f64>,
155 size: usize,
157}
158
159impl SparseVector {
160 pub fn new(size: usize) -> Self {
162 Self {
163 indices: Vec::new(),
164 values: Vec::new(),
165 size,
166 }
167 }
168
169 pub fn from_dense(dense: &Array1<f64>) -> Self {
171 let mut indices = Vec::new();
172 let mut values = Vec::new();
173
174 for (idx, &value) in dense.iter().enumerate() {
175 if value != 0.0 {
176 indices.push(idx);
177 values.push(value);
178 }
179 }
180
181 Self {
182 indices,
183 values,
184 size: dense.len(),
185 }
186 }
187
188 pub fn fromindices_values(indices: Vec<usize>, values: Vec<f64>, size: usize) -> Self {
190 assert_eq!(
191 indices.len(),
192 values.len(),
193 "Indices and values must have the same length"
194 );
195 Self {
196 indices,
197 values,
198 size,
199 }
200 }
201
202 pub fn to_dense(&self) -> Array1<f64> {
204 let mut dense = Array1::zeros(self.size);
205
206 for (&idx, &value) in self.indices.iter().zip(self.values.iter()) {
207 dense[idx] = value;
208 }
209
210 dense
211 }
212
213 pub fn nnz(&self) -> usize {
215 self.values.len()
216 }
217
218 pub fn size(&self) -> usize {
220 self.size
221 }
222
223 pub fn dotsparse(&self, other: &SparseVector) -> Result<f64> {
225 if self.size != other.size {
226 return Err(TextError::InvalidInput(format!(
227 "Vector dimensions don't match: {} vs {}",
228 self.size, other.size
229 )));
230 }
231
232 let mut result = 0.0;
233 let mut i = 0;
234 let mut j = 0;
235
236 while i < self.indices.len() && j < other.indices.len() {
238 match self.indices[i].cmp(&other.indices[j]) {
239 std::cmp::Ordering::Equal => {
240 result += self.values[i] * other.values[j];
241 i += 1;
242 j += 1;
243 }
244 std::cmp::Ordering::Less => {
245 i += 1;
246 }
247 std::cmp::Ordering::Greater => {
248 j += 1;
249 }
250 }
251 }
252
253 Ok(result)
254 }
255
256 pub fn norm(&self) -> f64 {
258 self.values.iter().map(|&x| x * x).sum::<f64>().sqrt()
259 }
260
261 pub fn scale(&mut self, scalar: f64) {
263 for value in &mut self.values {
264 *value *= scalar;
265 }
266 }
267
268 pub fn indices(&self) -> &[usize] {
270 &self.indices
271 }
272
273 pub fn values(&self) -> &[f64] {
275 &self.values
276 }
277
278 pub fn values_mut(&mut self) -> &mut [f64] {
280 &mut self.values
281 }
282}
283
284#[derive(Debug, Clone)]
286pub struct DokMatrix {
287 data: HashMap<(usize, usize), f64>,
288 n_rows: usize,
289 n_cols: usize,
290}
291
292impl DokMatrix {
293 pub fn new(_n_rows: usize, ncols: usize) -> Self {
295 Self {
296 data: HashMap::new(),
297 n_rows: _n_rows,
298 n_cols: ncols,
299 }
300 }
301
302 pub fn set(&mut self, row: usize, col: usize, value: f64) -> Result<()> {
304 if row >= self.n_rows || col >= self.n_cols {
305 return Err(TextError::InvalidInput(format!(
306 "Index ({}, {}) out of bounds for matrix shape ({}, {})",
307 row, col, self.n_rows, self.n_cols
308 )));
309 }
310
311 if value != 0.0 {
312 self.data.insert((row, col), value);
313 } else {
314 self.data.remove(&(row, col));
315 }
316
317 Ok(())
318 }
319
320 pub fn get(&self, row: usize, col: usize) -> f64 {
322 *self.data.get(&(row, col)).unwrap_or(&0.0)
323 }
324
325 pub fn to_csr(&self) -> CsrMatrix {
327 let mut entries: Vec<((usize, usize), f64)> =
328 self.data.iter().map(|(&k, &v)| (k, v)).collect();
329
330 entries.sort_by_key(|&((r, c), _)| (r, c));
332
333 let mut values = Vec::with_capacity(entries.len());
334 let mut colindices = Vec::with_capacity(entries.len());
335 let mut row_ptrs = vec![0];
336
337 let mut current_row = 0;
338
339 for ((row, col), value) in entries {
340 while current_row < row {
341 row_ptrs.push(values.len());
342 current_row += 1;
343 }
344
345 values.push(value);
346 colindices.push(col);
347 }
348
349 while current_row < self.n_rows {
350 row_ptrs.push(values.len());
351 current_row += 1;
352 }
353
354 CsrMatrix {
355 values,
356 colindices,
357 row_ptrs,
358 n_rows: self.n_rows,
359 n_cols: self.n_cols,
360 }
361 }
362
363 pub fn nnz(&self) -> usize {
365 self.data.len()
366 }
367}
368
369pub struct SparseMatrixBuilder {
371 rows: Vec<SparseVector>,
372 n_cols: usize,
373}
374
375impl SparseMatrixBuilder {
376 pub fn new(_ncols: usize) -> Self {
378 Self {
379 rows: Vec::new(),
380 n_cols: _ncols,
381 }
382 }
383
384 pub fn add_row(&mut self, row: SparseVector) -> Result<()> {
386 if row.size() != self.n_cols {
387 return Err(TextError::InvalidInput(format!(
388 "Row size {} doesn't match expected columns {}",
389 row.size(),
390 self.n_cols
391 )));
392 }
393
394 self.rows.push(row);
395 Ok(())
396 }
397
398 pub fn build(self) -> CsrMatrix {
400 let n_rows = self.rows.len();
401 let mut values = Vec::new();
402 let mut colindices = Vec::new();
403 let mut row_ptrs = vec![0];
404
405 for row in self.rows {
406 values.extend(row.values);
407 colindices.extend(row.indices);
408 row_ptrs.push(values.len());
409 }
410
411 CsrMatrix {
412 values,
413 colindices,
414 row_ptrs,
415 n_rows,
416 n_cols: self.n_cols,
417 }
418 }
419}
420
421#[derive(Debug, Clone)]
423pub struct CooMatrix {
424 rowindices: Vec<usize>,
426 colindices: Vec<usize>,
428 values: Vec<f64>,
430 n_rows: usize,
432 n_cols: usize,
434}
435
436impl CooMatrix {
437 pub fn new(_n_rows: usize, ncols: usize) -> Self {
439 Self {
440 rowindices: Vec::new(),
441 colindices: Vec::new(),
442 values: Vec::new(),
443 n_rows: _n_rows,
444 n_cols: ncols,
445 }
446 }
447
448 pub fn push(&mut self, row: usize, col: usize, value: f64) -> Result<()> {
450 if row >= self.n_rows || col >= self.n_cols {
451 return Err(TextError::InvalidInput(format!(
452 "Index ({}, {}) out of bounds for matrix shape ({}, {})",
453 row, col, self.n_rows, self.n_cols
454 )));
455 }
456
457 if value != 0.0 {
458 self.rowindices.push(row);
459 self.colindices.push(col);
460 self.values.push(value);
461 }
462
463 Ok(())
464 }
465
466 pub fn to_csr(&self) -> CsrMatrix {
468 let mut indices: Vec<usize> = (0..self.values.len()).collect();
470 indices.sort_by_key(|&i| (self.rowindices[i], self.colindices[i]));
471
472 let mut values = Vec::with_capacity(self.values.len());
473 let mut colindices = Vec::with_capacity(self.values.len());
474 let mut row_ptrs = vec![0];
475
476 let mut current_row = 0;
477 for &idx in &indices {
478 let row = self.rowindices[idx];
479
480 while current_row < row {
482 row_ptrs.push(values.len());
483 current_row += 1;
484 }
485
486 values.push(self.values[idx]);
487 colindices.push(self.colindices[idx]);
488 }
489
490 while row_ptrs.len() <= self.n_rows {
492 row_ptrs.push(values.len());
493 }
494
495 CsrMatrix {
496 values,
497 colindices,
498 row_ptrs,
499 n_rows: self.n_rows,
500 n_cols: self.n_cols,
501 }
502 }
503
504 pub fn nnz(&self) -> usize {
506 self.values.len()
507 }
508}
509
510#[derive(Debug, Clone)]
512pub struct CscMatrix {
513 values: Vec<f64>,
515 rowindices: Vec<usize>,
517 col_ptrs: Vec<usize>,
519 n_rows: usize,
521 n_cols: usize,
523}
524
525impl CscMatrix {
526 pub fn from_coo(coo: &CooMatrix) -> Self {
528 let mut indices: Vec<usize> = (0..coo.values.len()).collect();
530 indices.sort_by_key(|&i| (coo.colindices[i], coo.rowindices[i]));
531
532 let mut values = Vec::with_capacity(coo.values.len());
533 let mut rowindices = Vec::with_capacity(coo.values.len());
534 let mut col_ptrs = vec![0];
535
536 let mut current_col = 0;
537 for &idx in &indices {
538 let col = coo.colindices[idx];
539
540 while current_col < col {
542 col_ptrs.push(values.len());
543 current_col += 1;
544 }
545
546 values.push(coo.values[idx]);
547 rowindices.push(coo.rowindices[idx]);
548 }
549
550 while col_ptrs.len() <= coo.n_cols {
552 col_ptrs.push(values.len());
553 }
554
555 Self {
556 values,
557 rowindices,
558 col_ptrs,
559 n_rows: coo.n_rows,
560 n_cols: coo.n_cols,
561 }
562 }
563
564 pub fn get_col(&self, colidx: usize) -> Result<SparseVector> {
566 if colidx >= self.n_cols {
567 return Err(TextError::InvalidInput(format!(
568 "Column index {} out of bounds for matrix with {} columns",
569 colidx, self.n_cols
570 )));
571 }
572
573 let start = self.col_ptrs[colidx];
574 let end = self.col_ptrs[colidx + 1];
575
576 let indices: Vec<usize> = self.rowindices[start..end].to_vec();
577 let values: Vec<f64> = self.values[start..end].to_vec();
578
579 Ok(SparseVector::fromindices_values(
580 indices,
581 values,
582 self.n_rows,
583 ))
584 }
585
586 pub fn shape(&self) -> (usize, usize) {
588 (self.n_rows, self.n_cols)
589 }
590
591 pub fn nnz(&self) -> usize {
593 self.values.len()
594 }
595
596 pub fn memory_usage(&self) -> usize {
598 self.values.len() * std::mem::size_of::<f64>()
599 + self.rowindices.len() * std::mem::size_of::<usize>()
600 + self.col_ptrs.len() * std::mem::size_of::<usize>()
601 }
602
603 pub fn dot(&self, vector: &Array1<f64>) -> Result<Array1<f64>> {
605 if vector.len() != self.n_rows {
606 return Err(TextError::InvalidInput(format!(
607 "Vector dimension {} doesn't match matrix rows {}",
608 vector.len(),
609 self.n_rows
610 )));
611 }
612
613 let mut result = Array1::zeros(self.n_cols);
614
615 for colidx in 0..self.n_cols {
616 let start = self.col_ptrs[colidx];
617 let end = self.col_ptrs[colidx + 1];
618
619 let mut sum = 0.0;
620 for i in start..end {
621 let rowidx = self.rowindices[i];
622 sum += self.values[i] * vector[rowidx];
623 }
624 result[colidx] = sum;
625 }
626
627 Ok(result)
628 }
629}
630
631#[derive(Debug, Clone)]
633pub struct BlockSparseMatrix {
634 _blocksize: usize,
636 blocks: HashMap<(usize, usize), Array2<f64>>,
638 n_rows: usize,
640 n_cols: usize,
642}
643
644impl BlockSparseMatrix {
645 pub fn new(_n_rows: usize, n_cols: usize, _blocksize: usize) -> Self {
647 Self {
648 _blocksize,
649 blocks: HashMap::new(),
650 n_rows: _n_rows,
651 n_cols,
652 }
653 }
654
655 pub fn set(&mut self, row: usize, col: usize, value: f64) -> Result<()> {
657 if row >= self.n_rows || col >= self.n_cols {
658 return Err(TextError::InvalidInput(format!(
659 "Index ({row}, {col}) out of bounds"
660 )));
661 }
662
663 let block_row = row / self._blocksize;
664 let block_col = col / self._blocksize;
665 let local_row = row % self._blocksize;
666 let local_col = col % self._blocksize;
667
668 let block = self
669 .blocks
670 .entry((block_row, block_col))
671 .or_insert_with(|| Array2::zeros((self._blocksize, self._blocksize)));
672
673 block[[local_row, local_col]] = value;
674
675 Ok(())
676 }
677
678 pub fn get(&self, row: usize, col: usize) -> f64 {
680 if row >= self.n_rows || col >= self.n_cols {
681 return 0.0;
682 }
683
684 let block_row = row / self._blocksize;
685 let block_col = col / self._blocksize;
686 let local_row = row % self._blocksize;
687 let local_col = col % self._blocksize;
688
689 self.blocks
690 .get(&(block_row, block_col))
691 .map(|block| block[[local_row, local_col]])
692 .unwrap_or(0.0)
693 }
694
695 pub fn memory_usage(&self) -> usize {
697 self.blocks.len() * self._blocksize * self._blocksize * std::mem::size_of::<f64>()
698 }
699}
700
701#[derive(Debug, Clone)]
703pub struct HierarchicalSparseMatrix {
704 top_level_blocks: HashMap<(usize, usize), CompressedBlock>,
706 top_blocksize: usize,
708 #[allow(dead_code)]
710 sub_blocksize: usize,
711 n_rows: usize,
713 n_cols: usize,
714}
715
716#[derive(Debug, Clone)]
718pub struct CompressedBlock {
719 rle_data: Vec<(usize, f64)>, #[allow(dead_code)]
723 block_rows: usize,
724 #[allow(dead_code)]
725 block_cols: usize,
726}
727
728impl HierarchicalSparseMatrix {
729 pub fn new(_n_rows: usize, n_cols: usize, top_blocksize: usize, sub_blocksize: usize) -> Self {
731 Self {
732 top_level_blocks: HashMap::new(),
733 top_blocksize,
734 sub_blocksize,
735 n_rows: _n_rows,
736 n_cols,
737 }
738 }
739
740 pub fn set(&mut self, row: usize, col: usize, value: f64) -> Result<()> {
742 if row >= self.n_rows || col >= self.n_cols {
743 return Err(TextError::InvalidInput(format!(
744 "Index ({row}, {col}) out of bounds"
745 )));
746 }
747
748 let top_row = row / self.top_blocksize;
749 let top_col = col / self.top_blocksize;
750
751 let block = self
754 .top_level_blocks
755 .entry((top_row, top_col))
756 .or_insert_with(|| CompressedBlock::new(self.top_blocksize, self.top_blocksize));
757
758 let local_row = row % self.top_blocksize;
760 let local_col = col % self.top_blocksize;
761 let linear_pos = local_row * self.top_blocksize + local_col;
762
763 block.set_value(linear_pos, value);
764 Ok(())
765 }
766
767 pub fn get(&self, row: usize, col: usize) -> f64 {
769 if row >= self.n_rows || col >= self.n_cols {
770 return 0.0;
771 }
772
773 let top_row = row / self.top_blocksize;
774 let top_col = col / self.top_blocksize;
775
776 if let Some(block) = self.top_level_blocks.get(&(top_row, top_col)) {
777 let local_row = row % self.top_blocksize;
778 let local_col = col % self.top_blocksize;
779 let linear_pos = local_row * self.top_blocksize + local_col;
780 block.get_value(linear_pos)
781 } else {
782 0.0
783 }
784 }
785
786 pub fn memory_usage(&self) -> usize {
788 self.top_level_blocks
789 .values()
790 .map(|block| block.memory_usage())
791 .sum::<usize>()
792 + std::mem::size_of::<HashMap<(usize, usize), CompressedBlock>>()
793 }
794}
795
796impl CompressedBlock {
797 fn new(rows: usize, cols: usize) -> Self {
798 Self {
799 rle_data: Vec::new(),
800 block_rows: rows,
801 block_cols: cols,
802 }
803 }
804
805 fn set_value(&mut self, position: usize, value: f64) {
806 if value == 0.0 {
807 self.rle_data.retain(|(pos_, _)| *pos_ != position);
809 return;
810 }
811
812 if let Some(entry) = self.rle_data.iter_mut().find(|(pos_, _)| *pos_ == position) {
814 entry.1 = value;
815 } else {
816 let insert_pos = self
818 .rle_data
819 .binary_search_by_key(&position, |&(pos_, _)| pos_)
820 .unwrap_or_else(|pos| pos);
821 self.rle_data.insert(insert_pos, (position, value));
822 }
823 }
824
825 fn get_value(&self, position: usize) -> f64 {
826 self.rle_data
827 .iter()
828 .find(|(pos_, _)| *pos_ == position)
829 .map(|(_, value)| *value)
830 .unwrap_or(0.0)
831 }
832
833 fn memory_usage(&self) -> usize {
834 self.rle_data.len() * std::mem::size_of::<(usize, f64)>()
835 }
836}
837
838#[derive(Debug, Clone)]
840pub struct BitPackedSparseVector {
841 bit_data: Vec<u64>,
843 size: usize,
845}
846
847impl BitPackedSparseVector {
848 pub fn from_boolindices(indices: &[usize], size: usize) -> Self {
850 let num_words = size.div_ceil(64); let mut bit_data = vec![0u64; num_words];
852
853 for &idx in indices {
854 if idx < size {
855 let word_idx = idx / 64;
856 let bit_idx = idx % 64;
857 bit_data[word_idx] |= 1u64 << bit_idx;
858 }
859 }
860
861 Self { bit_data, size }
862 }
863
864 pub fn getindices(&self) -> Vec<usize> {
866 let mut indices = Vec::new();
867
868 for (word_idx, &word) in self.bit_data.iter().enumerate() {
869 if word != 0 {
870 for bit_idx in 0..64 {
871 if word & (1u64 << bit_idx) != 0 {
872 let global_idx = word_idx * 64 + bit_idx;
873 if global_idx < self.size {
874 indices.push(global_idx);
875 }
876 }
877 }
878 }
879 }
880
881 indices
882 }
883
884 pub fn is_set(&self, idx: usize) -> bool {
886 if idx >= self.size {
887 return false;
888 }
889
890 let word_idx = idx / 64;
891 let bit_idx = idx % 64;
892
893 if word_idx < self.bit_data.len() {
894 self.bit_data[word_idx] & (1u64 << bit_idx) != 0
895 } else {
896 false
897 }
898 }
899
900 pub fn count_ones(&self) -> usize {
902 self.bit_data
903 .iter()
904 .map(|&word| word.count_ones() as usize)
905 .sum()
906 }
907
908 pub fn memory_usage(&self) -> usize {
910 self.bit_data.len() * std::mem::size_of::<u64>() + std::mem::size_of::<usize>()
911 }
912
913 pub fn intersection(&self, other: &BitPackedSparseVector) -> Result<BitPackedSparseVector> {
915 if self.size != other.size {
916 return Err(TextError::InvalidInput(format!(
917 "Vector sizes don't match for intersection: {} vs {}",
918 self.size, other.size
919 )));
920 }
921
922 let mut result_data = Vec::with_capacity(self.bit_data.len());
923 for (&a, &b) in self.bit_data.iter().zip(other.bit_data.iter()) {
924 result_data.push(a & b);
925 }
926
927 Ok(BitPackedSparseVector {
928 bit_data: result_data,
929 size: self.size,
930 })
931 }
932
933 pub fn union(&self, other: &BitPackedSparseVector) -> Result<BitPackedSparseVector> {
935 if self.size != other.size {
936 return Err(TextError::InvalidInput(format!(
937 "Vector sizes don't match for union: {} vs {}",
938 self.size, other.size
939 )));
940 }
941
942 let mut result_data = Vec::with_capacity(self.bit_data.len());
943 for (&a, &b) in self.bit_data.iter().zip(other.bit_data.iter()) {
944 result_data.push(a | b);
945 }
946
947 Ok(BitPackedSparseVector {
948 bit_data: result_data,
949 size: self.size,
950 })
951 }
952}
953
954pub struct SparseMemoryPool {
956 value_pool: Vec<Vec<f64>>,
958 index_pool: Vec<Vec<usize>>,
960 _blocksize: usize,
962}
963
964impl SparseMemoryPool {
965 pub fn new(__blocksize: usize) -> Self {
967 Self {
968 value_pool: Vec::new(),
969 index_pool: Vec::new(),
970 _blocksize: __blocksize,
971 }
972 }
973
974 pub fn allocate_values(&mut self, size: usize) -> Vec<f64> {
976 if let Some(mut vec) = self.value_pool.pop() {
977 vec.clear();
978 vec.reserve(size);
979 vec
980 } else {
981 Vec::with_capacity(size.max(self._blocksize))
982 }
983 }
984
985 pub fn allocateindices(&mut self, size: usize) -> Vec<usize> {
987 if let Some(mut vec) = self.index_pool.pop() {
988 vec.clear();
989 vec.reserve(size);
990 vec
991 } else {
992 Vec::with_capacity(size.max(self._blocksize))
993 }
994 }
995
996 pub fn deallocate_values(&mut self, mut vec: Vec<f64>) {
998 if vec.capacity() >= self._blocksize {
999 vec.clear();
1000 self.value_pool.push(vec);
1001 }
1002 }
1003
1004 pub fn deallocateindices(&mut self, mut vec: Vec<usize>) {
1006 if vec.capacity() >= self._blocksize {
1007 vec.clear();
1008 self.index_pool.push(vec);
1009 }
1010 }
1011}
1012
1013#[derive(Debug)]
1015pub enum AdaptiveSparseMatrix {
1016 Csr(CsrMatrix),
1018 Csc(CscMatrix),
1020 Coo(CooMatrix),
1022 Block(BlockSparseMatrix),
1024 Hierarchical(HierarchicalSparseMatrix),
1026}
1027
1028impl AdaptiveSparseMatrix {
1029 pub fn from_coo_adaptive(coo: CooMatrix) -> Self {
1031 let nnz = coo.nnz();
1032 let (n_rows, n_cols) = (coo.n_rows, coo.n_cols);
1033 let total_elements = n_rows * n_cols;
1034 let sparsity = nnz as f64 / total_elements as f64;
1035
1036 if sparsity < 0.001 && total_elements > 10000 {
1038 let hierarchical = HierarchicalSparseMatrix::new(n_rows, n_cols, 64, 8);
1040 Self::Hierarchical(hierarchical)
1042 } else if sparsity < 0.01 && nnz > 1000 {
1043 let _blocksize = ((nnz as f64).sqrt() as usize).clamp(8, 64);
1045 Self::Block(BlockSparseMatrix::new(n_rows, n_cols, _blocksize))
1046 } else if n_rows > n_cols * 2 {
1047 Self::Csc(CscMatrix::from_coo(&coo))
1049 } else {
1050 Self::Csr(coo.to_csr())
1052 }
1053 }
1054
1055 pub fn memory_usage(&self) -> usize {
1057 match self {
1058 Self::Csr(m) => m.memory_usage(),
1059 Self::Csc(m) => m.memory_usage(),
1060 Self::Coo(m) => {
1061 m.nnz() * (2 * std::mem::size_of::<usize>() + std::mem::size_of::<f64>())
1062 }
1063 Self::Block(m) => m.memory_usage(),
1064 Self::Hierarchical(m) => m.memory_usage(),
1065 }
1066 }
1067}
1068
1069pub struct StreamingSparseProcessor {
1071 chunk_size: usize,
1072 #[allow(dead_code)]
1073 memory_limit: usize,
1074}
1075
1076impl StreamingSparseProcessor {
1077 pub fn new(_chunk_size: usize, memorylimit: usize) -> Self {
1079 Self {
1080 chunk_size: _chunk_size,
1081 memory_limit: memorylimit,
1082 }
1083 }
1084
1085 pub fn process_chunks<F, R>(&self, matrix: &CsrMatrix, processor: F) -> Result<Vec<R>>
1087 where
1088 F: Fn(&CsrMatrix) -> Result<R>,
1089 {
1090 let mut results = Vec::new();
1091 let (n_rows, n_cols) = matrix.shape();
1092 let rows_per_chunk = (self.chunk_size).min(n_rows);
1093
1094 for chunk_start in (0..n_rows).step_by(rows_per_chunk) {
1095 let chunk_end = (chunk_start + rows_per_chunk).min(n_rows);
1096
1097 let chunk = self.extract_row_slice(matrix, chunk_start, chunk_end)?;
1099 let result = processor(&chunk)?;
1100 results.push(result);
1101 }
1102
1103 Ok(results)
1104 }
1105
1106 fn extract_row_slice(
1108 &self,
1109 matrix: &CsrMatrix,
1110 start_row: usize,
1111 end_row: usize,
1112 ) -> Result<CsrMatrix> {
1113 let mut values = Vec::new();
1115 let mut colindices = Vec::new();
1116 let mut row_ptrs = vec![0];
1117
1118 for _row in start_row..end_row {
1119 if let Ok(sparse_row) = matrix.get_row(_row) {
1120 values.extend(sparse_row.values().iter());
1121 colindices.extend(sparse_row.indices().iter());
1122 row_ptrs.push(values.len());
1123 }
1124 }
1125
1126 Ok(CsrMatrix {
1127 values,
1128 colindices,
1129 row_ptrs,
1130 n_rows: end_row - start_row,
1131 n_cols: matrix.shape().1,
1132 })
1133 }
1134}
1135
1136#[derive(Debug, Clone)]
1138pub struct ApproximateSparseMatrix {
1139 row_sketches: Vec<Vec<(u32, f32)>>, #[allow(dead_code)]
1143 sketchsize: usize,
1144 n_rows: usize,
1146 #[allow(dead_code)]
1147 n_cols: usize,
1148}
1149
1150impl ApproximateSparseMatrix {
1151 pub fn from_csr(matrix: &CsrMatrix, sketchsize: usize) -> Self {
1153 let (n_rows, n_cols) = matrix.shape();
1154 let mut row_sketches = Vec::with_capacity(n_rows);
1155
1156 for rowidx in 0..n_rows {
1157 if let Ok(row) = matrix.get_row(rowidx) {
1158 let sketch = Self::create_row_sketch(&row, sketchsize);
1159 row_sketches.push(sketch);
1160 } else {
1161 row_sketches.push(Vec::new());
1162 }
1163 }
1164
1165 Self {
1166 row_sketches,
1167 sketchsize,
1168 n_rows,
1169 n_cols,
1170 }
1171 }
1172
1173 fn create_row_sketch(row: &SparseVector, sketchsize: usize) -> Vec<(u32, f32)> {
1175 let mut sketch = vec![(0u32, 0.0f32); sketchsize];
1176
1177 for (&idx, &val) in row.indices().iter().zip(row.values().iter()) {
1178 let hash = ((idx as u64 * 2654435761u64) % (sketchsize as u64)) as usize;
1180 sketch[hash].0 = idx as u32;
1181 sketch[hash].1 += val as f32;
1182 }
1183
1184 sketch.into_iter().filter(|(_, val)| *val != 0.0).collect()
1186 }
1187
1188 pub fn approximate_row_similarity(&self, row1: usize, row2: usize) -> f32 {
1190 if row1 >= self.n_rows || row2 >= self.n_rows {
1191 return 0.0;
1192 }
1193
1194 let sketch1 = &self.row_sketches[row1];
1195 let sketch2 = &self.row_sketches[row2];
1196
1197 let mut similarity = 0.0f32;
1198 let mut i = 0;
1199 let mut j = 0;
1200
1201 while i < sketch1.len() && j < sketch2.len() {
1203 match sketch1[i].0.cmp(&sketch2[j].0) {
1204 std::cmp::Ordering::Equal => {
1205 similarity += sketch1[i].1 * sketch2[j].1;
1206 i += 1;
1207 j += 1;
1208 }
1209 std::cmp::Ordering::Less => i += 1,
1210 std::cmp::Ordering::Greater => j += 1,
1211 }
1212 }
1213
1214 similarity
1215 }
1216
1217 pub fn memory_usage(&self) -> usize {
1219 self.row_sketches
1220 .iter()
1221 .map(|sketch| sketch.len() * std::mem::size_of::<(u32, f32)>())
1222 .sum::<usize>()
1223 }
1224}
1225
1226#[derive(Debug, Clone)]
1228pub struct QuantizedSparseVector {
1229 indices: Vec<u32>,
1231 values: Vec<i8>,
1233 scale: f32,
1235 zero_point: i8,
1237 size: usize,
1239}
1240
1241impl QuantizedSparseVector {
1242 pub fn fromsparse(sparse: &SparseVector) -> Self {
1244 if sparse.values.is_empty() {
1245 return Self {
1246 indices: Vec::new(),
1247 values: Vec::new(),
1248 scale: 1.0,
1249 zero_point: 0,
1250 size: sparse.size,
1251 };
1252 }
1253
1254 let min_val = sparse.values.iter().fold(f64::INFINITY, |a, &b| a.min(b));
1256 let max_val = sparse
1257 .values
1258 .iter()
1259 .fold(f64::NEG_INFINITY, |a, &b| a.max(b));
1260
1261 let range = max_val - min_val;
1263 let scale = (range / 255.0) as f32;
1264 let zero_point = (-min_val / range * 255.0).round() as i8;
1265
1266 let indices: Vec<u32> = sparse.indices.iter().map(|&i| i as u32).collect();
1268 let values: Vec<i8> = sparse
1269 .values
1270 .iter()
1271 .map(|&v| {
1272 let scaled = ((v - min_val) / range * 255.0).round();
1273 (scaled.clamp(0.0, 255.0) as i16 - 128) as i8 })
1275 .collect();
1276
1277 Self {
1278 indices,
1279 values,
1280 scale,
1281 zero_point,
1282 size: sparse.size,
1283 }
1284 }
1285
1286 pub fn tosparse(&self) -> SparseVector {
1288 let indices: Vec<usize> = self.indices.iter().map(|&i| i as usize).collect();
1289 let values: Vec<f64> = self
1290 .values
1291 .iter()
1292 .map(|&v| {
1293 let dequantized = (v as f32 + 128.0 - self.zero_point as f32) * self.scale;
1294 dequantized as f64
1295 })
1296 .collect();
1297
1298 SparseVector::fromindices_values(indices, values, self.size)
1299 }
1300
1301 pub fn memory_usage(&self) -> usize {
1303 self.indices.len() * std::mem::size_of::<u32>()
1304 + self.values.len() * std::mem::size_of::<i8>()
1305 + std::mem::size_of::<f32>() * 2
1306 + std::mem::size_of::<usize>()
1307 }
1308}
1309
1310#[cfg(test)]
1311mod tests {
1312 use super::*;
1313 use scirs2_core::ndarray::{arr1, arr2};
1314
1315 #[test]
1316 fn test_csr_from_dense() {
1317 let dense = arr2(&[[1.0, 0.0, 2.0], [0.0, 3.0, 0.0], [4.0, 0.0, 5.0]]);
1318
1319 let csr = CsrMatrix::from_dense(&dense);
1320
1321 assert_eq!(csr.shape(), (3, 3));
1322 assert_eq!(csr.nnz(), 5);
1323
1324 let reconstructed = csr.to_dense();
1325 assert_eq!(reconstructed, dense);
1326 }
1327
1328 #[test]
1329 fn testsparse_vector() {
1330 let dense = arr1(&[0.0, 1.0, 0.0, 2.0, 0.0]);
1331 let sparse = SparseVector::from_dense(&dense);
1332
1333 assert_eq!(sparse.nnz(), 2);
1334 assert_eq!(sparse.size(), 5);
1335
1336 let reconstructed = sparse.to_dense();
1337 assert_eq!(reconstructed, dense);
1338 }
1339
1340 #[test]
1341 fn testsparse_dot_product() {
1342 let v1 = SparseVector::from_dense(&arr1(&[1.0, 0.0, 2.0, 0.0]));
1343 let v2 = SparseVector::from_dense(&arr1(&[0.0, 3.0, 2.0, 0.0]));
1344
1345 let dot = v1.dotsparse(&v2).unwrap();
1346 assert_eq!(dot, 4.0); }
1348
1349 #[test]
1350 fn test_dok_matrix() {
1351 let mut dok = DokMatrix::new(3, 3);
1352
1353 dok.set(0, 0, 1.0).unwrap();
1354 dok.set(1, 1, 2.0).unwrap();
1355 dok.set(2, 0, 3.0).unwrap();
1356
1357 assert_eq!(dok.get(0, 0), 1.0);
1358 assert_eq!(dok.get(0, 1), 0.0);
1359 assert_eq!(dok.nnz(), 3);
1360
1361 let csr = dok.to_csr();
1362 assert_eq!(csr.nnz(), 3);
1363 }
1364
1365 #[test]
1366 fn test_matrix_builder() {
1367 let mut builder = SparseMatrixBuilder::new(4);
1368
1369 builder
1370 .add_row(SparseVector::from_dense(&arr1(&[1.0, 0.0, 2.0, 0.0])))
1371 .unwrap();
1372 builder
1373 .add_row(SparseVector::from_dense(&arr1(&[0.0, 3.0, 0.0, 4.0])))
1374 .unwrap();
1375
1376 let matrix = builder.build();
1377 assert_eq!(matrix.shape(), (2, 4));
1378 assert_eq!(matrix.nnz(), 4);
1379 }
1380
1381 #[test]
1382 fn test_memory_efficiency() {
1383 let n = 1000;
1385 let mut dense = Array2::zeros((n, n));
1386
1387 for i in 0..n / 10 {
1389 for j in 0..n / 10 {
1390 dense[[i, j]] = i as f64 + j as f64;
1391 }
1392 }
1393
1394 let sparse = CsrMatrix::from_dense(&dense);
1395
1396 let dense_memory = n * n * std::mem::size_of::<f64>();
1398 let sparse_memory = sparse.memory_usage();
1399
1400 println!("Dense memory: {dense_memory} bytes");
1401 println!("Sparse memory: {sparse_memory} bytes");
1402 println!(
1403 "Memory savings: {:.1}%",
1404 (1.0 - sparse_memory as f64 / dense_memory as f64) * 100.0
1405 );
1406
1407 assert!(sparse_memory < dense_memory / 10); }
1409
1410 #[test]
1411 fn test_coo_matrix() {
1412 let mut coo = CooMatrix::new(3, 3);
1413
1414 coo.push(0, 0, 1.0).unwrap();
1415 coo.push(1, 1, 2.0).unwrap();
1416 coo.push(2, 0, 3.0).unwrap();
1417 coo.push(1, 2, 4.0).unwrap();
1418
1419 assert_eq!(coo.nnz(), 4);
1420
1421 let csr = coo.to_csr();
1423 assert_eq!(csr.nnz(), 4);
1424 assert_eq!(csr.shape(), (3, 3));
1425
1426 let dense = csr.to_dense();
1428 assert_eq!(dense[[0, 0]], 1.0);
1429 assert_eq!(dense[[1, 1]], 2.0);
1430 assert_eq!(dense[[2, 0]], 3.0);
1431 assert_eq!(dense[[1, 2]], 4.0);
1432 }
1433
1434 #[test]
1435 fn test_csc_matrix() {
1436 let mut coo = CooMatrix::new(3, 3);
1437 coo.push(0, 0, 1.0).unwrap();
1438 coo.push(1, 1, 2.0).unwrap();
1439 coo.push(2, 0, 3.0).unwrap();
1440
1441 let csc = CscMatrix::from_coo(&coo);
1442
1443 let col0 = csc.get_col(0).unwrap();
1445 assert_eq!(col0.nnz(), 2);
1446 let dense_col0 = col0.to_dense();
1447 assert_eq!(dense_col0[0], 1.0);
1448 assert_eq!(dense_col0[2], 3.0);
1449
1450 let col1 = csc.get_col(1).unwrap();
1452 assert_eq!(col1.nnz(), 1);
1453 let dense_col1 = col1.to_dense();
1454 assert_eq!(dense_col1[1], 2.0);
1455 }
1456
1457 #[test]
1458 fn test_blocksparse_matrix() {
1459 let mut blocksparse = BlockSparseMatrix::new(10, 10, 3);
1460
1461 blocksparse.set(0, 0, 1.0).unwrap();
1463 blocksparse.set(5, 5, 2.0).unwrap();
1464 blocksparse.set(9, 9, 3.0).unwrap();
1465
1466 assert_eq!(blocksparse.get(0, 0), 1.0);
1468 assert_eq!(blocksparse.get(5, 5), 2.0);
1469 assert_eq!(blocksparse.get(9, 9), 3.0);
1470 assert_eq!(blocksparse.get(3, 3), 0.0);
1471
1472 let memory = blocksparse.memory_usage();
1474 assert!(memory > 0);
1475 }
1476
1477 #[test]
1478 fn test_quantizedsparse_vector() {
1479 let mut sparse = SparseVector::new(10);
1480 sparse.indices = vec![1, 3, 7];
1481 sparse.values = vec![100.0, 50.0, 200.0];
1483
1484 let quantized = QuantizedSparseVector::fromsparse(&sparse);
1486 assert_eq!(quantized.indices.len(), 3);
1487 assert_eq!(quantized.values.len(), 3);
1488
1489 let dequantized = quantized.tosparse();
1491 assert_eq!(dequantized.nnz(), 3);
1492
1493 let orig_values = sparse.values();
1495 let deq_values = dequantized.values();
1496 for i in 0..3 {
1497 let error = (orig_values[i] - deq_values[i]).abs();
1498 let relative_error = error / orig_values[i].abs();
1499 assert!(relative_error < 0.05); }
1502
1503 let orig_memory = sparse.indices.len() * std::mem::size_of::<usize>()
1505 + sparse.values.len() * std::mem::size_of::<f64>();
1506 let quantized_memory = quantized.memory_usage();
1507 assert!(quantized_memory < orig_memory);
1508 }
1509}