1use crate::error::GraphError;
7use scirs2_core::ndarray::Array2;
8use std::fs::File;
9use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
10use std::mem;
11use std::path::Path;
12
13#[derive(Debug, Clone)]
18pub struct CSRGraph {
19 n_nodes: usize,
21 n_edges: usize,
23 row_ptr: Vec<usize>,
25 col_idx: Vec<usize>,
27 weights: Vec<f64>,
29}
30
31impl CSRGraph {
32 pub fn from_edges(n_nodes: usize, edges: Vec<(usize, usize, f64)>) -> Result<Self, GraphError> {
34 let n_edges = edges.len();
35
36 let mut col_idx = Vec::with_capacity(n_edges);
38 let mut weights = Vec::with_capacity(n_edges);
39
40 let mut degree = vec![0; n_nodes];
42
43 for &(src, dst_, _) in &edges {
45 if src >= n_nodes {
46 return Err(GraphError::node_not_found_with_context(
47 src,
48 n_nodes,
49 "CSR graph construction",
50 ));
51 }
52 if dst_ >= n_nodes {
53 return Err(GraphError::node_not_found_with_context(
54 dst_,
55 n_nodes,
56 "CSR graph construction",
57 ));
58 }
59 degree[src] += 1;
60 }
61
62 let mut row_ptr = Vec::with_capacity(n_nodes + 1);
64 row_ptr.push(0);
65 for ° in °ree {
66 row_ptr.push(row_ptr.last().expect("Operation failed") + deg);
67 }
68
69 col_idx.resize(n_edges, 0);
71 weights.resize(n_edges, 0.0);
72 let mut current_pos = row_ptr.clone();
73 current_pos.pop(); for (src, dst, weight) in edges {
77 let pos = current_pos[src];
78 col_idx[pos] = dst;
79 weights[pos] = weight;
80 current_pos[src] += 1;
81 }
82
83 for node in 0..n_nodes {
85 let start = row_ptr[node];
86 let end = row_ptr[node + 1];
87
88 if end > start {
89 let mut pairs: Vec<(usize, f64)> = col_idx[start..end]
91 .iter()
92 .zip(&weights[start..end])
93 .map(|(&c, &w)| (c, w))
94 .collect();
95
96 pairs.sort_unstable_by_key(|&(col_, _)| col_);
98
99 for (i, (col, weight)) in pairs.into_iter().enumerate() {
101 col_idx[start + i] = col;
102 weights[start + i] = weight;
103 }
104 }
105 }
106
107 Ok(CSRGraph {
108 n_nodes,
109 n_edges,
110 row_ptr,
111 col_idx,
112 weights,
113 })
114 }
115
116 pub fn with_capacity(n_nodes: usize, estimated_edges: usize) -> Self {
118 CSRGraph {
119 n_nodes,
120 n_edges: 0,
121 row_ptr: vec![0; n_nodes + 1],
122 col_idx: Vec::with_capacity(estimated_edges),
123 weights: Vec::with_capacity(estimated_edges),
124 }
125 }
126
127 pub fn neighbors(&self, node: usize) -> impl Iterator<Item = (usize, f64)> + '_ {
129 let start = self.row_ptr[node];
130 let end = self.row_ptr[node + 1];
131
132 self.col_idx[start..end]
133 .iter()
134 .zip(&self.weights[start..end])
135 .map(|(&idx, &weight)| (idx, weight))
136 }
137
138 pub fn degree(&self, node: usize) -> usize {
140 self.row_ptr[node + 1] - self.row_ptr[node]
141 }
142
143 pub fn node_count(&self) -> usize {
145 self.n_nodes
146 }
147
148 pub fn memory_usage(&self) -> usize {
150 mem::size_of_val(&self.n_nodes)
151 + mem::size_of_val(&self.n_edges)
152 + mem::size_of_val(&self.row_ptr[..])
153 + mem::size_of_val(&self.col_idx[..])
154 + mem::size_of_val(&self.weights[..])
155 }
156
157 pub fn to_adjacency_matrix(&self) -> Array2<f64> {
159 let mut matrix = Array2::zeros((self.n_nodes, self.n_nodes));
160
161 for src in 0..self.n_nodes {
162 for (dst, weight) in self.neighbors(src) {
163 matrix[[src, dst]] = weight;
164 }
165 }
166
167 matrix
168 }
169}
170
171#[derive(Debug, Clone)]
175pub struct BitPackedGraph {
176 n_nodes: usize,
178 bits: Vec<u64>,
181 directed: bool,
183}
184
185impl BitPackedGraph {
186 pub fn new(n_nodes: usize, directed: bool) -> Self {
188 let bits_needed = if directed {
189 n_nodes * n_nodes
190 } else {
191 n_nodes * (n_nodes + 1) / 2 };
193
194 let words_needed = bits_needed.div_ceil(64);
195
196 BitPackedGraph {
197 n_nodes,
198 bits: vec![0; words_needed],
199 directed,
200 }
201 }
202
203 fn bit_position(&self, from: usize, to: usize) -> Option<usize> {
205 if from >= self.n_nodes || to >= self.n_nodes {
206 return None;
207 }
208
209 if self.directed {
210 Some(from * self.n_nodes + to)
211 } else {
212 let (u, v) = if from <= to { (from, to) } else { (to, from) };
214 if u == 0 {
216 Some(v)
217 } else {
218 Some(u * (2 * self.n_nodes - u - 1) / 2 + v)
219 }
220 }
221 }
222
223 pub fn add_edge(&mut self, from: usize, to: usize) -> Result<(), GraphError> {
225 let bit_pos = self.bit_position(from, to).ok_or_else(|| {
226 GraphError::node_not_found_with_context(from, self.n_nodes, "add_edge operation")
227 })?;
228
229 let word_idx = bit_pos / 64;
230 let bit_idx = bit_pos % 64;
231
232 self.bits[word_idx] |= 1u64 << bit_idx;
233
234 Ok(())
235 }
236
237 pub fn has_edge(&self, from: usize, to: usize) -> bool {
239 if let Some(bit_pos) = self.bit_position(from, to) {
240 let word_idx = bit_pos / 64;
241 let bit_idx = bit_pos % 64;
242
243 (self.bits[word_idx] & (1u64 << bit_idx)) != 0
244 } else {
245 false
246 }
247 }
248
249 pub fn neighbors(&self, node: usize) -> Vec<usize> {
251 let mut neighbors = Vec::new();
252
253 if self.directed {
254 let start_bit = node * self.n_nodes;
256 let end_bit = start_bit + self.n_nodes;
257
258 let start_word = start_bit / 64;
259 let end_word = end_bit.div_ceil(64);
260
261 for word_idx in start_word..end_word {
262 if word_idx >= self.bits.len() {
263 break;
264 }
265
266 let mut word = self.bits[word_idx];
267 let word_start_bit = word_idx * 64;
268
269 if word_start_bit < start_bit {
271 let skip_bits = start_bit - word_start_bit;
272 word &= !((1u64 << skip_bits) - 1);
273 }
274 if word_start_bit + 64 > end_bit {
275 let keep_bits = end_bit - word_start_bit;
276 word &= (1u64 << keep_bits) - 1;
277 }
278
279 while word != 0 {
281 let bit_pos = word.trailing_zeros() as usize;
282 let global_bit = word_start_bit + bit_pos;
283 if global_bit >= start_bit && global_bit < end_bit {
284 let neighbor = global_bit - start_bit;
285 neighbors.push(neighbor);
286 }
287 word &= word - 1; }
289 }
290 } else {
291 for other in 0..self.n_nodes {
293 if self.has_edge(node, other) {
294 neighbors.push(other);
295 }
296 }
297 }
298
299 neighbors
300 }
301
302 pub fn degree(&self, node: usize) -> usize {
304 if node >= self.n_nodes {
305 return 0;
306 }
307
308 if self.directed {
309 let start_bit = node * self.n_nodes;
310 let end_bit = start_bit + self.n_nodes;
311
312 let start_word = start_bit / 64;
313 let end_word = end_bit.div_ceil(64);
314 let mut count = 0;
315
316 for word_idx in start_word..end_word {
317 if word_idx >= self.bits.len() {
318 break;
319 }
320
321 let mut word = self.bits[word_idx];
322 let word_start_bit = word_idx * 64;
323
324 if word_start_bit < start_bit {
326 let skip_bits = start_bit - word_start_bit;
327 word &= !((1u64 << skip_bits) - 1);
328 }
329 if word_start_bit + 64 > end_bit {
330 let keep_bits = end_bit - word_start_bit;
331 word &= (1u64 << keep_bits) - 1;
332 }
333
334 count += word.count_ones() as usize;
335 }
336
337 count
338 } else {
339 self.neighbors(node).len()
341 }
342 }
343
344 pub fn memory_usage(&self) -> usize {
346 mem::size_of_val(&self.n_nodes)
347 + mem::size_of_val(&self.bits[..])
348 + mem::size_of_val(&self.directed)
349 }
350}
351
352#[derive(Debug, Clone)]
356pub struct CompressedAdjacencyList {
357 n_nodes: usize,
359 data: Vec<u8>,
361 offsets: Vec<usize>,
363}
364
365impl CompressedAdjacencyList {
366 pub fn from_adjacency(_adjlists: Vec<Vec<usize>>) -> Self {
368 let n_nodes = _adjlists.len();
369 let mut data = Vec::new();
370 let mut offsets = Vec::with_capacity(n_nodes + 1);
371
372 offsets.push(0);
373
374 for neighbors in _adjlists {
375 let _start_pos = data.len();
376
377 let mut sorted_neighbors = neighbors;
379 sorted_neighbors.sort_unstable();
380
381 Self::encode_varint(sorted_neighbors.len(), &mut data);
383
384 let mut prev = 0;
386 for &neighbor in &sorted_neighbors {
387 let delta = neighbor - prev;
388 Self::encode_varint(delta, &mut data);
389 prev = neighbor;
390 }
391
392 offsets.push(data.len());
393 }
394
395 CompressedAdjacencyList {
396 n_nodes,
397 data,
398 offsets,
399 }
400 }
401
402 fn encode_varint(mut value: usize, output: &mut Vec<u8>) {
404 while value >= 0x80 {
405 output.push((value & 0x7F) as u8 | 0x80);
406 value >>= 7;
407 }
408 output.push(value as u8);
409 }
410
411 fn decode_varint(data: &[u8], pos: &mut usize) -> usize {
413 let mut value = 0;
414 let mut shift = 0;
415
416 loop {
417 let byte = data[*pos];
418 *pos += 1;
419
420 value |= ((byte & 0x7F) as usize) << shift;
421
422 if byte & 0x80 == 0 {
423 break;
424 }
425
426 shift += 7;
427 }
428
429 value
430 }
431
432 pub fn neighbors(&self, node: usize) -> Vec<usize> {
434 if node >= self.n_nodes {
435 return Vec::new();
436 }
437
438 let start = self.offsets[node];
439 let end = self.offsets[node + 1];
440 let data_slice = &self.data[start..end];
441
442 let mut pos = 0;
443 let count = Self::decode_varint(data_slice, &mut pos);
444
445 let mut neighbors = Vec::with_capacity(count);
446 let mut current = 0;
447
448 for _ in 0..count {
449 let delta = Self::decode_varint(data_slice, &mut pos);
450 current += delta;
451 neighbors.push(current);
452 }
453
454 neighbors
455 }
456
457 pub fn memory_usage(&self) -> usize {
459 mem::size_of_val(&self.n_nodes)
460 + mem::size_of_val(&self.data[..])
461 + mem::size_of_val(&self.offsets[..])
462 }
463}
464
465pub enum HybridGraph {
467 CSR(CSRGraph),
469 BitPacked(BitPackedGraph),
471 Compressed(CompressedAdjacencyList),
473}
474
475impl HybridGraph {
476 pub fn auto_select(
478 n_nodes: usize,
479 edges: Vec<(usize, usize, Option<f64>)>,
480 directed: bool,
481 ) -> Result<Self, GraphError> {
482 let n_edges = edges.len();
483 let density = n_edges as f64 / (n_nodes * n_nodes) as f64;
484 let all_unweighted = edges.iter().all(|(_, _, w)| w.is_none());
485
486 if all_unweighted && density > 0.1 {
487 let mut graph = BitPackedGraph::new(n_nodes, directed);
489 for (src, dst_, _) in edges {
490 graph.add_edge(src, dst_)?;
491 }
492 Ok(HybridGraph::BitPacked(graph))
493 } else if density < 0.01 {
494 let weighted_edges: Vec<(usize, usize, f64)> = edges
496 .into_iter()
497 .map(|(s, d, w)| (s, d, w.unwrap_or(1.0)))
498 .collect();
499 let graph = CSRGraph::from_edges(n_nodes, weighted_edges)?;
500 Ok(HybridGraph::CSR(graph))
501 } else {
502 let mut adj_lists = vec![Vec::new(); n_nodes];
504 for (src, dst_, _) in edges {
505 adj_lists[src].push(dst_);
506 if !directed {
507 adj_lists[dst_].push(src);
508 }
509 }
510 let graph = CompressedAdjacencyList::from_adjacency(adj_lists);
511 Ok(HybridGraph::Compressed(graph))
512 }
513 }
514
515 pub fn memory_usage(&self) -> usize {
517 match self {
518 HybridGraph::CSR(g) => g.memory_usage(),
519 HybridGraph::BitPacked(g) => g.memory_usage(),
520 HybridGraph::Compressed(g) => g.memory_usage(),
521 }
522 }
523}
524
525#[derive(Debug)]
536pub struct MemmapGraph {
537 n_nodes: usize,
539 n_edges: usize,
541 file: File,
543 #[allow(dead_code)]
546 header_size: usize,
547 row_ptr_offset: usize,
548 col_idx_offset: usize,
549 weights_offset: usize,
550}
551
552impl MemmapGraph {
553 const FIELD_BYTES: usize = 8;
557 const HEADER_BYTES: usize = 2 * Self::FIELD_BYTES;
559
560 fn read_usize_le(bytes: &[u8]) -> io::Result<usize> {
564 let mut buf = [0u8; 8];
565 buf.copy_from_slice(&bytes[..8]);
566 let value = u64::from_le_bytes(buf);
567 usize::try_from(value).map_err(|_| {
568 io::Error::new(
569 io::ErrorKind::InvalidData,
570 "MemmapGraph: value exceeds usize range on this target (file written on 64-bit, read on 32-bit?)",
571 )
572 })
573 }
574
575 pub fn from_csr<P: AsRef<Path>>(csr: &CSRGraph, path: P) -> io::Result<Self> {
577 let mut file = File::create(&path)?;
578 let mut writer = BufWriter::new(&mut file);
579
580 writer.write_all(&(csr.n_nodes as u64).to_le_bytes())?;
582 writer.write_all(&(csr.n_edges as u64).to_le_bytes())?;
583
584 for &ptr in &csr.row_ptr {
586 writer.write_all(&(ptr as u64).to_le_bytes())?;
587 }
588
589 for &idx in &csr.col_idx {
591 writer.write_all(&(idx as u64).to_le_bytes())?;
592 }
593
594 for &weight in &csr.weights {
596 writer.write_all(&weight.to_le_bytes())?;
597 }
598
599 writer.flush()?;
600 drop(writer);
601
602 let file = File::open(path)?;
604
605 let header_size = Self::HEADER_BYTES;
606 let row_ptr_offset = header_size;
607 let col_idx_offset = row_ptr_offset + (csr.n_nodes + 1) * Self::FIELD_BYTES;
608 let weights_offset = col_idx_offset + csr.n_edges * Self::FIELD_BYTES;
609
610 Ok(MemmapGraph {
611 n_nodes: csr.n_nodes,
612 n_edges: csr.n_edges,
613 file,
614 header_size,
615 row_ptr_offset,
616 col_idx_offset,
617 weights_offset,
618 })
619 }
620
621 pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
623 let mut file = File::open(path)?;
624 let mut buffer = [0u8; Self::HEADER_BYTES];
625
626 file.read_exact(&mut buffer)?;
628 let n_nodes = Self::read_usize_le(&buffer[0..8])?;
629 let n_edges = Self::read_usize_le(&buffer[8..16])?;
630
631 let header_size = Self::HEADER_BYTES;
632 let row_ptr_offset = header_size;
633 let col_idx_offset = row_ptr_offset + (n_nodes + 1) * Self::FIELD_BYTES;
634 let weights_offset = col_idx_offset + n_edges * Self::FIELD_BYTES;
635
636 Ok(MemmapGraph {
637 n_nodes,
638 n_edges,
639 file,
640 header_size,
641 row_ptr_offset,
642 col_idx_offset,
643 weights_offset,
644 })
645 }
646
647 fn get_row_ptrs(&mut self, node: usize) -> io::Result<(usize, usize)> {
649 if node >= self.n_nodes {
650 return Ok((0, 0));
651 }
652
653 let mut buffer = [0u8; 2 * Self::FIELD_BYTES];
654 let offset = self.row_ptr_offset + node * Self::FIELD_BYTES;
655
656 self.file.seek(SeekFrom::Start(offset as u64))?;
657 self.file.read_exact(&mut buffer)?;
658
659 let start = Self::read_usize_le(&buffer[0..8])?;
660 let end = Self::read_usize_le(&buffer[8..16])?;
661
662 Ok((start, end))
663 }
664
665 pub fn neighbors(&mut self, node: usize) -> io::Result<Vec<(usize, f64)>> {
667 let (start, end) = self.get_row_ptrs(node)?;
668 let degree = end - start;
669
670 if degree == 0 {
671 return Ok(Vec::new());
672 }
673
674 let mut col_buffer = vec![0u8; degree * Self::FIELD_BYTES];
676 let col_offset = self.col_idx_offset + start * Self::FIELD_BYTES;
677 self.file.seek(SeekFrom::Start(col_offset as u64))?;
678 self.file.read_exact(&mut col_buffer)?;
679
680 let mut weight_buffer = vec![0u8; degree * Self::FIELD_BYTES];
682 let weight_offset = self.weights_offset + start * Self::FIELD_BYTES;
683 self.file.seek(SeekFrom::Start(weight_offset as u64))?;
684 self.file.read_exact(&mut weight_buffer)?;
685
686 let mut neighbors = Vec::with_capacity(degree);
688 for i in 0..degree {
689 let col_bytes = &col_buffer[i * Self::FIELD_BYTES..(i + 1) * Self::FIELD_BYTES];
690 let weight_bytes = &weight_buffer[i * Self::FIELD_BYTES..(i + 1) * Self::FIELD_BYTES];
691
692 let col_idx = Self::read_usize_le(col_bytes)?;
693 let mut weight_buf = [0u8; 8];
694 weight_buf.copy_from_slice(&weight_bytes[..8]);
695 let weight = f64::from_le_bytes(weight_buf);
696
697 neighbors.push((col_idx, weight));
698 }
699
700 Ok(neighbors)
701 }
702
703 pub fn degree(&mut self, node: usize) -> io::Result<usize> {
705 let (start, end) = self.get_row_ptrs(node)?;
706 Ok(end - start)
707 }
708
709 pub fn node_count(&self) -> usize {
711 self.n_nodes
712 }
713
714 pub fn edge_count(&self) -> usize {
716 self.n_edges
717 }
718
719 pub fn has_edge(&mut self, from: usize, to: usize) -> io::Result<bool> {
721 let neighbors = self.neighbors(from)?;
722 Ok(neighbors.iter().any(|&(neighbor_, _)| neighbor_ == to))
723 }
724}
725
726impl MemmapGraph {
728 pub fn batch_neighbors(&mut self, nodes: &[usize]) -> io::Result<Vec<Vec<(usize, f64)>>> {
730 let mut results = Vec::with_capacity(nodes.len());
731
732 let mut sorted_nodes: Vec<_> = nodes.iter().enumerate().collect();
734 sorted_nodes.sort_by_key(|(_, &node)| node);
735
736 for (_, &node) in sorted_nodes {
737 results.push(self.neighbors(node)?);
738 }
739
740 Ok(results)
741 }
742
743 pub fn stream_edges<F>(&mut self, mut callback: F) -> io::Result<()>
745 where
746 F: FnMut(usize, usize, f64) -> bool, {
748 for node in 0..self.n_nodes {
749 let neighbors = self.neighbors(node)?;
750 for (neighbor, weight) in neighbors {
751 if !callback(node, neighbor, weight) {
752 return Ok(());
753 }
754 }
755 }
756 Ok(())
757 }
758}
759
760#[cfg(test)]
761mod tests {
762 use super::*;
763
764 #[test]
765 fn test_csr_graph() {
766 let edges = vec![(0, 1, 1.0), (0, 2, 2.0), (1, 2, 3.0), (2, 3, 4.0)];
767
768 let graph = CSRGraph::from_edges(4, edges).expect("Operation failed");
769
770 assert_eq!(graph.degree(0), 2);
771 assert_eq!(graph.degree(3), 0);
772
773 let neighbors: Vec<_> = graph.neighbors(0).collect();
774 assert_eq!(neighbors, vec![(1, 1.0), (2, 2.0)]);
775 }
776
777 #[test]
778 fn test_bit_packed_graph() {
779 let mut graph = BitPackedGraph::new(4, false);
780
781 graph.add_edge(0, 1).expect("Operation failed");
782 graph.add_edge(1, 2).expect("Operation failed");
783 graph.add_edge(0, 3).expect("Operation failed");
784
785 assert!(graph.has_edge(0, 1));
786 assert!(graph.has_edge(1, 0)); assert!(!graph.has_edge(2, 3));
788
789 let neighbors = graph.neighbors(0);
790 assert!(neighbors.contains(&1));
791 assert!(neighbors.contains(&3));
792 }
793
794 #[test]
795 fn test_compressed_adjacency() {
796 let adj_lists = vec![
797 vec![1, 2, 5],
798 vec![0, 2],
799 vec![0, 1, 3],
800 vec![2],
801 vec![],
802 vec![0],
803 ];
804
805 let graph = CompressedAdjacencyList::from_adjacency(adj_lists.clone());
806
807 for (node, expected) in adj_lists.iter().enumerate() {
808 let neighbors = graph.neighbors(node);
809 assert_eq!(&neighbors, expected);
810 }
811
812 let uncompressed_size = adj_lists
814 .iter()
815 .map(|list| list.len() * mem::size_of::<usize>())
816 .sum::<usize>();
817
818 let compressed_size = graph.memory_usage();
819 println!(
822 "Uncompressed: {} bytes, Compressed: {} bytes",
823 uncompressed_size, compressed_size
824 );
825 }
826
827 #[test]
844 fn test_issue_125_memmap_wasm32_portability() {
845 const _: () = assert!(MemmapGraph::FIELD_BYTES == 8);
848 const _: () = assert!(MemmapGraph::HEADER_BYTES == 16);
849 const _: () = assert!(MemmapGraph::FIELD_BYTES == core::mem::size_of::<u64>());
850
851 let edges = vec![
853 (0usize, 1usize, 1.5_f64),
854 (0, 2, 2.5),
855 (1, 2, 3.5),
856 (2, 3, 4.5),
857 ];
858 let csr = CSRGraph::from_edges(4, edges).expect("CSR construction");
859
860 let mut path = std::env::temp_dir();
862 path.push(format!("scirs2_graph_issue125_{}.bin", std::process::id()));
863
864 let _written = MemmapGraph::from_csr(&csr, &path).expect("write memmap");
866 let metadata = std::fs::metadata(&path).expect("file metadata");
867
868 let n_nodes = 4_u64;
873 let n_edges = csr.n_edges as u64;
875 let expected_bytes = 8 * (2 + (n_nodes + 1) + n_edges + n_edges);
876 assert_eq!(
877 metadata.len(),
878 expected_bytes,
879 "on-disk file size must match documented portable format"
880 );
881
882 let mut loaded = MemmapGraph::from_file(&path).expect("read memmap");
883 assert_eq!(loaded.node_count(), 4);
884 assert_eq!(loaded.edge_count(), csr.n_edges);
885
886 for node in 0..4 {
889 let mut roundtrip = loaded.neighbors(node).expect("neighbors");
890 let mut original: Vec<(usize, f64)> = csr.neighbors(node).collect();
891 roundtrip.sort_by_key(|a| a.0);
892 original.sort_by_key(|a| a.0);
893 assert_eq!(roundtrip, original, "node {node} neighbors must round-trip");
894 }
895
896 let _ = std::fs::remove_file(&path);
898 }
899}