1use crate::error::GraphError;
7use scirs2_core::ndarray::Array2;
8use std::fs::File;
9use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
10use std::mem;
11use std::path::Path;
12
13#[derive(Debug, Clone)]
18pub struct CSRGraph {
19 n_nodes: usize,
21 n_edges: usize,
23 row_ptr: Vec<usize>,
25 col_idx: Vec<usize>,
27 weights: Vec<f64>,
29}
30
31impl CSRGraph {
32 pub fn from_edges(n_nodes: usize, edges: Vec<(usize, usize, f64)>) -> Result<Self, GraphError> {
34 let n_edges = edges.len();
35
36 let mut col_idx = Vec::with_capacity(n_edges);
38 let mut weights = Vec::with_capacity(n_edges);
39
40 let mut degree = vec![0; n_nodes];
42
43 for &(src, dst_, _) in &edges {
45 if src >= n_nodes {
46 return Err(GraphError::node_not_found_with_context(
47 src,
48 n_nodes,
49 "CSR graph construction",
50 ));
51 }
52 if dst_ >= n_nodes {
53 return Err(GraphError::node_not_found_with_context(
54 dst_,
55 n_nodes,
56 "CSR graph construction",
57 ));
58 }
59 degree[src] += 1;
60 }
61
62 let mut row_ptr = Vec::with_capacity(n_nodes + 1);
64 row_ptr.push(0);
65 for ° in °ree {
66 row_ptr.push(row_ptr.last().unwrap() + deg);
67 }
68
69 col_idx.resize(n_edges, 0);
71 weights.resize(n_edges, 0.0);
72 let mut current_pos = row_ptr.clone();
73 current_pos.pop(); for (src, dst, weight) in edges {
77 let pos = current_pos[src];
78 col_idx[pos] = dst;
79 weights[pos] = weight;
80 current_pos[src] += 1;
81 }
82
83 for node in 0..n_nodes {
85 let start = row_ptr[node];
86 let end = row_ptr[node + 1];
87
88 if end > start {
89 let mut pairs: Vec<(usize, f64)> = col_idx[start..end]
91 .iter()
92 .zip(&weights[start..end])
93 .map(|(&c, &w)| (c, w))
94 .collect();
95
96 pairs.sort_unstable_by_key(|&(col_, _)| col_);
98
99 for (i, (col, weight)) in pairs.into_iter().enumerate() {
101 col_idx[start + i] = col;
102 weights[start + i] = weight;
103 }
104 }
105 }
106
107 Ok(CSRGraph {
108 n_nodes,
109 n_edges,
110 row_ptr,
111 col_idx,
112 weights,
113 })
114 }
115
116 pub fn with_capacity(n_nodes: usize, estimated_edges: usize) -> Self {
118 CSRGraph {
119 n_nodes,
120 n_edges: 0,
121 row_ptr: vec![0; n_nodes + 1],
122 col_idx: Vec::with_capacity(estimated_edges),
123 weights: Vec::with_capacity(estimated_edges),
124 }
125 }
126
127 pub fn neighbors(&self, node: usize) -> impl Iterator<Item = (usize, f64)> + '_ {
129 let start = self.row_ptr[node];
130 let end = self.row_ptr[node + 1];
131
132 self.col_idx[start..end]
133 .iter()
134 .zip(&self.weights[start..end])
135 .map(|(&idx, &weight)| (idx, weight))
136 }
137
138 pub fn degree(&self, node: usize) -> usize {
140 self.row_ptr[node + 1] - self.row_ptr[node]
141 }
142
143 pub fn node_count(&self) -> usize {
145 self.n_nodes
146 }
147
148 pub fn memory_usage(&self) -> usize {
150 mem::size_of_val(&self.n_nodes)
151 + mem::size_of_val(&self.n_edges)
152 + mem::size_of_val(&self.row_ptr[..])
153 + mem::size_of_val(&self.col_idx[..])
154 + mem::size_of_val(&self.weights[..])
155 }
156
157 pub fn to_adjacency_matrix(&self) -> Array2<f64> {
159 let mut matrix = Array2::zeros((self.n_nodes, self.n_nodes));
160
161 for src in 0..self.n_nodes {
162 for (dst, weight) in self.neighbors(src) {
163 matrix[[src, dst]] = weight;
164 }
165 }
166
167 matrix
168 }
169}
170
171#[derive(Debug, Clone)]
175pub struct BitPackedGraph {
176 n_nodes: usize,
178 bits: Vec<u64>,
181 directed: bool,
183}
184
185impl BitPackedGraph {
186 pub fn new(n_nodes: usize, directed: bool) -> Self {
188 let bits_needed = if directed {
189 n_nodes * n_nodes
190 } else {
191 n_nodes * (n_nodes + 1) / 2 };
193
194 let words_needed = bits_needed.div_ceil(64);
195
196 BitPackedGraph {
197 n_nodes,
198 bits: vec![0; words_needed],
199 directed,
200 }
201 }
202
203 fn bit_position(&self, from: usize, to: usize) -> Option<usize> {
205 if from >= self.n_nodes || to >= self.n_nodes {
206 return None;
207 }
208
209 if self.directed {
210 Some(from * self.n_nodes + to)
211 } else {
212 let (u, v) = if from <= to { (from, to) } else { (to, from) };
214 if u == 0 {
216 Some(v)
217 } else {
218 Some(u * (2 * self.n_nodes - u - 1) / 2 + v)
219 }
220 }
221 }
222
223 pub fn add_edge(&mut self, from: usize, to: usize) -> Result<(), GraphError> {
225 let bit_pos = self.bit_position(from, to).ok_or_else(|| {
226 GraphError::node_not_found_with_context(from, self.n_nodes, "add_edge operation")
227 })?;
228
229 let word_idx = bit_pos / 64;
230 let bit_idx = bit_pos % 64;
231
232 self.bits[word_idx] |= 1u64 << bit_idx;
233
234 Ok(())
235 }
236
237 pub fn has_edge(&self, from: usize, to: usize) -> bool {
239 if let Some(bit_pos) = self.bit_position(from, to) {
240 let word_idx = bit_pos / 64;
241 let bit_idx = bit_pos % 64;
242
243 (self.bits[word_idx] & (1u64 << bit_idx)) != 0
244 } else {
245 false
246 }
247 }
248
249 pub fn neighbors(&self, node: usize) -> Vec<usize> {
251 let mut neighbors = Vec::new();
252
253 if self.directed {
254 let start_bit = node * self.n_nodes;
256 let end_bit = start_bit + self.n_nodes;
257
258 let start_word = start_bit / 64;
259 let end_word = end_bit.div_ceil(64);
260
261 for word_idx in start_word..end_word {
262 if word_idx >= self.bits.len() {
263 break;
264 }
265
266 let mut word = self.bits[word_idx];
267 let word_start_bit = word_idx * 64;
268
269 if word_start_bit < start_bit {
271 let skip_bits = start_bit - word_start_bit;
272 word &= !((1u64 << skip_bits) - 1);
273 }
274 if word_start_bit + 64 > end_bit {
275 let keep_bits = end_bit - word_start_bit;
276 word &= (1u64 << keep_bits) - 1;
277 }
278
279 while word != 0 {
281 let bit_pos = word.trailing_zeros() as usize;
282 let global_bit = word_start_bit + bit_pos;
283 if global_bit >= start_bit && global_bit < end_bit {
284 let neighbor = global_bit - start_bit;
285 neighbors.push(neighbor);
286 }
287 word &= word - 1; }
289 }
290 } else {
291 for other in 0..self.n_nodes {
293 if self.has_edge(node, other) {
294 neighbors.push(other);
295 }
296 }
297 }
298
299 neighbors
300 }
301
302 pub fn degree(&self, node: usize) -> usize {
304 if node >= self.n_nodes {
305 return 0;
306 }
307
308 if self.directed {
309 let start_bit = node * self.n_nodes;
310 let end_bit = start_bit + self.n_nodes;
311
312 let start_word = start_bit / 64;
313 let end_word = end_bit.div_ceil(64);
314 let mut count = 0;
315
316 for word_idx in start_word..end_word {
317 if word_idx >= self.bits.len() {
318 break;
319 }
320
321 let mut word = self.bits[word_idx];
322 let word_start_bit = word_idx * 64;
323
324 if word_start_bit < start_bit {
326 let skip_bits = start_bit - word_start_bit;
327 word &= !((1u64 << skip_bits) - 1);
328 }
329 if word_start_bit + 64 > end_bit {
330 let keep_bits = end_bit - word_start_bit;
331 word &= (1u64 << keep_bits) - 1;
332 }
333
334 count += word.count_ones() as usize;
335 }
336
337 count
338 } else {
339 self.neighbors(node).len()
341 }
342 }
343
344 pub fn memory_usage(&self) -> usize {
346 mem::size_of_val(&self.n_nodes)
347 + mem::size_of_val(&self.bits[..])
348 + mem::size_of_val(&self.directed)
349 }
350}
351
352#[derive(Debug, Clone)]
356pub struct CompressedAdjacencyList {
357 n_nodes: usize,
359 data: Vec<u8>,
361 offsets: Vec<usize>,
363}
364
365impl CompressedAdjacencyList {
366 pub fn from_adjacency(_adjlists: Vec<Vec<usize>>) -> Self {
368 let n_nodes = _adjlists.len();
369 let mut data = Vec::new();
370 let mut offsets = Vec::with_capacity(n_nodes + 1);
371
372 offsets.push(0);
373
374 for neighbors in _adjlists {
375 let _start_pos = data.len();
376
377 let mut sorted_neighbors = neighbors;
379 sorted_neighbors.sort_unstable();
380
381 Self::encode_varint(sorted_neighbors.len(), &mut data);
383
384 let mut prev = 0;
386 for &neighbor in &sorted_neighbors {
387 let delta = neighbor - prev;
388 Self::encode_varint(delta, &mut data);
389 prev = neighbor;
390 }
391
392 offsets.push(data.len());
393 }
394
395 CompressedAdjacencyList {
396 n_nodes,
397 data,
398 offsets,
399 }
400 }
401
402 fn encode_varint(mut value: usize, output: &mut Vec<u8>) {
404 while value >= 0x80 {
405 output.push((value & 0x7F) as u8 | 0x80);
406 value >>= 7;
407 }
408 output.push(value as u8);
409 }
410
411 fn decode_varint(data: &[u8], pos: &mut usize) -> usize {
413 let mut value = 0;
414 let mut shift = 0;
415
416 loop {
417 let byte = data[*pos];
418 *pos += 1;
419
420 value |= ((byte & 0x7F) as usize) << shift;
421
422 if byte & 0x80 == 0 {
423 break;
424 }
425
426 shift += 7;
427 }
428
429 value
430 }
431
432 pub fn neighbors(&self, node: usize) -> Vec<usize> {
434 if node >= self.n_nodes {
435 return Vec::new();
436 }
437
438 let start = self.offsets[node];
439 let end = self.offsets[node + 1];
440 let data_slice = &self.data[start..end];
441
442 let mut pos = 0;
443 let count = Self::decode_varint(data_slice, &mut pos);
444
445 let mut neighbors = Vec::with_capacity(count);
446 let mut current = 0;
447
448 for _ in 0..count {
449 let delta = Self::decode_varint(data_slice, &mut pos);
450 current += delta;
451 neighbors.push(current);
452 }
453
454 neighbors
455 }
456
457 pub fn memory_usage(&self) -> usize {
459 mem::size_of_val(&self.n_nodes)
460 + mem::size_of_val(&self.data[..])
461 + mem::size_of_val(&self.offsets[..])
462 }
463}
464
465pub enum HybridGraph {
467 CSR(CSRGraph),
469 BitPacked(BitPackedGraph),
471 Compressed(CompressedAdjacencyList),
473}
474
475impl HybridGraph {
476 pub fn auto_select(
478 n_nodes: usize,
479 edges: Vec<(usize, usize, Option<f64>)>,
480 directed: bool,
481 ) -> Result<Self, GraphError> {
482 let n_edges = edges.len();
483 let density = n_edges as f64 / (n_nodes * n_nodes) as f64;
484 let all_unweighted = edges.iter().all(|(_, _, w)| w.is_none());
485
486 if all_unweighted && density > 0.1 {
487 let mut graph = BitPackedGraph::new(n_nodes, directed);
489 for (src, dst_, _) in edges {
490 graph.add_edge(src, dst_)?;
491 }
492 Ok(HybridGraph::BitPacked(graph))
493 } else if density < 0.01 {
494 let weighted_edges: Vec<(usize, usize, f64)> = edges
496 .into_iter()
497 .map(|(s, d, w)| (s, d, w.unwrap_or(1.0)))
498 .collect();
499 let graph = CSRGraph::from_edges(n_nodes, weighted_edges)?;
500 Ok(HybridGraph::CSR(graph))
501 } else {
502 let mut adj_lists = vec![Vec::new(); n_nodes];
504 for (src, dst_, _) in edges {
505 adj_lists[src].push(dst_);
506 if !directed {
507 adj_lists[dst_].push(src);
508 }
509 }
510 let graph = CompressedAdjacencyList::from_adjacency(adj_lists);
511 Ok(HybridGraph::Compressed(graph))
512 }
513 }
514
515 pub fn memory_usage(&self) -> usize {
517 match self {
518 HybridGraph::CSR(g) => g.memory_usage(),
519 HybridGraph::BitPacked(g) => g.memory_usage(),
520 HybridGraph::Compressed(g) => g.memory_usage(),
521 }
522 }
523}
524
525#[derive(Debug)]
527pub struct MemmapGraph {
528 n_nodes: usize,
530 n_edges: usize,
532 file: File,
534 #[allow(dead_code)]
537 header_size: usize,
538 row_ptr_offset: usize,
539 col_idx_offset: usize,
540 weights_offset: usize,
541}
542
543impl MemmapGraph {
544 pub fn from_csr<P: AsRef<Path>>(csr: &CSRGraph, path: P) -> io::Result<Self> {
546 let mut file = File::create(&path)?;
547 let mut writer = BufWriter::new(&mut file);
548
549 writer.write_all(&csr.n_nodes.to_le_bytes())?;
551 writer.write_all(&csr.n_edges.to_le_bytes())?;
552
553 for &ptr in &csr.row_ptr {
555 writer.write_all(&ptr.to_le_bytes())?;
556 }
557
558 for &idx in &csr.col_idx {
560 writer.write_all(&idx.to_le_bytes())?;
561 }
562
563 for &weight in &csr.weights {
565 writer.write_all(&weight.to_le_bytes())?;
566 }
567
568 writer.flush()?;
569 drop(writer);
570
571 let file = File::open(path)?;
573
574 let header_size = 16; let row_ptr_offset = header_size;
576 let col_idx_offset = row_ptr_offset + (csr.n_nodes + 1) * 8;
577 let weights_offset = col_idx_offset + csr.n_edges * 8;
578
579 Ok(MemmapGraph {
580 n_nodes: csr.n_nodes,
581 n_edges: csr.n_edges,
582 file,
583 header_size,
584 row_ptr_offset,
585 col_idx_offset,
586 weights_offset,
587 })
588 }
589
590 pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
592 let mut file = File::open(path)?;
593 let mut buffer = [0u8; 16];
594
595 file.read_exact(&mut buffer)?;
597 let n_nodes = usize::from_le_bytes([
598 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7],
599 ]);
600 let n_edges = usize::from_le_bytes([
601 buffer[8], buffer[9], buffer[10], buffer[11], buffer[12], buffer[13], buffer[14],
602 buffer[15],
603 ]);
604
605 let header_size = 16;
606 let row_ptr_offset = header_size;
607 let col_idx_offset = row_ptr_offset + (n_nodes + 1) * 8;
608 let weights_offset = col_idx_offset + n_edges * 8;
609
610 Ok(MemmapGraph {
611 n_nodes,
612 n_edges,
613 file,
614 header_size,
615 row_ptr_offset,
616 col_idx_offset,
617 weights_offset,
618 })
619 }
620
621 fn get_row_ptrs(&mut self, node: usize) -> io::Result<(usize, usize)> {
623 if node >= self.n_nodes {
624 return Ok((0, 0));
625 }
626
627 let mut buffer = [0u8; 16];
628 let offset = self.row_ptr_offset + node * 8;
629
630 self.file.seek(SeekFrom::Start(offset as u64))?;
631 self.file.read_exact(&mut buffer)?;
632
633 let start = usize::from_le_bytes([
634 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4], buffer[5], buffer[6], buffer[7],
635 ]);
636 let end = usize::from_le_bytes([
637 buffer[8], buffer[9], buffer[10], buffer[11], buffer[12], buffer[13], buffer[14],
638 buffer[15],
639 ]);
640
641 Ok((start, end))
642 }
643
644 pub fn neighbors(&mut self, node: usize) -> io::Result<Vec<(usize, f64)>> {
646 let (start, end) = self.get_row_ptrs(node)?;
647 let degree = end - start;
648
649 if degree == 0 {
650 return Ok(Vec::new());
651 }
652
653 let mut col_buffer = vec![0u8; degree * 8];
655 let col_offset = self.col_idx_offset + start * 8;
656 self.file.seek(SeekFrom::Start(col_offset as u64))?;
657 self.file.read_exact(&mut col_buffer)?;
658
659 let mut weight_buffer = vec![0u8; degree * 8];
661 let weight_offset = self.weights_offset + start * 8;
662 self.file.seek(SeekFrom::Start(weight_offset as u64))?;
663 self.file.read_exact(&mut weight_buffer)?;
664
665 let mut neighbors = Vec::with_capacity(degree);
667 for i in 0..degree {
668 let col_bytes = &col_buffer[i * 8..(i + 1) * 8];
669 let weight_bytes = &weight_buffer[i * 8..(i + 1) * 8];
670
671 let col_idx = usize::from_le_bytes([
672 col_bytes[0],
673 col_bytes[1],
674 col_bytes[2],
675 col_bytes[3],
676 col_bytes[4],
677 col_bytes[5],
678 col_bytes[6],
679 col_bytes[7],
680 ]);
681 let weight = f64::from_le_bytes([
682 weight_bytes[0],
683 weight_bytes[1],
684 weight_bytes[2],
685 weight_bytes[3],
686 weight_bytes[4],
687 weight_bytes[5],
688 weight_bytes[6],
689 weight_bytes[7],
690 ]);
691
692 neighbors.push((col_idx, weight));
693 }
694
695 Ok(neighbors)
696 }
697
698 pub fn degree(&mut self, node: usize) -> io::Result<usize> {
700 let (start, end) = self.get_row_ptrs(node)?;
701 Ok(end - start)
702 }
703
704 pub fn node_count(&self) -> usize {
706 self.n_nodes
707 }
708
709 pub fn edge_count(&self) -> usize {
711 self.n_edges
712 }
713
714 pub fn has_edge(&mut self, from: usize, to: usize) -> io::Result<bool> {
716 let neighbors = self.neighbors(from)?;
717 Ok(neighbors.iter().any(|&(neighbor_, _)| neighbor_ == to))
718 }
719}
720
721impl MemmapGraph {
723 pub fn batch_neighbors(&mut self, nodes: &[usize]) -> io::Result<Vec<Vec<(usize, f64)>>> {
725 let mut results = Vec::with_capacity(nodes.len());
726
727 let mut sorted_nodes: Vec<_> = nodes.iter().enumerate().collect();
729 sorted_nodes.sort_by_key(|(_, &node)| node);
730
731 for (_, &node) in sorted_nodes {
732 results.push(self.neighbors(node)?);
733 }
734
735 Ok(results)
736 }
737
738 pub fn stream_edges<F>(&mut self, mut callback: F) -> io::Result<()>
740 where
741 F: FnMut(usize, usize, f64) -> bool, {
743 for node in 0..self.n_nodes {
744 let neighbors = self.neighbors(node)?;
745 for (neighbor, weight) in neighbors {
746 if !callback(node, neighbor, weight) {
747 return Ok(());
748 }
749 }
750 }
751 Ok(())
752 }
753}
754
755#[cfg(test)]
756mod tests {
757 use super::*;
758
759 #[test]
760 fn test_csr_graph() {
761 let edges = vec![(0, 1, 1.0), (0, 2, 2.0), (1, 2, 3.0), (2, 3, 4.0)];
762
763 let graph = CSRGraph::from_edges(4, edges).unwrap();
764
765 assert_eq!(graph.degree(0), 2);
766 assert_eq!(graph.degree(3), 0);
767
768 let neighbors: Vec<_> = graph.neighbors(0).collect();
769 assert_eq!(neighbors, vec![(1, 1.0), (2, 2.0)]);
770 }
771
772 #[test]
773 fn test_bit_packed_graph() {
774 let mut graph = BitPackedGraph::new(4, false);
775
776 graph.add_edge(0, 1).unwrap();
777 graph.add_edge(1, 2).unwrap();
778 graph.add_edge(0, 3).unwrap();
779
780 assert!(graph.has_edge(0, 1));
781 assert!(graph.has_edge(1, 0)); assert!(!graph.has_edge(2, 3));
783
784 let neighbors = graph.neighbors(0);
785 assert!(neighbors.contains(&1));
786 assert!(neighbors.contains(&3));
787 }
788
789 #[test]
790 fn test_compressed_adjacency() {
791 let adj_lists = vec![
792 vec![1, 2, 5],
793 vec![0, 2],
794 vec![0, 1, 3],
795 vec![2],
796 vec![],
797 vec![0],
798 ];
799
800 let graph = CompressedAdjacencyList::from_adjacency(adj_lists.clone());
801
802 for (node, expected) in adj_lists.iter().enumerate() {
803 let neighbors = graph.neighbors(node);
804 assert_eq!(&neighbors, expected);
805 }
806
807 let uncompressed_size = adj_lists
809 .iter()
810 .map(|list| list.len() * mem::size_of::<usize>())
811 .sum::<usize>();
812
813 let compressed_size = graph.memory_usage();
814 println!(
817 "Uncompressed: {} bytes, Compressed: {} bytes",
818 uncompressed_size, compressed_size
819 );
820 }
821}