Skip to main content

scirs2_graph/memory/
compact.rs

1//! Compact graph representations for memory efficiency
2//!
3//! This module provides memory-efficient graph storage formats optimized for
4//! different graph characteristics (sparse, dense, regular degree, etc.).
5
6use crate::error::GraphError;
7use scirs2_core::ndarray::Array2;
8use std::fs::File;
9use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
10use std::mem;
11use std::path::Path;
12
13/// Compressed Sparse Row (CSR) format for sparse graphs
14///
15/// This format is highly memory-efficient for sparse graphs and provides
16/// fast row (neighbor) access.
17#[derive(Debug, Clone)]
18pub struct CSRGraph {
19    /// Number of nodes
20    n_nodes: usize,
21    /// Number of edges
22    n_edges: usize,
23    /// Row pointers - indices where each node's edges start
24    row_ptr: Vec<usize>,
25    /// Column indices - destination nodes
26    col_idx: Vec<usize>,
27    /// Edge weights
28    weights: Vec<f64>,
29}
30
31impl CSRGraph {
32    /// Create a new CSR graph from edge list (optimized version)
33    pub fn from_edges(n_nodes: usize, edges: Vec<(usize, usize, f64)>) -> Result<Self, GraphError> {
34        let n_edges = edges.len();
35
36        // Pre-allocate with exact sizes to avoid reallocations
37        let mut col_idx = Vec::with_capacity(n_edges);
38        let mut weights = Vec::with_capacity(n_edges);
39
40        // Use counting sort for better performance when source nodes are dense
41        let mut degree = vec![0; n_nodes];
42
43        // First pass: count degrees and validate nodes
44        for &(src, dst_, _) in &edges {
45            if src >= n_nodes {
46                return Err(GraphError::node_not_found_with_context(
47                    src,
48                    n_nodes,
49                    "CSR graph construction",
50                ));
51            }
52            if dst_ >= n_nodes {
53                return Err(GraphError::node_not_found_with_context(
54                    dst_,
55                    n_nodes,
56                    "CSR graph construction",
57                ));
58            }
59            degree[src] += 1;
60        }
61
62        // Build row pointers using prefix sum
63        let mut row_ptr = Vec::with_capacity(n_nodes + 1);
64        row_ptr.push(0);
65        for &deg in &degree {
66            row_ptr.push(row_ptr.last().expect("Operation failed") + deg);
67        }
68
69        // Initialize working arrays for building CSR
70        col_idx.resize(n_edges, 0);
71        weights.resize(n_edges, 0.0);
72        let mut current_pos = row_ptr.clone();
73        current_pos.pop(); // Remove last element
74
75        // Fill CSR arrays directly without sorting
76        for (src, dst, weight) in edges {
77            let pos = current_pos[src];
78            col_idx[pos] = dst;
79            weights[pos] = weight;
80            current_pos[src] += 1;
81        }
82
83        // Sort neighbors within each row for better cache performance
84        for node in 0..n_nodes {
85            let start = row_ptr[node];
86            let end = row_ptr[node + 1];
87
88            if end > start {
89                // Create pairs for sorting
90                let mut pairs: Vec<(usize, f64)> = col_idx[start..end]
91                    .iter()
92                    .zip(&weights[start..end])
93                    .map(|(&c, &w)| (c, w))
94                    .collect();
95
96                // Sort by column index
97                pairs.sort_unstable_by_key(|&(col_, _)| col_);
98
99                // Write back sorted data
100                for (i, (col, weight)) in pairs.into_iter().enumerate() {
101                    col_idx[start + i] = col;
102                    weights[start + i] = weight;
103                }
104            }
105        }
106
107        Ok(CSRGraph {
108            n_nodes,
109            n_edges,
110            row_ptr,
111            col_idx,
112            weights,
113        })
114    }
115
116    /// Create CSR graph with pre-allocated capacity (for streaming construction)
117    pub fn with_capacity(n_nodes: usize, estimated_edges: usize) -> Self {
118        CSRGraph {
119            n_nodes,
120            n_edges: 0,
121            row_ptr: vec![0; n_nodes + 1],
122            col_idx: Vec::with_capacity(estimated_edges),
123            weights: Vec::with_capacity(estimated_edges),
124        }
125    }
126
127    /// Get neighbors of a node
128    pub fn neighbors(&self, node: usize) -> impl Iterator<Item = (usize, f64)> + '_ {
129        let start = self.row_ptr[node];
130        let end = self.row_ptr[node + 1];
131
132        self.col_idx[start..end]
133            .iter()
134            .zip(&self.weights[start..end])
135            .map(|(&idx, &weight)| (idx, weight))
136    }
137
138    /// Get degree of a node
139    pub fn degree(&self, node: usize) -> usize {
140        self.row_ptr[node + 1] - self.row_ptr[node]
141    }
142
143    /// Get number of nodes
144    pub fn node_count(&self) -> usize {
145        self.n_nodes
146    }
147
148    /// Memory usage in bytes
149    pub fn memory_usage(&self) -> usize {
150        mem::size_of_val(&self.n_nodes)
151            + mem::size_of_val(&self.n_edges)
152            + mem::size_of_val(&self.row_ptr[..])
153            + mem::size_of_val(&self.col_idx[..])
154            + mem::size_of_val(&self.weights[..])
155    }
156
157    /// Convert to adjacency matrix (for dense operations)
158    pub fn to_adjacency_matrix(&self) -> Array2<f64> {
159        let mut matrix = Array2::zeros((self.n_nodes, self.n_nodes));
160
161        for src in 0..self.n_nodes {
162            for (dst, weight) in self.neighbors(src) {
163                matrix[[src, dst]] = weight;
164            }
165        }
166
167        matrix
168    }
169}
170
171/// Bit-packed representation for unweighted graphs
172///
173/// Uses 1 bit per potential edge, extremely memory efficient for unweighted graphs.
174#[derive(Debug, Clone)]
175pub struct BitPackedGraph {
176    /// Number of nodes
177    n_nodes: usize,
178    /// Bit array storing adjacency information
179    /// For undirected graphs, only upper triangle is stored
180    bits: Vec<u64>,
181    /// Whether the graph is directed
182    directed: bool,
183}
184
185impl BitPackedGraph {
186    /// Create a new bit-packed graph
187    pub fn new(n_nodes: usize, directed: bool) -> Self {
188        let bits_needed = if directed {
189            n_nodes * n_nodes
190        } else {
191            n_nodes * (n_nodes + 1) / 2 // Upper triangle including diagonal
192        };
193
194        let words_needed = bits_needed.div_ceil(64);
195
196        BitPackedGraph {
197            n_nodes,
198            bits: vec![0; words_needed],
199            directed,
200        }
201    }
202
203    /// Calculate bit position for an edge
204    fn bit_position(&self, from: usize, to: usize) -> Option<usize> {
205        if from >= self.n_nodes || to >= self.n_nodes {
206            return None;
207        }
208
209        if self.directed {
210            Some(from * self.n_nodes + to)
211        } else {
212            // For undirected, normalize to upper triangle
213            let (u, v) = if from <= to { (from, to) } else { (to, from) };
214            // Calculate position in upper triangular matrix using safe arithmetic
215            if u == 0 {
216                Some(v)
217            } else {
218                Some(u * (2 * self.n_nodes - u - 1) / 2 + v)
219            }
220        }
221    }
222
223    /// Add an edge
224    pub fn add_edge(&mut self, from: usize, to: usize) -> Result<(), GraphError> {
225        let bit_pos = self.bit_position(from, to).ok_or_else(|| {
226            GraphError::node_not_found_with_context(from, self.n_nodes, "add_edge operation")
227        })?;
228
229        let word_idx = bit_pos / 64;
230        let bit_idx = bit_pos % 64;
231
232        self.bits[word_idx] |= 1u64 << bit_idx;
233
234        Ok(())
235    }
236
237    /// Check if edge exists
238    pub fn has_edge(&self, from: usize, to: usize) -> bool {
239        if let Some(bit_pos) = self.bit_position(from, to) {
240            let word_idx = bit_pos / 64;
241            let bit_idx = bit_pos % 64;
242
243            (self.bits[word_idx] & (1u64 << bit_idx)) != 0
244        } else {
245            false
246        }
247    }
248
249    /// Get neighbors of a node (optimized with SIMD-like operations)
250    pub fn neighbors(&self, node: usize) -> Vec<usize> {
251        let mut neighbors = Vec::new();
252
253        if self.directed {
254            // For directed graphs, check outgoing edges
255            let start_bit = node * self.n_nodes;
256            let end_bit = start_bit + self.n_nodes;
257
258            let start_word = start_bit / 64;
259            let end_word = end_bit.div_ceil(64);
260
261            for word_idx in start_word..end_word {
262                if word_idx >= self.bits.len() {
263                    break;
264                }
265
266                let mut word = self.bits[word_idx];
267                let word_start_bit = word_idx * 64;
268
269                // Mask out bits outside our range
270                if word_start_bit < start_bit {
271                    let skip_bits = start_bit - word_start_bit;
272                    word &= !((1u64 << skip_bits) - 1);
273                }
274                if word_start_bit + 64 > end_bit {
275                    let keep_bits = end_bit - word_start_bit;
276                    word &= (1u64 << keep_bits) - 1;
277                }
278
279                // Extract set bits efficiently
280                while word != 0 {
281                    let bit_pos = word.trailing_zeros() as usize;
282                    let global_bit = word_start_bit + bit_pos;
283                    if global_bit >= start_bit && global_bit < end_bit {
284                        let neighbor = global_bit - start_bit;
285                        neighbors.push(neighbor);
286                    }
287                    word &= word - 1; // Clear lowest set bit
288                }
289            }
290        } else {
291            // For undirected graphs, check both directions efficiently
292            for other in 0..self.n_nodes {
293                if self.has_edge(node, other) {
294                    neighbors.push(other);
295                }
296            }
297        }
298
299        neighbors
300    }
301
302    /// Get degree of a node efficiently
303    pub fn degree(&self, node: usize) -> usize {
304        if node >= self.n_nodes {
305            return 0;
306        }
307
308        if self.directed {
309            let start_bit = node * self.n_nodes;
310            let end_bit = start_bit + self.n_nodes;
311
312            let start_word = start_bit / 64;
313            let end_word = end_bit.div_ceil(64);
314            let mut count = 0;
315
316            for word_idx in start_word..end_word {
317                if word_idx >= self.bits.len() {
318                    break;
319                }
320
321                let mut word = self.bits[word_idx];
322                let word_start_bit = word_idx * 64;
323
324                // Mask out bits outside our range
325                if word_start_bit < start_bit {
326                    let skip_bits = start_bit - word_start_bit;
327                    word &= !((1u64 << skip_bits) - 1);
328                }
329                if word_start_bit + 64 > end_bit {
330                    let keep_bits = end_bit - word_start_bit;
331                    word &= (1u64 << keep_bits) - 1;
332                }
333
334                count += word.count_ones() as usize;
335            }
336
337            count
338        } else {
339            // For undirected graphs, count efficiently
340            self.neighbors(node).len()
341        }
342    }
343
344    /// Memory usage in bytes
345    pub fn memory_usage(&self) -> usize {
346        mem::size_of_val(&self.n_nodes)
347            + mem::size_of_val(&self.bits[..])
348            + mem::size_of_val(&self.directed)
349    }
350}
351
352/// Compressed adjacency list using variable-length encoding
353///
354/// Uses delta encoding and variable-length integers for neighbor lists.
355#[derive(Debug, Clone)]
356pub struct CompressedAdjacencyList {
357    /// Number of nodes
358    n_nodes: usize,
359    /// Compressed neighbor data
360    data: Vec<u8>,
361    /// Offsets into data for each node
362    offsets: Vec<usize>,
363}
364
365impl CompressedAdjacencyList {
366    /// Create from adjacency lists
367    pub fn from_adjacency(_adjlists: Vec<Vec<usize>>) -> Self {
368        let n_nodes = _adjlists.len();
369        let mut data = Vec::new();
370        let mut offsets = Vec::with_capacity(n_nodes + 1);
371
372        offsets.push(0);
373
374        for neighbors in _adjlists {
375            let _start_pos = data.len();
376
377            // Sort neighbors for delta encoding
378            let mut sorted_neighbors = neighbors;
379            sorted_neighbors.sort_unstable();
380
381            // Encode count
382            Self::encode_varint(sorted_neighbors.len(), &mut data);
383
384            // Delta encode neighbors
385            let mut prev = 0;
386            for &neighbor in &sorted_neighbors {
387                let delta = neighbor - prev;
388                Self::encode_varint(delta, &mut data);
389                prev = neighbor;
390            }
391
392            offsets.push(data.len());
393        }
394
395        CompressedAdjacencyList {
396            n_nodes,
397            data,
398            offsets,
399        }
400    }
401
402    /// Variable-length integer encoding
403    fn encode_varint(mut value: usize, output: &mut Vec<u8>) {
404        while value >= 0x80 {
405            output.push((value & 0x7F) as u8 | 0x80);
406            value >>= 7;
407        }
408        output.push(value as u8);
409    }
410
411    /// Variable-length integer decoding
412    fn decode_varint(data: &[u8], pos: &mut usize) -> usize {
413        let mut value = 0;
414        let mut shift = 0;
415
416        loop {
417            let byte = data[*pos];
418            *pos += 1;
419
420            value |= ((byte & 0x7F) as usize) << shift;
421
422            if byte & 0x80 == 0 {
423                break;
424            }
425
426            shift += 7;
427        }
428
429        value
430    }
431
432    /// Get neighbors of a node
433    pub fn neighbors(&self, node: usize) -> Vec<usize> {
434        if node >= self.n_nodes {
435            return Vec::new();
436        }
437
438        let start = self.offsets[node];
439        let end = self.offsets[node + 1];
440        let data_slice = &self.data[start..end];
441
442        let mut pos = 0;
443        let count = Self::decode_varint(data_slice, &mut pos);
444
445        let mut neighbors = Vec::with_capacity(count);
446        let mut current = 0;
447
448        for _ in 0..count {
449            let delta = Self::decode_varint(data_slice, &mut pos);
450            current += delta;
451            neighbors.push(current);
452        }
453
454        neighbors
455    }
456
457    /// Memory usage in bytes
458    pub fn memory_usage(&self) -> usize {
459        mem::size_of_val(&self.n_nodes)
460            + mem::size_of_val(&self.data[..])
461            + mem::size_of_val(&self.offsets[..])
462    }
463}
464
465/// Hybrid graph representation that chooses optimal format based on graph properties
466pub enum HybridGraph {
467    /// Use CSR for sparse graphs
468    CSR(CSRGraph),
469    /// Use bit-packed for dense unweighted graphs
470    BitPacked(BitPackedGraph),
471    /// Use compressed adjacency for medium density
472    Compressed(CompressedAdjacencyList),
473}
474
475impl HybridGraph {
476    /// Automatically choose the best representation based on graph properties
477    pub fn auto_select(
478        n_nodes: usize,
479        edges: Vec<(usize, usize, Option<f64>)>,
480        directed: bool,
481    ) -> Result<Self, GraphError> {
482        let n_edges = edges.len();
483        let density = n_edges as f64 / (n_nodes * n_nodes) as f64;
484        let all_unweighted = edges.iter().all(|(_, _, w)| w.is_none());
485
486        if all_unweighted && density > 0.1 {
487            // Dense unweighted - use bit-packed
488            let mut graph = BitPackedGraph::new(n_nodes, directed);
489            for (src, dst_, _) in edges {
490                graph.add_edge(src, dst_)?;
491            }
492            Ok(HybridGraph::BitPacked(graph))
493        } else if density < 0.01 {
494            // Very sparse - use CSR
495            let weighted_edges: Vec<(usize, usize, f64)> = edges
496                .into_iter()
497                .map(|(s, d, w)| (s, d, w.unwrap_or(1.0)))
498                .collect();
499            let graph = CSRGraph::from_edges(n_nodes, weighted_edges)?;
500            Ok(HybridGraph::CSR(graph))
501        } else {
502            // Medium density - use compressed adjacency
503            let mut adj_lists = vec![Vec::new(); n_nodes];
504            for (src, dst_, _) in edges {
505                adj_lists[src].push(dst_);
506                if !directed {
507                    adj_lists[dst_].push(src);
508                }
509            }
510            let graph = CompressedAdjacencyList::from_adjacency(adj_lists);
511            Ok(HybridGraph::Compressed(graph))
512        }
513    }
514
515    /// Get memory usage
516    pub fn memory_usage(&self) -> usize {
517        match self {
518            HybridGraph::CSR(g) => g.memory_usage(),
519            HybridGraph::BitPacked(g) => g.memory_usage(),
520            HybridGraph::Compressed(g) => g.memory_usage(),
521        }
522    }
523}
524
525/// Memory-mapped graph for extremely large graphs that don't fit in RAM
526///
527/// # Wire format (portable across 32-bit and 64-bit targets)
528///
529/// All integer fields are serialised as little-endian `u64` (8 bytes each), even
530/// though they are held in memory as `usize`. This keeps the on-disk layout
531/// identical on 32-bit (e.g. `wasm32-unknown-unknown`) and 64-bit hosts so a
532/// graph written on one can be read on the other. The conversion is checked:
533/// reading a value that does not fit in this target's `usize` returns
534/// `io::ErrorKind::InvalidData` rather than truncating. See issue #125.
535#[derive(Debug)]
536pub struct MemmapGraph {
537    /// Number of nodes
538    n_nodes: usize,
539    /// Number of edges
540    n_edges: usize,
541    /// File handle for the graph data
542    file: File,
543    /// CSR format stored on disk
544    /// Format: [n_nodes:u64][n_edges:u64][row_ptr:(n_nodes+1)*u64][col_idx:n_edges*u64][weights:n_edges*f64]
545    #[allow(dead_code)]
546    header_size: usize,
547    row_ptr_offset: usize,
548    col_idx_offset: usize,
549    weights_offset: usize,
550}
551
552impl MemmapGraph {
553    /// Size of one serialised `usize`/`u64` field in the on-disk format.
554    /// Hardcoded to 8 (size of `u64`) so the format is architecture-independent
555    /// — do NOT replace with `size_of::<usize>()`, that would break wasm32.
556    const FIELD_BYTES: usize = 8;
557    /// Header is two u64 fields: n_nodes and n_edges.
558    const HEADER_BYTES: usize = 2 * Self::FIELD_BYTES;
559
560    /// Helper: read a little-endian u64 from a byte slice and convert to usize.
561    /// Returns `InvalidData` if the value exceeds `usize::MAX` on this target
562    /// (only possible on 32-bit targets reading a file written on 64-bit).
563    fn read_usize_le(bytes: &[u8]) -> io::Result<usize> {
564        let mut buf = [0u8; 8];
565        buf.copy_from_slice(&bytes[..8]);
566        let value = u64::from_le_bytes(buf);
567        usize::try_from(value).map_err(|_| {
568            io::Error::new(
569                io::ErrorKind::InvalidData,
570                "MemmapGraph: value exceeds usize range on this target (file written on 64-bit, read on 32-bit?)",
571            )
572        })
573    }
574
575    /// Create a new memory-mapped graph from an existing CSR graph
576    pub fn from_csr<P: AsRef<Path>>(csr: &CSRGraph, path: P) -> io::Result<Self> {
577        let mut file = File::create(&path)?;
578        let mut writer = BufWriter::new(&mut file);
579
580        // Write header (cast through u64 for cross-platform wire format)
581        writer.write_all(&(csr.n_nodes as u64).to_le_bytes())?;
582        writer.write_all(&(csr.n_edges as u64).to_le_bytes())?;
583
584        // Write row pointers
585        for &ptr in &csr.row_ptr {
586            writer.write_all(&(ptr as u64).to_le_bytes())?;
587        }
588
589        // Write column indices
590        for &idx in &csr.col_idx {
591            writer.write_all(&(idx as u64).to_le_bytes())?;
592        }
593
594        // Write weights (f64 is already a fixed 8-byte format)
595        for &weight in &csr.weights {
596            writer.write_all(&weight.to_le_bytes())?;
597        }
598
599        writer.flush()?;
600        drop(writer);
601
602        // Reopen for reading
603        let file = File::open(path)?;
604
605        let header_size = Self::HEADER_BYTES;
606        let row_ptr_offset = header_size;
607        let col_idx_offset = row_ptr_offset + (csr.n_nodes + 1) * Self::FIELD_BYTES;
608        let weights_offset = col_idx_offset + csr.n_edges * Self::FIELD_BYTES;
609
610        Ok(MemmapGraph {
611            n_nodes: csr.n_nodes,
612            n_edges: csr.n_edges,
613            file,
614            header_size,
615            row_ptr_offset,
616            col_idx_offset,
617            weights_offset,
618        })
619    }
620
621    /// Load an existing memory-mapped graph
622    pub fn from_file<P: AsRef<Path>>(path: P) -> io::Result<Self> {
623        let mut file = File::open(path)?;
624        let mut buffer = [0u8; Self::HEADER_BYTES];
625
626        // Read header
627        file.read_exact(&mut buffer)?;
628        let n_nodes = Self::read_usize_le(&buffer[0..8])?;
629        let n_edges = Self::read_usize_le(&buffer[8..16])?;
630
631        let header_size = Self::HEADER_BYTES;
632        let row_ptr_offset = header_size;
633        let col_idx_offset = row_ptr_offset + (n_nodes + 1) * Self::FIELD_BYTES;
634        let weights_offset = col_idx_offset + n_edges * Self::FIELD_BYTES;
635
636        Ok(MemmapGraph {
637            n_nodes,
638            n_edges,
639            file,
640            header_size,
641            row_ptr_offset,
642            col_idx_offset,
643            weights_offset,
644        })
645    }
646
647    /// Get row pointers for a node (reads from disk)
648    fn get_row_ptrs(&mut self, node: usize) -> io::Result<(usize, usize)> {
649        if node >= self.n_nodes {
650            return Ok((0, 0));
651        }
652
653        let mut buffer = [0u8; 2 * Self::FIELD_BYTES];
654        let offset = self.row_ptr_offset + node * Self::FIELD_BYTES;
655
656        self.file.seek(SeekFrom::Start(offset as u64))?;
657        self.file.read_exact(&mut buffer)?;
658
659        let start = Self::read_usize_le(&buffer[0..8])?;
660        let end = Self::read_usize_le(&buffer[8..16])?;
661
662        Ok((start, end))
663    }
664
665    /// Get neighbors of a node (reads from disk)
666    pub fn neighbors(&mut self, node: usize) -> io::Result<Vec<(usize, f64)>> {
667        let (start, end) = self.get_row_ptrs(node)?;
668        let degree = end - start;
669
670        if degree == 0 {
671            return Ok(Vec::new());
672        }
673
674        // Read column indices (each u64 = 8 bytes)
675        let mut col_buffer = vec![0u8; degree * Self::FIELD_BYTES];
676        let col_offset = self.col_idx_offset + start * Self::FIELD_BYTES;
677        self.file.seek(SeekFrom::Start(col_offset as u64))?;
678        self.file.read_exact(&mut col_buffer)?;
679
680        // Read weights (each f64 = 8 bytes)
681        let mut weight_buffer = vec![0u8; degree * Self::FIELD_BYTES];
682        let weight_offset = self.weights_offset + start * Self::FIELD_BYTES;
683        self.file.seek(SeekFrom::Start(weight_offset as u64))?;
684        self.file.read_exact(&mut weight_buffer)?;
685
686        // Parse neighbors
687        let mut neighbors = Vec::with_capacity(degree);
688        for i in 0..degree {
689            let col_bytes = &col_buffer[i * Self::FIELD_BYTES..(i + 1) * Self::FIELD_BYTES];
690            let weight_bytes = &weight_buffer[i * Self::FIELD_BYTES..(i + 1) * Self::FIELD_BYTES];
691
692            let col_idx = Self::read_usize_le(col_bytes)?;
693            let mut weight_buf = [0u8; 8];
694            weight_buf.copy_from_slice(&weight_bytes[..8]);
695            let weight = f64::from_le_bytes(weight_buf);
696
697            neighbors.push((col_idx, weight));
698        }
699
700        Ok(neighbors)
701    }
702
703    /// Get degree of a node
704    pub fn degree(&mut self, node: usize) -> io::Result<usize> {
705        let (start, end) = self.get_row_ptrs(node)?;
706        Ok(end - start)
707    }
708
709    /// Get number of nodes
710    pub fn node_count(&self) -> usize {
711        self.n_nodes
712    }
713
714    /// Get number of edges
715    pub fn edge_count(&self) -> usize {
716        self.n_edges
717    }
718
719    /// Check if an edge exists (requires reading neighbors)
720    pub fn has_edge(&mut self, from: usize, to: usize) -> io::Result<bool> {
721        let neighbors = self.neighbors(from)?;
722        Ok(neighbors.iter().any(|&(neighbor_, _)| neighbor_ == to))
723    }
724}
725
726/// Optimized batch operations for memory-mapped graphs
727impl MemmapGraph {
728    /// Read multiple nodes' neighbors in one operation (more efficient)
729    pub fn batch_neighbors(&mut self, nodes: &[usize]) -> io::Result<Vec<Vec<(usize, f64)>>> {
730        let mut results = Vec::with_capacity(nodes.len());
731
732        // Sort nodes to minimize seeking
733        let mut sorted_nodes: Vec<_> = nodes.iter().enumerate().collect();
734        sorted_nodes.sort_by_key(|(_, &node)| node);
735
736        for (_, &node) in sorted_nodes {
737            results.push(self.neighbors(node)?);
738        }
739
740        Ok(results)
741    }
742
743    /// Stream through all edges without loading everything into memory
744    pub fn stream_edges<F>(&mut self, mut callback: F) -> io::Result<()>
745    where
746        F: FnMut(usize, usize, f64) -> bool, // Returns true to continue
747    {
748        for node in 0..self.n_nodes {
749            let neighbors = self.neighbors(node)?;
750            for (neighbor, weight) in neighbors {
751                if !callback(node, neighbor, weight) {
752                    return Ok(());
753                }
754            }
755        }
756        Ok(())
757    }
758}
759
760#[cfg(test)]
761mod tests {
762    use super::*;
763
764    #[test]
765    fn test_csr_graph() {
766        let edges = vec![(0, 1, 1.0), (0, 2, 2.0), (1, 2, 3.0), (2, 3, 4.0)];
767
768        let graph = CSRGraph::from_edges(4, edges).expect("Operation failed");
769
770        assert_eq!(graph.degree(0), 2);
771        assert_eq!(graph.degree(3), 0);
772
773        let neighbors: Vec<_> = graph.neighbors(0).collect();
774        assert_eq!(neighbors, vec![(1, 1.0), (2, 2.0)]);
775    }
776
777    #[test]
778    fn test_bit_packed_graph() {
779        let mut graph = BitPackedGraph::new(4, false);
780
781        graph.add_edge(0, 1).expect("Operation failed");
782        graph.add_edge(1, 2).expect("Operation failed");
783        graph.add_edge(0, 3).expect("Operation failed");
784
785        assert!(graph.has_edge(0, 1));
786        assert!(graph.has_edge(1, 0)); // Undirected
787        assert!(!graph.has_edge(2, 3));
788
789        let neighbors = graph.neighbors(0);
790        assert!(neighbors.contains(&1));
791        assert!(neighbors.contains(&3));
792    }
793
794    #[test]
795    fn test_compressed_adjacency() {
796        let adj_lists = vec![
797            vec![1, 2, 5],
798            vec![0, 2],
799            vec![0, 1, 3],
800            vec![2],
801            vec![],
802            vec![0],
803        ];
804
805        let graph = CompressedAdjacencyList::from_adjacency(adj_lists.clone());
806
807        for (node, expected) in adj_lists.iter().enumerate() {
808            let neighbors = graph.neighbors(node);
809            assert_eq!(&neighbors, expected);
810        }
811
812        // Check memory compression (note: compression may not always be effective for small graphs)
813        let uncompressed_size = adj_lists
814            .iter()
815            .map(|list| list.len() * mem::size_of::<usize>())
816            .sum::<usize>();
817
818        let compressed_size = graph.memory_usage();
819        // For small graphs, compression overhead may exceed savings
820        // Just verify the compressed graph works correctly
821        println!(
822            "Uncompressed: {} bytes, Compressed: {} bytes",
823            uncompressed_size, compressed_size
824        );
825    }
826
827    /// Regression test for issue #125 — `MemmapGraph` failed to build on
828    /// `wasm32-unknown-unknown` because the wire format used
829    /// `usize::{to,from}_le_bytes` directly, which produces `[u8; 4]` on
830    /// 32-bit targets and `[u8; 8]` on 64-bit, conflicting with the
831    /// hardcoded 16-byte buffer reads.
832    ///
833    /// The fix casts all `usize` values through `u64` for the on-disk format.
834    /// This test verifies:
835    /// 1. Compile-time format constants (`FIELD_BYTES == 8`,
836    ///    `HEADER_BYTES == 16`) — documents the wire format. Note: this
837    ///    cannot catch a regression to `size_of::<usize>()` on x86_64 since
838    ///    they happen to be equal there, but it does document the intent.
839    /// 2. A round-trip write→read produces correct data.
840    /// 3. The on-disk file size exactly matches the documented format —
841    ///    this *does* catch any regression that changes field width on a
842    ///    32-bit target (where `usize == 4` would shrink the file).
843    #[test]
844    fn test_issue_125_memmap_wasm32_portability() {
845        // Compile-time format invariants: the wire format must use u64
846        // (8 bytes), not usize (varies by target).
847        const _: () = assert!(MemmapGraph::FIELD_BYTES == 8);
848        const _: () = assert!(MemmapGraph::HEADER_BYTES == 16);
849        const _: () = assert!(MemmapGraph::FIELD_BYTES == core::mem::size_of::<u64>());
850
851        // Functional round-trip: build CSR, persist, reload, compare.
852        let edges = vec![
853            (0usize, 1usize, 1.5_f64),
854            (0, 2, 2.5),
855            (1, 2, 3.5),
856            (2, 3, 4.5),
857        ];
858        let csr = CSRGraph::from_edges(4, edges).expect("CSR construction");
859
860        // Use std::env::temp_dir() per project policy.
861        let mut path = std::env::temp_dir();
862        path.push(format!("scirs2_graph_issue125_{}.bin", std::process::id()));
863
864        // Write via from_csr, then reload via from_file.
865        let _written = MemmapGraph::from_csr(&csr, &path).expect("write memmap");
866        let metadata = std::fs::metadata(&path).expect("file metadata");
867
868        // Documented format:
869        //   header (2 * u64) + row_ptr ((n_nodes+1) * u64)
870        //   + col_idx (n_edges * u64) + weights (n_edges * f64)
871        // All eight-byte fields, so total is always:
872        let n_nodes = 4_u64;
873        // Access private n_edges field directly (test is in same module).
874        let n_edges = csr.n_edges as u64;
875        let expected_bytes = 8 * (2 + (n_nodes + 1) + n_edges + n_edges);
876        assert_eq!(
877            metadata.len(),
878            expected_bytes,
879            "on-disk file size must match documented portable format"
880        );
881
882        let mut loaded = MemmapGraph::from_file(&path).expect("read memmap");
883        assert_eq!(loaded.node_count(), 4);
884        assert_eq!(loaded.edge_count(), csr.n_edges);
885
886        // Verify neighbors match. CSRGraph is directed-ish: from_edges stores
887        // each (src,dst) once. We compare the union of neighbor sets.
888        for node in 0..4 {
889            let mut roundtrip = loaded.neighbors(node).expect("neighbors");
890            let mut original: Vec<(usize, f64)> = csr.neighbors(node).collect();
891            roundtrip.sort_by_key(|a| a.0);
892            original.sort_by_key(|a| a.0);
893            assert_eq!(roundtrip, original, "node {node} neighbors must round-trip");
894        }
895
896        // Cleanup
897        let _ = std::fs::remove_file(&path);
898    }
899}