
grafeo_core/index/adjacency.rs

//! Chunked adjacency lists - the core data structure for graph traversal.
//!
//! Every graph database needs fast neighbor lookups. This implementation uses
//! chunked storage with delta buffers, giving you:
//!
//! - **O(1) amortized inserts** - new edges go into a delta buffer
//! - **Cache-friendly scans** - chunks are sized for L1/L2 cache
//! - **Soft deletes** - deletions don't require recompaction
//! - **Concurrent reads** - RwLock allows many simultaneous traversals
//! - **Compression** - cold chunks can be compressed using DeltaBitPacked
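//!
//! A minimal sketch of typical use, showing manual compaction of the delta
//! buffer (threshold-driven compaction is available via `compact_if_needed`):
//!
//! ```
//! use grafeo_core::index::ChunkedAdjacency;
//! use grafeo_common::types::{NodeId, EdgeId};
//!
//! let adj = ChunkedAdjacency::new();
//! for i in 0..100 {
//!     adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
//! }
//!
//! // Fold the delta buffer into chunks; reads see the same edges either way.
//! adj.compact();
//! assert_eq!(adj.out_degree(NodeId::new(0)), 100);
//! ```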

use crate::storage::{BitPackedInts, DeltaBitPacked};
use grafeo_common::types::{EdgeId, NodeId};
use grafeo_common::utils::hash::{FxHashMap, FxHashSet};
use parking_lot::RwLock;
use smallvec::SmallVec;
use std::sync::atomic::{AtomicUsize, Ordering};

/// Default chunk capacity (number of edges per chunk).
const DEFAULT_CHUNK_CAPACITY: usize = 64;

/// Threshold for delta buffer compaction.
///
/// Lower values reduce memory overhead and iteration cost for delta buffers,
/// but increase compaction frequency. 64 provides a good balance for typical workloads.
const DELTA_COMPACTION_THRESHOLD: usize = 64;

/// Threshold for cold chunk compression.
///
/// When the number of hot chunks exceeds this threshold, the oldest hot chunks
/// are compressed and moved to cold storage. This balances memory usage with
/// the cost of compression/decompression.
const COLD_COMPRESSION_THRESHOLD: usize = 4;

/// A chunk of adjacency entries.
#[derive(Debug, Clone)]
struct AdjacencyChunk {
    /// Destination node IDs.
    destinations: Vec<NodeId>,
    /// Edge IDs (parallel to destinations).
    edge_ids: Vec<EdgeId>,
    /// Capacity of this chunk.
    capacity: usize,
}

impl AdjacencyChunk {
    fn new(capacity: usize) -> Self {
        Self {
            destinations: Vec::with_capacity(capacity),
            edge_ids: Vec::with_capacity(capacity),
            capacity,
        }
    }

    fn len(&self) -> usize {
        self.destinations.len()
    }

    fn is_full(&self) -> bool {
        self.destinations.len() >= self.capacity
    }

    fn push(&mut self, dst: NodeId, edge_id: EdgeId) -> bool {
        if self.is_full() {
            return false;
        }
        self.destinations.push(dst);
        self.edge_ids.push(edge_id);
        true
    }

    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
        self.destinations
            .iter()
            .copied()
            .zip(self.edge_ids.iter().copied())
    }

    /// Compresses this chunk into a `CompressedAdjacencyChunk`.
    ///
    /// The entries are sorted by destination node ID for better delta compression.
    /// Use this for cold chunks that won't be modified.
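    ///
    /// A sketch of the round-trip using this module's internal types (shown
    /// for illustration only; not compiled as a doctest):
    ///
    /// ```ignore
    /// let mut chunk = AdjacencyChunk::new(64);
    /// chunk.push(NodeId::new(10), EdgeId::new(1));
    /// chunk.push(NodeId::new(3), EdgeId::new(2));
    ///
    /// let compressed = chunk.compress();
    /// // Entries come back sorted by destination: (3, 2) before (10, 1).
    /// let entries: Vec<_> = compressed.iter().collect();
    /// assert_eq!(entries[0], (NodeId::new(3), EdgeId::new(2)));
    /// ```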
    #[allow(dead_code)]
    fn compress(&self) -> CompressedAdjacencyChunk {
        // Sort entries by destination for better delta compression
        let mut entries: Vec<_> = self
            .destinations
            .iter()
            .copied()
            .zip(self.edge_ids.iter().copied())
            .collect();
        entries.sort_by_key(|(dst, _)| dst.as_u64());

        // Extract sorted destinations and corresponding edge IDs
        let sorted_dsts: Vec<u64> = entries.iter().map(|(d, _)| d.as_u64()).collect();
        let sorted_edges: Vec<u64> = entries.iter().map(|(_, e)| e.as_u64()).collect();

        CompressedAdjacencyChunk {
            destinations: DeltaBitPacked::encode(&sorted_dsts),
            edge_ids: BitPackedInts::pack(&sorted_edges),
            count: entries.len(),
        }
    }
}

/// A compressed chunk of adjacency entries.
///
/// Uses DeltaBitPacked for destination node IDs (sorted for good compression)
/// and BitPackedInts for edge IDs. Typical compression ratio is 5-10x for
/// dense adjacency lists.
#[derive(Debug, Clone)]
struct CompressedAdjacencyChunk {
    /// Delta + bit-packed destination node IDs (sorted).
    destinations: DeltaBitPacked,
    /// Bit-packed edge IDs.
    edge_ids: BitPackedInts,
    /// Number of entries.
    count: usize,
}

impl CompressedAdjacencyChunk {
    /// Returns the number of entries in this chunk.
    fn len(&self) -> usize {
        self.count
    }

    /// Returns true if this chunk is empty.
    #[allow(dead_code)]
    fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Decompresses and iterates over all entries.
    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
        let dsts = self.destinations.decode();
        let edges = self.edge_ids.unpack();

        dsts.into_iter()
            .zip(edges)
            .map(|(d, e)| (NodeId::new(d), EdgeId::new(e)))
    }

    /// Returns the approximate memory size in bytes.
    #[allow(dead_code)]
    fn memory_size(&self) -> usize {
        // DeltaBitPacked: 8 bytes base + packed deltas
        // BitPackedInts: packed data
        let dest_size = 8 + self.destinations.to_bytes().len();
        let edge_size = self.edge_ids.data().len() * 8;
        dest_size + edge_size
    }

    /// Returns the compression ratio compared to uncompressed storage.
    #[allow(dead_code)]
    fn compression_ratio(&self) -> f64 {
        if self.count == 0 {
            return 1.0;
        }
        let uncompressed = self.count * 16; // 8 bytes each for NodeId and EdgeId
        let compressed = self.memory_size();
        if compressed == 0 {
            return f64::INFINITY;
        }
        uncompressed as f64 / compressed as f64
    }
}

/// Adjacency list for a single node.
///
/// Uses a tiered storage model:
/// - **Hot chunks**: Recent data, uncompressed for fast modification
/// - **Cold chunks**: Older data, compressed for memory efficiency
/// - **Delta buffer**: Very recent insertions, not yet compacted
#[derive(Debug)]
struct AdjacencyList {
    /// Hot chunks (mutable, uncompressed) - for recent data.
    hot_chunks: Vec<AdjacencyChunk>,
    /// Cold chunks (immutable, compressed) - for older data.
    cold_chunks: Vec<CompressedAdjacencyChunk>,
    /// Delta buffer for recent insertions.
    /// Uses SmallVec with 16 inline entries for cache-friendly access.
    /// Most nodes have <16 recent insertions before compaction, so this
    /// avoids heap allocation in the common case.
    delta_inserts: SmallVec<[(NodeId, EdgeId); 16]>,
    /// Set of deleted edge IDs.
    deleted: FxHashSet<EdgeId>,
}

impl AdjacencyList {
    fn new() -> Self {
        Self {
            hot_chunks: Vec::new(),
            cold_chunks: Vec::new(),
            delta_inserts: SmallVec::new(),
            deleted: FxHashSet::default(),
        }
    }

    fn add_edge(&mut self, dst: NodeId, edge_id: EdgeId) {
        // Try to add to the last hot chunk
        if let Some(last) = self.hot_chunks.last_mut() {
            if last.push(dst, edge_id) {
                return;
            }
        }

        // Add to delta buffer
        self.delta_inserts.push((dst, edge_id));
    }

    fn mark_deleted(&mut self, edge_id: EdgeId) {
        self.deleted.insert(edge_id);
    }

    fn compact(&mut self, chunk_capacity: usize) {
        if self.delta_inserts.is_empty() {
            return;
        }

        // Create new chunks from delta buffer
        // Check if last hot chunk has room, and if so, pop it to continue filling
        let last_has_room = self.hot_chunks.last().is_some_and(|c| !c.is_full());
        let mut current_chunk = if last_has_room {
            // Invariant: is_some_and() returned true, so hot_chunks is non-empty
            self.hot_chunks
                .pop()
                .expect("hot_chunks is non-empty: is_some_and() succeeded on previous line")
        } else {
            AdjacencyChunk::new(chunk_capacity)
        };

        for (dst, edge_id) in self.delta_inserts.drain(..) {
            if !current_chunk.push(dst, edge_id) {
                self.hot_chunks.push(current_chunk);
                current_chunk = AdjacencyChunk::new(chunk_capacity);
                current_chunk.push(dst, edge_id);
            }
        }

        if current_chunk.len() > 0 {
            self.hot_chunks.push(current_chunk);
        }

        // Check if we should compress some hot chunks to cold
        self.maybe_compress_to_cold();
    }

    /// Compresses oldest hot chunks to cold storage if threshold exceeded.
    fn maybe_compress_to_cold(&mut self) {
        // Keep at least COLD_COMPRESSION_THRESHOLD hot chunks for write performance
        while self.hot_chunks.len() > COLD_COMPRESSION_THRESHOLD {
            // Remove the oldest (first) hot chunk
            let oldest = self.hot_chunks.remove(0);

            // Skip empty chunks
            if oldest.len() == 0 {
                continue;
            }

            // Compress and add to cold storage
            let compressed = oldest.compress();
            self.cold_chunks.push(compressed);
        }
    }

    /// Forces all hot chunks to be compressed to cold storage.
    ///
    /// Useful when memory pressure is high or the node is rarely accessed.
    #[allow(dead_code)]
    fn freeze_all(&mut self) {
        for chunk in self.hot_chunks.drain(..) {
            if chunk.len() > 0 {
                self.cold_chunks.push(chunk.compress());
            }
        }
    }

    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
        let deleted = &self.deleted;

        // Iterate cold chunks first (oldest data)
        let cold_iter = self.cold_chunks.iter().flat_map(|c| c.iter());

        // Then hot chunks
        let hot_iter = self.hot_chunks.iter().flat_map(|c| c.iter());

        // Finally delta buffer (newest data)
        let delta_iter = self.delta_inserts.iter().copied();

        cold_iter
            .chain(hot_iter)
            .chain(delta_iter)
            .filter(move |(_, edge_id)| !deleted.contains(edge_id))
    }

    fn neighbors(&self) -> impl Iterator<Item = NodeId> + '_ {
        self.iter().map(|(dst, _)| dst)
    }

    fn degree(&self) -> usize {
        self.iter().count()
    }

    /// Returns the number of entries in hot storage.
    #[allow(dead_code)]
    fn hot_count(&self) -> usize {
        self.hot_chunks.iter().map(|c| c.len()).sum::<usize>() + self.delta_inserts.len()
    }

    /// Returns the number of entries in cold storage.
    #[allow(dead_code)]
    fn cold_count(&self) -> usize {
        self.cold_chunks.iter().map(|c| c.len()).sum()
    }

    /// Returns the approximate memory size in bytes.
    #[allow(dead_code)]
    fn memory_size(&self) -> usize {
        // Hot chunks: full uncompressed size
        let hot_size = self.hot_chunks.iter().map(|c| c.len() * 16).sum::<usize>();

        // Cold chunks: compressed size
        let cold_size = self
            .cold_chunks
            .iter()
            .map(|c| c.memory_size())
            .sum::<usize>();

        // Delta buffer
        let delta_size = self.delta_inserts.len() * 16;

        // Deleted set (rough estimate)
        let deleted_size = self.deleted.len() * 16;

        hot_size + cold_size + delta_size + deleted_size
    }

    /// Returns the compression ratio for cold storage.
    #[allow(dead_code)]
    fn cold_compression_ratio(&self) -> f64 {
        let total_cold_entries: usize = self.cold_chunks.iter().map(|c| c.len()).sum();
        if total_cold_entries == 0 {
            return 1.0;
        }

        let uncompressed_size = total_cold_entries * 16;
        let compressed_size: usize = self.cold_chunks.iter().map(|c| c.memory_size()).sum();

        if compressed_size == 0 {
            return f64::INFINITY;
        }

        uncompressed_size as f64 / compressed_size as f64
    }
}

/// The main structure for traversing graph edges.
///
/// Given a node, this tells you all its neighbors and the edges connecting them.
/// Internally uses chunked storage (64 edges per chunk by default) with a delta
/// buffer for recent inserts. Deletions are soft: tombstoned edge IDs are
/// recorded and filtered out during iteration.
///
/// # Example
///
/// ```
/// use grafeo_core::index::ChunkedAdjacency;
/// use grafeo_common::types::{NodeId, EdgeId};
///
/// let adj = ChunkedAdjacency::new();
///
/// // Build a star graph: node 0 connects to nodes 1, 2, 3
/// adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(100));
/// adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(101));
/// adj.add_edge(NodeId::new(0), NodeId::new(3), EdgeId::new(102));
///
/// // Fast neighbor lookup
/// let neighbors = adj.neighbors(NodeId::new(0));
/// assert_eq!(neighbors.len(), 3);
/// ```
pub struct ChunkedAdjacency {
    /// Adjacency lists indexed by source node.
    lists: RwLock<FxHashMap<NodeId, AdjacencyList>>,
    /// Chunk capacity for new chunks.
    chunk_capacity: usize,
    /// Total number of edges (including deleted).
    edge_count: AtomicUsize,
    /// Number of deleted edges.
    deleted_count: AtomicUsize,
}

impl ChunkedAdjacency {
    /// Creates a new chunked adjacency structure.
    #[must_use]
    pub fn new() -> Self {
        Self::with_chunk_capacity(DEFAULT_CHUNK_CAPACITY)
    }

    /// Creates a new chunked adjacency with custom chunk capacity.
    #[must_use]
    pub fn with_chunk_capacity(capacity: usize) -> Self {
        Self {
            lists: RwLock::new(FxHashMap::default()),
            chunk_capacity: capacity,
            edge_count: AtomicUsize::new(0),
            deleted_count: AtomicUsize::new(0),
        }
    }

    /// Adds an edge from src to dst.
    pub fn add_edge(&self, src: NodeId, dst: NodeId, edge_id: EdgeId) {
        let mut lists = self.lists.write();
        lists
            .entry(src)
            .or_insert_with(AdjacencyList::new)
            .add_edge(dst, edge_id);
        self.edge_count.fetch_add(1, Ordering::Relaxed);
    }

    /// Marks an edge as deleted.
    pub fn mark_deleted(&self, src: NodeId, edge_id: EdgeId) {
        let mut lists = self.lists.write();
        if let Some(list) = lists.get_mut(&src) {
            list.mark_deleted(edge_id);
            self.deleted_count.fetch_add(1, Ordering::Relaxed);
        }
    }

    /// Returns all neighbors of a node.
    ///
    /// Note: This allocates a Vec to collect neighbors while the internal lock
    /// is held, then returns the Vec. For traversal performance, consider using
    /// `edges_from` if you also need edge IDs, to avoid multiple lookups.
    #[must_use]
    pub fn neighbors(&self, src: NodeId) -> Vec<NodeId> {
        let lists = self.lists.read();
        lists
            .get(&src)
            .map(|list| list.neighbors().collect())
            .unwrap_or_default()
    }

    /// Returns all (neighbor, edge_id) pairs for outgoing edges from a node.
    ///
    /// Note: This allocates a Vec to collect edges while the internal lock
    /// is held, then returns the Vec. This is intentional to avoid holding
    /// the lock across iteration.
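    ///
    /// A minimal sketch:
    ///
    /// ```
    /// use grafeo_core::index::ChunkedAdjacency;
    /// use grafeo_common::types::{NodeId, EdgeId};
    ///
    /// let adj = ChunkedAdjacency::new();
    /// adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(10));
    /// adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(20));
    ///
    /// let edges = adj.edges_from(NodeId::new(0));
    /// assert_eq!(edges.len(), 2);
    /// assert!(edges.contains(&(NodeId::new(1), EdgeId::new(10))));
    /// ```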
    #[must_use]
    pub fn edges_from(&self, src: NodeId) -> Vec<(NodeId, EdgeId)> {
        let lists = self.lists.read();
        lists
            .get(&src)
            .map(|list| list.iter().collect())
            .unwrap_or_default()
    }

    /// Returns the out-degree of a node (number of outgoing edges).
    ///
    /// For forward adjacency, this counts edges where `src` is the source.
    pub fn out_degree(&self, src: NodeId) -> usize {
        let lists = self.lists.read();
        lists.get(&src).map_or(0, |list| list.degree())
    }

    /// Returns the in-degree of a node (number of incoming edges).
    ///
    /// This performs the same lookup as `out_degree`; the separate name signals
    /// that it is intended for a backward adjacency structure, where edges are
    /// stored in reverse. When called on `backward_adj`, this returns the count
    /// of edges where `node` is the destination.
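    ///
    /// A minimal sketch of that convention, with edges 1->2 and 3->2 stored
    /// reversed in a backward index:
    ///
    /// ```
    /// use grafeo_core::index::ChunkedAdjacency;
    /// use grafeo_common::types::{NodeId, EdgeId};
    ///
    /// let backward = ChunkedAdjacency::new();
    /// backward.add_edge(NodeId::new(2), NodeId::new(1), EdgeId::new(0)); // 1 -> 2
    /// backward.add_edge(NodeId::new(2), NodeId::new(3), EdgeId::new(1)); // 3 -> 2
    ///
    /// assert_eq!(backward.in_degree(NodeId::new(2)), 2);
    /// assert_eq!(backward.in_degree(NodeId::new(1)), 0);
    /// ```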
    pub fn in_degree(&self, node: NodeId) -> usize {
        let lists = self.lists.read();
        lists.get(&node).map_or(0, |list| list.degree())
    }

    /// Compacts all adjacency lists.
    pub fn compact(&self) {
        let mut lists = self.lists.write();
        for list in lists.values_mut() {
            list.compact(self.chunk_capacity);
        }
    }

    /// Compacts delta buffers that exceed the threshold.
    pub fn compact_if_needed(&self) {
        let mut lists = self.lists.write();
        for list in lists.values_mut() {
            if list.delta_inserts.len() >= DELTA_COMPACTION_THRESHOLD {
                list.compact(self.chunk_capacity);
            }
        }
    }

    /// Returns the total number of edges (including deleted).
    pub fn total_edge_count(&self) -> usize {
        self.edge_count.load(Ordering::Relaxed)
    }

    /// Returns the number of active (non-deleted) edges.
    pub fn active_edge_count(&self) -> usize {
        self.edge_count.load(Ordering::Relaxed) - self.deleted_count.load(Ordering::Relaxed)
    }

    /// Returns the number of nodes with adjacency lists.
    pub fn node_count(&self) -> usize {
        self.lists.read().len()
    }

    /// Clears all adjacency lists.
    pub fn clear(&self) {
        let mut lists = self.lists.write();
        lists.clear();
        self.edge_count.store(0, Ordering::Relaxed);
        self.deleted_count.store(0, Ordering::Relaxed);
    }

    /// Returns memory statistics for this adjacency structure.
    #[must_use]
    pub fn memory_stats(&self) -> AdjacencyMemoryStats {
        let lists = self.lists.read();

        let mut hot_entries = 0usize;
        let mut cold_entries = 0usize;
        let mut hot_bytes = 0usize;
        let mut cold_bytes = 0usize;

        for list in lists.values() {
            hot_entries += list.hot_count();
            cold_entries += list.cold_count();

            // Hot: uncompressed (16 bytes per entry)
            hot_bytes += list.hot_count() * 16;

            // Cold: compressed size from memory_size()
            for cold_chunk in &list.cold_chunks {
                cold_bytes += cold_chunk.memory_size();
            }
        }

        AdjacencyMemoryStats {
            hot_entries,
            cold_entries,
            hot_bytes,
            cold_bytes,
            node_count: lists.len(),
        }
    }

    /// Forces all hot chunks to be compressed for all adjacency lists.
    ///
    /// This is useful when memory pressure is high or during shutdown.
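    ///
    /// A sketch of the effect on memory stats:
    ///
    /// ```
    /// use grafeo_core::index::ChunkedAdjacency;
    /// use grafeo_common::types::{NodeId, EdgeId};
    ///
    /// let adj = ChunkedAdjacency::with_chunk_capacity(8);
    /// for i in 0..30 {
    ///     adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
    /// }
    /// adj.compact();
    /// adj.freeze_all();
    ///
    /// // Everything now lives in compressed cold storage.
    /// let stats = adj.memory_stats();
    /// assert_eq!(stats.hot_entries, 0);
    /// assert_eq!(stats.cold_entries, 30);
    /// ```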
    pub fn freeze_all(&self) {
        let mut lists = self.lists.write();
        for list in lists.values_mut() {
            list.freeze_all();
        }
    }
}

/// Memory statistics for the adjacency structure.
#[derive(Debug, Clone)]
pub struct AdjacencyMemoryStats {
    /// Number of entries in hot (uncompressed) storage.
    pub hot_entries: usize,
    /// Number of entries in cold (compressed) storage.
    pub cold_entries: usize,
    /// Bytes used by hot storage.
    pub hot_bytes: usize,
    /// Bytes used by cold storage.
    pub cold_bytes: usize,
    /// Number of nodes with adjacency lists.
    pub node_count: usize,
}

impl AdjacencyMemoryStats {
    /// Returns the total number of entries.
    #[must_use]
    pub fn total_entries(&self) -> usize {
        self.hot_entries + self.cold_entries
    }

    /// Returns the total memory used in bytes.
    #[must_use]
    pub fn total_bytes(&self) -> usize {
        self.hot_bytes + self.cold_bytes
    }

    /// Returns the compression ratio for cold storage.
    ///
    /// Values > 1.0 indicate actual compression.
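    ///
    /// A sketch with sequential destination IDs, which delta-compress well:
    ///
    /// ```
    /// use grafeo_core::index::ChunkedAdjacency;
    /// use grafeo_common::types::{NodeId, EdgeId};
    ///
    /// let adj = ChunkedAdjacency::with_chunk_capacity(8);
    /// for i in 0..200 {
    ///     adj.add_edge(NodeId::new(0), NodeId::new(100 + i), EdgeId::new(i));
    /// }
    /// adj.compact();
    /// adj.freeze_all();
    ///
    /// assert!(adj.memory_stats().cold_compression_ratio() > 1.0);
    /// ```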
    #[must_use]
    pub fn cold_compression_ratio(&self) -> f64 {
        if self.cold_entries == 0 || self.cold_bytes == 0 {
            return 1.0;
        }
        let uncompressed = self.cold_entries * 16;
        uncompressed as f64 / self.cold_bytes as f64
    }

    /// Returns the overall compression ratio.
    #[must_use]
    pub fn overall_compression_ratio(&self) -> f64 {
        let total_entries = self.total_entries();
        if total_entries == 0 || self.total_bytes() == 0 {
            return 1.0;
        }
        let uncompressed = total_entries * 16;
        uncompressed as f64 / self.total_bytes() as f64
    }
}

impl Default for ChunkedAdjacency {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_basic_adjacency() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
        adj.add_edge(NodeId::new(0), NodeId::new(3), EdgeId::new(2));

        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 3);
        assert!(neighbors.contains(&NodeId::new(1)));
        assert!(neighbors.contains(&NodeId::new(2)));
        assert!(neighbors.contains(&NodeId::new(3)));
    }

    #[test]
    fn test_out_degree() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));

        assert_eq!(adj.out_degree(NodeId::new(0)), 2);
        assert_eq!(adj.out_degree(NodeId::new(1)), 0);
    }

    #[test]
    fn test_mark_deleted() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));

        adj.mark_deleted(NodeId::new(0), EdgeId::new(0));

        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 1);
        assert!(neighbors.contains(&NodeId::new(2)));
    }

    #[test]
    fn test_edges_from() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(10));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(20));

        let edges = adj.edges_from(NodeId::new(0));
        assert_eq!(edges.len(), 2);
        assert!(edges.contains(&(NodeId::new(1), EdgeId::new(10))));
        assert!(edges.contains(&(NodeId::new(2), EdgeId::new(20))));
    }

    #[test]
    fn test_compaction() {
        let adj = ChunkedAdjacency::with_chunk_capacity(4);

        // Add more edges than chunk capacity
        for i in 0..10 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        adj.compact();

        // All edges should still be accessible
        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 10);
    }

    #[test]
    fn test_edge_counts() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
        adj.add_edge(NodeId::new(1), NodeId::new(2), EdgeId::new(2));

        assert_eq!(adj.total_edge_count(), 3);
        assert_eq!(adj.active_edge_count(), 3);

        adj.mark_deleted(NodeId::new(0), EdgeId::new(0));

        assert_eq!(adj.total_edge_count(), 3);
        assert_eq!(adj.active_edge_count(), 2);
    }

    #[test]
    fn test_clear() {
        let adj = ChunkedAdjacency::new();

        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));

        adj.clear();

        assert_eq!(adj.total_edge_count(), 0);
        assert_eq!(adj.node_count(), 0);
    }

    #[test]
    fn test_chunk_compression() {
        // Create a chunk with some edges
        let mut chunk = AdjacencyChunk::new(64);

        // Add edges with various destination IDs
        for i in 0..20 {
            chunk.push(NodeId::new(100 + i * 5), EdgeId::new(1000 + i));
        }

        // Compress the chunk
        let compressed = chunk.compress();

        // Verify all data is preserved
        assert_eq!(compressed.len(), 20);

        // Decompress and verify
        let entries: Vec<_> = compressed.iter().collect();
        assert_eq!(entries.len(), 20);

        // After compression, entries are sorted by destination
        // Verify destinations are sorted
        for window in entries.windows(2) {
            assert!(window[0].0.as_u64() <= window[1].0.as_u64());
        }

        // Verify all original destinations are present
        let original_dsts: std::collections::HashSet<_> =
            (0..20).map(|i| NodeId::new(100 + i * 5)).collect();
        let compressed_dsts: std::collections::HashSet<_> =
            entries.iter().map(|(d, _)| *d).collect();
        assert_eq!(original_dsts, compressed_dsts);

        // Check compression ratio (should be > 1.0 for sorted data)
        let ratio = compressed.compression_ratio();
        assert!(
            ratio > 1.0,
            "Expected compression ratio > 1.0, got {}",
            ratio
        );
    }

    #[test]
    fn test_empty_chunk_compression() {
        let chunk = AdjacencyChunk::new(64);
        let compressed = chunk.compress();

        assert_eq!(compressed.len(), 0);
        assert!(compressed.is_empty());
        assert_eq!(compressed.iter().count(), 0);
    }

    #[test]
    fn test_hot_to_cold_migration() {
        // Use small chunk capacity to trigger cold compression faster
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add many edges to force multiple chunks and cold compression
        // With chunk_capacity=8 and COLD_COMPRESSION_THRESHOLD=4,
        // we need more than 4 * 8 = 32 edges to trigger cold compression
        for i in 0..100 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        // Force compaction to trigger hot→cold migration
        adj.compact();

        // All edges should still be accessible
        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 100);

        // Check memory stats
        let stats = adj.memory_stats();
        assert_eq!(stats.total_entries(), 100);

        // With 100 edges and threshold of 4 hot chunks (32 edges),
        // we should have some cold entries
        assert!(
            stats.cold_entries > 0,
            "Expected some cold entries, got {}",
            stats.cold_entries
        );
    }

    #[test]
    fn test_memory_stats() {
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add edges
        for i in 0..20 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        adj.compact();

        let stats = adj.memory_stats();
        assert_eq!(stats.total_entries(), 20);
        assert_eq!(stats.node_count, 1);
        assert!(stats.total_bytes() > 0);
    }

    #[test]
    fn test_freeze_all() {
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add some edges
        for i in 0..30 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        adj.compact();

        // Get initial stats
        let before = adj.memory_stats();

        // Freeze all hot chunks
        adj.freeze_all();

        // Get stats after freeze
        let after = adj.memory_stats();

        // All data should now be in cold storage
        assert_eq!(after.hot_entries, 0);
        assert_eq!(after.cold_entries, before.total_entries());

        // All edges should still be accessible
        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 30);
    }

    #[test]
    fn test_cold_compression_ratio() {
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add many edges with sequential IDs (compresses well)
        for i in 0..200 {
            adj.add_edge(NodeId::new(0), NodeId::new(100 + i), EdgeId::new(i));
        }

        adj.compact();
        adj.freeze_all();

        let stats = adj.memory_stats();

        // Sequential data should compress well
        let ratio = stats.cold_compression_ratio();
        assert!(
            ratio > 1.5,
            "Expected cold compression ratio > 1.5, got {}",
            ratio
        );
    }

    #[test]
    fn test_deleted_edges_with_cold_storage() {
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add edges
        for i in 0..50 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        adj.compact();

        // Delete some edges (some will be in cold storage after compaction)
        for i in (0..50).step_by(2) {
            adj.mark_deleted(NodeId::new(0), EdgeId::new(i));
        }

        // Should have half the edges
        let neighbors = adj.neighbors(NodeId::new(0));
        assert_eq!(neighbors.len(), 25);

        // Verify only even-numbered destinations remain (odd i survived deletion, and dst = i + 1)
        for neighbor in neighbors {
            assert!(neighbor.as_u64() % 2 == 0); // Original IDs were i+1, so even means odd i
        }
    }

    #[test]
    fn test_adjacency_list_memory_size() {
        let mut list = AdjacencyList::new();

        // Add edges
        for i in 0..50 {
            list.add_edge(NodeId::new(i + 1), EdgeId::new(i));
        }

        // Compact with small chunk capacity to get multiple chunks
        list.compact(8);

        let size = list.memory_size();
        assert!(size > 0);

        // Size should be roughly proportional to entry count
        // Each entry is 16 bytes uncompressed
        assert!(size <= 50 * 16 + 200); // Allow some overhead
    }

    #[test]
    fn test_cold_iteration_order() {
        let adj = ChunkedAdjacency::with_chunk_capacity(8);

        // Add edges in order
        for i in 0..50 {
            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
        }

        adj.compact();

        // Collect all edges
        let edges = adj.edges_from(NodeId::new(0));

        // All edges should be present
        assert_eq!(edges.len(), 50);

        // Verify edge IDs are present (may be reordered due to compression sorting)
        let edge_ids: std::collections::HashSet<_> = edges.iter().map(|(_, e)| *e).collect();
        for i in 0..50 {
            assert!(edge_ids.contains(&EdgeId::new(i)));
        }
    }

    #[test]
    fn test_in_degree() {
        let adj = ChunkedAdjacency::new();

        // Simulate backward adjacency: edges stored as (dst, src)
        // Edge 1->2: backward stores (2, 1)
        // Edge 3->2: backward stores (2, 3)
        adj.add_edge(NodeId::new(2), NodeId::new(1), EdgeId::new(0)); // 1->2 in backward
        adj.add_edge(NodeId::new(2), NodeId::new(3), EdgeId::new(1)); // 3->2 in backward

        // In-degree of node 2 is 2 (two edges point to it)
        assert_eq!(adj.in_degree(NodeId::new(2)), 2);

        // Node 1 has no incoming edges
        assert_eq!(adj.in_degree(NodeId::new(1)), 0);
    }

    #[test]
    fn test_bidirectional_edges() {
        let forward = ChunkedAdjacency::new();
        let backward = ChunkedAdjacency::new();

        // Add edge: 1 -> 2
        let edge_id = EdgeId::new(100);
        forward.add_edge(NodeId::new(1), NodeId::new(2), edge_id);
        backward.add_edge(NodeId::new(2), NodeId::new(1), edge_id); // Reverse for backward!

        // Forward: edges from node 1 → returns (dst=2, edge_id)
        let forward_edges = forward.edges_from(NodeId::new(1));
        assert_eq!(forward_edges.len(), 1);
        assert_eq!(forward_edges[0], (NodeId::new(2), edge_id));

        // Forward: node 2 has no outgoing edges
        assert_eq!(forward.edges_from(NodeId::new(2)).len(), 0);

        // Backward: edges to node 2 → stored as edges_from(2) → returns (src=1, edge_id)
        let backward_edges = backward.edges_from(NodeId::new(2));
        assert_eq!(backward_edges.len(), 1);
        assert_eq!(backward_edges[0], (NodeId::new(1), edge_id));

        // Backward: node 1 has no incoming edges
        assert_eq!(backward.edges_from(NodeId::new(1)).len(), 0);
    }

    #[test]
    fn test_bidirectional_chain() {
        // Test chain: A -> B -> C
        let forward = ChunkedAdjacency::new();
        let backward = ChunkedAdjacency::new();

        let a = NodeId::new(1);
        let b = NodeId::new(2);
        let c = NodeId::new(3);

        // Edge A -> B
        let edge_ab = EdgeId::new(10);
        forward.add_edge(a, b, edge_ab);
        backward.add_edge(b, a, edge_ab);

        // Edge B -> C
        let edge_bc = EdgeId::new(20);
        forward.add_edge(b, c, edge_bc);
        backward.add_edge(c, b, edge_bc);

        // Forward traversal from A: should reach B
        let from_a = forward.edges_from(a);
        assert_eq!(from_a.len(), 1);
        assert_eq!(from_a[0].0, b);

        // Forward traversal from B: should reach C
        let from_b = forward.edges_from(b);
        assert_eq!(from_b.len(), 1);
        assert_eq!(from_b[0].0, c);

        // Backward traversal to C: should find B
        let to_c = backward.edges_from(c);
        assert_eq!(to_c.len(), 1);
        assert_eq!(to_c[0].0, b);

        // Backward traversal to B: should find A
        let to_b = backward.edges_from(b);
        assert_eq!(to_b.len(), 1);
        assert_eq!(to_b[0].0, a);

        // Node A has no incoming edges
        assert_eq!(backward.edges_from(a).len(), 0);

        // Node C has no outgoing edges
        assert_eq!(forward.edges_from(c).len(), 0);
    }
}