grafeo_core/index/
adjacency.rs

1//! Chunked adjacency lists - the core data structure for graph traversal.
2//!
3//! Every graph database needs fast neighbor lookups. This implementation uses
4//! chunked storage with delta buffers, giving you:
5//!
6//! - **O(1) amortized inserts** - new edges go into a delta buffer
7//! - **Cache-friendly scans** - chunks are sized for L1/L2 cache
8//! - **Soft deletes** - deletions don't require recompaction
9//! - **Concurrent reads** - RwLock allows many simultaneous traversals
10//! - **Compression** - cold chunks can be compressed using DeltaBitPacked
11
12use crate::storage::{BitPackedInts, DeltaBitPacked};
13use grafeo_common::types::{EdgeId, NodeId};
14use grafeo_common::utils::hash::{FxHashMap, FxHashSet};
15use parking_lot::RwLock;
16use smallvec::SmallVec;
17use std::sync::atomic::{AtomicUsize, Ordering};
18
19/// Default chunk capacity (number of edges per chunk).
20const DEFAULT_CHUNK_CAPACITY: usize = 64;
21
22/// Threshold for delta buffer compaction.
23///
24/// Lower values reduce memory overhead and iteration cost for delta buffers,
25/// but increase compaction frequency. 64 provides a good balance for typical workloads.
26const DELTA_COMPACTION_THRESHOLD: usize = 64;
27
28/// Threshold for cold chunk compression.
29///
30/// When the number of hot chunks exceeds this threshold, the oldest hot chunks
31/// are compressed and moved to cold storage. This balances memory usage with
32/// the cost of compression/decompression.
33const COLD_COMPRESSION_THRESHOLD: usize = 4;
34
35/// A chunk of adjacency entries.
36#[derive(Debug, Clone)]
37struct AdjacencyChunk {
38    /// Destination node IDs.
39    destinations: Vec<NodeId>,
40    /// Edge IDs (parallel to destinations).
41    edge_ids: Vec<EdgeId>,
42    /// Capacity of this chunk.
43    capacity: usize,
44}
45
46impl AdjacencyChunk {
47    fn new(capacity: usize) -> Self {
48        Self {
49            destinations: Vec::with_capacity(capacity),
50            edge_ids: Vec::with_capacity(capacity),
51            capacity,
52        }
53    }
54
55    fn len(&self) -> usize {
56        self.destinations.len()
57    }
58
59    fn is_full(&self) -> bool {
60        self.destinations.len() >= self.capacity
61    }
62
63    fn push(&mut self, dst: NodeId, edge_id: EdgeId) -> bool {
64        if self.is_full() {
65            return false;
66        }
67        self.destinations.push(dst);
68        self.edge_ids.push(edge_id);
69        true
70    }
71
72    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
73        self.destinations
74            .iter()
75            .copied()
76            .zip(self.edge_ids.iter().copied())
77    }
78
79    /// Compresses this chunk into a `CompressedAdjacencyChunk`.
80    ///
81    /// The entries are sorted by destination node ID for better delta compression.
82    /// Use this for cold chunks that won't be modified.
83    #[allow(dead_code)]
84    fn compress(&self) -> CompressedAdjacencyChunk {
85        // Sort entries by destination for better delta compression
86        let mut entries: Vec<_> = self
87            .destinations
88            .iter()
89            .copied()
90            .zip(self.edge_ids.iter().copied())
91            .collect();
92        entries.sort_by_key(|(dst, _)| dst.as_u64());
93
94        // Extract sorted destinations and corresponding edge IDs
95        let sorted_dsts: Vec<u64> = entries.iter().map(|(d, _)| d.as_u64()).collect();
96        let sorted_edges: Vec<u64> = entries.iter().map(|(_, e)| e.as_u64()).collect();
97
98        CompressedAdjacencyChunk {
99            destinations: DeltaBitPacked::encode(&sorted_dsts),
100            edge_ids: BitPackedInts::pack(&sorted_edges),
101            count: entries.len(),
102        }
103    }
104}
105
106/// A compressed chunk of adjacency entries.
107///
108/// Uses DeltaBitPacked for destination node IDs (sorted for good compression)
109/// and BitPackedInts for edge IDs. Typical compression ratio is 5-10x for
110/// dense adjacency lists.
111#[derive(Debug, Clone)]
112struct CompressedAdjacencyChunk {
113    /// Delta + bit-packed destination node IDs (sorted).
114    destinations: DeltaBitPacked,
115    /// Bit-packed edge IDs.
116    edge_ids: BitPackedInts,
117    /// Number of entries.
118    count: usize,
119}
120
121impl CompressedAdjacencyChunk {
122    /// Returns the number of entries in this chunk.
123    fn len(&self) -> usize {
124        self.count
125    }
126
127    /// Returns true if this chunk is empty.
128    #[allow(dead_code)]
129    fn is_empty(&self) -> bool {
130        self.count == 0
131    }
132
133    /// Decompresses and iterates over all entries.
134    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
135        let dsts = self.destinations.decode();
136        let edges = self.edge_ids.unpack();
137
138        dsts.into_iter()
139            .zip(edges)
140            .map(|(d, e)| (NodeId::new(d), EdgeId::new(e)))
141    }
142
143    /// Returns the approximate memory size in bytes.
144    #[allow(dead_code)]
145    fn memory_size(&self) -> usize {
146        // DeltaBitPacked: 8 bytes base + packed deltas
147        // BitPackedInts: packed data
148        let dest_size = 8 + self.destinations.to_bytes().len();
149        let edge_size = self.edge_ids.data().len() * 8;
150        dest_size + edge_size
151    }
152
153    /// Returns the compression ratio compared to uncompressed storage.
154    #[allow(dead_code)]
155    fn compression_ratio(&self) -> f64 {
156        if self.count == 0 {
157            return 1.0;
158        }
159        let uncompressed = self.count * 16; // 8 bytes each for NodeId and EdgeId
160        let compressed = self.memory_size();
161        if compressed == 0 {
162            return f64::INFINITY;
163        }
164        uncompressed as f64 / compressed as f64
165    }
166}
167
168/// Adjacency list for a single node.
169///
170/// Uses a tiered storage model:
171/// - **Hot chunks**: Recent data, uncompressed for fast modification
172/// - **Cold chunks**: Older data, compressed for memory efficiency
173/// - **Delta buffer**: Very recent insertions, not yet compacted
174#[derive(Debug)]
175struct AdjacencyList {
176    /// Hot chunks (mutable, uncompressed) - for recent data.
177    hot_chunks: Vec<AdjacencyChunk>,
178    /// Cold chunks (immutable, compressed) - for older data.
179    cold_chunks: Vec<CompressedAdjacencyChunk>,
180    /// Delta buffer for recent insertions.
181    /// Uses SmallVec with 16 inline entries for cache-friendly access.
182    /// Most nodes have <16 recent insertions before compaction, so this
183    /// avoids heap allocation in the common case.
184    delta_inserts: SmallVec<[(NodeId, EdgeId); 16]>,
185    /// Set of deleted edge IDs.
186    deleted: FxHashSet<EdgeId>,
187}
188
189impl AdjacencyList {
190    fn new() -> Self {
191        Self {
192            hot_chunks: Vec::new(),
193            cold_chunks: Vec::new(),
194            delta_inserts: SmallVec::new(),
195            deleted: FxHashSet::default(),
196        }
197    }
198
199    fn add_edge(&mut self, dst: NodeId, edge_id: EdgeId) {
200        // Try to add to the last hot chunk
201        if let Some(last) = self.hot_chunks.last_mut() {
202            if last.push(dst, edge_id) {
203                return;
204            }
205        }
206
207        // Add to delta buffer
208        self.delta_inserts.push((dst, edge_id));
209    }
210
211    fn mark_deleted(&mut self, edge_id: EdgeId) {
212        self.deleted.insert(edge_id);
213    }
214
215    fn compact(&mut self, chunk_capacity: usize) {
216        if self.delta_inserts.is_empty() {
217            return;
218        }
219
220        // Create new chunks from delta buffer
221        // Check if last hot chunk has room, and if so, pop it to continue filling
222        let last_has_room = self.hot_chunks.last().is_some_and(|c| !c.is_full());
223        let mut current_chunk = if last_has_room {
224            // Invariant: is_some_and() returned true, so hot_chunks is non-empty
225            self.hot_chunks
226                .pop()
227                .expect("hot_chunks is non-empty: is_some_and() succeeded on previous line")
228        } else {
229            AdjacencyChunk::new(chunk_capacity)
230        };
231
232        for (dst, edge_id) in self.delta_inserts.drain(..) {
233            if !current_chunk.push(dst, edge_id) {
234                self.hot_chunks.push(current_chunk);
235                current_chunk = AdjacencyChunk::new(chunk_capacity);
236                current_chunk.push(dst, edge_id);
237            }
238        }
239
240        if current_chunk.len() > 0 {
241            self.hot_chunks.push(current_chunk);
242        }
243
244        // Check if we should compress some hot chunks to cold
245        self.maybe_compress_to_cold();
246    }
247
248    /// Compresses oldest hot chunks to cold storage if threshold exceeded.
249    fn maybe_compress_to_cold(&mut self) {
250        // Keep at least COLD_COMPRESSION_THRESHOLD hot chunks for write performance
251        while self.hot_chunks.len() > COLD_COMPRESSION_THRESHOLD {
252            // Remove the oldest (first) hot chunk
253            let oldest = self.hot_chunks.remove(0);
254
255            // Skip empty chunks
256            if oldest.len() == 0 {
257                continue;
258            }
259
260            // Compress and add to cold storage
261            let compressed = oldest.compress();
262            self.cold_chunks.push(compressed);
263        }
264    }
265
266    /// Forces all hot chunks to be compressed to cold storage.
267    ///
268    /// Useful when memory pressure is high or the node is rarely accessed.
269    #[allow(dead_code)]
270    fn freeze_all(&mut self) {
271        for chunk in self.hot_chunks.drain(..) {
272            if chunk.len() > 0 {
273                self.cold_chunks.push(chunk.compress());
274            }
275        }
276    }
277
278    fn iter(&self) -> impl Iterator<Item = (NodeId, EdgeId)> + '_ {
279        let deleted = &self.deleted;
280
281        // Iterate cold chunks first (oldest data)
282        let cold_iter = self.cold_chunks.iter().flat_map(|c| c.iter());
283
284        // Then hot chunks
285        let hot_iter = self.hot_chunks.iter().flat_map(|c| c.iter());
286
287        // Finally delta buffer (newest data)
288        let delta_iter = self.delta_inserts.iter().copied();
289
290        cold_iter
291            .chain(hot_iter)
292            .chain(delta_iter)
293            .filter(move |(_, edge_id)| !deleted.contains(edge_id))
294    }
295
296    fn neighbors(&self) -> impl Iterator<Item = NodeId> + '_ {
297        self.iter().map(|(dst, _)| dst)
298    }
299
300    fn degree(&self) -> usize {
301        self.iter().count()
302    }
303
304    /// Returns the number of entries in hot storage.
305    #[allow(dead_code)]
306    fn hot_count(&self) -> usize {
307        self.hot_chunks.iter().map(|c| c.len()).sum::<usize>() + self.delta_inserts.len()
308    }
309
310    /// Returns the number of entries in cold storage.
311    #[allow(dead_code)]
312    fn cold_count(&self) -> usize {
313        self.cold_chunks.iter().map(|c| c.len()).sum()
314    }
315
316    /// Returns the approximate memory size in bytes.
317    #[allow(dead_code)]
318    fn memory_size(&self) -> usize {
319        // Hot chunks: full uncompressed size
320        let hot_size = self.hot_chunks.iter().map(|c| c.len() * 16).sum::<usize>();
321
322        // Cold chunks: compressed size
323        let cold_size = self
324            .cold_chunks
325            .iter()
326            .map(|c| c.memory_size())
327            .sum::<usize>();
328
329        // Delta buffer
330        let delta_size = self.delta_inserts.len() * 16;
331
332        // Deleted set (rough estimate)
333        let deleted_size = self.deleted.len() * 16;
334
335        hot_size + cold_size + delta_size + deleted_size
336    }
337
338    /// Returns the compression ratio for cold storage.
339    #[allow(dead_code)]
340    fn cold_compression_ratio(&self) -> f64 {
341        let total_cold_entries: usize = self.cold_chunks.iter().map(|c| c.len()).sum();
342        if total_cold_entries == 0 {
343            return 1.0;
344        }
345
346        let uncompressed_size = total_cold_entries * 16;
347        let compressed_size: usize = self.cold_chunks.iter().map(|c| c.memory_size()).sum();
348
349        if compressed_size == 0 {
350            return f64::INFINITY;
351        }
352
353        uncompressed_size as f64 / compressed_size as f64
354    }
355}
356
357/// The main structure for traversing graph edges.
358///
359/// Given a node, this tells you all its neighbors and the edges connecting them.
360/// Internally uses chunked storage (64 edges per chunk) with a delta buffer for
361/// recent inserts. Deletions are soft (tombstones) until compaction.
362///
363/// # Example
364///
365/// ```
366/// use grafeo_core::index::ChunkedAdjacency;
367/// use grafeo_common::types::{NodeId, EdgeId};
368///
369/// let adj = ChunkedAdjacency::new();
370///
371/// // Build a star graph: node 0 connects to nodes 1, 2, 3
372/// adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(100));
373/// adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(101));
374/// adj.add_edge(NodeId::new(0), NodeId::new(3), EdgeId::new(102));
375///
376/// // Fast neighbor lookup
377/// let neighbors = adj.neighbors(NodeId::new(0));
378/// assert_eq!(neighbors.len(), 3);
379/// ```
380pub struct ChunkedAdjacency {
381    /// Adjacency lists indexed by source node.
382    /// Lock order: 10 (nested, acquired via LpgStore::forward_adj/backward_adj)
383    lists: RwLock<FxHashMap<NodeId, AdjacencyList>>,
384    /// Chunk capacity for new chunks.
385    chunk_capacity: usize,
386    /// Total number of edges (including deleted).
387    edge_count: AtomicUsize,
388    /// Number of deleted edges.
389    deleted_count: AtomicUsize,
390}
391
392impl ChunkedAdjacency {
393    /// Creates a new chunked adjacency structure.
394    #[must_use]
395    pub fn new() -> Self {
396        Self::with_chunk_capacity(DEFAULT_CHUNK_CAPACITY)
397    }
398
399    /// Creates a new chunked adjacency with custom chunk capacity.
400    #[must_use]
401    pub fn with_chunk_capacity(capacity: usize) -> Self {
402        Self {
403            lists: RwLock::new(FxHashMap::default()),
404            chunk_capacity: capacity,
405            edge_count: AtomicUsize::new(0),
406            deleted_count: AtomicUsize::new(0),
407        }
408    }
409
410    /// Adds an edge from src to dst.
411    pub fn add_edge(&self, src: NodeId, dst: NodeId, edge_id: EdgeId) {
412        let mut lists = self.lists.write();
413        lists
414            .entry(src)
415            .or_insert_with(AdjacencyList::new)
416            .add_edge(dst, edge_id);
417        self.edge_count.fetch_add(1, Ordering::Relaxed);
418    }
419
420    /// Marks an edge as deleted.
421    pub fn mark_deleted(&self, src: NodeId, edge_id: EdgeId) {
422        let mut lists = self.lists.write();
423        if let Some(list) = lists.get_mut(&src) {
424            list.mark_deleted(edge_id);
425            self.deleted_count.fetch_add(1, Ordering::Relaxed);
426        }
427    }
428
429    /// Returns all neighbors of a node.
430    ///
431    /// Note: This allocates a Vec to collect neighbors while the internal lock
432    /// is held, then returns the Vec. For traversal performance, consider using
433    /// `edges_from` if you also need edge IDs, to avoid multiple lookups.
434    #[must_use]
435    pub fn neighbors(&self, src: NodeId) -> Vec<NodeId> {
436        let lists = self.lists.read();
437        lists
438            .get(&src)
439            .map(|list| list.neighbors().collect())
440            .unwrap_or_default()
441    }
442
443    /// Returns all (neighbor, edge_id) pairs for outgoing edges from a node.
444    ///
445    /// Note: This allocates a Vec to collect edges while the internal lock
446    /// is held, then returns the Vec. This is intentional to avoid holding
447    /// the lock across iteration.
448    #[must_use]
449    pub fn edges_from(&self, src: NodeId) -> Vec<(NodeId, EdgeId)> {
450        let lists = self.lists.read();
451        lists
452            .get(&src)
453            .map(|list| list.iter().collect())
454            .unwrap_or_default()
455    }
456
457    /// Returns the out-degree of a node (number of outgoing edges).
458    ///
459    /// For forward adjacency, this counts edges where `src` is the source.
460    pub fn out_degree(&self, src: NodeId) -> usize {
461        let lists = self.lists.read();
462        lists.get(&src).map_or(0, |list| list.degree())
463    }
464
465    /// Returns the in-degree of a node (number of incoming edges).
466    ///
467    /// This is semantically equivalent to `out_degree` but named differently
468    /// for use with backward adjacency where edges are stored in reverse.
469    /// When called on `backward_adj`, this returns the count of edges
470    /// where `node` is the destination.
471    pub fn in_degree(&self, node: NodeId) -> usize {
472        let lists = self.lists.read();
473        lists.get(&node).map_or(0, |list| list.degree())
474    }
475
476    /// Compacts all adjacency lists.
477    pub fn compact(&self) {
478        let mut lists = self.lists.write();
479        for list in lists.values_mut() {
480            list.compact(self.chunk_capacity);
481        }
482    }
483
484    /// Compacts delta buffers that exceed the threshold.
485    pub fn compact_if_needed(&self) {
486        let mut lists = self.lists.write();
487        for list in lists.values_mut() {
488            if list.delta_inserts.len() >= DELTA_COMPACTION_THRESHOLD {
489                list.compact(self.chunk_capacity);
490            }
491        }
492    }
493
494    /// Returns the total number of edges (including deleted).
495    pub fn total_edge_count(&self) -> usize {
496        self.edge_count.load(Ordering::Relaxed)
497    }
498
499    /// Returns the number of active (non-deleted) edges.
500    pub fn active_edge_count(&self) -> usize {
501        self.edge_count.load(Ordering::Relaxed) - self.deleted_count.load(Ordering::Relaxed)
502    }
503
504    /// Returns the number of nodes with adjacency lists.
505    pub fn node_count(&self) -> usize {
506        self.lists.read().len()
507    }
508
509    /// Clears all adjacency lists.
510    pub fn clear(&self) {
511        let mut lists = self.lists.write();
512        lists.clear();
513        self.edge_count.store(0, Ordering::Relaxed);
514        self.deleted_count.store(0, Ordering::Relaxed);
515    }
516
517    /// Returns memory statistics for this adjacency structure.
518    #[must_use]
519    pub fn memory_stats(&self) -> AdjacencyMemoryStats {
520        let lists = self.lists.read();
521
522        let mut hot_entries = 0usize;
523        let mut cold_entries = 0usize;
524        let mut hot_bytes = 0usize;
525        let mut cold_bytes = 0usize;
526
527        for list in lists.values() {
528            hot_entries += list.hot_count();
529            cold_entries += list.cold_count();
530
531            // Hot: uncompressed (16 bytes per entry)
532            hot_bytes += list.hot_count() * 16;
533
534            // Cold: compressed size from memory_size()
535            for cold_chunk in &list.cold_chunks {
536                cold_bytes += cold_chunk.memory_size();
537            }
538        }
539
540        AdjacencyMemoryStats {
541            hot_entries,
542            cold_entries,
543            hot_bytes,
544            cold_bytes,
545            node_count: lists.len(),
546        }
547    }
548
549    /// Forces all hot chunks to be compressed for all adjacency lists.
550    ///
551    /// This is useful when memory pressure is high or during shutdown.
552    pub fn freeze_all(&self) {
553        let mut lists = self.lists.write();
554        for list in lists.values_mut() {
555            list.freeze_all();
556        }
557    }
558}
559
560/// Memory statistics for the adjacency structure.
561#[derive(Debug, Clone)]
562pub struct AdjacencyMemoryStats {
563    /// Number of entries in hot (uncompressed) storage.
564    pub hot_entries: usize,
565    /// Number of entries in cold (compressed) storage.
566    pub cold_entries: usize,
567    /// Bytes used by hot storage.
568    pub hot_bytes: usize,
569    /// Bytes used by cold storage.
570    pub cold_bytes: usize,
571    /// Number of nodes with adjacency lists.
572    pub node_count: usize,
573}
574
575impl AdjacencyMemoryStats {
576    /// Returns the total number of entries.
577    #[must_use]
578    pub fn total_entries(&self) -> usize {
579        self.hot_entries + self.cold_entries
580    }
581
582    /// Returns the total memory used in bytes.
583    #[must_use]
584    pub fn total_bytes(&self) -> usize {
585        self.hot_bytes + self.cold_bytes
586    }
587
588    /// Returns the compression ratio for cold storage.
589    ///
590    /// Values > 1.0 indicate actual compression.
591    #[must_use]
592    pub fn cold_compression_ratio(&self) -> f64 {
593        if self.cold_entries == 0 || self.cold_bytes == 0 {
594            return 1.0;
595        }
596        let uncompressed = self.cold_entries * 16;
597        uncompressed as f64 / self.cold_bytes as f64
598    }
599
600    /// Returns the overall compression ratio.
601    #[must_use]
602    pub fn overall_compression_ratio(&self) -> f64 {
603        let total_entries = self.total_entries();
604        if total_entries == 0 || self.total_bytes() == 0 {
605            return 1.0;
606        }
607        let uncompressed = total_entries * 16;
608        uncompressed as f64 / self.total_bytes() as f64
609    }
610}
611
612impl Default for ChunkedAdjacency {
613    fn default() -> Self {
614        Self::new()
615    }
616}
617
618#[cfg(test)]
619mod tests {
620    use super::*;
621
622    #[test]
623    fn test_basic_adjacency() {
624        let adj = ChunkedAdjacency::new();
625
626        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
627        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
628        adj.add_edge(NodeId::new(0), NodeId::new(3), EdgeId::new(2));
629
630        let neighbors = adj.neighbors(NodeId::new(0));
631        assert_eq!(neighbors.len(), 3);
632        assert!(neighbors.contains(&NodeId::new(1)));
633        assert!(neighbors.contains(&NodeId::new(2)));
634        assert!(neighbors.contains(&NodeId::new(3)));
635    }
636
637    #[test]
638    fn test_out_degree() {
639        let adj = ChunkedAdjacency::new();
640
641        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
642        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
643
644        assert_eq!(adj.out_degree(NodeId::new(0)), 2);
645        assert_eq!(adj.out_degree(NodeId::new(1)), 0);
646    }
647
648    #[test]
649    fn test_mark_deleted() {
650        let adj = ChunkedAdjacency::new();
651
652        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
653        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
654
655        adj.mark_deleted(NodeId::new(0), EdgeId::new(0));
656
657        let neighbors = adj.neighbors(NodeId::new(0));
658        assert_eq!(neighbors.len(), 1);
659        assert!(neighbors.contains(&NodeId::new(2)));
660    }
661
662    #[test]
663    fn test_edges_from() {
664        let adj = ChunkedAdjacency::new();
665
666        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(10));
667        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(20));
668
669        let edges = adj.edges_from(NodeId::new(0));
670        assert_eq!(edges.len(), 2);
671        assert!(edges.contains(&(NodeId::new(1), EdgeId::new(10))));
672        assert!(edges.contains(&(NodeId::new(2), EdgeId::new(20))));
673    }
674
675    #[test]
676    fn test_compaction() {
677        let adj = ChunkedAdjacency::with_chunk_capacity(4);
678
679        // Add more edges than chunk capacity
680        for i in 0..10 {
681            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
682        }
683
684        adj.compact();
685
686        // All edges should still be accessible
687        let neighbors = adj.neighbors(NodeId::new(0));
688        assert_eq!(neighbors.len(), 10);
689    }
690
691    #[test]
692    fn test_edge_counts() {
693        let adj = ChunkedAdjacency::new();
694
695        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
696        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
697        adj.add_edge(NodeId::new(1), NodeId::new(2), EdgeId::new(2));
698
699        assert_eq!(adj.total_edge_count(), 3);
700        assert_eq!(adj.active_edge_count(), 3);
701
702        adj.mark_deleted(NodeId::new(0), EdgeId::new(0));
703
704        assert_eq!(adj.total_edge_count(), 3);
705        assert_eq!(adj.active_edge_count(), 2);
706    }
707
708    #[test]
709    fn test_clear() {
710        let adj = ChunkedAdjacency::new();
711
712        adj.add_edge(NodeId::new(0), NodeId::new(1), EdgeId::new(0));
713        adj.add_edge(NodeId::new(0), NodeId::new(2), EdgeId::new(1));
714
715        adj.clear();
716
717        assert_eq!(adj.total_edge_count(), 0);
718        assert_eq!(adj.node_count(), 0);
719    }
720
721    #[test]
722    fn test_chunk_compression() {
723        // Create a chunk with some edges
724        let mut chunk = AdjacencyChunk::new(64);
725
726        // Add edges with various destination IDs
727        for i in 0..20 {
728            chunk.push(NodeId::new(100 + i * 5), EdgeId::new(1000 + i));
729        }
730
731        // Compress the chunk
732        let compressed = chunk.compress();
733
734        // Verify all data is preserved
735        assert_eq!(compressed.len(), 20);
736
737        // Decompress and verify
738        let entries: Vec<_> = compressed.iter().collect();
739        assert_eq!(entries.len(), 20);
740
741        // After compression, entries are sorted by destination
742        // Verify destinations are sorted
743        for window in entries.windows(2) {
744            assert!(window[0].0.as_u64() <= window[1].0.as_u64());
745        }
746
747        // Verify all original destinations are present
748        let original_dsts: std::collections::HashSet<_> =
749            (0..20).map(|i| NodeId::new(100 + i * 5)).collect();
750        let compressed_dsts: std::collections::HashSet<_> =
751            entries.iter().map(|(d, _)| *d).collect();
752        assert_eq!(original_dsts, compressed_dsts);
753
754        // Check compression ratio (should be > 1.0 for sorted data)
755        let ratio = compressed.compression_ratio();
756        assert!(
757            ratio > 1.0,
758            "Expected compression ratio > 1.0, got {}",
759            ratio
760        );
761    }
762
763    #[test]
764    fn test_empty_chunk_compression() {
765        let chunk = AdjacencyChunk::new(64);
766        let compressed = chunk.compress();
767
768        assert_eq!(compressed.len(), 0);
769        assert!(compressed.is_empty());
770        assert_eq!(compressed.iter().count(), 0);
771    }
772
773    #[test]
774    fn test_hot_to_cold_migration() {
775        // Use small chunk capacity to trigger cold compression faster
776        let adj = ChunkedAdjacency::with_chunk_capacity(8);
777
778        // Add many edges to force multiple chunks and cold compression
779        // With chunk_capacity=8 and COLD_COMPRESSION_THRESHOLD=4,
780        // we need more than 4 * 8 = 32 edges to trigger cold compression
781        for i in 0..100 {
782            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
783        }
784
785        // Force compaction to trigger hot→cold migration
786        adj.compact();
787
788        // All edges should still be accessible
789        let neighbors = adj.neighbors(NodeId::new(0));
790        assert_eq!(neighbors.len(), 100);
791
792        // Check memory stats
793        let stats = adj.memory_stats();
794        assert_eq!(stats.total_entries(), 100);
795
796        // With 100 edges and threshold of 4 hot chunks (32 edges),
797        // we should have some cold entries
798        assert!(
799            stats.cold_entries > 0,
800            "Expected some cold entries, got {}",
801            stats.cold_entries
802        );
803    }
804
805    #[test]
806    fn test_memory_stats() {
807        let adj = ChunkedAdjacency::with_chunk_capacity(8);
808
809        // Add edges
810        for i in 0..20 {
811            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
812        }
813
814        adj.compact();
815
816        let stats = adj.memory_stats();
817        assert_eq!(stats.total_entries(), 20);
818        assert_eq!(stats.node_count, 1);
819        assert!(stats.total_bytes() > 0);
820    }
821
822    #[test]
823    fn test_freeze_all() {
824        let adj = ChunkedAdjacency::with_chunk_capacity(8);
825
826        // Add some edges
827        for i in 0..30 {
828            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
829        }
830
831        adj.compact();
832
833        // Get initial stats
834        let before = adj.memory_stats();
835
836        // Freeze all hot chunks
837        adj.freeze_all();
838
839        // Get stats after freeze
840        let after = adj.memory_stats();
841
842        // All data should now be in cold storage
843        assert_eq!(after.hot_entries, 0);
844        assert_eq!(after.cold_entries, before.total_entries());
845
846        // All edges should still be accessible
847        let neighbors = adj.neighbors(NodeId::new(0));
848        assert_eq!(neighbors.len(), 30);
849    }
850
851    #[test]
852    fn test_cold_compression_ratio() {
853        let adj = ChunkedAdjacency::with_chunk_capacity(8);
854
855        // Add many edges with sequential IDs (compresses well)
856        for i in 0..200 {
857            adj.add_edge(NodeId::new(0), NodeId::new(100 + i), EdgeId::new(i));
858        }
859
860        adj.compact();
861        adj.freeze_all();
862
863        let stats = adj.memory_stats();
864
865        // Sequential data should compress well
866        let ratio = stats.cold_compression_ratio();
867        assert!(
868            ratio > 1.5,
869            "Expected cold compression ratio > 1.5, got {}",
870            ratio
871        );
872    }
873
874    #[test]
875    fn test_deleted_edges_with_cold_storage() {
876        let adj = ChunkedAdjacency::with_chunk_capacity(8);
877
878        // Add edges
879        for i in 0..50 {
880            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
881        }
882
883        adj.compact();
884
885        // Delete some edges (some will be in cold storage after compaction)
886        for i in (0..50).step_by(2) {
887            adj.mark_deleted(NodeId::new(0), EdgeId::new(i));
888        }
889
890        // Should have half the edges
891        let neighbors = adj.neighbors(NodeId::new(0));
892        assert_eq!(neighbors.len(), 25);
893
894        // Verify only odd-numbered destinations remain
895        for neighbor in neighbors {
896            assert!(neighbor.as_u64() % 2 == 0); // Original IDs were i+1, so even means odd i
897        }
898    }
899
900    #[test]
901    fn test_adjacency_list_memory_size() {
902        let mut list = AdjacencyList::new();
903
904        // Add edges
905        for i in 0..50 {
906            list.add_edge(NodeId::new(i + 1), EdgeId::new(i));
907        }
908
909        // Compact with small chunk capacity to get multiple chunks
910        list.compact(8);
911
912        let size = list.memory_size();
913        assert!(size > 0);
914
915        // Size should be roughly proportional to entry count
916        // Each entry is 16 bytes uncompressed
917        assert!(size <= 50 * 16 + 200); // Allow some overhead
918    }
919
920    #[test]
921    fn test_cold_iteration_order() {
922        let adj = ChunkedAdjacency::with_chunk_capacity(8);
923
924        // Add edges in order
925        for i in 0..50 {
926            adj.add_edge(NodeId::new(0), NodeId::new(i + 1), EdgeId::new(i));
927        }
928
929        adj.compact();
930
931        // Collect all edges
932        let edges = adj.edges_from(NodeId::new(0));
933
934        // All edges should be present
935        assert_eq!(edges.len(), 50);
936
937        // Verify edge IDs are present (may be reordered due to compression sorting)
938        let edge_ids: std::collections::HashSet<_> = edges.iter().map(|(_, e)| *e).collect();
939        for i in 0..50 {
940            assert!(edge_ids.contains(&EdgeId::new(i)));
941        }
942    }
943
944    #[test]
945    fn test_in_degree() {
946        let adj = ChunkedAdjacency::new();
947
948        // Simulate backward adjacency: edges stored as (dst, src)
949        // Edge 1->2: backward stores (2, 1)
950        // Edge 3->2: backward stores (2, 3)
951        adj.add_edge(NodeId::new(2), NodeId::new(1), EdgeId::new(0)); // 1->2 in backward
952        adj.add_edge(NodeId::new(2), NodeId::new(3), EdgeId::new(1)); // 3->2 in backward
953
954        // In-degree of node 2 is 2 (two edges point to it)
955        assert_eq!(adj.in_degree(NodeId::new(2)), 2);
956
957        // Node 1 has no incoming edges
958        assert_eq!(adj.in_degree(NodeId::new(1)), 0);
959    }
960
961    #[test]
962    fn test_bidirectional_edges() {
963        let forward = ChunkedAdjacency::new();
964        let backward = ChunkedAdjacency::new();
965
966        // Add edge: 1 -> 2
967        let edge_id = EdgeId::new(100);
968        forward.add_edge(NodeId::new(1), NodeId::new(2), edge_id);
969        backward.add_edge(NodeId::new(2), NodeId::new(1), edge_id); // Reverse for backward!
970
971        // Forward: edges from node 1 → returns (dst=2, edge_id)
972        let forward_edges = forward.edges_from(NodeId::new(1));
973        assert_eq!(forward_edges.len(), 1);
974        assert_eq!(forward_edges[0], (NodeId::new(2), edge_id));
975
976        // Forward: node 2 has no outgoing edges
977        assert_eq!(forward.edges_from(NodeId::new(2)).len(), 0);
978
979        // Backward: edges to node 2 → stored as edges_from(2) → returns (src=1, edge_id)
980        let backward_edges = backward.edges_from(NodeId::new(2));
981        assert_eq!(backward_edges.len(), 1);
982        assert_eq!(backward_edges[0], (NodeId::new(1), edge_id));
983
984        // Backward: node 1 has no incoming edges
985        assert_eq!(backward.edges_from(NodeId::new(1)).len(), 0);
986    }
987
988    #[test]
989    fn test_bidirectional_chain() {
990        // Test chain: A -> B -> C
991        let forward = ChunkedAdjacency::new();
992        let backward = ChunkedAdjacency::new();
993
994        let a = NodeId::new(1);
995        let b = NodeId::new(2);
996        let c = NodeId::new(3);
997
998        // Edge A -> B
999        let edge_ab = EdgeId::new(10);
1000        forward.add_edge(a, b, edge_ab);
1001        backward.add_edge(b, a, edge_ab);
1002
1003        // Edge B -> C
1004        let edge_bc = EdgeId::new(20);
1005        forward.add_edge(b, c, edge_bc);
1006        backward.add_edge(c, b, edge_bc);
1007
1008        // Forward traversal from A: should reach B
1009        let from_a = forward.edges_from(a);
1010        assert_eq!(from_a.len(), 1);
1011        assert_eq!(from_a[0].0, b);
1012
1013        // Forward traversal from B: should reach C
1014        let from_b = forward.edges_from(b);
1015        assert_eq!(from_b.len(), 1);
1016        assert_eq!(from_b[0].0, c);
1017
1018        // Backward traversal to C: should find B
1019        let to_c = backward.edges_from(c);
1020        assert_eq!(to_c.len(), 1);
1021        assert_eq!(to_c[0].0, b);
1022
1023        // Backward traversal to B: should find A
1024        let to_b = backward.edges_from(b);
1025        assert_eq!(to_b.len(), 1);
1026        assert_eq!(to_b[0].0, a);
1027
1028        // Node A has no incoming edges
1029        assert_eq!(backward.edges_from(a).len(), 0);
1030
1031        // Node C has no outgoing edges
1032        assert_eq!(forward.edges_from(c).len(), 0);
1033    }
1034}
grafeo_core/index/adjacency.rs

grafeo_core/index/
adjacency.rs