Skip to main content

webgraph/graphs/
csr_graph.rs

1/*
2 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
3 * SPDX-FileCopyrightText: 2025 Tommaso Fontana
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::bvgraph::EF;
9use crate::traits::*;
10use common_traits::UnsignedInt;
11use epserde::Epserde;
12use lender::{IntoLender, Lend, Lender, Lending, check_covariance, for_};
13use sux::{
14    bits::BitFieldVec,
15    dict::EliasFanoBuilder,
16    rank_sel::{SelectAdaptConst, SelectZeroAdaptConst},
17};
18use value_traits::{
19    iter::{IterFrom, IterateByValueFrom},
20    slices::SliceByValue,
21};
22
23/// A [`CsrGraph`] with Elias–Fano-encoded degree cumulative function and
24/// [`BitFieldVec`]-encoded successors.
25pub type CompressedCsrGraph = CsrGraph<EF, BitFieldVec>;
26
27/// A [`CsrSortedGraph`] with Elias–Fano-encoded degree cumulative function and
28/// [`BitFieldVec`]-encoded successors.
29pub type CompressedCsrSortedGraph = CsrSortedGraph<EF, BitFieldVec>;
30
31/// A compressed sparse-row graph.
32///
33/// It is a graph representation that stores the degree cumulative function
34/// (DCF) and the successors in a compressed format. The DCF is a sequence of
35/// offsets that indicates the start of the neighbors for each node in the
36/// graph. Building a CSR graph requires always a sorted lender.
37///
38/// The lenders returned by a CSR graph are sorted; however, the successors may
39/// be unsorted. If you need the additional guarantee that the successors are
40/// sorted, use [`CsrSortedGraph`], which however requires a lender returning
41/// sorted successors.
42///
43/// Depending on the performance and memory requirements, both the DCF and
44/// successors can be stored in different formats. The default is to use boxed
45/// slices for both the DCF and successors, which is the fastest choice.
46///
47/// A [`CompressedCsrGraph`], instead, is a [`CsrGraph`] where the DCF is
48/// represented using an Elias-Fano encoding, and the successors are represented
49/// using a [`BitFieldVec`]. There is also a [version with sorted
50/// successors](CompressedCsrSortedGraph). Their construction requires a
51/// sequential graph providing the number of arcs.
52#[derive(Debug, Clone, Epserde)]
53#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
54pub struct CsrGraph<DCF = Box<[usize]>, S = Box<[usize]>> {
55    dcf: DCF,
56    successors: S,
57}
58
59/// A wrapper for a [`CsrGraph`] with the additional guarantee that the
60/// successors are sorted.
61#[derive(Debug, Clone, Epserde)]
62#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
63pub struct CsrSortedGraph<DCF = Box<[usize]>, S = Box<[usize]>>(CsrGraph<DCF, S>);
64
65impl<DCF, S> CsrGraph<DCF, S> {
66    /// Creates a new CSR graph from the given degree cumulative function and
67    /// successors.
68    ///
69    /// # Safety
70    /// The degree cumulative function must be monotone and coherent with the
71    /// successors.
72    pub unsafe fn from_parts(dcf: DCF, successors: S) -> Self {
73        Self { dcf, successors }
74    }
75
76    /// Returns a reference to the degree cumulative function.
77    #[inline(always)]
78    pub fn dcf(&self) -> &DCF {
79        &self.dcf
80    }
81
82    /// Returns a reference to the successors.
83    #[inline(always)]
84    pub fn successors(&self) -> &S {
85        &self.successors
86    }
87
88    /// Consumes the graph, returning the degree cumulative function and
89    /// the successors.
90    #[inline(always)]
91    pub fn into_inner(self) -> (DCF, S) {
92        (self.dcf, self.successors)
93    }
94}
95
96impl core::default::Default for CsrGraph {
97    fn default() -> Self {
98        Self {
99            dcf: vec![0].into(),
100            successors: vec![].into(),
101        }
102    }
103}
104
105impl CsrGraph {
106    /// Creates an empty CSR graph.
107    pub fn new() -> Self {
108        Self::default()
109    }
110
111    /// Internal method to create a graph from a lender with optional size hints.
112    ///
113    /// The `num_nodes_hint` and `num_arcs_hint` parameters are used to
114    /// pre-allocate the vectors, improving performance when the sizes are known
115    /// in advance.
116    fn _from_lender<I: IntoLender>(
117        iter_nodes: I,
118        num_nodes_hint: Option<usize>,
119        num_arcs_hint: Option<usize>,
120    ) -> Self
121    where
122        I::Lender: for<'next> NodeLabelsLender<'next, Label = usize> + SortedLender,
123    {
124        let mut max_node = 0;
125        let mut dcf = Vec::with_capacity(num_nodes_hint.unwrap_or(0) + 1);
126        dcf.push(0);
127        let mut successors = Vec::with_capacity(num_arcs_hint.unwrap_or(0));
128
129        let mut last_src = 0;
130        for_!( (src, succs) in iter_nodes {
131            while last_src < src {
132                dcf.push(successors.len());
133                last_src += 1;
134            }
135            max_node = max_node.max(src);
136            for succ in succs {
137                successors.push(succ);
138                max_node = max_node.max(succ);
139            }
140        });
141        for _ in last_src..=max_node {
142            dcf.push(successors.len());
143        }
144        dcf.shrink_to_fit();
145        successors.shrink_to_fit();
146        unsafe { Self::from_parts(dcf.into(), successors.into()) }
147    }
148
149    /// Creates a new CSR graph from an [`IntoLender`] yielding a
150    /// [`NodeLabelsLender`].
151    ///
152    /// This method will determine the number of nodes from the maximum node ID
153    /// encountered.
154    pub fn from_lender<I: IntoLender>(iter_nodes: I) -> Self
155    where
156        I::Lender: for<'next> NodeLabelsLender<'next, Label = usize> + SortedLender,
157    {
158        Self::_from_lender(iter_nodes, None, None)
159    }
160
161    /// Creates a new CSR graph from a sorted [`IntoLender`] yielding a
162    /// sorted [`NodeLabelsLender`].
163    ///
164    /// This method is an alias for [`from_lender`](Self::from_lender), as both
165    /// sorted and unsorted lenders are handled identically in the unsorted case.
166    pub fn from_sorted_lender<I: IntoLender>(iter_nodes: I) -> Self
167    where
168        I::Lender: for<'next> NodeLabelsLender<'next, Label = usize> + SortedLender,
169        for<'succ> LenderIntoIter<'succ, I::Lender>: SortedIterator,
170    {
171        Self::from_lender(iter_nodes)
172    }
173
174    /// Creates a new CSR graph from a [`SequentialGraph`].
175    ///
176    /// This method uses the graph's size hints for efficient pre-allocation.
177    pub fn from_seq_graph<G: SequentialGraph>(g: &G) -> Self
178    where
179        for<'a> G::Lender<'a>: SortedLender,
180    {
181        Self::_from_lender(
182            g.iter(),
183            Some(g.num_nodes()),
184            g.num_arcs_hint().map(|n| n as usize),
185        )
186    }
187}
188
189impl CsrSortedGraph {
190    /// Creates a new sorted CSR graph from an [`IntoLender`] yielding a sorted
191    /// [`NodeLabelsLender`] with sorted successors.
192    pub fn from_lender<I: IntoLender>(iter_nodes: I) -> Self
193    where
194        I::Lender: for<'next> NodeLabelsLender<'next, Label = usize> + SortedLender,
195        for<'succ> LenderIntoIter<'succ, I::Lender>: SortedIterator,
196    {
197        CsrSortedGraph(CsrGraph::from_lender(iter_nodes))
198    }
199
200    /// Creates a new sorted CSR graph from a [`SequentialGraph`] with
201    /// sorted lenders and sorted successors.
202    pub fn from_seq_graph<G: SequentialGraph>(g: &G) -> Self
203    where
204        for<'a> G::Lender<'a>: SortedLender,
205        for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
206    {
207        CsrSortedGraph(CsrGraph::from_seq_graph(g))
208    }
209}
210
211impl CompressedCsrGraph {
212    /// Creates a new compressed CSR graph from a sequential graph with sorted
213    /// lender and providing the number of arcs.
214    ///
215    /// This method will return an error if the graph does not provide
216    /// the number of arcs.
217    pub fn try_from_graph<G: SequentialGraph>(g: &G) -> anyhow::Result<Self>
218    where
219        for<'a> G::Lender<'a>: SortedLender,
220    {
221        let n = g.num_nodes();
222        let u = g.num_arcs_hint().ok_or(anyhow::Error::msg(
223            "This sequential graph does not provide the number of arcs",
224        ))?;
225        let mut efb = EliasFanoBuilder::new(n + 1, u as usize + 1);
226        efb.push(0);
227        let mut successors = BitFieldVec::with_capacity(
228            if n == 0 { 0 } else { n.ilog2_ceil() as usize },
229            u as usize,
230        );
231        let mut last_src = 0;
232        for_!((src, succ) in g.iter() {
233            while last_src < src {
234                efb.push(successors.len());
235                last_src += 1;
236            }
237            successors.extend(succ);
238        });
239        for _ in last_src..g.num_nodes() {
240            efb.push(successors.len());
241        }
242        let ef = efb.build();
243        let ef: EF = unsafe { ef.map_high_bits(SelectAdaptConst::<_, _, 12, 4>::new) };
244        unsafe { Ok(Self::from_parts(ef, successors)) }
245    }
246}
247
248impl CompressedCsrSortedGraph {
249    /// Creates a new compressed CSR sorted graph from a sequential graph with
250    /// sorted lender, sorted successors, and providing the number of arcs.
251    ///
252    /// This method will return an error if the graph does not provide
253    /// the number of arcs.
254    pub fn try_from_graph<G: SequentialGraph>(g: &G) -> anyhow::Result<Self>
255    where
256        for<'a> G::Lender<'a>: SortedLender,
257        for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
258    {
259        Ok(CsrSortedGraph(CsrGraph::try_from_graph(g)?))
260    }
261}
262
263/// Convenience implementation that makes it possible to iterate
264/// over the graph using the [`for_`] macro
265/// (see the [crate documentation](crate)).
266impl<'a, DCF, S> IntoLender for &'a CsrGraph<DCF, S>
267where
268    DCF: SliceByValue + IterateByValueFrom<Item = usize>,
269    S: SliceByValue + IterateByValueFrom<Item = usize>,
270{
271    type Lender = NodeLabels<IterFrom<'a, DCF>, IterFrom<'a, S>>;
272
273    #[inline(always)]
274    fn into_lender(self) -> Self::Lender {
275        self.iter()
276    }
277}
278
279/// Convenience implementation that makes it possible to iterate
280/// over the graph using the [`for_`] macro
281/// (see the [crate documentation](crate)).
282impl<'a, DCF, S> IntoLender for &'a CsrSortedGraph<DCF, S>
283where
284    DCF: SliceByValue + IterateByValueFrom<Item = usize>,
285    S: SliceByValue + IterateByValueFrom<Item = usize>,
286{
287    type Lender = <Self as SequentialLabeling>::Lender<'a>;
288
289    #[inline(always)]
290    fn into_lender(self) -> Self::Lender {
291        self.iter()
292    }
293}
294
295impl<DCF, S> SequentialLabeling for CsrGraph<DCF, S>
296where
297    DCF: SliceByValue + IterateByValueFrom<Item = usize>,
298    S: SliceByValue + IterateByValueFrom<Item = usize>,
299{
300    type Label = usize;
301    type Lender<'a>
302        = NodeLabels<IterFrom<'a, DCF>, IterFrom<'a, S>>
303    where
304        Self: 'a;
305
306    #[inline(always)]
307    fn num_nodes(&self) -> usize {
308        self.dcf.len() - 1
309    }
310
311    #[inline(always)]
312    fn num_arcs_hint(&self) -> Option<u64> {
313        Some(self.successors.len() as u64)
314    }
315
316    #[inline(always)]
317    fn iter_from(&self, from: usize) -> Self::Lender<'_> {
318        let mut offsets_iter = self.dcf.iter_value_from(from);
319        // skip the first offset, we don't start from `from + 1`
320        // because it might not exist
321        let offset = offsets_iter.next().unwrap_or(0);
322
323        NodeLabels {
324            node: from,
325            last_offset: offset,
326            current_offset: offset,
327            offsets_iter,
328            successors_iter: self.successors.iter_value_from(offset),
329        }
330    }
331
332    fn build_dcf(&self) -> crate::graphs::bvgraph::DCF {
333        let n = self.num_nodes();
334        let num_arcs = self.num_arcs_hint().unwrap() as usize;
335        let mut efb = EliasFanoBuilder::new(n + 1, num_arcs);
336        for val in self.dcf.iter_value_from(0).take(n + 1) {
337            efb.push(val);
338        }
339        unsafe {
340            efb.build().map_high_bits(|high_bits| {
341                SelectZeroAdaptConst::<_, _, 12, 4>::new(SelectAdaptConst::<_, _, 12, 4>::new(
342                    high_bits,
343                ))
344            })
345        }
346    }
347}
348
349impl<DCF, S> SequentialLabeling for CsrSortedGraph<DCF, S>
350where
351    DCF: SliceByValue + IterateByValueFrom<Item = usize>,
352    S: SliceByValue + IterateByValueFrom<Item = usize>,
353{
354    type Label = usize;
355    type Lender<'a>
356        = LenderSortedImpl<IterFrom<'a, DCF>, IterFrom<'a, S>>
357    where
358        Self: 'a;
359
360    #[inline(always)]
361    fn num_nodes(&self) -> usize {
362        self.0.num_nodes()
363    }
364
365    #[inline(always)]
366    fn num_arcs_hint(&self) -> Option<u64> {
367        self.0.num_arcs_hint()
368    }
369
370    #[inline(always)]
371    fn iter_from(&self, from: usize) -> Self::Lender<'_> {
372        LenderSortedImpl(self.0.iter_from(from))
373    }
374
375    fn build_dcf(&self) -> crate::graphs::bvgraph::DCF {
376        self.0.build_dcf()
377    }
378}
379
380impl<D, S> SequentialGraph for CsrGraph<D, S>
381where
382    D: SliceByValue + IterateByValueFrom<Item = usize>,
383    S: SliceByValue + IterateByValueFrom<Item = usize>,
384{
385}
386
387impl<D, S> SequentialGraph for CsrSortedGraph<D, S>
388where
389    D: SliceByValue + IterateByValueFrom<Item = usize>,
390    S: SliceByValue + IterateByValueFrom<Item = usize>,
391{
392}
393
394impl<DCF, S> RandomAccessLabeling for CsrGraph<DCF, S>
395where
396    DCF: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
397    S: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
398{
399    type Labels<'succ>
400        = core::iter::Take<IterFrom<'succ, S>>
401    where
402        Self: 'succ;
403
404    #[inline(always)]
405    fn num_arcs(&self) -> u64 {
406        self.successors.len() as u64
407    }
408
409    #[inline(always)]
410    fn outdegree(&self, node: usize) -> usize {
411        self.dcf.index_value(node + 1) - self.dcf.index_value(node)
412    }
413
414    #[inline(always)]
415    fn labels(&self, node: usize) -> <Self as RandomAccessLabeling>::Labels<'_> {
416        let start = self.dcf.index_value(node);
417        let end = self.dcf.index_value(node + 1);
418        self.successors.iter_value_from(start).take(end - start)
419    }
420}
421
422impl<DCF, S> RandomAccessLabeling for CsrSortedGraph<DCF, S>
423where
424    DCF: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
425    S: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
426{
427    type Labels<'succ>
428        = AssumeSortedIterator<core::iter::Take<IterFrom<'succ, S>>>
429    where
430        Self: 'succ;
431
432    #[inline(always)]
433    fn num_arcs(&self) -> u64 {
434        self.0.num_arcs()
435    }
436
437    #[inline(always)]
438    fn outdegree(&self, node: usize) -> usize {
439        self.0.outdegree(node)
440    }
441
442    #[inline(always)]
443    fn labels(&self, node: usize) -> <Self as RandomAccessLabeling>::Labels<'_> {
444        let labels = <CsrGraph<DCF, S> as RandomAccessLabeling>::labels(&self.0, node);
445        unsafe { AssumeSortedIterator::new(labels) }
446    }
447}
448
449impl<DCF, S> RandomAccessGraph for CsrGraph<DCF, S>
450where
451    DCF: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
452    S: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
453{
454}
455
456impl<DCF, S> RandomAccessGraph for CsrSortedGraph<DCF, S>
457where
458    DCF: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
459    S: SliceByValue<Value = usize> + IterateByValueFrom<Item = usize>,
460{
461}
462
463/// Sequential Lender for the CSR graph.
464#[derive(Debug, Clone)]
465pub struct NodeLabels<O: Iterator<Item = usize>, S: Iterator<Item = usize>> {
466    /// The next node to lend labels for.
467    node: usize,
468    /// This is the offset of the last successor of the previous node.
469    last_offset: usize,
470    /// This is the offset of the next successor to lend. This is modified
471    /// by the iterator we return.
472    current_offset: usize,
473    /// The offsets iterator.
474    offsets_iter: O,
475    /// The successors iterator.
476    successors_iter: S,
477}
478
479unsafe impl<O: Iterator<Item = usize>, S: Iterator<Item = usize>> SortedLender
480    for NodeLabels<O, S>
481{
482}
483
484impl<'succ, I, D> NodeLabelsLender<'succ> for NodeLabels<I, D>
485where
486    I: Iterator<Item = usize>,
487    D: Iterator<Item = usize>,
488{
489    type Label = usize;
490    type IntoIterator = SeqSucc<'succ, D>;
491}
492
493impl<'succ, I, D> Lending<'succ> for NodeLabels<I, D>
494where
495    I: Iterator<Item = usize>,
496    D: Iterator<Item = usize>,
497{
498    type Lend = (usize, SeqSucc<'succ, D>);
499}
500
501impl<I, D> Lender for NodeLabels<I, D>
502where
503    I: Iterator<Item = usize>,
504    D: Iterator<Item = usize>,
505{
506    check_covariance!();
507
508    #[inline(always)]
509    fn next(&mut self) -> Option<Lend<'_, Self>> {
510        // if the user of the iterator wasn't fully consumed,
511        // we need to skip the remaining successors
512        while self.current_offset < self.last_offset {
513            self.current_offset += 1;
514            self.successors_iter.next()?;
515        }
516
517        // implicitly exit if the offsets iterator is empty
518        let offset = self.offsets_iter.next()?;
519        self.last_offset = offset;
520
521        let node = self.node;
522        self.node += 1;
523
524        Some((
525            node,
526            SeqSucc {
527                succ_iter: &mut self.successors_iter,
528                current_offset: &mut self.current_offset,
529                last_offset: &self.last_offset,
530            },
531        ))
532    }
533}
534
535/// Sequential Lender for the CSR graph.
536#[derive(Debug, Clone)]
537#[repr(transparent)]
538pub struct LenderSortedImpl<O: Iterator<Item = usize>, S: Iterator<Item = usize>>(NodeLabels<O, S>);
539
540unsafe impl<O: Iterator<Item = usize>, S: Iterator<Item = usize>> SortedLender
541    for LenderSortedImpl<O, S>
542{
543}
544
545impl<'succ, I, D> NodeLabelsLender<'succ> for LenderSortedImpl<I, D>
546where
547    I: Iterator<Item = usize>,
548    D: Iterator<Item = usize>,
549{
550    type Label = usize;
551    type IntoIterator = AssumeSortedIterator<SeqSucc<'succ, D>>;
552}
553
554impl<'succ, I, D> Lending<'succ> for LenderSortedImpl<I, D>
555where
556    I: Iterator<Item = usize>,
557    D: Iterator<Item = usize>,
558{
559    type Lend = (usize, AssumeSortedIterator<SeqSucc<'succ, D>>);
560}
561
562impl<I, D> Lender for LenderSortedImpl<I, D>
563where
564    I: Iterator<Item = usize>,
565    D: Iterator<Item = usize>,
566{
567    check_covariance!();
568
569    #[inline(always)]
570    fn next(&mut self) -> Option<Lend<'_, Self>> {
571        let (src, succ) = self.0.next()?;
572        Some((src, unsafe { AssumeSortedIterator::new(succ) }))
573    }
574}
575
576/// The iterator on successors returned by the lender.
577///
578/// This is different from the random-access iterator because for better
579/// efficiency we have a single successors iterators that is forwarded by the
580/// lender.
581///
582/// If the DCF and the successors are compressed representations, this might be
583/// much faster than the random access iterator. When using vectors it might be
584/// slower, but it is still a good idea to use this iterator to avoid the
585/// overhead of creating a new iterator for each node.
586pub struct SeqSucc<'a, D> {
587    succ_iter: &'a mut D,
588    current_offset: &'a mut usize,
589    last_offset: &'a usize,
590}
591
592impl<D: Iterator<Item = usize>> Iterator for SeqSucc<'_, D> {
593    type Item = usize;
594
595    #[inline(always)]
596    fn next(&mut self) -> Option<Self::Item> {
597        if *self.current_offset >= *self.last_offset {
598            return None;
599        }
600        *self.current_offset += 1;
601        self.succ_iter.next()
602    }
603
604    #[inline(always)]
605    fn count(self) -> usize {
606        self.len()
607    }
608
609    #[inline(always)]
610    fn size_hint(&self) -> (usize, Option<usize>) {
611        let len = self.last_offset - *self.current_offset;
612        (len, Some(len))
613    }
614}
615
616impl<D: Iterator<Item = usize>> ExactSizeIterator for SeqSucc<'_, D> {
617    #[inline(always)]
618    fn len(&self) -> usize {
619        self.last_offset - *self.current_offset
620    }
621}