Skip to main content

webgraph/traits/
labels.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9/*!
10
11Traits for accessing labelings, both sequentially and
12in random-access fashion.
13
14A *labeling* is the basic storage unit for graph data. It associates to
15each node of a graph a list of labels. In the [sequential case](SequentialLabeling),
16one can obtain a [lender](SequentialLabeling::iter) that lends pairs given by a node
17and an iterator on the associated labels. In the [random-access case](RandomAccessLabeling),
18instead, one can get [an iterator on the labels associated with a node](RandomAccessLabeling::labels).
19Labelings can be [zipped together](crate::labels::Zip), obtaining a
20new labeling whose labels are pairs.
21
22The number of nodes *n* of the graph is returned by [`SequentialLabeling::num_nodes`],
23and nodes identifier are in the interval [0 . . *n*).
24
25*/
26
27use crate::{graphs::bvgraph::DCF, traits::LenderIntoIter, utils::Granularity};
28
29use super::{LenderLabel, NodeLabelsLender, ParMapFold};
30
31use core::ops::Range;
32use dsi_progress_logger::prelude::*;
33use impl_tools::autoimpl;
34use lender::*;
35use std::rc::Rc;
36use sux::{
37    dict::EliasFanoBuilder,
38    rank_sel::{SelectAdaptConst, SelectZeroAdaptConst},
39    traits::Succ,
40    utils::FairChunks,
41};
42use thiserror::Error;
43
44/// A labeling that can be accessed sequentially.
45///
46/// The iterator returned by [iter](SequentialLabeling::iter) is a
47/// [lender](NodeLabelsLender): to access the next pair, you must have finished
48/// to use the previous one. You can invoke [`Lender::copied`] to get a standard
49/// iterator, at the cost of some allocation and copying.
50///
51/// It is suggested that all implementors of this trait also implement
52/// [`IntoLender`] on a reference, returning the same lender as
53/// [`iter`](SequentialLabeling::iter). This makes it possible to use the
54/// [`for_!`](lender::for_) macro to iterate over the labeling (see the [module
55/// documentation](crate) for an example).
56///
57/// Note that there is no guarantee that the lender will return nodes in
58/// ascending order, or that the iterators on labels will return them in any
59/// specified order.
60///
61/// The marker traits [`SortedLender`] and [`SortedIterator`] can be used to
62/// force these properties. Note that [`SortedIterator`] implies that successors
63/// are returned in ascending order, and labels are returned in the same order.
64///
65/// This trait provides two default methods,
66/// [`par_apply`](SequentialLabeling::par_apply) and
67/// [`par_node_apply`](SequentialLabeling::par_node_apply), that make it easy to
68/// process in parallel the nodes of the labeling.
69///
70/// The function [`eq_sorted`] can be used to check whether two
71/// sorted labelings are equal.
72#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
73pub trait SequentialLabeling {
74    type Label;
75    /// The type of [`Lender`] over the successors of a node
76    /// returned by [`iter`](SequentialLabeling::iter).
77    type Lender<'node>: for<'next> NodeLabelsLender<'next, Label = Self::Label>
78    where
79        Self: 'node;
80
81    /// Returns the number of nodes in the graph.
82    fn num_nodes(&self) -> usize;
83
84    /// Returns the number of arcs in the graph, if available.
85    fn num_arcs_hint(&self) -> Option<u64> {
86        None
87    }
88
89    /// Returns an iterator over the labeling.
90    ///
91    /// Iterators over the labeling return pairs given by a node of the graph
92    /// and an [`IntoIterator`] over the labels.
93    fn iter(&self) -> Self::Lender<'_> {
94        self.iter_from(0)
95    }
96
97    /// Returns an iterator over the labeling starting at `from` (included).
98    ///
99    /// Note that if the iterator [is not sorted](SortedIterator), `from` is not
100    /// the node id of the first node returned by the iterator, but just the
101    /// starting point of the iteration
102    fn iter_from(&self, from: usize) -> Self::Lender<'_>;
103
104    /// Builds the degree cumulative function for this labeling.
105    ///
106    /// The degree cumulative function is the sequence 0, d₀, d₀ + d₁, … ,
107    /// *m*, where *dₖ* is the number of arcs/labels of node *k* and *m* is the
108    /// number of arcs/labels.
109    ///
110    /// # Panics
111    ///
112    /// Panics if [`num_arcs_hint`](SequentialLabeling::num_arcs_hint) returns
113    /// `None`.
114    fn build_dcf(&self) -> DCF {
115        let n = self.num_nodes();
116        let num_arcs = self
117            .num_arcs_hint()
118            .expect("build_dcf requires num_arcs_hint()") as usize;
119        let mut efb = EliasFanoBuilder::new(n + 1, num_arcs);
120        efb.push(0);
121        let mut cumul = 0usize;
122        let mut lender = self.iter();
123        while let Some((_node, succs)) = lender.next() {
124            cumul += succs.into_iter().count();
125            efb.push(cumul);
126        }
127        unsafe {
128            efb.build().map_high_bits(|high_bits| {
129                SelectZeroAdaptConst::<_, _, 12, 4>::new(SelectAdaptConst::<_, _, 12, 4>::new(
130                    high_bits,
131                ))
132            })
133        }
134    }
135
136    /// Applies `func` to each chunk of nodes of size `node_granularity` in
137    /// parallel, and folds the results using `fold`.
138    ///
139    /// # Arguments
140    ///
141    /// * `func` - The function to apply to each chunk of nodes.
142    ///
143    /// * `fold` - The function to fold the results obtained from each chunk. It
144    ///   will be passed to the [`Iterator::fold`].
145    ///
146    /// * `granularity` - The granularity of parallel tasks.
147    ///
148    /// * `pl` - An optional mutable reference to a progress logger.
149    ///
150    /// # Panics
151    ///
152    /// This method will panic if [`Granularity::node_granularity`] does.
153    fn par_node_apply<
154        A: Default + Send,
155        F: Fn(Range<usize>) -> A + Sync,
156        R: Fn(A, A) -> A + Sync,
157    >(
158        &self,
159        func: F,
160        fold: R,
161        granularity: Granularity,
162        pl: &mut impl ConcurrentProgressLog,
163    ) -> A {
164        let num_nodes = self.num_nodes();
165        let node_granularity = granularity.node_granularity(num_nodes, self.num_arcs_hint());
166        (0..num_nodes.div_ceil(node_granularity))
167            .map(|i| i * node_granularity..num_nodes.min((i + 1) * node_granularity))
168            .par_map_fold_with(
169                pl.clone(),
170                |pl, range| {
171                    let len = range.len();
172                    let res = func(range);
173                    pl.update_with_count(len);
174                    res
175                },
176                fold,
177            )
178    }
179
180    /// Applies `func` to each chunk of nodes containing approximately
181    /// `arc_granularity` arcs in parallel and folds the results using `fold`.
182    ///
183    /// You have to provide the degree cumulative function of the graph (i.e.,
184    /// the sequence 0, *d*₀, *d*₀ + *d*₁, ..., *a*, where *a* is the number of
185    /// arcs in the graph) in a form that makes it possible to compute
186    /// successors (for example, using the suitable `webgraph build` command).
187    ///
188    /// # Arguments
189    ///
190    /// * `func` - The function to apply to each chunk of nodes.
191    ///
192    /// * `fold` - The function to fold the results obtained from each chunk.
193    ///   It will be passed to the [`Iterator::fold`].
194    ///
195    /// * `granularity` - The granularity of parallel tests.
196    ///
197    /// * `deg_cumul_func` - The degree cumulative function of the graph.
198    ///
199    /// * `pl` - A mutable reference to a concurrent progress logger.
200    fn par_apply<
201        F: Fn(Range<usize>) -> A + Sync,
202        A: Default + Send,
203        R: Fn(A, A) -> A + Sync,
204        D: for<'a> Succ<Input = usize, Output<'a> = usize>,
205    >(
206        &self,
207        func: F,
208        fold: R,
209        granularity: Granularity,
210        deg_cumul: &D,
211        pl: &mut impl ConcurrentProgressLog,
212    ) -> A {
213        FairChunks::new(
214            granularity.arc_granularity(
215                self.num_nodes(),
216                Some(deg_cumul.get(deg_cumul.len() - 1) as u64),
217            ),
218            deg_cumul,
219        )
220        .par_map_fold_with(
221            pl.clone(),
222            |pl, range| {
223                let len = range.len();
224                let res = func(range);
225                pl.update_with_count(len);
226                res
227            },
228            fold,
229        )
230    }
231}
232
233/// Error types that can occur during graph equality checking.
234#[derive(Error, Debug, Clone, PartialEq, Eq)]
235pub enum EqError {
236    /// The graphs have different numbers of nodes.
237    #[error("Different number of nodes: {first} != {second}")]
238    NumNodes { first: usize, second: usize },
239
240    /// The graphs have different numbers of arcs.
241    #[error("Different number of arcs: {first} != {second}")]
242    NumArcs { first: u64, second: u64 },
243
244    /// Iteration on the two graphs returned a different node.
245    #[error("Different node: at index {index} {first} != {second}")]
246    Node {
247        index: usize,
248        first: usize,
249        second: usize,
250    },
251
252    /// The graphs have different successors for a specific node.
253    #[error("Different successors for node {node}: at index {index} {first} != {second}")]
254    Successors {
255        node: usize,
256        index: usize,
257        first: String,
258        second: String,
259    },
260
261    /// The graphs have different outdegrees for a specific node.
262    #[error("Different outdegree for node {node}: {first} != {second}")]
263    Outdegree {
264        node: usize,
265        first: usize,
266        second: usize,
267    },
268}
269
270#[doc(hidden)]
271/// Checks whether two sorted successors lists are identical,
272/// returning an appropriate error.
273pub fn eq_succs<L: PartialEq + std::fmt::Debug>(
274    node: usize,
275    succ0: impl IntoIterator<Item = L>,
276    succ1: impl IntoIterator<Item = L>,
277) -> Result<(), EqError> {
278    let mut succ0 = succ0.into_iter();
279    let mut succ1 = succ1.into_iter();
280    let mut index = 0;
281    loop {
282        match (succ0.next(), succ1.next()) {
283            (None, None) => return Ok(()),
284            (Some(s0), Some(s1)) => {
285                if s0 != s1 {
286                    return Err(EqError::Successors {
287                        node,
288                        index,
289                        first: format!("{s0:?}"),
290                        second: format!("{s1:?}"),
291                    });
292                }
293            }
294            (None, Some(_)) => {
295                return Err(EqError::Outdegree {
296                    node,
297                    first: index,
298                    second: index + 1 + succ1.count(),
299                });
300            }
301            (Some(_), None) => {
302                return Err(EqError::Outdegree {
303                    node,
304                    first: index + 1 + succ0.count(),
305                    second: index,
306                });
307            }
308        }
309        index += 1;
310    }
311}
312
313/// Checks if the two provided sorted labelings are equal.
314///
315/// Since graphs are labelings, this function can also be used to check whether
316/// sorted graphs are equal. If the graphs are different, an [`EqError`] is
317/// returned describing the first difference found.
318pub fn eq_sorted<L0: SequentialLabeling, L1: SequentialLabeling<Label = L0::Label>>(
319    l0: &L0,
320    l1: &L1,
321) -> Result<(), EqError>
322where
323    for<'a> L0::Lender<'a>: SortedLender,
324    for<'a> L1::Lender<'a>: SortedLender,
325    for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
326    for<'a, 'b> LenderIntoIter<'b, L1::Lender<'a>>: SortedIterator,
327    L0::Label: PartialEq + std::fmt::Debug,
328{
329    if l0.num_nodes() != l1.num_nodes() {
330        return Err(EqError::NumNodes {
331            first: l0.num_nodes(),
332            second: l1.num_nodes(),
333        });
334    }
335    for_!((index, ((node0, succ0), (node1, succ1))) in l0.iter().zip(l1.iter()).enumerate() {
336        if node0 != node1 {
337            return Err(EqError::Node {
338                index,
339                first: node0,
340                second: node1,
341            });
342        }
343        eq_succs(node0, succ0, succ1)?;
344    });
345    Ok(())
346}
347
348/// Convenience type alias for the iterator over the labels of a node
349/// returned by the [`iter_from`](SequentialLabeling::iter_from) method.
350pub type Labels<'succ, 'node, S> =
351    <<S as SequentialLabeling>::Lender<'node> as NodeLabelsLender<'succ>>::IntoIterator;
352
353/// Marker trait for lenders returned by [`SequentialLabeling::iter`] yielding
354/// node ids in ascending order.
355///
356/// The [`AssumeSortedLender`] type can be used to wrap a lender and
357/// unsafely implement this trait.
358///
359/// # Safety
360///
361/// The first element of the pairs returned by the iterator must go from zero to
362/// the [number of nodes](SequentialLabeling::num_nodes) of the graph, excluded.
363///
364/// # Examples
365///
366/// To bind the lender returned by [`SequentialLabeling::iter`] to implement this
367/// trait, you must use higher-rank trait bounds:
368/// ```rust
369/// use webgraph::traits::*;
370///
371/// fn takes_labeling_with_sorted_lender<G>(g: G) where
372///     G: SequentialLabeling,
373///     for<'a> G::Lender<'a>: SortedLender,
374/// {
375///     // ...
376/// }
377/// ```
378pub unsafe trait SortedLender: Lender {}
379
380/// A transparent wrapper for a [`NodeLabelsLender`] unsafely implementing
381/// [`SortedLender`].
382///
383/// This wrapper is useful when the underlying lender is known to return nodes
384/// in ascending order, but the trait is not implemented, and it is not possible
385/// to implement it directly because of the orphan rule.
386#[derive(Debug, Clone)]
387#[repr(transparent)]
388pub struct AssumeSortedLender<L> {
389    lender: L,
390}
391
392impl<L> AssumeSortedLender<L> {
393    /// # Safety
394    ///
395    /// The argument must return nodes in ascending order.
396    pub unsafe fn new(lender: L) -> Self {
397        Self { lender }
398    }
399}
400
401unsafe impl<L: Lender> SortedLender for AssumeSortedLender<L> {}
402
403impl<'succ, L: Lender> Lending<'succ> for AssumeSortedLender<L> {
404    type Lend = <L as Lending<'succ>>::Lend;
405}
406
407impl<L: Lender> Lender for AssumeSortedLender<L> {
408    // SAFETY: the lend is covariant as it directly delegates to the underlying
409    // covariant lender L.
410    unsafe_assume_covariance!();
411
412    #[inline(always)]
413    fn next(&mut self) -> Option<Lend<'_, Self>> {
414        self.lender.next()
415    }
416
417    #[inline(always)]
418    fn size_hint(&self) -> (usize, Option<usize>) {
419        self.lender.size_hint()
420    }
421}
422
423impl<L: ExactSizeLender> ExactSizeLender for AssumeSortedLender<L> {
424    #[inline(always)]
425    fn len(&self) -> usize {
426        self.lender.len()
427    }
428}
429
430impl<'lend, L> NodeLabelsLender<'lend> for AssumeSortedLender<L>
431where
432    L: Lender + for<'next> NodeLabelsLender<'next>,
433{
434    type Label = LenderLabel<'lend, L>;
435    type IntoIterator = <L as NodeLabelsLender<'lend>>::IntoIterator;
436}
437
438/// Marker trait for [`Iterator`]s yielding labels in the order induced by
439/// enumerating the successors in ascending order.
440///
441/// The [`AssumeSortedIterator`] type can be used to wrap an iterator and
442/// unsafely implement this trait.
443///
444/// # Safety
445///
446/// The labels returned by the iterator must be in the order in which they would
447/// be if successors were returned in ascending order.
448///
449/// # Examples
450///
451/// To bind the iterators returned by the lender returned by
452/// [`SequentialLabeling::iter`] to implement this trait, you must use
453/// higher-rank trait bounds:
454/// ```rust
455/// use webgraph::traits::*;
456///
457/// fn takes_labeling_with_sorted_iterators<G>(g: G) where
458///     G: SequentialLabeling,
459///     for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
460/// {
461///     // ...
462/// }
463/// ```
464pub unsafe trait SortedIterator: Iterator {}
465
466/// A transparent wrapper for an [`Iterator`] unsafely implementing
467/// [`SortedIterator`].
468///
469/// This wrapper is useful when an iterator is known to return labels in sorted
470/// order, but the trait is not implemented, and it is not possible to implement
471/// it directly because of the orphan rule.
472#[derive(Debug, Clone)]
473#[repr(transparent)]
474pub struct AssumeSortedIterator<I> {
475    iter: I,
476}
477
478impl<I> AssumeSortedIterator<I> {
479    /// # Safety
480    /// This is unsafe as the purpose of this struct is to attach an unsafe
481    /// trait to a struct that does not implement it.
482    pub unsafe fn new(iter: I) -> Self {
483        Self { iter }
484    }
485}
486
487unsafe impl<I: Iterator> SortedIterator for AssumeSortedIterator<I> {}
488
489impl<I: Iterator> Iterator for AssumeSortedIterator<I> {
490    type Item = I::Item;
491
492    #[inline(always)]
493    fn next(&mut self) -> Option<Self::Item> {
494        self.iter.next()
495    }
496
497    #[inline(always)]
498    fn count(self) -> usize {
499        self.iter.count()
500    }
501}
502
503impl<I: ExactSizeIterator> ExactSizeIterator for AssumeSortedIterator<I> {
504    #[inline(always)]
505    fn len(&self) -> usize {
506        self.iter.len()
507    }
508}
509
510/// A [`SequentialLabeling`] providing, additionally, random access to
511/// the list of labels associated with a node.
512///
513/// The function [`check_impl`] can be used to check whether the
514/// sequential and random-access implementations of a labeling are consistent.
515#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
516pub trait RandomAccessLabeling: SequentialLabeling {
517    /// The type of the iterator over the labels of a node
518    /// returned by [`labels`](RandomAccessLabeling::labels).
519    type Labels<'succ>: IntoIterator<Item = <Self as SequentialLabeling>::Label>
520    where
521        Self: 'succ;
522
523    /// Returns the number of arcs in the graph.
524    fn num_arcs(&self) -> u64;
525
526    /// Returns the labels associated with a node.
527    fn labels(&self, node_id: usize) -> <Self as RandomAccessLabeling>::Labels<'_>;
528
529    /// Returns the number of labels associated with a node.
530    fn outdegree(&self, node_id: usize) -> usize;
531}
532
533/// Error types that can occur during checking the implementation of a random
534/// access labeling.
535#[derive(Error, Debug, Clone, PartialEq, Eq)]
536pub enum CheckImplError {
537    /// The number of nodes returned by [`iter`](SequentialLabeling::iter)
538    /// is different from the number returned by
539    /// [`num_nodes`](SequentialLabeling::num_nodes).
540    #[error("Different number of nodes: {iter} (iter) != {method} (num_nodes)")]
541    NumNodes { iter: usize, method: usize },
542
543    /// The number of successors returned by [`iter`](SequentialLabeling::iter)
544    /// is different from the number returned by
545    /// [`num_arcs`](RandomAccessLabeling::num_arcs).
546    #[error("Different number of arcs: {iter} (iter) != {method} (num_arcs)")]
547    NumArcs { iter: u64, method: u64 },
548
549    /// The two implementations return different labels for a specific node.
550    #[error(
551        "Different successors for node {node}: at index {index} {sequential} (sequential) != {random_access} (random access)"
552    )]
553    Successors {
554        node: usize,
555        index: usize,
556        sequential: String,
557        random_access: String,
558    },
559
560    /// The graphs have different outdegrees for a specific node.
561    #[error(
562        "Different outdegree for node {node}: {sequential} (sequential) != {random_access} (random access)"
563    )]
564    Outdegree {
565        node: usize,
566        sequential: usize,
567        random_access: usize,
568    },
569}
570
571/// Checks the sequential vs. random-access implementation of a sorted
572/// random-access labeling.
573///
574/// Note that this method will check that the sequential and random-access
575/// iterators on labels of each node are identical, and that the number of
576/// nodes returned by the sequential iterator is the same as the number of
577/// nodes returned by [`num_nodes`](SequentialLabeling::num_nodes).
578pub fn check_impl<L: RandomAccessLabeling>(l: L) -> Result<(), CheckImplError>
579where
580    L::Label: PartialEq + std::fmt::Debug,
581{
582    let mut num_nodes = 0;
583    let mut num_arcs: u64 = 0;
584    for_!((node, succ_iter) in l.iter() {
585        num_nodes += 1;
586        let mut succ_iter = succ_iter.into_iter();
587        let mut succ = l.labels(node).into_iter();
588        let mut index = 0;
589        loop {
590            match (succ_iter.next(), succ.next()) {
591                (None, None) => break,
592                (Some(s0), Some(s1)) => {
593                    if s0 != s1 {
594                        return Err(CheckImplError::Successors {
595                            node,
596                            index,
597                            sequential: format!("{s0:?}"),
598                            random_access: format!("{s1:?}"),
599                        });
600                    }
601                }
602                (None, Some(_)) => {
603                    return Err(CheckImplError::Outdegree {
604                        node,
605                        sequential: index,
606                        random_access: index + 1 + succ.count(),
607                    });
608                }
609                (Some(_), None) => {
610                    return Err(CheckImplError::Outdegree {
611                        node,
612                        sequential: index + 1 + succ_iter.count(),
613                        random_access: index,
614                    });
615                }
616            }
617            index += 1;
618        }
619        num_arcs += index as u64;
620    });
621
622    if num_nodes != l.num_nodes() {
623        Err(CheckImplError::NumNodes {
624            method: l.num_nodes(),
625            iter: num_nodes,
626        })
627    } else if num_arcs != l.num_arcs() {
628        Err(CheckImplError::NumArcs {
629            method: l.num_arcs(),
630            iter: num_arcs,
631        })
632    } else {
633        Ok(())
634    }
635}
636
637/// A struct used to make it easy to implement sequential access
638/// starting from random access.
639///
640/// Users can implement just random-access primitives and then use this
641/// structure to implement the lender returned by
642/// [`iter_from`](SequentialLabeling::iter_from) by just returning an instance
643/// of this struct.
644pub struct LenderImpl<'node, G: RandomAccessLabeling> {
645    /// The underlying random-access labeling.
646    pub labeling: &'node G,
647    /// The range of nodes to iterate over.
648    pub nodes: core::ops::Range<usize>,
649}
650
651unsafe impl<G: RandomAccessLabeling> SortedLender for LenderImpl<'_, G> {}
652
653impl<'succ, G: RandomAccessLabeling> NodeLabelsLender<'succ> for LenderImpl<'_, G> {
654    type Label = G::Label;
655    type IntoIterator = <G as RandomAccessLabeling>::Labels<'succ>;
656}
657
658impl<'succ, G: RandomAccessLabeling> Lending<'succ> for LenderImpl<'_, G> {
659    type Lend = (usize, <G as RandomAccessLabeling>::Labels<'succ>);
660}
661
662impl<G: RandomAccessLabeling> Lender for LenderImpl<'_, G> {
663    // SAFETY: the lend is covariant as it returns labels from a RandomAccessLabeling,
664    // which are expected to be covariant in the lifetime.
665    unsafe_assume_covariance!();
666
667    #[inline(always)]
668    fn next(&mut self) -> Option<Lend<'_, Self>> {
669        self.nodes
670            .next()
671            .map(|node_id| (node_id, self.labeling.labels(node_id)))
672    }
673
674    #[inline(always)]
675    fn size_hint(&self) -> (usize, Option<usize>) {
676        (self.nodes.len(), Some(self.nodes.len()))
677    }
678}
679
680impl<G: RandomAccessLabeling> ExactSizeLender for LenderImpl<'_, G> {
681    #[inline(always)]
682    fn len(&self) -> usize {
683        self.nodes.len()
684    }
685}