webgraph/traits/
labels.rs

1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9/*!
10
11Traits for access labelings, both sequentially and
12in random-access fashion.
13
14A *labeling* is the basic storage unit for graph data. It associates to
15each node of a graph a list of labels. In the [sequential case](SequentialLabeling),
16one can obtain a [lender](SequentialLabeling::iter) that lends pairs given by a node
17and an iterator on the associated labels. In the [random-access case](RandomAccessLabeling),
18instead, one can get [an iterator on the labels associated with a node](RandomAccessLabeling::labels).
19Labelings can be [zipped together](crate::labels::Zip), obtaining a
20new labeling whose labels are pairs.
21
22The number of nodes *n* of the graph is returned by [`SequentialLabeling::num_nodes`],
23and nodes identifier are in the interval [0 . . *n*).
24
25*/
26
27use crate::{traits::LenderIntoIter, utils::Granularity};
28
29use super::{LenderLabel, NodeLabelsLender, ParMapFold};
30
31use core::ops::Range;
32use dsi_progress_logger::prelude::*;
33use impl_tools::autoimpl;
34use lender::*;
35use rayon::ThreadPool;
36use std::rc::Rc;
37use thiserror::Error;
38
39use sux::{traits::Succ, utils::FairChunks};
40
41/// A labeling that can be accessed sequentially.
42///
43/// The iterator returned by [iter](SequentialLabeling::iter) is a
44/// [lender](NodeLabelsLender): to access the next pair, you must have finished
45/// to use the previous one. You can invoke [`Lender::copied`] to get a standard
46/// iterator, at the cost of some allocation and copying.
47///
48/// It is suggested that all implementors of this trait also implement
49/// [`IntoLender`] on a reference, returning the same lender as
50/// [`iter`](SequentialLabeling::iter). This makes it possible to use the
51/// [`for_!`](lender::for_) macro to iterate over the labeling (see the [module
52/// documentation](crate) for an example).
53///
54/// Note that there is no guarantee that the lender will return nodes in
55/// ascending order, or that the iterators on labels will return them in any
56/// specified order.
57///
58/// The marker traits [`SortedLender`] and [`SortedIterator`] can be used to
59/// force these properties. Note that [`SortedIterator`] implies that successors
60/// are returned in ascending order, and labels are returned in the same order.
61///
62/// This trait provides two default methods,
63/// [`par_apply`](SequentialLabeling::par_apply) and
64/// [`par_node_apply`](SequentialLabeling::par_node_apply), that make it easy to
65/// process in parallel the nodes of the labeling.
66///
67/// The function [`eq_sorted`] can be used to check whether two
68/// sorted labelings are equal.
69#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
70pub trait SequentialLabeling {
71    type Label;
72    /// The type of [`Lender`] over the successors of a node
73    /// returned by [`iter`](SequentialLabeling::iter).
74    type Lender<'node>: for<'next> NodeLabelsLender<'next, Label = Self::Label>
75    where
76        Self: 'node;
77
78    /// Returns the number of nodes in the graph.
79    fn num_nodes(&self) -> usize;
80
81    /// Returns the number of arcs in the graph, if available.
82    fn num_arcs_hint(&self) -> Option<u64> {
83        None
84    }
85
86    /// Returns an iterator over the labeling.
87    ///
88    /// Iterators over the labeling return pairs given by a node of the graph
89    /// and an [`IntoIterator`] over the labels.
90    fn iter(&self) -> Self::Lender<'_> {
91        self.iter_from(0)
92    }
93
94    /// Returns an iterator over the labeling starting at `from` (included).
95    ///
96    /// Note that if the iterator [is not sorted](SortedIterator), `from` is not
97    /// the node id of the first node returned by the iterator, but just the
98    /// starting point of the iteration
99    fn iter_from(&self, from: usize) -> Self::Lender<'_>;
100
101    /// Applies `func` to each chunk of nodes of size `node_granularity` in
102    /// parallel, and folds the results using `fold`.
103    ///
104    /// # Arguments
105    ///
106    /// * `func` - The function to apply to each chunk of nodes.
107    ///
108    /// * `fold` - The function to fold the results obtained from each chunk. It
109    ///   will be passed to the [`Iterator::fold`].
110    ///
111    /// * `granularity` - The granularity of parallel tasks.
112    ///
113    /// * `thread_pool` - The thread pool to use. The maximum level of
114    ///   parallelism is given by the number of threads in the pool.
115    ///
116    /// * `pl` - An optional mutable reference to a progress logger.
117    ///
118    /// # Panics
119    ///
120    /// This method will panic if [`Granularity::node_granularity`] does.
121    fn par_node_apply<
122        A: Default + Send,
123        F: Fn(Range<usize>) -> A + Sync,
124        R: Fn(A, A) -> A + Sync,
125    >(
126        &self,
127        func: F,
128        fold: R,
129        granularity: Granularity,
130        thread_pool: &ThreadPool,
131        pl: &mut impl ConcurrentProgressLog,
132    ) -> A {
133        let num_nodes = self.num_nodes();
134        let node_granularity = granularity.node_granularity(num_nodes, self.num_arcs_hint());
135        (0..num_nodes.div_ceil(node_granularity))
136            .map(|i| i * node_granularity..num_nodes.min((i + 1) * node_granularity))
137            .par_map_fold_with(
138                pl.clone(),
139                |pl, range| {
140                    let len = range.len();
141                    let res = func(range);
142                    pl.update_with_count(len);
143                    res
144                },
145                fold,
146                thread_pool,
147            )
148    }
149
150    /// Apply `func` to each chunk of nodes containing approximately
151    /// `arc_granularity` arcs in parallel and folds the results using `fold`.
152    ///
153    /// You have to provide the degree cumulative function of the graph (i.e.,
154    /// the sequence 0, *d*₀, *d*₀ + *d*₁, ..., *a*, where *a* is the number of
155    /// arcs in the graph) in a form that makes it possible to compute
156    /// successors (for example, using the suitable `webgraph build` command).
157    ///
158    /// # Arguments
159    ///
160    /// * `func` - The function to apply to each chunk of nodes.
161    ///
162    /// * `fold` - The function to fold the results obtained from each chunk.
163    ///   It will be passed to the [`Iterator::fold`].
164    ///
165    /// * `granularity` - The granularity of parallel tests.
166    ///
167    /// * `deg_cumul_func` - The degree cumulative function of the graph.
168    ///
169    /// * `thread_pool` - The thread pool to use. The maximum level of
170    ///   parallelism is given by the number of threads in the pool.
171    ///
172    /// * `pl` - A mutable reference to a concurrent progress logger.
173    fn par_apply<
174        F: Fn(Range<usize>) -> A + Sync,
175        A: Default + Send,
176        R: Fn(A, A) -> A + Sync,
177        D: for<'a> Succ<Input = usize, Output<'a> = usize>,
178    >(
179        &self,
180        func: F,
181        fold: R,
182        granularity: Granularity,
183        deg_cumul: &D,
184        thread_pool: &ThreadPool,
185        pl: &mut impl ConcurrentProgressLog,
186    ) -> A {
187        FairChunks::new(
188            granularity.arc_granularity(
189                self.num_nodes(),
190                Some(deg_cumul.get(deg_cumul.len() - 1) as u64),
191            ),
192            deg_cumul,
193        )
194        .par_map_fold_with(
195            pl.clone(),
196            |pl, range| {
197                let len = range.len();
198                let res = func(range);
199                pl.update_with_count(len);
200                res
201            },
202            fold,
203            thread_pool,
204        )
205    }
206}
207
208/// Error types that can occur during graph equality checking.
209#[derive(Error, Debug, Clone, PartialEq, Eq)]
210pub enum EqError {
211    /// The graphs have different numbers of nodes.
212    #[error("Different number of nodes: {first} != {second}")]
213    NumNodes { first: usize, second: usize },
214
215    /// The graphs have different numbers of arcs.
216    #[error("Different number of arcs: {first} !={second}")]
217    NumArcs { first: u64, second: u64 },
218
219    /// The graphs have different successors for a specific node.
220    #[error("Different successors for node {node}: at index {index} {first} != {second}")]
221    Successors {
222        node: usize,
223        index: usize,
224        first: String,
225        second: String,
226    },
227
228    /// The graphs have different outdegrees for a specific node.
229    #[error("Different outdegree for node {node}: {first} != {second}")]
230    Outdegree {
231        node: usize,
232        first: usize,
233        second: usize,
234    },
235}
236
237#[doc(hidden)]
238/// Checks whether two sorted successors lists are identical,
239/// returning an appropriate error.
240pub fn eq_succs<L: PartialEq + std::fmt::Debug>(
241    node: usize,
242    succ0: impl IntoIterator<Item = L>,
243    succ1: impl IntoIterator<Item = L>,
244) -> Result<(), EqError> {
245    let mut succ0 = succ0.into_iter();
246    let mut succ1 = succ1.into_iter();
247    let mut index = 0;
248    loop {
249        match (succ0.next(), succ1.next()) {
250            (None, None) => return Ok(()),
251            (Some(s0), Some(s1)) => {
252                if s0 != s1 {
253                    return Err(EqError::Successors {
254                        node,
255                        index,
256                        first: format!("{s0:?}"),
257                        second: format!("{s1:?}"),
258                    });
259                }
260            }
261            (None, Some(_)) => {
262                return Err(EqError::Outdegree {
263                    node,
264                    first: index,
265                    second: index + 1 + succ1.count(),
266                });
267            }
268            (Some(_), None) => {
269                return Err(EqError::Outdegree {
270                    node,
271                    first: index + 1 + succ0.count(),
272                    second: index,
273                });
274            }
275        }
276        index += 1;
277    }
278}
279
280/// Checks if the two provided sorted labelings are equal.
281///
282/// Since graphs are labelings, this function can also be used to check whether
283/// sorted graphs are equal. If the graphs are different, an [`EqError`] is
284/// returned describing the first difference found.
285pub fn eq_sorted<L0: SequentialLabeling, L1: SequentialLabeling<Label = L0::Label>>(
286    l0: &L0,
287    l1: &L1,
288) -> Result<(), EqError>
289where
290    for<'a> L0::Lender<'a>: SortedLender,
291    for<'a> L1::Lender<'a>: SortedLender,
292    for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
293    for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
294    L0::Label: PartialEq + std::fmt::Debug,
295{
296    if l0.num_nodes() != l1.num_nodes() {
297        return Err(EqError::NumNodes {
298            first: l0.num_nodes(),
299            second: l1.num_nodes(),
300        });
301    }
302    for_!(((node0, succ0), (node1, succ1)) in l0.iter().zip(l1.iter()) {
303        debug_assert_eq!(node0, node1);
304        eq_succs(node0, succ0, succ1)?;
305    });
306    Ok(())
307}
308
309/// Convenience type alias for the iterator over the labels of a node
310/// returned by the [`iter_from`](SequentialLabeling::iter_from) method.
311pub type Labels<'succ, 'node, S> =
312    <<S as SequentialLabeling>::Lender<'node> as NodeLabelsLender<'succ>>::IntoIterator;
313
314/// Marker trait for lenders returned by [`SequentialLabeling::iter`] yielding
315/// node ids in ascending order.
316///
317/// The [`AssumeSortedLender`] type can be used to wrap a lender and
318/// unsafely implement this trait.
319///
320/// # Safety
321///
322/// The first element of the pairs returned by the iterator must go from zero to
323/// the [number of nodes](SequentialLabeling::num_nodes) of the graph, excluded.
324///
325/// # Examples
326///
327/// To bind the lender returned by [`SequentialLabeling::iter`] to implement this
328/// trait, you must use higher-rank trait bounds:
329/// ```rust
330/// use webgraph::traits::*;
331///
332/// fn takes_labeling_with_sorted_lender<G>(g: G) where
333///     G: SequentialLabeling,
334///     for<'a> G::Lender<'a>: SortedLender,
335/// {
336///     // ...
337/// }
338/// ```
339pub unsafe trait SortedLender: Lender {}
340
341/// A transparent wrapper for a [`NodeLabelsLender`] unsafely implementing
342/// [`SortedLender`].
343///
344/// This wrapper is useful when the underlying lender is known to return nodes
345/// in ascending order, but the trait is not implemented, and it is not possible
346/// to implement it directly because of the orphan rule.
347#[derive(Debug, Clone)]
348#[repr(transparent)]
349pub struct AssumeSortedLender<L> {
350    lender: L,
351}
352
353impl<L> AssumeSortedLender<L> {
354    /// # Safety
355    ///
356    /// The argument must return nodes in ascending order.
357    pub unsafe fn new(lender: L) -> Self {
358        Self { lender }
359    }
360}
361
362unsafe impl<L: Lender> SortedLender for AssumeSortedLender<L> {}
363
364impl<'succ, L: Lender> Lending<'succ> for AssumeSortedLender<L> {
365    type Lend = <L as Lending<'succ>>::Lend;
366}
367
368impl<L: Lender> Lender for AssumeSortedLender<L> {
369    #[inline(always)]
370    fn next(&mut self) -> Option<Lend<'_, Self>> {
371        self.lender.next()
372    }
373
374    #[inline(always)]
375    fn size_hint(&self) -> (usize, Option<usize>) {
376        self.lender.size_hint()
377    }
378}
379
380impl<L: ExactSizeLender> ExactSizeLender for AssumeSortedLender<L> {
381    #[inline(always)]
382    fn len(&self) -> usize {
383        self.lender.len()
384    }
385}
386
387impl<'lend, L> NodeLabelsLender<'lend> for AssumeSortedLender<L>
388where
389    L: Lender + for<'next> NodeLabelsLender<'next>,
390{
391    type Label = LenderLabel<'lend, L>;
392    type IntoIterator = <L as NodeLabelsLender<'lend>>::IntoIterator;
393}
394
395/// Marker trait for [`Iterator`]s yielding labels in the order induced by
396/// enumerating the successors in ascending order.
397///
398/// The [`AssumeSortedIterator`] type can be used to wrap an iterator and
399/// unsafely implement this trait.
400///
401/// # Safety
402///
403/// The labels returned by the iterator must be in the order in which they would
404/// be if successors were returned in ascending order.
405///
406/// # Examples
407///
408/// To bind the iterators returned by the lender returned by
409/// [`SequentialLabeling::iter`] to implement this trait, you must use
410/// higher-rank trait bounds:
411/// ```rust
412/// use webgraph::traits::*;
413///
414/// fn takes_labeling_with_sorted_iterators<G>(g: G) where
415///     G: SequentialLabeling,
416///     for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
417/// {
418///     // ...
419/// }
420/// ```
421pub unsafe trait SortedIterator: Iterator {}
422
423/// A transparent wrapper for an [`Iterator`] unsafely implementing
424/// [`SortedIterator`].
425///
426/// This wrapper is useful when an iterator is known to return labels in sorted
427/// order, but the trait is not implemented, and it is not possible to implement
428/// it directly because of the orphan rule.
429#[derive(Debug, Clone)]
430#[repr(transparent)]
431pub struct AssumeSortedIterator<I> {
432    iter: I,
433}
434
435impl<I> AssumeSortedIterator<I> {
436    /// # Safety
437    /// This is unsafe as the propose of this struct is to attach an unsafe
438    /// trait to a struct that does not implement it.
439    pub unsafe fn new(iter: I) -> Self {
440        Self { iter }
441    }
442}
443
444unsafe impl<I: Iterator> SortedIterator for AssumeSortedIterator<I> {}
445
446impl<I: Iterator> Iterator for AssumeSortedIterator<I> {
447    type Item = I::Item;
448
449    fn next(&mut self) -> Option<Self::Item> {
450        self.iter.next()
451    }
452}
453
454impl<I: ExactSizeIterator> ExactSizeIterator for AssumeSortedIterator<I> {
455    fn len(&self) -> usize {
456        self.iter.len()
457    }
458}
459
460/// A [`SequentialLabeling`] providing, additionally, random access to
461/// the list of labels associated with a node.
462///
463/// The function [`check_impl`] can be used to check whether the
464/// sequential and random-access implementations of a labeling are consistent.
465#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
466pub trait RandomAccessLabeling: SequentialLabeling {
467    /// The type of the iterator over the labels of a node
468    /// returned by [`labels`](RandomAccessLabeling::labels).
469    type Labels<'succ>: IntoIterator<Item = <Self as SequentialLabeling>::Label>
470    where
471        Self: 'succ;
472
473    /// Returns the number of arcs in the graph.
474    fn num_arcs(&self) -> u64;
475
476    /// Returns the labels associated with a node.
477    fn labels(&self, node_id: usize) -> <Self as RandomAccessLabeling>::Labels<'_>;
478
479    /// Returns the number of labels associated with a node.
480    fn outdegree(&self, node_id: usize) -> usize;
481}
482
483/// Error types that can occur during checking the implementation of a random
484/// access labeling.
485#[derive(Error, Debug, Clone, PartialEq, Eq)]
486pub enum CheckImplError {
487    /// The number of nodes returned by [`iter`](SequentialLabeling::iter)
488    /// is different from the number returned by
489    /// [`num_nodes`](SequentialLabeling::num_nodes).
490    #[error("Different number of nodes: {iter} (iter) != {method} (num_nodes)")]
491    NumNodes { iter: usize, method: usize },
492
493    /// The number of successors returned by [`iter`](SequentialLabeling::iter)
494    /// is different from the number returned by
495    /// [`num_arcs`](RandomAccessLabeling::num_arcs).
496    #[error("Different number of nodes: {iter} (iter) != {method} (num_arcs)")]
497    NumArcs { iter: u64, method: u64 },
498
499    /// The two implementations return different labels for a specific node.
500    #[error(
501        "Different successors for node {node}: at index {index} {sequential} (sequential) != {random_access} (random access)"
502    )]
503    Successors {
504        node: usize,
505        index: usize,
506        sequential: String,
507        random_access: String,
508    },
509
510    /// The graphs have different outdegrees for a specific node.
511    #[error(
512        "Different outdegree for node {node}: {sequential} (sequential) != {random_access} (random access)"
513    )]
514    Outdegree {
515        node: usize,
516        sequential: usize,
517        random_access: usize,
518    },
519}
520
521/// Checks the sequential vs. random-access implementation of a sorted
522/// random-access labeling.
523///
524/// Note that this method will check that the sequential and random-access
525/// iterators on labels of each node are identical, and that the number of
526/// nodes returned by the sequential iterator is the same as the number of
527/// nodes returned by [`num_nodes`](SequentialLabeling::num_nodes).
528pub fn check_impl<L: RandomAccessLabeling>(l: L) -> Result<(), CheckImplError>
529where
530    L::Label: PartialEq + std::fmt::Debug,
531{
532    let mut num_nodes = 0;
533    let mut num_arcs: u64 = 0;
534    for_!((node, succ_iter) in l.iter() {
535        num_nodes += 1;
536        let mut succ_iter = succ_iter.into_iter();
537        let mut succ = l.labels(node).into_iter();
538        let mut index = 0;
539        loop {
540            match (succ_iter.next(), succ.next()) {
541                (None, None) => break,
542                (Some(s0), Some(s1)) => {
543                    if s0 != s1 {
544                        return Err(CheckImplError::Successors {
545                            node,
546                            index,
547                            sequential: format!("{s0:?}"),
548                            random_access: format!("{s1:?}"),
549                        });
550                    }
551                }
552                (None, Some(_)) => {
553                    return Err(CheckImplError::Outdegree {
554                        node,
555                        sequential: index,
556                        random_access: index + 1 + succ.count(),
557                    });
558                }
559                (Some(_), None) => {
560                    return Err(CheckImplError::Outdegree {
561                        node,
562                        sequential: index + 1 + succ_iter.count(),
563                        random_access: index,
564                    });
565                }
566            }
567            index += 1;
568        }
569        num_arcs += index as u64;
570    });
571
572    if num_nodes != l.num_nodes() {
573        Err(CheckImplError::NumNodes {
574            method: l.num_nodes(),
575            iter: num_nodes,
576        })
577    } else if num_arcs != l.num_arcs() {
578        Err(CheckImplError::NumArcs {
579            method: l.num_arcs(),
580            iter: num_arcs,
581        })
582    } else {
583        Ok(())
584    }
585}
586
587/// A struct used to make it easy to implement sequential access
588/// starting from random access.
589///
590/// Users can implement just random-access primitives and then
591/// use this structure to implement sequential access.
592pub struct IteratorImpl<'node, G: RandomAccessLabeling> {
593    pub labeling: &'node G,
594    pub nodes: core::ops::Range<usize>,
595}
596
597unsafe impl<G: RandomAccessLabeling> SortedLender for IteratorImpl<'_, G> {}
598
599impl<'succ, G: RandomAccessLabeling> NodeLabelsLender<'succ> for IteratorImpl<'_, G> {
600    type Label = G::Label;
601    type IntoIterator = <G as RandomAccessLabeling>::Labels<'succ>;
602}
603
604impl<'succ, G: RandomAccessLabeling> Lending<'succ> for IteratorImpl<'_, G> {
605    type Lend = (usize, <G as RandomAccessLabeling>::Labels<'succ>);
606}
607
608impl<G: RandomAccessLabeling> Lender for IteratorImpl<'_, G> {
609    #[inline(always)]
610    fn next(&mut self) -> Option<Lend<'_, Self>> {
611        self.nodes
612            .next()
613            .map(|node_id| (node_id, self.labeling.labels(node_id)))
614    }
615}