webgraph/traits/labels.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9/*!
10
11Traits for access labelings, both sequentially and
12in random-access fashion.
13
14A *labeling* is the basic storage unit for graph data. It associates to
15each node of a graph a list of labels. In the [sequential case](SequentialLabeling),
16one can obtain a [lender](SequentialLabeling::iter) that lends pairs given by a node
17and an iterator on the associated labels. In the [random-access case](RandomAccessLabeling),
18instead, one can get [an iterator on the labels associated with a node](RandomAccessLabeling::labels).
19Labelings can be [zipped together](crate::labels::Zip), obtaining a
20new labeling whose labels are pairs.
21
22The number of nodes *n* of the graph is returned by [`SequentialLabeling::num_nodes`],
23and nodes identifier are in the interval [0 . . *n*).
24
25*/
26
27use crate::{traits::LenderIntoIter, utils::Granularity};
28
29use super::{LenderLabel, NodeLabelsLender, ParMapFold};
30
31use core::ops::Range;
32use dsi_progress_logger::prelude::*;
33use impl_tools::autoimpl;
34use lender::*;
35use rayon::ThreadPool;
36use std::rc::Rc;
37use thiserror::Error;
38
39use sux::{traits::Succ, utils::FairChunks};
40
41/// A labeling that can be accessed sequentially.
42///
43/// The iterator returned by [iter](SequentialLabeling::iter) is a
44/// [lender](NodeLabelsLender): to access the next pair, you must have finished
45/// to use the previous one. You can invoke [`Lender::copied`] to get a standard
46/// iterator, at the cost of some allocation and copying.
47///
48/// It is suggested that all implementors of this trait also implement
49/// [`IntoLender`] on a reference, returning the same lender as
50/// [`iter`](SequentialLabeling::iter). This makes it possible to use the
51/// [`for_!`](lender::for_) macro to iterate over the labeling (see the [module
52/// documentation](crate) for an example).
53///
54/// Note that there is no guarantee that the lender will return nodes in
55/// ascending order, or that the iterators on labels will return them in any
56/// specified order.
57///
58/// The marker traits [`SortedLender`] and [`SortedIterator`] can be used to
59/// force these properties. Note that [`SortedIterator`] implies that successors
60/// are returned in ascending order, and labels are returned in the same order.
61///
62/// This trait provides two default methods,
63/// [`par_apply`](SequentialLabeling::par_apply) and
64/// [`par_node_apply`](SequentialLabeling::par_node_apply), that make it easy to
65/// process in parallel the nodes of the labeling.
66///
67/// The function [`eq_sorted`] can be used to check whether two
68/// sorted labelings are equal.
69#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
70pub trait SequentialLabeling {
71 type Label;
72 /// The type of [`Lender`] over the successors of a node
73 /// returned by [`iter`](SequentialLabeling::iter).
74 type Lender<'node>: for<'next> NodeLabelsLender<'next, Label = Self::Label>
75 where
76 Self: 'node;
77
78 /// Returns the number of nodes in the graph.
79 fn num_nodes(&self) -> usize;
80
81 /// Returns the number of arcs in the graph, if available.
82 fn num_arcs_hint(&self) -> Option<u64> {
83 None
84 }
85
86 /// Returns an iterator over the labeling.
87 ///
88 /// Iterators over the labeling return pairs given by a node of the graph
89 /// and an [`IntoIterator`] over the labels.
90 fn iter(&self) -> Self::Lender<'_> {
91 self.iter_from(0)
92 }
93
94 /// Returns an iterator over the labeling starting at `from` (included).
95 ///
96 /// Note that if the iterator [is not sorted](SortedIterator), `from` is not
97 /// the node id of the first node returned by the iterator, but just the
98 /// starting point of the iteration
99 fn iter_from(&self, from: usize) -> Self::Lender<'_>;
100
101 /// Applies `func` to each chunk of nodes of size `node_granularity` in
102 /// parallel, and folds the results using `fold`.
103 ///
104 /// # Arguments
105 ///
106 /// * `func` - The function to apply to each chunk of nodes.
107 ///
108 /// * `fold` - The function to fold the results obtained from each chunk. It
109 /// will be passed to the [`Iterator::fold`].
110 ///
111 /// * `granularity` - The granularity of parallel tasks.
112 ///
113 /// * `thread_pool` - The thread pool to use. The maximum level of
114 /// parallelism is given by the number of threads in the pool.
115 ///
116 /// * `pl` - An optional mutable reference to a progress logger.
117 ///
118 /// # Panics
119 ///
120 /// This method will panic if [`Granularity::node_granularity`] does.
121 fn par_node_apply<
122 A: Default + Send,
123 F: Fn(Range<usize>) -> A + Sync,
124 R: Fn(A, A) -> A + Sync,
125 >(
126 &self,
127 func: F,
128 fold: R,
129 granularity: Granularity,
130 thread_pool: &ThreadPool,
131 pl: &mut impl ConcurrentProgressLog,
132 ) -> A {
133 let num_nodes = self.num_nodes();
134 let node_granularity = granularity.node_granularity(num_nodes, self.num_arcs_hint());
135 (0..num_nodes.div_ceil(node_granularity))
136 .map(|i| i * node_granularity..num_nodes.min((i + 1) * node_granularity))
137 .par_map_fold_with(
138 pl.clone(),
139 |pl, range| {
140 let len = range.len();
141 let res = func(range);
142 pl.update_with_count(len);
143 res
144 },
145 fold,
146 thread_pool,
147 )
148 }
149
150 /// Apply `func` to each chunk of nodes containing approximately
151 /// `arc_granularity` arcs in parallel and folds the results using `fold`.
152 ///
153 /// You have to provide the degree cumulative function of the graph (i.e.,
154 /// the sequence 0, *d*₀, *d*₀ + *d*₁, ..., *a*, where *a* is the number of
155 /// arcs in the graph) in a form that makes it possible to compute
156 /// successors (for example, using the suitable `webgraph build` command).
157 ///
158 /// # Arguments
159 ///
160 /// * `func` - The function to apply to each chunk of nodes.
161 ///
162 /// * `fold` - The function to fold the results obtained from each chunk.
163 /// It will be passed to the [`Iterator::fold`].
164 ///
165 /// * `granularity` - The granularity of parallel tests.
166 ///
167 /// * `deg_cumul_func` - The degree cumulative function of the graph.
168 ///
169 /// * `thread_pool` - The thread pool to use. The maximum level of
170 /// parallelism is given by the number of threads in the pool.
171 ///
172 /// * `pl` - A mutable reference to a concurrent progress logger.
173 fn par_apply<
174 F: Fn(Range<usize>) -> A + Sync,
175 A: Default + Send,
176 R: Fn(A, A) -> A + Sync,
177 D: for<'a> Succ<Input = usize, Output<'a> = usize>,
178 >(
179 &self,
180 func: F,
181 fold: R,
182 granularity: Granularity,
183 deg_cumul: &D,
184 thread_pool: &ThreadPool,
185 pl: &mut impl ConcurrentProgressLog,
186 ) -> A {
187 FairChunks::new(
188 granularity.arc_granularity(
189 self.num_nodes(),
190 Some(deg_cumul.get(deg_cumul.len() - 1) as u64),
191 ),
192 deg_cumul,
193 )
194 .par_map_fold_with(
195 pl.clone(),
196 |pl, range| {
197 let len = range.len();
198 let res = func(range);
199 pl.update_with_count(len);
200 res
201 },
202 fold,
203 thread_pool,
204 )
205 }
206}
207
208/// Error types that can occur during graph equality checking.
209#[derive(Error, Debug, Clone, PartialEq, Eq)]
210pub enum EqError {
211 /// The graphs have different numbers of nodes.
212 #[error("Different number of nodes: {first} != {second}")]
213 NumNodes { first: usize, second: usize },
214
215 /// The graphs have different numbers of arcs.
216 #[error("Different number of arcs: {first} !={second}")]
217 NumArcs { first: u64, second: u64 },
218
219 /// The graphs have different successors for a specific node.
220 #[error("Different successors for node {node}: at index {index} {first} != {second}")]
221 Successors {
222 node: usize,
223 index: usize,
224 first: String,
225 second: String,
226 },
227
228 /// The graphs have different outdegrees for a specific node.
229 #[error("Different outdegree for node {node}: {first} != {second}")]
230 Outdegree {
231 node: usize,
232 first: usize,
233 second: usize,
234 },
235}
236
237#[doc(hidden)]
238/// Checks whether two sorted successors lists are identical,
239/// returning an appropriate error.
240pub fn eq_succs<L: PartialEq + std::fmt::Debug>(
241 node: usize,
242 succ0: impl IntoIterator<Item = L>,
243 succ1: impl IntoIterator<Item = L>,
244) -> Result<(), EqError> {
245 let mut succ0 = succ0.into_iter();
246 let mut succ1 = succ1.into_iter();
247 let mut index = 0;
248 loop {
249 match (succ0.next(), succ1.next()) {
250 (None, None) => return Ok(()),
251 (Some(s0), Some(s1)) => {
252 if s0 != s1 {
253 return Err(EqError::Successors {
254 node,
255 index,
256 first: format!("{s0:?}"),
257 second: format!("{s1:?}"),
258 });
259 }
260 }
261 (None, Some(_)) => {
262 return Err(EqError::Outdegree {
263 node,
264 first: index,
265 second: index + 1 + succ1.count(),
266 });
267 }
268 (Some(_), None) => {
269 return Err(EqError::Outdegree {
270 node,
271 first: index + 1 + succ0.count(),
272 second: index,
273 });
274 }
275 }
276 index += 1;
277 }
278}
279
280/// Checks if the two provided sorted labelings are equal.
281///
282/// Since graphs are labelings, this function can also be used to check whether
283/// sorted graphs are equal. If the graphs are different, an [`EqError`] is
284/// returned describing the first difference found.
285pub fn eq_sorted<L0: SequentialLabeling, L1: SequentialLabeling<Label = L0::Label>>(
286 l0: &L0,
287 l1: &L1,
288) -> Result<(), EqError>
289where
290 for<'a> L0::Lender<'a>: SortedLender,
291 for<'a> L1::Lender<'a>: SortedLender,
292 for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
293 for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
294 L0::Label: PartialEq + std::fmt::Debug,
295{
296 if l0.num_nodes() != l1.num_nodes() {
297 return Err(EqError::NumNodes {
298 first: l0.num_nodes(),
299 second: l1.num_nodes(),
300 });
301 }
302 for_!(((node0, succ0), (node1, succ1)) in l0.iter().zip(l1.iter()) {
303 debug_assert_eq!(node0, node1);
304 eq_succs(node0, succ0, succ1)?;
305 });
306 Ok(())
307}
308
309/// Convenience type alias for the iterator over the labels of a node
310/// returned by the [`iter_from`](SequentialLabeling::iter_from) method.
311pub type Labels<'succ, 'node, S> =
312 <<S as SequentialLabeling>::Lender<'node> as NodeLabelsLender<'succ>>::IntoIterator;
313
314/// Marker trait for lenders returned by [`SequentialLabeling::iter`] yielding
315/// node ids in ascending order.
316///
317/// The [`AssumeSortedLender`] type can be used to wrap a lender and
318/// unsafely implement this trait.
319///
320/// # Safety
321///
322/// The first element of the pairs returned by the iterator must go from zero to
323/// the [number of nodes](SequentialLabeling::num_nodes) of the graph, excluded.
324///
325/// # Examples
326///
327/// To bind the lender returned by [`SequentialLabeling::iter`] to implement this
328/// trait, you must use higher-rank trait bounds:
329/// ```rust
330/// use webgraph::traits::*;
331///
332/// fn takes_labeling_with_sorted_lender<G>(g: G) where
333/// G: SequentialLabeling,
334/// for<'a> G::Lender<'a>: SortedLender,
335/// {
336/// // ...
337/// }
338/// ```
339pub unsafe trait SortedLender: Lender {}
340
341/// A transparent wrapper for a [`NodeLabelsLender`] unsafely implementing
342/// [`SortedLender`].
343///
344/// This wrapper is useful when the underlying lender is known to return nodes
345/// in ascending order, but the trait is not implemented, and it is not possible
346/// to implement it directly because of the orphan rule.
347#[derive(Debug, Clone)]
348#[repr(transparent)]
349pub struct AssumeSortedLender<L> {
350 lender: L,
351}
352
353impl<L> AssumeSortedLender<L> {
354 /// # Safety
355 ///
356 /// The argument must return nodes in ascending order.
357 pub unsafe fn new(lender: L) -> Self {
358 Self { lender }
359 }
360}
361
362unsafe impl<L: Lender> SortedLender for AssumeSortedLender<L> {}
363
364impl<'succ, L: Lender> Lending<'succ> for AssumeSortedLender<L> {
365 type Lend = <L as Lending<'succ>>::Lend;
366}
367
368impl<L: Lender> Lender for AssumeSortedLender<L> {
369 #[inline(always)]
370 fn next(&mut self) -> Option<Lend<'_, Self>> {
371 self.lender.next()
372 }
373
374 #[inline(always)]
375 fn size_hint(&self) -> (usize, Option<usize>) {
376 self.lender.size_hint()
377 }
378}
379
380impl<L: ExactSizeLender> ExactSizeLender for AssumeSortedLender<L> {
381 #[inline(always)]
382 fn len(&self) -> usize {
383 self.lender.len()
384 }
385}
386
387impl<'lend, L> NodeLabelsLender<'lend> for AssumeSortedLender<L>
388where
389 L: Lender + for<'next> NodeLabelsLender<'next>,
390{
391 type Label = LenderLabel<'lend, L>;
392 type IntoIterator = <L as NodeLabelsLender<'lend>>::IntoIterator;
393}
394
395/// Marker trait for [`Iterator`]s yielding labels in the order induced by
396/// enumerating the successors in ascending order.
397///
398/// The [`AssumeSortedIterator`] type can be used to wrap an iterator and
399/// unsafely implement this trait.
400///
401/// # Safety
402///
403/// The labels returned by the iterator must be in the order in which they would
404/// be if successors were returned in ascending order.
405///
406/// # Examples
407///
408/// To bind the iterators returned by the lender returned by
409/// [`SequentialLabeling::iter`] to implement this trait, you must use
410/// higher-rank trait bounds:
411/// ```rust
412/// use webgraph::traits::*;
413///
414/// fn takes_labeling_with_sorted_iterators<G>(g: G) where
415/// G: SequentialLabeling,
416/// for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
417/// {
418/// // ...
419/// }
420/// ```
421pub unsafe trait SortedIterator: Iterator {}
422
423/// A transparent wrapper for an [`Iterator`] unsafely implementing
424/// [`SortedIterator`].
425///
426/// This wrapper is useful when an iterator is known to return labels in sorted
427/// order, but the trait is not implemented, and it is not possible to implement
428/// it directly because of the orphan rule.
429#[derive(Debug, Clone)]
430#[repr(transparent)]
431pub struct AssumeSortedIterator<I> {
432 iter: I,
433}
434
435impl<I> AssumeSortedIterator<I> {
436 /// # Safety
437 /// This is unsafe as the propose of this struct is to attach an unsafe
438 /// trait to a struct that does not implement it.
439 pub unsafe fn new(iter: I) -> Self {
440 Self { iter }
441 }
442}
443
444unsafe impl<I: Iterator> SortedIterator for AssumeSortedIterator<I> {}
445
446impl<I: Iterator> Iterator for AssumeSortedIterator<I> {
447 type Item = I::Item;
448
449 fn next(&mut self) -> Option<Self::Item> {
450 self.iter.next()
451 }
452}
453
454impl<I: ExactSizeIterator> ExactSizeIterator for AssumeSortedIterator<I> {
455 fn len(&self) -> usize {
456 self.iter.len()
457 }
458}
459
460/// A [`SequentialLabeling`] providing, additionally, random access to
461/// the list of labels associated with a node.
462///
463/// The function [`check_impl`] can be used to check whether the
464/// sequential and random-access implementations of a labeling are consistent.
465#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
466pub trait RandomAccessLabeling: SequentialLabeling {
467 /// The type of the iterator over the labels of a node
468 /// returned by [`labels`](RandomAccessLabeling::labels).
469 type Labels<'succ>: IntoIterator<Item = <Self as SequentialLabeling>::Label>
470 where
471 Self: 'succ;
472
473 /// Returns the number of arcs in the graph.
474 fn num_arcs(&self) -> u64;
475
476 /// Returns the labels associated with a node.
477 fn labels(&self, node_id: usize) -> <Self as RandomAccessLabeling>::Labels<'_>;
478
479 /// Returns the number of labels associated with a node.
480 fn outdegree(&self, node_id: usize) -> usize;
481}
482
483/// Error types that can occur during checking the implementation of a random
484/// access labeling.
485#[derive(Error, Debug, Clone, PartialEq, Eq)]
486pub enum CheckImplError {
487 /// The number of nodes returned by [`iter`](SequentialLabeling::iter)
488 /// is different from the number returned by
489 /// [`num_nodes`](SequentialLabeling::num_nodes).
490 #[error("Different number of nodes: {iter} (iter) != {method} (num_nodes)")]
491 NumNodes { iter: usize, method: usize },
492
493 /// The number of successors returned by [`iter`](SequentialLabeling::iter)
494 /// is different from the number returned by
495 /// [`num_arcs`](RandomAccessLabeling::num_arcs).
496 #[error("Different number of nodes: {iter} (iter) != {method} (num_arcs)")]
497 NumArcs { iter: u64, method: u64 },
498
499 /// The two implementations return different labels for a specific node.
500 #[error(
501 "Different successors for node {node}: at index {index} {sequential} (sequential) != {random_access} (random access)"
502 )]
503 Successors {
504 node: usize,
505 index: usize,
506 sequential: String,
507 random_access: String,
508 },
509
510 /// The graphs have different outdegrees for a specific node.
511 #[error(
512 "Different outdegree for node {node}: {sequential} (sequential) != {random_access} (random access)"
513 )]
514 Outdegree {
515 node: usize,
516 sequential: usize,
517 random_access: usize,
518 },
519}
520
521/// Checks the sequential vs. random-access implementation of a sorted
522/// random-access labeling.
523///
524/// Note that this method will check that the sequential and random-access
525/// iterators on labels of each node are identical, and that the number of
526/// nodes returned by the sequential iterator is the same as the number of
527/// nodes returned by [`num_nodes`](SequentialLabeling::num_nodes).
528pub fn check_impl<L: RandomAccessLabeling>(l: L) -> Result<(), CheckImplError>
529where
530 L::Label: PartialEq + std::fmt::Debug,
531{
532 let mut num_nodes = 0;
533 let mut num_arcs: u64 = 0;
534 for_!((node, succ_iter) in l.iter() {
535 num_nodes += 1;
536 let mut succ_iter = succ_iter.into_iter();
537 let mut succ = l.labels(node).into_iter();
538 let mut index = 0;
539 loop {
540 match (succ_iter.next(), succ.next()) {
541 (None, None) => break,
542 (Some(s0), Some(s1)) => {
543 if s0 != s1 {
544 return Err(CheckImplError::Successors {
545 node,
546 index,
547 sequential: format!("{s0:?}"),
548 random_access: format!("{s1:?}"),
549 });
550 }
551 }
552 (None, Some(_)) => {
553 return Err(CheckImplError::Outdegree {
554 node,
555 sequential: index,
556 random_access: index + 1 + succ.count(),
557 });
558 }
559 (Some(_), None) => {
560 return Err(CheckImplError::Outdegree {
561 node,
562 sequential: index + 1 + succ_iter.count(),
563 random_access: index,
564 });
565 }
566 }
567 index += 1;
568 }
569 num_arcs += index as u64;
570 });
571
572 if num_nodes != l.num_nodes() {
573 Err(CheckImplError::NumNodes {
574 method: l.num_nodes(),
575 iter: num_nodes,
576 })
577 } else if num_arcs != l.num_arcs() {
578 Err(CheckImplError::NumArcs {
579 method: l.num_arcs(),
580 iter: num_arcs,
581 })
582 } else {
583 Ok(())
584 }
585}
586
587/// A struct used to make it easy to implement sequential access
588/// starting from random access.
589///
590/// Users can implement just random-access primitives and then
591/// use this structure to implement sequential access.
592pub struct IteratorImpl<'node, G: RandomAccessLabeling> {
593 pub labeling: &'node G,
594 pub nodes: core::ops::Range<usize>,
595}
596
597unsafe impl<G: RandomAccessLabeling> SortedLender for IteratorImpl<'_, G> {}
598
599impl<'succ, G: RandomAccessLabeling> NodeLabelsLender<'succ> for IteratorImpl<'_, G> {
600 type Label = G::Label;
601 type IntoIterator = <G as RandomAccessLabeling>::Labels<'succ>;
602}
603
604impl<'succ, G: RandomAccessLabeling> Lending<'succ> for IteratorImpl<'_, G> {
605 type Lend = (usize, <G as RandomAccessLabeling>::Labels<'succ>);
606}
607
608impl<G: RandomAccessLabeling> Lender for IteratorImpl<'_, G> {
609 #[inline(always)]
610 fn next(&mut self) -> Option<Lend<'_, Self>> {
611 self.nodes
612 .next()
613 .map(|node_id| (node_id, self.labeling.labels(node_id)))
614 }
615}