webgraph/traits/labels.rs
1/*
2 * SPDX-FileCopyrightText: 2023 Tommaso Fontana
3 * SPDX-FileCopyrightText: 2023 Inria
4 * SPDX-FileCopyrightText: 2023 Sebastiano Vigna
5 *
6 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
7 */
8
9/*!
10
11Traits for accessing labelings, both sequentially and
12in random-access fashion.
13
14A *labeling* is the basic storage unit for graph data. It associates to
15each node of a graph a list of labels. In the [sequential case](SequentialLabeling),
16one can obtain a [lender](SequentialLabeling::iter) that lends pairs given by a node
17and an iterator on the associated labels. In the [random-access case](RandomAccessLabeling),
18instead, one can get [an iterator on the labels associated with a node](RandomAccessLabeling::labels).
19Labelings can be [zipped together](crate::labels::Zip), obtaining a
20new labeling whose labels are pairs.
21
22The number of nodes *n* of the graph is returned by [`SequentialLabeling::num_nodes`],
23and nodes identifier are in the interval [0 . . *n*).
24
25*/
26
27use crate::{graphs::bvgraph::DCF, traits::LenderIntoIter, utils::Granularity};
28
29use super::{LenderLabel, NodeLabelsLender, ParMapFold};
30
31use core::ops::Range;
32use dsi_progress_logger::prelude::*;
33use impl_tools::autoimpl;
34use lender::*;
35use std::rc::Rc;
36use sux::{
37 dict::EliasFanoBuilder,
38 rank_sel::{SelectAdaptConst, SelectZeroAdaptConst},
39 traits::Succ,
40 utils::FairChunks,
41};
42use thiserror::Error;
43
44/// A labeling that can be accessed sequentially.
45///
46/// The iterator returned by [iter](SequentialLabeling::iter) is a
47/// [lender](NodeLabelsLender): to access the next pair, you must have finished
48/// to use the previous one. You can invoke [`Lender::copied`] to get a standard
49/// iterator, at the cost of some allocation and copying.
50///
51/// It is suggested that all implementors of this trait also implement
52/// [`IntoLender`] on a reference, returning the same lender as
53/// [`iter`](SequentialLabeling::iter). This makes it possible to use the
54/// [`for_!`](lender::for_) macro to iterate over the labeling (see the [module
55/// documentation](crate) for an example).
56///
57/// Note that there is no guarantee that the lender will return nodes in
58/// ascending order, or that the iterators on labels will return them in any
59/// specified order.
60///
61/// The marker traits [`SortedLender`] and [`SortedIterator`] can be used to
62/// force these properties. Note that [`SortedIterator`] implies that successors
63/// are returned in ascending order, and labels are returned in the same order.
64///
65/// This trait provides two default methods,
66/// [`par_apply`](SequentialLabeling::par_apply) and
67/// [`par_node_apply`](SequentialLabeling::par_node_apply), that make it easy to
68/// process in parallel the nodes of the labeling.
69///
70/// The function [`eq_sorted`] can be used to check whether two
71/// sorted labelings are equal.
72#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
73pub trait SequentialLabeling {
74 type Label;
75 /// The type of [`Lender`] over the successors of a node
76 /// returned by [`iter`](SequentialLabeling::iter).
77 type Lender<'node>: for<'next> NodeLabelsLender<'next, Label = Self::Label>
78 where
79 Self: 'node;
80
81 /// Returns the number of nodes in the graph.
82 fn num_nodes(&self) -> usize;
83
84 /// Returns the number of arcs in the graph, if available.
85 fn num_arcs_hint(&self) -> Option<u64> {
86 None
87 }
88
89 /// Returns an iterator over the labeling.
90 ///
91 /// Iterators over the labeling return pairs given by a node of the graph
92 /// and an [`IntoIterator`] over the labels.
93 fn iter(&self) -> Self::Lender<'_> {
94 self.iter_from(0)
95 }
96
97 /// Returns an iterator over the labeling starting at `from` (included).
98 ///
99 /// Note that if the iterator [is not sorted](SortedIterator), `from` is not
100 /// the node id of the first node returned by the iterator, but just the
101 /// starting point of the iteration
102 fn iter_from(&self, from: usize) -> Self::Lender<'_>;
103
104 /// Builds the degree cumulative function for this labeling.
105 ///
106 /// The degree cumulative function is the sequence 0, d₀, d₀ + d₁, … ,
107 /// *m*, where *dₖ* is the number of arcs/labels of node *k* and *m* is the
108 /// number of arcs/labels.
109 ///
110 /// # Panics
111 ///
112 /// Panics if [`num_arcs_hint`](SequentialLabeling::num_arcs_hint) returns
113 /// `None`.
114 fn build_dcf(&self) -> DCF {
115 let n = self.num_nodes();
116 let num_arcs = self
117 .num_arcs_hint()
118 .expect("build_dcf requires num_arcs_hint()") as usize;
119 let mut efb = EliasFanoBuilder::new(n + 1, num_arcs);
120 efb.push(0);
121 let mut cumul = 0usize;
122 let mut lender = self.iter();
123 while let Some((_node, succs)) = lender.next() {
124 cumul += succs.into_iter().count();
125 efb.push(cumul);
126 }
127 unsafe {
128 efb.build().map_high_bits(|high_bits| {
129 SelectZeroAdaptConst::<_, _, 12, 4>::new(SelectAdaptConst::<_, _, 12, 4>::new(
130 high_bits,
131 ))
132 })
133 }
134 }
135
136 /// Applies `func` to each chunk of nodes of size `node_granularity` in
137 /// parallel, and folds the results using `fold`.
138 ///
139 /// # Arguments
140 ///
141 /// * `func` - The function to apply to each chunk of nodes.
142 ///
143 /// * `fold` - The function to fold the results obtained from each chunk. It
144 /// will be passed to the [`Iterator::fold`].
145 ///
146 /// * `granularity` - The granularity of parallel tasks.
147 ///
148 /// * `pl` - An optional mutable reference to a progress logger.
149 ///
150 /// # Panics
151 ///
152 /// This method will panic if [`Granularity::node_granularity`] does.
153 fn par_node_apply<
154 A: Default + Send,
155 F: Fn(Range<usize>) -> A + Sync,
156 R: Fn(A, A) -> A + Sync,
157 >(
158 &self,
159 func: F,
160 fold: R,
161 granularity: Granularity,
162 pl: &mut impl ConcurrentProgressLog,
163 ) -> A {
164 let num_nodes = self.num_nodes();
165 let node_granularity = granularity.node_granularity(num_nodes, self.num_arcs_hint());
166 (0..num_nodes.div_ceil(node_granularity))
167 .map(|i| i * node_granularity..num_nodes.min((i + 1) * node_granularity))
168 .par_map_fold_with(
169 pl.clone(),
170 |pl, range| {
171 let len = range.len();
172 let res = func(range);
173 pl.update_with_count(len);
174 res
175 },
176 fold,
177 )
178 }
179
180 /// Applies `func` to each chunk of nodes containing approximately
181 /// `arc_granularity` arcs in parallel and folds the results using `fold`.
182 ///
183 /// You have to provide the degree cumulative function of the graph (i.e.,
184 /// the sequence 0, *d*₀, *d*₀ + *d*₁, ..., *a*, where *a* is the number of
185 /// arcs in the graph) in a form that makes it possible to compute
186 /// successors (for example, using the suitable `webgraph build` command).
187 ///
188 /// # Arguments
189 ///
190 /// * `func` - The function to apply to each chunk of nodes.
191 ///
192 /// * `fold` - The function to fold the results obtained from each chunk.
193 /// It will be passed to the [`Iterator::fold`].
194 ///
195 /// * `granularity` - The granularity of parallel tests.
196 ///
197 /// * `deg_cumul_func` - The degree cumulative function of the graph.
198 ///
199 /// * `pl` - A mutable reference to a concurrent progress logger.
200 fn par_apply<
201 F: Fn(Range<usize>) -> A + Sync,
202 A: Default + Send,
203 R: Fn(A, A) -> A + Sync,
204 D: for<'a> Succ<Input = usize, Output<'a> = usize>,
205 >(
206 &self,
207 func: F,
208 fold: R,
209 granularity: Granularity,
210 deg_cumul: &D,
211 pl: &mut impl ConcurrentProgressLog,
212 ) -> A {
213 FairChunks::new(
214 granularity.arc_granularity(
215 self.num_nodes(),
216 Some(deg_cumul.get(deg_cumul.len() - 1) as u64),
217 ),
218 deg_cumul,
219 )
220 .par_map_fold_with(
221 pl.clone(),
222 |pl, range| {
223 let len = range.len();
224 let res = func(range);
225 pl.update_with_count(len);
226 res
227 },
228 fold,
229 )
230 }
231}
232
233/// Error types that can occur during graph equality checking.
234#[derive(Error, Debug, Clone, PartialEq, Eq)]
235pub enum EqError {
236 /// The graphs have different numbers of nodes.
237 #[error("Different number of nodes: {first} != {second}")]
238 NumNodes { first: usize, second: usize },
239
240 /// The graphs have different numbers of arcs.
241 #[error("Different number of arcs: {first} != {second}")]
242 NumArcs { first: u64, second: u64 },
243
244 /// Iteration on the two graphs returned a different node.
245 #[error("Different node: at index {index} {first} != {second}")]
246 Node {
247 index: usize,
248 first: usize,
249 second: usize,
250 },
251
252 /// The graphs have different successors for a specific node.
253 #[error("Different successors for node {node}: at index {index} {first} != {second}")]
254 Successors {
255 node: usize,
256 index: usize,
257 first: String,
258 second: String,
259 },
260
261 /// The graphs have different outdegrees for a specific node.
262 #[error("Different outdegree for node {node}: {first} != {second}")]
263 Outdegree {
264 node: usize,
265 first: usize,
266 second: usize,
267 },
268}
269
270#[doc(hidden)]
271/// Checks whether two sorted successors lists are identical,
272/// returning an appropriate error.
273pub fn eq_succs<L: PartialEq + std::fmt::Debug>(
274 node: usize,
275 succ0: impl IntoIterator<Item = L>,
276 succ1: impl IntoIterator<Item = L>,
277) -> Result<(), EqError> {
278 let mut succ0 = succ0.into_iter();
279 let mut succ1 = succ1.into_iter();
280 let mut index = 0;
281 loop {
282 match (succ0.next(), succ1.next()) {
283 (None, None) => return Ok(()),
284 (Some(s0), Some(s1)) => {
285 if s0 != s1 {
286 return Err(EqError::Successors {
287 node,
288 index,
289 first: format!("{s0:?}"),
290 second: format!("{s1:?}"),
291 });
292 }
293 }
294 (None, Some(_)) => {
295 return Err(EqError::Outdegree {
296 node,
297 first: index,
298 second: index + 1 + succ1.count(),
299 });
300 }
301 (Some(_), None) => {
302 return Err(EqError::Outdegree {
303 node,
304 first: index + 1 + succ0.count(),
305 second: index,
306 });
307 }
308 }
309 index += 1;
310 }
311}
312
313/// Checks if the two provided sorted labelings are equal.
314///
315/// Since graphs are labelings, this function can also be used to check whether
316/// sorted graphs are equal. If the graphs are different, an [`EqError`] is
317/// returned describing the first difference found.
318pub fn eq_sorted<L0: SequentialLabeling, L1: SequentialLabeling<Label = L0::Label>>(
319 l0: &L0,
320 l1: &L1,
321) -> Result<(), EqError>
322where
323 for<'a> L0::Lender<'a>: SortedLender,
324 for<'a> L1::Lender<'a>: SortedLender,
325 for<'a, 'b> LenderIntoIter<'b, L0::Lender<'a>>: SortedIterator,
326 for<'a, 'b> LenderIntoIter<'b, L1::Lender<'a>>: SortedIterator,
327 L0::Label: PartialEq + std::fmt::Debug,
328{
329 if l0.num_nodes() != l1.num_nodes() {
330 return Err(EqError::NumNodes {
331 first: l0.num_nodes(),
332 second: l1.num_nodes(),
333 });
334 }
335 for_!((index, ((node0, succ0), (node1, succ1))) in l0.iter().zip(l1.iter()).enumerate() {
336 if node0 != node1 {
337 return Err(EqError::Node {
338 index,
339 first: node0,
340 second: node1,
341 });
342 }
343 eq_succs(node0, succ0, succ1)?;
344 });
345 Ok(())
346}
347
348/// Convenience type alias for the iterator over the labels of a node
349/// returned by the [`iter_from`](SequentialLabeling::iter_from) method.
350pub type Labels<'succ, 'node, S> =
351 <<S as SequentialLabeling>::Lender<'node> as NodeLabelsLender<'succ>>::IntoIterator;
352
353/// Marker trait for lenders returned by [`SequentialLabeling::iter`] yielding
354/// node ids in ascending order.
355///
356/// The [`AssumeSortedLender`] type can be used to wrap a lender and
357/// unsafely implement this trait.
358///
359/// # Safety
360///
361/// The first element of the pairs returned by the iterator must go from zero to
362/// the [number of nodes](SequentialLabeling::num_nodes) of the graph, excluded.
363///
364/// # Examples
365///
366/// To bind the lender returned by [`SequentialLabeling::iter`] to implement this
367/// trait, you must use higher-rank trait bounds:
368/// ```rust
369/// use webgraph::traits::*;
370///
371/// fn takes_labeling_with_sorted_lender<G>(g: G) where
372/// G: SequentialLabeling,
373/// for<'a> G::Lender<'a>: SortedLender,
374/// {
375/// // ...
376/// }
377/// ```
378pub unsafe trait SortedLender: Lender {}
379
380/// A transparent wrapper for a [`NodeLabelsLender`] unsafely implementing
381/// [`SortedLender`].
382///
383/// This wrapper is useful when the underlying lender is known to return nodes
384/// in ascending order, but the trait is not implemented, and it is not possible
385/// to implement it directly because of the orphan rule.
386#[derive(Debug, Clone)]
387#[repr(transparent)]
388pub struct AssumeSortedLender<L> {
389 lender: L,
390}
391
392impl<L> AssumeSortedLender<L> {
393 /// # Safety
394 ///
395 /// The argument must return nodes in ascending order.
396 pub unsafe fn new(lender: L) -> Self {
397 Self { lender }
398 }
399}
400
401unsafe impl<L: Lender> SortedLender for AssumeSortedLender<L> {}
402
403impl<'succ, L: Lender> Lending<'succ> for AssumeSortedLender<L> {
404 type Lend = <L as Lending<'succ>>::Lend;
405}
406
407impl<L: Lender> Lender for AssumeSortedLender<L> {
408 // SAFETY: the lend is covariant as it directly delegates to the underlying
409 // covariant lender L.
410 unsafe_assume_covariance!();
411
412 #[inline(always)]
413 fn next(&mut self) -> Option<Lend<'_, Self>> {
414 self.lender.next()
415 }
416
417 #[inline(always)]
418 fn size_hint(&self) -> (usize, Option<usize>) {
419 self.lender.size_hint()
420 }
421}
422
423impl<L: ExactSizeLender> ExactSizeLender for AssumeSortedLender<L> {
424 #[inline(always)]
425 fn len(&self) -> usize {
426 self.lender.len()
427 }
428}
429
430impl<'lend, L> NodeLabelsLender<'lend> for AssumeSortedLender<L>
431where
432 L: Lender + for<'next> NodeLabelsLender<'next>,
433{
434 type Label = LenderLabel<'lend, L>;
435 type IntoIterator = <L as NodeLabelsLender<'lend>>::IntoIterator;
436}
437
438/// Marker trait for [`Iterator`]s yielding labels in the order induced by
439/// enumerating the successors in ascending order.
440///
441/// The [`AssumeSortedIterator`] type can be used to wrap an iterator and
442/// unsafely implement this trait.
443///
444/// # Safety
445///
446/// The labels returned by the iterator must be in the order in which they would
447/// be if successors were returned in ascending order.
448///
449/// # Examples
450///
451/// To bind the iterators returned by the lender returned by
452/// [`SequentialLabeling::iter`] to implement this trait, you must use
453/// higher-rank trait bounds:
454/// ```rust
455/// use webgraph::traits::*;
456///
457/// fn takes_labeling_with_sorted_iterators<G>(g: G) where
458/// G: SequentialLabeling,
459/// for<'a, 'b> LenderIntoIter<'b, G::Lender<'a>>: SortedIterator,
460/// {
461/// // ...
462/// }
463/// ```
464pub unsafe trait SortedIterator: Iterator {}
465
466/// A transparent wrapper for an [`Iterator`] unsafely implementing
467/// [`SortedIterator`].
468///
469/// This wrapper is useful when an iterator is known to return labels in sorted
470/// order, but the trait is not implemented, and it is not possible to implement
471/// it directly because of the orphan rule.
472#[derive(Debug, Clone)]
473#[repr(transparent)]
474pub struct AssumeSortedIterator<I> {
475 iter: I,
476}
477
478impl<I> AssumeSortedIterator<I> {
479 /// # Safety
480 /// This is unsafe as the purpose of this struct is to attach an unsafe
481 /// trait to a struct that does not implement it.
482 pub unsafe fn new(iter: I) -> Self {
483 Self { iter }
484 }
485}
486
487unsafe impl<I: Iterator> SortedIterator for AssumeSortedIterator<I> {}
488
489impl<I: Iterator> Iterator for AssumeSortedIterator<I> {
490 type Item = I::Item;
491
492 #[inline(always)]
493 fn next(&mut self) -> Option<Self::Item> {
494 self.iter.next()
495 }
496
497 #[inline(always)]
498 fn count(self) -> usize {
499 self.iter.count()
500 }
501}
502
503impl<I: ExactSizeIterator> ExactSizeIterator for AssumeSortedIterator<I> {
504 #[inline(always)]
505 fn len(&self) -> usize {
506 self.iter.len()
507 }
508}
509
510/// A [`SequentialLabeling`] providing, additionally, random access to
511/// the list of labels associated with a node.
512///
513/// The function [`check_impl`] can be used to check whether the
514/// sequential and random-access implementations of a labeling are consistent.
515#[autoimpl(for<S: trait + ?Sized> &S, &mut S, Rc<S>)]
516pub trait RandomAccessLabeling: SequentialLabeling {
517 /// The type of the iterator over the labels of a node
518 /// returned by [`labels`](RandomAccessLabeling::labels).
519 type Labels<'succ>: IntoIterator<Item = <Self as SequentialLabeling>::Label>
520 where
521 Self: 'succ;
522
523 /// Returns the number of arcs in the graph.
524 fn num_arcs(&self) -> u64;
525
526 /// Returns the labels associated with a node.
527 fn labels(&self, node_id: usize) -> <Self as RandomAccessLabeling>::Labels<'_>;
528
529 /// Returns the number of labels associated with a node.
530 fn outdegree(&self, node_id: usize) -> usize;
531}
532
533/// Error types that can occur during checking the implementation of a random
534/// access labeling.
535#[derive(Error, Debug, Clone, PartialEq, Eq)]
536pub enum CheckImplError {
537 /// The number of nodes returned by [`iter`](SequentialLabeling::iter)
538 /// is different from the number returned by
539 /// [`num_nodes`](SequentialLabeling::num_nodes).
540 #[error("Different number of nodes: {iter} (iter) != {method} (num_nodes)")]
541 NumNodes { iter: usize, method: usize },
542
543 /// The number of successors returned by [`iter`](SequentialLabeling::iter)
544 /// is different from the number returned by
545 /// [`num_arcs`](RandomAccessLabeling::num_arcs).
546 #[error("Different number of arcs: {iter} (iter) != {method} (num_arcs)")]
547 NumArcs { iter: u64, method: u64 },
548
549 /// The two implementations return different labels for a specific node.
550 #[error(
551 "Different successors for node {node}: at index {index} {sequential} (sequential) != {random_access} (random access)"
552 )]
553 Successors {
554 node: usize,
555 index: usize,
556 sequential: String,
557 random_access: String,
558 },
559
560 /// The graphs have different outdegrees for a specific node.
561 #[error(
562 "Different outdegree for node {node}: {sequential} (sequential) != {random_access} (random access)"
563 )]
564 Outdegree {
565 node: usize,
566 sequential: usize,
567 random_access: usize,
568 },
569}
570
571/// Checks the sequential vs. random-access implementation of a sorted
572/// random-access labeling.
573///
574/// Note that this method will check that the sequential and random-access
575/// iterators on labels of each node are identical, and that the number of
576/// nodes returned by the sequential iterator is the same as the number of
577/// nodes returned by [`num_nodes`](SequentialLabeling::num_nodes).
578pub fn check_impl<L: RandomAccessLabeling>(l: L) -> Result<(), CheckImplError>
579where
580 L::Label: PartialEq + std::fmt::Debug,
581{
582 let mut num_nodes = 0;
583 let mut num_arcs: u64 = 0;
584 for_!((node, succ_iter) in l.iter() {
585 num_nodes += 1;
586 let mut succ_iter = succ_iter.into_iter();
587 let mut succ = l.labels(node).into_iter();
588 let mut index = 0;
589 loop {
590 match (succ_iter.next(), succ.next()) {
591 (None, None) => break,
592 (Some(s0), Some(s1)) => {
593 if s0 != s1 {
594 return Err(CheckImplError::Successors {
595 node,
596 index,
597 sequential: format!("{s0:?}"),
598 random_access: format!("{s1:?}"),
599 });
600 }
601 }
602 (None, Some(_)) => {
603 return Err(CheckImplError::Outdegree {
604 node,
605 sequential: index,
606 random_access: index + 1 + succ.count(),
607 });
608 }
609 (Some(_), None) => {
610 return Err(CheckImplError::Outdegree {
611 node,
612 sequential: index + 1 + succ_iter.count(),
613 random_access: index,
614 });
615 }
616 }
617 index += 1;
618 }
619 num_arcs += index as u64;
620 });
621
622 if num_nodes != l.num_nodes() {
623 Err(CheckImplError::NumNodes {
624 method: l.num_nodes(),
625 iter: num_nodes,
626 })
627 } else if num_arcs != l.num_arcs() {
628 Err(CheckImplError::NumArcs {
629 method: l.num_arcs(),
630 iter: num_arcs,
631 })
632 } else {
633 Ok(())
634 }
635}
636
637/// A struct used to make it easy to implement sequential access
638/// starting from random access.
639///
640/// Users can implement just random-access primitives and then use this
641/// structure to implement the lender returned by
642/// [`iter_from`](SequentialLabeling::iter_from) by just returning an instance
643/// of this struct.
644pub struct LenderImpl<'node, G: RandomAccessLabeling> {
645 /// The underlying random-access labeling.
646 pub labeling: &'node G,
647 /// The range of nodes to iterate over.
648 pub nodes: core::ops::Range<usize>,
649}
650
651unsafe impl<G: RandomAccessLabeling> SortedLender for LenderImpl<'_, G> {}
652
653impl<'succ, G: RandomAccessLabeling> NodeLabelsLender<'succ> for LenderImpl<'_, G> {
654 type Label = G::Label;
655 type IntoIterator = <G as RandomAccessLabeling>::Labels<'succ>;
656}
657
658impl<'succ, G: RandomAccessLabeling> Lending<'succ> for LenderImpl<'_, G> {
659 type Lend = (usize, <G as RandomAccessLabeling>::Labels<'succ>);
660}
661
662impl<G: RandomAccessLabeling> Lender for LenderImpl<'_, G> {
663 // SAFETY: the lend is covariant as it returns labels from a RandomAccessLabeling,
664 // which are expected to be covariant in the lifetime.
665 unsafe_assume_covariance!();
666
667 #[inline(always)]
668 fn next(&mut self) -> Option<Lend<'_, Self>> {
669 self.nodes
670 .next()
671 .map(|node_id| (node_id, self.labeling.labels(node_id)))
672 }
673
674 #[inline(always)]
675 fn size_hint(&self) -> (usize, Option<usize>) {
676 (self.nodes.len(), Some(self.nodes.len()))
677 }
678}
679
680impl<G: RandomAccessLabeling> ExactSizeLender for LenderImpl<'_, G> {
681 #[inline(always)]
682 fn len(&self) -> usize {
683 self.nodes.len()
684 }
685}