swh_graph_stdlib/visit.rs
1// Copyright (C) 2024 The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::borrow::Borrow;
7use std::collections::VecDeque;
8
9use swh_graph::graph::*;
10
11use crate::collections::{AdaptiveNodeSet, NodeSet, ReadNodeSet};
12
13/// Stateful BFS (breadth-first search) visit of (a part of) the Software
14/// Heritage graph, returning deduplicated node identifiers.
15///
16/// This visit uses [NodeId]s ([usize] integers) to keep track of visited and
17/// to be visited nodes. As such it is suitable for small- to medium-sized
18/// visits. For large-sized graph visits (e.g., the entire graph or close to
19/// that) it is preferable to use bit vectors (incurring the price of a higher
20/// startup cost to allocate the bit vector.
21pub struct NodeVisit<'a, G>
22where
23 G: SwhForwardGraph,
24{
25 graph: &'a G,
26 visited: AdaptiveNodeSet,
27 queue: VecDeque<NodeId>,
28}
29
30impl<'a, G> NodeVisit<'a, G>
31where
32 G: SwhForwardGraph,
33{
34 fn new<I: IntoIterator<Item: Borrow<NodeId>>>(graph: &'a G, nodes: I) -> Self {
35 NodeVisit {
36 graph,
37 visited: AdaptiveNodeSet::new(graph.num_nodes()),
38 queue: nodes.into_iter().map(|item| *item.borrow()).collect(),
39 }
40 }
41}
42
43impl<G> Iterator for NodeVisit<'_, G>
44where
45 G: SwhForwardGraph,
46{
47 type Item = NodeId;
48
49 fn next(&mut self) -> Option<Self::Item> {
50 if let Some(current_node) = self.queue.pop_front() {
51 for succ in self.graph.successors(current_node) {
52 if !self.visited.contains(succ) {
53 self.queue.push_back(succ);
54 self.visited.insert(succ);
55 }
56 }
57 Some(current_node)
58 } else {
59 None
60 }
61 }
62}
63
64/// Iterate on the nodes of the sub-graph rooted at `start`, in BFS order.
65///
66/// `start` is usually a single node id, passed as `&[node]`, but can be a
67/// non-singleton slice or iterator of nodes, to avoid independently re-visiting shared
68/// sub-graphs from multiple starting points.
69///
70/// See [NodeVisit] documentation for performance considerations.
71///
72/// # Examples
73///
74/// ```
75/// # use swh_graph::graph::SwhForwardGraph;
76/// # use swh_graph_stdlib::iter_nodes;
77/// #
78/// # fn f<G: SwhForwardGraph>(graph: &G) {
79/// let src = &[1, 2, 3];
80/// for node in iter_nodes(graph, src) {
81/// println!("Visiting: {}", node);
82/// }
83/// # }
84/// ```
85///
86/// ```
87/// # use swh_graph::graph::SwhForwardGraph;
88/// # use swh_graph_stdlib::iter_nodes;
89/// #
90/// # fn f<G: SwhForwardGraph>(graph: &G) {
91/// let src = 1..4;
92/// for node in iter_nodes(graph, src) {
93/// println!("Visiting: {}", node);
94/// }
95/// # }
96/// ```
97pub fn iter_nodes<G, I: IntoIterator<Item: Borrow<NodeId>>>(graph: &G, start: I) -> NodeVisit<'_, G>
98where
99 G: SwhForwardGraph,
100{
101 NodeVisit::new(graph, start)
102}