swh_graph_stdlib/
visit.rs

1// Copyright (C) 2024  The Software Heritage developers
2// See the AUTHORS file at the top-level directory of this distribution
3// License: GNU General Public License version 3, or any later version
4// See top-level LICENSE file for more information
5
6use std::borrow::Borrow;
7use std::collections::VecDeque;
8
9use swh_graph::graph::*;
10
11use crate::collections::{AdaptiveNodeSet, NodeSet, ReadNodeSet};
12
13/// Stateful BFS (breadth-first search) visit of (a part of) the Software
14/// Heritage graph, returning deduplicated node identifiers.
15///
16/// This visit uses [NodeId]s ([usize] integers) to keep track of visited and
17/// to be visited nodes. As such it is suitable for small- to medium-sized
18/// visits.  For large-sized graph visits (e.g., the entire graph or close to
19/// that) it is preferable to use bit vectors (incurring the price of a higher
20/// startup cost to allocate the bit vector.
21pub struct NodeVisit<'a, G>
22where
23    G: SwhForwardGraph,
24{
25    graph: &'a G,
26    visited: AdaptiveNodeSet,
27    queue: VecDeque<NodeId>,
28}
29
30impl<'a, G> NodeVisit<'a, G>
31where
32    G: SwhForwardGraph,
33{
34    fn new<I: IntoIterator<Item: Borrow<NodeId>>>(graph: &'a G, nodes: I) -> Self {
35        NodeVisit {
36            graph,
37            visited: AdaptiveNodeSet::new(graph.num_nodes()),
38            queue: nodes.into_iter().map(|item| *item.borrow()).collect(),
39        }
40    }
41}
42
43impl<G> Iterator for NodeVisit<'_, G>
44where
45    G: SwhForwardGraph,
46{
47    type Item = NodeId;
48
49    fn next(&mut self) -> Option<Self::Item> {
50        if let Some(current_node) = self.queue.pop_front() {
51            for succ in self.graph.successors(current_node) {
52                if !self.visited.contains(succ) {
53                    self.queue.push_back(succ);
54                    self.visited.insert(succ);
55                }
56            }
57            Some(current_node)
58        } else {
59            None
60        }
61    }
62}
63
64/// Iterate on the nodes of the sub-graph rooted at `start`, in BFS order.
65///
66/// `start` is usually a single node id, passed as `&[node]`, but can be a
67/// non-singleton slice or iterator of nodes, to avoid independently re-visiting shared
68/// sub-graphs from multiple starting points.
69///
70/// See [NodeVisit] documentation for performance considerations.
71///
72/// # Examples
73///
74/// ```
75/// # use swh_graph::graph::SwhForwardGraph;
76/// # use swh_graph_stdlib::iter_nodes;
77/// #
78/// # fn f<G: SwhForwardGraph>(graph: &G) {
79/// let src = &[1, 2, 3];
80/// for node in iter_nodes(graph, src) {
81///     println!("Visiting: {}", node);
82/// }
83/// # }
84/// ```
85///
86/// ```
87/// # use swh_graph::graph::SwhForwardGraph;
88/// # use swh_graph_stdlib::iter_nodes;
89/// #
90/// # fn f<G: SwhForwardGraph>(graph: &G) {
91/// let src = 1..4;
92/// for node in iter_nodes(graph, src) {
93///     println!("Visiting: {}", node);
94/// }
95/// # }
96/// ```
97pub fn iter_nodes<G, I: IntoIterator<Item: Borrow<NodeId>>>(graph: &G, start: I) -> NodeVisit<'_, G>
98where
99    G: SwhForwardGraph,
100{
101    NodeVisit::new(graph, start)
102}