Skip to main content

oxgraph_db/
traversal.rs

1//! Graph projection traversal primitives.
2
3use core::ops::ControlFlow;
4use std::collections::{BTreeMap, BTreeSet};
5
6use oxgraph_algo::{
7    BfsBounds, BfsEpochScratch, breadth_first_search_bounded, breadth_first_search_bounded_both,
8    reverse_breadth_first_search_bounded,
9};
10use oxgraph_graph::{
11    CanonicalElementIdentity, CanonicalRelationIdentity, EdgeSourceGraph, EdgeTargetGraph,
12    ElementIndex, LocalElementIdentity, OutgoingGraph,
13};
14
15use crate::{
16    DbError, ElementId, RelationId,
17    projection::{GraphProjection, ProjectionElementId},
18};
19
20/// Direction a graph navigation expands along.
21///
22/// Used by both [`Reader::neighbors`](crate::Reader::neighbors) (single-hop
23/// reverse-adjacency lookup) and [`Reader::walk`](crate::Reader::walk) (bounded
24/// projection BFS). `Outgoing` follows source→target edges, `Incoming` follows
25/// target→source edges, and `Both` follows either.
26///
27/// # Performance
28///
29/// Copying and comparing are `O(1)`.
30#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
31#[non_exhaustive]
32pub enum Direction {
33    /// Follow outgoing edges: the element plays the source role.
34    #[default]
35    Outgoing,
36    /// Follow incoming edges: the element plays the target role.
37    Incoming,
38    /// Follow outgoing edges first, then incoming edges.
39    Both,
40}
41
42/// Bounds for a bounded graph projection [`walk`](crate::Reader::walk).
43///
44/// # Performance
45///
46/// Copying is `O(1)`.
47#[derive(Clone, Copy, Debug, Eq, PartialEq)]
48pub struct Walk {
49    /// Maximum hop depth to expand from any seed.
50    pub max_depth: usize,
51    /// Direction used to expand each frontier element.
52    pub direction: Direction,
53    /// Maximum number of nodes to emit.
54    pub limit: usize,
55    /// Whether depth-0 seed elements are included in the result nodes.
56    pub include_start: bool,
57}
58
59impl Default for Walk {
60    /// Creates an outgoing depth-1 walk with an unbounded node limit, excluding
61    /// the seeds.
62    ///
63    /// # Performance
64    ///
65    /// This function is `O(1)`.
66    fn default() -> Self {
67        Self {
68            max_depth: 1,
69            direction: Direction::Outgoing,
70            limit: usize::MAX,
71            include_start: false,
72        }
73    }
74}
75
76/// One node discovered by a [`walk`](crate::Reader::walk).
77///
78/// # Performance
79///
80/// Copying and comparing are `O(1)`.
81#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
82pub struct TraversedNode {
83    /// Canonical element discovered by the walk.
84    pub element: ElementId,
85    /// Shortest discovered hop depth from any seed.
86    pub depth: usize,
87}
88
89/// One edge traversed by a [`walk`](crate::Reader::walk).
90///
91/// Edges connect two discovered nodes; the source and target follow the
92/// projection's source/target roles.
93///
94/// # Performance
95///
96/// Copying and comparing are `O(1)`.
97#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
98pub struct TraversedEdge {
99    /// Canonical relation backing this edge.
100    pub relation: RelationId,
101    /// Source endpoint element (the projection source role).
102    pub source: ElementId,
103    /// Target endpoint element (the projection target role).
104    pub target: ElementId,
105}
106
107/// A discovered subgraph: nodes in BFS first-discovery order plus the projection
108/// edges among them.
109///
110/// Nodes are unique by canonical element. Edges connect two discovered nodes and
111/// are unique by canonical relation, ordered by ascending `(source, target,
112/// relation)`.
113///
114/// # Performance
115///
116/// Iterating nodes or edges is `O(node count)` / `O(edge count)`.
117#[derive(Clone, Debug, Default, Eq, PartialEq)]
118pub struct Subgraph {
119    /// Discovered nodes in BFS first-discovery order.
120    pub nodes: Vec<TraversedNode>,
121    /// Edges connecting two discovered nodes.
122    pub edges: Vec<TraversedEdge>,
123}
124
125impl Subgraph {
126    /// Returns the discovered nodes.
127    ///
128    /// # Performance
129    ///
130    /// This method is `O(1)`.
131    #[must_use]
132    pub fn nodes(&self) -> &[TraversedNode] {
133        &self.nodes
134    }
135
136    /// Returns the traversed edges.
137    ///
138    /// # Performance
139    ///
140    /// This method is `O(1)`.
141    #[must_use]
142    pub fn edges(&self) -> &[TraversedEdge] {
143        &self.edges
144    }
145}
146
147/// Walks a materialized graph projection over the substrate bounded BFS,
148/// returning the discovered nodes AND the projection edges among them.
149///
150/// Seeds, depth bound, node limit, and seed inclusion map onto [`BfsBounds`];
151/// the visitor records each discovered element as one [`TraversedNode`] in
152/// first-discovery order. `Outgoing` uses [`breadth_first_search_bounded`],
153/// `Incoming` uses [`reverse_breadth_first_search_bounded`], and `Both` uses
154/// [`breadth_first_search_bounded_both`]. After the node set is fixed, every
155/// projection edge whose source and target are BOTH discovered is emitted as one
156/// [`TraversedEdge`], deduplicated by relation and ordered by `(source, target,
157/// relation)`, so the result never references a node it omitted.
158///
159/// # Errors
160///
161/// Returns [`DbError::UnknownElement`] when a seed is not part of the
162/// projection, or [`DbError::Traversal`] when bounded BFS fails.
163///
164/// # Performance
165///
166/// This function is `O(visited nodes + visited edges)` over the materialized
167/// projection.
168pub(crate) fn walk_graph_projection(
169    graph: &GraphProjection,
170    seeds: &[ElementId],
171    walk: Walk,
172) -> Result<Subgraph, DbError> {
173    if seeds.is_empty() || walk.limit == 0 {
174        return Ok(Subgraph::default());
175    }
176
177    let local_seeds = seeds
178        .iter()
179        .map(|seed| {
180            graph
181                .local_element_id(*seed)
182                .ok_or(DbError::UnknownElement { id: *seed })
183        })
184        .collect::<Result<Vec<ProjectionElementId>, DbError>>()?;
185
186    let bounds = BfsBounds {
187        max_depth: Some(u32::try_from(walk.max_depth).unwrap_or(u32::MAX)),
188        result_limit: walk.limit,
189        include_seeds: walk.include_start,
190    };
191
192    let bound = graph.element_bound();
193    let mut marks = vec![0_u32; bound];
194    let mut queue = vec![ProjectionElementId::new(0); bound];
195    let mut scratch = BfsEpochScratch::for_graph(graph, &mut marks, &mut queue);
196
197    let mut nodes = Vec::new();
198    // Seeds are always part of the discovered node set for edge collection, even
199    // when `include_start` excludes them from the emitted result nodes — an edge
200    // rooted at a seed connects to a discovered node and must be reported.
201    let mut discovered: BTreeMap<ElementId, ProjectionElementId> = local_seeds
202        .iter()
203        .map(|&local| (graph.canonical_element_id(local), local))
204        .collect();
205    {
206        let mut visitor = |element: ProjectionElementId, depth: u32| {
207            let canonical = graph.canonical_element_id(element);
208            let depth = usize::try_from(depth).unwrap_or(usize::MAX);
209            nodes.push(TraversedNode {
210                element: canonical,
211                depth,
212            });
213            discovered.insert(canonical, element);
214            ControlFlow::Continue(())
215        };
216
217        match walk.direction {
218            Direction::Outgoing => breadth_first_search_bounded(
219                graph,
220                &local_seeds,
221                bounds,
222                &mut scratch,
223                &mut visitor,
224            ),
225            Direction::Incoming => reverse_breadth_first_search_bounded(
226                graph,
227                &local_seeds,
228                bounds,
229                &mut scratch,
230                &mut visitor,
231            ),
232            Direction::Both => breadth_first_search_bounded_both(
233                graph,
234                &local_seeds,
235                bounds,
236                &mut scratch,
237                &mut visitor,
238            ),
239        }
240        .map_err(|_error| DbError::traversal("bounded traversal failed"))?;
241    }
242
243    let edges = collect_internal_edges(graph, &discovered);
244    Ok(Subgraph { nodes, edges })
245}
246
247/// Collects the projection edges whose source and target are both discovered.
248///
249/// Edges are deduplicated by canonical relation and ordered by `(source, target,
250/// relation)` so the result is deterministic and never references an undiscovered
251/// node.
252///
253/// # Performance
254///
255/// This function is `O(discovered nodes * out-degree + edge count log edge
256/// count)`.
257fn collect_internal_edges(
258    graph: &GraphProjection,
259    discovered: &BTreeMap<ElementId, ProjectionElementId>,
260) -> Vec<TraversedEdge> {
261    let mut seen = BTreeSet::new();
262    let mut edges = Vec::new();
263    for local in discovered.values().copied() {
264        for edge in graph.outgoing_edges(local) {
265            let source = graph.canonical_element_id(graph.source(edge));
266            let target = graph.canonical_element_id(graph.target(edge));
267            if !discovered.contains_key(&target) {
268                continue;
269            }
270            let relation = graph.canonical_relation_id(edge);
271            if !seen.insert(relation) {
272                continue;
273            }
274            edges.push(TraversedEdge {
275                relation,
276                source,
277                target,
278            });
279        }
280    }
281    edges.sort_unstable();
282    edges
283}