Skip to main content

velesdb_core/collection/graph/
traversal.rs

1//! Graph traversal algorithms for multi-hop queries.
2//!
3//! This module provides BFS-based traversal for variable-length path patterns
4//! like `(a)-[*1..3]->(b)` in MATCH clauses.
5//!
6//! # Streaming Mode (EPIC-019 US-005)
7//!
8//! For large graphs, the module provides streaming iterators that yield results
9//! lazily without loading all visited nodes into memory at once.
10
11#![allow(dead_code)] // WIP: Will be used by MATCH clause execution
12
13use super::EdgeStore;
14use std::collections::{HashSet, VecDeque};
15
16/// Default maximum depth for unbounded traversals.
17pub const DEFAULT_MAX_DEPTH: u32 = 3;
18
19/// Safety cap for maximum depth to prevent runaway traversals.
20/// Only applied when user requests unbounded traversal (*).
21///
22/// Note: Neo4j and ArangoDB do NOT impose hard limits.
23/// 100 is chosen to cover most real-world use cases:
24/// - Social networks (6 degrees of separation)
25/// - Dependency graphs (deep npm/cargo trees)
26/// - Organizational hierarchies
27/// - Knowledge graphs
28pub const SAFETY_MAX_DEPTH: u32 = 100;
29
30/// Result of a graph traversal operation.
31#[derive(Debug, Clone)]
32pub struct TraversalResult {
33    /// The target node ID reached.
34    pub target_id: u64,
35    /// The path taken (list of edge IDs).
36    pub path: Vec<u64>,
37    /// Depth of the traversal (number of hops).
38    pub depth: u32,
39}
40
41impl TraversalResult {
42    /// Creates a new traversal result.
43    #[must_use]
44    pub fn new(target_id: u64, path: Vec<u64>, depth: u32) -> Self {
45        Self {
46            target_id,
47            path,
48            depth,
49        }
50    }
51}
52
53/// Configuration for graph traversal.
54#[derive(Debug, Clone)]
55pub struct TraversalConfig {
56    /// Minimum number of hops (inclusive).
57    pub min_depth: u32,
58    /// Maximum number of hops (inclusive).
59    pub max_depth: u32,
60    /// Maximum number of results to return.
61    pub limit: usize,
62    /// Filter by relationship types (empty = all types).
63    pub rel_types: Vec<String>,
64}
65
66impl Default for TraversalConfig {
67    fn default() -> Self {
68        Self {
69            min_depth: 1,
70            max_depth: DEFAULT_MAX_DEPTH,
71            limit: 100,
72            rel_types: Vec::new(),
73        }
74    }
75}
76
77impl TraversalConfig {
78    /// Creates a config for a specific range (e.g., *1..3).
79    ///
80    /// Respects the caller's max_depth without artificial capping.
81    /// For unbounded traversals, use `with_unbounded_range()` instead.
82    #[must_use]
83    pub fn with_range(min: u32, max: u32) -> Self {
84        Self {
85            min_depth: min,
86            max_depth: max,
87            ..Self::default()
88        }
89    }
90
91    /// Creates a config for unbounded traversal (e.g., *1..).
92    ///
93    /// Applies SAFETY_MAX_DEPTH cap to prevent runaway traversals.
94    #[must_use]
95    pub fn with_unbounded_range(min: u32) -> Self {
96        Self {
97            min_depth: min,
98            max_depth: SAFETY_MAX_DEPTH,
99            ..Self::default()
100        }
101    }
102
103    /// Sets the result limit.
104    #[must_use]
105    pub fn with_limit(mut self, limit: usize) -> Self {
106        self.limit = limit;
107        self
108    }
109
110    /// Filters by relationship types.
111    #[must_use]
112    pub fn with_rel_types(mut self, types: Vec<String>) -> Self {
113        self.rel_types = types;
114        self
115    }
116
117    /// Sets a custom max depth (for advanced use cases).
118    #[must_use]
119    pub fn with_max_depth(mut self, max_depth: u32) -> Self {
120        self.max_depth = max_depth;
121        self
122    }
123}
124
125/// BFS state for traversal.
126#[derive(Debug)]
127pub(super) struct BfsState {
128    /// Current node ID.
129    pub(super) node_id: u64,
130    /// Path taken to reach this node (edge IDs).
131    pub(super) path: Vec<u64>,
132    /// Current depth.
133    pub(super) depth: u32,
134}
135
136/// Direction of edge traversal used by the shared BFS helper.
137#[derive(Debug, Clone, Copy, PartialEq, Eq)]
138enum BfsDirection {
139    /// Follow outgoing edges (source -> target).
140    Forward,
141    /// Follow incoming edges (target -> source).
142    Reverse,
143}
144
145/// Core BFS loop shared by forward and reverse traversal.
146///
147/// The `direction` parameter controls which edges are followed and which
148/// endpoint is used as the next hop. All other logic is identical.
149#[must_use]
150fn bfs_traverse_directed(
151    edge_store: &EdgeStore,
152    source_id: u64,
153    config: &TraversalConfig,
154    direction: BfsDirection,
155) -> Vec<TraversalResult> {
156    let mut results = Vec::new();
157    let mut visited = HashSet::new();
158    let mut queue = VecDeque::new();
159
160    // CRITICAL FIX: Mark source node as visited before traversal
161    // to prevent cycles back to source causing duplicate work
162    visited.insert(source_id);
163
164    queue.push_back(BfsState {
165        node_id: source_id,
166        path: Vec::new(),
167        depth: 0,
168    });
169
170    while let Some(state) = queue.pop_front() {
171        if results.len() >= config.limit {
172            break;
173        }
174        let edges = match direction {
175            BfsDirection::Forward => edge_store.get_outgoing(state.node_id),
176            BfsDirection::Reverse => edge_store.get_incoming(state.node_id),
177        };
178        process_bfs_neighbors(
179            &edges,
180            &state,
181            config,
182            direction,
183            &mut results,
184            &mut visited,
185            &mut queue,
186        );
187    }
188
189    results
190}
191
192/// Processes neighbors for a single BFS level: filters edges, records results,
193/// and enqueues unvisited nodes for the next hop.
194#[inline]
195fn process_bfs_neighbors(
196    edges: &[&super::GraphEdge],
197    state: &BfsState,
198    config: &TraversalConfig,
199    direction: BfsDirection,
200    results: &mut Vec<TraversalResult>,
201    visited: &mut HashSet<u64>,
202    queue: &mut VecDeque<BfsState>,
203) {
204    for edge in edges {
205        if results.len() >= config.limit {
206            break;
207        }
208        if !config.rel_types.is_empty() && !config.rel_types.contains(&edge.label().to_string()) {
209            continue;
210        }
211        let next_node = match direction {
212            BfsDirection::Forward => edge.target(),
213            BfsDirection::Reverse => edge.source(),
214        };
215        let new_depth = state.depth + 1;
216        if new_depth > config.max_depth {
217            continue;
218        }
219        let mut new_path = state.path.clone();
220        new_path.push(edge.id());
221
222        if new_depth >= config.min_depth {
223            results.push(TraversalResult::new(next_node, new_path.clone(), new_depth));
224        }
225        if new_depth < config.max_depth && visited.insert(next_node) {
226            queue.push_back(BfsState {
227                node_id: next_node,
228                path: new_path,
229                depth: new_depth,
230            });
231        }
232    }
233}
234
235/// Performs BFS traversal from a source node.
236///
237/// Finds all paths from `source_id` within the configured depth range.
238/// Uses iterative BFS with `VecDeque` for better cache locality.
239///
240/// # Arguments
241///
242/// * `edge_store` - The edge storage to traverse.
243/// * `source_id` - Starting node ID.
244/// * `config` - Traversal configuration.
245///
246/// # Returns
247///
248/// Vector of traversal results, limited by `config.limit`.
249#[must_use]
250pub fn bfs_traverse(
251    edge_store: &EdgeStore,
252    source_id: u64,
253    config: &TraversalConfig,
254) -> Vec<TraversalResult> {
255    bfs_traverse_directed(edge_store, source_id, config, BfsDirection::Forward)
256}
257
258/// Performs BFS traversal in the reverse direction (following incoming edges).
259#[must_use]
260pub fn bfs_traverse_reverse(
261    edge_store: &EdgeStore,
262    source_id: u64,
263    config: &TraversalConfig,
264) -> Vec<TraversalResult> {
265    bfs_traverse_directed(edge_store, source_id, config, BfsDirection::Reverse)
266}
267
268/// Performs bidirectional BFS (follows both directions).
269#[must_use]
270pub fn bfs_traverse_both(
271    edge_store: &EdgeStore,
272    source_id: u64,
273    config: &TraversalConfig,
274) -> Vec<TraversalResult> {
275    let mut results = Vec::new();
276    let half_limit = config.limit / 2 + 1;
277
278    let config_half = TraversalConfig {
279        limit: half_limit,
280        ..config.clone()
281    };
282
283    // Forward traversal
284    let forward = bfs_traverse(edge_store, source_id, &config_half);
285    results.extend(forward);
286
287    // Reverse traversal
288    if results.len() < config.limit {
289        let reverse = bfs_traverse_reverse(edge_store, source_id, &config_half);
290        for r in reverse {
291            if results.len() >= config.limit {
292                break;
293            }
294            // Avoid duplicates
295            if !results
296                .iter()
297                .any(|existing| existing.target_id == r.target_id && existing.path == r.path)
298            {
299                results.push(r);
300            }
301        }
302    }
303
304    results.truncate(config.limit);
305    results
306}
307
308// Tests moved to traversal_tests.rs per project rules