codegraph/
query.rs

//! Query builder for fluent graph queries.
//!
//! Provides a fluent interface for constructing and executing complex graph queries
//! with multiple filters and optimizations.
5
6use crate::error::Result;
7use crate::graph::{CodeGraph, Direction, Node, NodeId, NodeType, PropertyValue};
8
9/// A filter predicate that can be applied to nodes.
10type FilterFn = Box<dyn Fn(&Node) -> bool>;
11
12/// Fluent query builder for graph operations.
13///
14/// Allows chaining multiple filters to find specific nodes in the graph.
15///
16/// # Examples
17///
18/// ```
19/// use codegraph::{CodeGraph, NodeType};
20///
21/// # fn example() -> codegraph::Result<()> {
22/// let mut graph = CodeGraph::in_memory()?;
23/// // ... populate graph ...
24///
25/// // Find all public functions in a specific file
26/// let results = graph.query()
27///     .node_type(NodeType::Function)
28///     .in_file("src/main.rs")
29///     .property("visibility", "public")
30///     .execute()?;
31/// # Ok(())
32/// # }
33/// ```
34pub struct QueryBuilder<'a> {
35    graph: &'a CodeGraph,
36    filters: Vec<FilterFn>,
37    limit_value: Option<usize>,
38    in_file_filter: Option<String>,
39}
40
41impl<'a> QueryBuilder<'a> {
42    /// Create a new query builder for the given graph.
43    pub fn new(graph: &'a CodeGraph) -> Self {
44        Self {
45            graph,
46            filters: Vec::new(),
47            limit_value: None,
48            in_file_filter: None,
49        }
50    }
51
52    /// Filter nodes by type.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// # use codegraph::{CodeGraph, NodeType};
58    /// # fn example() -> codegraph::Result<()> {
59    /// # let graph = CodeGraph::in_memory()?;
60    /// let functions = graph.query()
61    ///     .node_type(NodeType::Function)
62    ///     .execute()?;
63    /// # Ok(())
64    /// # }
65    /// ```
66    pub fn node_type(mut self, node_type: NodeType) -> Self {
67        self.filters
68            .push(Box::new(move |node| node.node_type == node_type));
69        self
70    }
71
72    /// Filter nodes that are contained in a specific file.
73    ///
74    /// Looks up the file by path and finds all nodes connected via Contains edges.
75    pub fn in_file(mut self, file_path: &str) -> Self {
76        self.in_file_filter = Some(file_path.to_string());
77        self
78    }
79
80    /// Filter files by glob pattern.
81    ///
82    /// Supports wildcards: `*` matches any characters, `**` matches directories.
83    ///
84    /// # Examples
85    ///
86    /// - `src/*.rs` - All Rust files in src/
87    /// - `**/*.py` - All Python files recursively
88    /// - `tests/**/*.rs` - All Rust files under tests/
89    pub fn file_pattern(mut self, pattern: &str) -> Self {
90        let pattern = pattern.to_string();
91        self.filters.push(Box::new(move |node| {
92            if let Some(path) = node.properties.get_string("path") {
93                glob_match(&pattern, path)
94            } else {
95                false
96            }
97        }));
98        self
99    }
100
101    /// Filter nodes by exact property match.
102    ///
103    /// Supports string, int, float, and bool property values.
104    pub fn property<V: Into<PropertyValue>>(mut self, key: &str, value: V) -> Self {
105        let key = key.to_string();
106        let value = value.into();
107
108        self.filters.push(Box::new(move |node| {
109            if let Some(prop_value) = node.properties.get(&key) {
110                match (&value, prop_value) {
111                    (PropertyValue::String(v1), PropertyValue::String(v2)) => v1 == v2,
112                    (PropertyValue::Int(v1), PropertyValue::Int(v2)) => v1 == v2,
113                    (PropertyValue::Float(v1), PropertyValue::Float(v2)) => {
114                        (v1 - v2).abs() < f64::EPSILON
115                    }
116                    (PropertyValue::Bool(v1), PropertyValue::Bool(v2)) => v1 == v2,
117                    _ => false,
118                }
119            } else {
120                false
121            }
122        }));
123        self
124    }
125
126    /// Filter nodes that have a specific property (regardless of value).
127    pub fn property_exists(mut self, key: &str) -> Self {
128        let key = key.to_string();
129        self.filters
130            .push(Box::new(move |node| node.properties.contains_key(&key)));
131        self
132    }
133
134    /// Filter nodes by name containing a substring (case-insensitive).
135    pub fn name_contains(mut self, substring: &str) -> Self {
136        let substring = substring.to_lowercase();
137        self.filters.push(Box::new(move |node| {
138            if let Some(name) = node.properties.get_string("name") {
139                name.to_lowercase().contains(&substring)
140            } else {
141                false
142            }
143        }));
144        self
145    }
146
147    /// Filter nodes by name matching a regex pattern.
148    pub fn name_matches(mut self, pattern: &str) -> Self {
149        let pattern = pattern.to_string();
150        self.filters.push(Box::new(move |node| {
151            if let Some(name) = node.properties.get_string("name") {
152                // Simple regex: support ^ for start, $ for end, * for wildcard
153                regex_match(&pattern, name)
154            } else {
155                false
156            }
157        }));
158        self
159    }
160
161    /// Filter nodes using a custom predicate function.
162    ///
163    /// # Examples
164    ///
165    /// ```
166    /// # use codegraph::{CodeGraph, NodeType};
167    /// # fn example() -> codegraph::Result<()> {
168    /// # let graph = CodeGraph::in_memory()?;
169    /// // Find functions longer than 50 lines
170    /// let results = graph.query()
171    ///     .node_type(NodeType::Function)
172    ///     .custom(|node| {
173    ///         if let (Some(start), Some(end)) = (
174    ///             node.properties.get_int("line_start"),
175    ///             node.properties.get_int("line_end")
176    ///         ) {
177    ///             (end - start) > 50
178    ///         } else {
179    ///             false
180    ///         }
181    ///     })
182    ///     .execute()?;
183    /// # Ok(())
184    /// # }
185    /// ```
186    pub fn custom<F>(mut self, predicate: F) -> Self
187    where
188        F: Fn(&Node) -> bool + 'static,
189    {
190        self.filters.push(Box::new(predicate));
191        self
192    }
193
194    /// Limit the number of results returned.
195    pub fn limit(mut self, n: usize) -> Self {
196        self.limit_value = Some(n);
197        self
198    }
199
200    /// Execute the query and return matching node IDs.
201    pub fn execute(&self) -> Result<Vec<NodeId>> {
202        let mut results = Vec::new();
203        let limit = self.limit_value.unwrap_or(usize::MAX);
204
205        // If in_file filter is set, only search nodes in that file
206        let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
207            self.get_nodes_in_file(file_path)?
208        } else {
209            // Search all nodes
210            (0..self.graph.node_count() as u64).collect()
211        };
212
213        // Iterate through nodes and apply filters
214        for node_id in search_nodes {
215            if results.len() >= limit {
216                break;
217            }
218
219            if let Ok(node) = self.graph.get_node(node_id) {
220                if self.matches_filters(node) {
221                    results.push(node_id);
222                }
223            }
224        }
225
226        Ok(results)
227    }
228
229    /// Count the number of matching nodes without allocating a result vector.
230    pub fn count(&self) -> Result<usize> {
231        let mut count = 0;
232
233        // If in_file filter is set, only search nodes in that file
234        let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
235            self.get_nodes_in_file(file_path)?
236        } else {
237            // Search all nodes
238            (0..self.graph.node_count() as u64).collect()
239        };
240
241        for node_id in search_nodes {
242            if let Ok(node) = self.graph.get_node(node_id) {
243                if self.matches_filters(node) {
244                    count += 1;
245                }
246            }
247        }
248
249        Ok(count)
250    }
251
252    /// Check if any nodes match the query (short-circuits on first match).
253    pub fn exists(&self) -> Result<bool> {
254        // If in_file filter is set, only search nodes in that file
255        let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
256            self.get_nodes_in_file(file_path)?
257        } else {
258            // Search all nodes
259            (0..self.graph.node_count() as u64).collect()
260        };
261
262        for node_id in search_nodes {
263            if let Ok(node) = self.graph.get_node(node_id) {
264                if self.matches_filters(node) {
265                    return Ok(true);
266                }
267            }
268        }
269
270        Ok(false)
271    }
272
273    /// Get all nodes contained in a specific file.
274    fn get_nodes_in_file(&self, file_path: &str) -> Result<Vec<NodeId>> {
275        // First find the file node
276        for node_id in 0..self.graph.node_count() as u64 {
277            if let Ok(node) = self.graph.get_node(node_id) {
278                if node.node_type == NodeType::CodeFile {
279                    if let Some(path) = node.properties.get_string("path") {
280                        if path == file_path {
281                            // Found the file, now get all nodes it contains
282                            return self.graph.get_neighbors(node_id, Direction::Outgoing);
283                        }
284                    }
285                }
286            }
287        }
288
289        // File not found
290        Ok(Vec::new())
291    }
292
293    /// Check if a node matches all filters.
294    fn matches_filters(&self, node: &Node) -> bool {
295        self.filters.iter().all(|filter| filter(node))
296    }
297}
298
/// Simple glob pattern matching.
///
/// The whole `path` must match the whole `pattern`. Supported wildcards:
///
/// - `*` matches any run of characters (possibly empty, `/` included).
/// - `**/` matches zero or more complete leading directories, i.e. either
///   nothing or any run of characters that ends with `/`. This is what makes
///   `**/main.rs` match `main.rs` and `src/main.rs` but not `src/domain.rs`.
/// - `**` not followed by `/` behaves like `*`.
///
/// Every other character is matched literally.
fn glob_match(pattern: &str, path: &str) -> bool {
    // Matching on bytes is safe: the wildcards and `/` are ASCII, and a
    // literal can only line up with the text on UTF-8 character boundaries.
    let pat = pattern.as_bytes();
    let text = path.as_bytes();
    let n = text.len();

    // reach[j] == true means the pattern consumed so far can match text[..j].
    let mut reach = vec![false; n + 1];
    reach[0] = true;

    let mut i = 0;
    while i < pat.len() {
        let mut next = vec![false; n + 1];

        if pat[i] == b'*' {
            let double = pat.get(i + 1) == Some(&b'*');
            let dir_wildcard = double && pat.get(i + 2) == Some(&b'/');

            // `seen` records whether any position before `j` was reachable.
            let mut seen = false;
            for j in 0..=n {
                if dir_wildcard {
                    // Either consume nothing, or consume up to and including
                    // a `/` (`seen` guarantees j >= 1 before indexing).
                    next[j] = reach[j] || (seen && text[j - 1] == b'/');
                    seen |= reach[j];
                } else {
                    seen |= reach[j];
                    next[j] = seen;
                }
            }

            i += if dir_wildcard {
                3
            } else if double {
                2
            } else {
                1
            };
        } else {
            for j in 0..n {
                next[j + 1] = reach[j] && text[j] == pat[i];
            }
            i += 1;
        }

        // No reachable position left: the rest of the pattern cannot help.
        if !next.contains(&true) {
            return false;
        }
        reach = next;
    }

    reach[n]
}
370
/// Minimal anchor-only pattern matcher.
///
/// Only `^` at the start and `$` at the end of `pattern` are special; every
/// leading `^` and trailing `$` is stripped and the remainder is treated as a
/// plain literal:
///
/// - `^lit$` — `text` must equal the literal,
/// - `^lit`  — `text` must start with it,
/// - `lit$`  — `text` must end with it,
/// - `lit`   — `text` must contain it.
fn regex_match(pattern: &str, text: &str) -> bool {
    let anchored_start = pattern.starts_with('^');
    let anchored_end = pattern.ends_with('$');
    let literal = pattern.trim_start_matches('^').trim_end_matches('$');

    match (anchored_start, anchored_end) {
        (true, true) => text == literal,
        (true, false) => text.starts_with(literal),
        (false, true) => text.ends_with(literal),
        (false, false) => text.contains(literal),
    }
}