codegraph/query.rs
1//! Query builder for fluent graph queries.
2//!
3//! Provides a fluent interface for constructing and executing complex graph queries
4//! with multiple filters and optimizations.
5
6use crate::error::Result;
7use crate::graph::{CodeGraph, Direction, Node, NodeId, NodeType, PropertyValue};
8
/// A boxed filter predicate over a [`Node`].
///
/// The closure returns `true` when the node passes the filter and `false`
/// when it should be excluded from the query result.
type FilterFn = Box<dyn Fn(&Node) -> bool>;
11
12/// Fluent query builder for graph operations.
13///
14/// Allows chaining multiple filters to find specific nodes in the graph.
15///
16/// # Examples
17///
18/// ```
19/// use codegraph::{CodeGraph, NodeType};
20///
21/// # fn example() -> codegraph::Result<()> {
22/// let mut graph = CodeGraph::in_memory()?;
23/// // ... populate graph ...
24///
25/// // Find all public functions in a specific file
26/// let results = graph.query()
27/// .node_type(NodeType::Function)
28/// .in_file("src/main.rs")
29/// .property("visibility", "public")
30/// .execute()?;
31/// # Ok(())
32/// # }
33/// ```
34pub struct QueryBuilder<'a> {
35 graph: &'a CodeGraph,
36 filters: Vec<FilterFn>,
37 limit_value: Option<usize>,
38 in_file_filter: Option<String>,
39}
40
41impl<'a> QueryBuilder<'a> {
42 /// Create a new query builder for the given graph.
43 pub fn new(graph: &'a CodeGraph) -> Self {
44 Self {
45 graph,
46 filters: Vec::new(),
47 limit_value: None,
48 in_file_filter: None,
49 }
50 }
51
52 /// Filter nodes by type.
53 ///
54 /// # Examples
55 ///
56 /// ```
57 /// # use codegraph::{CodeGraph, NodeType};
58 /// # fn example() -> codegraph::Result<()> {
59 /// # let graph = CodeGraph::in_memory()?;
60 /// let functions = graph.query()
61 /// .node_type(NodeType::Function)
62 /// .execute()?;
63 /// # Ok(())
64 /// # }
65 /// ```
66 pub fn node_type(mut self, node_type: NodeType) -> Self {
67 self.filters
68 .push(Box::new(move |node| node.node_type == node_type));
69 self
70 }
71
72 /// Filter nodes that are contained in a specific file.
73 ///
74 /// Looks up the file by path and finds all nodes connected via Contains edges.
75 pub fn in_file(mut self, file_path: &str) -> Self {
76 self.in_file_filter = Some(file_path.to_string());
77 self
78 }
79
80 /// Filter files by glob pattern.
81 ///
82 /// Supports wildcards: `*` matches any characters, `**` matches directories.
83 ///
84 /// # Examples
85 ///
86 /// - `src/*.rs` - All Rust files in src/
87 /// - `**/*.py` - All Python files recursively
88 /// - `tests/**/*.rs` - All Rust files under tests/
89 pub fn file_pattern(mut self, pattern: &str) -> Self {
90 let pattern = pattern.to_string();
91 self.filters.push(Box::new(move |node| {
92 if let Some(path) = node.properties.get_string("path") {
93 glob_match(&pattern, path)
94 } else {
95 false
96 }
97 }));
98 self
99 }
100
101 /// Filter nodes by exact property match.
102 ///
103 /// Supports string, int, float, and bool property values.
104 pub fn property<V: Into<PropertyValue>>(mut self, key: &str, value: V) -> Self {
105 let key = key.to_string();
106 let value = value.into();
107
108 self.filters.push(Box::new(move |node| {
109 if let Some(prop_value) = node.properties.get(&key) {
110 match (&value, prop_value) {
111 (PropertyValue::String(v1), PropertyValue::String(v2)) => v1 == v2,
112 (PropertyValue::Int(v1), PropertyValue::Int(v2)) => v1 == v2,
113 (PropertyValue::Float(v1), PropertyValue::Float(v2)) => {
114 (v1 - v2).abs() < f64::EPSILON
115 }
116 (PropertyValue::Bool(v1), PropertyValue::Bool(v2)) => v1 == v2,
117 _ => false,
118 }
119 } else {
120 false
121 }
122 }));
123 self
124 }
125
126 /// Filter nodes that have a specific property (regardless of value).
127 pub fn property_exists(mut self, key: &str) -> Self {
128 let key = key.to_string();
129 self.filters
130 .push(Box::new(move |node| node.properties.contains_key(&key)));
131 self
132 }
133
134 /// Filter nodes by name containing a substring (case-insensitive).
135 pub fn name_contains(mut self, substring: &str) -> Self {
136 let substring = substring.to_lowercase();
137 self.filters.push(Box::new(move |node| {
138 if let Some(name) = node.properties.get_string("name") {
139 name.to_lowercase().contains(&substring)
140 } else {
141 false
142 }
143 }));
144 self
145 }
146
147 /// Filter nodes by name matching a regex pattern.
148 pub fn name_matches(mut self, pattern: &str) -> Self {
149 let pattern = pattern.to_string();
150 self.filters.push(Box::new(move |node| {
151 if let Some(name) = node.properties.get_string("name") {
152 // Simple regex: support ^ for start, $ for end, * for wildcard
153 regex_match(&pattern, name)
154 } else {
155 false
156 }
157 }));
158 self
159 }
160
161 /// Filter nodes using a custom predicate function.
162 ///
163 /// # Examples
164 ///
165 /// ```
166 /// # use codegraph::{CodeGraph, NodeType};
167 /// # fn example() -> codegraph::Result<()> {
168 /// # let graph = CodeGraph::in_memory()?;
169 /// // Find functions longer than 50 lines
170 /// let results = graph.query()
171 /// .node_type(NodeType::Function)
172 /// .custom(|node| {
173 /// if let (Some(start), Some(end)) = (
174 /// node.properties.get_int("line_start"),
175 /// node.properties.get_int("line_end")
176 /// ) {
177 /// (end - start) > 50
178 /// } else {
179 /// false
180 /// }
181 /// })
182 /// .execute()?;
183 /// # Ok(())
184 /// # }
185 /// ```
186 pub fn custom<F>(mut self, predicate: F) -> Self
187 where
188 F: Fn(&Node) -> bool + 'static,
189 {
190 self.filters.push(Box::new(predicate));
191 self
192 }
193
194 /// Limit the number of results returned.
195 pub fn limit(mut self, n: usize) -> Self {
196 self.limit_value = Some(n);
197 self
198 }
199
200 /// Execute the query and return matching node IDs.
201 pub fn execute(&self) -> Result<Vec<NodeId>> {
202 let mut results = Vec::new();
203 let limit = self.limit_value.unwrap_or(usize::MAX);
204
205 // If in_file filter is set, only search nodes in that file
206 let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
207 self.get_nodes_in_file(file_path)?
208 } else {
209 // Search all nodes
210 (0..self.graph.node_count() as u64).collect()
211 };
212
213 // Iterate through nodes and apply filters
214 for node_id in search_nodes {
215 if results.len() >= limit {
216 break;
217 }
218
219 if let Ok(node) = self.graph.get_node(node_id) {
220 if self.matches_filters(node) {
221 results.push(node_id);
222 }
223 }
224 }
225
226 Ok(results)
227 }
228
229 /// Count the number of matching nodes without allocating a result vector.
230 pub fn count(&self) -> Result<usize> {
231 let mut count = 0;
232
233 // If in_file filter is set, only search nodes in that file
234 let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
235 self.get_nodes_in_file(file_path)?
236 } else {
237 // Search all nodes
238 (0..self.graph.node_count() as u64).collect()
239 };
240
241 for node_id in search_nodes {
242 if let Ok(node) = self.graph.get_node(node_id) {
243 if self.matches_filters(node) {
244 count += 1;
245 }
246 }
247 }
248
249 Ok(count)
250 }
251
252 /// Check if any nodes match the query (short-circuits on first match).
253 pub fn exists(&self) -> Result<bool> {
254 // If in_file filter is set, only search nodes in that file
255 let search_nodes: Vec<NodeId> = if let Some(file_path) = &self.in_file_filter {
256 self.get_nodes_in_file(file_path)?
257 } else {
258 // Search all nodes
259 (0..self.graph.node_count() as u64).collect()
260 };
261
262 for node_id in search_nodes {
263 if let Ok(node) = self.graph.get_node(node_id) {
264 if self.matches_filters(node) {
265 return Ok(true);
266 }
267 }
268 }
269
270 Ok(false)
271 }
272
273 /// Get all nodes contained in a specific file.
274 fn get_nodes_in_file(&self, file_path: &str) -> Result<Vec<NodeId>> {
275 // First find the file node
276 for node_id in 0..self.graph.node_count() as u64 {
277 if let Ok(node) = self.graph.get_node(node_id) {
278 if node.node_type == NodeType::CodeFile {
279 if let Some(path) = node.properties.get_string("path") {
280 if path == file_path {
281 // Found the file, now get all nodes it contains
282 return self.graph.get_neighbors(node_id, Direction::Outgoing);
283 }
284 }
285 }
286 }
287 }
288
289 // File not found
290 Ok(Vec::new())
291 }
292
293 /// Check if a node matches all filters.
294 fn matches_filters(&self, node: &Node) -> bool {
295 self.filters.iter().all(|filter| filter(node))
296 }
297}
298
/// Simple glob pattern matching.
///
/// Supported syntax:
///
/// - `*` matches any run of characters, including `/`.
/// - A pattern with exactly one `**` is split into a prefix and a suffix
///   (leading `/` of the suffix dropped). The path must start with the prefix,
///   compared literally, and the suffix is matched against what follows the
///   prefix, so the two can never overlap:
///   - a suffix containing `*` is matched against as many trailing path
///     segments as the suffix itself has (`sub/*.rs` is matched against the
///     last two segments);
///   - a suffix without `*` must be a literal ending of the remainder.
/// - A pattern with more than one `**` is treated as a plain `*` pattern.
///
/// Returns `true` when `path` matches `pattern`.
fn glob_match(pattern: &str, path: &str) -> bool {
    if let Some((prefix, rest)) = pattern.split_once("**") {
        // Only the single-`**` form gets directory semantics.
        if !rest.contains("**") {
            let suffix = rest.trim_start_matches('/');
            return match path.strip_prefix(prefix) {
                Some(remainder) => match_double_star_suffix(suffix, remainder),
                None => false,
            };
        }
    }

    match_single_star(pattern, path)
}

/// Match the part of a pattern after `**` against the part of the path after the prefix.
fn match_double_star_suffix(suffix: &str, remainder: &str) -> bool {
    if suffix.contains('*') {
        // `**` absorbs leading directories, so compare only as many trailing
        // segments as the suffix spells out.
        let separators = suffix.matches('/').count();
        match_single_star(suffix, trailing_segments(remainder, separators))
    } else {
        // An empty suffix (pattern ended in `**`) accepts any remainder.
        remainder.ends_with(suffix)
    }
}

/// Return the tail of `path` that contains at most `separators` slashes.
fn trailing_segments(path: &str, separators: usize) -> &str {
    match path.rmatch_indices('/').nth(separators) {
        Some((slash, _)) => &path[slash + 1..],
        None => path,
    }
}

/// Match `text` against a pattern in which every `*` stands for any run of characters.
fn match_single_star(pattern: &str, text: &str) -> bool {
    let parts: Vec<&str> = pattern.split('*').collect();
    if parts.len() == 1 {
        // No wildcards - exact match
        return pattern == text;
    }

    let last = parts.len() - 1;
    let mut rest = text;
    for (i, part) in parts.iter().enumerate() {
        if part.is_empty() {
            // Empty part from consecutive `*` or a `*` at the start/end.
            continue;
        }

        if i == 0 {
            // First part must match the start.
            match rest.strip_prefix(*part) {
                Some(after) => rest = after,
                None => return false,
            }
        } else if i == last {
            // Last part must match the end of what is still unconsumed.
            return rest.ends_with(*part);
        } else {
            // Middle parts must appear in order; take the leftmost occurrence.
            match rest.find(*part) {
                Some(index) => rest = &rest[index + part.len()..],
                None => return false,
            }
        }
    }

    true
}
370
/// Minimal anchor-only pattern matching.
///
/// A leading `^` anchors the pattern to the start of `text` and a trailing `$`
/// anchors it to the end; the anchors are stripped and the rest of the pattern
/// is compared literally. With both anchors the text must equal the literal,
/// and with neither the literal only has to occur somewhere in `text`.
fn regex_match(pattern: &str, text: &str) -> bool {
    let anchored_start = pattern.starts_with('^');
    let anchored_end = pattern.ends_with('$');
    let literal = pattern.trim_start_matches('^').trim_end_matches('$');

    match (anchored_start, anchored_end) {
        (true, true) => text == literal,
        (true, false) => text.starts_with(literal),
        (false, true) => text.ends_with(literal),
        (false, false) => text.contains(literal),
    }
}