Skip to main content

sqry_cli/
index_discovery.rs

1//! Index discovery module for finding unified graph in ancestor directories.
2//!
3//! This module implements git-like behavior where sqry walks up the directory
4//! tree to find the nearest graph index, enabling queries from subdirectories to
5//! automatically use a parent index with appropriate scope filtering.
6
7use sqry_core::graph::unified::persistence::GraphStorage;
8use std::path::{Path, PathBuf};
9
/// Upper bound on the number of directories inspected while walking upward
/// (security limit).
const MAX_ANCESTOR_DEPTH: usize = 64;

/// File name of the legacy index format (deprecated).
pub const INDEX_FILE_NAME: &str = ".sqry-index";

/// Glob metacharacters that must be escaped when a path is embedded in a
/// sqry query pattern; left unescaped they would be interpreted specially.
const PATH_ESCAPE_CHARS: &[char] = &[
    '*', '?', '[', ']', '{', '}', '\\',
];
19
/// Outcome of a successful index discovery: where the index lives and which
/// part of the tree the caller actually asked about.
#[derive(Clone, Debug)]
pub struct IndexLocation {
    /// Directory in which the index was found.
    pub index_root: PathBuf,

    /// Path the user asked to query; results are scoped to it.
    pub query_scope: PathBuf,

    /// Set when the index lives above the directory discovery started from.
    pub is_ancestor: bool,

    /// Set when the query scope is a file rather than a directory.
    pub is_file_query: bool,

    /// Set when the query must be augmented/filtered by scope, i.e. when
    /// either `is_ancestor` or `is_file_query` holds.
    ///
    /// A file query needs filtering even if the index sits in the file's own
    /// parent directory: discovery starts from that parent, so `is_ancestor`
    /// is false in that situation.
    pub requires_scope_filter: bool,
}

impl IndexLocation {
    /// Compute `query_scope` relative to `index_root`, for use as a filter.
    ///
    /// Returns:
    /// - `None` when `requires_scope_filter` is false (nothing to filter)
    /// - `None` when `query_scope` does not lie under `index_root`
    /// - `Some(relative_path)` otherwise
    ///
    /// The decision is keyed on `requires_scope_filter` rather than
    /// `is_ancestor` so that a file query located directly in the index root
    /// still yields its relative path for exact-match filtering.
    #[must_use]
    pub fn relative_scope(&self) -> Option<PathBuf> {
        if !self.requires_scope_filter {
            return None;
        }

        match self.query_scope.strip_prefix(&self.index_root) {
            Ok(relative) => Some(relative.to_path_buf()),
            Err(_) => None,
        }
    }
}
68
69/// Find the nearest .sqry-index by walking up from the given path.
70///
71/// # Algorithm
72/// 1. Canonicalize the start path (resolve symlinks, make absolute)
73/// 2. Check for .sqry-index in current directory
74/// 3. If not found, move to parent and repeat
75/// 4. Stop at filesystem root or `MAX_ANCESTOR_DEPTH`
76///
77/// # Arguments
78/// * `start` - The directory or file to start searching from
79///
80/// # Returns
81/// * `Some(IndexLocation)` if an index was found
82/// * `None` if no index exists in any ancestor
83#[must_use]
84pub fn find_nearest_index(start: &Path) -> Option<IndexLocation> {
85    let query_scope = start.to_path_buf();
86
87    // Canonicalize for consistent path matching; fall back to original if fails
88    // (e.g., permission denied, path doesn't exist yet)
89    let canonical_start = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
90
91    // Determine if input is a file or directory
92    // For file paths, start discovery from the parent directory
93    let (mut ancestor_dir, is_file_query) = if canonical_start.is_file() {
94        let parent = canonical_start
95            .parent()
96            .map_or_else(|| canonical_start.clone(), Path::to_path_buf);
97        (parent, true)
98    } else {
99        (canonical_start, false)
100    };
101
102    // Ensure we have an absolute path for traversal
103    if ancestor_dir.is_relative()
104        && let Ok(cwd) = std::env::current_dir()
105    {
106        ancestor_dir = cwd.join(&ancestor_dir);
107    }
108
109    for ancestor_depth in 0..MAX_ANCESTOR_DEPTH {
110        // Check for unified graph format first
111        let storage = GraphStorage::new(&ancestor_dir);
112        if storage.exists() {
113            let is_ancestor = ancestor_depth > 0;
114            return Some(IndexLocation {
115                index_root: ancestor_dir,
116                query_scope: query_scope.canonicalize().unwrap_or(query_scope),
117                is_ancestor,
118                is_file_query,
119                // File queries always need filtering, even when index is in parent
120                requires_scope_filter: is_ancestor || is_file_query,
121            });
122        }
123
124        // Fallback: check for legacy .sqry-index format
125        let legacy_index_path = ancestor_dir.join(INDEX_FILE_NAME);
126        if legacy_index_path.exists() && legacy_index_path.is_file() {
127            let is_ancestor = ancestor_depth > 0;
128            return Some(IndexLocation {
129                index_root: ancestor_dir,
130                query_scope: query_scope.canonicalize().unwrap_or(query_scope),
131                is_ancestor,
132                is_file_query,
133                requires_scope_filter: is_ancestor || is_file_query,
134            });
135        }
136
137        // Move to parent directory
138        if !ancestor_dir.pop() {
139            // Reached filesystem root
140            break;
141        }
142    }
143
144    None
145}
146
147/// Escape special characters in a path component for safe use in path: predicate.
148/// Also normalizes Windows backslashes to forward slashes for consistent query syntax.
149///
150/// # Double Escaping for Glob Patterns
151/// Glob metacharacters need double-escaping because there are two parsing stages:
152/// 1. Query lexer: `\\[` → `\[` (consumes one level of escaping)
153/// 2. Globset matcher: `\[` → literal `[` (consumes second level)
154///
155/// Without double-escaping, `src/[test]` would become `path:"src/\[test\]/**"`,
156/// lexer would yield `src/[test]/**`, and globset would treat `[test]` as a
157/// character class instead of a literal directory name.
158fn escape_path_for_query(path: &Path) -> String {
159    let path_str = path.to_string_lossy();
160    let mut escaped = String::with_capacity(path_str.len() + 20);
161
162    for ch in path_str.chars() {
163        // Normalize Windows backslashes to forward slashes
164        if ch == '\\' && cfg!(windows) {
165            escaped.push('/');
166            continue;
167        }
168        if ch == '\\' {
169            // Backslash needs 4 chars: `\\\\` → lexer `\\` → globset `\`
170            escaped.push_str("\\\\\\\\");
171        } else if PATH_ESCAPE_CHARS.contains(&ch) {
172            // Other glob chars: `\\[` → lexer `\[` → globset literal `[`
173            escaped.push_str("\\\\");
174            escaped.push(ch);
175        } else {
176            escaped.push(ch);
177        }
178    }
179
180    escaped
181}
182
183/// Check if a path requires quoting due to special characters.
184/// Paths need quoting when they contain:
185/// - Spaces or double quotes (for tokenization)
186/// - Glob metacharacters with escapes (backslash escapes only work in quoted strings)
187fn path_needs_quoting(path: &Path) -> bool {
188    let path_str = path.to_string_lossy();
189    path_str
190        .chars()
191        .any(|c| c == ' ' || c == '"' || PATH_ESCAPE_CHARS.contains(&c))
192}
193
194/// Augment a query with an implicit path filter when using ancestor index.
195///
196/// # Arguments
197/// * `query` - Original query string
198/// * `relative_scope` - Path relative to index root to filter by
199/// * `is_file_query` - True if scope is a file, false if directory
200///
201/// # Returns
202/// Query string with path filter appended
203///
204/// # Path Handling
205/// - Paths with spaces, quotes, or glob metacharacters are quoted automatically
206/// - Inside quotes, glob metacharacters are escaped with backslashes
207/// - The implicit filter is `ANDed` with the original query
208/// - Parentheses ensure correct precedence
209/// - File queries use exact path match; directory queries use `/**` glob
210#[must_use]
211pub fn augment_query_with_scope(query: &str, relative_scope: &Path, is_file_query: bool) -> String {
212    // Empty scope means no filtering needed
213    if relative_scope.as_os_str().is_empty() {
214        return query.to_string();
215    }
216
217    // Build the path filter pattern
218    // - File query: exact match (no glob suffix)
219    // - Directory query: recursive glob (/**)
220    let scope_pattern = if path_needs_quoting(relative_scope) {
221        // Escape glob metacharacters (backslash escapes only work in quoted strings)
222        let escaped_path = escape_path_for_query(relative_scope);
223        // Also escape internal double quotes
224        let quoted = escaped_path.replace('"', "\\\"");
225        if is_file_query {
226            format!("\"{quoted}\"")
227        } else {
228            format!("\"{quoted}/**\"")
229        }
230    } else {
231        // Simple path without special characters - use unquoted
232        let path_str = relative_scope.to_string_lossy();
233        if is_file_query {
234            path_str.into_owned()
235        } else {
236            format!("{path_str}/**")
237        }
238    };
239
240    let path_filter = format!("path:{scope_pattern}");
241
242    if query.trim().is_empty() {
243        path_filter
244    } else {
245        // Wrap original query in parentheses to preserve precedence
246        // Example: "kind:fn OR kind:method" -> "(kind:fn OR kind:method) AND path:src/**"
247        format!("({query}) AND {path_filter}")
248    }
249}
250
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Write a minimal legacy index marker into `path` for discovery tests.
    fn create_test_index(path: &Path) {
        fs::write(path.join(INDEX_FILE_NAME), "test-index-marker").unwrap();
    }

    /// Create a fresh temp dir that already contains a legacy index marker.
    fn indexed_tempdir() -> TempDir {
        let dir = TempDir::new().unwrap();
        create_test_index(dir.path());
        dir
    }

    /// Build an `IndexLocation` rooted at `/project` for scope tests.
    fn project_location(scope: &str, is_ancestor: bool, is_file_query: bool) -> IndexLocation {
        IndexLocation {
            index_root: PathBuf::from("/project"),
            query_scope: PathBuf::from(scope),
            is_ancestor,
            is_file_query,
            requires_scope_filter: is_ancestor || is_file_query,
        }
    }

    #[test]
    fn find_nearest_index_at_current_dir() {
        let tmp = indexed_tempdir();

        let loc = find_nearest_index(tmp.path()).expect("index in start dir should be found");

        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(!loc.is_ancestor);
        assert!(!loc.is_file_query);
        assert!(!loc.requires_scope_filter);
    }

    #[test]
    fn find_nearest_index_in_parent() {
        let tmp = indexed_tempdir();
        let subdir = tmp.path().join("src");
        fs::create_dir(&subdir).unwrap();

        let loc = find_nearest_index(&subdir).expect("index in parent should be found");

        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(loc.is_ancestor);
        assert!(!loc.is_file_query);
        assert!(loc.requires_scope_filter);
    }

    #[test]
    fn find_nearest_index_in_grandparent() {
        let tmp = indexed_tempdir();
        let deep = tmp.path().join("src").join("utils");
        fs::create_dir_all(&deep).unwrap();

        let loc = find_nearest_index(&deep).expect("index in grandparent should be found");

        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(loc.is_ancestor);
        assert!(loc.requires_scope_filter);
    }

    #[test]
    fn find_nearest_index_none_found() {
        // Deliberately no index inside the temp dir.
        let tmp = TempDir::new().unwrap();

        let result = find_nearest_index(tmp.path());

        // Discovery walks through ancestors of the temp dir too, so a stray
        // index higher up (e.g. /tmp/.sqry/ left by an earlier run) may be
        // returned. The only guarantee checked here is that nothing was found
        // *inside* the temp dir itself.
        if let Some(loc) = &result {
            let tmp_canonical = tmp.path().canonicalize().unwrap();
            assert!(
                !loc.index_root.starts_with(&tmp_canonical),
                "found unexpected index inside temp dir: {:?}",
                loc.index_root
            );
        }
    }

    #[test]
    fn find_nearest_index_nested_repos() {
        // Outer index at the root, inner index under packages/web.
        let tmp = indexed_tempdir();
        let inner = tmp.path().join("packages").join("web");
        fs::create_dir_all(&inner).unwrap();
        create_test_index(&inner);

        let query_path = inner.join("src");
        fs::create_dir(&query_path).unwrap();

        // The nearest (inner) index must win.
        let loc = find_nearest_index(&query_path).expect("inner index should be found");

        assert_eq!(loc.index_root, inner.canonicalize().unwrap());
        assert!(loc.is_ancestor);
    }

    #[test]
    fn find_nearest_index_file_input() {
        let tmp = indexed_tempdir();
        let subdir = tmp.path().join("src");
        fs::create_dir(&subdir).unwrap();
        let file = subdir.join("main.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let loc = find_nearest_index(&file).expect("index above the file should be found");

        assert!(loc.is_file_query);
        assert!(loc.is_ancestor); // Index is in grandparent
        assert!(loc.requires_scope_filter);
    }

    #[test]
    fn find_nearest_index_file_in_index_dir() {
        let tmp = indexed_tempdir();
        let file = tmp.path().join("main.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let loc = find_nearest_index(&file).expect("index beside the file should be found");

        assert!(!loc.is_ancestor); // Index is in file's parent
        assert!(loc.is_file_query);
        assert!(loc.requires_scope_filter); // File queries always need filtering
    }

    #[test]
    fn relative_scope_calculation() {
        let loc = project_location("/project/src/utils", true, false);

        assert_eq!(loc.relative_scope(), Some(PathBuf::from("src/utils")));
    }

    #[test]
    fn relative_scope_same_dir() {
        let loc = project_location("/project", false, false);

        assert!(loc.relative_scope().is_none());
    }

    #[test]
    fn relative_scope_file_in_root() {
        let loc = project_location("/project/main.rs", false, true);

        assert_eq!(loc.relative_scope(), Some(PathBuf::from("main.rs")));
    }

    #[test]
    fn augment_query_with_scope_basic() {
        let augmented = augment_query_with_scope("kind:function", Path::new("src"), false);
        assert_eq!(augmented, "(kind:function) AND path:src/**");
    }

    #[test]
    fn augment_query_with_scope_empty_query() {
        let augmented = augment_query_with_scope("", Path::new("src"), false);
        assert_eq!(augmented, "path:src/**");
    }

    #[test]
    fn augment_query_with_scope_empty_path() {
        let augmented = augment_query_with_scope("kind:fn", Path::new(""), false);
        assert_eq!(augmented, "kind:fn");
    }

    #[test]
    fn augment_query_with_scope_file_query() {
        let augmented = augment_query_with_scope("kind:function", Path::new("src/main.rs"), true);
        assert_eq!(augmented, "(kind:function) AND path:src/main.rs");
    }

    #[test]
    fn augment_query_with_scope_directory_query() {
        let augmented = augment_query_with_scope("kind:function", Path::new("src"), false);
        assert_eq!(augmented, "(kind:function) AND path:src/**");
    }

    #[test]
    fn augment_query_file_with_spaces() {
        let scope = Path::new("my project/main.rs");
        let augmented = augment_query_with_scope("kind:function", scope, true);
        assert_eq!(augmented, "(kind:function) AND path:\"my project/main.rs\"");
    }

    #[test]
    fn augment_query_with_scope_path_with_spaces() {
        let scope = Path::new("my project/src");
        let augmented = augment_query_with_scope("kind:function", scope, false);
        assert_eq!(augmented, "(kind:function) AND path:\"my project/src/**\"");
    }

    #[test]
    fn augment_query_with_scope_path_with_glob_chars() {
        // Glob metacharacters force quoting plus double escaping: the CLI
        // emits `\\[`, the lexer reduces it to `\[`, and globset then reads
        // that as a literal `[`.
        let scope = Path::new("src/[test]");
        let augmented = augment_query_with_scope("kind:function", scope, false);
        assert_eq!(augmented, "(kind:function) AND path:\"src/\\\\[test\\\\]/**\"");
    }

    #[test]
    fn augment_query_preserves_precedence() {
        let augmented = augment_query_with_scope("kind:fn OR kind:method", Path::new("src"), false);
        assert_eq!(augmented, "(kind:fn OR kind:method) AND path:src/**");
    }

    #[test]
    fn augment_query_with_existing_path_predicate() {
        let scope = Path::new("src/utils");
        let augmented = augment_query_with_scope("kind:fn AND path:*.rs", scope, false);
        assert_eq!(augmented, "(kind:fn AND path:*.rs) AND path:src/utils/**");
    }

    #[test]
    #[cfg(unix)]
    fn escape_path_with_backslash_on_unix() {
        // A single `\` in the path is emitted as four backslashes, so the
        // lexer yields `\\` and globset matches one literal backslash.
        let escaped = escape_path_for_query(Path::new("src/file\\name"));
        assert_eq!(escaped, "src/file\\\\\\\\name");
    }

    /// Augmented queries, including those with special characters in the
    /// scope path, must tokenize successfully; this guards the escaping
    /// strategy against producing invalid query syntax.
    #[test]
    fn augmented_queries_are_parseable() {
        use sqry_core::query::Lexer;

        let test_cases = [
            // Plain path, no escaping involved
            ("kind:fn", Path::new("src"), false),
            // Spaces: quoted
            ("kind:fn", Path::new("my project/src"), false),
            // Glob metacharacters: quoted and escaped
            ("kind:fn", Path::new("src/[test]"), false),
            ("kind:fn", Path::new("src/test*"), false),
            ("kind:fn", Path::new("src/test?"), false),
            ("kind:fn", Path::new("src/{a,b}"), false),
            // File scopes
            ("kind:fn", Path::new("src/main.rs"), true),
            ("kind:fn", Path::new("src/[test]/main.rs"), true),
            // Compound query combined with a special path
            ("kind:fn OR kind:method", Path::new("src/[utils]"), false),
        ];

        for (query, path, is_file) in test_cases {
            let augmented = augment_query_with_scope(query, path, is_file);
            let outcome = Lexer::new(&augmented).tokenize();
            assert!(
                outcome.is_ok(),
                "Failed to parse augmented query for path {:?}: {:?}\nQuery: {}",
                path,
                outcome.err(),
                augmented
            );
        }
    }
}
545}