Skip to main content

sqry_cli/
index_discovery.rs

//! Index discovery module for finding the unified graph index in ancestor directories.
//!
//! This module implements git-like behavior: sqry walks up the directory
//! tree to find the nearest graph index, so that queries run from a subdirectory
//! automatically use a parent index with appropriate scope filtering.
6
7use sqry_core::graph::unified::persistence::GraphStorage;
8use std::path::{Path, PathBuf};
9
/// Maximum number of directories to probe while walking upward (security limit).
///
/// The starting directory itself counts toward this limit.
const MAX_ANCESTOR_DEPTH: usize = 64;

/// File name of the legacy index format (deprecated).
///
/// Discovery still accepts a regular file with this name as a fallback when
/// no unified graph storage is present in a directory.
pub const INDEX_FILE_NAME: &str = ".sqry-index";

/// Characters that need escaping in path patterns for the sqry query language.
///
/// These are glob metacharacters that would otherwise be interpreted
/// specially, plus the backslash itself (the escape character).
const PATH_ESCAPE_CHARS: &[char] = &['*', '?', '[', ']', '{', '}', '\\'];
19
/// Result of index discovery, containing location and scope information.
#[derive(Debug, Clone)]
pub struct IndexLocation {
    /// Directory in which the index was found: either unified graph storage
    /// or a legacy `.sqry-index` file. Normally an absolute, canonical path.
    pub index_root: PathBuf,

    /// Path the user requested, canonicalized when possible (used for
    /// scoping results relative to `index_root`).
    pub query_scope: PathBuf,

    /// True if index was found in an ancestor directory (relative to start dir).
    /// For file queries the start dir is the file's parent directory.
    pub is_ancestor: bool,

    /// True if the query scope is a file (not a directory)
    pub is_file_query: bool,

    /// True if query augmentation/filtering is needed.
    /// This is true when:
    /// - Index is in ancestor directory (`is_ancestor`), OR
    /// - Query targets a specific file (`is_file_query`)
    ///
    /// Note: File queries always need filtering even when the index
    /// is in the file's parent directory (`is_ancestor` would be false
    /// due to how we start discovery from the parent).
    pub requires_scope_filter: bool,
}
45
46impl IndexLocation {
47    /// Get the relative path from `index_root` to `query_scope` for filtering.
48    ///
49    /// Returns:
50    /// - `Some(relative_path)` when scope filtering is needed and path is inside index root
51    /// - `None` when no filtering needed (`query_scope` == `index_root` and !`is_file_query`)
52    /// - `None` when `query_scope` is outside `index_root` (edge case, shouldn't happen)
53    ///
54    /// Note: Uses `requires_scope_filter` (not `is_ancestor`) to ensure file queries
55    /// in the index root still compute their relative scope for exact-match filtering.
56    #[must_use]
57    pub fn relative_scope(&self) -> Option<PathBuf> {
58        if self.requires_scope_filter {
59            self.query_scope
60                .strip_prefix(&self.index_root)
61                .ok()
62                .map(Path::to_path_buf)
63        } else {
64            None
65        }
66    }
67}
68
69/// Find the nearest .sqry-index by walking up from the given path.
70///
71/// # Algorithm
72/// 1. Canonicalize the start path (resolve symlinks, make absolute)
73/// 2. Check for .sqry-index in current directory
74/// 3. If not found, move to parent and repeat
75/// 4. Stop at filesystem root or `MAX_ANCESTOR_DEPTH`
76///
77/// # Arguments
78/// * `start` - The directory or file to start searching from
79///
80/// # Returns
81/// * `Some(IndexLocation)` if an index was found
82/// * `None` if no index exists in any ancestor
83#[must_use]
84pub fn find_nearest_index(start: &Path) -> Option<IndexLocation> {
85    let query_scope = start.to_path_buf();
86
87    // Canonicalize for consistent path matching; fall back to original if fails
88    // (e.g., permission denied, path doesn't exist yet)
89    let canonical_start = start.canonicalize().unwrap_or_else(|_| start.to_path_buf());
90
91    // Determine if input is a file or directory
92    // For file paths, start discovery from the parent directory
93    let (mut ancestor_dir, is_file_query) = if canonical_start.is_file() {
94        let parent = canonical_start
95            .parent()
96            .map_or_else(|| canonical_start.clone(), Path::to_path_buf);
97        (parent, true)
98    } else {
99        (canonical_start, false)
100    };
101
102    // Ensure we have an absolute path for traversal
103    if ancestor_dir.is_relative()
104        && let Ok(cwd) = std::env::current_dir()
105    {
106        ancestor_dir = cwd.join(&ancestor_dir);
107    }
108
109    for ancestor_depth in 0..MAX_ANCESTOR_DEPTH {
110        // Check for unified graph format first
111        let storage = GraphStorage::new(&ancestor_dir);
112        if storage.exists() {
113            let is_ancestor = ancestor_depth > 0;
114            return Some(IndexLocation {
115                index_root: ancestor_dir,
116                query_scope: query_scope.canonicalize().unwrap_or(query_scope),
117                is_ancestor,
118                is_file_query,
119                // File queries always need filtering, even when index is in parent
120                requires_scope_filter: is_ancestor || is_file_query,
121            });
122        }
123
124        // Fallback: check for legacy .sqry-index format
125        let legacy_index_path = ancestor_dir.join(INDEX_FILE_NAME);
126        if legacy_index_path.exists() && legacy_index_path.is_file() {
127            let is_ancestor = ancestor_depth > 0;
128            return Some(IndexLocation {
129                index_root: ancestor_dir,
130                query_scope: query_scope.canonicalize().unwrap_or(query_scope),
131                is_ancestor,
132                is_file_query,
133                requires_scope_filter: is_ancestor || is_file_query,
134            });
135        }
136
137        // Move to parent directory
138        if !ancestor_dir.pop() {
139            // Reached filesystem root
140            break;
141        }
142    }
143
144    None
145}
146
147/// Escape special characters in a path component for safe use in path: predicate.
148/// Also normalizes Windows backslashes to forward slashes for consistent query syntax.
149///
150/// # Double Escaping for Glob Patterns
151/// Glob metacharacters need double-escaping because there are two parsing stages:
152/// 1. Query lexer: `\\[` → `\[` (consumes one level of escaping)
153/// 2. Globset matcher: `\[` → literal `[` (consumes second level)
154///
155/// Without double-escaping, `src/[test]` would become `path:"src/\[test\]/**"`,
156/// lexer would yield `src/[test]/**`, and globset would treat `[test]` as a
157/// character class instead of a literal directory name.
158fn escape_path_for_query(path: &Path) -> String {
159    let path_str = path.to_string_lossy();
160    let mut escaped = String::with_capacity(path_str.len() + 20);
161
162    for ch in path_str.chars() {
163        // Normalize Windows backslashes to forward slashes
164        if ch == '\\' && cfg!(windows) {
165            escaped.push('/');
166            continue;
167        }
168        if ch == '\\' {
169            // Backslash needs 4 chars: `\\\\` → lexer `\\` → globset `\`
170            escaped.push_str("\\\\\\\\");
171        } else if PATH_ESCAPE_CHARS.contains(&ch) {
172            // Other glob chars: `\\[` → lexer `\[` → globset literal `[`
173            escaped.push_str("\\\\");
174            escaped.push(ch);
175        } else {
176            escaped.push(ch);
177        }
178    }
179
180    escaped
181}
182
183/// Check if a path requires quoting due to special characters.
184/// Paths need quoting when they contain:
185/// - Spaces or double quotes (for tokenization)
186/// - Glob metacharacters with escapes (backslash escapes only work in quoted strings)
187/// - A leading character that the query lexer does not accept as a word start
188///   (the lexer's `is_word_start` permits only ASCII alphabetic + `_`, so paths
189///   beginning with `.`, `-`, a digit, `/`, etc. must be quoted).
190fn path_needs_quoting(path: &Path) -> bool {
191    let path_str = path.to_string_lossy();
192    let leading_requires_quoting = path_str
193        .chars()
194        .next()
195        .is_some_and(|c| !c.is_ascii_alphabetic() && c != '_');
196    leading_requires_quoting
197        || path_str
198            .chars()
199            .any(|c| c == ' ' || c == '"' || PATH_ESCAPE_CHARS.contains(&c))
200}
201
202/// Augment a query with an implicit path filter when using ancestor index.
203///
204/// # Arguments
205/// * `query` - Original query string
206/// * `relative_scope` - Path relative to index root to filter by
207/// * `is_file_query` - True if scope is a file, false if directory
208///
209/// # Returns
210/// Query string with path filter appended
211///
212/// # Path Handling
213/// - Paths with spaces, quotes, or glob metacharacters are quoted automatically
214/// - Inside quotes, glob metacharacters are escaped with backslashes
215/// - The implicit filter is `ANDed` with the original query
216/// - Parentheses ensure correct precedence
217/// - File queries use exact path match; directory queries use `/**` glob
218#[must_use]
219pub fn augment_query_with_scope(query: &str, relative_scope: &Path, is_file_query: bool) -> String {
220    // Empty scope means no filtering needed
221    if relative_scope.as_os_str().is_empty() {
222        return query.to_string();
223    }
224
225    // Build the path filter pattern
226    // - File query: exact match (no glob suffix)
227    // - Directory query: recursive glob (/**)
228    let scope_pattern = if path_needs_quoting(relative_scope) {
229        // Escape glob metacharacters (backslash escapes only work in quoted strings)
230        let escaped_path = escape_path_for_query(relative_scope);
231        // Also escape internal double quotes
232        let quoted = escaped_path.replace('"', "\\\"");
233        if is_file_query {
234            format!("\"{quoted}\"")
235        } else {
236            format!("\"{quoted}/**\"")
237        }
238    } else {
239        // Simple path without special characters - use unquoted
240        let path_str = relative_scope.to_string_lossy();
241        if is_file_query {
242            path_str.into_owned()
243        } else {
244            format!("{path_str}/**")
245        }
246    };
247
248    let path_filter = format!("path:{scope_pattern}");
249
250    if query.trim().is_empty() {
251        path_filter
252    } else {
253        // Wrap original query in parentheses to preserve precedence
254        // Example: "kind:fn OR kind:method" -> "(kind:fn OR kind:method) AND path:src/**"
255        format!("({query}) AND {path_filter}")
256    }
257}
258
/// Unit tests for index discovery, relative-scope computation, and query
/// augmentation/escaping.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Helper to create a minimal index file for discovery tests.
    ///
    /// Writes a legacy `INDEX_FILE_NAME` marker file, so these tests exercise
    /// the legacy fallback branch of `find_nearest_index`.
    fn create_test_index(path: &Path) {
        let index_path = path.join(INDEX_FILE_NAME);
        fs::write(&index_path, "test-index-marker").unwrap();
    }

    /// Index located in the queried directory itself: no scope filter needed.
    #[test]
    fn find_nearest_index_at_current_dir() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path());

        let result = find_nearest_index(tmp.path());

        assert!(result.is_some());
        let loc = result.unwrap();
        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(!loc.is_ancestor);
        assert!(!loc.is_file_query);
        assert!(!loc.requires_scope_filter);
    }

    /// Index located one level above the queried directory.
    #[test]
    fn find_nearest_index_in_parent() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path());

        let subdir = tmp.path().join("src");
        fs::create_dir(&subdir).unwrap();

        let result = find_nearest_index(&subdir);

        assert!(result.is_some());
        let loc = result.unwrap();
        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(loc.is_ancestor);
        assert!(!loc.is_file_query);
        assert!(loc.requires_scope_filter);
    }

    /// Index located two levels above the queried directory.
    #[test]
    fn find_nearest_index_in_grandparent() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path());

        let deep = tmp.path().join("src").join("utils");
        fs::create_dir_all(&deep).unwrap();

        let result = find_nearest_index(&deep);

        assert!(result.is_some());
        let loc = result.unwrap();
        assert_eq!(loc.index_root, tmp.path().canonicalize().unwrap());
        assert!(loc.is_ancestor);
        assert!(loc.requires_scope_filter);
    }

    /// No index inside the temp dir: discovery must not report one there.
    #[test]
    fn find_nearest_index_none_found() {
        let tmp = TempDir::new().unwrap();
        // No index created

        let result = find_nearest_index(tmp.path());

        // The search traverses ancestor directories, so if a .sqry/ exists
        // in an ancestor of the temp dir (e.g. /tmp/.sqry/ from a previous
        // run), it will be found. We only assert no index was found *within*
        // the temp dir itself.
        match &result {
            None => {} // expected
            Some(loc) => {
                let tmp_canonical = tmp.path().canonicalize().unwrap();
                assert!(
                    !loc.index_root.starts_with(&tmp_canonical),
                    "found unexpected index inside temp dir: {:?}",
                    loc.index_root
                );
            }
        }
    }

    /// With nested indexes, the closest one to the query path wins.
    #[test]
    fn find_nearest_index_nested_repos() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path()); // Root index

        let inner = tmp.path().join("packages").join("web");
        fs::create_dir_all(&inner).unwrap();
        create_test_index(&inner); // Inner index

        let query_path = inner.join("src");
        fs::create_dir(&query_path).unwrap();

        let result = find_nearest_index(&query_path);

        // Should find the nearest (inner) index
        assert!(result.is_some());
        let loc = result.unwrap();
        assert_eq!(loc.index_root, inner.canonicalize().unwrap());
        assert!(loc.is_ancestor);
    }

    /// File input: discovery starts at the file's parent directory.
    #[test]
    fn find_nearest_index_file_input() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path());

        let subdir = tmp.path().join("src");
        fs::create_dir(&subdir).unwrap();
        let file = subdir.join("main.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let result = find_nearest_index(&file);

        assert!(result.is_some());
        let loc = result.unwrap();
        assert!(loc.is_file_query);
        assert!(loc.is_ancestor); // Index is in grandparent
        assert!(loc.requires_scope_filter);
    }

    /// File input whose parent directory holds the index: not an ancestor
    /// hit, but filtering is still required.
    #[test]
    fn find_nearest_index_file_in_index_dir() {
        let tmp = TempDir::new().unwrap();
        create_test_index(tmp.path());

        let file = tmp.path().join("main.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let result = find_nearest_index(&file);

        assert!(result.is_some());
        let loc = result.unwrap();
        assert!(!loc.is_ancestor); // Index is in file's parent
        assert!(loc.is_file_query);
        assert!(loc.requires_scope_filter); // File queries always need filtering
    }

    /// Directory scope below the index root yields the relative sub-path.
    #[test]
    fn relative_scope_calculation() {
        let loc = IndexLocation {
            index_root: PathBuf::from("/project"),
            query_scope: PathBuf::from("/project/src/utils"),
            is_ancestor: true,
            is_file_query: false,
            requires_scope_filter: true,
        };

        let scope = loc.relative_scope();
        assert_eq!(scope, Some(PathBuf::from("src/utils")));
    }

    /// Scope equal to the index root with no filter requested yields `None`.
    #[test]
    fn relative_scope_same_dir() {
        let loc = IndexLocation {
            index_root: PathBuf::from("/project"),
            query_scope: PathBuf::from("/project"),
            is_ancestor: false,
            is_file_query: false,
            requires_scope_filter: false,
        };

        let scope = loc.relative_scope();
        assert!(scope.is_none());
    }

    /// File directly in the index root still yields its file name as scope.
    #[test]
    fn relative_scope_file_in_root() {
        let loc = IndexLocation {
            index_root: PathBuf::from("/project"),
            query_scope: PathBuf::from("/project/main.rs"),
            is_ancestor: false,
            is_file_query: true,
            requires_scope_filter: true,
        };

        let scope = loc.relative_scope();
        assert_eq!(scope, Some(PathBuf::from("main.rs")));
    }

    /// Simple directory scope: unquoted recursive glob, ANDed with the query.
    #[test]
    fn augment_query_with_scope_basic() {
        let result = augment_query_with_scope("kind:function", Path::new("src"), false);
        assert_eq!(result, "(kind:function) AND path:src/**");
    }

    /// Empty query: only the path filter is emitted.
    #[test]
    fn augment_query_with_scope_empty_query() {
        let result = augment_query_with_scope("", Path::new("src"), false);
        assert_eq!(result, "path:src/**");
    }

    /// Empty scope: the query is returned unchanged.
    #[test]
    fn augment_query_with_scope_empty_path() {
        let result = augment_query_with_scope("kind:fn", Path::new(""), false);
        assert_eq!(result, "kind:fn");
    }

    /// File scope: exact path match without a glob suffix.
    #[test]
    fn augment_query_with_scope_file_query() {
        let result = augment_query_with_scope("kind:function", Path::new("src/main.rs"), true);
        assert_eq!(result, "(kind:function) AND path:src/main.rs");
    }

    /// Directory scope: recursive `/**` glob suffix.
    #[test]
    fn augment_query_with_scope_directory_query() {
        let result = augment_query_with_scope("kind:function", Path::new("src"), false);
        assert_eq!(result, "(kind:function) AND path:src/**");
    }

    /// File path containing a space is quoted.
    #[test]
    fn augment_query_file_with_spaces() {
        let result =
            augment_query_with_scope("kind:function", Path::new("my project/main.rs"), true);
        assert_eq!(result, "(kind:function) AND path:\"my project/main.rs\"");
    }

    /// Directory path containing a space is quoted, glob suffix inside quotes.
    #[test]
    fn augment_query_with_scope_path_with_spaces() {
        let result = augment_query_with_scope("kind:function", Path::new("my project/src"), false);
        assert_eq!(result, "(kind:function) AND path:\"my project/src/**\"");
    }

    #[test]
    fn augment_query_with_scope_path_with_glob_chars() {
        // Paths with glob metacharacters must be quoted and double-escaped:
        // - CLI emits `\\[` so lexer returns `\[` for globset to interpret as literal `[`
        let result = augment_query_with_scope("kind:function", Path::new("src/[test]"), false);
        assert_eq!(result, "(kind:function) AND path:\"src/\\\\[test\\\\]/**\"");
    }

    /// The original query is parenthesized so `OR` does not bind to the filter.
    #[test]
    fn augment_query_preserves_precedence() {
        let result = augment_query_with_scope("kind:fn OR kind:method", Path::new("src"), false);
        assert_eq!(result, "(kind:fn OR kind:method) AND path:src/**");
    }

    /// A user-supplied `path:` predicate is kept; the scope filter is added on top.
    #[test]
    fn augment_query_with_existing_path_predicate() {
        let result =
            augment_query_with_scope("kind:fn AND path:*.rs", Path::new("src/utils"), false);
        assert_eq!(result, "(kind:fn AND path:*.rs) AND path:src/utils/**");
    }

    #[test]
    fn augment_query_path_with_leading_dot() {
        // Paths starting with `.` (e.g. hidden directories or git worktrees under
        // `.worktrees/`) must be quoted because the query lexer's word-start rule
        // accepts only ASCII alpha + `_`. An unquoted `path:.worktrees/...` value
        // parses as `path:` followed by a stray `.` and fails.
        let result = augment_query_with_scope(
            "kind:function",
            Path::new(".worktrees/phase3a/test-fixtures/cli-basic"),
            false,
        );
        assert_eq!(
            result,
            "(kind:function) AND path:\".worktrees/phase3a/test-fixtures/cli-basic/**\""
        );
    }

    #[test]
    fn augment_query_path_with_leading_digit() {
        // Paths starting with a digit similarly violate the lexer's word-start rule.
        let result =
            augment_query_with_scope("kind:function", Path::new("2024-archive/src"), false);
        assert_eq!(result, "(kind:function) AND path:\"2024-archive/src/**\"");
    }

    #[test]
    #[cfg(unix)]
    fn escape_path_with_backslash_on_unix() {
        // Backslash in path gets double-escaped: `\` → `\\\\` (4 chars in raw string)
        // So lexer returns `\\` and globset matches literal backslash
        let result = escape_path_for_query(Path::new("src/file\\name"));
        assert_eq!(result, "src/file\\\\\\\\name");
    }

    /// Test that augmented queries with special characters can be parsed by the lexer.
    /// This ensures the escaping strategy produces valid query syntax.
    #[test]
    fn augmented_queries_are_parseable() {
        use sqry_core::query::Lexer;

        // Each case is (query, relative scope, is_file_query).
        let test_cases = [
            // Simple path (no escaping needed)
            ("kind:fn", Path::new("src"), false),
            // Path with spaces (quoted)
            ("kind:fn", Path::new("my project/src"), false),
            // Path with glob metacharacters (quoted + escaped)
            ("kind:fn", Path::new("src/[test]"), false),
            ("kind:fn", Path::new("src/test*"), false),
            ("kind:fn", Path::new("src/test?"), false),
            ("kind:fn", Path::new("src/{a,b}"), false),
            // File queries
            ("kind:fn", Path::new("src/main.rs"), true),
            ("kind:fn", Path::new("src/[test]/main.rs"), true),
            // Complex query with special path
            ("kind:fn OR kind:method", Path::new("src/[utils]"), false),
        ];

        for (query, path, is_file) in test_cases {
            let augmented = augment_query_with_scope(query, path, is_file);
            let mut lexer = Lexer::new(&augmented);
            let result = lexer.tokenize();
            assert!(
                result.is_ok(),
                "Failed to parse augmented query for path {:?}: {:?}\nQuery: {}",
                path,
                result.err(),
                augmented
            );
        }
    }
}