1use std::path::Path;
11
12use regex::Regex;
13
14#[derive(Debug)]
16pub struct Query {
17 pub must: Vec<Matcher>,
19 pub should: Vec<Matcher>,
21 pub must_not: Vec<Matcher>,
23}
24
25#[derive(Debug)]
27pub struct Matcher {
28 pub regex: Regex,
29 pub original: String,
30}
31
32impl Query {
33 pub fn matches_line(&self, line: &str) -> bool {
35 if self.must_not.iter().any(|m| m.regex.is_match(line)) {
37 return false;
38 }
39
40 let must_ok = self.must.is_empty() || self.must.iter().all(|m| m.regex.is_match(line));
42
43 let should_ok =
45 self.should.is_empty() || self.should.iter().any(|m| m.regex.is_match(line));
46
47 must_ok && should_ok
48 }
49
50 pub fn matches_file(&self, content: &str) -> bool {
55 if self
56 .must_not
57 .iter()
58 .any(|m| content.lines().any(|l| m.regex.is_match(l)))
59 {
60 return false;
61 }
62
63 let must_ok = self
64 .must
65 .iter()
66 .all(|m| content.lines().any(|l| m.regex.is_match(l)));
67
68 let should_ok = self.should.is_empty()
69 || self
70 .should
71 .iter()
72 .any(|m| content.lines().any(|l| m.regex.is_match(l)));
73
74 must_ok && should_ok
75 }
76
77 pub fn matching_lines(&self, content: &str) -> Vec<usize> {
80 content
81 .lines()
82 .enumerate()
83 .filter(|(_, line)| {
84 let any_positive = self.must.iter().any(|m| m.regex.is_match(line))
85 || self.should.iter().any(|m| m.regex.is_match(line));
86 let no_negative = !self.must_not.iter().any(|m| m.regex.is_match(line));
87 any_positive && no_negative
88 })
89 .map(|(idx, _)| idx)
90 .collect()
91 }
92
93 pub fn is_simple(&self) -> bool {
95 self.must.is_empty() && self.must_not.is_empty() && self.should.len() == 1
96 }
97
98 pub fn simple_regex(&self) -> Option<&Regex> {
100 if self.is_simple() {
101 Some(&self.should[0].regex)
102 } else {
103 None
104 }
105 }
106}
107
108pub fn parse(input: &str, exact: bool, ignore_case: bool) -> std::result::Result<Query, String> {
121 let tokens = tokenize(input);
122
123 let mut must = Vec::new();
124 let mut should = Vec::new();
125 let mut must_not = Vec::new();
126 let mut has_and = false;
127 let mut next_not = false;
128
129 for token in &tokens {
131 if matches!(token, Token::And) {
132 has_and = true;
133 break;
134 }
135 }
136
137 for token in tokens {
138 match token {
139 Token::And => {
140 }
142 Token::Or => {
143 }
145 Token::Not => {
146 next_not = true;
147 }
148 Token::Term(term) => {
149 let matcher = build_matcher(&term, false, exact, ignore_case)?;
150 if next_not {
151 must_not.push(matcher);
152 next_not = false;
153 } else if has_and {
154 must.push(matcher);
155 } else {
156 should.push(matcher);
157 }
158 }
159 Token::Phrase(phrase) => {
160 let matcher = build_matcher(&phrase, true, true, ignore_case)?;
161 if next_not {
162 must_not.push(matcher);
163 next_not = false;
164 } else if has_and {
165 must.push(matcher);
166 } else {
167 should.push(matcher);
168 }
169 }
170 }
171 }
172
173 Ok(Query {
174 must,
175 should,
176 must_not,
177 })
178}
179
180fn build_matcher(
181 term: &str,
182 is_phrase: bool,
183 exact: bool,
184 ignore_case: bool,
185) -> std::result::Result<Matcher, String> {
186 let pattern = if is_phrase || exact {
187 format!(r"\b{}\b", regex::escape(term))
189 } else {
190 let stemmed = stem(term);
192 if stemmed.len() < term.len() {
193 format!(r"(?i)\b{}\w*", regex::escape(&stemmed))
195 } else {
196 regex::escape(term)
198 }
199 };
200
201 let regex = regex::RegexBuilder::new(&pattern)
202 .case_insensitive(ignore_case || (!exact && !is_phrase))
203 .build()
204 .map_err(|e| format!("invalid pattern '{term}': {e}"))?;
205
206 Ok(Matcher {
207 regex,
208 original: term.to_string(),
209 })
210}
211
212#[derive(Debug, PartialEq)]
215enum Token {
216 Term(String),
217 Phrase(String),
218 And,
219 Or,
220 Not,
221}
222
223fn tokenize(input: &str) -> Vec<Token> {
224 let mut tokens = Vec::new();
225 let mut chars = input.chars().peekable();
226
227 while let Some(&ch) = chars.peek() {
228 if ch.is_whitespace() {
229 chars.next();
230 continue;
231 }
232
233 if ch == '"' {
234 chars.next(); let mut phrase = String::new();
236 while let Some(&c) = chars.peek() {
237 if c == '"' {
238 chars.next(); break;
240 }
241 phrase.push(c);
242 chars.next();
243 }
244 if !phrase.is_empty() {
245 tokens.push(Token::Phrase(phrase));
246 }
247 continue;
248 }
249
250 let mut word = String::new();
252 while let Some(&c) = chars.peek() {
253 if c.is_whitespace() || c == '"' {
254 break;
255 }
256 word.push(c);
257 chars.next();
258 }
259
260 match word.as_str() {
261 "AND" => tokens.push(Token::And),
262 "OR" => tokens.push(Token::Or),
263 "NOT" => tokens.push(Token::Not),
264 _ => tokens.push(Token::Term(word)),
265 }
266 }
267
268 tokens
269}
270
271pub fn stem(word: &str) -> String {
285 let w = word.to_lowercase();
286
287 let suffixes = &[
289 "ication", "ation", "ition", "ction", "tion", "sion", "ment", "ness", "able", "ible", "ally", "ence", "ance", "ings", "ated", "ized", "ised", "cting", "less", "ful", "ous", "ive", "ity", "ing", "ers", "ies", "ied", "ion", "ed", "er", "ate", "ly", "al", "or", "ar", "s", ];
326
327 let min_stem = 4;
328
329 for suffix in suffixes {
330 if w.len() > suffix.len() + min_stem && w.ends_with(suffix) {
331 return w[..w.len() - suffix.len()].to_string();
332 }
333 }
334
335 w
336}
337
338pub fn language_extensions(language: &str) -> Option<&'static [&'static str]> {
342 match language.to_lowercase().as_str() {
343 "rust" | "rs" => Some(&["rs"]),
344 "typescript" | "ts" => Some(&["ts", "tsx"]),
345 "javascript" | "js" => Some(&["js", "jsx", "mjs", "cjs"]),
346 "python" | "py" => Some(&["py", "pyi"]),
347 "go" | "golang" => Some(&["go"]),
348 "java" => Some(&["java"]),
349 "ruby" | "rb" => Some(&["rb"]),
350 "c" => Some(&["c", "h"]),
351 "cpp" | "c++" | "cxx" => Some(&["cpp", "cc", "cxx", "hpp", "hxx", "h"]),
352 "swift" => Some(&["swift"]),
353 "php" => Some(&["php"]),
354 "elixir" | "ex" => Some(&["ex", "exs"]),
355 "zig" => Some(&["zig"]),
356 "lua" => Some(&["lua"]),
357 "shell" | "bash" | "sh" => Some(&["sh", "bash", "zsh"]),
358 "toml" => Some(&["toml"]),
359 "yaml" | "yml" => Some(&["yaml", "yml"]),
360 "json" => Some(&["json"]),
361 "markdown" | "md" => Some(&["md", "markdown"]),
362 _ => None,
363 }
364}
365
366pub fn is_test_file(path: &Path) -> bool {
368 let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
369 let name_lower = name.to_lowercase();
370
371 name_lower.ends_with("_test.go")
373 || name_lower.ends_with("_test.rs")
374 || name_lower.ends_with(".test.ts")
375 || name_lower.ends_with(".test.tsx")
376 || name_lower.ends_with(".test.js")
377 || name_lower.ends_with(".test.jsx")
378 || name_lower.ends_with(".spec.ts")
379 || name_lower.ends_with(".spec.tsx")
380 || name_lower.ends_with(".spec.js")
381 || name_lower.ends_with(".spec.jsx")
382 || name_lower.starts_with("test_")
383 || name_lower == "conftest.py"
384 || path.components().any(|c| {
385 let s = c.as_os_str().to_str().unwrap_or("");
386 s == "tests" || s == "test" || s == "__tests__" || s == "spec" || s == "specs"
387 })
388}
389
390#[cfg(test)]
391mod tests {
392 use super::*;
393
394 #[test]
395 fn tokenize_simple() {
396 let tokens = tokenize("hello world");
397 assert_eq!(
398 tokens,
399 vec![Token::Term("hello".into()), Token::Term("world".into())]
400 );
401 }
402
403 #[test]
404 fn tokenize_boolean() {
405 let tokens = tokenize("error AND handling");
406 assert_eq!(
407 tokens,
408 vec![
409 Token::Term("error".into()),
410 Token::And,
411 Token::Term("handling".into())
412 ]
413 );
414 }
415
416 #[test]
417 fn tokenize_not() {
418 let tokens = tokenize("database NOT sqlite");
419 assert_eq!(
420 tokens,
421 vec![
422 Token::Term("database".into()),
423 Token::Not,
424 Token::Term("sqlite".into())
425 ]
426 );
427 }
428
429 #[test]
430 fn tokenize_phrase() {
431 let tokens = tokenize("\"user authentication\" AND error");
432 assert_eq!(
433 tokens,
434 vec![
435 Token::Phrase("user authentication".into()),
436 Token::And,
437 Token::Term("error".into())
438 ]
439 );
440 }
441
442 #[test]
443 fn tokenize_or() {
444 let tokens = tokenize("login OR auth");
445 assert_eq!(
446 tokens,
447 vec![
448 Token::Term("login".into()),
449 Token::Or,
450 Token::Term("auth".into())
451 ]
452 );
453 }
454
455 #[test]
456 fn parse_and_query() {
457 let q = parse("error AND handling", false, false).unwrap();
458 assert_eq!(q.must.len(), 2);
459 assert_eq!(q.should.len(), 0);
460 assert!(q.matches_line("error handling here"));
461 assert!(!q.matches_line("just an error"));
462 }
463
464 #[test]
465 fn parse_or_query() {
466 let q = parse("login OR auth", false, false).unwrap();
467 assert_eq!(q.should.len(), 2);
468 assert!(q.matches_line("login page"));
469 assert!(q.matches_line("auth token"));
470 assert!(!q.matches_line("nothing here"));
471 }
472
473 #[test]
474 fn parse_not_query() {
475 let q = parse("database NOT sqlite", false, false).unwrap();
476 assert!(q.matches_line("database connection"));
477 assert!(!q.matches_line("database sqlite connection"));
478 }
479
480 #[test]
481 fn parse_phrase_query() {
482 let q = parse("\"user authentication\"", true, false).unwrap();
483 assert!(q.matches_line("the user authentication system"));
484 assert!(!q.matches_line("the user and authentication"));
485 }
486
487 #[test]
488 fn parse_simple_term() {
489 let q = parse("ToolOutput", false, false).unwrap();
490 assert!(q.is_simple());
491 assert!(q.matches_line("fn text() -> ToolOutput {"));
492 }
493
494 #[test]
495 fn stem_common_words() {
496 assert_eq!(stem("authentication"), "authent");
497 assert_eq!(stem("handling"), "handl");
498 assert_eq!(stem("errors"), "error");
499 assert_eq!(stem("handler"), "handl");
500 assert_eq!(stem("performance"), "perform");
501 assert_eq!(stem("connected"), "connect");
503 }
504
505 #[test]
506 fn stem_short_words_unchanged() {
507 assert_eq!(stem("run"), "run");
509 assert_eq!(stem("go"), "go");
510 assert_eq!(stem("get"), "get");
511 }
512
513 #[test]
514 fn stemmed_search_matches_variants() {
515 let q = parse("authenticate", false, false).unwrap();
516 assert!(q.matches_line("authentication required"));
518 assert!(q.matches_line("authenticated user"));
519 assert!(q.matches_line("fn authenticate()"));
520 }
521
522 #[test]
523 fn exact_search_no_stemming() {
524 let q = parse("authenticate", true, false).unwrap();
525 assert!(q.matches_line("fn authenticate() {"));
526 assert!(!q.matches_line("authentication required"));
528 }
529
530 #[test]
531 fn file_level_and_matching() {
532 let content = "fn handle_error() {\n log(\"something\");\n}\n\nfn setup_logging() {\n // configure\n}";
533 let q = parse("error AND logging", false, false).unwrap();
534 assert!(q.matches_file(content));
535
536 let q2 = parse("error AND database", false, false).unwrap();
537 assert!(!q2.matches_file(content));
538 }
539
540 #[test]
541 fn test_file_detection() {
542 assert!(is_test_file(Path::new("src/auth_test.go")));
543 assert!(is_test_file(Path::new("src/auth.test.ts")));
544 assert!(is_test_file(Path::new("src/auth.spec.js")));
545 assert!(is_test_file(Path::new("test_auth.py")));
546 assert!(is_test_file(Path::new("tests/integration.rs")));
547 assert!(is_test_file(Path::new("__tests__/auth.tsx")));
548 assert!(!is_test_file(Path::new("src/auth.rs")));
549 assert!(!is_test_file(Path::new("src/main.ts")));
550 }
551
552 #[test]
553 fn language_extension_mapping() {
554 assert_eq!(language_extensions("rust"), Some(&["rs"][..]));
555 assert_eq!(language_extensions("typescript"), Some(&["ts", "tsx"][..]));
556 assert_eq!(language_extensions("python"), Some(&["py", "pyi"][..]));
557 assert_eq!(language_extensions("unknown"), None);
558 }
559}