use std::path::Path;
use regex::Regex;
/// A search query split into three groups of matchers.
///
/// As implemented by `matches_line`, a line is accepted when no `must_not`
/// matcher hits it, every `must` matcher hits it, and, if `should` is not
/// empty, at least one `should` matcher hits it.
#[derive(Debug)]
pub struct Query {
/// Matchers that all have to hit.
pub must: Vec<Matcher>,
/// Matchers of which at least one has to hit (ignored when empty).
pub should: Vec<Matcher>,
/// Matchers that veto a match as soon as any one of them hits.
pub must_not: Vec<Matcher>,
}
/// A single compiled search term.
#[derive(Debug)]
pub struct Matcher {
/// Compiled pattern that is run against each line.
pub regex: Regex,
/// The term text this matcher was built from, before escaping or stemming.
pub original: String,
}
impl Query {
pub fn matches_line(&self, line: &str) -> bool {
if self.must_not.iter().any(|m| m.regex.is_match(line)) {
return false;
}
let must_ok = self.must.is_empty() || self.must.iter().all(|m| m.regex.is_match(line));
let should_ok =
self.should.is_empty() || self.should.iter().any(|m| m.regex.is_match(line));
must_ok && should_ok
}
pub fn matches_file(&self, content: &str) -> bool {
if self
.must_not
.iter()
.any(|m| content.lines().any(|l| m.regex.is_match(l)))
{
return false;
}
let must_ok = self
.must
.iter()
.all(|m| content.lines().any(|l| m.regex.is_match(l)));
let should_ok = self.should.is_empty()
|| self
.should
.iter()
.any(|m| content.lines().any(|l| m.regex.is_match(l)));
must_ok && should_ok
}
pub fn matching_lines(&self, content: &str) -> Vec<usize> {
content
.lines()
.enumerate()
.filter(|(_, line)| {
let any_positive = self.must.iter().any(|m| m.regex.is_match(line))
|| self.should.iter().any(|m| m.regex.is_match(line));
let no_negative = !self.must_not.iter().any(|m| m.regex.is_match(line));
any_positive && no_negative
})
.map(|(idx, _)| idx)
.collect()
}
pub fn is_simple(&self) -> bool {
self.must.is_empty() && self.must_not.is_empty() && self.should.len() == 1
}
pub fn simple_regex(&self) -> Option<&Regex> {
if self.is_simple() {
Some(&self.should[0].regex)
} else {
None
}
}
}
/// Parses a query string into a [`Query`].
///
/// * A term or phrase directly preceded by `NOT` goes into `must_not`.
/// * If the input contains an `AND` anywhere, every other term or phrase goes
///   into `must`; otherwise it goes into `should`.
/// * `OR` carries no information of its own and is skipped.
///
/// `exact` disables stemming for bare terms (phrases are always exact), and
/// `ignore_case` is forwarded to the matcher builder.
///
/// # Errors
///
/// Returns the message produced by the matcher builder for the first term
/// whose pattern cannot be compiled.
pub fn parse(input: &str, exact: bool, ignore_case: bool) -> std::result::Result<Query, String> {
    let tokens = tokenize(input);
    let conjunctive = tokens.iter().any(|t| matches!(t, Token::And));

    let mut query = Query {
        must: Vec::new(),
        should: Vec::new(),
        must_not: Vec::new(),
    };
    let mut negate_next = false;

    for token in tokens {
        let matcher = match token {
            Token::And | Token::Or => continue,
            Token::Not => {
                negate_next = true;
                continue;
            }
            Token::Term(text) => build_matcher(&text, false, exact, ignore_case)?,
            Token::Phrase(text) => build_matcher(&text, true, true, ignore_case)?,
        };

        let bucket = if negate_next {
            // A NOT only applies to the one term that follows it.
            negate_next = false;
            &mut query.must_not
        } else if conjunctive {
            &mut query.must
        } else {
            &mut query.should
        };
        bucket.push(matcher);
    }

    Ok(query)
}
/// Compiles one search term into a [`Matcher`].
///
/// * Phrases and exact terms are escaped and matched literally as whole
///   words.
/// * Any other term is reduced with [`stem`]. If a suffix was stripped, the
///   pattern matches every word that starts with the stem; otherwise the
///   escaped term is matched as a plain substring.
///
/// Matching is case-insensitive when `ignore_case` is set, and always for
/// non-exact, non-phrase terms (their stem has been lower-cased).
///
/// # Errors
///
/// Returns a message naming `term` if the regex cannot be built.
fn build_matcher(
    term: &str,
    is_phrase: bool,
    exact: bool,
    ignore_case: bool,
) -> std::result::Result<Matcher, String> {
    /// Returns `\b` when `edge` is a word character and an empty string
    /// otherwise.
    ///
    /// `\b` only matches between a word and a non-word character. Putting it
    /// next to punctuation (as in `#[test]` or `foo()`) would demand a word
    /// character on the far side, so the term would not match its own text
    /// at a line edge or next to whitespace.
    /// `is_alphanumeric` plus `_` is used as an approximation of the regex
    /// engine's notion of a word character.
    fn boundary(edge: Option<char>) -> &'static str {
        match edge {
            Some(c) if c.is_alphanumeric() || c == '_' => r"\b",
            _ => "",
        }
    }

    let literal = is_phrase || exact;
    let pattern = if literal {
        format!(
            "{}{}{}",
            boundary(term.chars().next()),
            regex::escape(term),
            boundary(term.chars().next_back())
        )
    } else {
        let stemmed = stem(term);
        if stemmed.len() < term.len() {
            // Case-insensitivity for this branch is set on the builder below,
            // so no inline `(?i)` flag is needed.
            format!(
                r"{}{}\w*",
                boundary(stemmed.chars().next()),
                regex::escape(&stemmed)
            )
        } else {
            regex::escape(term)
        }
    };

    let regex = regex::RegexBuilder::new(&pattern)
        .case_insensitive(ignore_case || !literal)
        .build()
        .map_err(|e| format!("invalid pattern '{term}': {e}"))?;

    Ok(Matcher {
        regex,
        original: term.to_string(),
    })
}
/// Lexical unit produced by `tokenize`.
#[derive(Debug, PartialEq)]
enum Token {
/// A bare word that is not one of the operator keywords.
Term(String),
/// The text found between a pair of double quotes.
Phrase(String),
/// The keyword `AND`.
And,
/// The keyword `OR`.
Or,
/// The keyword `NOT`.
Not,
}
/// Splits a raw query string into [`Token`]s.
///
/// * Whitespace separates tokens and is otherwise dropped.
/// * Text between double quotes becomes one `Token::Phrase`. An empty pair of
///   quotes produces nothing, and a missing closing quote lets the phrase run
///   to the end of the input.
/// * The bare words `AND`, `OR` and `NOT` (upper-case only) become operator
///   tokens; every other bare word becomes a `Token::Term`. A bare word ends
///   at whitespace or at a double quote.
fn tokenize(input: &str) -> Vec<Token> {
    let mut out = Vec::new();
    let mut rest = input.chars().peekable();

    loop {
        // Drop the separators in front of the next token.
        while rest.next_if(|c| c.is_whitespace()).is_some() {}

        match rest.peek().copied() {
            None => break,
            Some('"') => {
                rest.next(); // opening quote
                // `take_while` also consumes the closing quote, if present.
                let quoted: String = rest.by_ref().take_while(|&c| c != '"').collect();
                if !quoted.is_empty() {
                    out.push(Token::Phrase(quoted));
                }
            }
            Some(_) => {
                let mut bare = String::new();
                while let Some(c) = rest.next_if(|&c| !c.is_whitespace() && c != '"') {
                    bare.push(c);
                }
                out.push(match bare.as_str() {
                    "AND" => Token::And,
                    "OR" => Token::Or,
                    "NOT" => Token::Not,
                    _ => Token::Term(bare),
                });
            }
        }
    }

    out
}
/// Lower-cases `word` and strips the first suffix from a fixed list that
/// leaves a stem longer than four bytes.
///
/// Suffixes are tried in list order, so an earlier entry wins over a later
/// one. When nothing can be stripped, the lower-cased word is returned as is.
pub fn stem(word: &str) -> String {
    const MIN_STEM: usize = 4;
    const SUFFIXES: &[&str] = &[
        "ication", "ation", "ition", "ction", "tion", "sion", "ment", "ness", "able", "ible",
        "ally", "ence", "ance", "ings", "ated", "ized", "ised", "cting", "less", "ful", "ous",
        "ive", "ity", "ing", "ers", "ies", "ied", "ion", "ed", "er", "ate", "ly", "al", "or",
        "ar", "s",
    ];

    let mut lowered = word.to_lowercase();

    // Length of the stem left by the first suffix that leaves enough behind.
    let stem_len = SUFFIXES.iter().find_map(|suffix| {
        lowered
            .strip_suffix(*suffix)
            .map(str::len)
            .filter(|&remaining| remaining > MIN_STEM)
    });

    if let Some(len) = stem_len {
        // The cut sits right before an ASCII suffix, hence on a char boundary.
        lowered.truncate(len);
    }
    lowered
}
/// Maps a language name or common alias (case-insensitive) to the file
/// extensions associated with it, without leading dots.
///
/// Returns `None` for names that are not in the table.
pub fn language_extensions(language: &str) -> Option<&'static [&'static str]> {
    let key = language.to_lowercase();

    let extensions: &'static [&'static str] = match key.as_str() {
        "rust" | "rs" => &["rs"],
        "typescript" | "ts" => &["ts", "tsx"],
        "javascript" | "js" => &["js", "jsx", "mjs", "cjs"],
        "python" | "py" => &["py", "pyi"],
        "go" | "golang" => &["go"],
        "java" => &["java"],
        "ruby" | "rb" => &["rb"],
        "c" => &["c", "h"],
        "cpp" | "c++" | "cxx" => &["cpp", "cc", "cxx", "hpp", "hxx", "h"],
        "swift" => &["swift"],
        "php" => &["php"],
        "elixir" | "ex" => &["ex", "exs"],
        "zig" => &["zig"],
        "lua" => &["lua"],
        "shell" | "bash" | "sh" => &["sh", "bash", "zsh"],
        "toml" => &["toml"],
        "yaml" | "yml" => &["yaml", "yml"],
        "json" => &["json"],
        "markdown" | "md" => &["md", "markdown"],
        _ => return None,
    };

    Some(extensions)
}
/// Heuristically decides whether `path` points at a test file.
///
/// A path counts as a test file when its lower-cased file name is
/// `conftest.py`, starts with `test_`, or ends with one of the known test
/// suffixes (`_test.go`, `.spec.ts`, ...), or when any path component is
/// exactly `tests`, `test`, `__tests__`, `spec` or `specs`.
pub fn is_test_file(path: &Path) -> bool {
    const TEST_SUFFIXES: &[&str] = &[
        "_test.go",
        "_test.rs",
        ".test.ts",
        ".test.tsx",
        ".test.js",
        ".test.jsx",
        ".spec.ts",
        ".spec.tsx",
        ".spec.js",
        ".spec.jsx",
    ];

    // A missing or non-UTF-8 file name is treated as empty.
    let file_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    if file_name == "conftest.py"
        || file_name.starts_with("test_")
        || TEST_SUFFIXES.iter().any(|suffix| file_name.ends_with(*suffix))
    {
        return true;
    }

    // Directory names are compared case-sensitively and as whole components.
    path.components().any(|part| {
        matches!(
            part.as_os_str().to_str(),
            Some("tests" | "test" | "__tests__" | "spec" | "specs")
        )
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an owned `Token::Term`.
    fn term(text: &str) -> Token {
        Token::Term(text.to_string())
    }

    /// Parses `query` with stemming enabled and `ignore_case` off.
    fn stemmed(query: &str) -> Query {
        parse(query, false, false).unwrap()
    }

    // --- tokenizer -------------------------------------------------------

    #[test]
    fn tokenize_simple() {
        let expected = vec![term("hello"), term("world")];
        assert_eq!(tokenize("hello world"), expected);
    }

    #[test]
    fn tokenize_boolean() {
        let expected = vec![term("error"), Token::And, term("handling")];
        assert_eq!(tokenize("error AND handling"), expected);
    }

    #[test]
    fn tokenize_not() {
        let expected = vec![term("database"), Token::Not, term("sqlite")];
        assert_eq!(tokenize("database NOT sqlite"), expected);
    }

    #[test]
    fn tokenize_phrase() {
        let expected = vec![
            Token::Phrase("user authentication".to_string()),
            Token::And,
            term("error"),
        ];
        assert_eq!(tokenize("\"user authentication\" AND error"), expected);
    }

    #[test]
    fn tokenize_or() {
        let expected = vec![term("login"), Token::Or, term("auth")];
        assert_eq!(tokenize("login OR auth"), expected);
    }

    // --- parser and line matching ----------------------------------------

    #[test]
    fn parse_and_query() {
        let query = stemmed("error AND handling");
        assert_eq!(query.must.len(), 2);
        assert_eq!(query.should.len(), 0);
        assert!(query.matches_line("error handling here"));
        assert!(!query.matches_line("just an error"));
    }

    #[test]
    fn parse_or_query() {
        let query = stemmed("login OR auth");
        assert_eq!(query.should.len(), 2);
        for line in ["login page", "auth token"].iter() {
            assert!(query.matches_line(line), "expected a match for {line:?}");
        }
        assert!(!query.matches_line("nothing here"));
    }

    #[test]
    fn parse_not_query() {
        let query = stemmed("database NOT sqlite");
        assert!(query.matches_line("database connection"));
        assert!(!query.matches_line("database sqlite connection"));
    }

    #[test]
    fn parse_phrase_query() {
        let query = parse("\"user authentication\"", true, false).unwrap();
        assert!(query.matches_line("the user authentication system"));
        assert!(!query.matches_line("the user and authentication"));
    }

    #[test]
    fn parse_simple_term() {
        let query = stemmed("ToolOutput");
        assert!(query.is_simple());
        assert!(query.matches_line("fn text() -> ToolOutput {"));
    }

    // --- stemming --------------------------------------------------------

    #[test]
    fn stem_common_words() {
        let cases = [
            ("authentication", "authent"),
            ("handling", "handl"),
            ("errors", "error"),
            ("handler", "handl"),
            ("performance", "perform"),
            ("connected", "connect"),
        ];
        for &(word, expected) in cases.iter() {
            assert_eq!(stem(word), expected, "stem({word:?})");
        }
    }

    #[test]
    fn stem_short_words_unchanged() {
        for &word in ["run", "go", "get"].iter() {
            assert_eq!(stem(word), word);
        }
    }

    #[test]
    fn stemmed_search_matches_variants() {
        let query = stemmed("authenticate");
        let variants = [
            "authentication required",
            "authenticated user",
            "fn authenticate()",
        ];
        for line in variants.iter() {
            assert!(query.matches_line(line), "expected a match for {line:?}");
        }
    }

    #[test]
    fn exact_search_no_stemming() {
        let query = parse("authenticate", true, false).unwrap();
        assert!(query.matches_line("fn authenticate() {"));
        assert!(!query.matches_line("authentication required"));
    }

    // --- file-level matching ---------------------------------------------

    #[test]
    fn file_level_and_matching() {
        let content = "fn handle_error() {\n log(\"something\");\n}\n\nfn setup_logging() {\n // configure\n}";

        let both_present = stemmed("error AND logging");
        assert!(both_present.matches_file(content));

        let one_missing = stemmed("error AND database");
        assert!(!one_missing.matches_file(content));
    }

    // --- path and language helpers ---------------------------------------

    #[test]
    fn test_file_detection() {
        let test_paths = [
            "src/auth_test.go",
            "src/auth.test.ts",
            "src/auth.spec.js",
            "test_auth.py",
            "tests/integration.rs",
            "__tests__/auth.tsx",
        ];
        for path in test_paths.iter() {
            assert!(is_test_file(Path::new(path)), "{path} should be a test file");
        }

        let regular_paths = ["src/auth.rs", "src/main.ts"];
        for path in regular_paths.iter() {
            assert!(!is_test_file(Path::new(path)), "{path} should not be a test file");
        }
    }

    #[test]
    fn language_extension_mapping() {
        let known: [(&str, &[&str]); 3] = [
            ("rust", &["rs"][..]),
            ("typescript", &["ts", "tsx"][..]),
            ("python", &["py", "pyi"][..]),
        ];
        for &(language, extensions) in known.iter() {
            assert_eq!(language_extensions(language), Some(extensions));
        }
        assert_eq!(language_extensions("unknown"), None);
    }
}