use serde::{Deserialize, Serialize};
/// Deserializable description of one grep-style search.
///
/// Only `pattern` is required when deserializing; every other field carries a
/// serde default (`false`, `0`, `None`, or `DEFAULT_MAX_RESULTS`).
#[derive(Debug, Clone, Deserialize)]
pub struct GrepRequest {
/// Pattern to search for: a regex, or a literal string when `fixed_strings` is set.
pub pattern: String,
/// Optional index identifier.
/// NOTE(review): not read anywhere in this file; presumably selects which
/// index to search. Confirm against the caller.
#[serde(default)]
pub index_id: Option<String>,
/// Match without regard to letter case.
#[serde(default)]
pub case_insensitive: bool,
/// Number of lines preceding each match to return as context.
/// Ignored when `context` is set.
#[serde(default)]
pub context_before: usize,
/// Number of lines following each match to return as context.
/// Ignored when `context` is set.
#[serde(default)]
pub context_after: usize,
/// Symmetric context size; when present it replaces both `context_before`
/// and `context_after` (see `CompiledGrep::compile`).
#[serde(default)]
pub context: Option<usize>,
/// Glob pattern that a file's relative path must match
/// (see `CompiledGrep::path_matches`); `None` accepts every path.
#[serde(default)]
pub glob: Option<String>,
/// Search the file content as one buffer so a match may span lines;
/// also lets `.` match `\n`.
#[serde(default)]
pub multiline: bool,
/// Treat `pattern` as a literal string by regex-escaping it before compiling.
#[serde(default)]
pub fixed_strings: bool,
/// Report each file at most once (a placeholder entry with line 0)
/// instead of one entry per match.
#[serde(default)]
pub files_with_matches: bool,
/// Report the lines that do NOT match. The multiline search path does not
/// consult this flag.
#[serde(default)]
pub invert_match: bool,
/// Require word boundaries around the pattern (it is wrapped in `\b(?:…)\b`).
#[serde(default)]
pub word_regexp: bool,
/// Cap on the number of collected matches; defaults to `DEFAULT_MAX_RESULTS`.
/// `CompiledGrep::compile` does not read it; the search functions take the
/// cap as a separate argument.
#[serde(default = "default_max_results")]
pub max_results: usize,
}
/// Serde default hook for `GrepRequest::max_results`; serde's
/// `default = "..."` attribute needs a function path, so this simply
/// returns `DEFAULT_MAX_RESULTS`.
fn default_max_results() -> usize {
DEFAULT_MAX_RESULTS
}
/// Value used for `GrepRequest::max_results` when the request omits it.
pub const DEFAULT_MAX_RESULTS: usize = 100;
/// One search hit, or, in files-with-matches mode, a per-file placeholder.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct GrepMatch {
/// File label, copied verbatim from the `file` argument of the search call.
pub file: String,
/// 1-based line number of the hit; `0` for a files-with-matches placeholder.
pub line: usize,
/// 1-based column of the match start, counted in `char`s (not bytes).
/// Inverted matches always report `1`; placeholders report `0`.
pub column: usize,
/// Full text of the line on which the match starts; empty for placeholders.
pub text: String,
/// Lines immediately preceding the hit, oldest first (clamped at file start).
pub context_before: Vec<String>,
/// Lines immediately following the hit (clamped at file end).
pub context_after: Vec<String>,
}
/// Serializable result envelope for a search.
///
/// NOTE(review): nothing in this file constructs this type, so the field
/// semantics below are inferred from the names. Confirm against the code
/// that builds the response.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct GrepResponse {
/// The collected hits.
pub matches: Vec<GrepMatch>,
/// Presumably the number of hits reported. TODO confirm whether this counts
/// before or after truncation.
pub total: usize,
/// Presumably set when the result cap cut the search short. TODO confirm.
pub truncated: bool,
}
/// Failures raised while turning a `GrepRequest` into a `CompiledGrep`.
///
/// Both variants carry the underlying library error rendered as a string.
#[derive(Debug, thiserror::Error)]
pub enum GrepError {
/// The (possibly escaped / word-wrapped) pattern failed to compile as a regex.
#[error("invalid regex pattern: {0}")]
InvalidRegex(String),
/// The request's `glob` could not be parsed as a glob pattern.
#[error("invalid glob pattern: {0}")]
InvalidGlob(String),
}
/// A validated, ready-to-run search built by `CompiledGrep::compile`.
///
/// Holds the compiled regex and glob plus the request flags the search
/// functions consult, so per-file work never re-parses the request.
#[derive(Debug)]
pub struct CompiledGrep {
// Final regex: the request pattern after optional escaping and word wrapping.
regex: regex::Regex,
// Path filter; `None` means every path is accepted.
glob: Option<glob::Pattern>,
// Resolved context sizes (`context` already folded over before/after).
context_before: usize,
context_after: usize,
// Copied request flags that select the search strategy.
multiline: bool,
files_with_matches: bool,
invert_match: bool,
}
impl CompiledGrep {
    /// Validates `req` and pre-compiles everything needed to search files.
    ///
    /// The pattern is regex-escaped when `fixed_strings` is set and wrapped in
    /// `\b(?:…)\b` when `word_regexp` is set (in that order, so the two flags
    /// compose). A `context` value, when present, overrides both
    /// `context_before` and `context_after`.
    ///
    /// # Errors
    ///
    /// * [`GrepError::InvalidRegex`] if the final pattern does not compile.
    /// * [`GrepError::InvalidGlob`] if `req.glob` is not a valid glob.
    pub fn compile(req: &GrepRequest) -> Result<Self, GrepError> {
        let base = if req.fixed_strings {
            regex::escape(&req.pattern)
        } else {
            req.pattern.clone()
        };
        let pattern = if req.word_regexp {
            format!(r"\b(?:{base})\b")
        } else {
            base
        };

        let regex = regex::RegexBuilder::new(&pattern)
            .case_insensitive(req.case_insensitive)
            // Multiline searches run over the whole buffer. Without this,
            // `^` / `$` would anchor only at the very start / end of the file
            // and anchored patterns that match in line mode would silently
            // stop matching once `multiline` is switched on.
            .multi_line(req.multiline)
            .dot_matches_new_line(req.multiline)
            .build()
            .map_err(|e| GrepError::InvalidRegex(e.to_string()))?;

        let glob = req
            .glob
            .as_deref()
            .map(glob::Pattern::new)
            .transpose()
            .map_err(|e| GrepError::InvalidGlob(e.to_string()))?;

        // `context` (like grep's -C) wins over the individual before/after sizes.
        let (context_before, context_after) = req
            .context
            .map_or((req.context_before, req.context_after), |c| (c, c));

        Ok(Self {
            regex,
            glob,
            context_before,
            context_after,
            multiline: req.multiline,
            files_with_matches: req.files_with_matches,
            invert_match: req.invert_match,
        })
    }

    /// Returns whether `rel_path` passes the request's glob filter.
    ///
    /// Always `true` when no glob was supplied. Matching is case-sensitive,
    /// and because separators need not be matched literally, `*` may span
    /// `/` (so `*.rs` accepts `a/b/c.rs`). Leading dots get no special
    /// treatment.
    pub fn path_matches(&self, rel_path: &str) -> bool {
        self.glob.as_ref().map_or(true, |pat| {
            let opts = glob::MatchOptions {
                case_sensitive: true,
                require_literal_separator: false,
                require_literal_leading_dot: false,
            };
            pat.matches_with(rel_path, opts)
        })
    }
}
/// Searches one file's `content` and appends the hits to `out`.
///
/// `file` is only used to label the produced [`GrepMatch`] values. Nothing is
/// appended once `out` already holds `max_results` entries (or more). The
/// search strategy is chosen from the compiled flags: files-with-matches
/// takes precedence, then multiline, otherwise a plain line-by-line scan.
pub fn grep_file_content(
    file: &str,
    content: &str,
    compiled: &CompiledGrep,
    out: &mut Vec<GrepMatch>,
    max_results: usize,
) {
    let budget_exhausted = out.len() >= max_results;
    if budget_exhausted {
        return;
    }

    match (compiled.files_with_matches, compiled.multiline) {
        // Only the file name matters here, so no line table is built.
        (true, _) => emit_files_with_matches(file, content, compiled, out),
        (false, true) => {
            let lines: Vec<&str> = content.lines().collect();
            grep_multiline(file, content, &lines, compiled, out, max_results);
        }
        (false, false) => {
            let lines: Vec<&str> = content.lines().collect();
            grep_line_by_line(file, &lines, compiled, out, max_results);
        }
    }
}
/// Files-with-matches mode: appends a single placeholder entry for `file`
/// (line 0, column 0, empty text and context) when the file qualifies.
///
/// A file qualifies when:
/// * `invert_match` is set and at least one line does not match, or
/// * `multiline` is set and the regex matches somewhere in the whole buffer, or
/// * otherwise, at least one individual line matches.
///
/// Outside multiline mode the regex is applied per line so that anchors
/// (`^`, `$`) and whitespace classes behave exactly as they do in the
/// line-by-line search; testing the whole buffer would miss `^foo` on any
/// line but the first and could match across line breaks.
fn emit_files_with_matches(
    file: &str,
    content: &str,
    compiled: &CompiledGrep,
    out: &mut Vec<GrepMatch>,
) {
    let should_emit = if compiled.invert_match {
        // `any` over an empty file is already false, so no extra emptiness check.
        content.lines().any(|line| !compiled.regex.is_match(line))
    } else if compiled.multiline {
        compiled.regex.is_match(content)
    } else {
        content.lines().any(|line| compiled.regex.is_match(line))
    };

    if should_emit {
        out.push(GrepMatch {
            file: file.to_string(),
            line: 0,
            column: 0,
            text: String::new(),
            context_before: Vec::new(),
            context_after: Vec::new(),
        });
    }
}
/// Scans `lines` one at a time and appends a [`GrepMatch`] per hit until
/// `out` reaches `max_results` entries.
///
/// In normal mode a line is a hit when the regex matches it, and the column
/// is that of the first match on the line. With `invert_match` a line is a
/// hit when the regex does not match, and the column is always 1.
fn grep_line_by_line(
    file: &str,
    lines: &[&str],
    compiled: &CompiledGrep,
    out: &mut Vec<GrepMatch>,
    max_results: usize,
) {
    // How many more entries may still be appended.
    let remaining = max_results.saturating_sub(out.len());

    let hits = lines.iter().enumerate().filter_map(|(idx, line)| {
        let column = if compiled.invert_match {
            if compiled.regex.is_match(line) {
                None
            } else {
                Some(1)
            }
        } else {
            compiled
                .regex
                .find(line)
                .map(|found| byte_to_col(line, found.start()))
        };
        column.map(|col| build_match(file, lines, idx, col, compiled))
    });

    // `take` is lazy, so scanning stops as soon as the budget is used up.
    out.extend(hits.take(remaining));
}
/// Runs the regex over the whole `content` buffer and appends one
/// [`GrepMatch`] per match until `out` reaches `max_results` entries.
///
/// Each hit is attributed to the line on which the match *starts*; `lines`
/// must be the result of `content.lines()`. `invert_match` is not consulted
/// on this path.
fn grep_multiline(
    file: &str,
    content: &str,
    lines: &[&str],
    compiled: &CompiledGrep,
    out: &mut Vec<GrepMatch>,
    max_results: usize,
) {
    // An empty file has no line to attribute a (necessarily empty) match to.
    if lines.is_empty() {
        return;
    }

    // Byte offset at which each line begins. `split_inclusive('\n')` yields
    // the same number of pieces as `lines()` but keeps the terminators, so
    // the offsets stay exact for both `\n` and `\r\n` line endings.
    let mut offset = 0usize;
    let line_starts: Vec<usize> = content
        .split_inclusive('\n')
        .map(|segment| {
            let start = offset;
            offset += segment.len();
            start
        })
        .collect();

    let last_line = lines.len() - 1;
    for m in compiled.regex.find_iter(content) {
        if out.len() >= max_results {
            return;
        }
        let start = m.start();
        // `Err(i)` is the insertion point, so the containing line is `i - 1`.
        let line_idx = match line_starts.binary_search(&start) {
            Ok(i) => i,
            Err(i) => i.saturating_sub(1),
        }
        .min(last_line);
        let line_start = line_starts.get(line_idx).copied().unwrap_or(0);
        let line_text = lines[line_idx];
        // A match starting inside the line terminator is clamped to the end
        // of the line by `byte_to_col`.
        let col = byte_to_col(line_text, start.saturating_sub(line_start));
        out.push(build_match(file, lines, line_idx, col, compiled));
    }
}
/// Builds the [`GrepMatch`] for the line at 0-based index `idx`.
///
/// The reported line number is `idx + 1`, `column` is passed through
/// unchanged, and up to `context_before` / `context_after` neighbouring
/// lines are copied, clamped to the bounds of `lines`.
///
/// Context sizes come straight from the request, so all window arithmetic
/// saturates: a huge `context_after` simply means "to the end of the file"
/// instead of overflowing. An `idx` beyond `lines` yields empty text and
/// clamped context rather than a slice panic.
fn build_match(
    file: &str,
    lines: &[&str],
    idx: usize,
    column: usize,
    compiled: &CompiledGrep,
) -> GrepMatch {
    let total = lines.len();

    let before_end = idx.min(total);
    let before_start = before_end.saturating_sub(compiled.context_before);

    let after_start = idx.saturating_add(1).min(total);
    let after_end = after_start
        .saturating_add(compiled.context_after)
        .min(total);

    let to_owned_lines =
        |window: &[&str]| -> Vec<String> { window.iter().map(|s| s.to_string()).collect() };

    GrepMatch {
        file: file.to_string(),
        line: idx.saturating_add(1),
        column,
        text: lines.get(idx).copied().unwrap_or("").to_string(),
        context_before: to_owned_lines(&lines[before_start..before_end]),
        context_after: to_owned_lines(&lines[after_start..after_end]),
    }
}
/// Converts a byte offset within `line` into a 1-based column counted in
/// `char`s.
///
/// Offsets past the end of the line are clamped to the line length. An
/// offset that falls inside a multi-byte character is floored to the start
/// of that character, so the result is the column of the character
/// containing the byte rather than a slicing panic.
fn byte_to_col(line: &str, byte: usize) -> usize {
    let mut end = byte.min(line.len());
    // Offset 0 is always a char boundary, so this loop terminates.
    while !line.is_char_boundary(end) {
        end -= 1;
    }
    line[..end].chars().count() + 1
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a request for `pattern` with every option at its default.
    fn req(pattern: &str) -> GrepRequest {
        GrepRequest {
            pattern: pattern.to_string(),
            index_id: None,
            case_insensitive: false,
            context_before: 0,
            context_after: 0,
            context: None,
            glob: None,
            multiline: false,
            fixed_strings: false,
            files_with_matches: false,
            invert_match: false,
            word_regexp: false,
            max_results: DEFAULT_MAX_RESULTS,
        }
    }

    /// Compiles `r` and searches a single in-memory file with it.
    fn run(file: &str, content: &str, r: &GrepRequest) -> Vec<GrepMatch> {
        let compiled = CompiledGrep::compile(r).expect("compile");
        let mut out = Vec::new();
        grep_file_content(file, content, &compiled, &mut out, r.max_results);
        out
    }

    #[test]
    fn request_defaults_are_grep_like() {
        let r = req("x");
        assert!(!r.case_insensitive);
        assert_eq!(r.context_before, 0);
        assert_eq!(r.context_after, 0);
        assert!(r.context.is_none());
        assert!(r.glob.is_none());
        assert!(!r.multiline);
        assert!(!r.fixed_strings);
        assert!(!r.files_with_matches);
        assert!(!r.invert_match);
        assert!(!r.word_regexp);
        assert_eq!(r.max_results, DEFAULT_MAX_RESULTS);

        let parsed: GrepRequest = serde_json::from_str(r#"{"pattern":"x"}"#).unwrap();
        assert_eq!(parsed.max_results, DEFAULT_MAX_RESULTS);
        assert!(!parsed.case_insensitive);
        assert!(!parsed.fixed_strings);
        assert!(!parsed.files_with_matches);
        assert!(!parsed.invert_match);
        assert!(!parsed.word_regexp);
    }

    #[test]
    fn single_literal_match_reports_line_and_column() {
        // The matching line is indented by four spaces, so `authenticate`
        // starts at byte 7, i.e. column 8.
        let content = "fn a() {}\n    fn authenticate() {}\nfn b() {}\n";
        let matches = run("src/auth.rs", content, &req("authenticate"));
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].file, "src/auth.rs");
        assert_eq!(matches[0].line, 2);
        assert_eq!(matches[0].column, 8);
        assert_eq!(matches[0].text, "    fn authenticate() {}");
    }

    #[test]
    fn regex_pattern_matches() {
        let content = "let x = 1;\nlet y = 22;\nlet z = 333;\n";
        let matches = run("a.rs", content, &req(r"=\s*\d{2,};"));
        assert_eq!(matches.len(), 2);
        assert_eq!(matches[0].line, 2);
        assert_eq!(matches[1].line, 3);
    }

    #[test]
    fn case_insensitive_matches() {
        let content = "ERROR here\nno match\nerror there\n";
        let mut r = req("error");
        assert_eq!(run("a.rs", content, &r).len(), 1);
        r.case_insensitive = true;
        assert_eq!(run("a.rs", content, &r).len(), 2);
    }

    #[test]
    fn context_windows_are_clamped() {
        let content = "l1\nl2\nMATCH\nl4\nl5\n";
        let mut r = req("MATCH");
        r.context_before = 5;
        r.context_after = 5;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].context_before, vec!["l1", "l2"]);
        assert_eq!(matches[0].context_after, vec!["l4", "l5"]);
    }

    #[test]
    fn compile_folds_context_c_over_a_b() {
        let mut r = req("MATCH");
        r.context_before = 1;
        r.context_after = 1;
        r.context = Some(3);
        let content = "a\nb\nc\nd\nMATCH\ne\nf\ng\nh\n";
        let matches = run("a.rs", content, &r);
        assert_eq!(matches[0].context_before, vec!["b", "c", "d"]);
        assert_eq!(matches[0].context_after, vec!["e", "f", "g"]);
    }

    #[test]
    fn multiline_match_spans_lines() {
        let content = "struct S {\n field: i32,\n}\n";
        let mut r = req(r"struct S \{.*field");
        assert_eq!(run("a.rs", content, &r).len(), 0);
        r.multiline = true;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line, 1);
        assert_eq!(matches[0].column, 1);
    }

    #[test]
    fn glob_filters_by_path() {
        let mut r = req("x");
        r.glob = Some("**/*.rs".to_string());
        let compiled = CompiledGrep::compile(&r).unwrap();
        assert!(compiled.path_matches("crates/foo/src/bar.rs"));
        assert!(compiled.path_matches("top.rs"));
        assert!(!compiled.path_matches("crates/foo/README.md"));

        r.glob = Some("*.rs".to_string());
        let compiled = CompiledGrep::compile(&r).unwrap();
        assert!(compiled.path_matches("a/b/c.rs"));
        assert!(!compiled.path_matches("a/b/c.py"));
    }

    #[test]
    fn respects_remaining_budget() {
        let content = "x\nx\nx\nx\nx\n";
        let mut r = req("x");
        r.max_results = 3;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 3);
    }

    #[test]
    fn no_op_when_budget_exhausted() {
        let r = req("x");
        let compiled = CompiledGrep::compile(&r).unwrap();
        let mut out = vec![GrepMatch {
            file: "pre.rs".into(),
            line: 1,
            column: 1,
            text: "x".into(),
            context_before: vec![],
            context_after: vec![],
        }];
        grep_file_content("a.rs", "x\nx\n", &compiled, &mut out, 1);
        assert_eq!(out.len(), 1);
    }

    #[test]
    fn invalid_regex_is_rejected() {
        let r = req("(unclosed");
        let err = CompiledGrep::compile(&r).unwrap_err();
        assert!(matches!(err, GrepError::InvalidRegex(_)));
    }

    #[test]
    fn invalid_glob_is_rejected() {
        let mut r = req("x");
        r.glob = Some("[unclosed".to_string());
        let err = CompiledGrep::compile(&r).unwrap_err();
        assert!(matches!(err, GrepError::InvalidGlob(_)));
    }

    #[test]
    fn byte_to_col_handles_multibyte() {
        let content = "café_X\n";
        let matches = run("a.rs", content, &req("X"));
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].column, 6);
    }

    #[test]
    fn fixed_strings_treats_pattern_as_literal() {
        let content = "a.b\nacb\nax\n";
        let mut r = req("a.b");
        let regex_hits = run("a.rs", content, &r);
        assert_eq!(regex_hits.len(), 2);
        r.fixed_strings = true;
        let literal_hits = run("a.rs", content, &r);
        assert_eq!(literal_hits.len(), 1);
        assert_eq!(literal_hits[0].text, "a.b");
    }

    #[test]
    fn fixed_strings_accepts_invalid_regex_chars() {
        let content = "vec[0] = 1;\nvec.get(0);\n";
        let mut r = req("vec[");
        assert!(CompiledGrep::compile(&r).is_err());
        r.fixed_strings = true;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line, 1);
    }

    #[test]
    fn files_with_matches_returns_path_once() {
        let content = "fn a() {}\n// TODO refactor\nfn b() {}\n// TODO inline\n";
        let mut r = req("TODO");
        assert_eq!(run("a.rs", content, &r).len(), 2);
        r.files_with_matches = true;
        let matches = run("src/a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].file, "src/a.rs");
        assert_eq!(matches[0].line, 0);
        assert_eq!(matches[0].column, 0);
        assert!(matches[0].text.is_empty());
        assert!(matches[0].context_before.is_empty());
        assert!(matches[0].context_after.is_empty());
    }

    #[test]
    fn files_with_matches_skips_non_matching_files() {
        let mut r = req("ZZZ");
        r.files_with_matches = true;
        let matches = run("a.rs", "fn a() {}\nfn b() {}\n", &r);
        assert!(matches.is_empty());
    }

    #[test]
    fn files_with_matches_honours_invert() {
        let mut r = req("fn");
        r.files_with_matches = true;
        r.invert_match = true;
        let all_match = "fn a() {}\nfn b() {}\n";
        assert!(run("a.rs", all_match, &r).is_empty());
        let mixed = "fn a() {}\nstruct S;\n";
        let hits = run("a.rs", mixed, &r);
        assert_eq!(hits.len(), 1);
        assert_eq!(hits[0].file, "a.rs");
    }

    #[test]
    fn invert_match_returns_non_matching_lines() {
        let content = "fn a() {}\nstruct S;\nfn b() {}\n";
        let mut r = req("^fn");
        assert_eq!(run("a.rs", content, &r).len(), 2);
        r.invert_match = true;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].text, "struct S;");
        assert_eq!(matches[0].line, 2);
        assert_eq!(matches[0].column, 1);
    }

    #[test]
    fn word_regexp_requires_boundaries() {
        let content = "let log = 1;\nlet catalog = 2;\nlet log_level = 3;\n";
        let mut r = req("log");
        assert_eq!(run("a.rs", content, &r).len(), 3);
        r.word_regexp = true;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line, 1);
    }

    #[test]
    fn word_regexp_composes_with_fixed_strings() {
        let content = "use a.b;\nuse a.bc;\n";
        let mut r = req("a.b");
        r.fixed_strings = true;
        r.word_regexp = true;
        let matches = run("a.rs", content, &r);
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].line, 1);
    }
}