1use std::collections::{BTreeMap, BTreeSet};
2
3use anyhow::Context as _;
4use postgres::Client;
5use regex::Regex;
6use serde::Serialize;
7
8use crate::config::Context;
9use crate::db;
10use crate::output::{self, Format};
11use crate::search::fts;
12
13pub struct GrepOptions<'a> {
14 pub pattern: &'a str,
15 pub paths: &'a [String],
16 pub globs: &'a [String],
17 pub fixed_strings: bool,
18 pub ignore_case: bool,
19 pub context: Option<usize>,
20 pub before_context: Option<usize>,
21 pub after_context: Option<usize>,
22 pub max_count: Option<usize>,
23 pub format: Format,
24}
25
/// One stored slice of an indexed file's text, as loaded from the database.
#[derive(Debug, Clone)]
struct IndexedContentChunk {
    /// Path of the file this chunk belongs to.
    file_path: String,
    /// Line number of the first line held in `content`.
    line_start: usize,
    /// Chunk text; it is split into individual lines while searching.
    content: String,
}
32
33#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
34pub(crate) struct GrepSpan {
35 pub start: usize,
36 pub end: usize,
37}
38
39#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
40pub(crate) struct GrepContextLine {
41 pub line: usize,
42 pub text: String,
43}
44
45#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
46pub(crate) struct GrepMatch {
47 pub path: String,
48 pub line: usize,
49 pub text: String,
50 pub spans: Vec<GrepSpan>,
51 pub before: Vec<GrepContextLine>,
52 pub after: Vec<GrepContextLine>,
53}
54
55#[derive(Debug, Serialize)]
56struct GrepResponse {
57 project_id: String,
58 pattern: String,
59 fixed_strings: bool,
60 ignore_case: bool,
61 paths: Vec<String>,
62 globs: Vec<String>,
63 max_count: Option<usize>,
64 matched_lines: usize,
65 truncated: bool,
66 scanned_chunks: usize,
67 matches: Vec<GrepMatch>,
68}
69
70#[derive(Debug)]
71struct GrepResult {
72 scanned_chunks: usize,
73 matched_lines: usize,
74 truncated: bool,
75 matches: Vec<GrepMatch>,
76}
77
78pub fn run(ctx: &Context, options: GrepOptions<'_>) -> anyhow::Result<()> {
79 let mut conn = db::connect_readonly(&ctx.database_url)?;
80 let chunks = load_indexed_chunks(&mut conn, &ctx.project_id)?;
81 let result = grep_chunks(&chunks, &options)?;
82
83 match options.format {
84 Format::Json => output::print_json(&GrepResponse {
85 project_id: ctx.project_id.clone(),
86 pattern: options.pattern.to_string(),
87 fixed_strings: options.fixed_strings,
88 ignore_case: options.ignore_case,
89 paths: options.paths.to_vec(),
90 globs: options.globs.to_vec(),
91 max_count: options.max_count,
92 matched_lines: result.matched_lines,
93 truncated: result.truncated,
94 scanned_chunks: result.scanned_chunks,
95 matches: result.matches,
96 }),
97 Format::Text => {
98 let text = format_text_matches(&result.matches);
99 if text.is_empty() {
100 Ok(())
101 } else {
102 output::print_text(&text)
103 }
104 }
105 }
106}
107
108fn load_indexed_chunks(
109 conn: &mut Client,
110 project_id: &str,
111) -> anyhow::Result<Vec<IndexedContentChunk>> {
112 let rows = conn.query(
113 "SELECT c.file_path,
114 c.chunk_index::BIGINT AS chunk_index,
115 c.line_start::BIGINT AS line_start,
116 c.content
117 FROM code_content_chunks c
118 JOIN code_indexed_files cf
119 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
120 WHERE c.project_id = $1
121 ORDER BY c.file_path ASC, c.line_start ASC, c.chunk_index ASC",
122 &[&project_id],
123 )?;
124
125 rows.into_iter()
126 .map(|row| {
127 let line_start = i64_to_usize(row.try_get("line_start")?, "line_start")?;
128 Ok(IndexedContentChunk {
129 file_path: row.try_get("file_path")?,
130 line_start,
131 content: row.try_get("content")?,
132 })
133 })
134 .collect()
135}
136
137fn grep_chunks(
138 chunks: &[IndexedContentChunk],
139 options: &GrepOptions<'_>,
140) -> anyhow::Result<GrepResult> {
141 let matcher = GrepMatcher::new(options.pattern, options.fixed_strings, options.ignore_case)?;
142 let filters = GrepFilters::new(options.paths, options.globs)?;
143 let before_context = options.before_context.or(options.context).unwrap_or(0);
144 let after_context = options.after_context.or(options.context).unwrap_or(0);
145
146 let mut scanned_chunks = 0usize;
147 let mut file_lines: BTreeMap<String, BTreeMap<usize, String>> = BTreeMap::new();
148 let mut matches: BTreeMap<(String, usize), GrepMatch> = BTreeMap::new();
149
150 for chunk in chunks {
151 if !filters.matches(&chunk.file_path) {
152 continue;
153 }
154 scanned_chunks += 1;
155
156 for (offset, line_text) in chunk.content.lines().enumerate() {
157 let line = chunk.line_start + offset;
158 file_lines
159 .entry(chunk.file_path.clone())
160 .or_default()
161 .entry(line)
162 .or_insert_with(|| line_text.to_string());
163
164 let key = (chunk.file_path.clone(), line);
165 if matches.contains_key(&key) {
166 continue;
167 }
168
169 let spans = matcher.find_spans(line_text);
170 if !spans.is_empty() {
171 matches.insert(
172 key,
173 GrepMatch {
174 path: chunk.file_path.clone(),
175 line,
176 text: line_text.to_string(),
177 spans,
178 before: Vec::new(),
179 after: Vec::new(),
180 },
181 );
182 }
183 }
184 }
185
186 let total_matching_lines = matches.len();
187 let max = options.max_count.unwrap_or(usize::MAX);
188 let mut retained = matches.into_values().take(max).collect::<Vec<_>>();
189 for item in &mut retained {
190 if let Some(lines) = file_lines.get(&item.path) {
191 item.before = context_before(lines, item.line, before_context);
192 item.after = context_after(lines, item.line, after_context);
193 }
194 }
195
196 Ok(GrepResult {
197 scanned_chunks,
198 matched_lines: retained.len(),
199 truncated: total_matching_lines > retained.len(),
200 matches: retained,
201 })
202}
203
204struct GrepMatcher {
205 regex: Regex,
206}
207
208impl GrepMatcher {
209 fn new(pattern: &str, fixed_strings: bool, ignore_case: bool) -> anyhow::Result<Self> {
210 if pattern.is_empty() {
211 anyhow::bail!("gcode grep pattern must not be empty");
212 }
213 let pattern = if fixed_strings {
214 regex::escape(pattern)
215 } else {
216 pattern.to_string()
217 };
218 let regex = regex::RegexBuilder::new(&pattern)
219 .case_insensitive(ignore_case)
220 .build()
221 .with_context(|| "invalid gcode grep pattern")?;
222 Ok(Self { regex })
223 }
224
225 fn find_spans(&self, line: &str) -> Vec<GrepSpan> {
226 self.regex
227 .find_iter(line)
228 .filter(|m| m.start() != m.end())
229 .map(|m| GrepSpan {
230 start: m.start(),
231 end: m.end(),
232 })
233 .collect()
234 }
235}
236
237struct GrepFilters {
238 paths: Vec<glob::Pattern>,
239 globs: Vec<CompiledGlob>,
240}
241
242impl GrepFilters {
243 fn new(paths: &[String], globs: &[String]) -> anyhow::Result<Self> {
244 let expanded_paths = fts::expand_paths(paths);
245 Ok(Self {
246 paths: fts::compile_patterns(&expanded_paths)?,
247 globs: globs
248 .iter()
249 .map(|glob| CompiledGlob::new(glob))
250 .collect::<anyhow::Result<Vec<_>>>()?,
251 })
252 }
253
254 fn matches(&self, file_path: &str) -> bool {
255 let path_matches =
256 self.paths.is_empty() || self.paths.iter().any(|pattern| pattern.matches(file_path));
257 let glob_matches =
258 self.globs.is_empty() || self.globs.iter().any(|glob| glob.matches(file_path));
259 path_matches && glob_matches
260 }
261}
262
263struct CompiledGlob {
264 raw: String,
265 pattern: glob::Pattern,
266}
267
268impl CompiledGlob {
269 fn new(raw: &str) -> anyhow::Result<Self> {
270 Ok(Self {
271 raw: raw.to_string(),
272 pattern: glob::Pattern::new(raw)
273 .map_err(|err| anyhow::anyhow!("invalid grep glob `{raw}`: {err}"))?,
274 })
275 }
276
277 fn matches(&self, file_path: &str) -> bool {
278 if self.pattern.matches(file_path) {
279 return true;
280 }
281 if self.raw.contains('/') {
282 return false;
283 }
284 file_path
285 .rsplit('/')
286 .next()
287 .is_some_and(|name| self.pattern.matches(name))
288 }
289}
290
291fn context_before(
292 lines: &BTreeMap<usize, String>,
293 line: usize,
294 context: usize,
295) -> Vec<GrepContextLine> {
296 if context == 0 {
297 return Vec::new();
298 }
299 let start = line.saturating_sub(context);
300 lines
301 .range(start..line)
302 .map(|(line, text)| GrepContextLine {
303 line: *line,
304 text: text.clone(),
305 })
306 .collect()
307}
308
309fn context_after(
310 lines: &BTreeMap<usize, String>,
311 line: usize,
312 context: usize,
313) -> Vec<GrepContextLine> {
314 if context == 0 {
315 return Vec::new();
316 }
317 let end = line.saturating_add(context);
318 lines
319 .range((line.saturating_add(1))..=end)
320 .map(|(line, text)| GrepContextLine {
321 line: *line,
322 text: text.clone(),
323 })
324 .collect()
325}
326
327fn format_text_matches(matches: &[GrepMatch]) -> String {
328 let matching_lines: BTreeSet<(String, usize)> =
329 matches.iter().map(|m| (m.path.clone(), m.line)).collect();
330 let mut emitted_context = BTreeSet::new();
331 let mut lines = Vec::new();
332
333 for item in matches {
334 for context in &item.before {
335 let key = (item.path.clone(), context.line);
336 if !matching_lines.contains(&key) && emitted_context.insert(key) {
337 lines.push(format!("{}-{}-{}", item.path, context.line, context.text));
338 }
339 }
340
341 lines.push(format!("{}:{}:{}", item.path, item.line, item.text));
342
343 for context in &item.after {
344 let key = (item.path.clone(), context.line);
345 if !matching_lines.contains(&key) && emitted_context.insert(key) {
346 lines.push(format!("{}-{}-{}", item.path, context.line, context.text));
347 }
348 }
349 }
350
351 lines.join("\n")
352}
353
354fn i64_to_usize(value: i64, column: &str) -> anyhow::Result<usize> {
355 value
356 .try_into()
357 .with_context(|| format!("column `{column}` contains negative or too-large value {value}"))
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Path shared by the single-file tests.
    const LIB: &str = "src/lib.rs";

    /// Builds an in-memory chunk without touching the database.
    fn chunk(path: &str, line_start: usize, content: &str) -> IndexedContentChunk {
        IndexedContentChunk {
            file_path: path.to_owned(),
            line_start,
            content: content.to_owned(),
        }
    }

    /// Default options for `pattern`: no filters, no context, no cap.
    fn options(pattern: &str) -> GrepOptions<'_> {
        GrepOptions {
            pattern,
            paths: &[],
            globs: &[],
            fixed_strings: false,
            ignore_case: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            format: Format::Json,
        }
    }

    /// Runs the search and unwraps the result.
    fn search(chunks: &[IndexedContentChunk], opts: &GrepOptions<'_>) -> GrepResult {
        grep_chunks(chunks, opts).expect("grep chunks")
    }

    /// Shorthand for an expected context line.
    fn context_line(line: usize, text: &str) -> GrepContextLine {
        GrepContextLine {
            line,
            text: text.to_string(),
        }
    }

    #[test]
    fn text_renders_grep_shape() {
        let found = search(&[chunk(LIB, 1, "one\nneedle\nthree")], &options("needle"));

        assert_eq!(format_text_matches(&found.matches), "src/lib.rs:2:needle");
    }

    #[test]
    fn ordering_is_path_then_line() {
        // Deliberately supplied out of order.
        let unordered = [
            chunk("b.rs", 10, "needle later"),
            chunk("a.rs", 3, "needle first"),
            chunk("a.rs", 1, "needle earliest"),
        ];
        let found = search(&unordered, &options("needle"));

        let mut positions = Vec::new();
        for m in &found.matches {
            positions.push((m.path.as_str(), m.line));
        }
        assert_eq!(positions, vec![("a.rs", 1), ("a.rs", 3), ("b.rs", 10)]);
    }

    #[test]
    fn ignore_case_matches_case_insensitively() {
        let opts = GrepOptions {
            ignore_case: true,
            ..options("needle")
        };
        let found = search(&[chunk(LIB, 1, "Needle")], &opts);

        assert_eq!(found.matches.len(), 1);
    }

    #[test]
    fn fixed_strings_treat_regex_metacharacters_literally() {
        let opts = GrepOptions {
            fixed_strings: true,
            ..options("a.b")
        };
        let found = search(&[chunk(LIB, 1, "a.b\naxb")], &opts);

        assert_eq!(found.matches.len(), 1);
        assert_eq!(found.matches[0].line, 1);
    }

    #[test]
    fn context_flags_include_bounded_neighbors() {
        let opts = GrepOptions {
            before_context: Some(1),
            after_context: Some(2),
            ..options("needle")
        };
        let found = search(&[chunk(LIB, 1, "one\ntwo\nneedle\nfour\nfive")], &opts);
        let first = &found.matches[0];

        assert_eq!(first.before, vec![context_line(2, "two")]);
        assert_eq!(
            first.after,
            vec![context_line(4, "four"), context_line(5, "five")]
        );
        assert_eq!(
            format_text_matches(&found.matches),
            "src/lib.rs-2-two\nsrc/lib.rs:3:needle\nsrc/lib.rs-4-four\nsrc/lib.rs-5-five"
        );
    }

    #[test]
    fn max_count_caps_matching_lines_not_context() {
        let opts = GrepOptions {
            context: Some(1),
            max_count: Some(1),
            ..options("needle")
        };
        let found = search(
            &[chunk(
                LIB,
                1,
                "before\nneedle one\nmiddle\nneedle two\nafter",
            )],
            &opts,
        );

        assert_eq!(found.matched_lines, 1);
        assert!(found.truncated);
        let kept = &found.matches[0];
        assert_eq!(kept.line, 2);
        assert_eq!(kept.before.len(), 1);
        assert_eq!(kept.after.len(), 1);
    }

    #[test]
    fn json_match_contains_spans_and_context() {
        let opts = GrepOptions {
            context: Some(1),
            ..options("needle")
        };
        let found = search(&[chunk(LIB, 1, "before\nneedle needle\nafter")], &opts);
        let json = serde_json::to_value(&found.matches[0]).expect("serialize match");

        assert_eq!(json["path"], "src/lib.rs");
        assert_eq!(json["line"], 2);
        assert_eq!(json["text"], "needle needle");
        assert_eq!(json["spans"][0]["start"], 0);
        assert_eq!(json["spans"][0]["end"], 6);
        assert_eq!(json["spans"][1]["start"], 7);
        assert_eq!(json["before"][0]["line"], 1);
        assert_eq!(json["after"][0]["line"], 3);
    }

    #[test]
    fn path_and_glob_filters_compose() {
        let candidates = [
            chunk("src/gobby/app.py", 1, "needle"),
            chunk("src/gobby/app.rs", 1, "needle"),
            chunk("tests/app.py", 1, "needle"),
        ];
        let paths = vec!["src/gobby".to_string()];
        let globs = vec!["*.py".to_string()];
        let opts = GrepOptions {
            paths: &paths,
            globs: &globs,
            ..options("needle")
        };
        let found = search(&candidates, &opts);

        assert_eq!(found.scanned_chunks, 1);
        assert_eq!(found.matches[0].path, "src/gobby/app.py");
    }

    #[test]
    fn overlapping_chunks_dedupe_by_file_and_line() {
        let duplicated = [
            chunk(LIB, 1, "needle\nother"),
            chunk(LIB, 1, "needle\nother"),
        ];
        let found = search(&duplicated, &options("needle"));

        assert_eq!(found.matches.len(), 1);
    }
}