1use std::collections::{BTreeMap, BTreeSet};
2
3use anyhow::Context as _;
4use postgres::Client;
5use regex::Regex;
6use serde::Serialize;
7
8use crate::config::Context;
9use crate::db;
10use crate::output::{self, Format};
11use crate::search::fts;
12
/// Caller-supplied settings for a single `gcode grep` invocation.
pub struct GrepOptions<'a> {
    /// Search pattern; compiled as a regex unless `fixed_strings` is set.
    pub pattern: &'a str,
    /// Path filters; when empty, no path restriction is applied.
    pub paths: &'a [String],
    /// Glob filters; when empty, no glob restriction is applied.
    pub globs: &'a [String],
    /// Escape `pattern` before compiling so regex metacharacters match literally.
    pub fixed_strings: bool,
    /// Compile the matcher case-insensitively.
    pub ignore_case: bool,
    /// Fallback context size for whichever of `before_context` / `after_context` is `None`.
    pub context: Option<usize>,
    /// Number of lines to report before each match; overrides `context` when set.
    pub before_context: Option<usize>,
    /// Number of lines to report after each match; overrides `context` when set.
    pub after_context: Option<usize>,
    /// Maximum number of matching lines to keep; `None` keeps every match.
    pub max_count: Option<usize>,
    /// Output format chosen by the caller (`run` renders JSON or text).
    pub format: Format,
}
25
/// One row loaded from the content index: a run of consecutive lines of a file.
#[derive(Debug, Clone)]
struct IndexedContentChunk {
    /// Path of the file this chunk belongs to, as stored in the index.
    file_path: String,
    /// Line number of the first line in `content`; later lines count up from it.
    line_start: usize,
    /// Chunk text; it is split with `str::lines` when scanned.
    content: String,
}
32
/// Location of one pattern match inside a line, taken from the regex match.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub(crate) struct GrepSpan {
    /// Offset where the match starts (the regex match's `start()`).
    pub start: usize,
    /// Offset just past the end of the match (the regex match's `end()`).
    pub end: usize,
}
38
/// A line reported around a match as leading or trailing context.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub(crate) struct GrepContextLine {
    /// Line number of the context line within its file.
    pub line: usize,
    /// Full text of the context line.
    pub text: String,
}
44
/// One matching line together with its match spans and surrounding context.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub(crate) struct GrepMatch {
    /// Path of the file containing the matching line.
    pub path: String,
    /// Line number of the matching line.
    pub line: usize,
    /// Full text of the matching line.
    pub text: String,
    /// Every non-empty match of the pattern on this line, in match order.
    pub spans: Vec<GrepSpan>,
    /// Context lines preceding the match, in ascending line order.
    pub before: Vec<GrepContextLine>,
    /// Context lines following the match, in ascending line order.
    pub after: Vec<GrepContextLine>,
}
54
/// JSON payload printed by `run`: the effective request plus the search outcome.
#[derive(Debug, Serialize)]
struct GrepResponse {
    /// Project whose index was searched.
    project_id: String,
    /// Pattern exactly as supplied by the caller.
    pattern: String,
    /// Whether the pattern was treated as a literal string.
    fixed_strings: bool,
    /// Whether matching was case-insensitive.
    ignore_case: bool,
    /// Path filters as supplied by the caller.
    paths: Vec<String>,
    /// Glob filters as supplied by the caller.
    globs: Vec<String>,
    /// Requested cap on matching lines, if any.
    max_count: Option<usize>,
    /// Number of matching lines included in `matches`.
    matched_lines: usize,
    /// `true` when more lines matched than were kept under `max_count`.
    truncated: bool,
    /// Number of chunks that passed the path/glob filters and were scanned.
    scanned_chunks: usize,
    /// The reported matches.
    matches: Vec<GrepMatch>,
}
69
/// Outcome of scanning the loaded chunks, independent of the output format.
#[derive(Debug)]
struct GrepResult {
    /// Number of chunks that passed the path/glob filters and were scanned.
    scanned_chunks: usize,
    /// Number of matching lines kept in `matches`.
    matched_lines: usize,
    /// `true` when more lines matched than were kept under `max_count`.
    truncated: bool,
    /// The kept matches.
    matches: Vec<GrepMatch>,
}
77
78pub fn run(ctx: &Context, options: GrepOptions<'_>) -> anyhow::Result<()> {
79 let mut conn = db::connect_readonly(&ctx.database_url)?;
80 let chunks = load_indexed_chunks(&mut conn, &ctx.project_id)?;
81 let result = grep_chunks(&chunks, &options)?;
82
83 match options.format {
84 Format::Json => output::print_json(&GrepResponse {
85 project_id: ctx.project_id.clone(),
86 pattern: options.pattern.to_string(),
87 fixed_strings: options.fixed_strings,
88 ignore_case: options.ignore_case,
89 paths: options.paths.to_vec(),
90 globs: options.globs.to_vec(),
91 max_count: options.max_count,
92 matched_lines: result.matched_lines,
93 truncated: result.truncated,
94 scanned_chunks: result.scanned_chunks,
95 matches: result.matches,
96 }),
97 Format::Text => {
98 let text = format_text_matches(&result.matches);
99 if text.is_empty() {
100 Ok(())
101 } else {
102 output::print_text(&text)
103 }
104 }
105 }
106}
107
108fn load_indexed_chunks(
109 conn: &mut Client,
110 project_id: &str,
111) -> anyhow::Result<Vec<IndexedContentChunk>> {
112 let rows = conn.query(
113 "SELECT c.file_path,
114 c.line_start::BIGINT AS line_start,
115 c.content
116 FROM code_content_chunks c
117 JOIN code_indexed_files cf
118 ON cf.project_id = c.project_id AND cf.file_path = c.file_path
119 WHERE c.project_id = $1
120 ORDER BY c.file_path ASC, c.line_start ASC, c.chunk_index ASC",
121 &[&project_id],
122 )?;
123
124 rows.into_iter()
125 .map(|row| {
126 let line_start = i64_to_usize(row.try_get("line_start")?, "line_start")?;
127 Ok(IndexedContentChunk {
128 file_path: row.try_get("file_path")?,
129 line_start,
130 content: row.try_get("content")?,
131 })
132 })
133 .collect()
134}
135
136fn grep_chunks(
137 chunks: &[IndexedContentChunk],
138 options: &GrepOptions<'_>,
139) -> anyhow::Result<GrepResult> {
140 let matcher = GrepMatcher::new(options.pattern, options.fixed_strings, options.ignore_case)?;
141 let filters = GrepFilters::new(options.paths, options.globs)?;
142 let before_context = options.before_context.or(options.context).unwrap_or(0);
143 let after_context = options.after_context.or(options.context).unwrap_or(0);
144
145 let mut scanned_chunks = 0usize;
146 let mut file_lines: BTreeMap<String, BTreeMap<usize, String>> = BTreeMap::new();
147 let mut matches: BTreeMap<(String, usize), GrepMatch> = BTreeMap::new();
148
149 for chunk in chunks {
150 if !filters.matches(&chunk.file_path) {
151 continue;
152 }
153 scanned_chunks += 1;
154
155 for (offset, line_text) in chunk.content.lines().enumerate() {
156 let line = chunk.line_start + offset;
157 file_lines
158 .entry(chunk.file_path.clone())
159 .or_default()
160 .entry(line)
161 .or_insert_with(|| line_text.to_string());
162
163 let key = (chunk.file_path.clone(), line);
164 if matches.contains_key(&key) {
165 continue;
166 }
167
168 let spans = matcher.find_spans(line_text);
169 if !spans.is_empty() {
170 matches.insert(
171 key,
172 GrepMatch {
173 path: chunk.file_path.clone(),
174 line,
175 text: line_text.to_string(),
176 spans,
177 before: Vec::new(),
178 after: Vec::new(),
179 },
180 );
181 }
182 }
183 }
184
185 let total_matching_lines = matches.len();
186 let max = options.max_count.unwrap_or(usize::MAX);
187 let mut retained = matches.into_values().take(max).collect::<Vec<_>>();
188 for item in &mut retained {
189 if let Some(lines) = file_lines.get(&item.path) {
190 item.before = context_before(lines, item.line, before_context);
191 item.after = context_after(lines, item.line, after_context);
192 }
193 }
194
195 Ok(GrepResult {
196 scanned_chunks,
197 matched_lines: retained.len(),
198 truncated: total_matching_lines > retained.len(),
199 matches: retained,
200 })
201}
202
/// Compiled form of the search pattern.
struct GrepMatcher {
    /// Regex built from the (possibly escaped) pattern and the case flag.
    regex: Regex,
}
206
207impl GrepMatcher {
208 fn new(pattern: &str, fixed_strings: bool, ignore_case: bool) -> anyhow::Result<Self> {
209 if pattern.is_empty() {
210 anyhow::bail!("gcode grep pattern must not be empty");
211 }
212 let pattern = if fixed_strings {
213 regex::escape(pattern)
214 } else {
215 pattern.to_string()
216 };
217 let regex = regex::RegexBuilder::new(&pattern)
218 .case_insensitive(ignore_case)
219 .build()
220 .with_context(|| "invalid gcode grep pattern")?;
221 Ok(Self { regex })
222 }
223
224 fn find_spans(&self, line: &str) -> Vec<GrepSpan> {
225 self.regex
226 .find_iter(line)
227 .filter(|m| m.start() != m.end())
228 .map(|m| GrepSpan {
229 start: m.start(),
230 end: m.end(),
231 })
232 .collect()
233 }
234}
235
/// File-selection filters applied to each chunk's path before it is scanned.
struct GrepFilters {
    /// Patterns compiled from the expanded `paths` arguments.
    paths: Vec<glob::Pattern>,
    /// Compiled `globs` arguments.
    globs: Vec<CompiledGlob>,
}
240
241impl GrepFilters {
242 fn new(paths: &[String], globs: &[String]) -> anyhow::Result<Self> {
243 let expanded_paths = fts::expand_paths(paths);
244 Ok(Self {
245 paths: fts::compile_patterns(&expanded_paths)?,
246 globs: globs
247 .iter()
248 .map(|glob| CompiledGlob::new(glob))
249 .collect::<anyhow::Result<Vec<_>>>()?,
250 })
251 }
252
253 fn matches(&self, file_path: &str) -> bool {
254 let path_matches =
255 self.paths.is_empty() || self.paths.iter().any(|pattern| pattern.matches(file_path));
256 let glob_matches =
257 self.globs.is_empty() || self.globs.iter().any(|glob| glob.matches(file_path));
258 path_matches && glob_matches
259 }
260}
261
/// A glob argument kept alongside its compiled pattern.
struct CompiledGlob {
    /// Glob text as supplied; inspected for `/` when matching.
    raw: String,
    /// Pattern compiled from `raw`.
    pattern: glob::Pattern,
}
266
267impl CompiledGlob {
268 fn new(raw: &str) -> anyhow::Result<Self> {
269 Ok(Self {
270 raw: raw.to_string(),
271 pattern: glob::Pattern::new(raw)
272 .map_err(|err| anyhow::anyhow!("invalid grep glob `{raw}`: {err}"))?,
273 })
274 }
275
276 fn matches(&self, file_path: &str) -> bool {
277 if self.pattern.matches(file_path) {
278 return true;
279 }
280 if self.raw.contains('/') {
281 return false;
282 }
283 file_path
284 .rsplit('/')
285 .next()
286 .is_some_and(|name| self.pattern.matches(name))
287 }
288}
289
290fn context_before(
291 lines: &BTreeMap<usize, String>,
292 line: usize,
293 context: usize,
294) -> Vec<GrepContextLine> {
295 if context == 0 {
296 return Vec::new();
297 }
298 let start = line.saturating_sub(context);
299 lines
300 .range(start..line)
301 .map(|(line, text)| GrepContextLine {
302 line: *line,
303 text: text.clone(),
304 })
305 .collect()
306}
307
308fn context_after(
309 lines: &BTreeMap<usize, String>,
310 line: usize,
311 context: usize,
312) -> Vec<GrepContextLine> {
313 if context == 0 {
314 return Vec::new();
315 }
316 let end = line.saturating_add(context);
317 lines
318 .range((line.saturating_add(1))..=end)
319 .map(|(line, text)| GrepContextLine {
320 line: *line,
321 text: text.clone(),
322 })
323 .collect()
324}
325
326fn format_text_matches(matches: &[GrepMatch]) -> String {
327 let matching_lines: BTreeSet<(String, usize)> =
328 matches.iter().map(|m| (m.path.clone(), m.line)).collect();
329 let mut emitted_context = BTreeSet::new();
330 let mut current_path: Option<&str> = None;
331 let mut lines = Vec::new();
332
333 for item in matches {
334 for context in &item.before {
335 let key = (item.path.clone(), context.line);
336 if !matching_lines.contains(&key) && emitted_context.insert(key) {
337 push_grouped_grep_line(
338 &mut lines,
339 &mut current_path,
340 &item.path,
341 context.line,
342 '-',
343 &context.text,
344 );
345 }
346 }
347
348 push_grouped_grep_line(
349 &mut lines,
350 &mut current_path,
351 &item.path,
352 item.line,
353 ':',
354 &item.text,
355 );
356
357 for context in &item.after {
358 let key = (item.path.clone(), context.line);
359 if !matching_lines.contains(&key) && emitted_context.insert(key) {
360 push_grouped_grep_line(
361 &mut lines,
362 &mut current_path,
363 &item.path,
364 context.line,
365 '-',
366 &context.text,
367 );
368 }
369 }
370 }
371
372 lines.join("\n")
373}
374
/// Appends one rendered line to `lines`, preceded by a path header when the
/// file changes.
///
/// `current_path` tracks the file whose header was emitted most recently. When
/// `path` differs from it, the path is pushed on its own line and
/// `current_path` is updated. The line itself is rendered as
/// `{line}{marker}{text}`.
fn push_grouped_grep_line<'a>(
    lines: &mut Vec<String>,
    current_path: &mut Option<&'a str>,
    path: &'a str,
    line: usize,
    marker: char,
    text: &str,
) {
    let header_already_emitted = matches!(*current_path, Some(active) if active == path);
    if !header_already_emitted {
        *current_path = Some(path);
        lines.push(path.to_owned());
    }

    let rendered = format!("{line}{marker}{text}");
    lines.push(rendered);
}
389
390fn i64_to_usize(value: i64, column: &str) -> anyhow::Result<usize> {
391 value
392 .try_into()
393 .with_context(|| format!("column `{column}` contains negative or too-large value {value}"))
394}
395
// Unit tests for the in-memory search and text rendering; none touch the database.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an in-memory chunk for `path` whose first line is `line_start`.
    fn chunk(path: &str, line_start: usize, content: &str) -> IndexedContentChunk {
        IndexedContentChunk {
            file_path: path.to_string(),
            line_start,
            content: content.to_string(),
        }
    }

    /// Default options for `pattern`: regex mode, case-sensitive, no filters,
    /// no context, no cap, JSON format.
    fn options(pattern: &str) -> GrepOptions<'_> {
        GrepOptions {
            pattern,
            paths: &[],
            globs: &[],
            fixed_strings: false,
            ignore_case: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            format: Format::Json,
        }
    }

    // A single match renders as a path header followed by `line:text`.
    #[test]
    fn text_renders_grouped_grep_shape() {
        let chunks = vec![chunk("src/lib.rs", 1, "one\nneedle\nthree")];
        let result = grep_chunks(&chunks, &options("needle")).expect("grep chunks");

        assert_eq!(format_text_matches(&result.matches), "src/lib.rs\n2:needle");
    }

    // Each file gets its own header line.
    #[test]
    fn text_groups_multiple_files() {
        let chunks = vec![
            chunk("src/a.rs", 1, "needle a"),
            chunk("tests/b.rs", 10, "needle b"),
        ];
        let result = grep_chunks(&chunks, &options("needle")).expect("grep chunks");

        assert_eq!(
            format_text_matches(&result.matches),
            "src/a.rs\n1:needle a\ntests/b.rs\n10:needle b"
        );
    }

    // Matches are sorted by path, then line, regardless of chunk input order.
    #[test]
    fn ordering_is_path_then_line() {
        let chunks = vec![
            chunk("b.rs", 10, "needle later"),
            chunk("a.rs", 3, "needle first"),
            chunk("a.rs", 1, "needle earliest"),
        ];
        let result = grep_chunks(&chunks, &options("needle")).expect("grep chunks");

        let keys: Vec<_> = result
            .matches
            .iter()
            .map(|m| (m.path.as_str(), m.line))
            .collect();
        assert_eq!(keys, vec![("a.rs", 1), ("a.rs", 3), ("b.rs", 10)]);
    }

    // `ignore_case` lets a lowercase pattern match capitalized text.
    #[test]
    fn ignore_case_matches_case_insensitively() {
        let chunks = vec![chunk("src/lib.rs", 1, "Needle")];
        let mut opts = options("needle");
        opts.ignore_case = true;
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");

        assert_eq!(result.matches.len(), 1);
    }

    // With `fixed_strings`, `.` matches only a literal dot, so `axb` is not a hit.
    #[test]
    fn fixed_strings_treat_regex_metacharacters_literally() {
        let chunks = vec![chunk("src/lib.rs", 1, "a.b\naxb")];
        let mut opts = options("a.b");
        opts.fixed_strings = true;
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");

        assert_eq!(result.matches.len(), 1);
        assert_eq!(result.matches[0].line, 1);
    }

    // Before/after context sizes are honored independently.
    #[test]
    fn context_flags_include_bounded_neighbors() {
        let chunks = vec![chunk("src/lib.rs", 1, "one\ntwo\nneedle\nfour\nfive")];
        let mut opts = options("needle");
        opts.before_context = Some(1);
        opts.after_context = Some(2);
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");
        let item = &result.matches[0];

        assert_eq!(
            item.before,
            vec![GrepContextLine {
                line: 2,
                text: "two".to_string()
            }]
        );
        assert_eq!(
            item.after,
            vec![
                GrepContextLine {
                    line: 4,
                    text: "four".to_string()
                },
                GrepContextLine {
                    line: 5,
                    text: "five".to_string()
                }
            ]
        );
        assert_eq!(
            format_text_matches(&result.matches),
            "src/lib.rs\n2-two\n3:needle\n4-four\n5-five"
        );
    }

    // A context line shared by two matches (line 3 here) is printed only once.
    #[test]
    fn text_suppresses_duplicate_context_lines() {
        let chunks = vec![chunk(
            "src/lib.rs",
            1,
            "one\nneedle one\nmiddle\nneedle two\nfive",
        )];
        let mut opts = options("needle");
        opts.context = Some(1);
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");

        assert_eq!(
            format_text_matches(&result.matches),
            "src/lib.rs\n1-one\n2:needle one\n3-middle\n4:needle two\n5-five"
        );
    }

    // `max_count` limits matching lines; the kept match still gets its context.
    #[test]
    fn max_count_caps_matching_lines_not_context() {
        let chunks = vec![chunk(
            "src/lib.rs",
            1,
            "before\nneedle one\nmiddle\nneedle two\nafter",
        )];
        let mut opts = options("needle");
        opts.context = Some(1);
        opts.max_count = Some(1);
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");

        assert_eq!(result.matched_lines, 1);
        assert!(result.truncated);
        assert_eq!(result.matches[0].line, 2);
        assert_eq!(result.matches[0].before.len(), 1);
        assert_eq!(result.matches[0].after.len(), 1);
        assert_eq!(
            format_text_matches(&result.matches),
            "src/lib.rs\n1-before\n2:needle one\n3-middle"
        );
    }

    // The serialized match exposes path, line, text, every span, and context.
    #[test]
    fn json_match_contains_spans_and_context() {
        let chunks = vec![chunk("src/lib.rs", 1, "before\nneedle needle\nafter")];
        let mut opts = options("needle");
        opts.context = Some(1);
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");
        let value = serde_json::to_value(&result.matches[0]).expect("serialize match");

        assert_eq!(value["path"], "src/lib.rs");
        assert_eq!(value["line"], 2);
        assert_eq!(value["text"], "needle needle");
        assert_eq!(value["spans"][0]["start"], 0);
        assert_eq!(value["spans"][0]["end"], 6);
        assert_eq!(value["spans"][1]["start"], 7);
        assert_eq!(value["before"][0]["line"], 1);
        assert_eq!(value["after"][0]["line"], 3);
    }

    // A file must satisfy both the path filter and the glob filter to be scanned.
    #[test]
    fn path_and_glob_filters_compose() {
        let chunks = vec![
            chunk("src/gobby/app.py", 1, "needle"),
            chunk("src/gobby/app.rs", 1, "needle"),
            chunk("tests/app.py", 1, "needle"),
        ];
        let paths = vec!["src/gobby".to_string()];
        let globs = vec!["*.py".to_string()];
        let opts = GrepOptions {
            paths: &paths,
            globs: &globs,
            ..options("needle")
        };
        let result = grep_chunks(&chunks, &opts).expect("grep chunks");

        assert_eq!(result.scanned_chunks, 1);
        assert_eq!(result.matches[0].path, "src/gobby/app.py");
    }

    // The same (file, line) seen in two chunks yields a single match.
    #[test]
    fn overlapping_chunks_dedupe_by_file_and_line() {
        let chunks = vec![
            chunk("src/lib.rs", 1, "needle\nother"),
            chunk("src/lib.rs", 1, "needle\nother"),
        ];
        let result = grep_chunks(&chunks, &options("needle")).expect("grep chunks");

        assert_eq!(result.matches.len(), 1);
    }
}