Skip to main content

gobby_code/commands/
grep.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use anyhow::Context as _;
4use postgres::Client;
5use regex::Regex;
6use serde::Serialize;
7
8use crate::config::Context;
9use crate::db;
10use crate::output::{self, Format};
11use crate::search::fts;
12
13pub struct GrepOptions<'a> {
14    pub pattern: &'a str,
15    pub paths: &'a [String],
16    pub globs: &'a [String],
17    pub fixed_strings: bool,
18    pub ignore_case: bool,
19    pub context: Option<usize>,
20    pub before_context: Option<usize>,
21    pub after_context: Option<usize>,
22    pub max_count: Option<usize>,
23    pub format: Format,
24}
25
/// One row of indexed file content as loaded from the database.
#[derive(Clone, Debug)]
struct IndexedContentChunk {
    /// Path of the file this chunk belongs to.
    file_path: String,
    /// Line number assigned to the first line of `content`; following lines
    /// are numbered by adding their offset within the chunk.
    line_start: usize,
    /// Raw text of the chunk, possibly spanning several lines.
    content: String,
}
32
33#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
34pub(crate) struct GrepSpan {
35    pub start: usize,
36    pub end: usize,
37}
38
39#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
40pub(crate) struct GrepContextLine {
41    pub line: usize,
42    pub text: String,
43}
44
45#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
46pub(crate) struct GrepMatch {
47    pub path: String,
48    pub line: usize,
49    pub text: String,
50    pub spans: Vec<GrepSpan>,
51    pub before: Vec<GrepContextLine>,
52    pub after: Vec<GrepContextLine>,
53}
54
55#[derive(Debug, Serialize)]
56struct GrepResponse {
57    project_id: String,
58    pattern: String,
59    fixed_strings: bool,
60    ignore_case: bool,
61    paths: Vec<String>,
62    globs: Vec<String>,
63    max_count: Option<usize>,
64    matched_lines: usize,
65    truncated: bool,
66    scanned_chunks: usize,
67    matches: Vec<GrepMatch>,
68}
69
70#[derive(Debug)]
71struct GrepResult {
72    scanned_chunks: usize,
73    matched_lines: usize,
74    truncated: bool,
75    matches: Vec<GrepMatch>,
76}
77
78pub fn run(ctx: &Context, options: GrepOptions<'_>) -> anyhow::Result<()> {
79    let mut conn = db::connect_readonly(&ctx.database_url)?;
80    let chunks = load_indexed_chunks(&mut conn, &ctx.project_id)?;
81    let result = grep_chunks(&chunks, &options)?;
82
83    match options.format {
84        Format::Json => output::print_json(&GrepResponse {
85            project_id: ctx.project_id.clone(),
86            pattern: options.pattern.to_string(),
87            fixed_strings: options.fixed_strings,
88            ignore_case: options.ignore_case,
89            paths: options.paths.to_vec(),
90            globs: options.globs.to_vec(),
91            max_count: options.max_count,
92            matched_lines: result.matched_lines,
93            truncated: result.truncated,
94            scanned_chunks: result.scanned_chunks,
95            matches: result.matches,
96        }),
97        Format::Text => {
98            let text = format_text_matches(&result.matches);
99            if text.is_empty() {
100                Ok(())
101            } else {
102                output::print_text(&text)
103            }
104        }
105    }
106}
107
108fn load_indexed_chunks(
109    conn: &mut Client,
110    project_id: &str,
111) -> anyhow::Result<Vec<IndexedContentChunk>> {
112    let rows = conn.query(
113        "SELECT c.file_path,
114                c.line_start::BIGINT AS line_start,
115                c.content
116         FROM code_content_chunks c
117         JOIN code_indexed_files cf
118           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
119         WHERE c.project_id = $1
120         ORDER BY c.file_path ASC, c.line_start ASC, c.chunk_index ASC",
121        &[&project_id],
122    )?;
123
124    rows.into_iter()
125        .map(|row| {
126            let line_start = i64_to_usize(row.try_get("line_start")?, "line_start")?;
127            Ok(IndexedContentChunk {
128                file_path: row.try_get("file_path")?,
129                line_start,
130                content: row.try_get("content")?,
131            })
132        })
133        .collect()
134}
135
136fn grep_chunks(
137    chunks: &[IndexedContentChunk],
138    options: &GrepOptions<'_>,
139) -> anyhow::Result<GrepResult> {
140    let matcher = GrepMatcher::new(options.pattern, options.fixed_strings, options.ignore_case)?;
141    let filters = GrepFilters::new(options.paths, options.globs)?;
142    let before_context = options.before_context.or(options.context).unwrap_or(0);
143    let after_context = options.after_context.or(options.context).unwrap_or(0);
144
145    let mut scanned_chunks = 0usize;
146    let mut file_lines: BTreeMap<String, BTreeMap<usize, String>> = BTreeMap::new();
147    let mut matches: BTreeMap<(String, usize), GrepMatch> = BTreeMap::new();
148
149    for chunk in chunks {
150        if !filters.matches(&chunk.file_path) {
151            continue;
152        }
153        scanned_chunks += 1;
154
155        for (offset, line_text) in chunk.content.lines().enumerate() {
156            let line = chunk.line_start + offset;
157            file_lines
158                .entry(chunk.file_path.clone())
159                .or_default()
160                .entry(line)
161                .or_insert_with(|| line_text.to_string());
162
163            let key = (chunk.file_path.clone(), line);
164            if matches.contains_key(&key) {
165                continue;
166            }
167
168            let spans = matcher.find_spans(line_text);
169            if !spans.is_empty() {
170                matches.insert(
171                    key,
172                    GrepMatch {
173                        path: chunk.file_path.clone(),
174                        line,
175                        text: line_text.to_string(),
176                        spans,
177                        before: Vec::new(),
178                        after: Vec::new(),
179                    },
180                );
181            }
182        }
183    }
184
185    let total_matching_lines = matches.len();
186    let max = options.max_count.unwrap_or(usize::MAX);
187    let mut retained = matches.into_values().take(max).collect::<Vec<_>>();
188    for item in &mut retained {
189        if let Some(lines) = file_lines.get(&item.path) {
190            item.before = context_before(lines, item.line, before_context);
191            item.after = context_after(lines, item.line, after_context);
192        }
193    }
194
195    Ok(GrepResult {
196        scanned_chunks,
197        matched_lines: retained.len(),
198        truncated: total_matching_lines > retained.len(),
199        matches: retained,
200    })
201}
202
203struct GrepMatcher {
204    regex: Regex,
205}
206
207impl GrepMatcher {
208    fn new(pattern: &str, fixed_strings: bool, ignore_case: bool) -> anyhow::Result<Self> {
209        if pattern.is_empty() {
210            anyhow::bail!("gcode grep pattern must not be empty");
211        }
212        let pattern = if fixed_strings {
213            regex::escape(pattern)
214        } else {
215            pattern.to_string()
216        };
217        let regex = regex::RegexBuilder::new(&pattern)
218            .case_insensitive(ignore_case)
219            .build()
220            .with_context(|| "invalid gcode grep pattern")?;
221        Ok(Self { regex })
222    }
223
224    fn find_spans(&self, line: &str) -> Vec<GrepSpan> {
225        self.regex
226            .find_iter(line)
227            .filter(|m| m.start() != m.end())
228            .map(|m| GrepSpan {
229                start: m.start(),
230                end: m.end(),
231            })
232            .collect()
233    }
234}
235
236struct GrepFilters {
237    paths: Vec<glob::Pattern>,
238    globs: Vec<CompiledGlob>,
239}
240
241impl GrepFilters {
242    fn new(paths: &[String], globs: &[String]) -> anyhow::Result<Self> {
243        let expanded_paths = fts::expand_paths(paths);
244        Ok(Self {
245            paths: fts::compile_patterns(&expanded_paths)?,
246            globs: globs
247                .iter()
248                .map(|glob| CompiledGlob::new(glob))
249                .collect::<anyhow::Result<Vec<_>>>()?,
250        })
251    }
252
253    fn matches(&self, file_path: &str) -> bool {
254        let path_matches =
255            self.paths.is_empty() || self.paths.iter().any(|pattern| pattern.matches(file_path));
256        let glob_matches =
257            self.globs.is_empty() || self.globs.iter().any(|glob| glob.matches(file_path));
258        path_matches && glob_matches
259    }
260}
261
262struct CompiledGlob {
263    raw: String,
264    pattern: glob::Pattern,
265}
266
267impl CompiledGlob {
268    fn new(raw: &str) -> anyhow::Result<Self> {
269        Ok(Self {
270            raw: raw.to_string(),
271            pattern: glob::Pattern::new(raw)
272                .map_err(|err| anyhow::anyhow!("invalid grep glob `{raw}`: {err}"))?,
273        })
274    }
275
276    fn matches(&self, file_path: &str) -> bool {
277        if self.pattern.matches(file_path) {
278            return true;
279        }
280        if self.raw.contains('/') {
281            return false;
282        }
283        file_path
284            .rsplit('/')
285            .next()
286            .is_some_and(|name| self.pattern.matches(name))
287    }
288}
289
290fn context_before(
291    lines: &BTreeMap<usize, String>,
292    line: usize,
293    context: usize,
294) -> Vec<GrepContextLine> {
295    if context == 0 {
296        return Vec::new();
297    }
298    let start = line.saturating_sub(context);
299    lines
300        .range(start..line)
301        .map(|(line, text)| GrepContextLine {
302            line: *line,
303            text: text.clone(),
304        })
305        .collect()
306}
307
308fn context_after(
309    lines: &BTreeMap<usize, String>,
310    line: usize,
311    context: usize,
312) -> Vec<GrepContextLine> {
313    if context == 0 {
314        return Vec::new();
315    }
316    let end = line.saturating_add(context);
317    lines
318        .range((line.saturating_add(1))..=end)
319        .map(|(line, text)| GrepContextLine {
320            line: *line,
321            text: text.clone(),
322        })
323        .collect()
324}
325
326fn format_text_matches(matches: &[GrepMatch]) -> String {
327    let matching_lines: BTreeSet<(String, usize)> =
328        matches.iter().map(|m| (m.path.clone(), m.line)).collect();
329    let mut emitted_context = BTreeSet::new();
330    let mut current_path: Option<&str> = None;
331    let mut lines = Vec::new();
332
333    for item in matches {
334        for context in &item.before {
335            let key = (item.path.clone(), context.line);
336            if !matching_lines.contains(&key) && emitted_context.insert(key) {
337                push_grouped_grep_line(
338                    &mut lines,
339                    &mut current_path,
340                    &item.path,
341                    context.line,
342                    '-',
343                    &context.text,
344                );
345            }
346        }
347
348        push_grouped_grep_line(
349            &mut lines,
350            &mut current_path,
351            &item.path,
352            item.line,
353            ':',
354            &item.text,
355        );
356
357        for context in &item.after {
358            let key = (item.path.clone(), context.line);
359            if !matching_lines.contains(&key) && emitted_context.insert(key) {
360                push_grouped_grep_line(
361                    &mut lines,
362                    &mut current_path,
363                    &item.path,
364                    context.line,
365                    '-',
366                    &context.text,
367                );
368            }
369        }
370    }
371
372    lines.join("\n")
373}
374
/// Appends one output line, preceded by a path header when the file changes.
///
/// `current_path` tracks the path of the most recently emitted header. When it
/// differs from `path`, the path is pushed as its own line and remembered.
/// The line itself is formatted as `<line><marker><text>`.
fn push_grouped_grep_line<'a>(
    lines: &mut Vec<String>,
    current_path: &mut Option<&'a str>,
    path: &'a str,
    line: usize,
    marker: char,
    text: &str,
) {
    match *current_path {
        Some(shown) if shown == path => {}
        _ => {
            lines.push(path.to_owned());
            *current_path = Some(path);
        }
    }
    lines.push(format!("{line}{marker}{text}"));
}
389
390fn i64_to_usize(value: i64, column: &str) -> anyhow::Result<usize> {
391    value
392        .try_into()
393        .with_context(|| format!("column `{column}` contains negative or too-large value {value}"))
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an in-memory chunk without touching the database.
    fn chunk(path: &str, line_start: usize, content: &str) -> IndexedContentChunk {
        IndexedContentChunk {
            file_path: path.to_owned(),
            line_start,
            content: content.to_owned(),
        }
    }

    /// Default options for `pattern`: regex mode, case-sensitive, no filters,
    /// no context, no cap.
    fn options(pattern: &str) -> GrepOptions<'_> {
        GrepOptions {
            pattern,
            paths: &[],
            globs: &[],
            fixed_strings: false,
            ignore_case: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            format: Format::Json,
        }
    }

    /// Runs the search and unwraps the result.
    fn grep(chunks: &[IndexedContentChunk], opts: &GrepOptions<'_>) -> GrepResult {
        grep_chunks(chunks, opts).expect("grep chunks")
    }

    /// Shorthand for an expected context line.
    fn context_line(line: usize, text: &str) -> GrepContextLine {
        GrepContextLine {
            line,
            text: text.to_string(),
        }
    }

    #[test]
    fn text_renders_grouped_grep_shape() {
        let chunks = [chunk("src/lib.rs", 1, "one\nneedle\nthree")];
        let found = grep(&chunks, &options("needle"));

        assert_eq!(format_text_matches(&found.matches), "src/lib.rs\n2:needle");
    }

    #[test]
    fn text_groups_multiple_files() {
        let chunks = [
            chunk("src/a.rs", 1, "needle a"),
            chunk("tests/b.rs", 10, "needle b"),
        ];
        let found = grep(&chunks, &options("needle"));

        assert_eq!(
            format_text_matches(&found.matches),
            "src/a.rs\n1:needle a\ntests/b.rs\n10:needle b"
        );
    }

    #[test]
    fn ordering_is_path_then_line() {
        let chunks = [
            chunk("b.rs", 10, "needle later"),
            chunk("a.rs", 3, "needle first"),
            chunk("a.rs", 1, "needle earliest"),
        ];
        let found = grep(&chunks, &options("needle"));

        let mut keys = Vec::new();
        for m in &found.matches {
            keys.push((m.path.as_str(), m.line));
        }
        assert_eq!(keys, vec![("a.rs", 1), ("a.rs", 3), ("b.rs", 10)]);
    }

    #[test]
    fn ignore_case_matches_case_insensitively() {
        let chunks = [chunk("src/lib.rs", 1, "Needle")];
        let opts = GrepOptions {
            ignore_case: true,
            ..options("needle")
        };
        let found = grep(&chunks, &opts);

        assert_eq!(found.matches.len(), 1);
    }

    #[test]
    fn fixed_strings_treat_regex_metacharacters_literally() {
        let chunks = [chunk("src/lib.rs", 1, "a.b\naxb")];
        let opts = GrepOptions {
            fixed_strings: true,
            ..options("a.b")
        };
        let found = grep(&chunks, &opts);

        assert_eq!(found.matches.len(), 1);
        assert_eq!(found.matches[0].line, 1);
    }

    #[test]
    fn context_flags_include_bounded_neighbors() {
        let chunks = [chunk("src/lib.rs", 1, "one\ntwo\nneedle\nfour\nfive")];
        let opts = GrepOptions {
            before_context: Some(1),
            after_context: Some(2),
            ..options("needle")
        };
        let found = grep(&chunks, &opts);
        let first = &found.matches[0];

        assert_eq!(first.before, vec![context_line(2, "two")]);
        assert_eq!(
            first.after,
            vec![context_line(4, "four"), context_line(5, "five")]
        );
        assert_eq!(
            format_text_matches(&found.matches),
            "src/lib.rs\n2-two\n3:needle\n4-four\n5-five"
        );
    }

    #[test]
    fn text_suppresses_duplicate_context_lines() {
        let chunks = [chunk(
            "src/lib.rs",
            1,
            "one\nneedle one\nmiddle\nneedle two\nfive",
        )];
        let opts = GrepOptions {
            context: Some(1),
            ..options("needle")
        };
        let found = grep(&chunks, &opts);

        assert_eq!(
            format_text_matches(&found.matches),
            "src/lib.rs\n1-one\n2:needle one\n3-middle\n4:needle two\n5-five"
        );
    }

    #[test]
    fn max_count_caps_matching_lines_not_context() {
        let chunks = [chunk(
            "src/lib.rs",
            1,
            "before\nneedle one\nmiddle\nneedle two\nafter",
        )];
        let opts = GrepOptions {
            context: Some(1),
            max_count: Some(1),
            ..options("needle")
        };
        let found = grep(&chunks, &opts);

        assert_eq!(found.matched_lines, 1);
        assert!(found.truncated);

        let only = &found.matches[0];
        assert_eq!(only.line, 2);
        assert_eq!(only.before.len(), 1);
        assert_eq!(only.after.len(), 1);
        assert_eq!(
            format_text_matches(&found.matches),
            "src/lib.rs\n1-before\n2:needle one\n3-middle"
        );
    }

    #[test]
    fn json_match_contains_spans_and_context() {
        let chunks = [chunk("src/lib.rs", 1, "before\nneedle needle\nafter")];
        let opts = GrepOptions {
            context: Some(1),
            ..options("needle")
        };
        let found = grep(&chunks, &opts);
        let json = serde_json::to_value(&found.matches[0]).expect("serialize match");

        assert_eq!(json["path"], "src/lib.rs");
        assert_eq!(json["line"], 2);
        assert_eq!(json["text"], "needle needle");

        let spans = &json["spans"];
        assert_eq!(spans[0]["start"], 0);
        assert_eq!(spans[0]["end"], 6);
        assert_eq!(spans[1]["start"], 7);

        assert_eq!(json["before"][0]["line"], 1);
        assert_eq!(json["after"][0]["line"], 3);
    }

    #[test]
    fn path_and_glob_filters_compose() {
        let chunks = [
            chunk("src/gobby/app.py", 1, "needle"),
            chunk("src/gobby/app.rs", 1, "needle"),
            chunk("tests/app.py", 1, "needle"),
        ];
        let paths = vec!["src/gobby".to_string()];
        let globs = vec!["*.py".to_string()];
        let opts = GrepOptions {
            paths: &paths,
            globs: &globs,
            ..options("needle")
        };
        let found = grep(&chunks, &opts);

        assert_eq!(found.scanned_chunks, 1);
        assert_eq!(found.matches[0].path, "src/gobby/app.py");
    }

    #[test]
    fn overlapping_chunks_dedupe_by_file_and_line() {
        let duplicate = chunk("src/lib.rs", 1, "needle\nother");
        let chunks = [duplicate.clone(), duplicate];
        let found = grep(&chunks, &options("needle"));

        assert_eq!(found.matches.len(), 1);
    }
}