// gobby_code/commands/grep.rs

1use std::collections::{BTreeMap, BTreeSet};
2
3use anyhow::Context as _;
4use postgres::Client;
5use regex::Regex;
6use serde::Serialize;
7
8use crate::config::Context;
9use crate::db;
10use crate::output::{self, Format};
11use crate::search::fts;
12
13pub struct GrepOptions<'a> {
14    pub pattern: &'a str,
15    pub paths: &'a [String],
16    pub globs: &'a [String],
17    pub fixed_strings: bool,
18    pub ignore_case: bool,
19    pub context: Option<usize>,
20    pub before_context: Option<usize>,
21    pub after_context: Option<usize>,
22    pub max_count: Option<usize>,
23    pub format: Format,
24}
25
/// One indexed slice of a file's text, as loaded from `code_content_chunks`.
#[derive(Debug, Clone)]
struct IndexedContentChunk {
    /// Path of the file the chunk belongs to.
    file_path: String,
    /// Line number assigned to the first line of `content`; subsequent lines
    /// are numbered `line_start + offset`.
    line_start: usize,
    /// Raw chunk text; split with `str::lines` when scanning.
    content: String,
}
32
33#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
34pub(crate) struct GrepSpan {
35    pub start: usize,
36    pub end: usize,
37}
38
39#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
40pub(crate) struct GrepContextLine {
41    pub line: usize,
42    pub text: String,
43}
44
45#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
46pub(crate) struct GrepMatch {
47    pub path: String,
48    pub line: usize,
49    pub text: String,
50    pub spans: Vec<GrepSpan>,
51    pub before: Vec<GrepContextLine>,
52    pub after: Vec<GrepContextLine>,
53}
54
55#[derive(Debug, Serialize)]
56struct GrepResponse {
57    project_id: String,
58    pattern: String,
59    fixed_strings: bool,
60    ignore_case: bool,
61    paths: Vec<String>,
62    globs: Vec<String>,
63    max_count: Option<usize>,
64    matched_lines: usize,
65    truncated: bool,
66    scanned_chunks: usize,
67    matches: Vec<GrepMatch>,
68}
69
70#[derive(Debug)]
71struct GrepResult {
72    scanned_chunks: usize,
73    matched_lines: usize,
74    truncated: bool,
75    matches: Vec<GrepMatch>,
76}
77
78pub fn run(ctx: &Context, options: GrepOptions<'_>) -> anyhow::Result<()> {
79    let mut conn = db::connect_readonly(&ctx.database_url)?;
80    let chunks = load_indexed_chunks(&mut conn, &ctx.project_id)?;
81    let result = grep_chunks(&chunks, &options)?;
82
83    match options.format {
84        Format::Json => output::print_json(&GrepResponse {
85            project_id: ctx.project_id.clone(),
86            pattern: options.pattern.to_string(),
87            fixed_strings: options.fixed_strings,
88            ignore_case: options.ignore_case,
89            paths: options.paths.to_vec(),
90            globs: options.globs.to_vec(),
91            max_count: options.max_count,
92            matched_lines: result.matched_lines,
93            truncated: result.truncated,
94            scanned_chunks: result.scanned_chunks,
95            matches: result.matches,
96        }),
97        Format::Text => {
98            let text = format_text_matches(&result.matches);
99            if text.is_empty() {
100                Ok(())
101            } else {
102                output::print_text(&text)
103            }
104        }
105    }
106}
107
108fn load_indexed_chunks(
109    conn: &mut Client,
110    project_id: &str,
111) -> anyhow::Result<Vec<IndexedContentChunk>> {
112    let rows = conn.query(
113        "SELECT c.file_path,
114                c.chunk_index::BIGINT AS chunk_index,
115                c.line_start::BIGINT AS line_start,
116                c.content
117         FROM code_content_chunks c
118         JOIN code_indexed_files cf
119           ON cf.project_id = c.project_id AND cf.file_path = c.file_path
120         WHERE c.project_id = $1
121         ORDER BY c.file_path ASC, c.line_start ASC, c.chunk_index ASC",
122        &[&project_id],
123    )?;
124
125    rows.into_iter()
126        .map(|row| {
127            let line_start = i64_to_usize(row.try_get("line_start")?, "line_start")?;
128            Ok(IndexedContentChunk {
129                file_path: row.try_get("file_path")?,
130                line_start,
131                content: row.try_get("content")?,
132            })
133        })
134        .collect()
135}
136
137fn grep_chunks(
138    chunks: &[IndexedContentChunk],
139    options: &GrepOptions<'_>,
140) -> anyhow::Result<GrepResult> {
141    let matcher = GrepMatcher::new(options.pattern, options.fixed_strings, options.ignore_case)?;
142    let filters = GrepFilters::new(options.paths, options.globs)?;
143    let before_context = options.before_context.or(options.context).unwrap_or(0);
144    let after_context = options.after_context.or(options.context).unwrap_or(0);
145
146    let mut scanned_chunks = 0usize;
147    let mut file_lines: BTreeMap<String, BTreeMap<usize, String>> = BTreeMap::new();
148    let mut matches: BTreeMap<(String, usize), GrepMatch> = BTreeMap::new();
149
150    for chunk in chunks {
151        if !filters.matches(&chunk.file_path) {
152            continue;
153        }
154        scanned_chunks += 1;
155
156        for (offset, line_text) in chunk.content.lines().enumerate() {
157            let line = chunk.line_start + offset;
158            file_lines
159                .entry(chunk.file_path.clone())
160                .or_default()
161                .entry(line)
162                .or_insert_with(|| line_text.to_string());
163
164            let key = (chunk.file_path.clone(), line);
165            if matches.contains_key(&key) {
166                continue;
167            }
168
169            let spans = matcher.find_spans(line_text);
170            if !spans.is_empty() {
171                matches.insert(
172                    key,
173                    GrepMatch {
174                        path: chunk.file_path.clone(),
175                        line,
176                        text: line_text.to_string(),
177                        spans,
178                        before: Vec::new(),
179                        after: Vec::new(),
180                    },
181                );
182            }
183        }
184    }
185
186    let total_matching_lines = matches.len();
187    let max = options.max_count.unwrap_or(usize::MAX);
188    let mut retained = matches.into_values().take(max).collect::<Vec<_>>();
189    for item in &mut retained {
190        if let Some(lines) = file_lines.get(&item.path) {
191            item.before = context_before(lines, item.line, before_context);
192            item.after = context_after(lines, item.line, after_context);
193        }
194    }
195
196    Ok(GrepResult {
197        scanned_chunks,
198        matched_lines: retained.len(),
199        truncated: total_matching_lines > retained.len(),
200        matches: retained,
201    })
202}
203
204struct GrepMatcher {
205    regex: Regex,
206}
207
208impl GrepMatcher {
209    fn new(pattern: &str, fixed_strings: bool, ignore_case: bool) -> anyhow::Result<Self> {
210        if pattern.is_empty() {
211            anyhow::bail!("gcode grep pattern must not be empty");
212        }
213        let pattern = if fixed_strings {
214            regex::escape(pattern)
215        } else {
216            pattern.to_string()
217        };
218        let regex = regex::RegexBuilder::new(&pattern)
219            .case_insensitive(ignore_case)
220            .build()
221            .with_context(|| "invalid gcode grep pattern")?;
222        Ok(Self { regex })
223    }
224
225    fn find_spans(&self, line: &str) -> Vec<GrepSpan> {
226        self.regex
227            .find_iter(line)
228            .filter(|m| m.start() != m.end())
229            .map(|m| GrepSpan {
230                start: m.start(),
231                end: m.end(),
232            })
233            .collect()
234    }
235}
236
237struct GrepFilters {
238    paths: Vec<glob::Pattern>,
239    globs: Vec<CompiledGlob>,
240}
241
242impl GrepFilters {
243    fn new(paths: &[String], globs: &[String]) -> anyhow::Result<Self> {
244        let expanded_paths = fts::expand_paths(paths);
245        Ok(Self {
246            paths: fts::compile_patterns(&expanded_paths)?,
247            globs: globs
248                .iter()
249                .map(|glob| CompiledGlob::new(glob))
250                .collect::<anyhow::Result<Vec<_>>>()?,
251        })
252    }
253
254    fn matches(&self, file_path: &str) -> bool {
255        let path_matches =
256            self.paths.is_empty() || self.paths.iter().any(|pattern| pattern.matches(file_path));
257        let glob_matches =
258            self.globs.is_empty() || self.globs.iter().any(|glob| glob.matches(file_path));
259        path_matches && glob_matches
260    }
261}
262
263struct CompiledGlob {
264    raw: String,
265    pattern: glob::Pattern,
266}
267
268impl CompiledGlob {
269    fn new(raw: &str) -> anyhow::Result<Self> {
270        Ok(Self {
271            raw: raw.to_string(),
272            pattern: glob::Pattern::new(raw)
273                .map_err(|err| anyhow::anyhow!("invalid grep glob `{raw}`: {err}"))?,
274        })
275    }
276
277    fn matches(&self, file_path: &str) -> bool {
278        if self.pattern.matches(file_path) {
279            return true;
280        }
281        if self.raw.contains('/') {
282            return false;
283        }
284        file_path
285            .rsplit('/')
286            .next()
287            .is_some_and(|name| self.pattern.matches(name))
288    }
289}
290
291fn context_before(
292    lines: &BTreeMap<usize, String>,
293    line: usize,
294    context: usize,
295) -> Vec<GrepContextLine> {
296    if context == 0 {
297        return Vec::new();
298    }
299    let start = line.saturating_sub(context);
300    lines
301        .range(start..line)
302        .map(|(line, text)| GrepContextLine {
303            line: *line,
304            text: text.clone(),
305        })
306        .collect()
307}
308
309fn context_after(
310    lines: &BTreeMap<usize, String>,
311    line: usize,
312    context: usize,
313) -> Vec<GrepContextLine> {
314    if context == 0 {
315        return Vec::new();
316    }
317    let end = line.saturating_add(context);
318    lines
319        .range((line.saturating_add(1))..=end)
320        .map(|(line, text)| GrepContextLine {
321            line: *line,
322            text: text.clone(),
323        })
324        .collect()
325}
326
327fn format_text_matches(matches: &[GrepMatch]) -> String {
328    let matching_lines: BTreeSet<(String, usize)> =
329        matches.iter().map(|m| (m.path.clone(), m.line)).collect();
330    let mut emitted_context = BTreeSet::new();
331    let mut lines = Vec::new();
332
333    for item in matches {
334        for context in &item.before {
335            let key = (item.path.clone(), context.line);
336            if !matching_lines.contains(&key) && emitted_context.insert(key) {
337                lines.push(format!("{}-{}-{}", item.path, context.line, context.text));
338            }
339        }
340
341        lines.push(format!("{}:{}:{}", item.path, item.line, item.text));
342
343        for context in &item.after {
344            let key = (item.path.clone(), context.line);
345            if !matching_lines.contains(&key) && emitted_context.insert(key) {
346                lines.push(format!("{}-{}-{}", item.path, context.line, context.text));
347            }
348        }
349    }
350
351    lines.join("\n")
352}
353
354fn i64_to_usize(value: i64, column: &str) -> anyhow::Result<usize> {
355    value
356        .try_into()
357        .with_context(|| format!("column `{column}` contains negative or too-large value {value}"))
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an in-memory chunk so the tests never need a database.
    fn chunk(path: &str, line_start: usize, content: &str) -> IndexedContentChunk {
        IndexedContentChunk {
            file_path: path.to_owned(),
            line_start,
            content: content.to_owned(),
        }
    }

    /// Baseline options: regex matching, case-sensitive, no filters, no
    /// context, no cap.
    fn options(pattern: &str) -> GrepOptions<'_> {
        GrepOptions {
            pattern,
            paths: &[],
            globs: &[],
            fixed_strings: false,
            ignore_case: false,
            context: None,
            before_context: None,
            after_context: None,
            max_count: None,
            format: Format::Json,
        }
    }

    /// Runs the search and unwraps the result.
    fn grep(chunks: &[IndexedContentChunk], opts: &GrepOptions<'_>) -> GrepResult {
        grep_chunks(chunks, opts).expect("grep chunks")
    }

    #[test]
    fn text_renders_grep_shape() {
        let indexed = [chunk("src/lib.rs", 1, "one\nneedle\nthree")];
        let outcome = grep(&indexed, &options("needle"));

        assert_eq!(format_text_matches(&outcome.matches), "src/lib.rs:2:needle");
    }

    #[test]
    fn ordering_is_path_then_line() {
        // Deliberately supplied out of order.
        let indexed = [
            chunk("b.rs", 10, "needle later"),
            chunk("a.rs", 3, "needle first"),
            chunk("a.rs", 1, "needle earliest"),
        ];
        let outcome = grep(&indexed, &options("needle"));

        let found: Vec<(&str, usize)> = outcome
            .matches
            .iter()
            .map(|m| (m.path.as_str(), m.line))
            .collect();
        assert_eq!(found, vec![("a.rs", 1), ("a.rs", 3), ("b.rs", 10)]);
    }

    #[test]
    fn ignore_case_matches_case_insensitively() {
        let indexed = [chunk("src/lib.rs", 1, "Needle")];
        let opts = GrepOptions {
            ignore_case: true,
            ..options("needle")
        };
        let outcome = grep(&indexed, &opts);

        assert_eq!(outcome.matches.len(), 1);
    }

    #[test]
    fn fixed_strings_treat_regex_metacharacters_literally() {
        // As a regex, `a.b` would also match `axb` on line 2.
        let indexed = [chunk("src/lib.rs", 1, "a.b\naxb")];
        let opts = GrepOptions {
            fixed_strings: true,
            ..options("a.b")
        };
        let outcome = grep(&indexed, &opts);

        assert_eq!(outcome.matches.len(), 1);
        assert_eq!(outcome.matches[0].line, 1);
    }

    #[test]
    fn context_flags_include_bounded_neighbors() {
        let indexed = [chunk("src/lib.rs", 1, "one\ntwo\nneedle\nfour\nfive")];
        let opts = GrepOptions {
            before_context: Some(1),
            after_context: Some(2),
            ..options("needle")
        };
        let outcome = grep(&indexed, &opts);
        let first = &outcome.matches[0];

        let expected_before = vec![GrepContextLine {
            line: 2,
            text: "two".to_string(),
        }];
        let expected_after = vec![
            GrepContextLine {
                line: 4,
                text: "four".to_string(),
            },
            GrepContextLine {
                line: 5,
                text: "five".to_string(),
            },
        ];
        assert_eq!(first.before, expected_before);
        assert_eq!(first.after, expected_after);
        assert_eq!(
            format_text_matches(&outcome.matches),
            "src/lib.rs-2-two\nsrc/lib.rs:3:needle\nsrc/lib.rs-4-four\nsrc/lib.rs-5-five"
        );
    }

    #[test]
    fn max_count_caps_matching_lines_not_context() {
        let indexed = [chunk(
            "src/lib.rs",
            1,
            "before\nneedle one\nmiddle\nneedle two\nafter",
        )];
        let opts = GrepOptions {
            context: Some(1),
            max_count: Some(1),
            ..options("needle")
        };
        let outcome = grep(&indexed, &opts);

        assert_eq!(outcome.matched_lines, 1);
        assert!(outcome.truncated);
        assert_eq!(outcome.matches[0].line, 2);
        assert_eq!(outcome.matches[0].before.len(), 1);
        assert_eq!(outcome.matches[0].after.len(), 1);
    }

    #[test]
    fn json_match_contains_spans_and_context() {
        let indexed = [chunk("src/lib.rs", 1, "before\nneedle needle\nafter")];
        let opts = GrepOptions {
            context: Some(1),
            ..options("needle")
        };
        let outcome = grep(&indexed, &opts);
        let json = serde_json::to_value(&outcome.matches[0]).expect("serialize match");

        assert_eq!(json["path"], "src/lib.rs");
        assert_eq!(json["line"], 2);
        assert_eq!(json["text"], "needle needle");
        assert_eq!(json["spans"][0]["start"], 0);
        assert_eq!(json["spans"][0]["end"], 6);
        assert_eq!(json["spans"][1]["start"], 7);
        assert_eq!(json["before"][0]["line"], 1);
        assert_eq!(json["after"][0]["line"], 3);
    }

    #[test]
    fn path_and_glob_filters_compose() {
        let indexed = [
            chunk("src/gobby/app.py", 1, "needle"),
            chunk("src/gobby/app.rs", 1, "needle"),
            chunk("tests/app.py", 1, "needle"),
        ];
        let paths = vec!["src/gobby".to_string()];
        let globs = vec!["*.py".to_string()];
        let opts = GrepOptions {
            paths: &paths,
            globs: &globs,
            ..options("needle")
        };
        let outcome = grep(&indexed, &opts);

        // Only the file that satisfies both the path and the glob is scanned.
        assert_eq!(outcome.scanned_chunks, 1);
        assert_eq!(outcome.matches[0].path, "src/gobby/app.py");
    }

    #[test]
    fn overlapping_chunks_dedupe_by_file_and_line() {
        let indexed = [
            chunk("src/lib.rs", 1, "needle\nother"),
            chunk("src/lib.rs", 1, "needle\nother"),
        ];
        let outcome = grep(&indexed, &options("needle"));

        assert_eq!(outcome.matches.len(), 1);
    }
}