Skip to main content

sift_queue/collect/
rg.rs

1use crate::collect::GroupedItem;
2use anyhow::{anyhow, bail, Result};
3
4pub fn parse_json(input: &str) -> Result<Vec<GroupedItem>> {
5    let mut items = Vec::new();
6    let mut current: Option<ActiveGroup> = None;
7
8    for (index, raw_line) in input.lines().enumerate() {
9        let line_number = index + 1;
10        let line = raw_line.trim();
11        if line.is_empty() {
12            continue;
13        }
14
15        let value: serde_json::Value = serde_json::from_str(line)
16            .map_err(|e| anyhow!("invalid rg --json input on line {}: {}", line_number, e))?;
17
18        let event_type = value
19            .get("type")
20            .and_then(|value| value.as_str())
21            .ok_or_else(|| {
22                anyhow!(
23                    "invalid rg --json input on line {}: missing type",
24                    line_number
25                )
26            })?;
27
28        match event_type {
29            "begin" => {
30                finalize(&mut current, &mut items);
31                let filepath = path_from_value(&value).ok_or_else(|| {
32                    anyhow!(
33                        "invalid rg --json input on line {}: missing path",
34                        line_number
35                    )
36                })?;
37                current = Some(ActiveGroup::new(filepath));
38            }
39            "match" | "context" => {
40                let filepath = path_from_value(&value).ok_or_else(|| {
41                    anyhow!(
42                        "invalid rg --json input on line {}: missing path",
43                        line_number
44                    )
45                })?;
46                let lines_text = lines_text_from_value(&value).ok_or_else(|| {
47                    anyhow!(
48                        "invalid rg --json input on line {}: missing lines.text",
49                        line_number
50                    )
51                })?;
52                let line_number = value
53                    .get("data")
54                    .and_then(|data| data.get("line_number"))
55                    .and_then(|number| number.as_u64());
56
57                if current.as_ref().map(|item| item.filepath.as_str()) != Some(filepath.as_str()) {
58                    finalize(&mut current, &mut items);
59                    current = Some(ActiveGroup::new(filepath));
60                }
61
62                if let Some(item) = current.as_mut() {
63                    item.push_text(&lines_text, line_number);
64                    if event_type == "match" {
65                        item.match_count += 1;
66                    }
67                }
68            }
69            "end" => {
70                if let Some(filepath) = path_from_value(&value) {
71                    if current.as_ref().map(|item| item.filepath.as_str())
72                        != Some(filepath.as_str())
73                    {
74                        finalize(&mut current, &mut items);
75                    }
76                }
77                finalize(&mut current, &mut items);
78            }
79            "summary" => {}
80            _ => {}
81        }
82    }
83
84    finalize(&mut current, &mut items);
85
86    if items.is_empty() {
87        bail!("no rg matches found in stdin");
88    }
89
90    Ok(items)
91}
92
93#[derive(Debug, Clone)]
94struct ActiveGroup {
95    filepath: String,
96    lines: Vec<String>,
97    match_count: usize,
98    last_line_number: Option<u64>,
99}
100
101impl ActiveGroup {
102    fn new(filepath: String) -> Self {
103        Self {
104            filepath,
105            lines: Vec::new(),
106            match_count: 0,
107            last_line_number: None,
108        }
109    }
110
111    fn push_text(&mut self, text: &str, line_number: Option<u64>) {
112        let split_lines = split_rg_text_lines(text);
113        if split_lines.is_empty() {
114            return;
115        }
116
117        if let (Some(previous), Some(start)) = (self.last_line_number, line_number) {
118            if start > previous + 1
119                && !self.lines.is_empty()
120                && self.lines.last() != Some(&String::new())
121            {
122                self.lines.push(String::new());
123            }
124        }
125
126        match line_number {
127            Some(start) => {
128                for (offset, line) in split_lines.iter().enumerate() {
129                    self.lines
130                        .push(format!("{}: {}", start + offset as u64, line));
131                }
132                self.last_line_number = Some(start + split_lines.len() as u64 - 1);
133            }
134            None => {
135                self.lines.extend(split_lines);
136                self.last_line_number = None;
137            }
138        }
139    }
140
141    fn into_grouped_item(self) -> GroupedItem {
142        GroupedItem {
143            filepath: self.filepath,
144            text: self.lines.join("\n"),
145            match_count: self.match_count,
146        }
147    }
148}
149
150fn finalize(current: &mut Option<ActiveGroup>, items: &mut Vec<GroupedItem>) {
151    if let Some(item) = current.take() {
152        if !item.lines.is_empty() {
153            items.push(item.into_grouped_item());
154        }
155    }
156}
157
158fn path_from_value(value: &serde_json::Value) -> Option<String> {
159    value
160        .get("data")
161        .and_then(|data| data.get("path"))
162        .and_then(|path| path.get("text"))
163        .and_then(|text| text.as_str())
164        .map(|text| text.to_string())
165}
166
167fn lines_text_from_value(value: &serde_json::Value) -> Option<String> {
168    value
169        .get("data")
170        .and_then(|data| data.get("lines"))
171        .and_then(|lines| lines.get("text"))
172        .and_then(|text| text.as_str())
173        .map(|text| text.to_string())
174}
175
/// Splits the `lines.text` payload of an rg event into individual lines.
///
/// Line terminators (`\n` or `\r\n`) are removed, and a terminator at the very end does not
/// produce a trailing empty entry: `"a\n"` yields `["a"]` while `"\n"` yields `[""]`, so
/// genuinely blank lines are preserved. Empty input yields an empty vector, which lets
/// callers skip events that carry no text at all.
fn split_rg_text_lines(text: &str) -> Vec<String> {
    // `str::lines` already ignores the final terminator and strips the `\r` of CRLF pairs,
    // and unlike `split('\n')` it produces nothing for an empty string.
    text.lines().map(str::to_owned).collect()
}
183
#[cfg(test)]
mod tests {
    use super::parse_json;

    /// Builds a `begin` event for `path`.
    fn begin(path: &str) -> String {
        format!(
            r#"{{"type":"begin","data":{{"path":{{"text":"{}"}}}}}}"#,
            path
        )
    }

    /// Builds an `end` event for `path`.
    fn end(path: &str) -> String {
        format!(
            r#"{{"type":"end","data":{{"path":{{"text":"{}"}}}}}}"#,
            path
        )
    }

    /// Builds a `match` or `context` event whose `lines.text` is `text` followed by a
    /// JSON-escaped newline.
    fn text_event(kind: &str, path: &str, text: &str, line_number: u64) -> String {
        format!(
            r#"{{"type":"{}","data":{{"path":{{"text":"{}"}},"lines":{{"text":"{}\n"}},"line_number":{}}}}}"#,
            kind, path, text, line_number
        )
    }

    /// Concatenates `events`, terminating each one with a newline.
    fn stream(events: &[String]) -> String {
        events.iter().map(|event| format!("{}\n", event)).collect()
    }

    #[test]
    fn test_parse_rg_json_single_file() {
        let input = stream(&[
            begin("a.rb"),
            text_event("match", "a.rb", "foo", 1),
            end("a.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(parsed.len(), 1);

        let only = &parsed[0];
        assert_eq!(only.filepath, "a.rb");
        assert_eq!(only.text, "1: foo");
        assert_eq!(only.match_count, 1);
    }

    #[test]
    fn test_parse_rg_json_multi_file() {
        let input = stream(&[
            begin("a.rb"),
            text_event("match", "a.rb", "foo", 1),
            end("a.rb"),
            begin("b.rb"),
            text_event("match", "b.rb", "bar", 7),
            end("b.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].filepath, "a.rb");
        assert_eq!(parsed[1].filepath, "b.rb");
    }

    #[test]
    fn test_parse_rg_json_with_context() {
        let input = stream(&[
            begin("a.rb"),
            text_event("context", "a.rb", "before", 1),
            text_event("match", "a.rb", "foo", 2),
            text_event("context", "a.rb", "after", 3),
            end("a.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(parsed[0].text, "1: before\n2: foo\n3: after");
        assert_eq!(parsed[0].match_count, 1);
    }

    #[test]
    fn test_parse_rg_json_preserves_blank_context_lines() {
        let input = stream(&[
            begin("a.rb"),
            text_event("context", "a.rb", "", 2),
            text_event("match", "a.rb", "foo", 3),
            end("a.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(parsed[0].text, "2: \n3: foo");
    }

    #[test]
    fn test_parse_rg_json_adds_spacing_between_separate_match_groups() {
        let input = stream(&[
            begin("a.rb"),
            text_event("context", "a.rb", "before first", 1),
            text_event("match", "a.rb", "first", 2),
            text_event("context", "a.rb", "after first", 3),
            text_event("context", "a.rb", "before second", 10),
            text_event("match", "a.rb", "second", 11),
            text_event("context", "a.rb", "after second", 12),
            end("a.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(
            parsed[0].text,
            "1: before first\n2: first\n3: after first\n\n10: before second\n11: second\n12: after second"
        );
        assert_eq!(parsed[0].match_count, 2);
    }

    #[test]
    fn test_parse_rg_json_ignores_summary() {
        let input = stream(&[
            r#"{"type":"summary","data":{"elapsed_total":{"human":"0.1s"}}}"#.to_string(),
            begin("a.rb"),
            text_event("match", "a.rb", "foo", 1),
            end("a.rb"),
        ]);

        let parsed = parse_json(&input).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].filepath, "a.rb");
    }

    #[test]
    fn test_parse_rg_json_empty_reports_no_matches() {
        let message = parse_json("").unwrap_err().to_string();
        assert!(message.contains("no rg matches found"));
    }

    #[test]
    fn test_parse_rg_json_invalid_line_reports_line_number() {
        let message = parse_json("not json\n").unwrap_err().to_string();
        assert!(message.contains("line 1"));
    }

    #[test]
    fn test_parse_rg_json_missing_begin_path_fails() {
        let input = stream(&[
            r#"{"type":"begin","data":{}}"#.to_string(),
            text_event("match", "a.rb", "foo", 1),
        ]);

        let message = parse_json(&input).unwrap_err().to_string();
        assert!(message.contains("line 1"));
        assert!(message.contains("missing path"));
    }

    #[test]
    fn test_parse_rg_json_missing_lines_text_fails() {
        let input = stream(&[
            begin("a.rb"),
            r#"{"type":"match","data":{"path":{"text":"a.rb"},"line_number":1}}"#.to_string(),
        ]);

        let message = parse_json(&input).unwrap_err().to_string();
        assert!(message.contains("line 2"));
        assert!(message.contains("missing lines.text"));
    }
}