// sift-queue 0.6.0

use crate::collect::GroupedItem;
use anyhow::{anyhow, bail, Result};

/// Parses line-delimited `rg --json` output into one [`GroupedItem`] per file.
///
/// Every non-blank line of `input` must be a JSON object with a string `type`
/// field:
///
/// * `begin` closes the active group and opens a new one for `data.path.text`.
/// * `match` / `context` append `data.lines.text` to the group for their path
///   (opening a new group when the path differs from the active one), passing
///   along `data.line_number` when it is an unsigned integer. Only `match`
///   events increase the group's `match_count`.
/// * `end` closes the active group.
/// * Any other event type (for example `summary`) is ignored.
///
/// Groups that never collected a line are dropped.
///
/// # Errors
///
/// Returns an error naming the 1-based input line when a line is not valid
/// JSON, has no string `type`, is a `begin`/`match`/`context` event without
/// `data.path.text`, or is a `match`/`context` event without
/// `data.lines.text`. Also fails when the input produced no groups at all.
pub fn parse_json(input: &str) -> Result<Vec<GroupedItem>> {
    let mut groups: Vec<GroupedItem> = Vec::new();
    let mut active: Option<ActiveGroup> = None;

    // Pair each trimmed line with its 1-based position and drop blank ones.
    let events = input
        .lines()
        .enumerate()
        .map(|(index, raw)| (index + 1, raw.trim()))
        .filter(|(_, text)| !text.is_empty());

    for (line_number, line) in events {
        let event = match serde_json::from_str::<serde_json::Value>(line) {
            Ok(parsed) => parsed,
            Err(e) => bail!("invalid rg --json input on line {}: {}", line_number, e),
        };

        let event_type = match event.get("type").and_then(|kind| kind.as_str()) {
            Some(kind) => kind,
            None => bail!(
                "invalid rg --json input on line {}: missing type",
                line_number
            ),
        };

        match event_type {
            "begin" => {
                finalize(&mut active, &mut groups);
                match path_from_value(&event) {
                    Some(filepath) => active = Some(ActiveGroup::new(filepath)),
                    None => bail!(
                        "invalid rg --json input on line {}: missing path",
                        line_number
                    ),
                }
            }
            "match" | "context" => {
                let filepath = match path_from_value(&event) {
                    Some(path) => path,
                    None => bail!(
                        "invalid rg --json input on line {}: missing path",
                        line_number
                    ),
                };
                let lines_text = match lines_text_from_value(&event) {
                    Some(text) => text,
                    None => bail!(
                        "invalid rg --json input on line {}: missing lines.text",
                        line_number
                    ),
                };
                // Line number inside the searched file, not inside `input`.
                let source_line = event
                    .get("data")
                    .and_then(|data| data.get("line_number"))
                    .and_then(|number| number.as_u64());

                // Tolerate streams without `begin`/`end` markers: a change of
                // path is enough to start a new group.
                let same_file = active
                    .as_ref()
                    .map_or(false, |group| group.filepath == filepath);
                if !same_file {
                    finalize(&mut active, &mut groups);
                    active = Some(ActiveGroup::new(filepath));
                }

                if let Some(group) = active.as_mut() {
                    group.push_text(&lines_text, source_line);
                    if event_type == "match" {
                        group.match_count += 1;
                    }
                }
            }
            // Whatever group is active gets closed; the event's own path does
            // not influence the outcome.
            "end" => finalize(&mut active, &mut groups),
            // `summary` and any unrecognised event types carry nothing to collect.
            _ => {}
        }
    }

    // Flush a group left open by a stream that ended without `end`.
    finalize(&mut active, &mut groups);

    if groups.is_empty() {
        bail!("no rg matches found in stdin");
    }

    Ok(groups)
}

/// Accumulator for the output lines of the file group currently being built.
#[derive(Debug, Clone)]
struct ActiveGroup {
    /// Path of the file this group collects lines for.
    filepath: String,
    /// Lines collected so far; `push_text` also inserts an empty string here
    /// between runs of numbered lines that are not contiguous.
    lines: Vec<String>,
    /// Number of matches counted for this group (starts at zero).
    match_count: usize,
    /// Line number of the last numbered line pushed, or `None` when nothing
    /// has been pushed yet or the most recent push carried no line number.
    last_line_number: Option<u64>,
}

impl ActiveGroup {
    /// Starts an empty group for `filepath` with no lines and no matches.
    fn new(filepath: String) -> Self {
        Self {
            filepath,
            lines: Vec::new(),
            match_count: 0,
            last_line_number: None,
        }
    }

    /// Appends the lines contained in `text` to this group.
    ///
    /// When `line_number` is given, each line is stored as `"<number>: <line>"`,
    /// counting up from `line_number`. If the new text starts more than one
    /// line after the last numbered line, an empty separator line is inserted
    /// first (unless the group already ends with an empty line). When
    /// `line_number` is `None`, the lines are appended verbatim and the
    /// "last line number" tracking is reset.
    ///
    /// Line numbers originate from parsed input, so all arithmetic on them
    /// saturates at `u64::MAX` instead of overflowing.
    fn push_text(&mut self, text: &str, line_number: Option<u64>) {
        let split_lines = split_rg_text_lines(text);
        if split_lines.is_empty() {
            return;
        }

        if let (Some(previous), Some(start)) = (self.last_line_number, line_number) {
            // Equivalent to `start > previous + 1`, but expressed as a
            // subtraction so `previous == u64::MAX` cannot overflow.
            let has_gap = start.saturating_sub(previous) > 1;
            let already_separated = self.lines.last().map_or(true, |last| last.is_empty());
            if has_gap && !already_separated {
                self.lines.push(String::new());
            }
        }

        match line_number {
            Some(start) => {
                // `split_lines` is non-empty here, so `len() - 1` cannot underflow.
                let last_offset = (split_lines.len() - 1) as u64;
                self.lines
                    .extend(split_lines.iter().enumerate().map(|(offset, line)| {
                        format!("{}: {}", start.saturating_add(offset as u64), line)
                    }));
                self.last_line_number = Some(start.saturating_add(last_offset));
            }
            None => {
                self.lines.extend(split_lines);
                self.last_line_number = None;
            }
        }
    }

    /// Consumes the group, joining its lines with `\n` into the final item.
    fn into_grouped_item(self) -> GroupedItem {
        GroupedItem {
            filepath: self.filepath,
            text: self.lines.join("\n"),
            match_count: self.match_count,
        }
    }
}

fn finalize(current: &mut Option<ActiveGroup>, items: &mut Vec<GroupedItem>) {
    if let Some(item) = current.take() {
        if !item.lines.is_empty() {
            items.push(item.into_grouped_item());
        }
    }
}

/// Returns the string at `data.path.text` of an event, or `None` when any
/// step of that path is missing or the final value is not a string.
fn path_from_value(value: &serde_json::Value) -> Option<String> {
    let text = value.get("data")?.get("path")?.get("text")?.as_str()?;
    Some(text.to_string())
}

/// Returns the string at `data.lines.text` of an event, or `None` when any
/// step of that path is missing or the final value is not a string.
fn lines_text_from_value(value: &serde_json::Value) -> Option<String> {
    let text = value.get("data")?.get("lines")?.get("text")?.as_str()?;
    Some(text.to_string())
}

/// Splits the `lines.text` payload of an rg event into individual lines.
///
/// A single trailing `\n` is treated as the terminator of the last line, not
/// as the start of an additional empty line. A `\r` at the end of a line
/// (CRLF-terminated input) is dropped so it does not leak into the rendered
/// text; a `\r` anywhere else is kept. Interior blank lines are preserved, and
/// an empty `text` yields a single empty line.
fn split_rg_text_lines(text: &str) -> Vec<String> {
    let body = text.strip_suffix('\n').unwrap_or(text);
    body.split('\n')
        .map(|line| line.strip_suffix('\r').unwrap_or(line).to_string())
        .collect()
}

// Unit tests for `parse_json`. Each fixture is a hand-written stream of
// rg-style JSON events, one event per line.
#[cfg(test)]
mod tests {
    use super::parse_json;

    // A begin/match/end sequence for one file yields one item with the
    // line-number prefix applied and a match count of one.
    #[test]
    fn test_parse_rg_json_single_file() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":1}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].filepath, "a.rb");
        assert_eq!(parsed[0].text, "1: foo");
        assert_eq!(parsed[0].match_count, 1);
    }

    // Two files in sequence produce two items, in input order.
    #[test]
    fn test_parse_rg_json_multi_file() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":1}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"b.rb\"}}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"b.rb\"},\"lines\":{\"text\":\"bar\\n\"},\"line_number\":7}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"b.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed[0].filepath, "a.rb");
        assert_eq!(parsed[1].filepath, "b.rb");
    }

    // Context lines are included in the text but do not count as matches.
    #[test]
    fn test_parse_rg_json_with_context() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"before\\n\"},\"line_number\":1}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":2}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"after\\n\"},\"line_number\":3}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(parsed[0].text, "1: before\n2: foo\n3: after");
        assert_eq!(parsed[0].match_count, 1);
    }

    // A context line whose text is only a newline is kept as a numbered,
    // otherwise empty line.
    #[test]
    fn test_parse_rg_json_preserves_blank_context_lines() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"\\n\"},\"line_number\":2}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":3}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(parsed[0].text, "2: \n3: foo");
    }

    // A jump in line numbers within one file (3 -> 10) inserts a single empty
    // separator line between the two runs.
    #[test]
    fn test_parse_rg_json_adds_spacing_between_separate_match_groups() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"before first\\n\"},\"line_number\":1}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"first\\n\"},\"line_number\":2}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"after first\\n\"},\"line_number\":3}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"before second\\n\"},\"line_number\":10}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"second\\n\"},\"line_number\":11}}\n",
            "{\"type\":\"context\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"after second\\n\"},\"line_number\":12}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(
            parsed[0].text,
            "1: before first\n2: first\n3: after first\n\n10: before second\n11: second\n12: after second"
        );
        assert_eq!(parsed[0].match_count, 2);
    }

    // `summary` events contribute nothing and do not disturb grouping.
    #[test]
    fn test_parse_rg_json_ignores_summary() {
        let input = concat!(
            "{\"type\":\"summary\",\"data\":{\"elapsed_total\":{\"human\":\"0.1s\"}}}\n",
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":1}}\n",
            "{\"type\":\"end\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n"
        );

        let parsed = parse_json(input).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(parsed[0].filepath, "a.rb");
    }

    // Empty input is an error, not an empty result.
    #[test]
    fn test_parse_rg_json_empty_reports_no_matches() {
        let err = parse_json("").unwrap_err();
        assert!(err.to_string().contains("no rg matches found"));
    }

    // A line that is not JSON is reported with its 1-based line number.
    #[test]
    fn test_parse_rg_json_invalid_line_reports_line_number() {
        let err = parse_json("not json\n").unwrap_err();
        assert!(err.to_string().contains("line 1"));
    }

    // A `begin` event without a path is rejected, naming the offending line.
    #[test]
    fn test_parse_rg_json_missing_begin_path_fails() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"lines\":{\"text\":\"foo\\n\"},\"line_number\":1}}\n"
        );

        let err = parse_json(input).unwrap_err();
        assert!(err.to_string().contains("line 1"));
        assert!(err.to_string().contains("missing path"));
    }

    // A `match` event without `lines.text` is rejected, naming the offending line.
    #[test]
    fn test_parse_rg_json_missing_lines_text_fails() {
        let input = concat!(
            "{\"type\":\"begin\",\"data\":{\"path\":{\"text\":\"a.rb\"}}}\n",
            "{\"type\":\"match\",\"data\":{\"path\":{\"text\":\"a.rb\"},\"line_number\":1}}\n"
        );

        let err = parse_json(input).unwrap_err();
        assert!(err.to_string().contains("line 2"));
        assert!(err.to_string().contains("missing lines.text"));
    }
}