multilint 0.1.3

A driver of multiple linters
Documentation
use std::{collections::BTreeMap, convert::identity};

use anyhow::Result;
use log::{debug, warn};
use regex::{Captures, Regex, RegexBuilder};
use serde::Serialize;

#[derive(Debug, PartialEq, Eq, Default, Serialize)]
pub struct Parsed {
    pub program: Option<String>,
    pub file: Option<String>,
    pub line: Option<u32>,
    pub column: Option<u32>,
    pub message: Option<String>,
}

pub fn to_re(format: &str) -> String {
    let mut ret = String::new();
    let mut escape = false;
    for c in format.chars() {
        if escape {
            match c {
                '%' => ret.push('%'),
                'p' => ret.push_str(r"(?P<p>[^:[[:cntrl:]]]+)"),
                'f' => ret.push_str(r"(?P<f>[^:[[:cntrl:]]]+)"),
                'l' => ret.push_str(r"(?P<l>\d+)"),
                'c' => ret.push_str(r"(?P<c>\d+)"),
                'm' => ret.push_str(r"(?P<m>.*)"),
                _ => warn!("invalid format %{}", c),
            }
            escape = false;
        } else if c == '%' {
            escape = true;
        } else {
            ret.push(c);
        }
    }
    ret
}

#[derive(Debug)]
pub struct Parser {
    regexes: Vec<Regex>,
}

impl Parser {
    pub fn new<I, S>(patterns: I) -> Result<Self>
    where
        S: AsRef<str>,
        I: IntoIterator<Item = S>,
    {
        let regexes = patterns
            .into_iter()
            .map(|pat| {
                RegexBuilder::new(&to_re(pat.as_ref()))
                    .multi_line(true)
                    .build()
            })
            .collect::<Result<Vec<_>, regex::Error>>()?;
        Ok(Self { regexes })
    }

    pub fn parse(&self, text: &str) -> Vec<Parsed> {
        let mut captures = BTreeMap::<(usize, usize), Captures>::new();
        for regex in &self.regexes {
            for cap in regex.captures_iter(text) {
                let mat = cap.get(0).unwrap();
                captures.entry((mat.start(), mat.end())).or_insert(cap);
            }
        }
        captures
            .values()
            .map(|cap| {
                debug!("{:?}", cap);
                let gets = |name: &str| cap.name(name).map(|m| m.as_str().to_string());
                let geti = |name: &str| {
                    cap.name(name)
                        .map(|m| m.as_str().parse().ok())
                        .and_then(identity)
                };
                Parsed {
                    program: gets("p"),
                    file: gets("f"),
                    line: geti("l"),
                    column: geti("c"),
                    message: gets("m"),
                }
            })
            .collect()
    }
}

#[cfg(test)]
mod tests {
    use super::{to_re, Parsed, Parser};
    use test_log::test;

    #[test]
    fn re() {
        assert_eq!(to_re("%%"), "%");
        assert_eq!(to_re("%m"), r"(?P<m>.*)");
        assert_eq!(to_re("%%%m%%%"), r"%(?P<m>.*)%");
    }

    #[test]
    fn empty() {
        assert!(Parser::new(Vec::<String>::new())
            .unwrap()
            .parse("")
            .is_empty());
    }

    #[test]
    fn gnu() {
        let formats = ["^%f:%l:%c: %m$"];
        let text = r#"prog.cc:2:5: error: use of undeclared identifier 'std'
    std::cout << "hello world" << std::endl;
    ^
prog.cc:2:35: error: use of undeclared identifier 'std'
    std::cout << "hello world" << std::endl;
                                  ^
2 errors generated.
"#;
        assert_eq!(
            Parser::new(formats).unwrap().parse(text),
            vec![
                Parsed {
                    file: Some("prog.cc".to_string()),
                    line: Some(2),
                    column: Some(5),
                    message: Some("error: use of undeclared identifier 'std'".to_string()),
                    ..Default::default()
                },
                Parsed {
                    file: Some("prog.cc".to_string()),
                    line: Some(2),
                    column: Some(35),
                    message: Some("error: use of undeclared identifier 'std'".to_string()),
                    ..Default::default()
                }
            ]
        );
    }

    #[test]
    fn multi_pattern() {
        let formats = [r"^%f:%l:%c: %m$", r"^%f:%l: %m$"];
        let text = r#"prog.cc:1: error: expected unqualified-id
prog.cc:1:1: error: expected unqualified-id
"#;
        assert_eq!(
            Parser::new(formats).unwrap().parse(text),
            vec![
                Parsed {
                    file: Some("prog.cc".to_string()),
                    line: Some(1),
                    message: Some("error: expected unqualified-id".to_string()),
                    ..Default::default()
                },
                Parsed {
                    file: Some("prog.cc".to_string()),
                    line: Some(1),
                    column: Some(1),
                    message: Some("error: expected unqualified-id".to_string()),
                    ..Default::default()
                },
            ]
        );
    }
}