justcsv/
parse.rs

1use nom::{Parser, bytes::complete::tag, multi::separated_list1};
2
3pub type ParseResult<'a, T> = nom::IResult<&'a str, T>;
4
5fn textdata<F>(stop: F) -> impl FnMut(&str) -> ParseResult<&str>
6where
7    F: Fn(char) -> bool,
8{
9    move |src| {
10        for (i, c) in src.char_indices() {
11            if stop(c) {
12                return Ok((&src[i..], &src[..i]));
13            }
14        }
15        Ok(("", src))
16    }
17}
18
19fn escaped(comma: char, dquote: char) -> impl FnMut(&str) -> ParseResult<&str> {
20    move |src| {
21        let trimmed = src.trim_start();
22        let (rest, _) = tag(format!("{}", dquote).as_str())(trimmed)?;
23        println!("Escaped: {:?}; trimmed={:?}; rest={:?}", src, trimmed, rest);
24        let mut char_indices = rest.char_indices().peekable();
25        while let Some((i, c)) = char_indices.next() {
26            if c == dquote {
27                match char_indices.peek().copied() {
28                    Some((j, c)) if c != dquote => {
29                        let remainder = rest[j..].trim_start();
30                        let next_byte = remainder.as_bytes().first().copied().unwrap_or_default();
31                        if remainder.starts_with(comma) || next_byte < 0x20 {
32                            return Ok((remainder, &rest[..i]));
33                        } else {
34                            return Err(nom::Err::Failure(nom::error::make_error(
35                                src,
36                                nom::error::ErrorKind::Fail,
37                            )));
38                        }
39                    }
40                    None => return Ok(("", &rest[..i])),
41                    _ => {
42                        let _ = char_indices.next();
43                    }
44                }
45            }
46        }
47        Err(nom::Err::Incomplete(nom::Needed::Unknown))
48    }
49}
50
51fn field<'a>(
52    comma: char,
53    dquote: char,
54) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
55    let stop = move |c| (c < ' ' || c == comma || c == dquote);
56    nom::combinator::map(
57        nom::branch::alt((escaped(comma, dquote), textdata(stop))),
58        move |field| {
59            field.replace(
60                format!("{}{}", dquote, dquote).as_str(),
61                format!("{}", dquote).as_str(),
62            )
63        },
64    )
65}
66
67pub fn record(src: &str, comma: char, dquote: char) -> ParseResult<Vec<String>> {
68    separated_list1(tag(format!("{}", comma).as_str()), field(comma, dquote)).parse(src)
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `line` with the conventional `,` / `"` pair and returns only the fields.
    fn fields(line: &str) -> Vec<String> {
        record(line, ',', '"').expect("record should parse").1
    }

    // Plain unquoted fields; the trailing line terminator is not part of the last field.
    #[test]
    fn parse_just_record() {
        assert_eq!(vec!["мама", "мыла", "раму"], fields("мама,мыла,раму\r\n"));
    }

    // Whitespace before an opening quote is ignored.
    #[test]
    fn parse_with_escaped() {
        assert_eq!(vec!["мама", "мыла", "раму"], fields("мама, \"мыла\",раму"));
    }

    // A quoted field may contain a line break; whitespace after the closing quote is skipped.
    #[test]
    fn parse_multiline() {
        assert_eq!(
            vec!["мама", "мыла\ntwo times", "раму"],
            fields("мама, \"мыла\ntwo times\"\t\t,раму")
        );
    }

    // Text directly after a closing quote is an error.
    #[test]
    fn fail_after_dquote() {
        assert!(record("мама,мыла, \"раму\"abc", ',', '"').is_err());
    }

    // Trailing whitespace after the last closing quote is accepted.
    #[test]
    fn trailing_space_after_dquote() {
        assert_eq!(
            vec!["мама", "мыла", "раму"],
            fields("мама,\"мыла\", \"раму\" ")
        );
    }

    // Doubled quotes inside a quoted field collapse into a single quote.
    #[test]
    fn escaped_dquote() {
        assert_eq!(
            vec!["мама", "мыла\n\"two times\"", "раму"],
            fields("мама, \"мыла\n\"\"two times\"\"\"\t\t,раму")
        );
    }
}