1use nom::{Parser, bytes::complete::tag, multi::separated_list1};
2
3pub type ParseResult<'a, T> = nom::IResult<&'a str, T>;
4
5fn textdata<F>(stop: F) -> impl FnMut(&str) -> ParseResult<&str>
6where
7 F: Fn(char) -> bool,
8{
9 move |src| {
10 for (i, c) in src.char_indices() {
11 if stop(c) {
12 return Ok((&src[i..], &src[..i]));
13 }
14 }
15 Ok(("", src))
16 }
17}
18
19fn escaped(comma: char, dquote: char) -> impl FnMut(&str) -> ParseResult<&str> {
20 move |src| {
21 let trimmed = src.trim_start();
22 let (rest, _) = tag(format!("{}", dquote).as_str())(trimmed)?;
23 println!("Escaped: {:?}; trimmed={:?}; rest={:?}", src, trimmed, rest);
24 let mut char_indices = rest.char_indices().peekable();
25 while let Some((i, c)) = char_indices.next() {
26 if c == dquote {
27 match char_indices.peek().copied() {
28 Some((j, c)) if c != dquote => {
29 let remainder = rest[j..].trim_start();
30 let next_byte = remainder.as_bytes().first().copied().unwrap_or_default();
31 if remainder.starts_with(comma) || next_byte < 0x20 {
32 return Ok((remainder, &rest[..i]));
33 } else {
34 return Err(nom::Err::Failure(nom::error::make_error(
35 src,
36 nom::error::ErrorKind::Fail,
37 )));
38 }
39 }
40 None => return Ok(("", &rest[..i])),
41 _ => {
42 let _ = char_indices.next();
43 }
44 }
45 }
46 }
47 Err(nom::Err::Incomplete(nom::Needed::Unknown))
48 }
49}
50
51fn field<'a>(
52 comma: char,
53 dquote: char,
54) -> impl Parser<&'a str, Output = String, Error = nom::error::Error<&'a str>> {
55 let stop = move |c| (c < ' ' || c == comma || c == dquote);
56 nom::combinator::map(
57 nom::branch::alt((escaped(comma, dquote), textdata(stop))),
58 move |field| {
59 field.replace(
60 format!("{}{}", dquote, dquote).as_str(),
61 format!("{}", dquote).as_str(),
62 )
63 },
64 )
65}
66
67pub fn record(src: &str, comma: char, dquote: char) -> ParseResult<Vec<String>> {
68 separated_list1(tag(format!("{}", comma).as_str()), field(comma, dquote)).parse(src)
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    const COMMA: char = ',';
    const DQUOTE: char = '"';

    /// Parses `line` with the default delimiter and quote character and returns its fields,
    /// panicking if the parser reports an error.
    fn fields(line: &str) -> Vec<String> {
        record(line, COMMA, DQUOTE).unwrap().1
    }

    /// Plain unquoted fields; the trailing line terminator is not part of the last field.
    #[test]
    fn parse_just_record() {
        assert_eq!(fields("мама,мыла,раму\r\n"), ["мама", "мыла", "раму"]);
    }

    /// A quoted field preceded by a space is unwrapped.
    #[test]
    fn parse_with_escaped() {
        assert_eq!(fields("мама, \"мыла\",раму"), ["мама", "мыла", "раму"]);
    }

    /// A quoted field may contain a newline and be followed by tabs before the delimiter.
    #[test]
    fn parse_multiline() {
        assert_eq!(
            fields("мама, \"мыла\ntwo times\"\t\t,раму"),
            ["мама", "мыла\ntwo times", "раму"]
        );
    }

    /// Text directly after a closing quote is an error; trailing whitespace is not.
    #[test]
    fn fail_after_dquote() {
        let broken = "мама,мыла, \"раму\"abc";
        println!("{:?}", record(broken, COMMA, DQUOTE));
        assert!(record(broken, COMMA, DQUOTE).is_err());

        assert_eq!(
            fields("мама,\"мыла\", \"раму\" "),
            ["мама", "мыла", "раму"]
        );
    }

    /// Doubled quotes inside a quoted field decode to single literal quotes.
    #[test]
    fn escaped_dquote() {
        assert_eq!(
            fields("мама, \"мыла\n\"\"two times\"\"\"\t\t,раму"),
            ["мама", "мыла\n\"two times\"", "раму"]
        );
    }
}