1use std::{iter::Peekable, str::CharIndices};
16
/// Lazily splits one CSV row into its fields, yielding sub-slices of the row.
///
/// Rules applied while scanning:
///
/// * A backslash makes the scanner skip the character that follows it.
/// * A field whose first character is `quote` is a quoted field; separators
///   inside it do not end the field.
/// * Inside a field, a `quote` that is directly followed by the separator or
///   by the end of the row closes the quoted section. Any other `quote`
///   causes the following character to be skipped (this is how a doubled
///   quote such as `""` is stepped over).
///
/// What is yielded for a quoted field:
///
/// * If the field contains no separator and is wrapped in a quote on both
///   sides, the surrounding quotes are removed.
/// * Otherwise (it contains a separator, or the closing quote is missing)
///   the field is yielded verbatim, quotes included.
///
/// Escaped characters are never unescaped; fields are always plain slices
/// of the original row. An empty row yields a single empty field.
#[derive(Debug)]
pub struct CsvRowSplitter<'a> {
    /// The row being split; every yielded field borrows from it.
    row: &'a str,
    /// Cursor over `row` producing `(byte offset, char)` pairs.
    char_indices: Peekable<CharIndices<'a>>,
    /// Field separator.
    sep: char,
    /// Quote character.
    quote: char,
    /// Set once the last field has been yielded.
    parse_done: bool,
    /// Byte offset in `row` at which the current field starts.
    field_start_index: usize,
    /// The current field started with `quote`.
    field_is_quoted: bool,
    /// A separator was seen while inside the current field's quotes.
    field_has_separator: bool,
    /// The cursor is currently inside an open quoted section.
    cur_in_quoted_field: bool,
    /// The next character examined is the first one of the current field.
    cur_is_field_start: bool,
}

impl<'a> CsvRowSplitter<'a> {
    /// Creates a splitter over `row` using `sep` as the field separator and
    /// `quote` as the quote character. Both may be any `char`, including
    /// multi-byte ones.
    pub fn new(row: &'a str, sep: char, quote: char) -> CsvRowSplitter<'a> {
        CsvRowSplitter {
            row,
            char_indices: row.char_indices().peekable(),
            sep,
            quote,
            parse_done: false,
            field_start_index: 0,
            field_is_quoted: false,
            field_has_separator: false,
            cur_in_quoted_field: false,
            cur_is_field_start: true,
        }
    }

    /// Resets the per-field state so that the next field begins at byte
    /// offset `start_index`.
    fn field_start_set(&mut self, start_index: usize) {
        self.field_start_index = start_index;
        self.field_is_quoted = false;
        self.field_has_separator = false;
        self.cur_in_quoted_field = false;
        self.cur_is_field_start = true;
    }

    /// Returns the current field, which spans from `field_start_index` up to
    /// (not including) byte offset `end_index`.
    ///
    /// The surrounding quotes are removed only when the field is quoted,
    /// holds no separator, and really has a quote at both ends. Checking the
    /// text itself (rather than subtracting a fixed byte count) keeps this
    /// correct for multi-byte quote characters and for unterminated quotes
    /// such as a lone `"`, which are returned verbatim.
    fn get_field(&self, end_index: usize) -> &'a str {
        let row: &'a str = self.row;
        // Both offsets come from `char_indices` (or `row.len()`), so they are
        // on character boundaries and `start <= end`; safe slicing is used so
        // a broken invariant would panic instead of being undefined behaviour.
        let raw = &row[self.field_start_index..end_index];

        if !self.field_is_quoted || self.field_has_separator {
            return raw;
        }

        let quote = self.quote;
        raw.strip_prefix(quote)
            .and_then(|inner| inner.strip_suffix(quote))
            .unwrap_or(raw)
    }

    /// Reports whether the cursor is at the end of the row or the next
    /// character is the separator. Does not advance the cursor.
    fn next_char_is_none_or_sep(&mut self) -> bool {
        match self.char_indices.peek() {
            None => true,
            Some(&(_, next)) => next == self.sep,
        }
    }
}

impl<'a> Iterator for CsvRowSplitter<'a> {
    type Item = &'a str;

    /// Scans forward to the end of the current field and yields it.
    /// Returns `None` once the field that ends at the end of the row has
    /// been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.parse_done {
            return None;
        }

        loop {
            let Some((index, c)) = self.char_indices.next() else {
                // End of row: whatever has accumulated is the last field.
                self.parse_done = true;
                return Some(self.get_field(self.row.len()));
            };

            if c == '\\' {
                // Escape: the following character is skipped unexamined.
                self.char_indices.next();
            } else if c == self.sep {
                if self.cur_in_quoted_field {
                    self.field_has_separator = true;
                } else {
                    let field = self.get_field(index);
                    // Step over the whole separator, which may be wider than
                    // one byte, so the next field starts on a char boundary.
                    self.field_start_set(index + c.len_utf8());
                    return Some(field);
                }
            } else if c == self.quote {
                if self.cur_is_field_start {
                    self.field_is_quoted = true;
                    self.cur_in_quoted_field = true;
                } else if self.next_char_is_none_or_sep() {
                    // Closing quote: the field ends right after it.
                    self.cur_in_quoted_field = false;
                } else {
                    // Quote in the middle of a field: skip its partner.
                    self.char_indices.next();
                }
            }

            self.cur_is_field_start = false;
        }
    }
}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// Splits `row` on `,` with `"` as the quote character and collects
    /// the yielded fields.
    fn split(row: &str) -> Vec<&str> {
        CsvRowSplitter::new(row, ',', '"').collect()
    }

    /// Runs the splitter over a table of `(row, expected fields)` pairs.
    #[test]
    fn test_csv_row_split() {
        let cases: Vec<(&str, Vec<&str>)> = vec![
            // Plain fields, including multi-byte characters.
            ("我们abc,def,12", vec!["我们abc", "def", "12"]),
            // A trailing separator produces a trailing empty field.
            ("1,2,3,", vec!["1", "2", "3", ""]),
            // Empty quoted fields are yielded as empty strings.
            (r#"1,2,3,"""#, vec!["1", "2", "3", ""]),
            (r#"1,2,3,"",4"#, vec!["1", "2", "3", "", "4"]),
            (r#"1,2,3,"","4""#, vec!["1", "2", "3", "", "4"]),
            // Quotes around a separator-free field are removed.
            (r#""1",2,3,"#, vec!["1", "2", "3", ""]),
            // A quoted field containing the separator keeps its quotes.
            (
                r#"first,second,"third,fourth",fifth"#,
                vec!["first", "second", r#""third,fourth""#, "fifth"],
            ),
            (
                r#"first,second,"third,fourth","fifth""#,
                vec!["first", "second", r#""third,fourth""#, "fifth"],
            ),
            (
                r#""third,fourth","fifth""#,
                vec![r#""third,fourth""#, "fifth"],
            ),
            // A backslash-escaped quote stays in the field as written.
            (
                r#"third\",fourth,"fifth""#,
                vec![r#"third\""#, "fourth", "fifth"],
            ),
            // Doubled quotes inside a quoted field are left doubled.
            (
                r#""Charles ""Pretty Boy"" Floyd","1 Short St, Smallville""#,
                vec![
                    r#"Charles ""Pretty Boy"" Floyd"#,
                    r#""1 Short St, Smallville""#,
                ],
            ),
        ];

        for (row, expected) in cases {
            assert_eq!(split(row), expected);
        }
    }
}