// csv_lib/models/in_row_iter.rs
use memchr::{memchr, memchr2};
2
/// Cursor-based iterator over the fields of a single delimited row.
///
/// The row is borrowed, never copied: every field handed out is a sub-slice of
/// `line`. The lifetime is named `'mmap` to match the impl blocks in this file
/// (presumably the bytes come from a memory-mapped file — confirm with callers).
#[derive(Debug)]
pub struct InRowIter<'mmap> {
    /// Raw bytes of the row being split.
    line: &'mmap [u8],
    /// Byte that terminates a field.
    field_separator: u8,
    /// Byte that opens/closes a quoted field; `0` disables quote handling.
    string_separator: u8,
    /// Byte offset into `line` at which the next field starts.
    cursor: usize,
}
12
13impl<'mmap> InRowIter<'mmap> {
14 #[inline(always)]
15 pub fn new(line: &'mmap [u8], field_separator: u8, string_separator: u8) -> Self {
17 Self {
18 line,
19 field_separator,
20 string_separator,
21 cursor: 0,
22 }
23 }
24 #[inline(always)]
25 pub(crate) fn set_cursor(&mut self, new_index :usize){
28 self.cursor = new_index;
29 }
30 #[inline(always)]
31 pub(crate) fn get_cursor(&self) -> usize { self.cursor }
34 #[inline(always)]
35 pub fn count_fields(&self, delimiter: u8, string_separator: u8) -> usize {
37 let mut count = 0;
38 let mut pos = 0;
39 let len = self.line.len();
40 let check_string = string_separator != 0;
41 let mut in_string = false;
42
43 while pos < len {
44 let slice = &self.line[pos..];
45
46 if let Some(next_pos) = if check_string {
48 memchr2(delimiter, string_separator, slice)
49 } else {
50 memchr(delimiter, slice)
51 } {
52 let byte = slice[next_pos];
53
54 if check_string && byte == string_separator {
55 in_string = !in_string;
56 pos += next_pos + 1;
57 } else if byte == delimiter && !in_string {
58 count += 1;
59 pos += next_pos + 1;
60 } else {
61 pos += next_pos + 1;
62 }
63 } else {
64 break;
65 }
66 }
67
68 count + 1 }
70 #[inline(always)]
71 pub fn get_field_index(&mut self, target: usize) -> Option<&'mmap [u8]> {
73 let mut count = 0;
74 let actual = self.get_cursor();
76 self.set_cursor(0_usize);
78 while let Some(el) = self.next(){
79 if count == target {
80 self.set_cursor(actual);
82 return Some(el);
83 }
84 count = count + 1;
85 }
86 self.set_cursor(actual);
88 None
89 }
90 #[inline(always)]
91 #[allow(unused_assignments)]
94 pub fn peek_field_index(&self, target: usize) -> Option<&'mmap [u8]> {
95 let mut count = 0;
96 let mut pos = 0usize;
97 let slice = self.line;
98 let mut in_string = false;
99 let check_string = self.string_separator != 0;
100
101 let mut field_start = 0usize;
102 let mut field_end = 0usize;
103
104 while pos < slice.len() {
105 let mut start_offset = 0usize;
106 let mut end_offset = 0usize;
107 let start = pos;
108
109 if check_string && slice[pos] == self.string_separator {
110 in_string = true;
111 pos += 1;
112 start_offset = 1;
113 }
114
115 while pos < slice.len() {
116 let byte = slice[pos];
117
118 if check_string && byte == self.string_separator {
119 if in_string {
120 if pos + 1 < slice.len() && slice[pos + 1] == self.string_separator {
121 pos += 2; continue;
123 } else {
124 in_string = false;
125 end_offset = 1;
126 pos += 1;
127 continue;
128 }
129 }
130 }
131
132 if byte == self.field_separator && !in_string {
133 if count == target {
134 field_start = start + start_offset;
135 field_end = pos - end_offset;
136 return Some(&slice[field_start..field_end]);
137 } else {
138 count += 1;
139 pos += 1;
140 break;
141 }
142 }
143
144 pos += 1;
145 }
146 }
147
148 if count == target {
149 let start_offset = if check_string && slice[pos.saturating_sub(1)] == self.string_separator { 1 } else { 0 };
150 let end_offset = if check_string && slice[slice.len().saturating_sub(1)] == self.string_separator { 1 } else { 0 };
151
152 field_start = pos + start_offset;
153 field_end = slice.len() - end_offset;
154 return Some(&slice[field_start.min(field_end)..field_end]);
155 }
156
157 None
158 }
159
160}
161
162impl<'mmap> Iterator for InRowIter<'mmap> {
163 type Item = &'mmap [u8];
164 #[inline(always)]
165 fn next(&mut self) -> Option<Self::Item> {
166 if self.cursor >= self.line.len() {
167 return None;
168 }
169
170 let slice = &self.line[self.cursor..];
171 let mut pos = 0usize;
172 let mut in_string = false;
173 let check_string = self.string_separator != 0;
174 let mut start_offset = 0;
175 let mut end_offset = 0;
176
177 if check_string && !slice.is_empty() && slice[0] == self.string_separator {
179 in_string = true;
180 pos = 1;
181 start_offset = 1;
182 }
183
184 while pos < slice.len() {
185 let byte = slice[pos];
186
187 if check_string && byte == self.string_separator {
188 if in_string {
189 if pos + 1 < slice.len() && slice[pos + 1] == self.string_separator {
191 pos += 2;
193 continue;
194 } else {
195 in_string = false;
196 end_offset = 1; pos += 1;
198 continue;
199 }
200 } else {
201 pos += 1;
203 continue;
204 }
205 }
206
207 if byte == self.field_separator && !in_string {
208 let field = &slice[start_offset..pos - end_offset];
209 self.cursor += pos + 1;
210 return Some(field);
211 }
212
213 pos += 1;
214 }
215
216 let field = &slice[start_offset..slice.len() - end_offset];
218 self.cursor = self.line.len();
219 Some(field)
220 }
221}
222
223
224
225
#[cfg(test)]
mod tests {
    use crate::decoders::decoders::Encoding::Windows1252;
    use crate::models::in_row_iter::InRowIter;

    /// Looks up a field by index and checks both the returned bytes and that
    /// the lookup leaves the iteration cursor where it was.
    #[test]
    fn test_iter_next() {
        let csv_data = b"uno;dos;3;cuatro;cinco;6;siete;ocho;9";
        let mut row = InRowIter::new(csv_data, b';', 0u8);

        let f = row.get_field_index(2).unwrap();
        // Without an assertion the test would pass for any returned field.
        assert_eq!(f, &b"3"[..]);

        let dec = Windows1252.decode(f);
        println!("Field Data: {}", dec.as_ref());

        // The lookup must not consume anything from the iterator itself.
        assert_eq!(row.next(), Some(&b"uno"[..]));
    }
}