simd_csv/
records.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::ops::Index;
4
5use crate::debug;
6use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
7
/// A borrowed view over the bytes of a single record.
///
/// Nothing is copied: cells are located in `slice` through the separator
/// offsets stored in `seps`.
pub struct ZeroCopyByteRecord<'a> {
    /// Bytes of the record, as returned by `trim_trailing_crlf` in `new`.
    slice: &'a [u8],
    /// Offsets in `slice` of the bytes separating consecutive cells: cell `i`
    /// ends at `seps[i]` and cell `i + 1` starts at `seps[i] + 1`.
    seps: &'a [usize],
    /// Quote byte handed to the unquoting/unescaping helpers.
    quote: u8,
}
13
14impl<'a> ZeroCopyByteRecord<'a> {
15    #[inline]
16    pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
17        Self {
18            slice: trim_trailing_crlf(slice),
19            seps,
20            quote,
21        }
22    }
23
24    #[inline]
25    pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
26        (self.seps.to_vec(), self.slice.to_vec())
27    }
28
29    #[inline(always)]
30    pub fn len(&self) -> usize {
31        // NOTE: an empty zero copy record cannot be constructed,
32        // by definition.
33        self.seps.len() + 1
34    }
35
36    #[inline(always)]
37    pub fn is_empty(&self) -> bool {
38        false
39    }
40
41    #[inline(always)]
42    pub fn as_slice(&self) -> &[u8] {
43        self.slice
44    }
45
46    #[inline]
47    pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
48        ZeroCopyByteRecordIter {
49            record: self,
50            current: 0,
51        }
52    }
53
54    #[inline]
55    pub fn get(&self, index: usize) -> Option<&[u8]> {
56        let len = self.seps.len();
57
58        if index > len {
59            return None;
60        }
61
62        let start = if index == 0 {
63            0
64        } else {
65            self.seps[index - 1] + 1
66        };
67
68        let end = if index == len {
69            self.slice.len()
70        } else {
71            self.seps[index]
72        };
73
74        Some(&self.slice[start..end])
75    }
76
77    #[inline]
78    pub fn unquote(&self, index: usize) -> Option<&[u8]> {
79        self.get(index)
80            .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
81    }
82
83    #[inline]
84    pub fn unescape(&self, index: usize) -> Option<Cow<[u8]>> {
85        self.unquote(index).map(|cell| {
86            if let Some(trimmed) = unquoted(cell, self.quote) {
87                unescape(trimmed, self.quote)
88            } else {
89                Cow::Borrowed(cell)
90            }
91        })
92    }
93
94    fn read_byte_record(&self, record: &mut ByteRecord) {
95        record.clear();
96
97        for cell in self.iter() {
98            if let Some(trimmed) = unquoted(cell, self.quote) {
99                unescape_to(trimmed, self.quote, &mut record.data);
100
101                let bounds_len = record.bounds.len();
102
103                let start = if bounds_len == 0 {
104                    0
105                } else {
106                    record.bounds[bounds_len - 1].1
107                };
108
109                record.bounds.push((start, record.data.len()));
110            } else {
111                record.push_field(cell);
112            }
113        }
114    }
115
116    #[inline]
117    pub fn to_byte_record(&self) -> ByteRecord {
118        let mut record = ByteRecord::new();
119        self.read_byte_record(&mut record);
120        record
121    }
122}
123
124impl<'a> fmt::Debug for ZeroCopyByteRecord<'a> {
125    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126        write!(f, "ZeroCopyByteRecord(")?;
127        f.debug_list()
128            .entries(self.iter().map(debug::Bytes))
129            .finish()?;
130        write!(f, ")")?;
131        Ok(())
132    }
133}
134
/// Forward iterator over the raw cells of a `ZeroCopyByteRecord`.
pub struct ZeroCopyByteRecordIter<'a> {
    /// Record being iterated.
    record: &'a ZeroCopyByteRecord<'a>,
    /// Index of the next cell to yield.
    current: usize,
}
139
// `size_hint` reports an exact count, so the provided `len()` can be relied on.
impl<'a> ExactSizeIterator for ZeroCopyByteRecordIter<'a> {}
141
142impl<'a> Iterator for ZeroCopyByteRecordIter<'a> {
143    type Item = &'a [u8];
144
145    #[inline]
146    fn next(&mut self) -> Option<Self::Item> {
147        match self.record.get(self.current) {
148            None => None,
149            Some(cell) => {
150                self.current += 1;
151                Some(cell)
152            }
153        }
154    }
155
156    #[inline]
157    fn size_hint(&self) -> (usize, Option<usize>) {
158        let size = self.record.len() - self.current;
159
160        (size, Some(size))
161    }
162
163    #[inline]
164    fn count(self) -> usize
165    where
166        Self: Sized,
167    {
168        self.len()
169    }
170}
171
172impl<'a> Index<usize> for ZeroCopyByteRecord<'a> {
173    type Output = [u8];
174
175    #[inline]
176    fn index(&self, i: usize) -> &[u8] {
177        self.get(i).unwrap()
178    }
179}
180
/// An owned record: the bytes of its fields live in a single buffer, next to
/// the byte range of each field within that buffer.
#[derive(Default, Clone)]
pub struct ByteRecord {
    /// Backing buffer holding the bytes of the fields.
    data: Vec<u8>,
    /// Half-open `(start, end)` byte range of each field in `data`, in field
    /// order.
    bounds: Vec<(usize, usize)>,
}
186
187impl ByteRecord {
188    pub fn new() -> Self {
189        Self::default()
190    }
191
192    #[inline]
193    pub fn len(&self) -> usize {
194        self.bounds.len()
195    }
196
197    #[inline]
198    pub fn is_empty(&self) -> bool {
199        self.len() == 0
200    }
201
202    #[inline]
203    pub fn clear(&mut self) {
204        self.data.clear();
205        self.bounds.clear();
206    }
207
208    #[inline]
209    pub fn truncate(&mut self, len: usize) {
210        self.bounds.truncate(len);
211
212        if let Some((_, end)) = self.bounds.last() {
213            self.data.truncate(*end);
214        } else {
215            self.data.clear();
216        }
217    }
218
219    #[inline]
220    pub fn as_slice(&self) -> &[u8] {
221        &self.data
222    }
223
224    #[inline]
225    pub fn iter(&self) -> ByteRecordIter<'_> {
226        ByteRecordIter {
227            record: self,
228            current_forward: 0,
229            current_reverse: self.len(),
230        }
231    }
232
233    #[inline(always)]
234    pub fn push_field(&mut self, bytes: &[u8]) {
235        self.data.extend_from_slice(bytes);
236
237        let bounds_len = self.bounds.len();
238
239        let start = if bounds_len == 0 {
240            0
241        } else {
242            self.bounds[bounds_len - 1].1
243        };
244
245        self.bounds.push((start, self.data.len()));
246    }
247
248    #[inline]
249    pub fn get(&self, index: usize) -> Option<&[u8]> {
250        self.bounds
251            .get(index)
252            .copied()
253            .map(|(start, end)| &self.data[start..end])
254    }
255}
256
257impl PartialEq for ByteRecord {
258    fn eq(&self, other: &Self) -> bool {
259        if self.bounds.len() != other.bounds.len() {
260            return false;
261        }
262
263        self.iter()
264            .zip(other.iter())
265            .all(|(self_cell, other_cell)| self_cell == other_cell)
266    }
267}
268
269impl Index<usize> for ByteRecord {
270    type Output = [u8];
271
272    #[inline]
273    fn index(&self, i: usize) -> &[u8] {
274        self.get(i).unwrap()
275    }
276}
277
278impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
279    #[inline]
280    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
281        for x in iter {
282            self.push_field(x.as_ref());
283        }
284    }
285}
286
287impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
288    #[inline]
289    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
290        let mut record = Self::new();
291        record.extend(iter);
292        record
293    }
294}
295
296impl<I, T> From<I> for ByteRecord
297where
298    I: IntoIterator<Item = T>,
299    T: AsRef<[u8]>,
300{
301    fn from(value: I) -> Self {
302        let mut record = Self::new();
303
304        for cell in value.into_iter() {
305            record.push_field(cell.as_ref());
306        }
307
308        record
309    }
310}
311
312impl<'r> IntoIterator for &'r ByteRecord {
313    type IntoIter = ByteRecordIter<'r>;
314    type Item = &'r [u8];
315
316    #[inline]
317    fn into_iter(self) -> ByteRecordIter<'r> {
318        self.iter()
319    }
320}
321
322impl fmt::Debug for ByteRecord {
323    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
324        write!(f, "ByteRecord(")?;
325        f.debug_list()
326            .entries(self.iter().map(debug::Bytes))
327            .finish()?;
328        write!(f, ")")?;
329        Ok(())
330    }
331}
332
/// Double-ended iterator over the fields of a `ByteRecord`.
pub struct ByteRecordIter<'a> {
    /// Record being iterated.
    record: &'a ByteRecord,
    /// Index of the next field yielded from the front.
    current_forward: usize,
    /// One past the index of the next field yielded from the back.
    current_reverse: usize,
}
338
// `size_hint` reports an exact count, so the provided `len()` can be relied on.
impl<'a> ExactSizeIterator for ByteRecordIter<'a> {}
340
341impl<'a> Iterator for ByteRecordIter<'a> {
342    type Item = &'a [u8];
343
344    #[inline]
345    fn next(&mut self) -> Option<Self::Item> {
346        if self.current_forward == self.current_reverse {
347            None
348        } else {
349            let (start, end) = self.record.bounds[self.current_forward];
350
351            self.current_forward += 1;
352
353            Some(&self.record.data[start..end])
354        }
355    }
356
357    #[inline]
358    fn size_hint(&self) -> (usize, Option<usize>) {
359        let size = self.current_reverse - self.current_forward;
360
361        (size, Some(size))
362    }
363
364    #[inline]
365    fn count(self) -> usize
366    where
367        Self: Sized,
368    {
369        self.len()
370    }
371}
372
373impl<'a> DoubleEndedIterator for ByteRecordIter<'a> {
374    #[inline]
375    fn next_back(&mut self) -> Option<Self::Item> {
376        if self.current_forward == self.current_reverse {
377            None
378        } else {
379            self.current_reverse -= 1;
380
381            let (start, end) = self.record.bounds[self.current_reverse];
382
383            Some(&self.record.data[start..end])
384        }
385    }
386}
387
/// Incremental writer filling a `ByteRecord`: bytes are appended to the
/// record's buffer and field bounds are registered as fields get finalized.
pub(crate) struct ByteRecordBuilder<'r> {
    /// Record being filled.
    record: &'r mut ByteRecord,
    /// Offset, in the record's buffer, where the next finalized field starts.
    start: usize,
}
392
393impl<'r> ByteRecordBuilder<'r> {
394    #[inline(always)]
395    pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
396        Self { record, start: 0 }
397    }
398
399    #[inline(always)]
400    pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
401        self.record.data.extend_from_slice(slice);
402    }
403
404    #[inline(always)]
405    pub(crate) fn push_byte(&mut self, byte: u8) {
406        self.record.data.push(byte);
407    }
408
409    #[inline]
410    pub(crate) fn finalize_field(&mut self) {
411        let start = self.start;
412        self.start = self.record.data.len();
413
414        self.record.bounds.push((start, self.start));
415    }
416
417    #[inline]
418    pub(crate) fn finalize_record(&mut self) {
419        let start = self.start;
420        self.start = self.record.data.len();
421
422        let mut end = self.start;
423        end -= (self.start > 0 && self.record.data[self.start - 1] == b'\r') as usize;
424
425        self.record.bounds.push((start, end));
426    }
427
428    #[inline]
429    pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
430        let start = self.start;
431        self.start = self.record.data.len() + offset;
432
433        self.record.bounds.push((start, self.start));
434
435        self.start += 1;
436    }
437
438    #[inline(always)]
439    pub(crate) fn bump(&mut self) {
440        self.start +=
441            (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
442    }
443}
444
/// Builds a `ByteRecord` from a comma-separated list of expressions, each one
/// pushed as a field through its `as_bytes()` method. A trailing comma is
/// accepted, and `brec![]` yields an empty record.
///
/// The expansion names `ByteRecord` unqualified, so the type must be in scope
/// wherever the macro is invoked.
#[macro_export]
macro_rules! brec {
    () => {{
        ByteRecord::new()
    }};

    ($($x: expr),+ $(,)?) => {{
        let mut r = ByteRecord::new();

        $(
            r.push_field($x.as_bytes());
        )+

        r
    }};
}
461
#[cfg(test)]
mod tests {
    use super::*;

    /// Cells of a zero-copy record are located through the separator offsets.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');

        assert_eq!(record.len(), 3);

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        // Index 3 is the first one out of range: checking it guards against
        // an off-by-one in `get`.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    /// Fields pushed into an owned record can be read back, in order.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);
    }
}