simd_csv/
records.rs

1use std::borrow::Cow;
2use std::fmt;
3use std::ops::Index;
4
5use crate::debug;
6use crate::utils::{trim_trailing_crlf, unescape};
7
8pub struct ZeroCopyByteRecord<'a> {
9    slice: &'a [u8],
10    seps: &'a [usize],
11}
12
13impl<'a> ZeroCopyByteRecord<'a> {
14    #[inline]
15    pub(crate) fn new(slice: &'a [u8], seps: &'a [usize]) -> Self {
16        Self {
17            slice: trim_trailing_crlf(slice),
18            seps,
19        }
20    }
21
22    #[inline(always)]
23    pub fn len(&self) -> usize {
24        // NOTE: an empty zero copy record cannot be constructed,
25        // by definition.
26        self.seps.len() + 1
27    }
28
29    #[inline(always)]
30    pub fn is_empty(&self) -> bool {
31        false
32    }
33
34    #[inline(always)]
35    pub fn as_slice(&self) -> &[u8] {
36        self.slice
37    }
38
39    #[inline]
40    pub fn iter(&self) -> ZeroCopyRecordIter<'_> {
41        ZeroCopyRecordIter {
42            record: self,
43            current: 0,
44        }
45    }
46
47    #[inline]
48    pub fn get(&self, index: usize) -> Option<&[u8]> {
49        let len = self.seps.len();
50
51        if index > len {
52            return None;
53        }
54
55        let start = if index == 0 {
56            0
57        } else {
58            self.seps[index - 1] + 1
59        };
60
61        let end = if index == len {
62            self.slice.len()
63        } else {
64            self.seps[index]
65        };
66
67        Some(&self.slice[start..end])
68    }
69
70    #[inline]
71    pub fn is_quoted(&self, index: usize, quote: u8) -> bool {
72        let cell = self.get(index).unwrap();
73        cell.len() > 1 && cell[0] == quote
74    }
75
76    #[inline]
77    pub fn unquote(&self, index: usize, quote: u8) -> Option<&[u8]> {
78        self.get(index).map(|cell| {
79            let len = cell.len();
80
81            if len > 1 && cell[0] == quote {
82                &cell[1..len - 1]
83            } else {
84                cell
85            }
86        })
87    }
88
89    #[inline]
90    pub fn unescape(&self, index: usize, quote: u8) -> Option<Cow<[u8]>> {
91        self.unquote(index, quote).map(|cell| unescape(cell, quote))
92    }
93}
94
95impl<'a> fmt::Debug for ZeroCopyByteRecord<'a> {
96    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
97        write!(f, "ZeroCopyByteRecord(")?;
98        f.debug_list()
99            .entries(self.iter().map(debug::Bytes))
100            .finish()?;
101        write!(f, ")")?;
102        Ok(())
103    }
104}
105
106pub struct ZeroCopyRecordIter<'a> {
107    record: &'a ZeroCopyByteRecord<'a>,
108    current: usize,
109}
110
111impl<'a> Iterator for ZeroCopyRecordIter<'a> {
112    type Item = &'a [u8];
113
114    fn next(&mut self) -> Option<Self::Item> {
115        let cell = self.record.get(self.current);
116
117        if cell.is_some() {
118            self.current += 1;
119        }
120
121        cell
122    }
123}
124
125impl<'a> Index<usize> for ZeroCopyByteRecord<'a> {
126    type Output = [u8];
127
128    #[inline]
129    fn index(&self, i: usize) -> &[u8] {
130        self.get(i).unwrap()
131    }
132}
133
134#[derive(Default, Clone)]
135pub struct ByteRecord {
136    data: Vec<u8>,
137    bounds: Vec<(usize, usize)>,
138}
139
140impl ByteRecord {
141    pub fn new() -> Self {
142        Self::default()
143    }
144
145    #[inline]
146    pub fn len(&self) -> usize {
147        self.bounds.len()
148    }
149
150    #[inline]
151    pub fn is_empty(&self) -> bool {
152        self.len() == 0
153    }
154
155    #[inline]
156    pub fn clear(&mut self) {
157        self.data.clear();
158        self.bounds.clear();
159    }
160
161    #[inline]
162    pub fn as_slice(&self) -> &[u8] {
163        &self.data
164    }
165
166    #[inline]
167    pub fn iter(&self) -> ByteRecordIter<'_> {
168        ByteRecordIter {
169            record: self,
170            current: 0,
171        }
172    }
173
174    #[inline(always)]
175    pub fn push_field(&mut self, bytes: &[u8]) {
176        self.data.extend_from_slice(bytes);
177
178        let bounds_len = self.bounds.len();
179
180        let start = if bounds_len == 0 {
181            0
182        } else {
183            self.bounds[bounds_len - 1].1
184        };
185
186        self.bounds.push((start, self.data.len()));
187    }
188
189    #[inline]
190    pub fn get(&self, index: usize) -> Option<&[u8]> {
191        self.bounds
192            .get(index)
193            .copied()
194            .map(|(start, end)| &self.data[start..end])
195    }
196}
197
198impl PartialEq for ByteRecord {
199    fn eq(&self, other: &Self) -> bool {
200        if self.bounds.len() != other.bounds.len() {
201            return false;
202        }
203
204        self.iter()
205            .zip(other.iter())
206            .all(|(self_cell, other_cell)| self_cell == other_cell)
207    }
208}
209
210impl Index<usize> for ByteRecord {
211    type Output = [u8];
212
213    #[inline]
214    fn index(&self, i: usize) -> &[u8] {
215        self.get(i).unwrap()
216    }
217}
218
219impl<I, T> From<I> for ByteRecord
220where
221    I: IntoIterator<Item = T>,
222    T: AsRef<[u8]>,
223{
224    fn from(value: I) -> Self {
225        let mut record = Self::new();
226
227        for cell in value.into_iter() {
228            record.push_field(cell.as_ref());
229        }
230
231        record
232    }
233}
234
235impl fmt::Debug for ByteRecord {
236    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
237        write!(f, "ByteRecord(")?;
238        f.debug_list()
239            .entries(self.iter().map(debug::Bytes))
240            .finish()?;
241        write!(f, ")")?;
242        Ok(())
243    }
244}
245
246pub struct ByteRecordIter<'a> {
247    record: &'a ByteRecord,
248    current: usize,
249}
250
251impl<'a> Iterator for ByteRecordIter<'a> {
252    type Item = &'a [u8];
253
254    #[inline]
255    fn next(&mut self) -> Option<Self::Item> {
256        if self.current >= self.record.bounds.len() {
257            None
258        } else {
259            let (start, end) = self.record.bounds[self.current];
260
261            self.current += 1;
262
263            Some(&self.record.data[start..end])
264        }
265    }
266}
267
268pub(crate) struct ByteRecordBuilder<'r> {
269    record: &'r mut ByteRecord,
270    start: usize,
271}
272
273impl<'r> ByteRecordBuilder<'r> {
274    #[inline(always)]
275    pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
276        Self { record, start: 0 }
277    }
278
279    #[inline(always)]
280    pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
281        self.record.data.extend_from_slice(slice);
282    }
283
284    #[inline(always)]
285    pub(crate) fn push_byte(&mut self, byte: u8) {
286        self.record.data.push(byte);
287    }
288
289    #[inline]
290    pub(crate) fn finalize_field(&mut self) {
291        let start = self.start;
292        self.start = self.record.data.len();
293
294        self.record.bounds.push((start, self.start));
295    }
296
297    #[inline]
298    pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
299        let start = self.start;
300        self.start = self.record.data.len() + offset;
301
302        self.record.bounds.push((start, self.start));
303
304        self.start += 1;
305    }
306
307    #[inline(always)]
308    pub(crate) fn bump(&mut self) {
309        self.start += 1;
310    }
311}
312
/// Builds a `ByteRecord` from a comma-separated list of expressions.
///
/// Each expression must provide an `as_bytes()` method (string literals do);
/// its bytes are pushed as one field, in the order given. With no argument
/// the macro yields an empty record. A trailing comma is accepted.
///
/// NOTE: the expansion names `ByteRecord` by its bare name, so that type must
/// be in scope wherever the macro is invoked.
#[macro_export]
macro_rules! brec {
    () => {{
        ByteRecord::new()
    }};

    // One or more fields; the empty case is handled by the arm above so that
    // this arm never declares a `mut` binding it does not mutate.
    ($($x: expr),+ $(,)?) => {{
        let mut r = ByteRecord::new();

        $(
            r.push_field($x.as_bytes());
        )+

        r
    }};
}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks cell access on a three-cell zero-copy record, including both
    /// sides of the out-of-range boundary.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12]);

        assert_eq!(record.len(), 3);
        assert!(!record.is_empty());

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
            assert_eq!(&record[i], *cell);
        }

        // Index 3 is the first one past the last cell; index 4 lies further out.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    /// Checks an owned record before any field is pushed, after three fields
    /// are pushed, and after it is cleared.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        assert_eq!(record.len(), 3);
        assert!(!record.is_empty());
        assert_eq!(record.as_slice(), &b"namesurnameage"[..]);

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);

        assert_eq!(&record[1], &b"surname"[..]);

        record.clear();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);
    }
}