simd_csv/
records.rs

1use std::fmt;
2use std::ops::Index;
3
4use crate::debug;
5use crate::utils::trim_trailing_crlf;
6
7pub struct ZeroCopyByteRecord<'a> {
8    slice: &'a [u8],
9    seps: &'a [usize],
10}
11
12impl<'a> ZeroCopyByteRecord<'a> {
13    #[inline]
14    pub(crate) fn new(slice: &'a [u8], seps: &'a [usize]) -> Self {
15        Self {
16            slice: trim_trailing_crlf(slice),
17            seps,
18        }
19    }
20
21    #[inline]
22    pub fn len(&self) -> usize {
23        self.seps.len() + 1
24    }
25
26    #[inline]
27    pub fn is_empty(&self) -> bool {
28        self.len() == 0
29    }
30
31    #[inline]
32    pub fn as_slice(&self) -> &[u8] {
33        self.slice
34    }
35
36    #[inline]
37    pub fn iter(&self) -> ZeroCopyRecordIter<'_> {
38        ZeroCopyRecordIter {
39            record: self,
40            current_sep_index: 0,
41            offset: 0,
42        }
43    }
44}
45
46impl<'a> fmt::Debug for ZeroCopyByteRecord<'a> {
47    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
48        write!(f, "ZeroCopyByteRecord(")?;
49        f.debug_list()
50            .entries(self.iter().map(debug::Bytes))
51            .finish()?;
52        write!(f, ")")?;
53        Ok(())
54    }
55}
56
57pub struct ZeroCopyRecordIter<'a> {
58    record: &'a ZeroCopyByteRecord<'a>,
59    current_sep_index: usize,
60    offset: usize,
61}
62
63impl<'a> Iterator for ZeroCopyRecordIter<'a> {
64    type Item = &'a [u8];
65
66    fn next(&mut self) -> Option<Self::Item> {
67        let seps = &self.record.seps;
68        let len = seps.len();
69
70        if self.current_sep_index > len {
71            return None;
72        }
73
74        let offset = self.offset;
75
76        let end = if self.current_sep_index < len {
77            let sep = seps[self.current_sep_index];
78            self.offset = sep + 1;
79            sep
80        } else {
81            // Last field
82            self.offset = self.record.slice.len();
83            self.offset
84        };
85
86        self.current_sep_index += 1;
87
88        Some(&self.record.slice[offset..end])
89    }
90}
91
92#[derive(Default, Clone)]
93pub struct ByteRecord {
94    data: Vec<u8>,
95    bounds: Vec<(usize, usize)>,
96}
97
98impl ByteRecord {
99    pub fn new() -> Self {
100        Self::default()
101    }
102
103    #[inline]
104    pub fn len(&self) -> usize {
105        self.bounds.len()
106    }
107
108    #[inline]
109    pub fn is_empty(&self) -> bool {
110        self.len() == 0
111    }
112
113    #[inline]
114    pub fn clear(&mut self) {
115        self.data.clear();
116        self.bounds.clear();
117    }
118
119    #[inline]
120    pub fn as_slice(&self) -> &[u8] {
121        &self.data
122    }
123
124    #[inline]
125    pub fn iter(&self) -> ByteRecordIter<'_> {
126        ByteRecordIter {
127            record: self,
128            current: 0,
129        }
130    }
131
132    #[inline(always)]
133    pub fn push_field(&mut self, bytes: &[u8]) {
134        self.data.extend_from_slice(bytes);
135
136        let bounds_len = self.bounds.len();
137
138        let start = if bounds_len == 0 {
139            0
140        } else {
141            self.bounds[bounds_len - 1].1
142        };
143
144        self.bounds.push((start, self.data.len()));
145    }
146
147    #[inline]
148    pub fn get(&self, index: usize) -> Option<&[u8]> {
149        self.bounds
150            .get(index)
151            .copied()
152            .map(|(start, end)| &self.data[start..end])
153    }
154}
155
156impl PartialEq for ByteRecord {
157    fn eq(&self, other: &Self) -> bool {
158        if self.bounds.len() != other.bounds.len() {
159            return false;
160        }
161
162        self.iter()
163            .zip(other.iter())
164            .all(|(self_cell, other_cell)| self_cell == other_cell)
165    }
166}
167
168impl Index<usize> for ByteRecord {
169    type Output = [u8];
170
171    #[inline]
172    fn index(&self, i: usize) -> &[u8] {
173        self.get(i).unwrap()
174    }
175}
176
177impl<I, T> From<I> for ByteRecord
178where
179    I: IntoIterator<Item = T>,
180    T: AsRef<[u8]>,
181{
182    fn from(value: I) -> Self {
183        let mut record = Self::new();
184
185        for cell in value.into_iter() {
186            record.push_field(cell.as_ref());
187        }
188
189        record
190    }
191}
192
193impl fmt::Debug for ByteRecord {
194    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
195        write!(f, "ByteRecord(")?;
196        f.debug_list()
197            .entries(self.iter().map(debug::Bytes))
198            .finish()?;
199        write!(f, ")")?;
200        Ok(())
201    }
202}
203
204pub struct ByteRecordIter<'a> {
205    record: &'a ByteRecord,
206    current: usize,
207}
208
209impl<'a> Iterator for ByteRecordIter<'a> {
210    type Item = &'a [u8];
211
212    #[inline]
213    fn next(&mut self) -> Option<Self::Item> {
214        if self.current >= self.record.bounds.len() {
215            None
216        } else {
217            let (start, end) = self.record.bounds[self.current];
218
219            self.current += 1;
220
221            Some(&self.record.data[start..end])
222        }
223    }
224}
225
226pub(crate) struct ByteRecordBuilder<'r> {
227    record: &'r mut ByteRecord,
228    start: usize,
229}
230
231impl<'r> ByteRecordBuilder<'r> {
232    #[inline(always)]
233    pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
234        Self { record, start: 0 }
235    }
236
237    #[inline(always)]
238    pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
239        self.record.data.extend_from_slice(slice);
240    }
241
242    #[inline(always)]
243    pub(crate) fn push_byte(&mut self, byte: u8) {
244        self.record.data.push(byte);
245    }
246
247    #[inline]
248    pub(crate) fn finalize_field(&mut self) {
249        let start = self.start;
250        self.start = self.record.data.len();
251
252        self.record.bounds.push((start, self.start));
253    }
254
255    #[inline]
256    pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
257        let start = self.start;
258        self.start = self.record.data.len() + offset;
259
260        self.record.bounds.push((start, self.start));
261
262        self.start += 1;
263    }
264
265    #[inline(always)]
266    pub(crate) fn bump(&mut self) {
267        self.start += 1;
268    }
269}
270
/// Builds a `ByteRecord` from a list of expressions exposing `as_bytes()`
/// (e.g. string literals), each expression becoming one field.
///
/// `brec![]` yields an empty record. A trailing comma is accepted, as with
/// `vec!`, so multi-line invocations can end every line with a comma.
///
/// `ByteRecord` is resolved at the call site and must therefore be in scope
/// where the macro is invoked.
#[macro_export]
macro_rules! brec {
    () => {{
        ByteRecord::new()
    }};

    // `+` rather than `*`: the empty case is handled by the arm above, which
    // avoids declaring a needlessly mutable binding.
    ($($x: expr),+ $(,)?) => {{
        let mut r = ByteRecord::new();

        $(
            r.push_field($x.as_bytes());
        )+

        r
    }};
}
287
#[cfg(test)]
mod tests {
    use super::*;

    /// A zero-copy record built from raw bytes and separator offsets must
    /// expose every field between the separators.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12]);

        assert_eq!(record.len(), 3);

        let fields: Vec<&[u8]> = record.iter().collect();
        assert_eq!(fields, [&b"name"[..], &b"surname"[..], &b"age"[..]]);
    }

    /// An owned record starts empty, then exposes pushed fields both through
    /// iteration and through indexed access.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert!(record.get(0).is_none());

        let expected: [&[u8]; 3] = [b"name", b"surname", b"age"];

        for field in expected {
            record.push_field(field);
        }

        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (index, field) in expected.iter().enumerate() {
            assert_eq!(record.get(index), Some(*field));
        }

        // One past the last field.
        assert_eq!(record.get(expected.len()), None);
    }
}