1use std::borrow::Cow;
2use std::fmt;
3use std::ops::Index;
4
5use crate::debug;
6use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
7
8pub struct ZeroCopyByteRecord<'a> {
9 slice: &'a [u8],
10 seps: &'a [usize],
11 quote: u8,
12}
13
14impl<'a> ZeroCopyByteRecord<'a> {
15 #[inline]
16 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
17 Self {
18 slice: trim_trailing_crlf(slice),
19 seps,
20 quote,
21 }
22 }
23
24 #[inline]
25 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
26 (self.seps.to_vec(), self.slice.to_vec())
27 }
28
29 #[inline(always)]
30 pub fn len(&self) -> usize {
31 self.seps.len() + 1
34 }
35
36 #[inline(always)]
37 pub fn is_empty(&self) -> bool {
38 false
39 }
40
41 #[inline(always)]
42 pub fn as_slice(&self) -> &[u8] {
43 self.slice
44 }
45
46 #[inline]
47 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
48 ZeroCopyByteRecordIter {
49 record: self,
50 current: 0,
51 }
52 }
53
54 #[inline]
55 pub fn get(&self, index: usize) -> Option<&[u8]> {
56 let len = self.seps.len();
57
58 if index > len {
59 return None;
60 }
61
62 let start = if index == 0 {
63 0
64 } else {
65 self.seps[index - 1] + 1
66 };
67
68 let end = if index == len {
69 self.slice.len()
70 } else {
71 self.seps[index]
72 };
73
74 Some(&self.slice[start..end])
75 }
76
77 #[inline]
78 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
79 self.get(index)
80 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
81 }
82
83 #[inline]
84 pub fn unescape(&self, index: usize) -> Option<Cow<[u8]>> {
85 self.unquote(index).map(|cell| {
86 if let Some(trimmed) = unquoted(cell, self.quote) {
87 unescape(trimmed, self.quote)
88 } else {
89 Cow::Borrowed(cell)
90 }
91 })
92 }
93
94 fn read_byte_record(&self, record: &mut ByteRecord) {
95 record.clear();
96
97 for cell in self.iter() {
98 if let Some(trimmed) = unquoted(cell, self.quote) {
99 unescape_to(trimmed, self.quote, &mut record.data);
100
101 let bounds_len = record.bounds.len();
102
103 let start = if bounds_len == 0 {
104 0
105 } else {
106 record.bounds[bounds_len - 1].1
107 };
108
109 record.bounds.push((start, record.data.len()));
110 } else {
111 record.push_field(cell);
112 }
113 }
114 }
115
116 #[inline]
117 pub fn to_byte_record(&self) -> ByteRecord {
118 let mut record = ByteRecord::new();
119 self.read_byte_record(&mut record);
120 record
121 }
122}
123
124impl<'a> fmt::Debug for ZeroCopyByteRecord<'a> {
125 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
126 write!(f, "ZeroCopyByteRecord(")?;
127 f.debug_list()
128 .entries(self.iter().map(debug::Bytes))
129 .finish()?;
130 write!(f, ")")?;
131 Ok(())
132 }
133}
134
135pub struct ZeroCopyByteRecordIter<'a> {
136 record: &'a ZeroCopyByteRecord<'a>,
137 current: usize,
138}
139
140impl<'a> ExactSizeIterator for ZeroCopyByteRecordIter<'a> {}
141
142impl<'a> Iterator for ZeroCopyByteRecordIter<'a> {
143 type Item = &'a [u8];
144
145 #[inline]
146 fn next(&mut self) -> Option<Self::Item> {
147 match self.record.get(self.current) {
148 None => None,
149 Some(cell) => {
150 self.current += 1;
151 Some(cell)
152 }
153 }
154 }
155
156 #[inline]
157 fn size_hint(&self) -> (usize, Option<usize>) {
158 let size = self.record.len() - self.current;
159
160 (size, Some(size))
161 }
162
163 #[inline]
164 fn count(self) -> usize
165 where
166 Self: Sized,
167 {
168 self.len()
169 }
170}
171
172impl<'a> Index<usize> for ZeroCopyByteRecord<'a> {
173 type Output = [u8];
174
175 #[inline]
176 fn index(&self, i: usize) -> &[u8] {
177 self.get(i).unwrap()
178 }
179}
180
181#[derive(Default, Clone)]
182pub struct ByteRecord {
183 data: Vec<u8>,
184 bounds: Vec<(usize, usize)>,
185}
186
187impl ByteRecord {
188 pub fn new() -> Self {
189 Self::default()
190 }
191
192 #[inline]
193 pub fn len(&self) -> usize {
194 self.bounds.len()
195 }
196
197 #[inline]
198 pub fn is_empty(&self) -> bool {
199 self.len() == 0
200 }
201
202 #[inline]
203 pub fn clear(&mut self) {
204 self.data.clear();
205 self.bounds.clear();
206 }
207
208 #[inline]
209 pub fn truncate(&mut self, len: usize) {
210 self.bounds.truncate(len);
211
212 if let Some((_, end)) = self.bounds.last() {
213 self.data.truncate(*end);
214 } else {
215 self.data.clear();
216 }
217 }
218
219 #[inline]
220 pub fn as_slice(&self) -> &[u8] {
221 &self.data
222 }
223
224 #[inline]
225 pub fn iter(&self) -> ByteRecordIter<'_> {
226 ByteRecordIter {
227 record: self,
228 current_forward: 0,
229 current_reverse: self.len(),
230 }
231 }
232
233 #[inline(always)]
234 pub fn push_field(&mut self, bytes: &[u8]) {
235 self.data.extend_from_slice(bytes);
236
237 let bounds_len = self.bounds.len();
238
239 let start = if bounds_len == 0 {
240 0
241 } else {
242 self.bounds[bounds_len - 1].1
243 };
244
245 self.bounds.push((start, self.data.len()));
246 }
247
248 #[inline]
249 pub fn get(&self, index: usize) -> Option<&[u8]> {
250 self.bounds
251 .get(index)
252 .copied()
253 .map(|(start, end)| &self.data[start..end])
254 }
255}
256
257impl PartialEq for ByteRecord {
258 fn eq(&self, other: &Self) -> bool {
259 if self.bounds.len() != other.bounds.len() {
260 return false;
261 }
262
263 self.iter()
264 .zip(other.iter())
265 .all(|(self_cell, other_cell)| self_cell == other_cell)
266 }
267}
268
269impl Index<usize> for ByteRecord {
270 type Output = [u8];
271
272 #[inline]
273 fn index(&self, i: usize) -> &[u8] {
274 self.get(i).unwrap()
275 }
276}
277
278impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
279 #[inline]
280 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
281 for x in iter {
282 self.push_field(x.as_ref());
283 }
284 }
285}
286
287impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
288 #[inline]
289 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
290 let mut record = Self::new();
291 record.extend(iter);
292 record
293 }
294}
295
296impl<I, T> From<I> for ByteRecord
297where
298 I: IntoIterator<Item = T>,
299 T: AsRef<[u8]>,
300{
301 fn from(value: I) -> Self {
302 let mut record = Self::new();
303
304 for cell in value.into_iter() {
305 record.push_field(cell.as_ref());
306 }
307
308 record
309 }
310}
311
312impl<'r> IntoIterator for &'r ByteRecord {
313 type IntoIter = ByteRecordIter<'r>;
314 type Item = &'r [u8];
315
316 #[inline]
317 fn into_iter(self) -> ByteRecordIter<'r> {
318 self.iter()
319 }
320}
321
322impl fmt::Debug for ByteRecord {
323 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
324 write!(f, "ByteRecord(")?;
325 f.debug_list()
326 .entries(self.iter().map(debug::Bytes))
327 .finish()?;
328 write!(f, ")")?;
329 Ok(())
330 }
331}
332
333pub struct ByteRecordIter<'a> {
334 record: &'a ByteRecord,
335 current_forward: usize,
336 current_reverse: usize,
337}
338
339impl<'a> ExactSizeIterator for ByteRecordIter<'a> {}
340
341impl<'a> Iterator for ByteRecordIter<'a> {
342 type Item = &'a [u8];
343
344 #[inline]
345 fn next(&mut self) -> Option<Self::Item> {
346 if self.current_forward == self.current_reverse {
347 None
348 } else {
349 let (start, end) = self.record.bounds[self.current_forward];
350
351 self.current_forward += 1;
352
353 Some(&self.record.data[start..end])
354 }
355 }
356
357 #[inline]
358 fn size_hint(&self) -> (usize, Option<usize>) {
359 let size = self.current_reverse - self.current_forward;
360
361 (size, Some(size))
362 }
363
364 #[inline]
365 fn count(self) -> usize
366 where
367 Self: Sized,
368 {
369 self.len()
370 }
371}
372
373impl<'a> DoubleEndedIterator for ByteRecordIter<'a> {
374 #[inline]
375 fn next_back(&mut self) -> Option<Self::Item> {
376 if self.current_forward == self.current_reverse {
377 None
378 } else {
379 self.current_reverse -= 1;
380
381 let (start, end) = self.record.bounds[self.current_reverse];
382
383 Some(&self.record.data[start..end])
384 }
385 }
386}
387
388pub(crate) struct ByteRecordBuilder<'r> {
389 record: &'r mut ByteRecord,
390 start: usize,
391}
392
393impl<'r> ByteRecordBuilder<'r> {
394 #[inline(always)]
395 pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
396 Self { record, start: 0 }
397 }
398
399 #[inline(always)]
400 pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
401 self.record.data.extend_from_slice(slice);
402 }
403
404 #[inline(always)]
405 pub(crate) fn push_byte(&mut self, byte: u8) {
406 self.record.data.push(byte);
407 }
408
409 #[inline]
410 pub(crate) fn finalize_field(&mut self) {
411 let start = self.start;
412 self.start = self.record.data.len();
413
414 self.record.bounds.push((start, self.start));
415 }
416
417 #[inline]
418 pub(crate) fn finalize_record(&mut self) {
419 let start = self.start;
420 self.start = self.record.data.len();
421
422 let mut end = self.start;
423 end -= (self.start > 0 && self.record.data[self.start - 1] == b'\r') as usize;
424
425 self.record.bounds.push((start, end));
426 }
427
428 #[inline]
429 pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
430 let start = self.start;
431 self.start = self.record.data.len() + offset;
432
433 self.record.bounds.push((start, self.start));
434
435 self.start += 1;
436 }
437
438 #[inline(always)]
439 pub(crate) fn bump(&mut self) {
440 self.start +=
441 (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
442 }
443}
444
/// Builds a `ByteRecord` from a comma-separated list of expressions, each
/// one contributing a field through its `as_bytes()` method.
///
/// `brec![]` yields an empty record and a trailing comma is accepted. The
/// expansion refers to `ByteRecord` unqualified, so that type must be in
/// scope where the macro is invoked.
#[macro_export]
macro_rules! brec {
    () => {{
        ByteRecord::new()
    }};

    ($($x: expr),+ $(,)?) => {{
        let mut r = ByteRecord::new();

        $(
            r.push_field($x.as_bytes());
        )+

        r
    }};
}
461
#[cfg(test)]
mod tests {
    use super::*;

    /// Field access on a borrowed record, including the out-of-range cases.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');

        assert_eq!(record.len(), 3);

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        // The first index past the last field, then one further out.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    /// Field access on an owned record built with `push_field`.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);
    }
}