1use std::borrow::Cow;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::ops::Index;
5
6use crate::debug;
7use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
8
/// A borrowed view over the bytes of a single record.
///
/// The view keeps the raw record bytes together with the offsets of the
/// separators found in them; fields are sliced out on demand (see `get`).
pub struct ZeroCopyByteRecord<'a> {
    /// Raw bytes of the record, as stored by `new` after `trim_trailing_crlf`.
    slice: &'a [u8],
    /// Offsets into `slice`; `get` treats each one as the position of a
    /// single separator byte sitting between two fields.
    seps: &'a [usize],
    /// Quote byte forwarded to the `unquoted` / `unescape` helpers.
    pub(crate) quote: u8,
}
15
16impl<'a> ZeroCopyByteRecord<'a> {
17 #[inline]
18 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
19 Self {
20 slice: trim_trailing_crlf(slice),
21 seps,
22 quote,
23 }
24 }
25
26 #[inline]
27 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
28 (self.seps.to_vec(), self.slice.to_vec())
29 }
30
31 #[inline(always)]
34 pub fn len(&self) -> usize {
35 self.seps.len() + 1
38 }
39
40 #[inline(always)]
42 pub fn is_empty(&self) -> bool {
43 false
44 }
45
46 #[inline(always)]
48 pub fn as_slice(&self) -> &[u8] {
49 self.slice
50 }
51
52 #[inline]
57 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
58 ZeroCopyByteRecordIter {
59 record: self,
60 current_forward: 0,
61 current_backward: self.len(),
62 }
63 }
64
65 #[inline]
69 pub fn unquoted_iter(&self) -> ZeroCopyByteRecordUnquotedIter<'_> {
70 ZeroCopyByteRecordUnquotedIter {
71 record: self,
72 current_forward: 0,
73 current_backward: self.len(),
74 }
75 }
76
77 #[inline]
81 pub fn unescaped_iter(&self) -> ZeroCopyByteRecordUnescapedIter<'_> {
82 ZeroCopyByteRecordUnescapedIter {
83 record: self,
84 current_forward: 0,
85 current_backward: self.len(),
86 }
87 }
88
89 #[inline]
95 pub fn get(&self, index: usize) -> Option<&[u8]> {
96 let len = self.seps.len();
97
98 if index > len {
99 return None;
100 }
101
102 let start = if index == 0 {
103 0
104 } else {
105 self.seps[index - 1] + 1
106 };
107
108 let end = if index == len {
109 self.slice.len()
110 } else {
111 self.seps[index]
112 };
113
114 Some(&self.slice[start..end])
115 }
116
117 #[inline]
127 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
128 self.get(index)
129 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
130 }
131
132 #[inline]
142 pub fn unescape(&self, index: usize) -> Option<Cow<[u8]>> {
143 self.unquote(index).map(|cell| {
144 if let Some(trimmed) = unquoted(cell, self.quote) {
145 unescape(trimmed, self.quote)
146 } else {
147 Cow::Borrowed(cell)
148 }
149 })
150 }
151
152 fn read_byte_record(&self, record: &mut ByteRecord) {
153 record.clear();
154
155 for cell in self.iter() {
156 if let Some(trimmed) = unquoted(cell, self.quote) {
157 unescape_to(trimmed, self.quote, &mut record.data);
158
159 let bounds_len = record.bounds.len();
160
161 let start = if bounds_len == 0 {
162 0
163 } else {
164 record.bounds[bounds_len - 1].1
165 };
166
167 record.bounds.push((start, record.data.len()));
168 } else {
169 record.push_field(cell);
170 }
171 }
172 }
173
174 #[inline]
176 pub fn to_byte_record(&self) -> ByteRecord {
177 let mut record = ByteRecord::new();
178 self.read_byte_record(&mut record);
179 record
180 }
181
182 #[inline]
183 pub(crate) fn to_byte_record_in_reverse(&self) -> ByteRecord {
184 let mut record = ByteRecord::new();
185
186 for cell in self.unescaped_iter().rev() {
187 record.push_field_in_reverse(&cell);
188 }
189
190 record
191 }
192}
193
194impl fmt::Debug for ZeroCopyByteRecord<'_> {
195 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
196 write!(f, "ZeroCopyByteRecord(")?;
197 f.debug_list()
198 .entries(self.iter().map(debug::Bytes))
199 .finish()?;
200 write!(f, ")")?;
201 Ok(())
202 }
203}
204
// Generates a double-ended, exact-size iterator over the cells of a
// `ZeroCopyByteRecord`.
//
// - `$name`: name of the generated iterator struct.
// - `$method`: accessor of `ZeroCopyByteRecord` called with each cell index.
// - `$out_type`: item type; it may mention the `'a` lifetime of the record.
macro_rules! make_zero_copy_iterator {
    ($name:ident, $method:ident, $out_type:ty) => {
        /// Cursor pair over a record: `current_forward` is the next index
        /// served from the front, `current_backward` is one past the next
        /// index served from the back.
        pub struct $name<'a> {
            record: &'a ZeroCopyByteRecord<'a>,
            current_forward: usize,
            current_backward: usize,
        }

        impl ExactSizeIterator for $name<'_> {}

        impl<'a> Iterator for $name<'a> {
            type Item = $out_type;

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                // Both cursors meeting means every cell has been served.
                if self.current_forward == self.current_backward {
                    return None;
                }

                let index = self.current_forward;
                self.current_forward = index + 1;

                self.record.$method(index)
            }

            #[inline]
            fn size_hint(&self) -> (usize, Option<usize>) {
                let remaining = self.current_backward - self.current_forward;

                (remaining, Some(remaining))
            }

            #[inline]
            fn count(self) -> usize
            where
                Self: Sized,
            {
                // The remaining length is known exactly, no need to iterate.
                self.len()
            }
        }

        impl DoubleEndedIterator for $name<'_> {
            #[inline]
            fn next_back(&mut self) -> Option<Self::Item> {
                if self.current_forward == self.current_backward {
                    return None;
                }

                self.current_backward -= 1;

                self.record.$method(self.current_backward)
            }
        }
    };
}
261
// Iterator yielding each cell exactly as returned by `ZeroCopyByteRecord::get`.
make_zero_copy_iterator!(ZeroCopyByteRecordIter, get, &'a [u8]);
// Iterator yielding each cell as returned by `ZeroCopyByteRecord::unquote`.
make_zero_copy_iterator!(ZeroCopyByteRecordUnquotedIter, unquote, &'a [u8]);
// Iterator yielding each cell as returned by `ZeroCopyByteRecord::unescape`.
make_zero_copy_iterator!(ZeroCopyByteRecordUnescapedIter, unescape, Cow<'a, [u8]>);
265
266impl Index<usize> for ZeroCopyByteRecord<'_> {
267 type Output = [u8];
268
269 #[inline]
270 fn index(&self, i: usize) -> &[u8] {
271 self.get(i).unwrap()
272 }
273}
274
/// An owned record: the bytes of every field stored in a single buffer,
/// together with the bounds of each field inside that buffer.
#[derive(Default, Clone, Eq)]
pub struct ByteRecord {
    /// Backing buffer holding the bytes of the fields.
    data: Vec<u8>,
    /// One `(start, end)` pair per field; `get` returns `data[start..end]`.
    bounds: Vec<(usize, usize)>,
}
292
293impl ByteRecord {
294 pub fn new() -> Self {
296 Self::default()
297 }
298
299 #[inline]
301 pub fn len(&self) -> usize {
302 self.bounds.len()
303 }
304
305 #[inline]
307 pub fn is_empty(&self) -> bool {
308 self.len() == 0
309 }
310
311 #[inline]
313 pub fn clear(&mut self) {
314 self.data.clear();
315 self.bounds.clear();
316 }
317
318 #[inline]
321 pub fn truncate(&mut self, len: usize) {
322 self.bounds.truncate(len);
323
324 if let Some((_, end)) = self.bounds.last() {
325 self.data.truncate(*end);
326 } else {
327 self.data.clear();
328 }
329 }
330
331 #[inline]
338 pub fn as_slice(&self) -> &[u8] {
339 &self.data
340 }
341
342 #[inline]
344 pub fn iter(&self) -> ByteRecordIter<'_> {
345 ByteRecordIter {
346 record: self,
347 current_forward: 0,
348 current_backward: self.len(),
349 }
350 }
351
352 #[inline(always)]
354 pub fn push_field(&mut self, bytes: &[u8]) {
355 self.data.extend_from_slice(bytes);
356
357 let bounds_len = self.bounds.len();
358
359 let start = if bounds_len == 0 {
360 0
361 } else {
362 self.bounds[bounds_len - 1].1
363 };
364
365 self.bounds.push((start, self.data.len()));
366 }
367
368 #[inline]
369 fn push_field_in_reverse(&mut self, bytes: &[u8]) {
370 self.data.extend_from_slice(bytes);
371
372 let bounds_len = self.bounds.len();
373
374 let start = if bounds_len == 0 {
375 0
376 } else {
377 self.bounds[bounds_len - 1].1
378 };
379
380 let bounds = (start, self.data.len());
381 self.data[bounds.0..bounds.1].reverse();
382
383 self.bounds.push(bounds);
384 }
385
386 #[inline]
388 pub fn get(&self, index: usize) -> Option<&[u8]> {
389 self.bounds
390 .get(index)
391 .copied()
392 .map(|(start, end)| &self.data[start..end])
393 }
394
395 pub(crate) fn reverse(&mut self) {
396 self.data.reverse();
397 self.bounds.reverse();
398
399 let len = self.data.len();
400
401 for (start, end) in self.bounds.iter_mut() {
402 let new_end = len - *start;
403 let new_start = len - *end;
404
405 *start = new_start;
406 *end = new_end;
407 }
408 }
409}
410
411impl PartialEq for ByteRecord {
412 fn eq(&self, other: &Self) -> bool {
413 if self.bounds.len() != other.bounds.len() {
414 return false;
415 }
416
417 self.iter()
418 .zip(other.iter())
419 .all(|(self_cell, other_cell)| self_cell == other_cell)
420 }
421}
422
423impl Hash for ByteRecord {
424 #[inline]
425 fn hash<H: Hasher>(&self, state: &mut H) {
426 state.write_usize(self.len());
427
428 for cell in self.iter() {
429 state.write(cell);
430 }
431 }
432}
433
434impl Index<usize> for ByteRecord {
435 type Output = [u8];
436
437 #[inline]
438 fn index(&self, i: usize) -> &[u8] {
439 self.get(i).unwrap()
440 }
441}
442
443impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
444 #[inline]
445 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
446 for x in iter {
447 self.push_field(x.as_ref());
448 }
449 }
450}
451
452impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
453 #[inline]
454 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
455 let mut record = Self::new();
456 record.extend(iter);
457 record
458 }
459}
460
461impl<I, T> From<I> for ByteRecord
462where
463 I: IntoIterator<Item = T>,
464 T: AsRef<[u8]>,
465{
466 fn from(value: I) -> Self {
467 let mut record = Self::new();
468
469 for cell in value.into_iter() {
470 record.push_field(cell.as_ref());
471 }
472
473 record
474 }
475}
476
477impl<'r> IntoIterator for &'r ByteRecord {
478 type IntoIter = ByteRecordIter<'r>;
479 type Item = &'r [u8];
480
481 #[inline]
482 fn into_iter(self) -> ByteRecordIter<'r> {
483 self.iter()
484 }
485}
486
487impl fmt::Debug for ByteRecord {
488 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
489 write!(f, "ByteRecord(")?;
490 f.debug_list()
491 .entries(self.iter().map(debug::Bytes))
492 .finish()?;
493 write!(f, ")")?;
494 Ok(())
495 }
496}
497
/// Double-ended iterator over the fields of a `ByteRecord`.
pub struct ByteRecordIter<'a> {
    /// Record whose fields are being yielded.
    record: &'a ByteRecord,
    /// Index of the next field returned by `next`.
    current_forward: usize,
    /// One past the index of the next field returned by `next_back`.
    current_backward: usize,
}

// The remaining length is known exactly: `size_hint` reports
// `current_backward - current_forward` as both its lower and upper bound.
impl ExactSizeIterator for ByteRecordIter<'_> {}
505
506impl<'a> Iterator for ByteRecordIter<'a> {
507 type Item = &'a [u8];
508
509 #[inline]
510 fn next(&mut self) -> Option<Self::Item> {
511 if self.current_forward == self.current_backward {
512 None
513 } else {
514 let (start, end) = self.record.bounds[self.current_forward];
515
516 self.current_forward += 1;
517
518 Some(&self.record.data[start..end])
519 }
520 }
521
522 #[inline]
523 fn size_hint(&self) -> (usize, Option<usize>) {
524 let size = self.current_backward - self.current_forward;
525
526 (size, Some(size))
527 }
528
529 #[inline]
530 fn count(self) -> usize
531 where
532 Self: Sized,
533 {
534 self.len()
535 }
536}
537
538impl DoubleEndedIterator for ByteRecordIter<'_> {
539 #[inline]
540 fn next_back(&mut self) -> Option<Self::Item> {
541 if self.current_forward == self.current_backward {
542 None
543 } else {
544 self.current_backward -= 1;
545
546 let (start, end) = self.record.bounds[self.current_backward];
547
548 Some(&self.record.data[start..end])
549 }
550 }
551}
552
/// Incremental writer that fills a borrowed `ByteRecord` field by field.
pub(crate) struct ByteRecordBuilder<'r> {
    /// Record being written to.
    record: &'r mut ByteRecord,
    /// Offset in `record.data` used as the start of the next finalized field.
    start: usize,
}
557
impl<'r> ByteRecordBuilder<'r> {
    /// Wraps `record` with the field cursor set to offset 0.
    ///
    /// The record is not cleared here.
    // NOTE(review): presumably callers hand over an empty (cleared) record —
    // confirm at the call sites.
    #[inline(always)]
    pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
        Self { record, start: 0 }
    }

    /// Appends `slice` to the record's byte buffer without closing a field.
    #[inline(always)]
    pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
        self.record.data.extend_from_slice(slice);
    }

    /// Appends a single byte to the record's byte buffer without closing a
    /// field.
    #[inline(always)]
    pub(crate) fn push_byte(&mut self, byte: u8) {
        self.record.data.push(byte);
    }

    /// Closes the current field: it spans from the field cursor up to the
    /// current end of the byte buffer, and the cursor then moves to that end.
    #[inline]
    pub(crate) fn finalize_field(&mut self) {
        let start = self.start;
        self.start = self.record.data.len();

        self.record.bounds.push((start, self.start));
    }

    /// Closes the last field of a record, first dropping a trailing `\r`
    /// from the byte buffer when there is one.
    #[inline]
    pub(crate) fn finalize_record(&mut self) {
        if let Some(b'\r') = self.record.data.last() {
            self.record.data.pop();
        }

        self.finalize_field();
    }

    /// Closes the current field at `offset` bytes past the current end of
    /// the byte buffer, then moves the field cursor one byte further.
    // NOTE(review): this looks meant for callers that copy a whole chunk
    // (separators included) into the buffer afterwards, the extra `+ 1`
    // skipping the separator byte — confirm against the reader.
    #[inline]
    pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
        let start = self.start;
        self.start = self.record.data.len() + offset;

        self.record.bounds.push((start, self.start));

        self.start += 1;
    }

    /// Advances the field cursor by one byte, unless it is equal to the start
    /// of the last recorded field (or to 0 when no field was recorded yet).
    ///
    /// The comparison is cast to `usize` and added, so no branch is written
    /// in the source.
    // NOTE(review): the intent of the guard is not visible from this file —
    // confirm with the caller before changing it.
    #[inline(always)]
    pub(crate) fn bump(&mut self) {
        self.start +=
            (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
    }
}
607
#[cfg(test)]
mod tests {
    use super::*;

    /// Field count, iteration and indexed access on a borrowed record.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');

        assert_eq!(record.len(), 3);

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        // Index 3 is the first out-of-bounds index of a three-field record.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    /// Pushing fields into an owned record and reading them back.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);
    }

    /// A record filled through the builder (trailing `\r` dropped) can still
    /// be extended with `push_field` afterwards.
    #[test]
    fn test_mutate_record_after_read() {
        let mut record = ByteRecord::new();
        let mut builder = ByteRecordBuilder::wrap(&mut record);
        builder.extend_from_slice(b"test\r");
        builder.finalize_record();

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test"]);

        record.push_field(b"next");

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test", b"next"]);
    }

    /// Reversing flips both the field order and the bytes of each field, and
    /// reversing twice gives back the original record.
    #[test]
    fn test_reverse_byte_record() {
        let record = brec!["name", "surname", "age"];
        let mut reversed = record.clone();
        reversed.reverse();

        assert_eq!(reversed, brec!["ega", "emanrus", "eman"]);
        reversed.reverse();
        assert_eq!(record, reversed);
    }
}