1use std::borrow::Cow;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::ops::Index;
5
6use crate::debug;
7use crate::error::{self, Error, ErrorKind};
8use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
9
10pub struct ZeroCopyByteRecord<'a> {
12 slice: &'a [u8],
13 seps: &'a [usize],
14 pub(crate) quote: u8,
15}
16
17impl<'a> ZeroCopyByteRecord<'a> {
18 #[inline]
19 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
20 Self {
21 slice: trim_trailing_crlf(slice),
22 seps,
23 quote,
24 }
25 }
26
27 #[inline]
28 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
29 (self.seps.to_vec(), self.slice.to_vec())
30 }
31
32 #[inline(always)]
35 pub fn len(&self) -> usize {
36 self.seps.len() + 1
39 }
40
41 #[inline(always)]
43 pub fn is_empty(&self) -> bool {
44 false
45 }
46
47 #[inline(always)]
49 pub fn as_slice(&self) -> &[u8] {
50 self.slice
51 }
52
53 #[inline]
58 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
59 ZeroCopyByteRecordIter {
60 record: self,
61 current_forward: 0,
62 current_backward: self.len(),
63 }
64 }
65
66 #[inline]
70 pub fn unquoted_iter(&self) -> ZeroCopyByteRecordUnquotedIter<'_> {
71 ZeroCopyByteRecordUnquotedIter {
72 record: self,
73 current_forward: 0,
74 current_backward: self.len(),
75 }
76 }
77
78 #[inline]
82 pub fn unescaped_iter(&self) -> ZeroCopyByteRecordUnescapedIter<'_> {
83 ZeroCopyByteRecordUnescapedIter {
84 record: self,
85 current_forward: 0,
86 current_backward: self.len(),
87 }
88 }
89
90 #[inline]
96 pub fn get(&self, index: usize) -> Option<&[u8]> {
97 let len = self.seps.len();
98
99 if index > len {
100 return None;
101 }
102
103 let start = if index == 0 {
104 0
105 } else {
106 self.seps[index - 1] + 1
107 };
108
109 let end = if index == len {
110 self.slice.len()
111 } else {
112 self.seps[index]
113 };
114
115 Some(&self.slice[start..end])
116 }
117
118 #[inline]
128 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
129 self.get(index)
130 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
131 }
132
133 #[inline]
143 pub fn unescape(&self, index: usize) -> Option<Cow<'_, [u8]>> {
144 self.get(index).map(|cell| {
145 if let Some(trimmed) = unquoted(cell, self.quote) {
146 unescape(trimmed, self.quote)
147 } else {
148 Cow::Borrowed(cell)
149 }
150 })
151 }
152
153 fn read_byte_record(&self, record: &mut ByteRecord) {
154 record.clear();
155
156 for cell in self.iter() {
157 if let Some(trimmed) = unquoted(cell, self.quote) {
158 unescape_to(trimmed, self.quote, &mut record.data);
159
160 let bounds_len = record.bounds.len();
161
162 let start = if bounds_len == 0 {
163 0
164 } else {
165 record.bounds[bounds_len - 1].1
166 };
167
168 record.bounds.push((start, record.data.len()));
169 } else {
170 record.push_field(cell);
171 }
172 }
173 }
174
175 #[inline]
177 pub fn to_byte_record(&self) -> ByteRecord {
178 let mut record = ByteRecord::new();
179 self.read_byte_record(&mut record);
180 record
181 }
182
183 #[inline]
184 pub(crate) fn to_byte_record_in_reverse(&self) -> ByteRecord {
185 let mut record = ByteRecord::new();
186
187 for cell in self.unescaped_iter().rev() {
188 record.push_field_in_reverse(&cell);
189 }
190
191 record
192 }
193}
194
195impl fmt::Debug for ZeroCopyByteRecord<'_> {
196 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
197 write!(f, "ZeroCopyByteRecord(")?;
198 f.debug_list()
199 .entries(self.iter().map(debug::Bytes))
200 .finish()?;
201 write!(f, ")")?;
202 Ok(())
203 }
204}
205
206macro_rules! make_zero_copy_iterator {
207 ($name:ident, $method: ident, $out_type: ty) => {
208 pub struct $name<'a> {
209 record: &'a ZeroCopyByteRecord<'a>,
210 current_forward: usize,
211 current_backward: usize,
212 }
213
214 impl ExactSizeIterator for $name<'_> {}
215
216 impl<'a> Iterator for $name<'a> {
217 type Item = $out_type;
218
219 #[inline]
220 fn next(&mut self) -> Option<Self::Item> {
221 if self.current_forward == self.current_backward {
222 None
223 } else {
224 let cell = self.record.$method(self.current_forward);
225
226 self.current_forward += 1;
227
228 cell
229 }
230 }
231
232 #[inline]
233 fn size_hint(&self) -> (usize, Option<usize>) {
234 let size = self.current_backward - self.current_forward;
235
236 (size, Some(size))
237 }
238
239 #[inline]
240 fn count(self) -> usize
241 where
242 Self: Sized,
243 {
244 self.len()
245 }
246 }
247
248 impl DoubleEndedIterator for $name<'_> {
249 #[inline]
250 fn next_back(&mut self) -> Option<Self::Item> {
251 if self.current_forward == self.current_backward {
252 None
253 } else {
254 self.current_backward -= 1;
255
256 self.record.$method(self.current_backward)
257 }
258 }
259 }
260 };
261}
262
263make_zero_copy_iterator!(ZeroCopyByteRecordIter, get, &'a [u8]);
264make_zero_copy_iterator!(ZeroCopyByteRecordUnquotedIter, unquote, &'a [u8]);
265make_zero_copy_iterator!(ZeroCopyByteRecordUnescapedIter, unescape, Cow<'a, [u8]>);
266
267impl Index<usize> for ZeroCopyByteRecord<'_> {
268 type Output = [u8];
269
270 #[inline]
271 fn index(&self, i: usize) -> &[u8] {
272 self.get(i).unwrap()
273 }
274}
275
276#[derive(Default, Clone, Eq)]
289pub struct ByteRecord {
290 data: Vec<u8>,
291 bounds: Vec<(usize, usize)>,
292}
293
294impl ByteRecord {
295 pub fn new() -> Self {
297 Self::default()
298 }
299
300 #[inline]
302 pub fn len(&self) -> usize {
303 self.bounds.len()
304 }
305
306 #[inline]
308 pub fn is_empty(&self) -> bool {
309 self.len() == 0
310 }
311
312 #[inline]
314 pub fn clear(&mut self) {
315 self.data.clear();
316 self.bounds.clear();
317 }
318
319 #[inline]
322 pub fn truncate(&mut self, len: usize) {
323 self.bounds.truncate(len);
324
325 if let Some((_, end)) = self.bounds.last() {
326 self.data.truncate(*end);
327 } else {
328 self.data.clear();
329 }
330 }
331
332 #[inline]
339 pub fn as_slice(&self) -> &[u8] {
340 &self.data
341 }
342
343 #[inline]
345 pub fn iter(&self) -> ByteRecordIter<'_> {
346 ByteRecordIter {
347 record: self,
348 current_forward: 0,
349 current_backward: self.len(),
350 }
351 }
352
353 #[inline(always)]
355 pub fn push_field(&mut self, bytes: &[u8]) {
356 self.data.extend_from_slice(bytes);
357
358 let bounds_len = self.bounds.len();
359
360 let start = if bounds_len == 0 {
361 0
362 } else {
363 self.bounds[bounds_len - 1].1
364 };
365
366 self.bounds.push((start, self.data.len()));
367 }
368
369 #[inline]
370 fn push_field_in_reverse(&mut self, bytes: &[u8]) {
371 self.data.extend_from_slice(bytes);
372
373 let bounds_len = self.bounds.len();
374
375 let start = if bounds_len == 0 {
376 0
377 } else {
378 self.bounds[bounds_len - 1].1
379 };
380
381 let bounds = (start, self.data.len());
382 self.data[bounds.0..bounds.1].reverse();
383
384 self.bounds.push(bounds);
385 }
386
387 #[inline]
389 pub fn get(&self, index: usize) -> Option<&[u8]> {
390 self.bounds
391 .get(index)
392 .copied()
393 .map(|(start, end)| &self.data[start..end])
394 }
395
396 pub fn into_string_record(self) -> error::Result<StringRecord> {
398 let mut new_record = StringRecord { inner: self };
399
400 if !new_record.validate_utf8() {
401 Err(Error::new(ErrorKind::Utf8Error))
402 } else {
403 Ok(new_record)
404 }
405 }
406
407 pub(crate) fn reverse(&mut self) {
408 self.data.reverse();
409 self.bounds.reverse();
410
411 let len = self.data.len();
412
413 for (start, end) in self.bounds.iter_mut() {
414 let new_end = len - *start;
415 let new_start = len - *end;
416
417 *start = new_start;
418 *end = new_end;
419 }
420 }
421}
422
423impl PartialEq for ByteRecord {
424 fn eq(&self, other: &Self) -> bool {
425 if self.bounds.len() != other.bounds.len() {
426 return false;
427 }
428
429 self.iter()
430 .zip(other.iter())
431 .all(|(self_cell, other_cell)| self_cell == other_cell)
432 }
433}
434
435impl Hash for ByteRecord {
436 #[inline]
437 fn hash<H: Hasher>(&self, state: &mut H) {
438 state.write_usize(self.len());
439
440 for cell in self.iter() {
441 state.write(cell);
442 }
443 }
444}
445
446impl Index<usize> for ByteRecord {
447 type Output = [u8];
448
449 #[inline]
450 fn index(&self, i: usize) -> &[u8] {
451 self.get(i).unwrap()
452 }
453}
454
455impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
456 #[inline]
457 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
458 let iter = iter.into_iter();
459 let size_hint = iter.size_hint();
460
461 self.bounds.reserve(size_hint.1.unwrap_or(size_hint.0));
462
463 for x in iter {
464 self.push_field(x.as_ref());
465 }
466 }
467}
468
469impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
470 #[inline]
471 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
472 let mut record = Self::new();
473 record.extend(iter);
474 record
475 }
476}
477
478impl<I, T> From<I> for ByteRecord
479where
480 I: IntoIterator<Item = T>,
481 T: AsRef<[u8]>,
482{
483 fn from(value: I) -> Self {
484 let mut record = Self::new();
485
486 for cell in value.into_iter() {
487 record.push_field(cell.as_ref());
488 }
489
490 record
491 }
492}
493
494impl<'r> IntoIterator for &'r ByteRecord {
495 type IntoIter = ByteRecordIter<'r>;
496 type Item = &'r [u8];
497
498 #[inline]
499 fn into_iter(self) -> ByteRecordIter<'r> {
500 self.iter()
501 }
502}
503
504impl fmt::Debug for ByteRecord {
505 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
506 write!(f, "ByteRecord(")?;
507 f.debug_list()
508 .entries(self.iter().map(debug::Bytes))
509 .finish()?;
510 write!(f, ")")?;
511 Ok(())
512 }
513}
514
515pub struct ByteRecordIter<'a> {
516 record: &'a ByteRecord,
517 current_forward: usize,
518 current_backward: usize,
519}
520
521impl ExactSizeIterator for ByteRecordIter<'_> {}
522
523impl<'a> Iterator for ByteRecordIter<'a> {
524 type Item = &'a [u8];
525
526 #[inline]
527 fn next(&mut self) -> Option<Self::Item> {
528 if self.current_forward == self.current_backward {
529 None
530 } else {
531 let (start, end) = self.record.bounds[self.current_forward];
532
533 self.current_forward += 1;
534
535 Some(&self.record.data[start..end])
536 }
537 }
538
539 #[inline]
540 fn size_hint(&self) -> (usize, Option<usize>) {
541 let size = self.current_backward - self.current_forward;
542
543 (size, Some(size))
544 }
545
546 #[inline]
547 fn count(self) -> usize
548 where
549 Self: Sized,
550 {
551 self.len()
552 }
553}
554
555impl DoubleEndedIterator for ByteRecordIter<'_> {
556 #[inline]
557 fn next_back(&mut self) -> Option<Self::Item> {
558 if self.current_forward == self.current_backward {
559 None
560 } else {
561 self.current_backward -= 1;
562
563 let (start, end) = self.record.bounds[self.current_backward];
564
565 Some(&self.record.data[start..end])
566 }
567 }
568}
569
570pub(crate) struct ByteRecordBuilder<'r> {
571 record: &'r mut ByteRecord,
572 start: usize,
573}
574
575impl<'r> ByteRecordBuilder<'r> {
576 #[inline(always)]
577 pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
578 Self { record, start: 0 }
579 }
580
581 #[inline(always)]
582 pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
583 self.record.data.extend_from_slice(slice);
584 }
585
586 #[inline(always)]
587 pub(crate) fn push_byte(&mut self, byte: u8) {
588 self.record.data.push(byte);
589 }
590
591 #[inline]
592 pub(crate) fn finalize_field(&mut self) {
593 let start = self.start;
594 self.start = self.record.data.len();
595
596 self.record.bounds.push((start, self.start));
597 }
598
599 #[inline]
600 pub(crate) fn finalize_record(&mut self) {
601 self.finalize_field();
602
603 if let Some((start, end)) = self.record.bounds.last_mut() {
604 if let Some(b'\r') = self.record.data[*start..*end].last() {
605 *end -= 1;
606 self.record.data.pop();
607 }
608 }
609 }
610
611 #[inline]
612 pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
613 let start = self.start;
614 self.start = self.record.data.len() + offset;
615
616 self.record.bounds.push((start, self.start));
617
618 self.start += 1;
619 }
620
621 #[inline(always)]
622 pub(crate) fn bump(&mut self) {
623 self.start +=
624 (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
625 }
626}
627
628#[derive(Default, Clone, Eq)]
641pub struct StringRecord {
642 inner: ByteRecord,
643}
644
645impl StringRecord {
646 pub fn new() -> Self {
648 Self {
649 inner: ByteRecord::new(),
650 }
651 }
652
653 #[inline(always)]
655 pub fn as_byte_record(&self) -> &ByteRecord {
656 &self.inner
657 }
658
659 #[inline(always)]
661 pub fn len(&self) -> usize {
662 self.inner.len()
663 }
664
665 #[inline(always)]
667 pub fn is_empty(&self) -> bool {
668 self.inner.is_empty()
669 }
670
671 #[inline(always)]
673 pub fn clear(&mut self) {
674 self.inner.clear();
675 }
676
677 #[inline(always)]
678 pub(crate) fn as_inner_mut(&mut self) -> &mut ByteRecord {
679 &mut self.inner
680 }
681
682 #[inline]
683 pub(crate) fn validate_utf8(&mut self) -> bool {
684 let bytes = self.inner.as_slice();
685
686 if bytes.is_ascii() {
688 true
689 } else if simdutf8::basic::from_utf8(bytes).is_err() {
690 self.inner.clear();
692 false
693 } else {
694 true
695 }
696 }
697
698 #[inline]
700 pub fn get(&self, index: usize) -> Option<&str> {
701 self.inner.get(index).map(|slice| {
702 debug_assert!(std::str::from_utf8(slice).is_ok());
703 unsafe { std::str::from_utf8_unchecked(slice) }
704 })
705 }
706
707 #[inline]
709 pub fn iter(&self) -> StringRecordIter<'_> {
710 StringRecordIter {
711 record: &self.inner,
712 current_forward: 0,
713 current_backward: self.len(),
714 }
715 }
716
717 #[inline(always)]
719 pub fn push_field(&mut self, field: &str) {
720 self.as_inner_mut().push_field(field.as_bytes());
721 }
722}
723
724impl PartialEq for StringRecord {
725 #[inline(always)]
726 fn eq(&self, other: &Self) -> bool {
727 self.inner.eq(&other.inner)
728 }
729}
730
731impl Hash for StringRecord {
732 #[inline(always)]
733 fn hash<H: Hasher>(&self, state: &mut H) {
734 self.inner.hash(state);
735 }
736}
737
738impl Index<usize> for StringRecord {
739 type Output = str;
740
741 #[inline]
742 fn index(&self, i: usize) -> &str {
743 self.get(i).unwrap()
744 }
745}
746
747impl<T: AsRef<str>> Extend<T> for StringRecord {
748 #[inline]
749 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
750 let iter = iter.into_iter();
751 let size_hint = iter.size_hint();
752
753 self.inner
754 .bounds
755 .reserve(size_hint.1.unwrap_or(size_hint.0));
756
757 for x in iter {
758 self.push_field(x.as_ref());
759 }
760 }
761}
762
763impl<T: AsRef<str>> FromIterator<T> for StringRecord {
764 #[inline]
765 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
766 let mut record = Self::new();
767 record.extend(iter);
768 record
769 }
770}
771
772impl<'r> IntoIterator for &'r StringRecord {
773 type IntoIter = StringRecordIter<'r>;
774 type Item = &'r str;
775
776 #[inline]
777 fn into_iter(self) -> StringRecordIter<'r> {
778 self.iter()
779 }
780}
781
782impl fmt::Debug for StringRecord {
783 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
784 write!(f, "StringRecord(")?;
785 f.debug_list().entries(self.iter()).finish()?;
786 write!(f, ")")?;
787 Ok(())
788 }
789}
790
791pub struct StringRecordIter<'a> {
792 record: &'a ByteRecord,
793 current_forward: usize,
794 current_backward: usize,
795}
796
797impl ExactSizeIterator for StringRecordIter<'_> {}
798
799impl<'a> Iterator for StringRecordIter<'a> {
800 type Item = &'a str;
801
802 #[inline]
803 fn next(&mut self) -> Option<Self::Item> {
804 if self.current_forward == self.current_backward {
805 None
806 } else {
807 let (start, end) = self.record.bounds[self.current_forward];
808
809 self.current_forward += 1;
810
811 Some(unsafe { std::str::from_utf8_unchecked(&self.record.data[start..end]) })
812 }
813 }
814
815 #[inline]
816 fn size_hint(&self) -> (usize, Option<usize>) {
817 let size = self.current_backward - self.current_forward;
818
819 (size, Some(size))
820 }
821
822 #[inline]
823 fn count(self) -> usize
824 where
825 Self: Sized,
826 {
827 self.len()
828 }
829}
830
831impl DoubleEndedIterator for StringRecordIter<'_> {
832 #[inline]
833 fn next_back(&mut self) -> Option<Self::Item> {
834 if self.current_forward == self.current_backward {
835 None
836 } else {
837 self.current_backward -= 1;
838
839 let (start, end) = self.record.bounds[self.current_backward];
840
841 Some(unsafe { std::str::from_utf8_unchecked(&self.record.data[start..end]) })
842 }
843 }
844}
845
#[cfg(test)]
mod tests {
    use super::*;

    // Field access, iteration in both directions and out-of-range lookups on
    // a zero-copy view.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');

        assert_eq!(record.len(), 3);
        assert!(!record.is_empty());

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        let reversed: Vec<&[u8]> = vec![b"age", b"surname", b"name"];
        assert_eq!(record.iter().rev().collect::<Vec<_>>(), reversed);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        // The first out-of-range index is the actual boundary to check.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    // Pushing fields into an owned record and reading them back.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        assert_eq!(record.len(), 3);
        assert!(!record.is_empty());

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);
    }

    // A record finalized by the builder (trailing `\r` dropped) must remain
    // consistent when fields are pushed to it afterwards.
    #[test]
    fn test_mutate_record_after_read() {
        let mut record = ByteRecord::new();
        let mut builder = ByteRecordBuilder::wrap(&mut record);
        builder.extend_from_slice(b"test\r");
        builder.finalize_record();

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test"]);

        record.push_field(b"next");

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test", b"next"]);
    }

    // Reversing flips both the order of the fields and the bytes of each
    // field; reversing twice gives the original record back.
    #[test]
    fn test_reverse_byte_record() {
        let record = brec!["name", "surname", "age"];
        let mut reversed = record.clone();
        reversed.reverse();

        assert_eq!(reversed, brec!["ega", "emanrus", "eman"]);
        reversed.reverse();
        assert_eq!(record, reversed);
    }
}