1use std::borrow::Cow;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::io::Write;
5use std::ops::Index;
6
7use crate::debug;
8use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted, AppendOnlyView};
9
10#[cfg(feature = "str")]
11use crate::error::{self, Error, ErrorKind};
12
/// A borrowed, non-allocating view over a single record.
///
/// The record does not own its bytes: it only references the record's raw
/// bytes together with the offsets of the field separators found in them.
pub struct ZeroCopyByteRecord<'a> {
    /// Raw bytes of the record (stored after going through
    /// `trim_trailing_crlf` in the constructor).
    slice: &'a [u8],
    /// Offsets, within `slice`, of the one-byte separators between fields.
    /// A record with `n` separators exposes `n + 1` fields.
    seps: &'a [usize],
    /// Quote byte handed to the unquoting/unescaping helpers.
    pub(crate) quote: u8,
}
19
20impl<'a> ZeroCopyByteRecord<'a> {
21 #[inline]
22 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
23 Self {
24 slice: trim_trailing_crlf(slice),
25 seps,
26 quote,
27 }
28 }
29
30 #[inline]
31 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
32 (self.seps.to_vec(), self.slice.to_vec())
33 }
34
35 #[inline(always)]
38 pub fn len(&self) -> usize {
39 self.seps.len() + 1
42 }
43
44 #[inline(always)]
46 pub fn is_empty(&self) -> bool {
47 false
48 }
49
50 #[inline(always)]
52 pub fn as_slice(&self) -> &[u8] {
53 self.slice
54 }
55
56 #[inline]
61 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
62 ZeroCopyByteRecordIter {
63 record: self,
64 current_forward: 0,
65 current_backward: self.len(),
66 }
67 }
68
69 #[inline]
73 pub fn unquoted_iter(&self) -> ZeroCopyByteRecordUnquotedIter<'_> {
74 ZeroCopyByteRecordUnquotedIter {
75 record: self,
76 current_forward: 0,
77 current_backward: self.len(),
78 }
79 }
80
81 #[inline]
85 pub fn unescaped_iter(&self) -> ZeroCopyByteRecordUnescapedIter<'_> {
86 ZeroCopyByteRecordUnescapedIter {
87 record: self,
88 current_forward: 0,
89 current_backward: self.len(),
90 }
91 }
92
93 #[inline]
99 pub fn get(&self, index: usize) -> Option<&[u8]> {
100 let len = self.seps.len();
101
102 if index > len {
103 return None;
104 }
105
106 let start = if index == 0 {
107 0
108 } else {
109 self.seps[index - 1] + 1
110 };
111
112 let end = if index == len {
113 self.slice.len()
114 } else {
115 self.seps[index]
116 };
117
118 Some(&self.slice[start..end])
119 }
120
121 #[inline]
131 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
132 self.get(index)
133 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
134 }
135
136 #[inline]
146 pub fn unescape(&self, index: usize) -> Option<Cow<'_, [u8]>> {
147 self.get(index).map(|cell| {
148 if let Some(trimmed) = unquoted(cell, self.quote) {
149 unescape(trimmed, self.quote)
150 } else {
151 Cow::Borrowed(cell)
152 }
153 })
154 }
155
156 fn read_byte_record(&self, record: &mut ByteRecord) {
157 record.clear();
158
159 for cell in self.iter() {
160 if let Some(trimmed) = unquoted(cell, self.quote) {
161 unescape_to(trimmed, self.quote, &mut record.data);
162
163 let bounds_len = record.bounds.len();
164
165 let start = if bounds_len == 0 {
166 0
167 } else {
168 record.bounds[bounds_len - 1].1
169 };
170
171 record.bounds.push((start, record.data.len()));
172 } else {
173 record.push_field(cell);
174 }
175 }
176 }
177
178 #[inline]
180 pub fn to_byte_record(&self) -> ByteRecord {
181 let mut record = ByteRecord::new();
182 self.read_byte_record(&mut record);
183 record
184 }
185
186 #[inline]
187 pub(crate) fn to_byte_record_in_reverse(&self) -> ByteRecord {
188 let mut record = ByteRecord::new();
189
190 for cell in self.unescaped_iter().rev() {
191 record.push_field_in_reverse(&cell);
192 }
193
194 record
195 }
196}
197
198impl fmt::Debug for ZeroCopyByteRecord<'_> {
199 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
200 write!(f, "ZeroCopyByteRecord(")?;
201 f.debug_list()
202 .entries(self.iter().map(debug::Bytes))
203 .finish()?;
204 write!(f, ")")?;
205 Ok(())
206 }
207}
208
/// Generates a double-ended, exact-size iterator over the fields of a
/// `ZeroCopyByteRecord`.
///
/// * `$name` - name of the generated iterator struct.
/// * `$method` - accessor called on the record with a field index; it must
///   return `Option<$out_type>`.
/// * `$out_type` - item type of the iterator (may mention the lifetime `'a`).
macro_rules! make_zero_copy_iterator {
    ($name:ident, $method: ident, $out_type: ty) => {
        pub struct $name<'a> {
            record: &'a ZeroCopyByteRecord<'a>,
            // Index of the next field to yield from the front.
            current_forward: usize,
            // One past the index of the next field to yield from the back.
            current_backward: usize,
        }

        impl ExactSizeIterator for $name<'_> {}

        impl<'a> Iterator for $name<'a> {
            type Item = $out_type;

            #[inline]
            fn next(&mut self) -> Option<Self::Item> {
                // Both cursors meeting means every field was consumed.
                if self.current_forward == self.current_backward {
                    return None;
                }

                let index = self.current_forward;
                self.current_forward = index + 1;

                self.record.$method(index)
            }

            #[inline]
            fn size_hint(&self) -> (usize, Option<usize>) {
                let remaining = self.current_backward - self.current_forward;

                (remaining, Some(remaining))
            }

            #[inline]
            fn count(self) -> usize
            where
                Self: Sized,
            {
                // The remaining length is known exactly, no need to iterate.
                ExactSizeIterator::len(&self)
            }
        }

        impl DoubleEndedIterator for $name<'_> {
            #[inline]
            fn next_back(&mut self) -> Option<Self::Item> {
                if self.current_forward == self.current_backward {
                    return None;
                }

                self.current_backward -= 1;

                self.record.$method(self.current_backward)
            }
        }
    };
}
265
// Iterator yielding each field as returned by `ZeroCopyByteRecord::get`.
make_zero_copy_iterator!(ZeroCopyByteRecordIter, get, &'a [u8]);
// Iterator yielding each field as returned by `ZeroCopyByteRecord::unquote`.
make_zero_copy_iterator!(ZeroCopyByteRecordUnquotedIter, unquote, &'a [u8]);
// Iterator yielding each field as returned by `ZeroCopyByteRecord::unescape`
// (a `Cow`, since that accessor may return owned bytes).
make_zero_copy_iterator!(ZeroCopyByteRecordUnescapedIter, unescape, Cow<'a, [u8]>);
269
270impl Index<usize> for ZeroCopyByteRecord<'_> {
271 type Output = [u8];
272
273 #[inline]
274 fn index(&self, i: usize) -> &[u8] {
275 self.get(i).unwrap()
276 }
277}
278
/// An owned record: every field's bytes are stored in one shared buffer.
///
/// Equality and hashing are implemented by hand (see the `PartialEq` and
/// `Hash` impls) rather than derived.
#[derive(Default, Clone, Eq)]
pub struct ByteRecord {
    /// Buffer holding the bytes of all fields.
    pub(crate) data: Vec<u8>,
    /// One `(start, end)` pair per field, delimiting `data[start..end]`.
    pub(crate) bounds: Vec<(usize, usize)>,
}
296
297impl ByteRecord {
298 pub fn new() -> Self {
300 Self::default()
301 }
302
303 #[inline]
305 pub fn len(&self) -> usize {
306 self.bounds.len()
307 }
308
309 #[inline]
311 pub fn is_empty(&self) -> bool {
312 self.len() == 0
313 }
314
315 #[inline]
317 pub fn clear(&mut self) {
318 self.data.clear();
319 self.bounds.clear();
320 }
321
322 #[inline]
325 pub fn truncate(&mut self, len: usize) {
326 self.bounds.truncate(len);
327
328 if let Some((_, end)) = self.bounds.last() {
329 self.data.truncate(*end);
330 } else {
331 self.data.clear();
332 }
333 }
334
335 #[inline]
342 pub fn as_slice(&self) -> &[u8] {
343 &self.data
344 }
345
346 #[inline]
347 #[cfg(feature = "binary")]
348 pub(crate) fn as_parts(&self) -> (&[(usize, usize)], &[u8]) {
349 (&self.bounds, &self.data)
350 }
351
352 #[inline]
354 pub fn iter(&self) -> ByteRecordIter<'_> {
355 ByteRecordIter {
356 record: self,
357 current_forward: 0,
358 current_backward: self.len(),
359 }
360 }
361
362 #[inline(always)]
364 pub fn push_field(&mut self, bytes: &[u8]) {
365 self.data.extend_from_slice(bytes.as_ref());
366
367 let bounds_len = self.bounds.len();
368
369 let start = if bounds_len == 0 {
370 0
371 } else {
372 self.bounds[bounds_len - 1].1
373 };
374
375 self.bounds.push((start, self.data.len()));
376 }
377
378 #[inline]
381 pub fn fmt_field<F: fmt::Display>(&mut self, target: &F) {
382 write!(&mut self.data, "{}", target).unwrap();
383
384 let bounds_len = self.bounds.len();
385
386 let start = if bounds_len == 0 {
387 0
388 } else {
389 self.bounds[bounds_len - 1].1
390 };
391
392 self.bounds.push((start, self.data.len()));
393 }
394
395 #[inline]
398 pub fn write_field<F>(&mut self, callback: F)
399 where
400 F: FnOnce(AppendOnlyView<u8>),
401 {
402 callback(AppendOnlyView::new(&mut self.data));
403
404 let bounds_len = self.bounds.len();
405
406 let start = if bounds_len == 0 {
407 0
408 } else {
409 self.bounds[bounds_len - 1].1
410 };
411
412 self.bounds.push((start, self.data.len()));
413 }
414
415 #[inline]
416 fn push_field_in_reverse(&mut self, bytes: &[u8]) {
417 self.data.extend_from_slice(bytes);
418
419 let bounds_len = self.bounds.len();
420
421 let start = if bounds_len == 0 {
422 0
423 } else {
424 self.bounds[bounds_len - 1].1
425 };
426
427 let bounds = (start, self.data.len());
428 self.data[bounds.0..bounds.1].reverse();
429
430 self.bounds.push(bounds);
431 }
432
433 #[inline]
435 pub fn get(&self, index: usize) -> Option<&[u8]> {
436 self.bounds
437 .get(index)
438 .copied()
439 .map(|(start, end)| &self.data[start..end])
440 }
441
442 #[cfg(feature = "str")]
444 pub fn into_string_record(self) -> error::Result<StringRecord> {
445 let mut new_record = StringRecord { inner: self };
446
447 if !new_record.validate_utf8() {
448 Err(Error::new(ErrorKind::Utf8Error))
449 } else {
450 Ok(new_record)
451 }
452 }
453
454 pub(crate) fn reverse(&mut self) {
455 self.data.reverse();
456 self.bounds.reverse();
457
458 let len = self.data.len();
459
460 for (start, end) in self.bounds.iter_mut() {
461 let new_end = len - *start;
462 let new_start = len - *end;
463
464 *start = new_start;
465 *end = new_end;
466 }
467 }
468}
469
470impl PartialEq for ByteRecord {
471 fn eq(&self, other: &Self) -> bool {
472 if self.bounds.len() != other.bounds.len() {
473 return false;
474 }
475
476 self.iter()
477 .zip(other.iter())
478 .all(|(self_cell, other_cell)| self_cell == other_cell)
479 }
480}
481
482impl Hash for ByteRecord {
483 #[inline]
484 fn hash<H: Hasher>(&self, state: &mut H) {
485 state.write_usize(self.len());
486
487 for cell in self.iter() {
488 state.write(cell);
489 }
490 }
491}
492
493impl Index<usize> for ByteRecord {
494 type Output = [u8];
495
496 #[inline]
497 fn index(&self, i: usize) -> &[u8] {
498 self.get(i).unwrap()
499 }
500}
501
502impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
503 #[inline]
504 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
505 let iter = iter.into_iter();
506 let size_hint = iter.size_hint();
507
508 self.bounds.reserve(size_hint.1.unwrap_or(size_hint.0));
509
510 for x in iter {
511 self.push_field(x.as_ref());
512 }
513 }
514}
515
516impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
517 #[inline]
518 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
519 let mut record = Self::new();
520 record.extend(iter);
521 record
522 }
523}
524
525impl<I, T> From<I> for ByteRecord
526where
527 I: IntoIterator<Item = T>,
528 T: AsRef<[u8]>,
529{
530 fn from(value: I) -> Self {
531 let mut record = Self::new();
532
533 for cell in value.into_iter() {
534 record.push_field(cell.as_ref());
535 }
536
537 record
538 }
539}
540
541impl<'r> IntoIterator for &'r ByteRecord {
542 type IntoIter = ByteRecordIter<'r>;
543 type Item = &'r [u8];
544
545 #[inline]
546 fn into_iter(self) -> ByteRecordIter<'r> {
547 self.iter()
548 }
549}
550
551impl fmt::Debug for ByteRecord {
552 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
553 write!(f, "ByteRecord(")?;
554 f.debug_list()
555 .entries(self.iter().map(debug::Bytes))
556 .finish()?;
557 write!(f, ")")?;
558 Ok(())
559 }
560}
561
562pub struct ByteRecordIter<'a> {
563 record: &'a ByteRecord,
564 current_forward: usize,
565 current_backward: usize,
566}
567
568impl ExactSizeIterator for ByteRecordIter<'_> {}
569
570impl<'a> Iterator for ByteRecordIter<'a> {
571 type Item = &'a [u8];
572
573 #[inline]
574 fn next(&mut self) -> Option<Self::Item> {
575 if self.current_forward == self.current_backward {
576 None
577 } else {
578 let (start, end) = self.record.bounds[self.current_forward];
579
580 self.current_forward += 1;
581
582 Some(&self.record.data[start..end])
583 }
584 }
585
586 #[inline]
587 fn size_hint(&self) -> (usize, Option<usize>) {
588 let size = self.current_backward - self.current_forward;
589
590 (size, Some(size))
591 }
592
593 #[inline]
594 fn count(self) -> usize
595 where
596 Self: Sized,
597 {
598 self.len()
599 }
600}
601
602impl DoubleEndedIterator for ByteRecordIter<'_> {
603 #[inline]
604 fn next_back(&mut self) -> Option<Self::Item> {
605 if self.current_forward == self.current_backward {
606 None
607 } else {
608 self.current_backward -= 1;
609
610 let (start, end) = self.record.bounds[self.current_backward];
611
612 Some(&self.record.data[start..end])
613 }
614 }
615}
616
/// Crate-internal helper that fills a [`ByteRecord`] incrementally: bytes are
/// appended to the record's buffer and field boundaries are recorded through
/// the `finalize_*` methods.
pub(crate) struct ByteRecordBuilder<'r> {
    /// Record being filled.
    record: &'r mut ByteRecord,
    /// Offset, in the record's buffer, at which the field currently being
    /// built starts.
    start: usize,
}

impl<'r> ByteRecordBuilder<'r> {
    /// Wraps `record`, with the first field starting at offset 0.
    ///
    /// NOTE(review): the record is not cleared here; presumably callers hand
    /// over an empty record - confirm against call sites.
    #[inline(always)]
    pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
        Self { record, start: 0 }
    }

    /// Appends `slice` to the record's byte buffer (no field is closed).
    #[inline(always)]
    pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
        self.record.data.extend_from_slice(slice);
    }

    /// Appends a single byte to the record's byte buffer (no field is closed).
    #[inline(always)]
    pub(crate) fn push_byte(&mut self, byte: u8) {
        self.record.data.push(byte);
    }

    /// Closes the current field at the end of the buffer; the next field
    /// starts right there.
    #[inline]
    pub(crate) fn finalize_field(&mut self) {
        let start = self.start;
        self.start = self.record.data.len();

        self.record.bounds.push((start, self.start));
    }

    /// Closes the current field, then drops a trailing `\r` from that last
    /// field, if any, by shrinking its bound and popping the byte.
    #[inline]
    pub(crate) fn finalize_record(&mut self) {
        self.finalize_field();

        if let Some((start, end)) = self.record.bounds.last_mut() {
            if let Some(b'\r') = self.record.data[*start..*end].last() {
                *end -= 1;
                // The field was just closed at the end of the buffer, so the
                // last byte of the buffer is that `\r`.
                self.record.data.pop();
            }
        }
    }

    /// Closes the current field `offset` bytes past the current end of the
    /// buffer, and makes the next field start one byte after that.
    ///
    /// NOTE(review): the recorded bound points past the bytes currently in the
    /// buffer; presumably the caller appends those bytes (plus one separator
    /// byte) right afterwards - confirm against call sites.
    #[inline]
    pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
        let start = self.start;
        self.start = self.record.data.len() + offset;

        self.record.bounds.push((start, self.start));

        self.start += 1;
    }

    /// Advances `start` by one, unless `start` equals the start offset of the
    /// last recorded field (or 0 when no field was recorded yet).
    ///
    /// Written as a bool-to-integer cast rather than a branch.
    #[inline(always)]
    pub(crate) fn bump(&mut self) {
        self.start +=
            (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
    }
}
674
675#[cfg(feature = "str")]
676mod string_record {
677 use super::*;
678
    /// An owned record whose fields are exposed as `&str`.
    ///
    /// It is a thin wrapper around a [`ByteRecord`]; the accessors convert
    /// field bytes to `&str` without re-checking them, so the wrapped record
    /// is expected to hold valid UTF-8 only.
    #[derive(Default, Clone, Eq)]
    pub struct StringRecord {
        /// Underlying byte record.
        pub(super) inner: ByteRecord,
    }
695
696 impl StringRecord {
697 pub fn new() -> Self {
699 Self {
700 inner: ByteRecord::new(),
701 }
702 }
703
704 #[inline(always)]
706 pub fn as_byte_record(&self) -> &ByteRecord {
707 &self.inner
708 }
709
710 #[inline(always)]
712 pub fn len(&self) -> usize {
713 self.inner.len()
714 }
715
716 #[inline(always)]
718 pub fn is_empty(&self) -> bool {
719 self.inner.is_empty()
720 }
721
722 #[inline(always)]
724 pub fn clear(&mut self) {
725 self.inner.clear();
726 }
727
728 #[inline(always)]
729 pub(crate) fn as_inner_mut(&mut self) -> &mut ByteRecord {
730 &mut self.inner
731 }
732
733 #[inline]
734 pub(crate) fn validate_utf8(&mut self) -> bool {
735 let bytes = self.inner.as_slice();
736
737 if bytes.is_ascii() {
739 true
740 } else if simdutf8::basic::from_utf8(bytes).is_err() {
741 self.inner.clear();
743 false
744 } else {
745 true
746 }
747 }
748
749 #[inline]
751 pub fn get(&self, index: usize) -> Option<&str> {
752 self.inner.get(index).map(|slice| {
753 debug_assert!(std::str::from_utf8(slice).is_ok());
754 unsafe { std::str::from_utf8_unchecked(slice) }
755 })
756 }
757
758 #[inline]
760 pub fn iter(&self) -> StringRecordIter<'_> {
761 StringRecordIter {
762 record: &self.inner,
763 current_forward: 0,
764 current_backward: self.len(),
765 }
766 }
767
768 #[inline(always)]
770 pub fn push_field(&mut self, field: &str) {
771 self.as_inner_mut().push_field(field.as_bytes());
772 }
773 }
774
775 impl PartialEq for StringRecord {
776 #[inline(always)]
777 fn eq(&self, other: &Self) -> bool {
778 self.inner.eq(&other.inner)
779 }
780 }
781
782 impl Hash for StringRecord {
783 #[inline(always)]
784 fn hash<H: Hasher>(&self, state: &mut H) {
785 self.inner.hash(state);
786 }
787 }
788
789 impl Index<usize> for StringRecord {
790 type Output = str;
791
792 #[inline]
793 fn index(&self, i: usize) -> &str {
794 self.get(i).unwrap()
795 }
796 }
797
798 impl<T: AsRef<str>> Extend<T> for StringRecord {
799 #[inline]
800 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
801 let iter = iter.into_iter();
802 let size_hint = iter.size_hint();
803
804 self.inner
805 .bounds
806 .reserve(size_hint.1.unwrap_or(size_hint.0));
807
808 for x in iter {
809 self.push_field(x.as_ref());
810 }
811 }
812 }
813
814 impl<T: AsRef<str>> FromIterator<T> for StringRecord {
815 #[inline]
816 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
817 let mut record = Self::new();
818 record.extend(iter);
819 record
820 }
821 }
822
823 impl<'r> IntoIterator for &'r StringRecord {
824 type IntoIter = StringRecordIter<'r>;
825 type Item = &'r str;
826
827 #[inline]
828 fn into_iter(self) -> StringRecordIter<'r> {
829 self.iter()
830 }
831 }
832
833 impl fmt::Debug for StringRecord {
834 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
835 write!(f, "StringRecord(")?;
836 f.debug_list().entries(self.iter()).finish()?;
837 write!(f, ")")?;
838 Ok(())
839 }
840 }
841
842 pub struct StringRecordIter<'a> {
843 record: &'a ByteRecord,
844 current_forward: usize,
845 current_backward: usize,
846 }
847
848 impl ExactSizeIterator for StringRecordIter<'_> {}
849
850 impl<'a> Iterator for StringRecordIter<'a> {
851 type Item = &'a str;
852
853 #[inline]
854 fn next(&mut self) -> Option<Self::Item> {
855 if self.current_forward == self.current_backward {
856 None
857 } else {
858 let (start, end) = self.record.bounds[self.current_forward];
859
860 self.current_forward += 1;
861
862 Some(unsafe { std::str::from_utf8_unchecked(&self.record.data[start..end]) })
863 }
864 }
865
866 #[inline]
867 fn size_hint(&self) -> (usize, Option<usize>) {
868 let size = self.current_backward - self.current_forward;
869
870 (size, Some(size))
871 }
872
873 #[inline]
874 fn count(self) -> usize
875 where
876 Self: Sized,
877 {
878 self.len()
879 }
880 }
881
882 impl DoubleEndedIterator for StringRecordIter<'_> {
883 #[inline]
884 fn next_back(&mut self) -> Option<Self::Item> {
885 if self.current_forward == self.current_backward {
886 None
887 } else {
888 self.current_backward -= 1;
889
890 let (start, end) = self.record.bounds[self.current_backward];
891
892 Some(unsafe { std::str::from_utf8_unchecked(&self.record.data[start..end]) })
893 }
894 }
895 }
896}
897
898#[cfg(feature = "str")]
899pub use string_record::*;
900
#[cfg(test)]
mod tests {
    use super::*;

    /// Field access on a borrowed record, including both out-of-range cases.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');

        assert_eq!(record.len(), 3);

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        // First out-of-range index (boundary), then one further away.
        assert_eq!(record.get(3), None);
        assert_eq!(record.get(4), None);
    }

    /// Pushing fields into an owned record and reading them back.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        record.push_field(b"name");
        record.push_field(b"surname");
        record.push_field(b"age");

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some::<&[u8]>(b"name"));
        assert_eq!(record.get(1), Some::<&[u8]>(b"surname"));
        assert_eq!(record.get(2), Some::<&[u8]>(b"age"));
        assert_eq!(record.get(3), None);
    }

    /// `fmt_field` stores the `Display` rendering of each value.
    #[test]
    fn test_fmt_field() {
        let mut record = ByteRecord::new();

        record.fmt_field(&45);
        record.fmt_field(&5.6);
        record.fmt_field(&"test");

        assert_eq!(record, brec!["45", "5.6", "test"]);
    }

    /// `write_field` stores whatever the callback writes as a single field.
    #[test]
    fn test_write_field() {
        let mut record = ByteRecord::new();

        record.write_field(|bytes| serde_json::to_writer(bytes, &vec!["hello", "world"]).unwrap());

        record.push_field(b"test");

        assert_eq!(record, brec!["[\"hello\",\"world\"]", "test"]);
    }

    /// A record finalized by the builder (trailing `\r` dropped) can still be
    /// extended afterwards.
    #[test]
    fn test_mutate_record_after_read() {
        let mut record = ByteRecord::new();
        let mut builder = ByteRecordBuilder::wrap(&mut record);
        builder.extend_from_slice(b"test\r");
        builder.finalize_record();

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test"]);

        record.push_field(b"next");

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test", b"next"]);
    }

    /// Reversing twice gives back the original record.
    #[test]
    fn test_reverse_byte_record() {
        let record = brec!["name", "surname", "age"];
        let mut reversed = record.clone();
        reversed.reverse();

        assert_eq!(reversed, brec!["ega", "emanrus", "eman"]);
        reversed.reverse();
        assert_eq!(record, reversed);
    }
}
988}