1use std::borrow::Cow;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::ops::Index;
5
6use crate::debug;
7use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
8
/// A borrowed, non-allocating view over the bytes of a single record.
///
/// Field boundaries are not stored as ranges: `seps` lists the offsets, within
/// `slice`, of the bytes separating consecutive fields.
pub struct ZeroCopyByteRecord<'a> {
    /// Bytes of the whole record.
    slice: &'a [u8],
    /// Offsets in `slice` of the bytes separating consecutive fields.
    seps: &'a [usize],
    /// Quote byte handed to the unquoting and unescaping helpers.
    pub(crate) quote: u8,
}
15
16impl<'a> ZeroCopyByteRecord<'a> {
17 #[inline]
18 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
19 Self {
20 slice: trim_trailing_crlf(slice),
21 seps,
22 quote,
23 }
24 }
25
26 #[inline]
27 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
28 (self.seps.to_vec(), self.slice.to_vec())
29 }
30
31 #[inline(always)]
34 pub fn len(&self) -> usize {
35 self.seps.len() + 1
38 }
39
40 #[inline(always)]
42 pub fn is_empty(&self) -> bool {
43 false
44 }
45
46 #[inline(always)]
48 pub fn as_slice(&self) -> &[u8] {
49 self.slice
50 }
51
52 #[inline]
57 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
58 ZeroCopyByteRecordIter {
59 record: self,
60 current_forward: 0,
61 current_backward: self.len(),
62 }
63 }
64
65 #[inline]
69 pub fn unquoted_iter(&self) -> ZeroCopyByteRecordUnquotedIter<'_> {
70 ZeroCopyByteRecordUnquotedIter {
71 record: self,
72 current_forward: 0,
73 current_backward: self.len(),
74 }
75 }
76
77 #[inline]
81 pub fn unescaped_iter(&self) -> ZeroCopyByteRecordUnescapedIter<'_> {
82 ZeroCopyByteRecordUnescapedIter {
83 record: self,
84 current_forward: 0,
85 current_backward: self.len(),
86 }
87 }
88
89 #[inline]
95 pub fn get(&self, index: usize) -> Option<&[u8]> {
96 let len = self.seps.len();
97
98 if index > len {
99 return None;
100 }
101
102 let start = if index == 0 {
103 0
104 } else {
105 self.seps[index - 1] + 1
106 };
107
108 let end = if index == len {
109 self.slice.len()
110 } else {
111 self.seps[index]
112 };
113
114 Some(&self.slice[start..end])
115 }
116
117 #[inline]
127 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
128 self.get(index)
129 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
130 }
131
132 #[inline]
142 pub fn unescape(&self, index: usize) -> Option<Cow<[u8]>> {
143 self.unquote(index).map(|cell| {
144 if let Some(trimmed) = unquoted(cell, self.quote) {
145 unescape(trimmed, self.quote)
146 } else {
147 Cow::Borrowed(cell)
148 }
149 })
150 }
151
152 fn read_byte_record(&self, record: &mut ByteRecord) {
153 record.clear();
154
155 for cell in self.iter() {
156 if let Some(trimmed) = unquoted(cell, self.quote) {
157 unescape_to(trimmed, self.quote, &mut record.data);
158
159 let bounds_len = record.bounds.len();
160
161 let start = if bounds_len == 0 {
162 0
163 } else {
164 record.bounds[bounds_len - 1].1
165 };
166
167 record.bounds.push((start, record.data.len()));
168 } else {
169 record.push_field(cell);
170 }
171 }
172 }
173
174 #[inline]
176 pub fn to_byte_record(&self) -> ByteRecord {
177 let mut record = ByteRecord::new();
178 self.read_byte_record(&mut record);
179 record
180 }
181
182 #[inline]
183 pub(crate) fn to_byte_record_in_reverse(&self) -> ByteRecord {
184 let mut record = ByteRecord::new();
185
186 for cell in self.unescaped_iter().rev() {
187 record.push_field_in_reverse(&cell);
188 }
189
190 record
191 }
192}
193
194impl fmt::Debug for ZeroCopyByteRecord<'_> {
195 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
196 write!(f, "ZeroCopyByteRecord(")?;
197 f.debug_list()
198 .entries(self.iter().map(debug::Bytes))
199 .finish()?;
200 write!(f, ")")?;
201 Ok(())
202 }
203}
204
205macro_rules! make_zero_copy_iterator {
206 ($name:ident, $method: ident, $out_type: ty) => {
207 pub struct $name<'a> {
208 record: &'a ZeroCopyByteRecord<'a>,
209 current_forward: usize,
210 current_backward: usize,
211 }
212
213 impl ExactSizeIterator for $name<'_> {}
214
215 impl<'a> Iterator for $name<'a> {
216 type Item = $out_type;
217
218 #[inline]
219 fn next(&mut self) -> Option<Self::Item> {
220 if self.current_forward == self.current_backward {
221 None
222 } else {
223 let cell = self.record.$method(self.current_forward);
224
225 self.current_forward += 1;
226
227 cell
228 }
229 }
230
231 #[inline]
232 fn size_hint(&self) -> (usize, Option<usize>) {
233 let size = self.current_backward - self.current_forward;
234
235 (size, Some(size))
236 }
237
238 #[inline]
239 fn count(self) -> usize
240 where
241 Self: Sized,
242 {
243 self.len()
244 }
245 }
246
247 impl DoubleEndedIterator for $name<'_> {
248 #[inline]
249 fn next_back(&mut self) -> Option<Self::Item> {
250 if self.current_forward == self.current_backward {
251 None
252 } else {
253 self.current_backward -= 1;
254
255 self.record.$method(self.current_backward)
256 }
257 }
258 }
259 };
260}
261
262make_zero_copy_iterator!(ZeroCopyByteRecordIter, get, &'a [u8]);
263make_zero_copy_iterator!(ZeroCopyByteRecordUnquotedIter, unquote, &'a [u8]);
264make_zero_copy_iterator!(ZeroCopyByteRecordUnescapedIter, unescape, Cow<'a, [u8]>);
265
266impl Index<usize> for ZeroCopyByteRecord<'_> {
267 type Output = [u8];
268
269 #[inline]
270 fn index(&self, i: usize) -> &[u8] {
271 self.get(i).unwrap()
272 }
273}
274
275#[derive(Default, Clone, Eq)]
277pub struct ByteRecord {
278 data: Vec<u8>,
279 bounds: Vec<(usize, usize)>,
280}
281
282impl ByteRecord {
283 pub fn new() -> Self {
284 Self::default()
285 }
286
287 #[inline]
288 pub fn len(&self) -> usize {
289 self.bounds.len()
290 }
291
292 #[inline]
293 pub fn is_empty(&self) -> bool {
294 self.len() == 0
295 }
296
297 #[inline]
298 pub fn clear(&mut self) {
299 self.data.clear();
300 self.bounds.clear();
301 }
302
303 #[inline]
304 pub fn truncate(&mut self, len: usize) {
305 self.bounds.truncate(len);
306
307 if let Some((_, end)) = self.bounds.last() {
308 self.data.truncate(*end);
309 } else {
310 self.data.clear();
311 }
312 }
313
314 #[inline]
315 pub fn as_slice(&self) -> &[u8] {
316 &self.data
317 }
318
319 #[inline]
320 pub fn iter(&self) -> ByteRecordIter<'_> {
321 ByteRecordIter {
322 record: self,
323 current_forward: 0,
324 current_backward: self.len(),
325 }
326 }
327
328 #[inline(always)]
329 pub fn push_field(&mut self, bytes: &[u8]) {
330 self.data.extend_from_slice(bytes);
331
332 let bounds_len = self.bounds.len();
333
334 let start = if bounds_len == 0 {
335 0
336 } else {
337 self.bounds[bounds_len - 1].1
338 };
339
340 self.bounds.push((start, self.data.len()));
341 }
342
343 #[inline]
344 fn push_field_in_reverse(&mut self, bytes: &[u8]) {
345 self.data.extend_from_slice(bytes);
346
347 let bounds_len = self.bounds.len();
348
349 let start = if bounds_len == 0 {
350 0
351 } else {
352 self.bounds[bounds_len - 1].1
353 };
354
355 let bounds = (start, self.data.len());
356 self.data[bounds.0..bounds.1].reverse();
357
358 self.bounds.push(bounds);
359 }
360
361 #[inline]
362 pub fn get(&self, index: usize) -> Option<&[u8]> {
363 self.bounds
364 .get(index)
365 .copied()
366 .map(|(start, end)| &self.data[start..end])
367 }
368
369 pub(crate) fn reverse(&mut self) {
370 self.data.reverse();
371 self.bounds.reverse();
372
373 let len = self.data.len();
374
375 for (start, end) in self.bounds.iter_mut() {
376 let new_end = len - *start;
377 let new_start = len - *end;
378
379 *start = new_start;
380 *end = new_end;
381 }
382 }
383}
384
385impl PartialEq for ByteRecord {
386 fn eq(&self, other: &Self) -> bool {
387 if self.bounds.len() != other.bounds.len() {
388 return false;
389 }
390
391 self.iter()
392 .zip(other.iter())
393 .all(|(self_cell, other_cell)| self_cell == other_cell)
394 }
395}
396
397impl Hash for ByteRecord {
398 #[inline]
399 fn hash<H: Hasher>(&self, state: &mut H) {
400 state.write_usize(self.len());
401
402 for cell in self.iter() {
403 state.write(cell);
404 }
405 }
406}
407
408impl Index<usize> for ByteRecord {
409 type Output = [u8];
410
411 #[inline]
412 fn index(&self, i: usize) -> &[u8] {
413 self.get(i).unwrap()
414 }
415}
416
417impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
418 #[inline]
419 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
420 for x in iter {
421 self.push_field(x.as_ref());
422 }
423 }
424}
425
426impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
427 #[inline]
428 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
429 let mut record = Self::new();
430 record.extend(iter);
431 record
432 }
433}
434
435impl<I, T> From<I> for ByteRecord
436where
437 I: IntoIterator<Item = T>,
438 T: AsRef<[u8]>,
439{
440 fn from(value: I) -> Self {
441 let mut record = Self::new();
442
443 for cell in value.into_iter() {
444 record.push_field(cell.as_ref());
445 }
446
447 record
448 }
449}
450
451impl<'r> IntoIterator for &'r ByteRecord {
452 type IntoIter = ByteRecordIter<'r>;
453 type Item = &'r [u8];
454
455 #[inline]
456 fn into_iter(self) -> ByteRecordIter<'r> {
457 self.iter()
458 }
459}
460
461impl fmt::Debug for ByteRecord {
462 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
463 write!(f, "ByteRecord(")?;
464 f.debug_list()
465 .entries(self.iter().map(debug::Bytes))
466 .finish()?;
467 write!(f, ")")?;
468 Ok(())
469 }
470}
471
472pub struct ByteRecordIter<'a> {
473 record: &'a ByteRecord,
474 current_forward: usize,
475 current_backward: usize,
476}
477
478impl ExactSizeIterator for ByteRecordIter<'_> {}
479
480impl<'a> Iterator for ByteRecordIter<'a> {
481 type Item = &'a [u8];
482
483 #[inline]
484 fn next(&mut self) -> Option<Self::Item> {
485 if self.current_forward == self.current_backward {
486 None
487 } else {
488 let (start, end) = self.record.bounds[self.current_forward];
489
490 self.current_forward += 1;
491
492 Some(&self.record.data[start..end])
493 }
494 }
495
496 #[inline]
497 fn size_hint(&self) -> (usize, Option<usize>) {
498 let size = self.current_backward - self.current_forward;
499
500 (size, Some(size))
501 }
502
503 #[inline]
504 fn count(self) -> usize
505 where
506 Self: Sized,
507 {
508 self.len()
509 }
510}
511
512impl DoubleEndedIterator for ByteRecordIter<'_> {
513 #[inline]
514 fn next_back(&mut self) -> Option<Self::Item> {
515 if self.current_forward == self.current_backward {
516 None
517 } else {
518 self.current_backward -= 1;
519
520 let (start, end) = self.record.bounds[self.current_backward];
521
522 Some(&self.record.data[start..end])
523 }
524 }
525}
526
527pub(crate) struct ByteRecordBuilder<'r> {
528 record: &'r mut ByteRecord,
529 start: usize,
530}
531
532impl<'r> ByteRecordBuilder<'r> {
533 #[inline(always)]
534 pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
535 Self { record, start: 0 }
536 }
537
538 #[inline(always)]
539 pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
540 self.record.data.extend_from_slice(slice);
541 }
542
543 #[inline(always)]
544 pub(crate) fn push_byte(&mut self, byte: u8) {
545 self.record.data.push(byte);
546 }
547
548 #[inline]
549 pub(crate) fn finalize_field(&mut self) {
550 let start = self.start;
551 self.start = self.record.data.len();
552
553 self.record.bounds.push((start, self.start));
554 }
555
556 #[inline]
557 pub(crate) fn finalize_record(&mut self) {
558 if let Some(b'\r') = self.record.data.last() {
559 self.record.data.pop();
560 }
561
562 self.finalize_field();
563 }
564
565 #[inline]
566 pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
567 let start = self.start;
568 self.start = self.record.data.len() + offset;
569
570 self.record.bounds.push((start, self.start));
571
572 self.start += 1;
573 }
574
575 #[inline(always)]
576 pub(crate) fn bump(&mut self) {
577 self.start +=
578 (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
579 }
580}
581
#[cfg(test)]
mod tests {
    use super::*;

    /// Fields of a zero-copy record are reachable through `iter` and `get`.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');
        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];

        assert_eq!(record.len(), 3);
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (i, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(i), Some(*cell));
        }

        assert_eq!(record.get(4), None);
    }

    /// An owned record starts empty and hands back the fields pushed into it.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        for field in ["name", "surname", "age"] {
            record.push_field(field.as_bytes());
        }

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some(&b"name"[..]));
        assert_eq!(record.get(1), Some(&b"surname"[..]));
        assert_eq!(record.get(2), Some(&b"age"[..]));
        assert_eq!(record.get(3), None);
    }

    /// A record filled through the builder can still be pushed to afterwards.
    #[test]
    fn test_mutate_record_after_read() {
        let mut record = ByteRecord::new();

        let mut builder = ByteRecordBuilder::wrap(&mut record);
        builder.extend_from_slice(b"test\r");
        builder.finalize_record();

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test"]);

        record.push_field(b"next");

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test", b"next"]);
    }

    /// Reversing twice gives back the original record.
    #[test]
    fn test_reverse_byte_record() {
        let record = brec!["name", "surname", "age"];

        let mut reversed = record.clone();
        reversed.reverse();
        assert_eq!(reversed, brec!["ega", "emanrus", "eman"]);

        reversed.reverse();
        assert_eq!(record, reversed);
    }
}