1use std::borrow::Cow;
2use std::fmt;
3use std::hash::{Hash, Hasher};
4use std::ops::Index;
5
6use crate::debug;
7use crate::utils::{trim_trailing_crlf, unescape, unescape_to, unquoted};
8
/// A borrowed view over the bytes of a single record.
///
/// Nothing is copied: the record points at the raw bytes (`slice`) and at the
/// offsets of the separators found in them (`seps`), and hands out cells as
/// sub-slices of `slice`.
pub struct ZeroCopyByteRecord<'a> {
    /// Raw bytes of the record, as stored by [`ZeroCopyByteRecord::new`].
    slice: &'a [u8],
    /// Offset, within `slice`, of each separator standing between two cells.
    seps: &'a [usize],
    /// Byte used to quote cells.
    pub(crate) quote: u8,
}
14
15impl<'a> ZeroCopyByteRecord<'a> {
16 #[inline]
17 pub(crate) fn new(slice: &'a [u8], seps: &'a [usize], quote: u8) -> Self {
18 Self {
19 slice: trim_trailing_crlf(slice),
20 seps,
21 quote,
22 }
23 }
24
25 #[inline]
26 pub(crate) fn to_parts(&self) -> (Vec<usize>, Vec<u8>) {
27 (self.seps.to_vec(), self.slice.to_vec())
28 }
29
30 #[inline(always)]
31 pub fn len(&self) -> usize {
32 self.seps.len() + 1
35 }
36
37 #[inline(always)]
38 pub fn is_empty(&self) -> bool {
39 false
40 }
41
42 #[inline(always)]
43 pub fn as_slice(&self) -> &[u8] {
44 self.slice
45 }
46
47 #[inline]
48 pub fn iter(&self) -> ZeroCopyByteRecordIter<'_> {
49 ZeroCopyByteRecordIter {
50 record: self,
51 current_forward: 0,
52 current_backward: self.len(),
53 }
54 }
55
56 #[inline]
57 pub fn unquoted_iter(&self) -> ZeroCopyByteRecordUnquotedIter<'_> {
58 ZeroCopyByteRecordUnquotedIter {
59 record: self,
60 current_forward: 0,
61 current_backward: self.len(),
62 }
63 }
64
65 #[inline]
66 pub fn unescaped_iter(&self) -> ZeroCopyByteRecordUnescapedIter<'_> {
67 ZeroCopyByteRecordUnescapedIter {
68 record: self,
69 current_forward: 0,
70 current_backward: self.len(),
71 }
72 }
73
74 #[inline]
75 pub fn get(&self, index: usize) -> Option<&[u8]> {
76 let len = self.seps.len();
77
78 if index > len {
79 return None;
80 }
81
82 let start = if index == 0 {
83 0
84 } else {
85 self.seps[index - 1] + 1
86 };
87
88 let end = if index == len {
89 self.slice.len()
90 } else {
91 self.seps[index]
92 };
93
94 Some(&self.slice[start..end])
95 }
96
97 #[inline]
98 pub fn unquote(&self, index: usize) -> Option<&[u8]> {
99 self.get(index)
100 .map(|cell| unquoted(cell, self.quote).unwrap_or(cell))
101 }
102
103 #[inline]
104 pub fn unescape(&self, index: usize) -> Option<Cow<[u8]>> {
105 self.unquote(index).map(|cell| {
106 if let Some(trimmed) = unquoted(cell, self.quote) {
107 unescape(trimmed, self.quote)
108 } else {
109 Cow::Borrowed(cell)
110 }
111 })
112 }
113
114 fn read_byte_record(&self, record: &mut ByteRecord) {
115 record.clear();
116
117 for cell in self.iter() {
118 if let Some(trimmed) = unquoted(cell, self.quote) {
119 unescape_to(trimmed, self.quote, &mut record.data);
120
121 let bounds_len = record.bounds.len();
122
123 let start = if bounds_len == 0 {
124 0
125 } else {
126 record.bounds[bounds_len - 1].1
127 };
128
129 record.bounds.push((start, record.data.len()));
130 } else {
131 record.push_field(cell);
132 }
133 }
134 }
135
136 #[inline]
137 pub fn to_byte_record(&self) -> ByteRecord {
138 let mut record = ByteRecord::new();
139 self.read_byte_record(&mut record);
140 record
141 }
142
143 #[inline]
144 pub(crate) fn to_byte_record_in_reverse(&self) -> ByteRecord {
145 let mut record = ByteRecord::new();
146
147 for cell in self.unescaped_iter().rev() {
148 record.push_field_in_reverse(&cell);
149 }
150
151 record
152 }
153}
154
155impl fmt::Debug for ZeroCopyByteRecord<'_> {
156 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
157 write!(f, "ZeroCopyByteRecord(")?;
158 f.debug_list()
159 .entries(self.iter().map(debug::Bytes))
160 .finish()?;
161 write!(f, ")")?;
162 Ok(())
163 }
164}
165
166macro_rules! make_zero_copy_iterator {
167 ($name:ident, $method: ident, $out_type: ty) => {
168 pub struct $name<'a> {
169 record: &'a ZeroCopyByteRecord<'a>,
170 current_forward: usize,
171 current_backward: usize,
172 }
173
174 impl ExactSizeIterator for $name<'_> {}
175
176 impl<'a> Iterator for $name<'a> {
177 type Item = $out_type;
178
179 #[inline]
180 fn next(&mut self) -> Option<Self::Item> {
181 if self.current_forward == self.current_backward {
182 None
183 } else {
184 let cell = self.record.$method(self.current_forward);
185
186 self.current_forward += 1;
187
188 cell
189 }
190 }
191
192 #[inline]
193 fn size_hint(&self) -> (usize, Option<usize>) {
194 let size = self.current_backward - self.current_forward;
195
196 (size, Some(size))
197 }
198
199 #[inline]
200 fn count(self) -> usize
201 where
202 Self: Sized,
203 {
204 self.len()
205 }
206 }
207
208 impl DoubleEndedIterator for $name<'_> {
209 #[inline]
210 fn next_back(&mut self) -> Option<Self::Item> {
211 if self.current_forward == self.current_backward {
212 None
213 } else {
214 self.current_backward -= 1;
215
216 self.record.$method(self.current_backward)
217 }
218 }
219 }
220 };
221}
222
223make_zero_copy_iterator!(ZeroCopyByteRecordIter, get, &'a [u8]);
224make_zero_copy_iterator!(ZeroCopyByteRecordUnquotedIter, unquote, &'a [u8]);
225make_zero_copy_iterator!(ZeroCopyByteRecordUnescapedIter, unescape, Cow<'a, [u8]>);
226
227impl Index<usize> for ZeroCopyByteRecord<'_> {
228 type Output = [u8];
229
230 #[inline]
231 fn index(&self, i: usize) -> &[u8] {
232 self.get(i).unwrap()
233 }
234}
235
236#[derive(Default, Clone, Eq)]
237pub struct ByteRecord {
238 data: Vec<u8>,
239 bounds: Vec<(usize, usize)>,
240}
241
242impl ByteRecord {
243 pub fn new() -> Self {
244 Self::default()
245 }
246
247 #[inline]
248 pub fn len(&self) -> usize {
249 self.bounds.len()
250 }
251
252 #[inline]
253 pub fn is_empty(&self) -> bool {
254 self.len() == 0
255 }
256
257 #[inline]
258 pub fn clear(&mut self) {
259 self.data.clear();
260 self.bounds.clear();
261 }
262
263 #[inline]
264 pub fn truncate(&mut self, len: usize) {
265 self.bounds.truncate(len);
266
267 if let Some((_, end)) = self.bounds.last() {
268 self.data.truncate(*end);
269 } else {
270 self.data.clear();
271 }
272 }
273
274 #[inline]
275 pub fn as_slice(&self) -> &[u8] {
276 &self.data
277 }
278
279 #[inline]
280 pub fn iter(&self) -> ByteRecordIter<'_> {
281 ByteRecordIter {
282 record: self,
283 current_forward: 0,
284 current_backward: self.len(),
285 }
286 }
287
288 #[inline(always)]
289 pub fn push_field(&mut self, bytes: &[u8]) {
290 self.data.extend_from_slice(bytes);
291
292 let bounds_len = self.bounds.len();
293
294 let start = if bounds_len == 0 {
295 0
296 } else {
297 self.bounds[bounds_len - 1].1
298 };
299
300 self.bounds.push((start, self.data.len()));
301 }
302
303 #[inline]
304 fn push_field_in_reverse(&mut self, bytes: &[u8]) {
305 self.data.extend_from_slice(bytes);
306
307 let bounds_len = self.bounds.len();
308
309 let start = if bounds_len == 0 {
310 0
311 } else {
312 self.bounds[bounds_len - 1].1
313 };
314
315 let bounds = (start, self.data.len());
316 self.data[bounds.0..bounds.1].reverse();
317
318 self.bounds.push(bounds);
319 }
320
321 #[inline]
322 pub fn get(&self, index: usize) -> Option<&[u8]> {
323 self.bounds
324 .get(index)
325 .copied()
326 .map(|(start, end)| &self.data[start..end])
327 }
328
329 pub(crate) fn reverse(&mut self) {
330 self.data.reverse();
331 self.bounds.reverse();
332
333 let len = self.data.len();
334
335 for (start, end) in self.bounds.iter_mut() {
336 let new_end = len - *start;
337 let new_start = len - *end;
338
339 *start = new_start;
340 *end = new_end;
341 }
342 }
343}
344
345impl PartialEq for ByteRecord {
346 fn eq(&self, other: &Self) -> bool {
347 if self.bounds.len() != other.bounds.len() {
348 return false;
349 }
350
351 self.iter()
352 .zip(other.iter())
353 .all(|(self_cell, other_cell)| self_cell == other_cell)
354 }
355}
356
357impl Hash for ByteRecord {
358 #[inline]
359 fn hash<H: Hasher>(&self, state: &mut H) {
360 state.write_usize(self.len());
361
362 for cell in self.iter() {
363 state.write(cell);
364 }
365 }
366}
367
368impl Index<usize> for ByteRecord {
369 type Output = [u8];
370
371 #[inline]
372 fn index(&self, i: usize) -> &[u8] {
373 self.get(i).unwrap()
374 }
375}
376
377impl<T: AsRef<[u8]>> Extend<T> for ByteRecord {
378 #[inline]
379 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
380 for x in iter {
381 self.push_field(x.as_ref());
382 }
383 }
384}
385
386impl<T: AsRef<[u8]>> FromIterator<T> for ByteRecord {
387 #[inline]
388 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
389 let mut record = Self::new();
390 record.extend(iter);
391 record
392 }
393}
394
395impl<I, T> From<I> for ByteRecord
396where
397 I: IntoIterator<Item = T>,
398 T: AsRef<[u8]>,
399{
400 fn from(value: I) -> Self {
401 let mut record = Self::new();
402
403 for cell in value.into_iter() {
404 record.push_field(cell.as_ref());
405 }
406
407 record
408 }
409}
410
411impl<'r> IntoIterator for &'r ByteRecord {
412 type IntoIter = ByteRecordIter<'r>;
413 type Item = &'r [u8];
414
415 #[inline]
416 fn into_iter(self) -> ByteRecordIter<'r> {
417 self.iter()
418 }
419}
420
421impl fmt::Debug for ByteRecord {
422 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
423 write!(f, "ByteRecord(")?;
424 f.debug_list()
425 .entries(self.iter().map(debug::Bytes))
426 .finish()?;
427 write!(f, ")")?;
428 Ok(())
429 }
430}
431
432pub struct ByteRecordIter<'a> {
433 record: &'a ByteRecord,
434 current_forward: usize,
435 current_backward: usize,
436}
437
438impl ExactSizeIterator for ByteRecordIter<'_> {}
439
440impl<'a> Iterator for ByteRecordIter<'a> {
441 type Item = &'a [u8];
442
443 #[inline]
444 fn next(&mut self) -> Option<Self::Item> {
445 if self.current_forward == self.current_backward {
446 None
447 } else {
448 let (start, end) = self.record.bounds[self.current_forward];
449
450 self.current_forward += 1;
451
452 Some(&self.record.data[start..end])
453 }
454 }
455
456 #[inline]
457 fn size_hint(&self) -> (usize, Option<usize>) {
458 let size = self.current_backward - self.current_forward;
459
460 (size, Some(size))
461 }
462
463 #[inline]
464 fn count(self) -> usize
465 where
466 Self: Sized,
467 {
468 self.len()
469 }
470}
471
472impl DoubleEndedIterator for ByteRecordIter<'_> {
473 #[inline]
474 fn next_back(&mut self) -> Option<Self::Item> {
475 if self.current_forward == self.current_backward {
476 None
477 } else {
478 self.current_backward -= 1;
479
480 let (start, end) = self.record.bounds[self.current_backward];
481
482 Some(&self.record.data[start..end])
483 }
484 }
485}
486
487pub(crate) struct ByteRecordBuilder<'r> {
488 record: &'r mut ByteRecord,
489 start: usize,
490}
491
492impl<'r> ByteRecordBuilder<'r> {
493 #[inline(always)]
494 pub(crate) fn wrap(record: &'r mut ByteRecord) -> Self {
495 Self { record, start: 0 }
496 }
497
498 #[inline(always)]
499 pub(crate) fn extend_from_slice(&mut self, slice: &[u8]) {
500 self.record.data.extend_from_slice(slice);
501 }
502
503 #[inline(always)]
504 pub(crate) fn push_byte(&mut self, byte: u8) {
505 self.record.data.push(byte);
506 }
507
508 #[inline]
509 pub(crate) fn finalize_field(&mut self) {
510 let start = self.start;
511 self.start = self.record.data.len();
512
513 self.record.bounds.push((start, self.start));
514 }
515
516 #[inline]
517 pub(crate) fn finalize_record(&mut self) {
518 if let Some(b'\r') = self.record.data.last() {
519 self.record.data.pop();
520 }
521
522 self.finalize_field();
523 }
524
525 #[inline]
526 pub(crate) fn finalize_field_preemptively(&mut self, offset: usize) {
527 let start = self.start;
528 self.start = self.record.data.len() + offset;
529
530 self.record.bounds.push((start, self.start));
531
532 self.start += 1;
533 }
534
535 #[inline(always)]
536 pub(crate) fn bump(&mut self) {
537 self.start +=
538 (self.record.bounds.last().map(|(s, _)| *s).unwrap_or(0) != self.start) as usize;
539 }
540}
541
/// Builds a `ByteRecord` from a list of expressions, one field per
/// expression.
///
/// Every expression must offer an `as_bytes()` method (`&str` and `String`
/// do). Without argument, an empty record is returned. A trailing comma is
/// accepted, as with `vec!`.
///
/// NOTE(review): `ByteRecord` is named without a `$crate` path, so the type
/// has to be in scope wherever the macro is invoked.
#[macro_export]
macro_rules! brec {
    () => {{
        ByteRecord::new()
    }};

    ($($x:expr),+ $(,)?) => {{
        let mut r = ByteRecord::new();

        $(
            r.push_field($x.as_bytes());
        )+

        r
    }};
}
558
#[cfg(test)]
mod tests {
    use super::*;

    /// Cells of a zero-copy record can be read by iteration and by index.
    #[test]
    fn test_zero_copy_byte_record() {
        let record = ZeroCopyByteRecord::new(b"name,surname,age", &[4, 12], b'"');
        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];

        assert_eq!(record.len(), 3);
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        for (index, cell) in expected.iter().enumerate() {
            assert_eq!(record.get(index), Some(*cell));
        }

        assert_eq!(record.get(4), None);
    }

    /// An owned record starts empty and returns pushed fields in order.
    #[test]
    fn test_byte_record() {
        let mut record = ByteRecord::new();

        assert_eq!(record.len(), 0);
        assert!(record.is_empty());
        assert_eq!(record.get(0), None);

        for field in ["name", "surname", "age"].iter() {
            record.push_field(field.as_bytes());
        }

        let expected: Vec<&[u8]> = vec![b"name", b"surname", b"age"];
        assert_eq!(record.iter().collect::<Vec<_>>(), expected);

        assert_eq!(record.get(0), Some(&b"name"[..]));
        assert_eq!(record.get(1), Some(&b"surname"[..]));
        assert_eq!(record.get(2), Some(&b"age"[..]));
        assert_eq!(record.get(3), None);
    }

    /// A record filled through the builder can still be pushed to afterwards.
    #[test]
    fn test_mutate_record_after_read() {
        let mut record = ByteRecord::new();

        let mut builder = ByteRecordBuilder::wrap(&mut record);
        builder.extend_from_slice(b"test\r");
        builder.finalize_record();

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test"]);

        record.push_field(b"next");

        assert_eq!(record.iter().collect::<Vec<_>>(), vec![b"test", b"next"]);
    }

    /// Reversing flips both the field order and the bytes of each field, and
    /// reversing twice gives the original record back.
    #[test]
    fn test_reverse_byte_record() {
        let record = brec!["name", "surname", "age"];

        let mut reversed = record.clone();
        reversed.reverse();
        assert_eq!(reversed, brec!["ega", "emanrus", "eman"]);

        reversed.reverse();
        assert_eq!(record, reversed);
    }
}