hcklib/
single_byte_delim_parser.rs1use std::{
7 cmp::min,
8 io::{self, Write},
9};
10
11use ripline::LineTerminator;
12
13use crate::{core::JoinAppend, field_range::FieldRange};
14
15pub struct SingleByteDelimParser<'a> {
17 line_terminator: LineTerminator,
19 output_delimiter: &'a [u8],
20 fields: &'a [FieldRange],
21 sep: u8,
22 max_field: usize,
24 offset: usize,
26 newline: u8,
27 line: Vec<(usize, usize)>,
28}
29
30impl<'a> SingleByteDelimParser<'a> {
31 pub fn new(
33 line_terminator: LineTerminator,
34 output_delimiter: &'a [u8],
35 fields: &'a [FieldRange],
36 sep: u8,
37 ) -> Self {
38 Self {
39 line_terminator,
40 output_delimiter,
41 fields,
42 sep,
43 max_field: fields.last().map_or(usize::MAX, |f| f.high + 1),
44 offset: 0,
45 newline: line_terminator.as_byte(),
46 line: vec![],
47 }
48 }
49
50 #[inline]
52 pub fn reset(&mut self) {
53 self.offset = 0;
54 }
55
56 #[inline]
60 pub fn process_buffer<W: Write>(
61 &mut self,
62 buffer: &[u8],
63 mut output: W,
64 ) -> Result<(), io::Error> {
65 if let Some(byte) = buffer.first() {
67 if *byte == self.newline {
68 output.join_append(
69 self.output_delimiter,
70 std::iter::empty(),
71 &self.line_terminator,
72 )?;
73 self.offset += 1;
74 }
75 }
76
77 while self.offset < buffer.len() {
78 self.fill_line(buffer)?;
79 let items = self.fields.iter().flat_map(|f| {
80 let slice = self
81 .line
82 .get(f.low..=min(f.high, self.line.len().saturating_sub(1)))
83 .unwrap_or(&[]);
84 slice.iter().map(|(start, stop)| &buffer[*start..=*stop])
85 });
86 output.join_append(self.output_delimiter, items, &self.line_terminator)?;
87 self.line.clear();
88 }
89 Ok(())
90 }
91
92 #[inline]
95 fn fill_line(&mut self, buffer: &[u8]) -> Result<(), io::Error> {
96 let mut field_count = 0;
97 let iter = memchr::memchr2_iter(self.sep, self.newline, &buffer[self.offset..]);
98
99 let mut line_offset = 0;
100 let mut found_newline = false;
101
102 for index in iter {
103 if buffer[self.offset + index] == self.sep {
104 field_count += 1;
105 } else {
106 found_newline = true;
107 }
108
109 self.line
110 .push((self.offset + line_offset, self.offset + index - 1));
111 line_offset = index + 1;
112
113 if found_newline || field_count == self.max_field {
114 break;
115 }
116 }
117
118 if !found_newline {
119 let end = memchr::memchr(self.newline, &buffer[self.offset + line_offset..])
120 .ok_or(io::ErrorKind::InvalidData)?;
121 self.offset += line_offset + end + 1;
122 } else {
123 self.offset += line_offset;
124 }
125 Ok(())
126 }
127}