1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::{ByteRecord, ZeroCopyByteRecord};
7
/// Configuration from which a [`Writer`] is built.
///
/// Stores the delimiter byte, the quote byte, the capacity of the output
/// buffer and whether records of differing lengths are accepted.
///
/// The type only holds plain values, so it derives `Debug` (for diagnostics)
/// and `Clone` (so one configuration can be duplicated and tweaked).
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to enclose cells that need quoting.
    quote: u8,
    /// Capacity, in bytes, given to the `BufWriter` wrapping the destination.
    buffer_capacity: usize,
    /// When `true`, the writer does not check that records have equal lengths.
    flexible: bool,
}
14
15impl Default for WriterBuilder {
16 fn default() -> Self {
17 Self {
18 delimiter: b',',
19 quote: b'"',
20 buffer_capacity: 8192,
21 flexible: false,
22 }
23 }
24}
25
26impl WriterBuilder {
27 pub fn new() -> Self {
28 Self::default()
29 }
30
31 pub fn with_capacity(capacity: usize) -> Self {
32 let mut builder = Self::default();
33 builder.buffer_capacity(capacity);
34 builder
35 }
36
37 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
38 self.delimiter = delimiter;
39 self
40 }
41
42 pub fn quote(&mut self, quote: u8) -> &mut Self {
43 self.quote = quote;
44 self
45 }
46
47 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
48 self.buffer_capacity = capacity;
49 self
50 }
51
52 pub fn flexible(&mut self, yes: bool) -> &mut Self {
53 self.flexible = yes;
54 self
55 }
56
57 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
58 let mut must_quote = [false; 256];
59 must_quote[b'\r' as usize] = true;
60 must_quote[b'\n' as usize] = true;
61 must_quote[self.delimiter as usize] = true;
62 must_quote[self.quote as usize] = true;
63
64 Writer {
65 delimiter: self.delimiter,
66 quote: self.quote,
67 buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
68 flexible: self.flexible,
69 field_count: None,
70 must_quote,
71 }
72 }
73}
74
/// Buffered writer emitting delimiter-separated records to a wrapped `W`.
pub struct Writer<W: Write> {
    /// Buffered handle on the destination; all output goes through it.
    buffer: BufWriter<W>,
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to enclose quoted cells and to double embedded quotes.
    quote: u8,
    /// Lookup table indexed by byte value: `true` when the presence of that
    /// byte in a cell forces the cell to be quoted.
    must_quote: [bool; 256],
    /// When `true`, record lengths are never checked.
    flexible: bool,
    /// Length of the first record checked in strict mode, `None` until then.
    field_count: Option<usize>,
}
83
84impl<W: Write> Writer<W> {
85 pub fn from_writer(writer: W) -> Self {
86 WriterBuilder::new().from_writer(writer)
87 }
88
89 #[inline(always)]
90 pub fn flush(&mut self) -> io::Result<()> {
91 self.buffer.flush()
92 }
93
94 #[inline]
95 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
96 if self.flexible {
97 return Ok(());
98 }
99
100 match self.field_count {
101 Some(expected) => {
102 if written != expected {
103 return Err(Error::new(ErrorKind::UnequalLengths {
104 expected_len: expected,
105 len: written,
106 pos: None,
107 }));
108 }
109 }
110 None => {
111 self.field_count = Some(written);
112 }
113 }
114
115 Ok(())
116 }
117
118 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
119 where
120 I: IntoIterator<Item = T>,
121 T: AsRef<[u8]>,
122 {
123 let mut first = true;
124 let mut written: usize = 0;
125 let mut empty = false;
126
127 for cell in record.into_iter() {
128 if first {
129 first = false;
130 } else {
131 self.buffer.write_all(&[self.delimiter])?;
132 }
133
134 let cell = cell.as_ref();
135
136 if cell.is_empty() {
137 empty = true;
138 }
139
140 self.buffer.write_all(cell)?;
141
142 written += 1;
143 }
144
145 if written == 1 && empty {
146 self.buffer.write_all(&[self.quote, self.quote])?;
147 }
148
149 self.check_field_count(written)?;
150
151 self.buffer.write_all(b"\n")?;
152
153 Ok(())
154 }
155
156 #[inline(always)]
157 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
158 self.write_record_no_quoting(record.iter())
159 }
160
161 #[inline]
162 fn should_quote(&self, mut cell: &[u8]) -> bool {
163 let mut yes = false;
165 while !yes && cell.len() >= 8 {
166 yes = self.must_quote[cell[0] as usize]
167 || self.must_quote[cell[1] as usize]
168 || self.must_quote[cell[2] as usize]
169 || self.must_quote[cell[3] as usize]
170 || self.must_quote[cell[4] as usize]
171 || self.must_quote[cell[5] as usize]
172 || self.must_quote[cell[6] as usize]
173 || self.must_quote[cell[7] as usize];
174 cell = &cell[8..];
175 }
176 yes || cell.iter().any(|&b| self.must_quote[b as usize])
177 }
178
179 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
180 self.buffer.write_all(&[self.quote])?;
181
182 let mut i: usize = 0;
183
184 if cell.len() < 8 {
185 while i < cell.len() {
186 match cell[i..].iter().copied().position(|b| b == self.quote) {
187 None => {
188 self.buffer.write_all(&cell[i..])?;
189 break;
190 }
191 Some(offset) => {
192 self.buffer.write_all(&cell[i..i + offset + 1])?;
193 self.buffer.write_all(&[self.quote])?;
194 i += offset + 1;
195 }
196 }
197 }
198 } else {
199 while i < cell.len() {
200 match memchr(self.quote, &cell[i..]) {
201 None => {
202 self.buffer.write_all(&cell[i..])?;
203 break;
204 }
205 Some(offset) => {
206 self.buffer.write_all(&cell[i..i + offset + 1])?;
207 self.buffer.write_all(&[self.quote])?;
208 i += offset + 1;
209 }
210 };
211 }
212 }
213
214 self.buffer.write_all(&[self.quote])?;
215
216 Ok(())
217 }
218
219 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
220 where
221 I: IntoIterator<Item = T>,
222 T: AsRef<[u8]>,
223 {
224 let mut first = true;
225 let mut written: usize = 0;
226 let mut empty = false;
227
228 for cell in record.into_iter() {
229 if first {
230 first = false;
231 } else {
232 self.buffer.write_all(&[self.delimiter])?;
233 }
234
235 let cell = cell.as_ref();
236
237 if cell.is_empty() {
238 empty = true;
239 }
240
241 if self.should_quote(cell) {
242 self.write_quoted_cell(cell)?;
243 } else {
244 self.buffer.write_all(cell)?;
245 }
246
247 written += 1;
248 }
249
250 if written == 1 && empty {
251 self.buffer.write_all(&[self.quote, self.quote])?;
252 }
253
254 self.check_field_count(written)?;
255
256 self.buffer.write_all(b"\n")?;
257
258 Ok(())
259 }
260
261 #[inline(always)]
262 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
263 self.write_record(record.iter())
264 }
265
266 #[inline]
267 pub fn write_zero_copy_byte_record(
268 &mut self,
269 record: &ZeroCopyByteRecord,
270 ) -> error::Result<()> {
271 if record.quote == self.quote {
272 self.write_record_no_quoting(record.iter())
273 } else {
274 self.write_record(record.unescaped_iter())
275 }
276 }
277
278 #[inline(always)]
279 pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
280 self.buffer.write_all(record)?;
281 self.buffer.write_all(b"\n")?;
282
283 Ok(())
284 }
285
286 #[inline]
287 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
288 self.buffer.into_inner()
289 }
290}
291
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    use crate::brec;

    /// Mixes unquoted and quoted writes through a small (32-byte) buffer and
    /// checks the exact bytes produced.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = writer.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Checks how empty records and empty cells are written.
    #[test]
    fn test_write_empty_cells() {
        // Writes a single record with a default writer and returns the output.
        fn render(record: &ByteRecord) -> String {
            let mut writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        let cases = [
            (brec![], "\n"),
            (brec![""], "\"\"\n"),
            (brec!["", "", ""], ",,\n"),
            (brec!["name", "", "age"], "name,,age\n"),
            (brec!["name", ""], "name,\n"),
        ];

        for (record, expected) in cases.iter() {
            assert_eq!(render(record), *expected);
        }
    }

    /// Checks the quoting decision for short and long cells.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        assert!(!writer.should_quote(b"test"));
        assert!(writer.should_quote(b"test,"));
        assert!(writer.should_quote(b"te\"st"));
        assert!(writer.should_quote(b"te\nst"));
        assert!(writer.should_quote(b"testtesttesttesttesttesttesttest\n"));
        assert!(writer.should_quote(b"te\rst"));
    }
}