1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::{ByteRecord, ZeroCopyByteRecord};
7
/// Configuration for building a [`Writer`].
///
/// A builder starts from the defaults (see the `Default` impl), is adjusted
/// through its chainable setters, and is finally turned into a [`Writer`]
/// with `from_writer`. Building only borrows the builder, and since the type
/// is `Clone` a configured builder can also be duplicated and tweaked.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between two fields of a record.
    delimiter: u8,
    /// Byte used to wrap fields that need quoting; it is doubled to escape
    /// occurrences of itself inside a quoted field.
    quote: u8,
    /// Capacity, in bytes, of the `BufWriter` wrapped around the output.
    buffer_capacity: usize,
    /// When `true`, records may have differing numbers of fields.
    flexible: bool,
    /// When `true`, records are terminated with `\r\n` instead of `\n`.
    crlf: bool,
}
16
17impl Default for WriterBuilder {
18 fn default() -> Self {
19 Self {
20 delimiter: b',',
21 quote: b'"',
22 buffer_capacity: 8192,
23 flexible: false,
24 crlf: false,
25 }
26 }
27}
28
29impl WriterBuilder {
30 pub fn new() -> Self {
32 Self::default()
33 }
34
35 pub fn with_capacity(capacity: usize) -> Self {
37 let mut builder = Self::default();
38 builder.buffer_capacity(capacity);
39 builder
40 }
41
42 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
48 self.delimiter = delimiter;
49 self
50 }
51
52 pub fn quote(&mut self, quote: u8) -> &mut Self {
58 self.quote = quote;
59 self
60 }
61
62 pub fn crlf_newlines(&mut self, yes: bool) -> &mut Self {
64 self.crlf = yes;
65 self
66 }
67
68 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
70 self.buffer_capacity = capacity;
71 self
72 }
73
74 pub fn flexible(&mut self, yes: bool) -> &mut Self {
80 self.flexible = yes;
81 self
82 }
83
84 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
87 let mut must_quote = [false; 256];
88 must_quote[b'\r' as usize] = true;
89 must_quote[b'\n' as usize] = true;
90 must_quote[self.delimiter as usize] = true;
91 must_quote[self.quote as usize] = true;
92
93 Writer {
94 delimiter: self.delimiter,
95 quote: self.quote,
96 line_terminator: if self.crlf { b"\r\n" } else { b"\n" },
97 buf_writer: BufWriter::with_capacity(self.buffer_capacity, writer),
98 flexible: self.flexible,
99 field_count: None,
100 must_quote,
101 }
102 }
103}
104
/// A buffered CSV writer over any [`Write`] implementation.
///
/// Output goes through an internal `BufWriter`. Instances are created with
/// `Writer::from_writer` or, for custom settings, through `WriterBuilder`.
pub struct Writer<W: Write> {
    /// Buffered handle on the underlying output.
    buf_writer: BufWriter<W>,

    /// Byte written between fields.
    delimiter: u8,
    /// Byte wrapping quoted fields, doubled to escape itself.
    quote: u8,
    /// Bytes appended after each record (`\n` or `\r\n`).
    line_terminator: &'static [u8],
    /// Lookup table indexed by byte value: `true` when the byte forces the
    /// field containing it to be quoted.
    must_quote: [bool; 256],

    /// When `true`, the field-count check is skipped.
    flexible: bool,
    /// Field count of the first checked record; `None` until one is written.
    field_count: Option<usize>,
}
121
122impl<W: Write> Writer<W> {
123 pub fn from_writer(writer: W) -> Self {
129 WriterBuilder::new().from_writer(writer)
130 }
131
132 #[inline(always)]
134 pub fn flush(&mut self) -> io::Result<()> {
135 self.buf_writer.flush()
136 }
137
138 #[inline]
139 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
140 if self.flexible {
141 return Ok(());
142 }
143
144 match self.field_count {
145 Some(expected) => {
146 if written != expected {
147 return Err(Error::new(ErrorKind::UnequalLengths {
148 expected_len: expected,
149 len: written,
150 pos: None,
151 }));
152 }
153 }
154 None => {
155 self.field_count = Some(written);
156 }
157 }
158
159 Ok(())
160 }
161
162 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
173 where
174 I: IntoIterator<Item = T>,
175 T: AsRef<[u8]>,
176 {
177 let mut first = true;
178 let mut written: usize = 0;
179 let mut empty = false;
180
181 for cell in record.into_iter() {
182 if first {
183 first = false;
184 } else {
185 self.buf_writer.write_all(&[self.delimiter])?;
186 }
187
188 let cell = cell.as_ref();
189
190 if cell.is_empty() {
191 empty = true;
192 }
193
194 self.buf_writer.write_all(cell)?;
195
196 written += 1;
197 }
198
199 if written == 1 && empty {
200 self.buf_writer.write_all(&[self.quote, self.quote])?;
201 }
202
203 self.check_field_count(written)?;
204
205 self.buf_writer.write_all(self.line_terminator)?;
206
207 Ok(())
208 }
209
210 #[inline(always)]
218 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
219 self.write_record_no_quoting(record.iter())
220 }
221
222 #[inline]
223 fn should_quote(&self, mut cell: &[u8]) -> bool {
224 let mut yes = false;
226 while !yes && cell.len() >= 8 {
227 yes = self.must_quote[cell[0] as usize]
228 || self.must_quote[cell[1] as usize]
229 || self.must_quote[cell[2] as usize]
230 || self.must_quote[cell[3] as usize]
231 || self.must_quote[cell[4] as usize]
232 || self.must_quote[cell[5] as usize]
233 || self.must_quote[cell[6] as usize]
234 || self.must_quote[cell[7] as usize];
235 cell = &cell[8..];
236 }
237 yes || cell.iter().any(|&b| self.must_quote[b as usize])
238 }
239
240 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
241 self.buf_writer.write_all(&[self.quote])?;
242
243 let mut i: usize = 0;
244
245 if cell.len() < 8 {
246 while i < cell.len() {
247 match cell[i..].iter().copied().position(|b| b == self.quote) {
248 None => {
249 self.buf_writer.write_all(&cell[i..])?;
250 break;
251 }
252 Some(offset) => {
253 self.buf_writer.write_all(&cell[i..i + offset + 1])?;
254 self.buf_writer.write_all(&[self.quote])?;
255 i += offset + 1;
256 }
257 }
258 }
259 } else {
260 while i < cell.len() {
261 match memchr(self.quote, &cell[i..]) {
262 None => {
263 self.buf_writer.write_all(&cell[i..])?;
264 break;
265 }
266 Some(offset) => {
267 self.buf_writer.write_all(&cell[i..i + offset + 1])?;
268 self.buf_writer.write_all(&[self.quote])?;
269 i += offset + 1;
270 }
271 };
272 }
273 }
274
275 self.buf_writer.write_all(&[self.quote])?;
276
277 Ok(())
278 }
279
280 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
285 where
286 I: IntoIterator<Item = T>,
287 T: AsRef<[u8]>,
288 {
289 let mut first = true;
290 let mut written: usize = 0;
291 let mut empty = false;
292
293 for cell in record.into_iter() {
294 if first {
295 first = false;
296 } else {
297 self.buf_writer.write_all(&[self.delimiter])?;
298 }
299
300 let cell = cell.as_ref();
301
302 if cell.is_empty() {
303 empty = true;
304 }
305
306 if self.should_quote(cell) {
307 self.write_quoted_cell(cell)?;
308 } else {
309 self.buf_writer.write_all(cell)?;
310 }
311
312 written += 1;
313 }
314
315 if written == 1 && empty {
316 self.buf_writer.write_all(&[self.quote, self.quote])?;
317 }
318
319 self.check_field_count(written)?;
320
321 self.buf_writer.write_all(self.line_terminator)?;
322
323 Ok(())
324 }
325
326 #[inline(always)]
328 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
329 self.write_record(record.iter())
330 }
331
332 #[inline]
338 pub fn write_zero_copy_byte_record(
339 &mut self,
340 delimiter: u8,
341 record: &ZeroCopyByteRecord,
342 ) -> error::Result<()> {
343 if self.delimiter == delimiter && record.quote == self.quote {
344 self.buf_writer.write_all(record.as_slice())?;
345 self.buf_writer.write_all(self.line_terminator)?;
346 } else {
347 self.write_record(record.unescaped_iter())?;
348 }
349
350 Ok(())
351 }
352
353 #[inline]
356 pub fn write_zero_copy_byte_record_indices(
357 &mut self,
358 delimiter: u8,
359 record: &ZeroCopyByteRecord,
360 indices: &[usize],
361 ) -> error::Result<()> {
362 if self.delimiter == delimiter && record.quote == self.quote {
363 self.write_record_no_quoting(indices.iter().copied().map(|i| &record[i]))?;
364 } else {
365 self.write_record(indices.iter().copied().map(|i| record.unescape(i).unwrap()))?;
366 }
367
368 Ok(())
369 }
370
371 #[inline(always)]
380 pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
381 self.buf_writer.write_all(record)?;
382 self.buf_writer.write_all(self.line_terminator)?;
383
384 Ok(())
385 }
386
387 #[inline]
390 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
391 self.buf_writer.into_inner()
392 }
393}
394
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    /// Mixes the quoting and non-quoting paths through a small (32-byte)
    /// buffer and checks the exact bytes produced.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = writer.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Empty fields are written bare, except for a record made of a single
    /// empty field, which is written as a pair of quotes.
    #[test]
    fn test_write_empty_cells() {
        // Writes one record with a fresh default writer and returns the output.
        fn render(record: &ByteRecord) -> String {
            let sink = Cursor::new(Vec::<u8>::new());
            let mut writer = Writer::from_writer(sink);
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        assert_eq!(render(&brec![]), "\n");
        assert_eq!(render(&brec![""]), "\"\"\n");
        assert_eq!(render(&brec!["", "", ""]), ",,\n");
        assert_eq!(render(&brec!["name", "", "age"]), "name,,age\n");
        assert_eq!(render(&brec!["name", ""]), "name,\n");
    }

    /// Covers the byte-by-byte tail, the 8-byte block loop and each of the
    /// special bytes.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        assert!(!writer.should_quote(b"test"));
        assert!(writer.should_quote(b"test,"));
        assert!(writer.should_quote(b"te\"st"));
        assert!(writer.should_quote(b"te\nst"));
        assert!(writer.should_quote(b"testtesttesttesttesttesttesttest\n"));
        assert!(writer.should_quote(b"te\rst"));
    }
}