simd_csv/
writer.rs

1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::{ByteRecord, ZeroCopyByteRecord};
7
/// Builds a [`Writer`] with given configuration.
///
/// The setters take and return `&mut Self` so that they can be chained, and
/// [`WriterBuilder::from_writer`] only borrows the builder, which means the
/// same builder can be used to create several writers.
pub struct WriterBuilder {
    /// Byte written between the cells of a record (`,` by default).
    delimiter: u8,
    /// Byte used to quote cells and, doubled, to escape itself (`"` by default).
    quote: u8,
    /// Capacity, in bytes, of the writer's internal buffer (8192 by default).
    buffer_capacity: usize,
    /// When `false` (the default), the built writer rejects records whose
    /// number of cells differs from the first record it checked.
    flexible: bool,
}
15
16impl Default for WriterBuilder {
17    fn default() -> Self {
18        Self {
19            delimiter: b',',
20            quote: b'"',
21            buffer_capacity: 8192,
22            flexible: false,
23        }
24    }
25}
26
27impl WriterBuilder {
28    pub fn new() -> Self {
29        Self::default()
30    }
31
32    pub fn with_capacity(capacity: usize) -> Self {
33        let mut builder = Self::default();
34        builder.buffer_capacity(capacity);
35        builder
36    }
37
38    pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
39        self.delimiter = delimiter;
40        self
41    }
42
43    pub fn quote(&mut self, quote: u8) -> &mut Self {
44        self.quote = quote;
45        self
46    }
47
48    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
49        self.buffer_capacity = capacity;
50        self
51    }
52
53    pub fn flexible(&mut self, yes: bool) -> &mut Self {
54        self.flexible = yes;
55        self
56    }
57
58    pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
59        let mut must_quote = [false; 256];
60        must_quote[b'\r' as usize] = true;
61        must_quote[b'\n' as usize] = true;
62        must_quote[self.delimiter as usize] = true;
63        must_quote[self.quote as usize] = true;
64
65        Writer {
66            delimiter: self.delimiter,
67            quote: self.quote,
68            buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
69            flexible: self.flexible,
70            field_count: None,
71            must_quote,
72        }
73    }
74}
75
/// An already configured CSV writer.
///
/// # Configuration
///
/// To configure a [`Writer`], for instance if you need a custom delimiter or
/// if you want to tweak the size of the inner buffer, check out the
/// [`WriterBuilder`].
pub struct Writer<W: Write> {
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte wrapped around quoted cells and doubled to escape itself.
    quote: u8,
    /// Buffered handle over the underlying writer.
    buffer: BufWriter<W>,
    /// When `true`, records of any length are accepted.
    flexible: bool,
    /// Number of cells of the first record checked, `None` until then.
    /// Only recorded when `flexible` is `false`.
    field_count: Option<usize>,
    /// Lookup table indexed by byte value: `true` for bytes whose presence
    /// in a cell forces that cell to be quoted.
    must_quote: [bool; 256],
}
91
92impl<W: Write> Writer<W> {
93    pub fn from_writer(writer: W) -> Self {
94        WriterBuilder::new().from_writer(writer)
95    }
96
97    #[inline(always)]
98    pub fn flush(&mut self) -> io::Result<()> {
99        self.buffer.flush()
100    }
101
102    #[inline]
103    fn check_field_count(&mut self, written: usize) -> error::Result<()> {
104        if self.flexible {
105            return Ok(());
106        }
107
108        match self.field_count {
109            Some(expected) => {
110                if written != expected {
111                    return Err(Error::new(ErrorKind::UnequalLengths {
112                        expected_len: expected,
113                        len: written,
114                        pos: None,
115                    }));
116                }
117            }
118            None => {
119                self.field_count = Some(written);
120            }
121        }
122
123        Ok(())
124    }
125
126    pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
127    where
128        I: IntoIterator<Item = T>,
129        T: AsRef<[u8]>,
130    {
131        let mut first = true;
132        let mut written: usize = 0;
133        let mut empty = false;
134
135        for cell in record.into_iter() {
136            if first {
137                first = false;
138            } else {
139                self.buffer.write_all(&[self.delimiter])?;
140            }
141
142            let cell = cell.as_ref();
143
144            if cell.is_empty() {
145                empty = true;
146            }
147
148            self.buffer.write_all(cell)?;
149
150            written += 1;
151        }
152
153        if written == 1 && empty {
154            self.buffer.write_all(&[self.quote, self.quote])?;
155        }
156
157        self.check_field_count(written)?;
158
159        self.buffer.write_all(b"\n")?;
160
161        Ok(())
162    }
163
164    #[inline(always)]
165    pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
166        self.write_record_no_quoting(record.iter())
167    }
168
169    #[inline]
170    fn should_quote(&self, mut cell: &[u8]) -> bool {
171        // This strategy comes directly from `rust-csv`
172        let mut yes = false;
173        while !yes && cell.len() >= 8 {
174            yes = self.must_quote[cell[0] as usize]
175                || self.must_quote[cell[1] as usize]
176                || self.must_quote[cell[2] as usize]
177                || self.must_quote[cell[3] as usize]
178                || self.must_quote[cell[4] as usize]
179                || self.must_quote[cell[5] as usize]
180                || self.must_quote[cell[6] as usize]
181                || self.must_quote[cell[7] as usize];
182            cell = &cell[8..];
183        }
184        yes || cell.iter().any(|&b| self.must_quote[b as usize])
185    }
186
187    fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
188        self.buffer.write_all(&[self.quote])?;
189
190        let mut i: usize = 0;
191
192        if cell.len() < 8 {
193            while i < cell.len() {
194                match cell[i..].iter().copied().position(|b| b == self.quote) {
195                    None => {
196                        self.buffer.write_all(&cell[i..])?;
197                        break;
198                    }
199                    Some(offset) => {
200                        self.buffer.write_all(&cell[i..i + offset + 1])?;
201                        self.buffer.write_all(&[self.quote])?;
202                        i += offset + 1;
203                    }
204                }
205            }
206        } else {
207            while i < cell.len() {
208                match memchr(self.quote, &cell[i..]) {
209                    None => {
210                        self.buffer.write_all(&cell[i..])?;
211                        break;
212                    }
213                    Some(offset) => {
214                        self.buffer.write_all(&cell[i..i + offset + 1])?;
215                        self.buffer.write_all(&[self.quote])?;
216                        i += offset + 1;
217                    }
218                };
219            }
220        }
221
222        self.buffer.write_all(&[self.quote])?;
223
224        Ok(())
225    }
226
227    pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
228    where
229        I: IntoIterator<Item = T>,
230        T: AsRef<[u8]>,
231    {
232        let mut first = true;
233        let mut written: usize = 0;
234        let mut empty = false;
235
236        for cell in record.into_iter() {
237            if first {
238                first = false;
239            } else {
240                self.buffer.write_all(&[self.delimiter])?;
241            }
242
243            let cell = cell.as_ref();
244
245            if cell.is_empty() {
246                empty = true;
247            }
248
249            if self.should_quote(cell) {
250                self.write_quoted_cell(cell)?;
251            } else {
252                self.buffer.write_all(cell)?;
253            }
254
255            written += 1;
256        }
257
258        if written == 1 && empty {
259            self.buffer.write_all(&[self.quote, self.quote])?;
260        }
261
262        self.check_field_count(written)?;
263
264        self.buffer.write_all(b"\n")?;
265
266        Ok(())
267    }
268
269    #[inline(always)]
270    pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
271        self.write_record(record.iter())
272    }
273
274    #[inline]
275    pub fn write_zero_copy_byte_record(
276        &mut self,
277        record: &ZeroCopyByteRecord,
278    ) -> error::Result<()> {
279        if record.quote == self.quote {
280            self.write_record_no_quoting(record.iter())
281        } else {
282            self.write_record(record.unescaped_iter())
283        }
284    }
285
286    #[inline(always)]
287    pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
288        self.buffer.write_all(record)?;
289        self.buffer.write_all(b"\n")?;
290
291        Ok(())
292    }
293
294    #[inline]
295    pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
296        self.buffer.into_inner()
297    }
298}
299
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    /// Mixes unquoted and quoted writes through a small 32-byte buffer and
    /// checks the exact bytes that reach the underlying cursor.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = writer.into_inner()?;
        let written = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            written,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Empty cells are written as nothing, except for a record made of a
    /// single empty cell which is written as `""`.
    #[test]
    fn test_write_empty_cells() {
        // Writes one record with a default writer and returns the output.
        fn render(record: &ByteRecord) -> String {
            let sink = Cursor::new(Vec::<u8>::new());
            let mut writer = Writer::from_writer(sink);
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        assert_eq!(render(&brec![]), "\n");
        assert_eq!(render(&brec![""]), "\"\"\n");
        assert_eq!(render(&brec!["", "", ""]), ",,\n");
        assert_eq!(render(&brec!["name", "", "age"]), "name,,age\n");
        assert_eq!(render(&brec!["name", ""]), "name,\n");
    }

    /// Only cells holding a delimiter, a quote or a line terminator need
    /// quoting, wherever that byte sits in the cell.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        assert!(!writer.should_quote(b"test"));
        assert!(writer.should_quote(b"test,"));
        assert!(writer.should_quote(b"te\"st"));
        assert!(writer.should_quote(b"te\nst"));
        // Special byte located after several full 8-byte blocks.
        assert!(writer.should_quote(b"testtesttesttesttesttesttesttest\n"));
        assert!(writer.should_quote(b"te\rst"));
    }
}
353}