simd_csv/
writer.rs

1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::{ByteRecord, ZeroCopyByteRecord};
7
/// Configuration used to create a CSV `Writer`.
///
/// All settings are plain values, so the builder can be cloned and printed
/// with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to wrap quoted cells and to escape itself inside them.
    quote: u8,
    /// Capacity, in bytes, of the internal output buffer.
    buffer_capacity: usize,
    /// When `true`, records may have differing numbers of cells.
    flexible: bool,
}
14
15impl Default for WriterBuilder {
16    fn default() -> Self {
17        Self {
18            delimiter: b',',
19            quote: b'"',
20            buffer_capacity: 8192,
21            flexible: false,
22        }
23    }
24}
25
26impl WriterBuilder {
27    pub fn new() -> Self {
28        Self::default()
29    }
30
31    pub fn with_capacity(capacity: usize) -> Self {
32        let mut builder = Self::default();
33        builder.buffer_capacity(capacity);
34        builder
35    }
36
37    pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
38        self.delimiter = delimiter;
39        self
40    }
41
42    pub fn quote(&mut self, quote: u8) -> &mut Self {
43        self.quote = quote;
44        self
45    }
46
47    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
48        self.buffer_capacity = capacity;
49        self
50    }
51
52    pub fn flexible(&mut self, yes: bool) -> &mut Self {
53        self.flexible = yes;
54        self
55    }
56
57    pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
58        let mut must_quote = [false; 256];
59        must_quote[b'\r' as usize] = true;
60        must_quote[b'\n' as usize] = true;
61        must_quote[self.delimiter as usize] = true;
62        must_quote[self.quote as usize] = true;
63
64        Writer {
65            delimiter: self.delimiter,
66            quote: self.quote,
67            buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
68            flexible: self.flexible,
69            field_count: None,
70            must_quote,
71        }
72    }
73}
74
/// CSV writer that sends its output through a [`BufWriter`] wrapping `W`.
pub struct Writer<W: Write> {
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to wrap quoted cells and to escape itself inside them.
    quote: u8,
    /// Buffered sink every write goes through.
    buffer: BufWriter<W>,
    /// When `true`, the per-record cell count is not checked.
    flexible: bool,
    /// Cell count of the first record checked; `None` until then.
    /// Only updated when `flexible` is `false`.
    field_count: Option<usize>,
    /// Lookup table indexed by byte value: `true` for bytes whose presence
    /// in a cell requires the cell to be quoted.
    must_quote: [bool; 256],
}
83
84impl<W: Write> Writer<W> {
    /// Creates a writer around `writer` using the configuration of
    /// `WriterBuilder::new()`.
    pub fn from_writer(writer: W) -> Self {
        WriterBuilder::new().from_writer(writer)
    }
88
    /// Flushes the internal buffer, returning any I/O error reported by
    /// the underlying `BufWriter`.
    #[inline(always)]
    pub fn flush(&mut self) -> io::Result<()> {
        self.buffer.flush()
    }
93
94    #[inline]
95    fn check_field_count(&mut self, written: usize) -> error::Result<()> {
96        if self.flexible {
97            return Ok(());
98        }
99
100        match self.field_count {
101            Some(expected) => {
102                if written != expected {
103                    return Err(Error::new(ErrorKind::UnequalLengths {
104                        expected_len: expected,
105                        len: written,
106                        pos: None,
107                    }));
108                }
109            }
110            None => {
111                self.field_count = Some(written);
112            }
113        }
114
115        Ok(())
116    }
117
118    pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
119    where
120        I: IntoIterator<Item = T>,
121        T: AsRef<[u8]>,
122    {
123        let mut first = true;
124        let mut written: usize = 0;
125        let mut empty = false;
126
127        for cell in record.into_iter() {
128            if first {
129                first = false;
130            } else {
131                self.buffer.write_all(&[self.delimiter])?;
132            }
133
134            let cell = cell.as_ref();
135
136            if cell.is_empty() {
137                empty = true;
138            }
139
140            self.buffer.write_all(cell)?;
141
142            written += 1;
143        }
144
145        if written == 1 && empty {
146            self.buffer.write_all(&[self.quote, self.quote])?;
147        }
148
149        self.check_field_count(written)?;
150
151        self.buffer.write_all(b"\n")?;
152
153        Ok(())
154    }
155
    /// Writes the cells of `record` without any quoting, by forwarding
    /// `record.iter()` to `write_record_no_quoting`.
    #[inline(always)]
    pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
        self.write_record_no_quoting(record.iter())
    }
160
161    #[inline]
162    fn should_quote(&self, mut cell: &[u8]) -> bool {
163        // This strategy comes directly from `rust-csv`
164        let mut yes = false;
165        while !yes && cell.len() >= 8 {
166            yes = self.must_quote[cell[0] as usize]
167                || self.must_quote[cell[1] as usize]
168                || self.must_quote[cell[2] as usize]
169                || self.must_quote[cell[3] as usize]
170                || self.must_quote[cell[4] as usize]
171                || self.must_quote[cell[5] as usize]
172                || self.must_quote[cell[6] as usize]
173                || self.must_quote[cell[7] as usize];
174            cell = &cell[8..];
175        }
176        yes || cell.iter().any(|&b| self.must_quote[b as usize])
177    }
178
179    fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
180        self.buffer.write_all(&[self.quote])?;
181
182        let mut i: usize = 0;
183
184        if cell.len() < 8 {
185            while i < cell.len() {
186                match cell[i..].iter().copied().position(|b| b == self.quote) {
187                    None => {
188                        self.buffer.write_all(&cell[i..])?;
189                        break;
190                    }
191                    Some(offset) => {
192                        self.buffer.write_all(&cell[i..i + offset + 1])?;
193                        self.buffer.write_all(&[self.quote])?;
194                        i += offset + 1;
195                    }
196                }
197            }
198        } else {
199            while i < cell.len() {
200                match memchr(self.quote, &cell[i..]) {
201                    None => {
202                        self.buffer.write_all(&cell[i..])?;
203                        break;
204                    }
205                    Some(offset) => {
206                        self.buffer.write_all(&cell[i..i + offset + 1])?;
207                        self.buffer.write_all(&[self.quote])?;
208                        i += offset + 1;
209                    }
210                };
211            }
212        }
213
214        self.buffer.write_all(&[self.quote])?;
215
216        Ok(())
217    }
218
219    pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
220    where
221        I: IntoIterator<Item = T>,
222        T: AsRef<[u8]>,
223    {
224        let mut first = true;
225        let mut written: usize = 0;
226        let mut empty = false;
227
228        for cell in record.into_iter() {
229            if first {
230                first = false;
231            } else {
232                self.buffer.write_all(&[self.delimiter])?;
233            }
234
235            let cell = cell.as_ref();
236
237            if cell.is_empty() {
238                empty = true;
239            }
240
241            if self.should_quote(cell) {
242                self.write_quoted_cell(cell)?;
243            } else {
244                self.buffer.write_all(cell)?;
245            }
246
247            written += 1;
248        }
249
250        if written == 1 && empty {
251            self.buffer.write_all(&[self.quote, self.quote])?;
252        }
253
254        self.check_field_count(written)?;
255
256        self.buffer.write_all(b"\n")?;
257
258        Ok(())
259    }
260
    /// Writes the cells of `record`, quoting where needed, by forwarding
    /// `record.iter()` to `write_record`.
    #[inline(always)]
    pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
        self.write_record(record.iter())
    }
265
266    #[inline]
267    pub fn write_zero_copy_byte_record(
268        &mut self,
269        record: &ZeroCopyByteRecord,
270    ) -> error::Result<()> {
271        if record.quote == self.quote {
272            self.write_record_no_quoting(record.iter())
273        } else {
274            self.write_record(record.unescaped_iter())
275        }
276    }
277
278    #[inline(always)]
279    pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
280        self.buffer.write_all(record)?;
281        self.buffer.write_all(b"\n")?;
282
283        Ok(())
284    }
285
    /// Consumes the writer and returns the wrapped `W`.
    ///
    /// Delegates to `BufWriter::into_inner`, which writes out buffered data
    /// first; if that fails, the error is returned as an `IntoInnerError`.
    #[inline]
    pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
        self.buffer.into_inner()
    }
290}
291
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    use crate::brec;

    /// Writes an unquoted header followed by two rows needing quotes through
    /// a 32-byte buffer and checks the complete output.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut csv = WriterBuilder::with_capacity(32).from_writer(sink);

        let header = brec!["name", "surname", "age"];
        let first_row = brec!["john,", "landis", "45"];
        let second_row = brec!["lucy", "get\ngot", "\"te,\"st\""];

        csv.write_byte_record_no_quoting(&header)?;
        csv.write_byte_record(&first_row)?;
        csv.write_byte_record(&second_row)?;

        let cursor = csv.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n"
        );

        Ok(())
    }

    /// Checks how empty cells and empty records are rendered.
    #[test]
    fn test_write_empty_cells() {
        // Renders a single record with a default writer and compares the
        // resulting text with `expected`.
        fn assert_written(record: &ByteRecord, expected: &str) {
            let mut csv = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            csv.write_byte_record(record).unwrap();

            let bytes = csv.into_inner().unwrap().into_inner();
            let rendered = String::from_utf8_lossy(&bytes).into_owned();

            assert_eq!(rendered, expected);
        }

        assert_written(&brec![], "\n");
        assert_written(&brec![""], "\"\"\n");
        assert_written(&brec!["", "", ""], ",,\n");
        assert_written(&brec!["name", "", "age"], "name,,age\n");
        assert_written(&brec!["name", ""], "name,\n");
    }

    /// Checks which cells are reported as needing quotes.
    #[test]
    fn should_quote() {
        let csv = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        assert!(!csv.should_quote(b"test"));

        let needs_quotes: [&[u8]; 5] = [
            b"test,",
            b"te\"st",
            b"te\nst",
            b"testtesttesttesttesttesttesttest\n",
            b"te\rst",
        ];

        for cell in needs_quotes.iter() {
            assert!(csv.should_quote(cell));
        }
    }
}