simd_csv/
writer.rs

1use std::io::{BufWriter, IntoInnerError, Write};
2
3use memchr::{memchr, memchr3};
4
5use crate::error::{self, Error};
6use crate::records::ByteRecord;
7
/// Collects the settings used to create a [`Writer`].
///
/// Settings are changed through the chainable setter methods and are copied
/// into the writer when `from_writer` is called.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between two cells of a record.
    delimiter: u8,
    /// Byte used to enclose cells that need quoting.
    quote: u8,
    /// Requested capacity of the output buffer; `None` keeps the `BufWriter` default.
    buffer_capacity: Option<usize>,
    /// When `true`, written records are not checked for a consistent cell count.
    flexible: bool,
}
14
15impl Default for WriterBuilder {
16    fn default() -> Self {
17        Self {
18            delimiter: b',',
19            quote: b'"',
20            buffer_capacity: None,
21            flexible: false,
22        }
23    }
24}
25
26impl WriterBuilder {
27    pub fn new() -> Self {
28        Self::default()
29    }
30
31    pub fn with_capacity(capacity: usize) -> Self {
32        let mut builder = Self::default();
33        builder.buffer_capacity(capacity);
34        builder
35    }
36
37    pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
38        self.delimiter = delimiter;
39        self
40    }
41
42    pub fn quote(&mut self, quote: u8) -> &mut Self {
43        self.quote = quote;
44        self
45    }
46
47    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
48        self.buffer_capacity = Some(capacity);
49        self
50    }
51
52    pub fn flexible(&mut self, yes: bool) -> &mut Self {
53        self.flexible = yes;
54        self
55    }
56
57    fn bufwriter<W: Write>(&self, writer: W) -> BufWriter<W> {
58        match self.buffer_capacity {
59            None => BufWriter::new(writer),
60            Some(capacity) => BufWriter::with_capacity(capacity, writer),
61        }
62    }
63
64    pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
65        Writer {
66            delimiter: self.delimiter,
67            quote: self.quote,
68            buffer: self.bufwriter(writer),
69            flexible: self.flexible,
70            field_count: None,
71        }
72    }
73}
74
/// CSV writer that buffers its output in a `BufWriter` before it reaches `W`.
#[derive(Debug)]
pub struct Writer<W: Write> {
    /// Byte written between two cells of a record.
    delimiter: u8,
    /// Byte used to enclose cells that need quoting.
    quote: u8,
    /// Buffered handle on the underlying writer.
    buffer: BufWriter<W>,
    /// When `true`, records are not checked for a consistent cell count.
    flexible: bool,
    /// Cell count of the first checked record; `None` until one is recorded.
    field_count: Option<usize>,
}
82
83impl<W: Write> Writer<W> {
84    pub fn from_writer(writer: W) -> Self {
85        WriterBuilder::new().from_writer(writer)
86    }
87
88    #[inline(always)]
89    pub fn flush(&mut self) -> error::Result<()> {
90        self.buffer.flush()?;
91
92        Ok(())
93    }
94
95    #[inline]
96    fn check_field_count(&mut self, written: usize) -> error::Result<()> {
97        if self.flexible {
98            return Ok(());
99        }
100
101        match self.field_count {
102            Some(expected) => {
103                if written != expected {
104                    return Err(Error::unequal_lengths(expected, written));
105                }
106            }
107            None => {
108                self.field_count = Some(written);
109            }
110        }
111
112        Ok(())
113    }
114
115    pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
116    where
117        I: IntoIterator<Item = T>,
118        T: AsRef<[u8]>,
119    {
120        let mut first = true;
121        let mut written: usize = 0;
122        let mut empty = false;
123
124        for cell in record.into_iter() {
125            if first {
126                first = false;
127            } else {
128                self.buffer.write_all(&[self.delimiter])?;
129            }
130
131            let cell = cell.as_ref();
132
133            if cell.is_empty() {
134                empty = true;
135            }
136
137            self.buffer.write_all(cell)?;
138
139            written += 1;
140        }
141
142        if written == 1 && empty {
143            self.buffer.write_all(&[self.quote, self.quote])?;
144        }
145
146        self.check_field_count(written)?;
147
148        self.buffer.write_all(b"\n")?;
149
150        Ok(())
151    }
152
153    #[inline(always)]
154    pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
155        self.write_record_no_quoting(record.iter())
156    }
157
158    #[inline]
159    fn should_quote(&self, cell: &[u8]) -> bool {
160        if cell.len() < 8 {
161            cell.iter()
162                .copied()
163                .any(|b| b == self.quote || b == self.delimiter || b == b'\n')
164        } else {
165            memchr3(self.quote, self.delimiter, b'\n', cell).is_some()
166        }
167    }
168
169    fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
170        self.buffer.write_all(&[self.quote])?;
171
172        let mut i: usize = 0;
173
174        if cell.len() < 8 {
175            while i < cell.len() {
176                match cell[i..].iter().copied().position(|b| b == self.quote) {
177                    None => {
178                        self.buffer.write_all(&cell[i..])?;
179                        break;
180                    }
181                    Some(offset) => {
182                        self.buffer.write_all(&cell[i..i + offset + 1])?;
183                        self.buffer.write_all(&[self.quote])?;
184                        i += offset + 1;
185                    }
186                }
187            }
188        } else {
189            while i < cell.len() {
190                match memchr(self.quote, &cell[i..]) {
191                    None => {
192                        self.buffer.write_all(&cell[i..])?;
193                        break;
194                    }
195                    Some(offset) => {
196                        self.buffer.write_all(&cell[i..i + offset + 1])?;
197                        self.buffer.write_all(&[self.quote])?;
198                        i += offset + 1;
199                    }
200                };
201            }
202        }
203
204        self.buffer.write_all(&[self.quote])?;
205
206        Ok(())
207    }
208
209    pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
210    where
211        I: IntoIterator<Item = T>,
212        T: AsRef<[u8]>,
213    {
214        let mut first = true;
215        let mut written: usize = 0;
216        let mut empty = false;
217
218        for cell in record.into_iter() {
219            if first {
220                first = false;
221            } else {
222                self.buffer.write_all(&[self.delimiter])?;
223            }
224
225            let cell = cell.as_ref();
226
227            if cell.is_empty() {
228                empty = true;
229            }
230
231            if self.should_quote(cell) {
232                self.write_quoted_cell(cell)?;
233            } else {
234                self.buffer.write_all(cell)?;
235            }
236
237            written += 1;
238        }
239
240        if written == 1 && empty {
241            self.buffer.write_all(&[self.quote, self.quote])?;
242        }
243
244        self.check_field_count(written)?;
245
246        self.buffer.write_all(b"\n")?;
247
248        Ok(())
249    }
250
251    #[inline(always)]
252    pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
253        self.write_record(record.iter())
254    }
255
256    #[inline]
257    pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
258        self.buffer.into_inner()
259    }
260}
261
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    use crate::brec;

    /// Mixes unquoted and quoted writes and checks delimiter, newline and
    /// quote escaping in the resulting output.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let expected =
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n";

        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let sink = writer.into_inner()?;
        let text = std::str::from_utf8(sink.get_ref()).unwrap();

        assert_eq!(text, expected);

        Ok(())
    }

    /// Checks how records made of, or containing, empty cells are written.
    #[test]
    fn test_write_empty_cells() {
        // Writes `record` with a default writer and compares the whole output.
        fn assert_written(record: &ByteRecord, expected: &str) {
            let mut writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();

            assert_eq!(String::from_utf8_lossy(&bytes).into_owned(), expected);
        }

        assert_written(&brec![], "\n");
        assert_written(&brec![""], "\"\"\n");
        assert_written(&brec!["", "", ""], ",,\n");
        assert_written(&brec!["name", "", "age"], "name,,age\n");
        assert_written(&brec!["name", ""], "name,\n");
    }
}