simd_csv/
writer.rs

1use std::io::{BufWriter, IntoInnerError, Write};
2
3use memchr::{memchr, memchr3};
4
5use crate::error::{self, Error};
6use crate::records::ByteRecord;
7
/// A CSV writer that serializes records of byte cells into a buffered sink.
///
/// The delimiter and quote bytes are chosen at construction time. Every
/// record must contain as many cells as the first record written.
pub struct Writer<W: Write> {
    /// Buffered sink receiving the serialized bytes.
    buffer: BufWriter<W>,
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,
    /// Byte enclosing quoted cells; doubled when it occurs inside one.
    quote: u8,
    /// Cell count of the first record written, `None` until then.
    field_count: Option<usize>,
}
14
15impl<W: Write> Writer<W> {
16    pub fn new(writer: W, delimiter: u8, quote: u8) -> Self {
17        Self {
18            buffer: BufWriter::new(writer),
19            quote,
20            delimiter,
21            field_count: None,
22        }
23    }
24
25    pub fn with_capacity(capacity: usize, writer: W, delimiter: u8, quote: u8) -> Self {
26        Self {
27            buffer: BufWriter::with_capacity(capacity, writer),
28            quote,
29            delimiter,
30            field_count: None,
31        }
32    }
33
34    #[inline(always)]
35    pub fn flush(&mut self) -> error::Result<()> {
36        self.buffer.flush()?;
37
38        Ok(())
39    }
40
41    #[inline]
42    fn check_field_count(&mut self, written: usize) -> error::Result<()> {
43        match self.field_count {
44            Some(expected) => {
45                if written != expected {
46                    return Err(Error::unequal_lengths(expected, written));
47                }
48            }
49            None => {
50                self.field_count = Some(written);
51            }
52        }
53
54        Ok(())
55    }
56
57    pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
58    where
59        I: IntoIterator<Item = T>,
60        T: AsRef<[u8]>,
61    {
62        let mut first = true;
63        let mut written: usize = 0;
64        let mut empty = false;
65
66        for cell in record.into_iter() {
67            if first {
68                first = false;
69            } else {
70                self.buffer.write_all(&[self.delimiter])?;
71            }
72
73            let cell = cell.as_ref();
74
75            if cell.is_empty() {
76                empty = true;
77            }
78
79            self.buffer.write_all(cell)?;
80
81            written += 1;
82        }
83
84        if written == 1 && empty {
85            self.buffer.write_all(&[self.quote, self.quote])?;
86        }
87
88        self.check_field_count(written)?;
89
90        self.buffer.write_all(b"\n")?;
91
92        Ok(())
93    }
94
95    #[inline(always)]
96    pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
97        self.write_record_no_quoting(record.iter())
98    }
99
100    #[inline]
101    fn should_quote(&self, cell: &[u8]) -> bool {
102        if cell.len() < 8 {
103            cell.iter()
104                .copied()
105                .any(|b| b == self.quote || b == self.delimiter || b == b'\n')
106        } else {
107            memchr3(self.quote, self.delimiter, b'\n', cell).is_some()
108        }
109    }
110
111    fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
112        self.buffer.write_all(&[self.quote])?;
113
114        let mut i: usize = 0;
115
116        if cell.len() < 8 {
117            while i < cell.len() {
118                match cell[i..].iter().copied().position(|b| b == self.quote) {
119                    None => {
120                        self.buffer.write_all(&cell[i..])?;
121                        break;
122                    }
123                    Some(offset) => {
124                        self.buffer.write_all(&cell[i..i + offset + 1])?;
125                        self.buffer.write_all(&[self.quote])?;
126                        i += offset + 1;
127                    }
128                }
129            }
130        } else {
131            while i < cell.len() {
132                match memchr(self.quote, &cell[i..]) {
133                    None => {
134                        self.buffer.write_all(&cell[i..])?;
135                        break;
136                    }
137                    Some(offset) => {
138                        self.buffer.write_all(&cell[i..i + offset + 1])?;
139                        self.buffer.write_all(&[self.quote])?;
140                        i += offset + 1;
141                    }
142                };
143            }
144        }
145
146        self.buffer.write_all(&[self.quote])?;
147
148        Ok(())
149    }
150
151    pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
152    where
153        I: IntoIterator<Item = T>,
154        T: AsRef<[u8]>,
155    {
156        let mut first = true;
157        let mut written: usize = 0;
158        let mut empty = false;
159
160        for cell in record.into_iter() {
161            if first {
162                first = false;
163            } else {
164                self.buffer.write_all(&[self.delimiter])?;
165            }
166
167            let cell = cell.as_ref();
168
169            if cell.is_empty() {
170                empty = true;
171            }
172
173            if self.should_quote(cell) {
174                self.write_quoted_cell(cell)?;
175            } else {
176                self.buffer.write_all(cell)?;
177            }
178
179            written += 1;
180        }
181
182        if written == 1 && empty {
183            self.buffer.write_all(&[self.quote, self.quote])?;
184        }
185
186        self.check_field_count(written)?;
187
188        self.buffer.write_all(b"\n")?;
189
190        Ok(())
191    }
192
193    #[inline(always)]
194    pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
195        self.write_record(record.iter())
196    }
197
198    #[inline]
199    pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
200        self.buffer.into_inner()
201    }
202}
203
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    use crate::brec;

    /// A header written without quoting followed by two records that need
    /// quoting (delimiter, newline and quote bytes inside cells).
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        // A small capacity so that the records do not fit in a single buffer.
        let sink = Cursor::new(Vec::<u8>::new());
        let mut csv = Writer::with_capacity(32, sink, b',', b'"');

        csv.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        csv.write_byte_record(&brec!["john,", "landis", "45"])?;
        csv.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = csv.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Empty cells are written bare, except for a record made of a single
    /// empty cell, which is written as an empty quoted cell.
    #[test]
    fn test_write_empty_cells() {
        // Serializes one record with a fresh writer and returns the output.
        fn render(record: &ByteRecord) -> String {
            let mut csv = Writer::new(Cursor::new(Vec::<u8>::new()), b',', b'"');
            csv.write_byte_record(record).unwrap();

            let bytes = csv.into_inner().unwrap().into_inner();

            String::from_utf8_lossy(&bytes).into_owned()
        }

        assert_eq!(render(&brec![]), "\n");
        assert_eq!(render(&brec![""]), "\"\"\n");
        assert_eq!(render(&brec!["", "", ""]), ",,\n");
        assert_eq!(render(&brec!["name", "", "age"]), "name,,age\n");
        assert_eq!(render(&brec!["name", ""]), "name,\n");
    }
}