1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::{ByteRecord, ZeroCopyByteRecord};
7
/// Holds the settings used to construct a [`Writer`].
pub struct WriterBuilder {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,

    /// Byte placed around quoted cells and doubled to escape itself inside them.
    quote: u8,

    /// Capacity, in bytes, handed to the `BufWriter` wrapping the output.
    buffer_capacity: usize,

    /// When `true`, the built writer does not compare the number of cells
    /// between records.
    flexible: bool,
}
15
16impl Default for WriterBuilder {
17 fn default() -> Self {
18 Self {
19 delimiter: b',',
20 quote: b'"',
21 buffer_capacity: 8192,
22 flexible: false,
23 }
24 }
25}
26
27impl WriterBuilder {
28 pub fn new() -> Self {
29 Self::default()
30 }
31
32 pub fn with_capacity(capacity: usize) -> Self {
33 let mut builder = Self::default();
34 builder.buffer_capacity(capacity);
35 builder
36 }
37
38 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
39 self.delimiter = delimiter;
40 self
41 }
42
43 pub fn quote(&mut self, quote: u8) -> &mut Self {
44 self.quote = quote;
45 self
46 }
47
48 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
49 self.buffer_capacity = capacity;
50 self
51 }
52
53 pub fn flexible(&mut self, yes: bool) -> &mut Self {
54 self.flexible = yes;
55 self
56 }
57
58 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
59 let mut must_quote = [false; 256];
60 must_quote[b'\r' as usize] = true;
61 must_quote[b'\n' as usize] = true;
62 must_quote[self.delimiter as usize] = true;
63 must_quote[self.quote as usize] = true;
64
65 Writer {
66 delimiter: self.delimiter,
67 quote: self.quote,
68 buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
69 flexible: self.flexible,
70 field_count: None,
71 must_quote,
72 }
73 }
74}
75
/// Buffered CSV writer emitting records to an underlying `W`.
pub struct Writer<W: Write> {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,

    /// Byte placed around quoted cells and doubled to escape itself inside them.
    quote: u8,

    /// Buffered handle on the underlying output.
    buffer: BufWriter<W>,

    /// When `true`, the number of cells per record is never checked.
    flexible: bool,

    /// Number of cells of the first record that went through the field-count
    /// check, or `None` while no record has been checked yet.
    field_count: Option<usize>,

    /// Lookup table indexed by byte value: `true` for the bytes whose presence
    /// in a cell forces that cell to be quoted.
    must_quote: [bool; 256],
}
91
92impl<W: Write> Writer<W> {
93 pub fn from_writer(writer: W) -> Self {
94 WriterBuilder::new().from_writer(writer)
95 }
96
97 #[inline(always)]
98 pub fn flush(&mut self) -> io::Result<()> {
99 self.buffer.flush()
100 }
101
102 #[inline]
103 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
104 if self.flexible {
105 return Ok(());
106 }
107
108 match self.field_count {
109 Some(expected) => {
110 if written != expected {
111 return Err(Error::new(ErrorKind::UnequalLengths {
112 expected_len: expected,
113 len: written,
114 pos: None,
115 }));
116 }
117 }
118 None => {
119 self.field_count = Some(written);
120 }
121 }
122
123 Ok(())
124 }
125
126 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
127 where
128 I: IntoIterator<Item = T>,
129 T: AsRef<[u8]>,
130 {
131 let mut first = true;
132 let mut written: usize = 0;
133 let mut empty = false;
134
135 for cell in record.into_iter() {
136 if first {
137 first = false;
138 } else {
139 self.buffer.write_all(&[self.delimiter])?;
140 }
141
142 let cell = cell.as_ref();
143
144 if cell.is_empty() {
145 empty = true;
146 }
147
148 self.buffer.write_all(cell)?;
149
150 written += 1;
151 }
152
153 if written == 1 && empty {
154 self.buffer.write_all(&[self.quote, self.quote])?;
155 }
156
157 self.check_field_count(written)?;
158
159 self.buffer.write_all(b"\n")?;
160
161 Ok(())
162 }
163
164 #[inline(always)]
165 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
166 self.write_record_no_quoting(record.iter())
167 }
168
169 #[inline]
170 fn should_quote(&self, mut cell: &[u8]) -> bool {
171 let mut yes = false;
173 while !yes && cell.len() >= 8 {
174 yes = self.must_quote[cell[0] as usize]
175 || self.must_quote[cell[1] as usize]
176 || self.must_quote[cell[2] as usize]
177 || self.must_quote[cell[3] as usize]
178 || self.must_quote[cell[4] as usize]
179 || self.must_quote[cell[5] as usize]
180 || self.must_quote[cell[6] as usize]
181 || self.must_quote[cell[7] as usize];
182 cell = &cell[8..];
183 }
184 yes || cell.iter().any(|&b| self.must_quote[b as usize])
185 }
186
187 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
188 self.buffer.write_all(&[self.quote])?;
189
190 let mut i: usize = 0;
191
192 if cell.len() < 8 {
193 while i < cell.len() {
194 match cell[i..].iter().copied().position(|b| b == self.quote) {
195 None => {
196 self.buffer.write_all(&cell[i..])?;
197 break;
198 }
199 Some(offset) => {
200 self.buffer.write_all(&cell[i..i + offset + 1])?;
201 self.buffer.write_all(&[self.quote])?;
202 i += offset + 1;
203 }
204 }
205 }
206 } else {
207 while i < cell.len() {
208 match memchr(self.quote, &cell[i..]) {
209 None => {
210 self.buffer.write_all(&cell[i..])?;
211 break;
212 }
213 Some(offset) => {
214 self.buffer.write_all(&cell[i..i + offset + 1])?;
215 self.buffer.write_all(&[self.quote])?;
216 i += offset + 1;
217 }
218 };
219 }
220 }
221
222 self.buffer.write_all(&[self.quote])?;
223
224 Ok(())
225 }
226
227 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
228 where
229 I: IntoIterator<Item = T>,
230 T: AsRef<[u8]>,
231 {
232 let mut first = true;
233 let mut written: usize = 0;
234 let mut empty = false;
235
236 for cell in record.into_iter() {
237 if first {
238 first = false;
239 } else {
240 self.buffer.write_all(&[self.delimiter])?;
241 }
242
243 let cell = cell.as_ref();
244
245 if cell.is_empty() {
246 empty = true;
247 }
248
249 if self.should_quote(cell) {
250 self.write_quoted_cell(cell)?;
251 } else {
252 self.buffer.write_all(cell)?;
253 }
254
255 written += 1;
256 }
257
258 if written == 1 && empty {
259 self.buffer.write_all(&[self.quote, self.quote])?;
260 }
261
262 self.check_field_count(written)?;
263
264 self.buffer.write_all(b"\n")?;
265
266 Ok(())
267 }
268
269 #[inline(always)]
270 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
271 self.write_record(record.iter())
272 }
273
274 #[inline]
275 pub fn write_zero_copy_byte_record(
276 &mut self,
277 record: &ZeroCopyByteRecord,
278 ) -> error::Result<()> {
279 if record.quote == self.quote {
280 self.write_record_no_quoting(record.iter())
281 } else {
282 self.write_record(record.unescaped_iter())
283 }
284 }
285
286 #[inline(always)]
287 pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
288 self.buffer.write_all(record)?;
289 self.buffer.write_all(b"\n")?;
290
291 Ok(())
292 }
293
294 #[inline]
295 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
296 self.buffer.into_inner()
297 }
298}
299
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    /// Mixes unquoted and quoted writes through a small (32-byte) buffer and
    /// checks the exact bytes that reach the underlying cursor.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = writer.into_inner()?;
        let written = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            written,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Checks how empty cells are rendered, including the record holding a
    /// single empty cell, which must come out as two quote bytes.
    #[test]
    fn test_write_empty_cells() {
        // Renders one record with a default writer and returns the output as text.
        fn write(record: &ByteRecord) -> String {
            let mut writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        assert_eq!(write(&brec![]), "\n");
        assert_eq!(write(&brec![""]), "\"\"\n");
        assert_eq!(write(&brec!["", "", ""]), ",,\n");
        assert_eq!(write(&brec!["name", "", "age"]), "name,,age\n");
        assert_eq!(write(&brec!["name", ""]), "name,\n");
    }

    /// Checks quoting detection for cells with and without special bytes,
    /// including a cell longer than the 8-byte scanning block.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        assert!(!writer.should_quote(b"test"));
        assert!(writer.should_quote(b"test,"));
        assert!(writer.should_quote(b"te\"st"));
        assert!(writer.should_quote(b"te\nst"));
        assert!(writer.should_quote(b"testtesttesttesttesttesttesttest\n"));
        assert!(writer.should_quote(b"te\rst"));
    }
}