1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::ByteRecord;
7
/// Configuration used to build a [`Writer`].
///
/// The builder only stores plain settings, so it is cheap to clone and can be
/// reused to create any number of writers.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to wrap cells that need quoting and to escape itself.
    quote: u8,
    /// Capacity, in bytes, of the buffer placed in front of the wrapped writer.
    buffer_capacity: usize,
    /// When `true`, records may have differing numbers of cells.
    flexible: bool,
}
15
16impl Default for WriterBuilder {
17 fn default() -> Self {
18 Self {
19 delimiter: b',',
20 quote: b'"',
21 buffer_capacity: 8192,
22 flexible: false,
23 }
24 }
25}
26
27impl WriterBuilder {
28 pub fn new() -> Self {
30 Self::default()
31 }
32
33 pub fn with_capacity(capacity: usize) -> Self {
35 let mut builder = Self::default();
36 builder.buffer_capacity(capacity);
37 builder
38 }
39
40 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
46 self.delimiter = delimiter;
47 self
48 }
49
50 pub fn quote(&mut self, quote: u8) -> &mut Self {
56 self.quote = quote;
57 self
58 }
59
60 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
62 self.buffer_capacity = capacity;
63 self
64 }
65
66 pub fn flexible(&mut self, yes: bool) -> &mut Self {
72 self.flexible = yes;
73 self
74 }
75
76 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
79 let mut must_quote = [false; 256];
80 must_quote[b'\r' as usize] = true;
81 must_quote[b'\n' as usize] = true;
82 must_quote[self.delimiter as usize] = true;
83 must_quote[self.quote as usize] = true;
84
85 Writer {
86 delimiter: self.delimiter,
87 quote: self.quote,
88 buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
89 flexible: self.flexible,
90 field_count: None,
91 must_quote,
92 }
93 }
94}
95
/// A buffered writer emitting delimited records to any [`Write`] sink.
pub struct Writer<W>
where
    W: Write,
{
    /// Byte written between the cells of a record.
    delimiter: u8,
    /// Byte used to wrap quoted cells and to escape itself inside them.
    quote: u8,
    /// Buffered handle on the wrapped writer.
    buffer: BufWriter<W>,
    /// When `true`, the number of cells per record is not checked.
    flexible: bool,
    /// Number of cells of the first checked record, `None` until one is seen.
    field_count: Option<usize>,
    /// Lookup table indexed by byte value: `true` when the byte forces quoting.
    must_quote: [bool; 256],
}
111
112impl<W: Write> Writer<W> {
113 pub fn from_writer(writer: W) -> Self {
119 WriterBuilder::new().from_writer(writer)
120 }
121
122 #[inline(always)]
124 pub fn flush(&mut self) -> io::Result<()> {
125 self.buffer.flush()
126 }
127
128 #[inline]
129 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
130 if self.flexible {
131 return Ok(());
132 }
133
134 match self.field_count {
135 Some(expected) => {
136 if written != expected {
137 return Err(Error::new(ErrorKind::UnequalLengths {
138 expected_len: expected,
139 len: written,
140 pos: None,
141 }));
142 }
143 }
144 None => {
145 self.field_count = Some(written);
146 }
147 }
148
149 Ok(())
150 }
151
152 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
163 where
164 I: IntoIterator<Item = T>,
165 T: AsRef<[u8]>,
166 {
167 let mut first = true;
168 let mut written: usize = 0;
169 let mut empty = false;
170
171 for cell in record.into_iter() {
172 if first {
173 first = false;
174 } else {
175 self.buffer.write_all(&[self.delimiter])?;
176 }
177
178 let cell = cell.as_ref();
179
180 if cell.is_empty() {
181 empty = true;
182 }
183
184 self.buffer.write_all(cell)?;
185
186 written += 1;
187 }
188
189 if written == 1 && empty {
190 self.buffer.write_all(&[self.quote, self.quote])?;
191 }
192
193 self.check_field_count(written)?;
194
195 self.buffer.write_all(b"\n")?;
196
197 Ok(())
198 }
199
200 #[inline(always)]
208 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
209 self.write_record_no_quoting(record.iter())
210 }
211
212 #[inline]
213 fn should_quote(&self, mut cell: &[u8]) -> bool {
214 let mut yes = false;
216 while !yes && cell.len() >= 8 {
217 yes = self.must_quote[cell[0] as usize]
218 || self.must_quote[cell[1] as usize]
219 || self.must_quote[cell[2] as usize]
220 || self.must_quote[cell[3] as usize]
221 || self.must_quote[cell[4] as usize]
222 || self.must_quote[cell[5] as usize]
223 || self.must_quote[cell[6] as usize]
224 || self.must_quote[cell[7] as usize];
225 cell = &cell[8..];
226 }
227 yes || cell.iter().any(|&b| self.must_quote[b as usize])
228 }
229
230 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
231 self.buffer.write_all(&[self.quote])?;
232
233 let mut i: usize = 0;
234
235 if cell.len() < 8 {
236 while i < cell.len() {
237 match cell[i..].iter().copied().position(|b| b == self.quote) {
238 None => {
239 self.buffer.write_all(&cell[i..])?;
240 break;
241 }
242 Some(offset) => {
243 self.buffer.write_all(&cell[i..i + offset + 1])?;
244 self.buffer.write_all(&[self.quote])?;
245 i += offset + 1;
246 }
247 }
248 }
249 } else {
250 while i < cell.len() {
251 match memchr(self.quote, &cell[i..]) {
252 None => {
253 self.buffer.write_all(&cell[i..])?;
254 break;
255 }
256 Some(offset) => {
257 self.buffer.write_all(&cell[i..i + offset + 1])?;
258 self.buffer.write_all(&[self.quote])?;
259 i += offset + 1;
260 }
261 };
262 }
263 }
264
265 self.buffer.write_all(&[self.quote])?;
266
267 Ok(())
268 }
269
270 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
275 where
276 I: IntoIterator<Item = T>,
277 T: AsRef<[u8]>,
278 {
279 let mut first = true;
280 let mut written: usize = 0;
281 let mut empty = false;
282
283 for cell in record.into_iter() {
284 if first {
285 first = false;
286 } else {
287 self.buffer.write_all(&[self.delimiter])?;
288 }
289
290 let cell = cell.as_ref();
291
292 if cell.is_empty() {
293 empty = true;
294 }
295
296 if self.should_quote(cell) {
297 self.write_quoted_cell(cell)?;
298 } else {
299 self.buffer.write_all(cell)?;
300 }
301
302 written += 1;
303 }
304
305 if written == 1 && empty {
306 self.buffer.write_all(&[self.quote, self.quote])?;
307 }
308
309 self.check_field_count(written)?;
310
311 self.buffer.write_all(b"\n")?;
312
313 Ok(())
314 }
315
316 #[inline(always)]
318 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
319 self.write_record(record.iter())
320 }
321
322 #[inline(always)]
331 pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
332 self.buffer.write_all(record)?;
333 self.buffer.write_all(b"\n")?;
334
335 Ok(())
336 }
337
338 #[inline]
341 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
342 self.buffer.into_inner()
343 }
344}
345
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    /// Mixes the quoting and non-quoting entry points on a writer whose
    /// buffer is only 32 bytes wide.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let sink = writer.into_inner()?;
        let written = std::str::from_utf8(sink.get_ref()).unwrap();

        // One expected line per written record.
        let expected = concat!(
            "name,surname,age\n",
            "\"john,\",landis,45\n",
            "lucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        assert_eq!(written, expected);

        Ok(())
    }

    /// Empty cells are written as-is, except for a lone empty cell which is
    /// written as two quote bytes.
    #[test]
    fn test_write_empty_cells() {
        fn render(record: &ByteRecord) -> String {
            let mut writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        let cases = [
            (brec![], "\n"),
            (brec![""], "\"\"\n"),
            (brec!["", "", ""], ",,\n"),
            (brec!["name", "", "age"], "name,,age\n"),
            (brec!["name", ""], "name,\n"),
        ];

        for (record, expected) in cases.iter() {
            assert_eq!(render(record), *expected);
        }
    }

    /// Cells holding the delimiter, the quote byte, CR or LF must be quoted,
    /// including when the offending byte sits past the 8-byte chunks.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        let cases: [(&[u8], bool); 6] = [
            (b"test", false),
            (b"test,", true),
            (b"te\"st", true),
            (b"te\nst", true),
            (b"testtesttesttesttesttesttesttest\n", true),
            (b"te\rst", true),
        ];

        for &(cell, expected) in cases.iter() {
            assert_eq!(writer.should_quote(cell), expected);
        }
    }
}