1use std::io::{self, BufWriter, IntoInnerError, Write};
2
3use memchr::memchr;
4
5use crate::error::{self, Error, ErrorKind};
6use crate::records::ByteRecord;
7
/// Configuration from which a [`Writer`] is built.
///
/// Implements `Debug` and `Clone` so a configuration can be logged and
/// reused to build several writers.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,
    /// Byte used to wrap cells that need quoting.
    quote: u8,
    /// Capacity, in bytes, of the buffer placed in front of the wrapped writer.
    buffer_capacity: usize,
    /// When `true`, records are allowed to have differing numbers of cells.
    flexible: bool,
}
15
16impl Default for WriterBuilder {
17 fn default() -> Self {
18 Self {
19 delimiter: b',',
20 quote: b'"',
21 buffer_capacity: 8192,
22 flexible: false,
23 }
24 }
25}
26
27impl WriterBuilder {
28 pub fn new() -> Self {
29 Self::default()
30 }
31
32 pub fn with_capacity(capacity: usize) -> Self {
33 let mut builder = Self::default();
34 builder.buffer_capacity(capacity);
35 builder
36 }
37
38 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
39 self.delimiter = delimiter;
40 self
41 }
42
43 pub fn quote(&mut self, quote: u8) -> &mut Self {
44 self.quote = quote;
45 self
46 }
47
48 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
49 self.buffer_capacity = capacity;
50 self
51 }
52
53 pub fn flexible(&mut self, yes: bool) -> &mut Self {
54 self.flexible = yes;
55 self
56 }
57
58 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
59 let mut must_quote = [false; 256];
60 must_quote[b'\r' as usize] = true;
61 must_quote[b'\n' as usize] = true;
62 must_quote[self.delimiter as usize] = true;
63 must_quote[self.quote as usize] = true;
64
65 Writer {
66 delimiter: self.delimiter,
67 quote: self.quote,
68 buffer: BufWriter::with_capacity(self.buffer_capacity, writer),
69 flexible: self.flexible,
70 field_count: None,
71 must_quote,
72 }
73 }
74}
75
/// Buffered writer that serializes records as delimiter-separated lines,
/// each terminated by a line feed.
pub struct Writer<W: Write> {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,
    /// Byte wrapped around quoted cells; it is doubled when it occurs inside
    /// a quoted cell.
    quote: u8,
    /// Buffered handle over the wrapped writer; all output goes through it.
    buffer: BufWriter<W>,
    /// When `true`, the number of cells per record is not checked.
    flexible: bool,
    /// Number of cells of the first length-checked record; `None` until such
    /// a record has been written.
    field_count: Option<usize>,
    /// Lookup table indexed by byte value: `true` for bytes whose presence
    /// forces a cell to be quoted.
    must_quote: [bool; 256],
}
91
92impl<W: Write> Writer<W> {
93 pub fn from_writer(writer: W) -> Self {
94 WriterBuilder::new().from_writer(writer)
95 }
96
97 #[inline(always)]
98 pub fn flush(&mut self) -> io::Result<()> {
99 self.buffer.flush()
100 }
101
102 #[inline]
103 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
104 if self.flexible {
105 return Ok(());
106 }
107
108 match self.field_count {
109 Some(expected) => {
110 if written != expected {
111 return Err(Error::new(ErrorKind::UnequalLengths {
112 expected_len: expected,
113 len: written,
114 pos: None,
115 }));
116 }
117 }
118 None => {
119 self.field_count = Some(written);
120 }
121 }
122
123 Ok(())
124 }
125
126 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
127 where
128 I: IntoIterator<Item = T>,
129 T: AsRef<[u8]>,
130 {
131 let mut first = true;
132 let mut written: usize = 0;
133 let mut empty = false;
134
135 for cell in record.into_iter() {
136 if first {
137 first = false;
138 } else {
139 self.buffer.write_all(&[self.delimiter])?;
140 }
141
142 let cell = cell.as_ref();
143
144 if cell.is_empty() {
145 empty = true;
146 }
147
148 self.buffer.write_all(cell)?;
149
150 written += 1;
151 }
152
153 if written == 1 && empty {
154 self.buffer.write_all(&[self.quote, self.quote])?;
155 }
156
157 self.check_field_count(written)?;
158
159 self.buffer.write_all(b"\n")?;
160
161 Ok(())
162 }
163
164 #[inline(always)]
165 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
166 self.write_record_no_quoting(record.iter())
167 }
168
169 #[inline]
170 fn should_quote(&self, mut cell: &[u8]) -> bool {
171 let mut yes = false;
173 while !yes && cell.len() >= 8 {
174 yes = self.must_quote[cell[0] as usize]
175 || self.must_quote[cell[1] as usize]
176 || self.must_quote[cell[2] as usize]
177 || self.must_quote[cell[3] as usize]
178 || self.must_quote[cell[4] as usize]
179 || self.must_quote[cell[5] as usize]
180 || self.must_quote[cell[6] as usize]
181 || self.must_quote[cell[7] as usize];
182 cell = &cell[8..];
183 }
184 yes || cell.iter().any(|&b| self.must_quote[b as usize])
185 }
186
187 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
188 self.buffer.write_all(&[self.quote])?;
189
190 let mut i: usize = 0;
191
192 if cell.len() < 8 {
193 while i < cell.len() {
194 match cell[i..].iter().copied().position(|b| b == self.quote) {
195 None => {
196 self.buffer.write_all(&cell[i..])?;
197 break;
198 }
199 Some(offset) => {
200 self.buffer.write_all(&cell[i..i + offset + 1])?;
201 self.buffer.write_all(&[self.quote])?;
202 i += offset + 1;
203 }
204 }
205 }
206 } else {
207 while i < cell.len() {
208 match memchr(self.quote, &cell[i..]) {
209 None => {
210 self.buffer.write_all(&cell[i..])?;
211 break;
212 }
213 Some(offset) => {
214 self.buffer.write_all(&cell[i..i + offset + 1])?;
215 self.buffer.write_all(&[self.quote])?;
216 i += offset + 1;
217 }
218 };
219 }
220 }
221
222 self.buffer.write_all(&[self.quote])?;
223
224 Ok(())
225 }
226
227 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
228 where
229 I: IntoIterator<Item = T>,
230 T: AsRef<[u8]>,
231 {
232 let mut first = true;
233 let mut written: usize = 0;
234 let mut empty = false;
235
236 for cell in record.into_iter() {
237 if first {
238 first = false;
239 } else {
240 self.buffer.write_all(&[self.delimiter])?;
241 }
242
243 let cell = cell.as_ref();
244
245 if cell.is_empty() {
246 empty = true;
247 }
248
249 if self.should_quote(cell) {
250 self.write_quoted_cell(cell)?;
251 } else {
252 self.buffer.write_all(cell)?;
253 }
254
255 written += 1;
256 }
257
258 if written == 1 && empty {
259 self.buffer.write_all(&[self.quote, self.quote])?;
260 }
261
262 self.check_field_count(written)?;
263
264 self.buffer.write_all(b"\n")?;
265
266 Ok(())
267 }
268
269 #[inline(always)]
270 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
271 self.write_record(record.iter())
272 }
273
274 #[inline(always)]
275 pub fn write_splitted_record(&mut self, record: &[u8]) -> error::Result<()> {
276 self.buffer.write_all(record)?;
277 self.buffer.write_all(b"\n")?;
278
279 Ok(())
280 }
281
282 #[inline]
283 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
284 self.buffer.into_inner()
285 }
286}
287
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    /// Mixes an unquoted header with quoted rows and checks the exact bytes
    /// produced, including delimiter, newline and quote escaping.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        writer.write_byte_record_no_quoting(&brec!["name", "surname", "age"])?;
        writer.write_byte_record(&brec!["john,", "landis", "45"])?;
        writer.write_byte_record(&brec!["lucy", "get\ngot", "\"te,\"st\""])?;

        let cursor = writer.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Checks how empty cells and empty records are rendered.
    #[test]
    fn test_write_empty_cells() {
        // Writes a single record with a default writer and returns the output.
        fn render(record: &ByteRecord) -> String {
            let sink = Cursor::new(Vec::<u8>::new());
            let mut writer = Writer::from_writer(sink);
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        let cases = [
            (brec![], "\n"),
            (brec![""], "\"\"\n"),
            (brec!["", "", ""], ",,\n"),
            (brec!["name", "", "age"], "name,,age\n"),
            (brec!["name", ""], "name,\n"),
        ];

        for (record, expected) in cases.iter() {
            assert_eq!(render(record), *expected);
        }
    }

    /// Checks which cells are detected as needing quotes, including a cell
    /// long enough to exercise the eight-bytes-at-a-time scan.
    #[test]
    fn should_quote() {
        let writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));

        let cases = [
            (&b"test"[..], false),
            (&b"test,"[..], true),
            (&b"te\"st"[..], true),
            (&b"te\nst"[..], true),
            (&b"testtesttesttesttesttesttesttest\n"[..], true),
            (&b"te\rst"[..], true),
        ];

        for &(cell, expected) in cases.iter() {
            assert_eq!(writer.should_quote(cell), expected);
        }
    }
}