1use std::io::{BufWriter, IntoInnerError, Write};
2
3use memchr::{memchr, memchr3};
4
5use crate::error::{self, Error};
6use crate::records::ByteRecord;
7
/// Configuration used to create a [`Writer`].
///
/// A builder starts from the values given by its `Default` implementation,
/// is adjusted through its setter methods, and is finally turned into a
/// writer with `from_writer`. The builder is only read when creating a
/// writer, so it can be cloned and reused for several outputs.
#[derive(Debug, Clone)]
pub struct WriterBuilder {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,
    /// Byte used to wrap cells that need quoting; it is doubled when it
    /// appears inside a quoted cell.
    quote: u8,
    /// Capacity handed to the internal `BufWriter`, or `None` to let
    /// `BufWriter::new` pick its own capacity.
    buffer_capacity: Option<usize>,
    /// When `true`, the writer does not check that every record has the
    /// same number of cells.
    flexible: bool,
}
14
15impl Default for WriterBuilder {
16 fn default() -> Self {
17 Self {
18 delimiter: b',',
19 quote: b'"',
20 buffer_capacity: None,
21 flexible: false,
22 }
23 }
24}
25
26impl WriterBuilder {
27 pub fn new() -> Self {
28 Self::default()
29 }
30
31 pub fn with_capacity(capacity: usize) -> Self {
32 let mut builder = Self::default();
33 builder.buffer_capacity(capacity);
34 builder
35 }
36
37 pub fn delimiter(&mut self, delimiter: u8) -> &mut Self {
38 self.delimiter = delimiter;
39 self
40 }
41
42 pub fn quote(&mut self, quote: u8) -> &mut Self {
43 self.quote = quote;
44 self
45 }
46
47 pub fn buffer_capacity(&mut self, capacity: usize) -> &mut Self {
48 self.buffer_capacity = Some(capacity);
49 self
50 }
51
52 pub fn flexible(&mut self, yes: bool) -> &mut Self {
53 self.flexible = yes;
54 self
55 }
56
57 fn bufwriter<W: Write>(&self, writer: W) -> BufWriter<W> {
58 match self.buffer_capacity {
59 None => BufWriter::new(writer),
60 Some(capacity) => BufWriter::with_capacity(capacity, writer),
61 }
62 }
63
64 pub fn from_writer<W: Write>(&self, writer: W) -> Writer<W> {
65 Writer {
66 delimiter: self.delimiter,
67 quote: self.quote,
68 buffer: self.bufwriter(writer),
69 flexible: self.flexible,
70 field_count: None,
71 }
72 }
73}
74
/// Buffered writer that serializes records as delimiter-separated lines.
///
/// All output goes through an internal `BufWriter`, so bytes only reach the
/// wrapped writer when the buffer fills up, when `flush` is called, or when
/// the writer is unwrapped with `into_inner`.
#[derive(Debug)]
pub struct Writer<W: Write> {
    /// Byte written between two consecutive cells of a record.
    delimiter: u8,
    /// Byte used to wrap cells that need quoting; it is doubled when it
    /// appears inside a quoted cell.
    quote: u8,
    /// Buffer sitting in front of the wrapped writer.
    buffer: BufWriter<W>,
    /// When `true`, records are not required to share the same cell count.
    flexible: bool,
    /// Cell count of the first record written in strict mode, used as the
    /// expected count for later records. `None` until a record was checked.
    field_count: Option<usize>,
}
82
83impl<W: Write> Writer<W> {
84 pub fn from_writer(writer: W) -> Self {
85 WriterBuilder::new().from_writer(writer)
86 }
87
88 #[inline(always)]
89 pub fn flush(&mut self) -> error::Result<()> {
90 self.buffer.flush()?;
91
92 Ok(())
93 }
94
95 #[inline]
96 fn check_field_count(&mut self, written: usize) -> error::Result<()> {
97 if self.flexible {
98 return Ok(());
99 }
100
101 match self.field_count {
102 Some(expected) => {
103 if written != expected {
104 return Err(Error::unequal_lengths(expected, written));
105 }
106 }
107 None => {
108 self.field_count = Some(written);
109 }
110 }
111
112 Ok(())
113 }
114
115 pub fn write_record_no_quoting<I, T>(&mut self, record: I) -> error::Result<()>
116 where
117 I: IntoIterator<Item = T>,
118 T: AsRef<[u8]>,
119 {
120 let mut first = true;
121 let mut written: usize = 0;
122 let mut empty = false;
123
124 for cell in record.into_iter() {
125 if first {
126 first = false;
127 } else {
128 self.buffer.write_all(&[self.delimiter])?;
129 }
130
131 let cell = cell.as_ref();
132
133 if cell.is_empty() {
134 empty = true;
135 }
136
137 self.buffer.write_all(cell)?;
138
139 written += 1;
140 }
141
142 if written == 1 && empty {
143 self.buffer.write_all(&[self.quote, self.quote])?;
144 }
145
146 self.check_field_count(written)?;
147
148 self.buffer.write_all(b"\n")?;
149
150 Ok(())
151 }
152
153 #[inline(always)]
154 pub fn write_byte_record_no_quoting(&mut self, record: &ByteRecord) -> error::Result<()> {
155 self.write_record_no_quoting(record.iter())
156 }
157
158 #[inline]
159 fn should_quote(&self, cell: &[u8]) -> bool {
160 if cell.len() < 8 {
161 cell.iter()
162 .copied()
163 .any(|b| b == self.quote || b == self.delimiter || b == b'\n')
164 } else {
165 memchr3(self.quote, self.delimiter, b'\n', cell).is_some()
166 }
167 }
168
169 fn write_quoted_cell(&mut self, cell: &[u8]) -> error::Result<()> {
170 self.buffer.write_all(&[self.quote])?;
171
172 let mut i: usize = 0;
173
174 if cell.len() < 8 {
175 while i < cell.len() {
176 match cell[i..].iter().copied().position(|b| b == self.quote) {
177 None => {
178 self.buffer.write_all(&cell[i..])?;
179 break;
180 }
181 Some(offset) => {
182 self.buffer.write_all(&cell[i..i + offset + 1])?;
183 self.buffer.write_all(&[self.quote])?;
184 i += offset + 1;
185 }
186 }
187 }
188 } else {
189 while i < cell.len() {
190 match memchr(self.quote, &cell[i..]) {
191 None => {
192 self.buffer.write_all(&cell[i..])?;
193 break;
194 }
195 Some(offset) => {
196 self.buffer.write_all(&cell[i..i + offset + 1])?;
197 self.buffer.write_all(&[self.quote])?;
198 i += offset + 1;
199 }
200 };
201 }
202 }
203
204 self.buffer.write_all(&[self.quote])?;
205
206 Ok(())
207 }
208
209 pub fn write_record<I, T>(&mut self, record: I) -> error::Result<()>
210 where
211 I: IntoIterator<Item = T>,
212 T: AsRef<[u8]>,
213 {
214 let mut first = true;
215 let mut written: usize = 0;
216 let mut empty = false;
217
218 for cell in record.into_iter() {
219 if first {
220 first = false;
221 } else {
222 self.buffer.write_all(&[self.delimiter])?;
223 }
224
225 let cell = cell.as_ref();
226
227 if cell.is_empty() {
228 empty = true;
229 }
230
231 if self.should_quote(cell) {
232 self.write_quoted_cell(cell)?;
233 } else {
234 self.buffer.write_all(cell)?;
235 }
236
237 written += 1;
238 }
239
240 if written == 1 && empty {
241 self.buffer.write_all(&[self.quote, self.quote])?;
242 }
243
244 self.check_field_count(written)?;
245
246 self.buffer.write_all(b"\n")?;
247
248 Ok(())
249 }
250
251 #[inline(always)]
252 pub fn write_byte_record(&mut self, record: &ByteRecord) -> error::Result<()> {
253 self.write_record(record.iter())
254 }
255
256 #[inline]
257 pub fn into_inner(self) -> Result<W, IntoInnerError<BufWriter<W>>> {
258 self.buffer.into_inner()
259 }
260}
261
#[cfg(test)]
mod tests {
    use std::io::{self, Cursor};

    use super::*;

    use crate::brec;

    /// Writes one unquoted and two quoted records through a 32-byte buffer
    /// and checks the exact bytes that reach the underlying cursor.
    #[test]
    fn test_write_byte_record() -> io::Result<()> {
        let sink = Cursor::new(Vec::<u8>::new());
        let mut writer = WriterBuilder::with_capacity(32).from_writer(sink);

        let header = brec!["name", "surname", "age"];
        writer.write_byte_record_no_quoting(&header)?;

        // Second row has a delimiter in a cell; third row has a line break
        // and quote bytes.
        let rows = [
            brec!["john,", "landis", "45"],
            brec!["lucy", "get\ngot", "\"te,\"st\""],
        ];

        for row in &rows {
            writer.write_byte_record(row)?;
        }

        let cursor = writer.into_inner()?;
        let text = std::str::from_utf8(cursor.get_ref()).unwrap();

        assert_eq!(
            text,
            "name,surname,age\n\"john,\",landis,45\nlucy,\"get\ngot\",\"\"\"te,\"\"st\"\"\"\n",
        );

        Ok(())
    }

    /// Checks how empty records and empty cells are serialized by a writer
    /// using the default configuration.
    #[test]
    fn test_write_empty_cells() {
        // Serializes a single record with a fresh default writer.
        fn render(record: &ByteRecord) -> String {
            let mut writer = Writer::from_writer(Cursor::new(Vec::<u8>::new()));
            writer.write_byte_record(record).unwrap();

            let bytes = writer.into_inner().unwrap().into_inner();
            String::from_utf8_lossy(&bytes).into_owned()
        }

        let cases = [
            (brec![], "\n"),
            (brec![""], "\"\"\n"),
            (brec!["", "", ""], ",,\n"),
            (brec!["name", "", "age"], "name,,age\n"),
            (brec!["name", ""], "name,\n"),
        ];

        for (record, expected) in &cases {
            assert_eq!(render(record), *expected);
        }
    }
}