csv_stream/writer.rs
1use csv_core::{self, WriteResult, Writer as CoreWriter, WriterBuilder as CoreWriterBuilder};
2use serde::Serialize;
3
4use crate::error::{Error, ErrorKind, Result};
5use crate::serializer::{serialize, serialize_header};
6use crate::{QuoteStyle, Terminator};
7
8/// Builds a CSV writer with various configuration knobs.
9///
10/// This builder can be used to tweak the field delimiter, record terminator
11/// and more. Once a CSV `Writer` is built, its configuration cannot be
12/// changed.
#[derive(Debug)]
pub struct WriterBuilder {
    /// The `csv_core` builder that receives the delimiter, terminator, quote
    /// style, quote, double-quote and escape settings.
    builder: CoreWriterBuilder,
    /// Initialized to 8 KiB by `Default`; not read anywhere in the visible
    /// code.
    // NOTE(review): looks like a leftover from a buffered writer — confirm
    // before removing.
    capacity: usize,
    /// Whether records may differ in their number of fields. Copied into the
    /// writer's state when a writer is built.
    flexible: bool,
    /// Whether a built writer starts out wanting to emit a header row from
    /// `serialize`.
    has_headers: bool,
}
20
21impl Default for WriterBuilder {
22 fn default() -> WriterBuilder {
23 WriterBuilder {
24 builder: CoreWriterBuilder::default(),
25 capacity: 8 * (1 << 10),
26 flexible: false,
27 has_headers: true,
28 }
29 }
30}
31
32impl WriterBuilder {
/// Builds a CSV writer from this configuration.
///
/// The builder is only borrowed, so the same builder can be used to build
/// more than one writer.
pub fn build(&self) -> Writer {
    Writer::new(self)
}
36
37 /// The field delimiter to use when writing CSV.
38 ///
39 /// The default is `b','`.
40 ///
41 /// # Example
42 ///
43 /// ```
44 /// use std::error::Error;
45 /// use csv_stream::WriterBuilder;
46 ///
47 /// # fn main() { example().unwrap(); }
48 /// fn example() -> Result<(), Box<dyn Error>> {
49 /// let mut wtr = WriterBuilder::default()
50 /// .delimiter(b';')
51 /// .build();
52 ///
53 /// let mut buf = vec![];
54 /// wtr.write_record(&mut buf, &["a", "b", "c"])?;
55 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
56 ///
57 /// let data = String::from_utf8(buf)?;
58 /// assert_eq!(data, "a;b;c\nx;y;z\n");
59 /// Ok(())
60 /// }
61 /// ```
62 pub fn delimiter(&mut self, delimiter: u8) -> &mut WriterBuilder {
63 self.builder.delimiter(delimiter);
64 self
65 }
66
67 /// Whether to write a header row before writing any other row.
68 ///
69 /// When this is enabled and the `serialize` method is used to write data
70 /// with something that contains field names (i.e., a struct), then a
71 /// header row is written containing the field names before any other row
72 /// is written.
73 ///
74 /// This option has no effect when using other methods to write rows. That
75 /// is, if you don't use `serialize`, then you must write your header row
76 /// explicitly if you want a header row.
77 ///
78 /// This is enabled by default.
79 ///
80 /// # Example: with headers
81 ///
82 /// This shows how the header will be automatically written from the field
83 /// names of a struct.
84 ///
85 /// ```
86 /// use std::error::Error;
87 ///
88 /// use csv_stream::WriterBuilder;
89 /// use serde::Serialize;
90 ///
91 /// #[derive(Serialize)]
92 /// struct Row<'a> {
93 /// city: &'a str,
94 /// country: &'a str,
95 /// // Serde allows us to name our headers exactly,
96 /// // even if they don't match our struct field names.
97 /// #[serde(rename = "popcount")]
98 /// population: u64,
99 /// }
100 ///
101 /// # fn main() { example().unwrap(); }
102 /// fn example() -> Result<(), Box<dyn Error>> {
103 /// let mut wtr = WriterBuilder::default().build();
104 ///
105 /// let mut buf = vec![];
106 /// wtr.serialize(
107 /// &mut buf,
108 /// Row {
109 /// city: "Boston",
110 /// country: "United States",
111 /// population: 4628910,
112 /// },
113 /// )?;
114 /// wtr.serialize(
115 /// &mut buf,
116 /// Row {
117 /// city: "Concord",
118 /// country: "United States",
119 /// population: 42695,
120 /// },
121 /// )?;
122 ///
123 /// let data = String::from_utf8(buf)?;
124 /// assert_eq!(data, "\
125 /// city,country,popcount
126 /// Boston,United States,4628910
127 /// Concord,United States,42695
128 /// ");
129 /// Ok(())
130 /// }
131 /// ```
132 ///
133 /// # Example: without headers
134 ///
135 /// This shows that serializing things that aren't structs (in this case,
136 /// a tuple struct) won't result in a header row being written. This means
137 /// you usually don't need to set `has_headers(false)` unless you
138 /// explicitly want to both write custom headers and serialize structs.
139 ///
140 /// ```
141 /// use std::error::Error;
142 /// use csv_stream::WriterBuilder;
143 ///
144 /// # fn main() { example().unwrap(); }
145 /// fn example() -> Result<(), Box<dyn Error>> {
146 /// let mut wtr = WriterBuilder::default().build();
147 /// let mut buf = vec![];
148 /// wtr.serialize(&mut buf, ("Boston", "United States", 4628910))?;
149 /// wtr.serialize(&mut buf, ("Concord", "United States", 42695))?;
150 ///
151 /// let data = String::from_utf8(buf)?;
152 /// assert_eq!(data, "\
153 /// Boston,United States,4628910
154 /// Concord,United States,42695
155 /// ");
156 /// Ok(())
157 /// }
158 /// ```
159 pub fn has_headers(&mut self, yes: bool) -> &mut WriterBuilder {
160 self.has_headers = yes;
161 self
162 }
163
164 /// Whether the number of fields in records is allowed to change or not.
165 ///
166 /// When disabled (which is the default), writing CSV data will return an
167 /// error if a record is written with a number of fields different from the
168 /// number of fields written in a previous record.
169 ///
170 /// When enabled, this error checking is turned off.
171 ///
172 /// # Example: writing flexible records
173 ///
174 /// ```
175 /// use std::error::Error;
176 /// use csv_stream::WriterBuilder;
177 ///
178 /// # fn main() { example().unwrap(); }
179 /// fn example() -> Result<(), Box<dyn Error>> {
180 /// let mut wtr = WriterBuilder::default()
181 /// .flexible(true)
182 /// .build();
183 /// let mut buf = vec![];
184 /// wtr.write_record(&mut buf, &["a", "b"])?;
185 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
186 ///
187 /// let data = String::from_utf8(buf)?;
188 /// assert_eq!(data, "a,b\nx,y,z\n");
189 /// Ok(())
190 /// }
191 /// ```
192 ///
193 /// # Example: error when `flexible` is disabled
194 ///
195 /// ```
196 /// use std::error::Error;
197 /// use csv_stream::WriterBuilder;
198 ///
199 /// # fn main() { example().unwrap(); }
200 /// fn example() -> Result<(), Box<dyn Error>> {
201 /// let mut wtr = WriterBuilder::default()
202 /// .flexible(false)
203 /// .build();
204 /// let mut buf = vec![];
205 /// wtr.write_record(&mut buf, &["a", "b"])?;
206 /// let err = wtr.write_record(&mut buf, &["x", "y", "z"]).unwrap_err();
207 /// match *err.kind() {
208 /// csv_stream::ErrorKind::UnequalLengths { expected_len, len, .. } => {
209 /// assert_eq!(expected_len, 2);
210 /// assert_eq!(len, 3);
211 /// }
212 /// ref wrong => {
213 /// panic!("expected UnequalLengths but got {:?}", wrong);
214 /// }
215 /// }
216 /// Ok(())
217 /// }
218 /// ```
219 pub fn flexible(&mut self, yes: bool) -> &mut WriterBuilder {
220 self.flexible = yes;
221 self
222 }
223
224 /// The record terminator to use when writing CSV.
225 ///
226 /// A record terminator can be any single byte. The default is `\n`.
227 ///
228 /// Note that RFC 4180 specifies that record terminators should be `\r\n`.
229 /// To use `\r\n`, use the special `Terminator::CRLF` value.
230 ///
231 /// # Example: CRLF
232 ///
233 /// This shows how to use RFC 4180 compliant record terminators.
234 ///
235 /// ```
236 /// use std::error::Error;
237 /// use csv_stream::{Terminator, WriterBuilder};
238 ///
239 /// # fn main() { example().unwrap(); }
240 /// fn example() -> Result<(), Box<dyn Error>> {
241 /// let mut wtr = WriterBuilder::default()
242 /// .terminator(Terminator::CRLF)
243 /// .build();
244 /// let mut buf = vec![];
245 /// wtr.write_record(&mut buf, &["a", "b", "c"])?;
246 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
247 ///
248 /// let data = String::from_utf8(buf)?;
249 /// assert_eq!(data, "a,b,c\r\nx,y,z\r\n");
250 /// Ok(())
251 /// }
252 /// ```
253 pub fn terminator(&mut self, term: Terminator) -> &mut WriterBuilder {
254 self.builder.terminator(term.to_core());
255 self
256 }
257
258 /// The quoting style to use when writing CSV.
259 ///
260 /// By default, this is set to `QuoteStyle::Necessary`, which will only
261 /// use quotes when they are necessary to preserve the integrity of data.
262 ///
263 /// Note that unless the quote style is set to `Never`, an empty field is
264 /// quoted if it is the only field in a record.
265 ///
266 /// # Example: non-numeric quoting
267 ///
268 /// This shows how to quote non-numeric fields only.
269 ///
270 /// ```
271 /// use std::error::Error;
272 /// use csv_stream::{QuoteStyle, WriterBuilder};
273 ///
274 /// # fn main() { example().unwrap(); }
275 /// fn example() -> Result<(), Box<dyn Error>> {
276 /// let mut wtr = WriterBuilder::default()
277 /// .quote_style(QuoteStyle::NonNumeric)
278 /// .build();
279 /// let mut buf = vec![];
280 /// wtr.write_record(&mut buf, &["a", "5", "c"])?;
281 /// wtr.write_record(&mut buf, &["3.14", "y", "z"])?;
282 ///
283 /// let data = String::from_utf8(buf)?;
284 /// assert_eq!(data, "\"a\",5,\"c\"\n3.14,\"y\",\"z\"\n");
285 /// Ok(())
286 /// }
287 /// ```
288 ///
289 /// # Example: never quote
290 ///
291 /// This shows how the CSV writer can be made to never write quotes, even
292 /// if it sacrifices the integrity of the data.
293 ///
294 /// ```
295 /// use std::error::Error;
296 /// use csv_stream::{QuoteStyle, WriterBuilder};
297 ///
298 /// # fn main() { example().unwrap(); }
299 /// fn example() -> Result<(), Box<dyn Error>> {
300 /// let mut wtr = WriterBuilder::default()
301 /// .quote_style(QuoteStyle::Never)
302 /// .build();
303 /// let mut buf = vec![];
304 /// wtr.write_record(&mut buf, &["a", "foo\nbar", "c"])?;
305 /// wtr.write_record(&mut buf, &["g\"h\"i", "y", "z"])?;
306 ///
307 /// let data = String::from_utf8(buf)?;
308 /// assert_eq!(data, "a,foo\nbar,c\ng\"h\"i,y,z\n");
309 /// Ok(())
310 /// }
311 /// ```
312 pub fn quote_style(&mut self, style: QuoteStyle) -> &mut WriterBuilder {
313 self.builder.quote_style(style.to_core());
314 self
315 }
316
317 /// The quote character to use when writing CSV.
318 ///
319 /// The default is `b'"'`.
320 ///
321 /// # Example
322 ///
323 /// ```
324 /// use std::error::Error;
325 /// use csv_stream::WriterBuilder;
326 ///
327 /// # fn main() { example().unwrap(); }
328 /// fn example() -> Result<(), Box<dyn Error>> {
329 /// let mut wtr = WriterBuilder::default()
330 /// .quote(b'\'')
331 /// .build();
332 /// let mut buf = vec![];
333 /// wtr.write_record(&mut buf, &["a", "foo\nbar", "c"])?;
334 /// wtr.write_record(&mut buf, &["g'h'i", "y\"y\"y", "z"])?;
335 ///
336 /// let data = String::from_utf8(buf)?;
337 /// assert_eq!(data, "a,'foo\nbar',c\n'g''h''i',y\"y\"y,z\n");
338 /// Ok(())
339 /// }
340 /// ```
341 pub fn quote(&mut self, quote: u8) -> &mut WriterBuilder {
342 self.builder.quote(quote);
343 self
344 }
345
346 /// Enable double quote escapes.
347 ///
348 /// This is enabled by default, but it may be disabled. When disabled,
349 /// quotes in field data are escaped instead of doubled.
350 ///
351 /// # Example
352 ///
353 /// ```
354 /// use std::error::Error;
355 /// use csv_stream::WriterBuilder;
356 ///
357 /// # fn main() { example().unwrap(); }
358 /// fn example() -> Result<(), Box<dyn Error>> {
359 /// let mut wtr = WriterBuilder::default()
360 /// .double_quote(false)
361 /// .build();
362 /// let mut buf = vec![];
363 /// wtr.write_record(&mut buf, &["a", "foo\"bar", "c"])?;
364 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
365 ///
366 /// let data = String::from_utf8(buf)?;
367 /// assert_eq!(data, "a,\"foo\\\"bar\",c\nx,y,z\n");
368 /// Ok(())
369 /// }
370 /// ```
371 pub fn double_quote(&mut self, yes: bool) -> &mut WriterBuilder {
372 self.builder.double_quote(yes);
373 self
374 }
375
376 /// The escape character to use when writing CSV.
377 ///
378 /// In some variants of CSV, quotes are escaped using a special escape
379 /// character like `\` (instead of escaping quotes by doubling them).
380 ///
381 /// By default, writing these idiosyncratic escapes is disabled, and is
382 /// only used when `double_quote` is disabled.
383 ///
384 /// # Example
385 ///
386 /// ```
387 /// use std::error::Error;
388 /// use csv_stream::WriterBuilder;
389 ///
390 /// # fn main() { example().unwrap(); }
391 /// fn example() -> Result<(), Box<dyn Error>> {
392 /// let mut wtr = WriterBuilder::default()
393 /// .double_quote(false)
394 /// .escape(b'$')
395 /// .build();
396 /// let mut buf = vec![];
397 /// wtr.write_record(&mut buf, &["a", "foo\"bar", "c"])?;
398 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
399 ///
400 /// let data = String::from_utf8(buf)?;
401 /// assert_eq!(data, "a,\"foo$\"bar\",c\nx,y,z\n");
402 /// Ok(())
403 /// }
404 /// ```
405 pub fn escape(&mut self, escape: u8) -> &mut WriterBuilder {
406 self.builder.escape(escape);
407 self
408 }
409
410 /// Create a new iterator for creating CSVs from the given iterator of rows
411 ///
412 /// # Example
413 ///
414 /// ```
415 /// use std::error::Error;
416 /// use csv_stream::WriterBuilder;
417 /// use serde::Serialize;
418 ///
419 /// # fn main() { example().unwrap(); }
420 /// fn example() -> Result<(), Box<dyn Error>> {
421 /// #[derive(Serialize)]
422 /// struct Row { foo: usize, bar: usize }
423 /// let rows = [
424 /// Row{ foo: 1, bar: 2 },
425 /// Row{ foo: 3, bar: 4 },
426 /// ];
427 ///
428 /// let mut csv_iter = WriterBuilder::default().build_iter(rows);
429 ///
430 /// let mut buf = vec![];
431 /// for row in csv_iter {
432 /// let row = row.unwrap();
433 /// buf.extend_from_slice(&row);
434 /// }
435 ///
436 /// let data = String::from_utf8(buf)?;
437 /// assert_eq!(data, "foo,bar\n1,2\n3,4\n");
438 /// Ok(())
439 /// }
440 /// ```
441 pub fn build_iter<I: IntoIterator>(&self, iter: I) -> crate::Iter<I::IntoIter> {
442 crate::Iter::new(iter, self.build())
443 }
444
445 /// Create a new stream for creating CSVs from the given stream of rows
446 ///
447 /// # Example
448 ///
449 /// ```
450 /// use std::error::Error;
451 /// use csv_stream::WriterBuilder;
452 /// use serde::Serialize;
453 /// use futures::StreamExt;
454 ///
455 /// # #[tokio::main]
456 /// # async fn main() { example().await.unwrap(); }
457 /// async fn example() -> Result<(), Box<dyn Error>> {
458 /// #[derive(Serialize)]
459 /// struct Row { foo: usize, bar: usize }
460 /// let rows = [
461 /// Row{ foo: 1, bar: 2 },
462 /// Row{ foo: 3, bar: 4 },
463 /// ];
464 /// // a Stream over rows
465 /// let stream = futures::stream::iter(rows);
466 ///
467 /// let mut csv_stream = WriterBuilder::default().build_stream(stream);
468 ///
469 /// let mut buf = vec![];
470 /// while let Some(row) = csv_stream.next().await {
471 /// let row = row.unwrap();
472 /// buf.extend_from_slice(&row);
473 /// }
474 ///
475 /// let data = String::from_utf8(buf)?;
476 /// assert_eq!(data, "foo,bar\n1,2\n3,4\n");
477 /// Ok(())
478 /// }
479 /// ```
#[cfg(feature = "stream")]
pub fn build_stream<S>(&self, stream: S) -> crate::Stream<S> {
    // Each call builds a fresh writer for the returned adapter.
    let writer = self.build();
    crate::Stream::new(stream, writer)
}
484}
485
/// An already configured CSV writer.
487///
488/// A CSV writer takes as input Rust values and writes those values in a valid
489/// CSV format as output.
490///
491/// While CSV writing is considerably easier than parsing CSV, a proper writer
492/// will do a number of things for you:
493///
494/// 1. Quote fields when necessary.
495/// 2. Check that all records have the same number of fields.
496/// 3. Write records with a single empty field correctly.
497/// 4. Automatically serialize normal Rust types to CSV records. When that
498/// type is a struct, a header row is automatically written corresponding
499/// to the fields of that struct.
500/// 5. Use buffering intelligently and otherwise avoid allocation. (This means
501/// that callers should not do their own buffering.)
502///
503/// All of the above can be configured using a
504/// [`WriterBuilder`](struct.WriterBuilder.html).
505///
506/// Note that the default configuration of a `Writer` uses `\n` for record
507/// terminators instead of `\r\n` as specified by RFC 4180. Use the
508/// `terminator` method on `WriterBuilder` to set the terminator to `\r\n` if
509/// it's desired.
#[derive(Debug)]
pub struct Writer {
    /// The `csv_core` writer that encodes fields, delimiters and terminators
    /// into the caller-supplied byte buffer.
    core: CoreWriter,
    /// Bookkeeping for header handling and record-length checks.
    state: WriterState,
}
515
/// Mutable bookkeeping that a `Writer` carries from one call to the next.
#[derive(Debug)]
struct WriterState {
    /// Whether the Serde serializer should attempt to write a header row.
    header: HeaderState,
    /// Whether inconsistent record lengths are allowed.
    flexible: bool,
    /// The number of fields written in the first record. This is compared
    /// with `fields_written` on all subsequent records to check for
    /// inconsistent record lengths. It stays `None` until the first record
    /// is terminated, and is never set when `flexible` is enabled.
    first_field_count: Option<u64>,
    /// The number of fields written in this record. This is used to report
    /// errors for inconsistent record lengths if `flexible` is disabled.
    /// It is reset to zero each time a record terminator is written.
    fields_written: u64,
    /// Initialized to `false` and neither read nor modified anywhere else in
    /// the visible code.
    // NOTE(review): the earlier comment described this as a guard against
    // flushing a buffer twice if an inner writer panics; nothing in this file
    // flushes, so confirm whether the field is still needed.
    panicked: bool,
}
534
/// HeaderState encodes a small state machine for handling header writes.
///
/// A writer starts in `Write` when headers are enabled and in `None`
/// otherwise; `serialize` only ever leaves the `Write` state.
#[derive(Debug)]
enum HeaderState {
    /// Indicates that we should attempt to write a header.
    Write,
    /// Indicates that writing a header was attempted, and a header was
    /// written.
    DidWrite,
    /// Indicates that writing a header was attempted, but no headers were
    /// written or the attempt failed.
    DidNotWrite,
    /// This state is used when headers are disabled. It cannot transition
    /// to any other state.
    None,
}
549
550impl Default for Writer {
551 fn default() -> Self {
552 WriterBuilder::default().build()
553 }
554}
555
556impl Writer {
557 fn new(builder: &WriterBuilder) -> Writer {
558 let header_state = if builder.has_headers {
559 HeaderState::Write
560 } else {
561 HeaderState::None
562 };
563 Writer {
564 core: builder.builder.build(),
565 state: WriterState {
566 header: header_state,
567 flexible: builder.flexible,
568 first_field_count: None,
569 fields_written: 0,
570 panicked: false,
571 },
572 }
573 }
574
575 /// Serialize a single record using Serde.
576 ///
577 /// # Example
578 ///
579 /// This shows how to serialize normal Rust structs as CSV records. The
580 /// fields of the struct are used to write a header row automatically.
581 /// (Writing the header row automatically can be disabled by building the
582 /// CSV writer with a [`WriterBuilder`](struct.WriterBuilder.html) and
583 /// calling the `has_headers` method.)
584 ///
585 /// ```
586 /// use std::error::Error;
587 ///
588 /// use csv_stream::WriterBuilder;
589 /// use serde::Serialize;
590 ///
591 /// #[derive(Serialize)]
592 /// struct Row<'a> {
593 /// city: &'a str,
594 /// country: &'a str,
595 /// // Serde allows us to name our headers exactly,
596 /// // even if they don't match our struct field names.
597 /// #[serde(rename = "popcount")]
598 /// population: u64,
599 /// }
600 ///
601 /// # fn main() { example().unwrap(); }
602 /// fn example() -> Result<(), Box<dyn Error>> {
603 /// let mut wtr = WriterBuilder::default().build();
604 /// let mut buf = vec![];
605 /// wtr.serialize(
606 /// &mut buf,
607 /// Row {
608 /// city: "Boston",
609 /// country: "United States",
610 /// population: 4628910,
611 /// },
612 /// )?;
613 /// wtr.serialize(
614 /// &mut buf,
615 /// Row {
616 /// city: "Concord",
617 /// country: "United States",
618 /// population: 42695,
619 /// },
620 /// )?;
621 ///
622 /// let data = String::from_utf8(buf)?;
623 /// assert_eq!(data, "\
624 /// city,country,popcount
625 /// Boston,United States,4628910
626 /// Concord,United States,42695
627 /// ");
628 /// Ok(())
629 /// }
630 /// ```
631 ///
632 /// # Rules
633 ///
634 /// The behavior of `serialize` is fairly simple:
635 ///
636 /// 1. Nested containers (tuples, `Vec`s, structs, etc.) are always
637 /// flattened (depth-first order).
638 ///
639 /// 2. If `has_headers` is `true` and the type contains field names, then
640 /// a header row is automatically generated.
641 ///
642 /// However, some container types cannot be serialized, and if
643 /// `has_headers` is `true`, there are some additional restrictions on the
644 /// types that can be serialized. See below for details.
645 ///
646 /// For the purpose of this section, Rust types can be divided into three
647 /// categories: scalars, non-struct containers, and structs.
648 ///
649 /// ## Scalars
650 ///
651 /// Single values with no field names are written like the following. Note
652 /// that some of the outputs may be quoted, according to the selected
653 /// quoting style.
654 ///
655 /// | Name | Example Type | Example Value | Output |
656 /// | ---- | ---- | ---- | ---- |
657 /// | boolean | `bool` | `true` | `true` |
658 /// | integers | `i8`, `i16`, `i32`, `i64`, `i128`, `u8`, `u16`, `u32`, `u64`, `u128` | `5` | `5` |
659 /// | floats | `f32`, `f64` | `3.14` | `3.14` |
660 /// | character | `char` | `'☃'` | `☃` |
661 /// | string | `&str` | `"hi"` | `hi` |
662 /// | bytes | `&[u8]` | `b"hi"[..]` | `hi` |
663 /// | option | `Option` | `None` | *empty* |
664 /// | option | | `Some(5)` | `5` |
665 /// | unit | `()` | `()` | *empty* |
666 /// | unit struct | `struct Foo;` | `Foo` | `Foo` |
667 /// | unit enum variant | `enum E { A, B }` | `E::A` | `A` |
668 /// | newtype struct | `struct Foo(u8);` | `Foo(5)` | `5` |
669 /// | newtype enum variant | `enum E { A(u8) }` | `E::A(5)` | `5` |
670 ///
671 /// Note that this table includes simple structs and enums. For example, to
672 /// serialize a field from either an integer or a float type, one can do
673 /// this:
674 ///
675 /// ```
676 /// use std::error::Error;
677 ///
678 /// use csv_stream::WriterBuilder;
679 /// use serde::Serialize;
680 ///
681 /// #[derive(Serialize)]
682 /// struct Row {
683 /// label: String,
684 /// value: Value,
685 /// }
686 ///
687 /// #[derive(Serialize)]
688 /// enum Value {
689 /// Integer(i64),
690 /// Float(f64),
691 /// }
692 ///
693 /// # fn main() { example().unwrap(); }
694 /// fn example() -> Result<(), Box<dyn Error>> {
695 /// let mut wtr = WriterBuilder::default().build();
696 /// let mut buf = vec![];
697 /// wtr.serialize(
698 /// &mut buf,
699 /// Row {
700 /// label: "foo".to_string(),
701 /// value: Value::Integer(3),
702 /// },
703 /// )?;
704 /// wtr.serialize(
705 /// &mut buf,
706 /// Row {
707 /// label: "bar".to_string(),
708 /// value: Value::Float(3.14),
709 /// },
710 /// )?;
711 ///
712 /// let data = String::from_utf8(buf)?;
713 /// assert_eq!(data, "\
714 /// label,value
715 /// foo,3
716 /// bar,3.14
717 /// ");
718 /// Ok(())
719 /// }
720 /// ```
721 ///
722 /// ## Non-Struct Containers
723 ///
724 /// Nested containers are flattened to their scalar components, with the
725 /// exception of a few types that are not allowed:
726 ///
727 /// | Name | Example Type | Example Value | Output |
728 /// | ---- | ---- | ---- | ---- |
729 /// | sequence | `Vec<u8>` | `vec![1, 2, 3]` | `1,2,3` |
730 /// | tuple | `(u8, bool)` | `(5, true)` | `5,true` |
731 /// | tuple struct | `Foo(u8, bool)` | `Foo(5, true)` | `5,true` |
732 /// | tuple enum variant | `enum E { A(u8, bool) }` | `E::A(5, true)` | *error* |
733 /// | struct enum variant | `enum E { V { a: u8, b: bool } }` | `E::V { a: 5, b: true }` | *error* |
734 /// | map | `BTreeMap<K, V>` | `BTreeMap::new()` | *error* |
735 ///
736 /// ## Structs
737 ///
738 /// Like the other containers, structs are flattened to their scalar
739 /// components:
740 ///
741 /// | Name | Example Type | Example Value | Output |
742 /// | ---- | ---- | ---- | ---- |
743 /// | struct | `struct Foo { a: u8, b: bool }` | `Foo { a: 5, b: true }` | `5,true` |
744 ///
745 /// If `has_headers` is `false`, then there are no additional restrictions;
746 /// types can be nested arbitrarily. For example:
747 ///
748 /// ```
749 /// use std::error::Error;
750 ///
751 /// use csv_stream::WriterBuilder;
752 /// use serde::Serialize;
753 ///
754 /// #[derive(Serialize)]
755 /// struct Row {
756 /// label: String,
757 /// values: Vec<f64>,
758 /// }
759 ///
760 /// # fn main() { example().unwrap(); }
761 /// fn example() -> Result<(), Box<dyn Error>> {
762 /// let mut wtr = WriterBuilder::default()
763 /// .has_headers(false)
764 /// .build();
765 ///
766 /// let mut buf = vec![];
767 /// wtr.serialize(
768 /// &mut buf,
769 /// Row {
770 /// label: "foo".to_string(),
771 /// values: vec![1.1234, 2.5678, 3.14],
772 /// },
773 /// )?;
774 ///
775 /// let data = String::from_utf8(buf)?;
776 /// assert_eq!(data, "\
777 /// foo,1.1234,2.5678,3.14
778 /// ");
779 /// Ok(())
780 /// }
781 /// ```
782 ///
783 /// However, if `has_headers` were enabled in the above example, then
784 /// serialization would return an error. Specifically, when `has_headers` is
785 /// `true`, there are two restrictions:
786 ///
787 /// 1. Named field values in structs must be scalars.
788 ///
789 /// 2. All scalars must be named field values in structs.
790 ///
791 /// Other than these two restrictions, types can be nested arbitrarily.
792 /// Here are a few examples:
793 ///
794 /// | Value | Header | Record |
795 /// | ---- | ---- | ---- |
796 /// | `(Foo { x: 5, y: 6 }, Bar { z: true })` | `x,y,z` | `5,6,true` |
797 /// | `vec![Foo { x: 5, y: 6 }, Foo { x: 7, y: 8 }]` | `x,y,x,y` | `5,6,7,8` |
798 /// | `(Foo { x: 5, y: 6 }, vec![Bar { z: Baz(true) }])` | `x,y,z` | `5,6,true` |
799 /// | `Foo { x: 5, y: (6, 7) }` | *error: restriction 1* | `5,6,7` |
/// | `(5, Foo { x: 6, y: 7 })` | *error: restriction 2* | `5,6,7` |
801 /// | `(Foo { x: 5, y: 6 }, true)` | *error: restriction 2* | `5,6,true` |
802 pub fn serialize<S: Serialize>(&mut self, buf: &mut Vec<u8>, record: S) -> Result<()> {
803 if let HeaderState::Write = self.state.header {
804 let wrote_header = serialize_header(self, buf, &record)?;
805 if wrote_header {
806 self.write_terminator(buf)?;
807 self.state.header = HeaderState::DidWrite;
808 } else {
809 self.state.header = HeaderState::DidNotWrite;
810 };
811 }
812 serialize(self, buf, &record)?;
813 self.write_terminator(buf)?;
814 Ok(())
815 }
816
817 /// Write a single record.
818 ///
819 /// This method accepts something that can be turned into an iterator that
820 /// yields elements that can be represented by a `&[u8]`.
821 ///
822 /// This may be called with an empty iterator, which will cause a record
823 /// terminator to be written. If no fields had been written, then a single
824 /// empty field is written before the terminator.
825 ///
826 /// # Example
827 ///
828 /// ```
829 /// use std::error::Error;
830 /// use csv_stream::WriterBuilder;
831 ///
832 /// # fn main() { example().unwrap(); }
833 /// fn example() -> Result<(), Box<dyn Error>> {
834 /// let mut wtr = WriterBuilder::default().build();
835 /// let mut buf = vec![];
836 /// wtr.write_record(&mut buf, &["a", "b", "c"])?;
837 /// wtr.write_record(&mut buf, &["x", "y", "z"])?;
838 ///
839 /// let data = String::from_utf8(buf)?;
840 /// assert_eq!(data, "a,b,c\nx,y,z\n");
841 /// Ok(())
842 /// }
843 /// ```
844 pub fn write_record<I, T>(&mut self, buf: &mut Vec<u8>, record: I) -> Result<()>
845 where
846 I: IntoIterator<Item = T>,
847 T: AsRef<[u8]>,
848 {
849 for field in record.into_iter() {
850 self.write_field_impl(buf, field)?;
851 }
852 self.write_terminator(buf)
853 }
854
855 /// Write a single field.
856 ///
857 /// One should prefer using `write_record` over this method. It is provided
858 /// for cases where writing a field at a time is more convenient than
859 /// writing a record at a time.
860 ///
861 /// Note that if this API is used, `write_record` should be called with an
862 /// empty iterator to write a record terminator.
863 ///
864 /// # Example
865 ///
866 /// ```
867 /// use std::error::Error;
868 /// use csv_stream::WriterBuilder;
869 ///
870 /// # fn main() { example().unwrap(); }
871 /// fn example() -> Result<(), Box<dyn Error>> {
872 /// let mut wtr = WriterBuilder::default().build();
873 /// let mut buf = vec![];
874 /// wtr.write_field(&mut buf, "a")?;
875 /// wtr.write_field(&mut buf, "b")?;
876 /// wtr.write_field(&mut buf, "c")?;
877 /// wtr.write_record(&mut buf, None::<&[u8]>)?;
878 /// wtr.write_field(&mut buf, "x")?;
879 /// wtr.write_field(&mut buf, "y")?;
880 /// wtr.write_field(&mut buf, "z")?;
881 /// wtr.write_record(&mut buf, None::<&[u8]>)?;
882 ///
883 /// let data = String::from_utf8(buf)?;
884 /// assert_eq!(data, "a,b,c\nx,y,z\n");
885 /// Ok(())
886 /// }
887 /// ```
888 pub fn write_field<T: AsRef<[u8]>>(&mut self, buf: &mut Vec<u8>, field: T) -> Result<()> {
889 self.write_field_impl(buf, field)
890 }
891
892 /// Implementation of write_field.
893 ///
894 /// This is a separate method so we can force the compiler to inline it
895 /// into write_record.
896 #[inline(always)]
897 fn write_field_impl<T: AsRef<[u8]>>(&mut self, buf: &mut Vec<u8>, field: T) -> Result<()> {
898 if self.state.fields_written > 0 {
899 self.write_delimiter(buf)?;
900 }
901 let field = field.as_ref();
902
903 extend(buf, 2 * field.len() + 2, |buf| {
904 let (res, nin, nout) = self.core.field(field, buf);
905 debug_assert_eq!(res, WriteResult::InputEmpty);
906 debug_assert_eq!(nin, field.len());
907 self.state.fields_written += 1;
908 nout
909 });
910
911 Ok(())
912 }
913
914 /// Write a CSV delimiter.
915 fn write_delimiter(&mut self, buf: &mut Vec<u8>) -> Result<()> {
916 extend(buf, 2, |buf| {
917 let (res, nout) = self.core.delimiter(buf);
918 debug_assert_eq!(res, WriteResult::InputEmpty);
919 nout
920 });
921
922 Ok(())
923 }
924
925 /// Write a CSV terminator.
926 fn write_terminator(&mut self, buf: &mut Vec<u8>) -> Result<()> {
927 self.check_field_count()?;
928 extend(buf, 4, |buf| {
929 let (res, nout) = self.core.terminator(buf);
930 debug_assert_eq!(res, WriteResult::InputEmpty);
931 self.state.fields_written = 0;
932 nout
933 });
934
935 Ok(())
936 }
937
938 fn check_field_count(&mut self) -> Result<()> {
939 if !self.state.flexible {
940 match self.state.first_field_count {
941 None => {
942 self.state.first_field_count = Some(self.state.fields_written);
943 }
944 Some(expected) if expected != self.state.fields_written => {
945 return Err(Error::new(ErrorKind::UnequalLengths {
946 expected_len: expected,
947 len: self.state.fields_written,
948 }))
949 }
950 Some(_) => {}
951 }
952 }
953 Ok(())
954 }
955}
956
/// Appends to `buf` whatever `f` writes into a scratch region of `max`
/// zero-filled bytes placed directly after the current contents.
///
/// `f` receives exactly `max` writable bytes and returns how many of them it
/// filled. The unused tail is trimmed off again, so `buf` grows by that
/// count. If `f` reports more than `max`, growth is capped at `max`: bytes
/// that `f` was never handed are not added to `buf`.
fn extend(buf: &mut Vec<u8>, max: usize, f: impl FnOnce(&mut [u8]) -> usize) {
    let start = buf.len();
    buf.resize(start + max, 0);
    let written = f(&mut buf[start..]);
    // `truncate` can only shrink, so an over-reported count cannot append
    // zero padding the way a second `resize` would.
    buf.truncate(start.saturating_add(written));
}
963
#[cfg(test)]
mod tests {
    use super::WriterBuilder;
    use serde::{serde_if_integer128, Serialize};

    /// Interprets the bytes produced by a writer as UTF-8 text.
    fn buf_as_string(buf: Vec<u8>) -> String {
        String::from_utf8(buf).unwrap()
    }

    /// Plain fields are comma separated and newline terminated.
    #[test]
    fn one_record() {
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().build();
        writer.write_record(&mut out, &["a", "b", "c"]).unwrap();

        assert_eq!(buf_as_string(out), "a,b,c\n");
    }

    /// A record consisting of one empty field is written as a quoted empty
    /// string.
    #[test]
    fn one_empty_record() {
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().build();
        writer.write_record(&mut out, &[""]).unwrap();

        assert_eq!(buf_as_string(out), "\"\"\n");
    }

    /// The quoted-empty-field form is produced for every such record, not
    /// only the first.
    #[test]
    fn two_empty_records() {
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().build();
        writer.write_record(&mut out, &[""]).unwrap();
        writer.write_record(&mut out, &[""]).unwrap();

        assert_eq!(buf_as_string(out), "\"\"\n\"\"\n");
    }

    /// Serializing a struct with headers enabled emits the field names first.
    #[test]
    fn serialize_with_headers() {
        #[derive(Serialize)]
        struct Row {
            foo: i32,
            bar: f64,
            baz: bool,
        }

        let row = Row {
            foo: 42,
            bar: 42.5,
            baz: true,
        };
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().build();
        writer.serialize(&mut out, row).unwrap();

        assert_eq!(buf_as_string(out), "foo,bar,baz\n42,42.5,true\n");
    }

    /// With headers disabled only the values are written.
    #[test]
    fn serialize_no_headers() {
        #[derive(Serialize)]
        struct Row {
            foo: i32,
            bar: f64,
            baz: bool,
        }

        let row = Row {
            foo: 42,
            bar: 42.5,
            baz: true,
        };
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().has_headers(false).build();
        writer.serialize(&mut out, row).unwrap();

        assert_eq!(buf_as_string(out), "42,42.5,true\n");
    }

    serde_if_integer128! {
        /// Same as `serialize_no_headers`, with a value that needs `i128`.
        #[test]
        fn serialize_no_headers_128() {
            #[derive(Serialize)]
            struct Row {
                foo: i128,
                bar: f64,
                baz: bool,
            }

            let row = Row {
                foo: 9_223_372_036_854_775_808,
                bar: 42.5,
                baz: true,
            };
            let mut out = Vec::new();
            let mut writer =
                WriterBuilder::default().has_headers(false).build();
            writer.serialize(&mut out, row).unwrap();

            assert_eq!(buf_as_string(out), "9223372036854775808,42.5,true\n");
        }
    }

    /// A tuple has no field names, so no header row appears even though
    /// headers are enabled by default.
    #[test]
    fn serialize_tuple() {
        let mut out = Vec::new();
        let mut writer = WriterBuilder::default().build();
        writer.serialize(&mut out, (true, 1.3, "hi")).unwrap();

        assert_eq!(buf_as_string(out), "true,1.3,hi\n");
    }
}
1076}