Skip to main content

polars_io/csv/write/
options.rs

1use std::num::NonZeroUsize;
2use std::sync::Arc;
3
4use polars_utils::pl_str::PlSmallStr;
5#[cfg(feature = "serde")]
6use serde::{Deserialize, Serialize};
7
8use crate::ExternalCompression;
9
/// Options for writing CSV files.
///
/// Combines the writer-level settings with a shared [`SerializeOptions`]
/// that describes how logical types are serialized.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct CsvWriterOptions {
    /// Defaults to `false`.
    // NOTE(review): presumably controls whether a byte-order mark is written
    // at the start of the output; confirm against the writer implementation.
    pub include_bom: bool,
    /// Compression setting, see [`ExternalCompression`].
    ///
    /// When this field is missing from serialized input, the type's
    /// `Default` value is used.
    #[cfg_attr(feature = "serde", serde(default))]
    pub compression: ExternalCompression,
    /// Defaults to `true` via `Default for CsvWriterOptions`.
    // NOTE(review): `#[serde(default)]` fills a missing field with
    // `bool::default()`, i.e. `false`, while `Default for CsvWriterOptions`
    // sets `true`. Confirm that this difference is intentional.
    // NOTE(review): presumably enables a check of the output path's extension
    // against `compression`; confirm against the writer implementation.
    #[cfg_attr(feature = "serde", serde(default))]
    pub check_extension: bool,
    /// Defaults to `true`.
    // NOTE(review): presumably controls whether a header line is written;
    // confirm against the writer implementation.
    pub include_header: bool,
    /// Batch size used by the writer; zero is not representable.
    /// Defaults to `1024`.
    // NOTE(review): the unit (rows vs. bytes) is not visible from this file.
    pub batch_size: NonZeroUsize,
    /// Serialization settings for individual values, held behind an `Arc`.
    pub serialize_options: Arc<SerializeOptions>,
}
24
25impl Default for CsvWriterOptions {
26    fn default() -> Self {
27        Self {
28            include_bom: false,
29            compression: ExternalCompression::default(),
30            check_extension: true,
31            include_header: true,
32            batch_size: NonZeroUsize::new(1024).unwrap(),
33            serialize_options: SerializeOptions::default().into(),
34        }
35    }
36}
37
/// Options to serialize logical types to CSV.
///
/// The default is to format times and dates as `chrono` crate formats them.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub struct SerializeOptions {
    /// Used for [`DataType::Date`](polars_core::datatypes::DataType::Date).
    /// Defaults to `None`.
    pub date_format: Option<PlSmallStr>,
    /// Used for [`DataType::Time`](polars_core::datatypes::DataType::Time).
    /// Defaults to `None`.
    pub time_format: Option<PlSmallStr>,
    /// Used for [`DataType::Datetime`](polars_core::datatypes::DataType::Datetime).
    /// Defaults to `None`.
    pub datetime_format: Option<PlSmallStr>,
    /// Used for [`DataType::Float64`](polars_core::datatypes::DataType::Float64)
    /// and [`DataType::Float32`](polars_core::datatypes::DataType::Float32).
    /// Defaults to `None`.
    pub float_scientific: Option<bool>,
    /// Defaults to `None`.
    // NOTE(review): this field had no documentation; presumably it applies to
    // the same float types as `float_scientific` and sets the number of
    // decimals written. Confirm against the serializer.
    pub float_precision: Option<usize>,
    /// Use comma as the decimal separator. Defaults to `false`.
    pub decimal_comma: bool,
    /// Byte used as separator. Defaults to `b','`.
    pub separator: u8,
    /// Quoting character, as a single byte. Defaults to `b'"'`.
    pub quote_char: u8,
    /// Null value representation. Defaults to `PlSmallStr::EMPTY`.
    pub null: PlSmallStr,
    /// String appended after every row. Defaults to `"\n"`.
    pub line_terminator: PlSmallStr,
    /// When to insert quotes. Defaults to `QuoteStyle::default()`.
    pub quote_style: QuoteStyle,
}
68
69impl Default for SerializeOptions {
70    fn default() -> Self {
71        Self {
72            date_format: None,
73            time_format: None,
74            datetime_format: None,
75            float_scientific: None,
76            float_precision: None,
77            decimal_comma: false,
78            separator: b',',
79            quote_char: b'"',
80            null: PlSmallStr::EMPTY,
81            line_terminator: "\n".into(),
82            quote_style: Default::default(),
83        }
84    }
85}
86
/// Quote style indicating when to insert quotes around a field.
#[derive(Copy, Clone, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "dsl-schema", derive(schemars::JsonSchema))]
pub enum QuoteStyle {
    /// Quote fields only when necessary.
    ///
    /// Quotes are necessary when fields contain a quote, separator or record terminator.
    /// Quotes are also necessary when writing an empty record (which is indistinguishable
    /// from a record with one empty field).
    /// This is the default.
    #[default]
    Necessary,
    /// Quote every field. Always.
    Always,
    /// Quote non-numeric fields.
    ///
    /// When writing a field that does not parse as a valid float or integer,
    /// quotes will be used even if they aren't strictly necessary.
    NonNumeric,
    /// Never quote any fields, even if it would produce invalid CSV data.
    Never,
}