// dsq_formats/reader/options.rs

1use crate::writer::CsvEncoding;
2
/// General-purpose settings that control how data is read.
///
/// [`Default`] is implemented for this type, so callers can override a
/// handful of fields with struct-update syntax
/// (`ReadOptions { lazy: true, ..Default::default() }`).
#[derive(Clone, Debug)]
pub struct ReadOptions {
    /// Upper bound on the number of rows to read; `None` reads all rows.
    pub max_rows: Option<usize>,
    /// Whether the schema should be inferred from the data.
    pub infer_schema: bool,
    /// Number of rows to use for schema inference.
    pub infer_schema_length: Option<usize>,
    /// Whether to use lazy evaluation.
    pub lazy: bool,
    /// Custom schema to apply.
    ///
    /// This field only exists when at least one of the `csv`, `json`,
    /// `parquet` or `avro` features is enabled.
    #[cfg(any(feature = "csv", feature = "json", feature = "parquet", feature = "avro"))]
    pub schema: Option<polars::prelude::Schema>,
    /// Number of leading rows to skip.
    pub skip_rows: usize,
    /// Names of the columns to select; `None` selects all columns.
    pub columns: Option<Vec<String>>,
    /// Whether to parse dates.
    pub parse_dates: bool,
}
29
30impl Default for ReadOptions {
31    fn default() -> Self {
32        Self {
33            max_rows: None,
34            infer_schema: true,
35            infer_schema_length: Some(1000),
36            lazy: false,
37            #[cfg(any(
38                feature = "csv",
39                feature = "json",
40                feature = "parquet",
41                feature = "avro"
42            ))]
43            schema: None,
44            skip_rows: 0,
45            columns: None,
46            parse_dates: true,
47        }
48    }
49}
50
51/// Format-specific read options
52#[derive(Debug, Clone)]
53pub enum FormatReadOptions {
54    /// CSV format options
55    Csv {
56        /// Separator character
57        separator: u8,
58        /// Whether the file has a header row
59        has_header: bool,
60        /// Quote character
61        quote_char: Option<u8>,
62        /// Comment character
63        comment_char: Option<u8>,
64        /// Null values
65        null_values: Option<Vec<String>>,
66        /// Encoding
67        encoding: CsvEncoding,
68    },
69    /// Parquet format options
70    Parquet {
71        /// Whether to read in parallel
72        parallel: bool,
73        /// Whether to use statistics
74        use_statistics: bool,
75        /// Columns to read
76        columns: Option<Vec<String>>,
77    },
78    /// JSON format options
79    Json {
80        /// Whether to read lines
81        lines: bool,
82        /// Whether to ignore errors
83        ignore_errors: bool,
84    },
85    /// Avro format options
86    Avro {
87        /// Columns to read
88        columns: Option<Vec<String>>,
89    },
90    /// Arrow format options
91    Arrow {
92        /// Columns to read
93        columns: Option<Vec<String>>,
94    },
95}
96
97impl Default for FormatReadOptions {
98    fn default() -> Self {
99        FormatReadOptions::Csv {
100            separator: b',',
101            has_header: true,
102            quote_char: Some(b'"'),
103            comment_char: None,
104            null_values: None,
105            encoding: CsvEncoding::Utf8,
106        }
107    }
108}