Skip to main content

csv_nose/
sample.rs

/// How much of the input should be examined while sniffing.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SampleSize {
    /// Examine at most this many records.
    Records(usize),
    /// Examine at most this many bytes.
    Bytes(usize),
    /// Examine the whole file.
    ///
    /// # Warning
    ///
    /// The entire file is loaded into memory. For large inputs (e.g., >100 MB),
    /// use [`SampleSize::Bytes`] with a sensible cap instead so memory usage
    /// stays bounded.
    All,
}
16
17impl Default for SampleSize {
18    fn default() -> Self {
19        // Default to 100 records which is reasonable for most files
20        SampleSize::Records(100)
21    }
22}
23
24impl SampleSize {
25    /// Returns the number of records to sample, or None for All.
26    pub fn records(&self) -> Option<usize> {
27        match self {
28            SampleSize::Records(n) => Some(*n),
29            _ => None,
30        }
31    }
32
33    /// Returns the number of bytes to sample, or None for other modes.
34    pub fn bytes(&self) -> Option<usize> {
35        match self {
36            SampleSize::Bytes(n) => Some(*n),
37            _ => None,
38        }
39    }
40}
41
/// Which field order to assume when a date could be read more than one way.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum DatePreference {
    /// Day, then month, then year (e.g., 31/12/2023).
    DmyFormat,
    /// Month, then day, then year (e.g., 12/31/2023).
    ///
    /// This is the variant produced by [`Default`].
    #[default]
    MdyFormat,
}
51
52impl DatePreference {
53    /// Returns true if day comes before month in ambiguous dates.
54    pub fn is_dmy(&self) -> bool {
55        matches!(self, DatePreference::DmyFormat)
56    }
57}