csv_nose/sample.rs
/// Controls how much of the input is examined while sniffing.
///
/// The limit can be expressed as a record count, a byte count, or lifted
/// entirely with [`SampleSize::All`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SampleSize {
    /// Examine at most this many records.
    Records(usize),
    /// Examine at most this many bytes.
    Bytes(usize),
    /// Examine the whole file.
    ///
    /// # Warning
    ///
    /// The entire file is loaded into memory in this mode. For large inputs
    /// (e.g., >100 MB), a bounded [`SampleSize::Bytes`] limit is the safer
    /// choice to keep memory usage under control.
    All,
}
16
17impl Default for SampleSize {
18 fn default() -> Self {
19 // Default to 100 records which is reasonable for most files
20 SampleSize::Records(100)
21 }
22}
23
24impl SampleSize {
25 /// Returns the number of records to sample, or None for All.
26 pub fn records(&self) -> Option<usize> {
27 match self {
28 SampleSize::Records(n) => Some(*n),
29 _ => None,
30 }
31 }
32
33 /// Returns the number of bytes to sample, or None for other modes.
34 pub fn bytes(&self) -> Option<usize> {
35 match self {
36 SampleSize::Bytes(n) => Some(*n),
37 _ => None,
38 }
39 }
40}
41
/// Which field order to assume when a date could be read either way.
///
/// The default is [`DatePreference::MdyFormat`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum DatePreference {
    /// Day first, then month, then year (e.g., 31/12/2023).
    DmyFormat,
    /// Month first, then day, then year (e.g., 12/31/2023).
    #[default]
    MdyFormat,
}
51
52impl DatePreference {
53 /// Returns true if day comes before month in ambiguous dates.
54 pub fn is_dmy(&self) -> bool {
55 matches!(self, DatePreference::DmyFormat)
56 }
57}