1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
//! csv-nose: CSV dialect sniffer using the Table Uniformity Method
//!
//! A drop-in replacement for qsv-sniffer with improved dialect detection accuracy
//! using the Table Uniformity Method from the CSVsniffer paper.
//!
//! # Quick Start
//!
//! ```no_run
//! use csv_nose::{Sniffer, SampleSize};
//!
//! // Create a sniffer with default settings
//! let mut sniffer = Sniffer::new();
//!
//! // Optionally configure sampling
//! sniffer.sample_size(SampleSize::Records(100));
//!
//! // Sniff a file
//! let metadata = sniffer.sniff_path("data.csv").unwrap();
//!
//! println!("Delimiter: {}", metadata.dialect.delimiter as char);
//! println!("Has header: {}", metadata.dialect.header.has_header_row);
//! println!("Fields: {:?}", metadata.fields);
//! println!("Types: {:?}", metadata.types);
//! ```
//!
//! # API Compatibility
//!
//! This crate provides API compatibility with qsv-sniffer, making it easy to
//! switch between implementations:
//!
//! ```no_run
//! use csv_nose::{Sniffer, Metadata, Dialect, Header, Quote, Type, SampleSize, DatePreference};
//!
//! let mut sniffer = Sniffer::new();
//! sniffer
//!     .sample_size(SampleSize::Records(50))
//!     .date_preference(DatePreference::MdyFormat)
//!     .delimiter(b',')
//!     .quote(Quote::Some(b'"'));
//! ```
//!
//! # The Table Uniformity Method
//!
//! This library implements the Table Uniformity Method from:
//! "Detecting CSV File Dialects by Table Uniformity Measurement and Data Type Inference"
//! by García (2024).
//!
//! The algorithm achieves ~93% accuracy on real-world messy CSV files by:
//! 1. Testing multiple potential dialects (delimiter, quote, line terminator combinations)
//! 2. Scoring each dialect based on table uniformity (consistent field counts)
//! 3. Scoring based on type detection (consistent data types within columns)
//! 4. Selecting the dialect with the highest combined score
// Re-export public API (qsv-sniffer compatible)
pub use ;
pub use Type;
pub use ;
pub use ;
pub use Sniffer;
// Re-export for advanced usage
pub use ;