csv_sniffer/
metadata.rs

1/*!
2CSV metadata types.
3*/
4use std::fmt;
5use std::fs::File;
6use std::io::{Read, Seek};
7use std::path::Path;
8
9use csv::{Reader, ReaderBuilder};
10
11use error::*;
12use field_type::Type;
13use snip::snip_preamble;
14
/// Primary CSV metadata. Generated by
/// [`Sniffer::sniff_path`](../struct.Sniffer.html#method.sniff_path) or
/// [`Sniffer::sniff_reader`](../struct.Sniffer.html#method.sniff_reader) after examining a CSV
/// file.
///
/// Groups the [`Dialect`](struct.Dialect.html) together with the field count and the inferred
/// type of each field. The `Display` implementation renders all three as a multi-line report.
#[derive(Debug, Clone, PartialEq)]
pub struct Metadata {
    /// [`Dialect`](struct.Dialect.html) subtype.
    pub dialect: Dialect,
    /// (Maximum) number of fields per record.
    pub num_fields: usize,
    /// Inferred field types.
    ///
    /// NOTE(review): presumably one entry per field, in column order -- confirm against the
    /// sniffer code that fills this in.
    pub types: Vec<Type>,
}
28impl fmt::Display for Metadata {
29    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30        writeln!(f, "Metadata")?;
31        writeln!(f, "========")?;
32        writeln!(f, "{}", self.dialect)?;
33        writeln!(f, "Number of fields: {}", self.num_fields)?;
34        writeln!(f, "Types:")?;
35        for (i, ty) in self.types.iter().enumerate() {
36            writeln!(f, "\t{}: {}", i, ty)?;
37        }
38        Ok(())
39    }
40}
41
42/// Dialect-level metadata. This type encapsulates the details to be used to derive a
43/// `ReaderBuilder` object (in the [`csv`](https://docs.rs/csv) crate).
44#[derive(Clone)]
45pub struct Dialect {
46    /// CSV delimiter (field separator).
47    pub delimiter: u8,
48    /// [`Header`](struct.Header.html) subtype (header row boolean and number of preamble rows).
49    pub header: Header,
50    /// Record quoting details.
51    pub quote: Quote,
52    /// Whether or not the number of fields in a record is allowed to change.
53    pub flexible: bool,
54}
55impl PartialEq for Dialect {
56    fn eq(&self, other: &Dialect) -> bool {
57        self.delimiter == other.delimiter
58            && self.header == other.header
59            && self.quote == other.quote
60            && self.flexible == other.flexible
61    }
62}
63impl fmt::Debug for Dialect {
64    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
65        f.debug_struct("Dialect")
66            .field("delimiter", &char::from(self.delimiter))
67            .field("header", &self.header)
68            .field("quote", &self.quote)
69            .field("flexible", &self.flexible)
70            .finish()
71    }
72}
73impl fmt::Display for Dialect {
74    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
75        writeln!(f, "Dialect:")?;
76        writeln!(f, "\tDelimiter: {}", char::from(self.delimiter))?;
77        writeln!(f, "\tHas header row?: {}", self.header.has_header_row)?;
78        writeln!(
79            f,
80            "\tNumber of preamble rows: {}",
81            self.header.num_preamble_rows
82        )?;
83        writeln!(
84            f,
85            "\tQuote character: {}",
86            match self.quote {
87                Quote::Some(chr) => format!("{}", char::from(chr)),
88                Quote::None => "none".into(),
89            }
90        )?;
91        writeln!(f, "\tFlexible: {}", self.flexible)
92    }
93}
94impl Dialect {
95    /// Use this `Dialect` to open a file specified by provided path. Returns a `Reader` (from the
96    /// [`csv`](https://docs.rs/csv) crate). Fails on file opening or reading errors.
97    pub fn open_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
98        self.open_reader(File::open(path)?)
99    }
100
101    /// Use this `Dialect` to create a `Reader` (from the [`csv`](https://docs.rs/csv) crate) using
102    /// the provided reader. Fails if unable to read from the reader.
103    pub fn open_reader<R: Read + Seek>(&self, mut rdr: R) -> Result<Reader<R>> {
104        snip_preamble(&mut rdr, self.header.num_preamble_rows)?;
105        let bldr: ReaderBuilder = self.clone().into();
106        Ok(bldr.from_reader(rdr))
107    }
108}
109impl From<Dialect> for ReaderBuilder {
110    fn from(dialect: Dialect) -> ReaderBuilder {
111        let mut bldr = ReaderBuilder::new();
112        bldr.delimiter(dialect.delimiter)
113            .has_headers(dialect.header.has_header_row)
114            .flexible(dialect.flexible);
115
116        match dialect.quote {
117            Quote::Some(character) => {
118                bldr.quoting(true);
119                bldr.quote(character);
120            }
121            Quote::None => {
122                bldr.quoting(false);
123            }
124        }
125
126        bldr
127    }
128}
129
/// Metadata about the header of the CSV file.
///
/// This is a small plain-data type (a `bool` and a `usize`), so it is `Copy` and `Eq` in
/// addition to `Clone` and `PartialEq`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Header {
    /// Whether or not this CSV file has a header row (a row containing column labels).
    pub has_header_row: bool,
    /// Number of rows that occur before either the header row (if `has_header_row` is `true`), or
    /// the first data row.
    pub num_preamble_rows: usize,
}
139
/// Metadata about the quoting style of the CSV file.
///
/// The enum holds at most a single byte, so it is `Copy` and `Eq` in addition to `Clone` and
/// `PartialEq`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Quote {
    /// Quotes are not used in the CSV file.
    None,
    /// Quotes are enabled, with the provided character used as the quote character.
    Some(u8),
}
impl fmt::Debug for Quote {
    /// Debug output that shows the quote byte as a `char` instead of a raw number:
    /// `Some { character: '…' }` for `Quote::Some`, and `None` for `Quote::None`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Quote::Some(character) => f
                .debug_struct("Some")
                .field("character", &char::from(character))
                .finish(),
            Quote::None => write!(f, "None"),
        }
    }
}
159
/// The escape character (or `Disabled` if escaping is disabled)
///
/// The enum holds at most a single byte, so it is `Copy` and `Eq` in addition to `Clone` and
/// `PartialEq`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Escape {
    /// Escapes are enabled, with the provided character as the escape character.
    Enabled(u8),
    /// Escapes are disabled.
    Disabled,
}
impl From<Escape> for Option<u8> {
    /// Maps `Enabled(byte)` to `Some(byte)` and `Disabled` to `None`.
    fn from(escape: Escape) -> Option<u8> {
        match escape {
            Escape::Enabled(chr) => Some(chr),
            Escape::Disabled => None,
        }
    }
}
impl fmt::Debug for Escape {
    /// Debug output that shows the escape byte as a character: `Enabled(<char>)` or `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Escape::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Escape::Disabled => write!(f, "Disabled"),
        }
    }
}
184
/// The comment character (or `Disabled` if commenting doesn't exist in this dialect)
///
/// The enum holds at most a single byte, so it is `Copy` and `Eq` in addition to `Clone` and
/// `PartialEq`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Comment {
    /// Comments are enabled, with the provided character as the comment character.
    Enabled(u8),
    /// Comments are disabled.
    Disabled,
}
impl From<Comment> for Option<u8> {
    /// Maps `Enabled(byte)` to `Some(byte)` and `Disabled` to `None`.
    fn from(comment: Comment) -> Option<u8> {
        match comment {
            Comment::Enabled(chr) => Some(chr),
            Comment::Disabled => None,
        }
    }
}
impl fmt::Debug for Comment {
    /// Debug output that shows the comment byte as a character: `Enabled(<char>)` or
    /// `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Comment::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Comment::Disabled => write!(f, "Disabled"),
        }
    }
}