// csv_scout/metadata.rs

/*!
CSV metadata types.
*/
use std::fmt;
use std::fs::File;
use std::io::{Read, Seek};
use std::path::Path;

use csv::{Reader, ReaderBuilder};
// use tabwriter::TabWriter;

use crate::error::Result;

14/// Primary CSV metadata. Generated by
15/// [`Sniffer::sniff_path`](../struct.Sniffer.html#method.sniff_path) or
16/// [`Sniffer::sniff_reader`](../struct.Sniffer.html#method.sniff_reader) after examining a CSV
17/// file.
18#[derive(Debug, Clone, PartialEq)]
19pub struct Metadata {
20    /// [`Dialect`](struct.Dialect.html) subtype.
21    pub dialect: Dialect,
22    // Average record length (in bytes).
23    // pub avg_record_len: usize,
24    // (Maximum) number of fields per record.
25    // pub num_fields: usize,
26    //  field/column names
27    // pub fields: Vec<String>,
28    // / Inferred field types.
29    // pub types: Vec<Type>,
30}
31impl fmt::Display for Metadata {
32    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
33        writeln!(f, "Metadata")?;
34        writeln!(f, "========")?;
35        writeln!(f, "{}", self.dialect)?;
36        // writeln!(f, "Average record length (bytes): {}", self.avg_record_len)?;
37        // writeln!(f, "Number of fields: {}", self.num_fields)?;
38        // writeln!(f, "Fields:")?;
39
40        // let mut tabwtr = TabWriter::new(vec![]);
41
42        // for (i, ty) in self.types.iter().enumerate() {
43        //     writeln!(
44        //         &mut tabwtr,
45        //         "\t{}:\t{}\t{}",
46        //         i,
47        //         ty,
48        //         self.fields.get(i).unwrap_or(&String::new())
49        //     )
50        //     .unwrap_or_default();
51        // }
52        // safety: we just wrote to the tabwriter, so it should be ok to unwrap
53        // tabwtr.flush().unwrap();
54
55        // safety: we just flushed the tabwriter, so it should be ok to unwrap the inner vec
56        // the second unwrap is to convert the vec<u8> to a String, so its also safe.
57        // let tabbed_field_list = simdutf8::basic::from_utf8(&tabwtr.into_inner().unwrap())
58        //     .unwrap()
59        //     .to_string();
60        // writeln!(f, "{tabbed_field_list}")?;
61
62        Ok(())
63    }
64}
65
66/// Dialect-level metadata. This type encapsulates the details to be used to derive a
67/// `ReaderBuilder` object (in the [`csv`](https://docs.rs/csv) crate).
68#[derive(Clone)]
69pub struct Dialect {
70    /// CSV delimiter (field separator).
71    pub delimiter: u8,
72    // [`Header`](struct.Header.html) subtype (header row boolean and number of preamble rows).
73    // pub header: Header,
74    /// Record quoting details.
75    pub quote: Quote,
76    // Whether or not the number of fields in a record is allowed to change.
77    // pub flexible: bool,
78    // Whether the file is utf-8 encoded.
79    // pub is_utf8: bool,
80}
81impl PartialEq for Dialect {
82    fn eq(&self, other: &Dialect) -> bool {
83        self.delimiter == other.delimiter
84            // && self.header == other.header
85            && self.quote == other.quote
86        // && self.flexible == other.flexible
87        // && self.is_utf8 == other.is_utf8
88    }
89}
90impl fmt::Debug for Dialect {
91    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92        f.debug_struct("Dialect")
93            .field("delimiter", &char::from(self.delimiter))
94            // .field("header", &self.header)
95            .field("quote", &self.quote)
96            // .field("flexible", &self.flexible)
97            // .field("is_utf8", &self.is_utf8)
98            .finish()
99    }
100}
101impl fmt::Display for Dialect {
102    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
103        writeln!(f, "Dialect:")?;
104        writeln!(f, "\tDelimiter: {}", char::from(self.delimiter))?;
105        // writeln!(f, "\tHas header row?: {}", self.header.has_header_row)?;
106        // writeln!(
107        //     f,
108        //     "\tNumber of preamble rows: {}",
109        //     self.header.num_preamble_rows
110        // )?;
111        writeln!(
112            f,
113            "\tQuote character: {}",
114            match self.quote {
115                Quote::Some(chr) => format!("{}", char::from(chr)),
116                Quote::None => "none".into(),
117            }
118        )
119        // writeln!(f, "\tFlexible: {}", self.flexible)?;
120        // writeln!(f, "\tIs utf-8 encoded?: {}", self.is_utf8)
121    }
122}
123impl Dialect {
124    /// Use this `Dialect` to open a file specified by provided path. Returns a `Reader` (from the
125    /// [`csv`](https://docs.rs/csv) crate). Fails on file opening or reading errors.
126    pub fn open_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
127        self.open_reader(File::open(path)?)
128    }
129
130    /// Use this `Dialect` to create a `Reader` (from the [`csv`](https://docs.rs/csv) crate) using
131    /// the provided reader. Fails if unable to read from the reader.
132    pub fn open_reader<R: Read + Seek>(&self, rdr: R) -> Result<Reader<R>> {
133        // snip_preamble(&mut rdr, self.header.num_preamble_rows)?;
134        let bldr: ReaderBuilder = self.clone().into();
135        Ok(bldr.from_reader(rdr))
136    }
137}
138impl From<Dialect> for ReaderBuilder {
139    fn from(dialect: Dialect) -> Self {
140        let mut bldr = Self::new();
141        bldr.delimiter(dialect.delimiter);
142        // .has_headers(dialect.header.has_header_row)
143        // .flexible(dialect.flexible);
144
145        match dialect.quote {
146            Quote::Some(character) => {
147                bldr.quoting(true);
148                bldr.quote(character);
149            }
150            Quote::None => {
151                bldr.quoting(false);
152            }
153        }
154
155        bldr
156    }
157}
158
/// Metadata about the header of the CSV file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Header {
    /// Whether or not this CSV file has a header row (a row containing column labels).
    pub has_header_row: bool,
    /// Number of rows that occur before either the header row (if `has_header_row` is `true`),
    /// or the first data row.
    pub num_preamble_rows: usize,
}
168
/// Metadata about the quoting style of the CSV file.
#[derive(Clone, PartialEq, Eq)]
pub enum Quote {
    /// Quotes are not used in the CSV file.
    None,
    /// Quotes are enabled, with the provided character used as the quote character.
    Some(u8),
}
impl fmt::Debug for Quote {
    /// Renders `Some` as a struct with a `character` field holding the quote byte as a `char`,
    /// and `None` as the bare word `None`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Some(character) => f
                .debug_struct("Some")
                .field("character", &char::from(character))
                .finish(),
            Self::None => f.write_str("None"),
        }
    }
}

/// The escape character (or `Disabled` if escaping is disabled).
#[derive(Clone, PartialEq, Eq)]
pub enum Escape {
    /// Escapes are enabled, with the provided character as the escape character.
    Enabled(u8),
    /// Escapes are disabled.
    Disabled,
}
impl From<Escape> for Option<u8> {
    /// Maps `Enabled(byte)` to `Some(byte)` and `Disabled` to `None`.
    fn from(escape: Escape) -> Self {
        match escape {
            Escape::Enabled(chr) => Some(chr),
            Escape::Disabled => None,
        }
    }
}
impl fmt::Debug for Escape {
    /// Renders `Enabled` with the escape byte shown as a plain `char`, and `Disabled` as the bare
    /// word `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Self::Disabled => f.write_str("Disabled"),
        }
    }
}

/// The comment character (or `Disabled` if commenting doesn't exist in this dialect).
#[derive(Clone, PartialEq, Eq)]
pub enum Comment {
    /// Comments are enabled, with the provided character as the comment character.
    Enabled(u8),
    /// Comments are disabled.
    Disabled,
}
impl From<Comment> for Option<u8> {
    /// Maps `Enabled(byte)` to `Some(byte)` and `Disabled` to `None`.
    fn from(comment: Comment) -> Self {
        match comment {
            Comment::Enabled(chr) => Some(chr),
            Comment::Disabled => None,
        }
    }
}
impl fmt::Debug for Comment {
    /// Renders `Enabled` with the comment byte shown as a plain `char`, and `Disabled` as the
    /// bare word `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Self::Disabled => f.write_str("Disabled"),
        }
    }
}