csv_scout/metadata.rs
/*!
CSV metadata types: the sniffed `Metadata`, its `Dialect`, and the quote, escape, and comment
descriptors.
*/
4use std::fmt;
5use std::fs::File;
6use std::io::{Read, Seek};
7use std::path::Path;
8
9use csv::{Reader, ReaderBuilder};
10// use tabwriter::TabWriter;
11
12use crate::error::Result;
13
14/// Primary CSV metadata. Generated by
15/// [`Sniffer::sniff_path`](../struct.Sniffer.html#method.sniff_path) or
16/// [`Sniffer::sniff_reader`](../struct.Sniffer.html#method.sniff_reader) after examining a CSV
17/// file.
18#[derive(Debug, Clone, PartialEq)]
19pub struct Metadata {
20 /// [`Dialect`](struct.Dialect.html) subtype.
21 pub dialect: Dialect,
22 // Average record length (in bytes).
23 // pub avg_record_len: usize,
24 // (Maximum) number of fields per record.
25 // pub num_fields: usize,
26 // field/column names
27 // pub fields: Vec<String>,
28 // / Inferred field types.
29 // pub types: Vec<Type>,
30}
31impl fmt::Display for Metadata {
32 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
33 writeln!(f, "Metadata")?;
34 writeln!(f, "========")?;
35 writeln!(f, "{}", self.dialect)?;
36 // writeln!(f, "Average record length (bytes): {}", self.avg_record_len)?;
37 // writeln!(f, "Number of fields: {}", self.num_fields)?;
38 // writeln!(f, "Fields:")?;
39
40 // let mut tabwtr = TabWriter::new(vec![]);
41
42 // for (i, ty) in self.types.iter().enumerate() {
43 // writeln!(
44 // &mut tabwtr,
45 // "\t{}:\t{}\t{}",
46 // i,
47 // ty,
48 // self.fields.get(i).unwrap_or(&String::new())
49 // )
50 // .unwrap_or_default();
51 // }
52 // safety: we just wrote to the tabwriter, so it should be ok to unwrap
53 // tabwtr.flush().unwrap();
54
55 // safety: we just flushed the tabwriter, so it should be ok to unwrap the inner vec
56 // the second unwrap is to convert the vec<u8> to a String, so its also safe.
57 // let tabbed_field_list = simdutf8::basic::from_utf8(&tabwtr.into_inner().unwrap())
58 // .unwrap()
59 // .to_string();
60 // writeln!(f, "{tabbed_field_list}")?;
61
62 Ok(())
63 }
64}
65
66/// Dialect-level metadata. This type encapsulates the details to be used to derive a
67/// `ReaderBuilder` object (in the [`csv`](https://docs.rs/csv) crate).
68#[derive(Clone)]
69pub struct Dialect {
70 /// CSV delimiter (field separator).
71 pub delimiter: u8,
72 // [`Header`](struct.Header.html) subtype (header row boolean and number of preamble rows).
73 // pub header: Header,
74 /// Record quoting details.
75 pub quote: Quote,
76 // Whether or not the number of fields in a record is allowed to change.
77 // pub flexible: bool,
78 // Whether the file is utf-8 encoded.
79 // pub is_utf8: bool,
80}
81impl PartialEq for Dialect {
82 fn eq(&self, other: &Dialect) -> bool {
83 self.delimiter == other.delimiter
84 // && self.header == other.header
85 && self.quote == other.quote
86 // && self.flexible == other.flexible
87 // && self.is_utf8 == other.is_utf8
88 }
89}
90impl fmt::Debug for Dialect {
91 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
92 f.debug_struct("Dialect")
93 .field("delimiter", &char::from(self.delimiter))
94 // .field("header", &self.header)
95 .field("quote", &self.quote)
96 // .field("flexible", &self.flexible)
97 // .field("is_utf8", &self.is_utf8)
98 .finish()
99 }
100}
101impl fmt::Display for Dialect {
102 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
103 writeln!(f, "Dialect:")?;
104 writeln!(f, "\tDelimiter: {}", char::from(self.delimiter))?;
105 // writeln!(f, "\tHas header row?: {}", self.header.has_header_row)?;
106 // writeln!(
107 // f,
108 // "\tNumber of preamble rows: {}",
109 // self.header.num_preamble_rows
110 // )?;
111 writeln!(
112 f,
113 "\tQuote character: {}",
114 match self.quote {
115 Quote::Some(chr) => format!("{}", char::from(chr)),
116 Quote::None => "none".into(),
117 }
118 )
119 // writeln!(f, "\tFlexible: {}", self.flexible)?;
120 // writeln!(f, "\tIs utf-8 encoded?: {}", self.is_utf8)
121 }
122}
123impl Dialect {
124 /// Use this `Dialect` to open a file specified by provided path. Returns a `Reader` (from the
125 /// [`csv`](https://docs.rs/csv) crate). Fails on file opening or reading errors.
126 pub fn open_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
127 self.open_reader(File::open(path)?)
128 }
129
130 /// Use this `Dialect` to create a `Reader` (from the [`csv`](https://docs.rs/csv) crate) using
131 /// the provided reader. Fails if unable to read from the reader.
132 pub fn open_reader<R: Read + Seek>(&self, rdr: R) -> Result<Reader<R>> {
133 // snip_preamble(&mut rdr, self.header.num_preamble_rows)?;
134 let bldr: ReaderBuilder = self.clone().into();
135 Ok(bldr.from_reader(rdr))
136 }
137}
138impl From<Dialect> for ReaderBuilder {
139 fn from(dialect: Dialect) -> Self {
140 let mut bldr = Self::new();
141 bldr.delimiter(dialect.delimiter);
142 // .has_headers(dialect.header.has_header_row)
143 // .flexible(dialect.flexible);
144
145 match dialect.quote {
146 Quote::Some(character) => {
147 bldr.quoting(true);
148 bldr.quote(character);
149 }
150 Quote::None => {
151 bldr.quoting(false);
152 }
153 }
154
155 bldr
156 }
157}
158
/// Metadata about the header of the CSV file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Header {
    /// Whether or not this CSV file has a header row (a row containing column labels).
    pub has_header_row: bool,
    /// Number of rows that occur before either the header row (if `has_header_row` is `true`), or
    /// the first data row.
    pub num_preamble_rows: usize,
}
168
/// Metadata about the quoting style of the CSV file.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Quote {
    /// Quotes are not used in the CSV file.
    None,
    /// Quotes are enabled, with the provided character used as the quote character.
    Some(u8),
}
impl fmt::Debug for Quote {
    /// Formats `Quote::Some` as a `Some { character: .. }` debug struct, with the quote byte
    /// converted by `char::from` so it prints as a character, and `Quote::None` as `None`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Some(character) => f
                .debug_struct("Some")
                .field("character", &char::from(character))
                .finish(),
            Self::None => f.write_str("None"),
        }
    }
}
188
/// The escape character (or `Disabled` if escaping is disabled)
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Escape {
    /// Escapes are enabled, with the provided character as the escape character.
    Enabled(u8),
    /// Escapes are disabled.
    Disabled,
}
impl From<Escape> for Option<u8> {
    /// Converts to the optional escape byte: `Some(byte)` when enabled, `None` when disabled.
    fn from(escape: Escape) -> Self {
        match escape {
            Escape::Enabled(chr) => Some(chr),
            Escape::Disabled => None,
        }
    }
}
impl fmt::Debug for Escape {
    /// Formats as `Enabled(<char>)`, with the byte converted by `char::from` and printed
    /// unquoted, or as `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Self::Disabled => f.write_str("Disabled"),
        }
    }
}
213
/// The comment character (or `Disabled` if commenting doesn't exist in this dialect)
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Comment {
    /// Comments are enabled, with the provided character as the comment character.
    Enabled(u8),
    /// Comments are disabled.
    Disabled,
}
impl From<Comment> for Option<u8> {
    /// Converts to the optional comment byte: `Some(byte)` when enabled, `None` when disabled.
    fn from(comment: Comment) -> Self {
        match comment {
            Comment::Enabled(chr) => Some(chr),
            Comment::Disabled => None,
        }
    }
}
impl fmt::Debug for Comment {
    /// Formats as `Enabled(<char>)`, with the byte converted by `char::from` and printed
    /// unquoted, or as `Disabled`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Enabled(chr) => write!(f, "Enabled({})", char::from(chr)),
            Self::Disabled => f.write_str("Disabled"),
        }
    }
}