irox_csv/reader.rs
1use std::collections::BTreeMap;
2use std::io::Read;
3
4use crate::{BasicTokenReader, CSVError, CSVErrorType, Dialect, Token, TokenReader};
5
6///
7/// Incredibly basic CSV reader.
8///
9/// Has some equivalent functionality as `String.split(",")`, except it handles quoted entries.
10pub struct CSVReader<T>
11where
12 T: Read + Sized,
13{
14 tokenizer: BasicTokenReader<T>,
15}
16
17impl<T: Read + Sized> CSVReader<T> {
18 ///
19 /// Create a new CSV Reader from the input. Accepts anything that implements [`Read`]
20 pub fn new(reader: T) -> CSVReader<T> {
21 CSVReader {
22 tokenizer: BasicTokenReader::new(reader),
23 }
24 }
25
26 ///
27 /// Creates a new CSV reader based on the specified dialect. Accepts any
28 /// [`Read`]er and consumes it.
29 pub fn dialect(reader: T, dialect: Dialect) -> CSVReader<T> {
30 CSVReader {
31 tokenizer: BasicTokenReader::dialect(reader, dialect),
32 }
33 }
34
35 ///
36 /// Read and parse a single line from the CSV file.
37 ///
38 /// Will return [`Result::Ok(None)`] upon EOF.
39 /// Will return [`Result::Err(CSVError)`] upon any I/O error.
40 /// Will return [`Result::Ok(Option::Some(Vec<String>))`] upon success, with each element within
41 /// the line separated inside of the innermost [`Vec<String>`]
42 pub fn read_line(&mut self) -> Result<Option<Vec<String>>, CSVError> {
43 let mut out: Vec<String> = Vec::new();
44
45 let mut in_a_comment = false;
46 loop {
47 if let Some(toks) = self.tokenizer.next_tokens()? {
48 for tok in toks {
49 match tok {
50 Token::Field(f) => {
51 if !in_a_comment {
52 out.push(f);
53 }
54 }
55 Token::EndRow => {
56 if in_a_comment {
57 in_a_comment = false;
58 } else {
59 return Ok(Some(out));
60 }
61 }
62 Token::Comment(f) => {
63 // only a comment if it's the first token of a line
64 in_a_comment = out.is_empty() && f.is_empty();
65 if !in_a_comment {
66 out.push(f);
67 }
68 }
69 }
70 }
71 } else {
72 if !out.is_empty() {
73 return Ok(Some(out));
74 }
75 return Ok(None);
76 }
77 }
78 }
79}
80
81///
82/// Returns each row as a Key => Value Mapping, rather than a simple list of values.
83///
84/// CSVMapReader has more validation than [`CSVReader`], as it REQUIRES that each line in the
85/// csv file have the same number of elements as the header.
86pub struct CSVMapReader<T>
87where
88 T: Read + Sized,
89{
90 reader: CSVReader<T>,
91 keys: Vec<String>,
92}
93
94impl<T: Read + Sized> CSVMapReader<T> {
95 ///
96 /// Creates a new [`CSVMapReader`]
97 ///
98 /// Will return [`Result::Ok(CSVMapReader)`] if it can read the CSV's header.
99 /// Will return [`Result::Err(CSVError)`] if any I/O Error, or no header.
100 pub fn new(read: T) -> Result<CSVMapReader<T>, CSVError> {
101 Self::dialect(read, Dialect::default())
102 }
103
104 ///
105 /// Creates a new [`CSVMapReader`] using the specified dialect
106 ///
107 /// Will return [`Result::Ok(CSVMapReader)`] if it can read the CSV's header.
108 /// Will return [`Result::Err(CSVError)`] if any I/O Error, or no header.
109 pub fn dialect(read: T, dialect: Dialect) -> Result<Self, CSVError> {
110 let mut reader = CSVReader::dialect(read, dialect);
111 let keys = reader.read_line()?;
112 match keys {
113 Some(keys) => Ok(CSVMapReader { reader, keys }),
114 None => CSVError::err(
115 CSVErrorType::MissingHeaderError,
116 "Missing header or empty file".to_string(),
117 ),
118 }
119 }
120
121 ///
122 /// Maybe return a single row from the CSV file.
123 ///
124 /// Will return [`std::result::Result::Ok(None)`] upon EOF
125 /// Will return [`std::result::Result::Err(CSVError)`] upon underlying I/O error, or if the
126 /// particular row doesn't have enough elements to match up against the header.
127 pub fn next_row(&mut self) -> Result<Option<Row>, CSVError> {
128 let data = self.reader.read_line()?;
129 let Some(data) = data else {
130 return Ok(None);
131 };
132 let hdrlen = self.keys.len();
133 let datalen = data.len();
134 if hdrlen != datalen {
135 return CSVError::err(
136 CSVErrorType::HeaderDataMismatchError,
137 format!("Headers length ({hdrlen}) != data length ({datalen})"),
138 );
139 }
140
141 Ok(Some(Row {
142 keys: self.keys.clone(),
143 data,
144 }))
145 }
146
147 ///
148 /// Apply the specified function on each element of the read CSV file. This WILL iteratively
149 /// consume the underlying reader, and will continue until the reader exhausts.
150 pub fn for_each<F: FnMut(Row)>(mut self, mut func: F) -> Result<(), CSVError> {
151 while let Some(row) = self.next_row()? {
152 func(row);
153 }
154 Ok(())
155 }
156}
157
///
/// One Map line of a CSV table, held as two parallel lists: the keys and the values
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Row {
    /// The Map Keys of this line (repeats are possible!)
    keys: Vec<String>,

    /// The values of this line (repeats are possible!)
    data: Vec<String>,
}
168
169impl Row {
170 ///
171 /// Converts this row into a BTreeMap<String, String>.
172 ///
173 /// This WILL return a [`Err`] if there are duplicate keys
174 pub fn into_map(self) -> Result<BTreeMap<String, String>, CSVError> {
175 let mut out: BTreeMap<String, String> = BTreeMap::new();
176 for (k, v) in self.into_items() {
177 if let Some(_elem) = out.insert(k.clone(), v) {
178 return CSVError::err(
179 CSVErrorType::DuplicateKeyInHeaderError,
180 format!("Duplicate key in header detected: {k}"),
181 );
182 }
183 }
184 Ok(out)
185 }
186
187 ///
188 /// Convert into a [`BTreeMap<String, String>`].
189 ///
190 /// Unlike [`Self::into_map`], this function will overwrite any previous keys with those found later in
191 /// the row.
192 #[must_use]
193 pub fn into_map_lossy(self) -> BTreeMap<String, String> {
194 BTreeMap::from_iter(self.into_items())
195 }
196
197 ///
198 /// Converts into a [`std::vec::Vec<(String, String)>`], pairing each key with it's associated value
199 #[must_use]
200 pub fn into_items(self) -> Vec<(String, String)> {
201 self.keys.into_iter().zip(self.data).collect()
202 }
203}