irox_csv/
reader.rs

1use std::collections::BTreeMap;
2use std::io::Read;
3
4use crate::{BasicTokenReader, CSVError, CSVErrorType, Dialect, Token, TokenReader};
5
6///
7/// Incredibly basic CSV reader.
8///
9/// Has some equivalent functionality as `String.split(",")`, except it handles quoted entries.
10pub struct CSVReader<T>
11where
12    T: Read + Sized,
13{
14    tokenizer: BasicTokenReader<T>,
15}
16
17impl<T: Read + Sized> CSVReader<T> {
18    ///
19    /// Create a new CSV Reader from the input.  Accepts anything that implements [`Read`]
20    pub fn new(reader: T) -> CSVReader<T> {
21        CSVReader {
22            tokenizer: BasicTokenReader::new(reader),
23        }
24    }
25
26    ///
27    /// Creates a new CSV reader based on the specified dialect.  Accepts any
28    /// [`Read`]er and consumes it.
29    pub fn dialect(reader: T, dialect: Dialect) -> CSVReader<T> {
30        CSVReader {
31            tokenizer: BasicTokenReader::dialect(reader, dialect),
32        }
33    }
34
35    ///
36    /// Read and parse a single line from the CSV file.
37    ///
38    /// Will return [`Result::Ok(None)`] upon EOF.
39    /// Will return [`Result::Err(CSVError)`] upon any I/O error.
40    /// Will return [`Result::Ok(Option::Some(Vec<String>))`] upon success, with each element within
41    /// the line separated inside of the innermost [`Vec<String>`]
42    pub fn read_line(&mut self) -> Result<Option<Vec<String>>, CSVError> {
43        let mut out: Vec<String> = Vec::new();
44
45        let mut in_a_comment = false;
46        loop {
47            if let Some(toks) = self.tokenizer.next_tokens()? {
48                for tok in toks {
49                    match tok {
50                        Token::Field(f) => {
51                            if !in_a_comment {
52                                out.push(f);
53                            }
54                        }
55                        Token::EndRow => {
56                            if in_a_comment {
57                                in_a_comment = false;
58                            } else {
59                                return Ok(Some(out));
60                            }
61                        }
62                        Token::Comment(f) => {
63                            // only a comment if it's the first token of a line
64                            in_a_comment = out.is_empty() && f.is_empty();
65                            if !in_a_comment {
66                                out.push(f);
67                            }
68                        }
69                    }
70                }
71            } else {
72                if !out.is_empty() {
73                    return Ok(Some(out));
74                }
75                return Ok(None);
76            }
77        }
78    }
79}
80
81///
82/// Returns each row as a Key => Value Mapping, rather than a simple list of values.
83///
84/// CSVMapReader has more validation than [`CSVReader`], as it REQUIRES that each line in the
85/// csv file have the same number of elements as the header.
86pub struct CSVMapReader<T>
87where
88    T: Read + Sized,
89{
90    reader: CSVReader<T>,
91    keys: Vec<String>,
92}
93
94impl<T: Read + Sized> CSVMapReader<T> {
95    ///
96    /// Creates a new [`CSVMapReader`]
97    ///
98    /// Will return [`Result::Ok(CSVMapReader)`] if it can read the CSV's header.
99    /// Will return [`Result::Err(CSVError)`] if any I/O Error, or no header.
100    pub fn new(read: T) -> Result<CSVMapReader<T>, CSVError> {
101        Self::dialect(read, Dialect::default())
102    }
103
104    ///
105    /// Creates a new [`CSVMapReader`] using the specified dialect
106    ///
107    /// Will return [`Result::Ok(CSVMapReader)`] if it can read the CSV's header.
108    /// Will return [`Result::Err(CSVError)`] if any I/O Error, or no header.
109    pub fn dialect(read: T, dialect: Dialect) -> Result<Self, CSVError> {
110        let mut reader = CSVReader::dialect(read, dialect);
111        let keys = reader.read_line()?;
112        match keys {
113            Some(keys) => Ok(CSVMapReader { reader, keys }),
114            None => CSVError::err(
115                CSVErrorType::MissingHeaderError,
116                "Missing header or empty file".to_string(),
117            ),
118        }
119    }
120
121    ///
122    /// Maybe return a single row from the CSV file.
123    ///
124    /// Will return [`std::result::Result::Ok(None)`] upon EOF
125    /// Will return [`std::result::Result::Err(CSVError)`] upon underlying I/O error, or if the
126    /// particular row doesn't have enough elements to match up against the header.
127    pub fn next_row(&mut self) -> Result<Option<Row>, CSVError> {
128        let data = self.reader.read_line()?;
129        let Some(data) = data else {
130            return Ok(None);
131        };
132        let hdrlen = self.keys.len();
133        let datalen = data.len();
134        if hdrlen != datalen {
135            return CSVError::err(
136                CSVErrorType::HeaderDataMismatchError,
137                format!("Headers length ({hdrlen}) != data length ({datalen})"),
138            );
139        }
140
141        Ok(Some(Row {
142            keys: self.keys.clone(),
143            data,
144        }))
145    }
146
147    ///
148    /// Apply the specified function on each element of the read CSV file.  This WILL iteratively
149    /// consume the underlying reader, and will continue until the reader exhausts.
150    pub fn for_each<F: FnMut(Row)>(mut self, mut func: F) -> Result<(), CSVError> {
151        while let Some(row) = self.next_row()? {
152            func(row);
153        }
154        Ok(())
155    }
156}
157
///
/// A row represents a single Map line from a CSV table: the header keys paired
/// positionally with the values of one data line.
///
/// Both fields are plain `Vec<String>`, so the type also derives `Eq` and `Hash`, which
/// allows rows to be stored in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
pub struct Row {
    /// A list of the Map Keys, in column order (may be repeats!)
    keys: Vec<String>,

    /// A list of the row values, in column order (may be repeats!)
    data: Vec<String>,
}
168
169impl Row {
170    ///
171    /// Converts this row into a BTreeMap<String, String>.
172    ///
173    /// This WILL return a [`Err`] if there are duplicate keys
174    pub fn into_map(self) -> Result<BTreeMap<String, String>, CSVError> {
175        let mut out: BTreeMap<String, String> = BTreeMap::new();
176        for (k, v) in self.into_items() {
177            if let Some(_elem) = out.insert(k.clone(), v) {
178                return CSVError::err(
179                    CSVErrorType::DuplicateKeyInHeaderError,
180                    format!("Duplicate key in header detected: {k}"),
181                );
182            }
183        }
184        Ok(out)
185    }
186
187    ///
188    /// Convert into a [`BTreeMap<String, String>`].
189    ///
190    /// Unlike [`Self::into_map`], this function will overwrite any previous keys with those found later in
191    /// the row.
192    #[must_use]
193    pub fn into_map_lossy(self) -> BTreeMap<String, String> {
194        BTreeMap::from_iter(self.into_items())
195    }
196
197    ///
198    /// Converts into a [`std::vec::Vec<(String, String)>`], pairing each key with it's associated value
199    #[must_use]
200    pub fn into_items(self) -> Vec<(String, String)> {
201        self.keys.into_iter().zip(self.data).collect()
202    }
203}