use std::collections::BTreeMap;
use std::io::Read;
use crate::{BasicTokenReader, CSVError, CSVErrorType, Dialect, Token, TokenReader};
/// Row-oriented CSV reader over any [`Read`] source.
///
/// Wraps a `BasicTokenReader` and assembles the tokens it produces into
/// rows of field strings (see `read_line`).
pub struct CSVReader<T: Read + Sized> {
    /// Token source that the rows are built from.
    tokenizer: BasicTokenReader<T>,
}
impl<T: Read + Sized> CSVReader<T> {
    /// Builds a reader over `reader` using `BasicTokenReader::new`.
    ///
    /// NOTE(review): presumably this selects the default dialect — confirm
    /// against `BasicTokenReader::new`.
    pub fn new(reader: T) -> CSVReader<T> {
        let tokenizer = BasicTokenReader::new(reader);
        CSVReader { tokenizer }
    }

    /// Builds a reader over `reader` whose tokenizer is configured with the
    /// given `dialect`.
    pub fn dialect(reader: T, dialect: Dialect) -> CSVReader<T> {
        let tokenizer = BasicTokenReader::dialect(reader, dialect);
        CSVReader { tokenizer }
    }

    /// Reads the next row and returns its fields.
    ///
    /// Returns `Ok(Some(fields))` when a row was read and `Ok(None)` once the
    /// tokenizer is exhausted and no fields are pending. If the tokenizer
    /// runs out while fields have been collected but no `EndRow` was seen,
    /// those fields are returned as a final row.
    ///
    /// A `Comment` token whose text is empty and which arrives while the
    /// current row has no fields yet switches the reader into "skip" mode:
    /// every `Field` up to the next `EndRow` is discarded and that `EndRow`
    /// does not produce a row. Any other `Comment` token is stored as an
    /// ordinary field.
    ///
    /// NOTE(review): the function returns as soon as an `EndRow` is seen, so
    /// any tokens remaining in the same batch are dropped. Presumably
    /// `next_tokens` never yields tokens past an `EndRow` — confirm.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by the tokenizer's `next_tokens`.
    pub fn read_line(&mut self) -> Result<Option<Vec<String>>, CSVError> {
        let mut fields: Vec<String> = Vec::new();
        // True while the tokens of a skipped (comment) line are being dropped.
        let mut skipping = false;

        while let Some(batch) = self.tokenizer.next_tokens()? {
            for token in batch {
                match token {
                    // Field inside a skipped line: discard it.
                    Token::Field(_) if skipping => {}
                    Token::Field(text) => fields.push(text),
                    // End of a skipped line: resume normal collection.
                    Token::EndRow if skipping => skipping = false,
                    Token::EndRow => return Ok(Some(fields)),
                    Token::Comment(text) => {
                        if fields.is_empty() && text.is_empty() {
                            skipping = true;
                        } else {
                            skipping = false;
                            fields.push(text);
                        }
                    }
                }
            }
        }

        // Tokenizer exhausted: hand back whatever was collected, if anything.
        if fields.is_empty() {
            Ok(None)
        } else {
            Ok(Some(fields))
        }
    }
}
/// CSV reader that pairs every data row with the header row.
///
/// The first row of the input is consumed at construction time and kept as
/// the list of keys; each later row is returned as a [`Row`].
pub struct CSVMapReader<T: Read + Sized> {
    /// Underlying row reader, already positioned after the header row.
    reader: CSVReader<T>,
    /// Header fields, in file order.
    keys: Vec<String>,
}
impl<T: Read + Sized> CSVMapReader<T> {
    /// Creates a map reader over `read` using `Dialect::default()`.
    ///
    /// # Errors
    ///
    /// Same as [`CSVMapReader::dialect`].
    pub fn new(read: T) -> Result<CSVMapReader<T>, CSVError> {
        let default_dialect = Dialect::default();
        Self::dialect(read, default_dialect)
    }

    /// Creates a map reader over `read` with the given `dialect`, consuming
    /// the first row as the header.
    ///
    /// # Errors
    ///
    /// Returns a `MissingHeaderError` when no header row can be read, and
    /// propagates any error raised while reading that row.
    pub fn dialect(read: T, dialect: Dialect) -> Result<Self, CSVError> {
        let mut reader = CSVReader::dialect(read, dialect);
        if let Some(keys) = reader.read_line()? {
            return Ok(CSVMapReader { reader, keys });
        }
        CSVError::err(
            CSVErrorType::MissingHeaderError,
            "Missing header or empty file".to_string(),
        )
    }

    /// Reads the next data row and pairs it with a copy of the header.
    ///
    /// Returns `Ok(None)` when the underlying reader has no more rows.
    ///
    /// # Errors
    ///
    /// Returns a `HeaderDataMismatchError` when the row's field count differs
    /// from the header's, and propagates any error from the underlying reader.
    pub fn next_row(&mut self) -> Result<Option<Row>, CSVError> {
        let Some(data) = self.reader.read_line()? else {
            return Ok(None);
        };

        let (hdrlen, datalen) = (self.keys.len(), data.len());
        if hdrlen == datalen {
            // Each row owns its own copy of the header keys.
            let keys = self.keys.clone();
            return Ok(Some(Row { keys, data }));
        }

        CSVError::err(
            CSVErrorType::HeaderDataMismatchError,
            format!("Headers length ({hdrlen}) != data length ({datalen})"),
        )
    }

    /// Consumes the reader, calling `func` on every remaining row in order.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first error produced by
    /// [`CSVMapReader::next_row`]; rows before it have already been passed
    /// to `func`.
    pub fn for_each<F: FnMut(Row)>(mut self, mut func: F) -> Result<(), CSVError> {
        loop {
            match self.next_row()? {
                Some(row) => func(row),
                None => return Ok(()),
            }
        }
    }
}
/// One data row together with the header keys it belongs to.
///
/// `keys[i]` is the header for `data[i]`. The conversion methods pair the two
/// vectors positionally and stop at the shorter one.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct Row {
    /// Header fields, in file order.
    keys: Vec<String>,
    /// Field values of this row, in file order.
    data: Vec<String>,
}

impl Row {
    /// Converts the row into a key → value map, rejecting duplicate keys.
    ///
    /// # Errors
    ///
    /// Returns a `DuplicateKeyInHeaderError` naming the first key that occurs
    /// more than once in the header.
    pub fn into_map(self) -> Result<BTreeMap<String, String>, CSVError> {
        let mut out: BTreeMap<String, String> = BTreeMap::new();
        for (k, v) in self.keys.into_iter().zip(self.data) {
            // The entry API moves the key into the map and still lets the
            // error path name it, so no per-key clone is needed.
            match out.entry(k) {
                std::collections::btree_map::Entry::Vacant(slot) => {
                    slot.insert(v);
                }
                std::collections::btree_map::Entry::Occupied(dup) => {
                    let key = dup.key();
                    return CSVError::err(
                        CSVErrorType::DuplicateKeyInHeaderError,
                        format!("Duplicate key in header detected: {key}"),
                    );
                }
            }
        }
        Ok(out)
    }

    /// Converts the row into a key → value map without checking for
    /// duplicate keys; duplicates are collapsed into a single entry instead
    /// of being reported.
    #[must_use]
    pub fn into_map_lossy(self) -> BTreeMap<String, String> {
        // Zip straight into the map; no intermediate Vec of pairs.
        self.keys.into_iter().zip(self.data).collect()
    }

    /// Converts the row into `(key, value)` pairs in header order.
    #[must_use]
    pub fn into_items(self) -> Vec<(String, String)> {
        self.keys.into_iter().zip(self.data).collect()
    }
}