use super::{AtomicToken, FileReaderImpl};
use crate::error::Error;
use crate::index::BuildConfig;
use ragit_fs::FileError;
use std::fs::File;
pub struct CsvReader {
iterator: csv::ByteRecordsIntoIter<File>,
headers: Vec<String>,
rows: Vec<AtomicToken>, strict_mode: bool,
is_exhausted: bool,
}
impl FileReaderImpl for CsvReader {
fn new(path: &str, _root_dir: &str, config: &BuildConfig) -> Result<Self, Error> {
match File::open(path) {
Ok(f) => {
let mut reader = csv::ReaderBuilder::new()
.delimiter(b',')
.flexible(!config.strict_file_reader)
.from_reader(f);
let mut headers = vec![];
for header in reader.byte_headers()? {
match String::from_utf8(header.to_vec()) {
Ok(s) => { headers.push(s); },
Err(e) => if config.strict_file_reader {
return Err(e.into());
} else {
headers.push(String::from_utf8_lossy(header).to_string());
},
}
}
Ok(CsvReader {
iterator: reader.into_byte_records(),
headers,
rows: vec![],
strict_mode: config.strict_file_reader,
is_exhausted: false,
})
},
Err(e) => Err(FileError::from_std(e, path).into()),
}
}
fn load_tokens(&mut self) -> Result<(), Error> {
if self.is_exhausted {
return Ok(());
}
match self.iterator.next() {
Some(Ok(records)) => {
let mut string_records = Vec::with_capacity(self.headers.len());
for (index, record) in records.iter().enumerate() {
if index >= self.headers.len() {
break;
}
match String::from_utf8(record.to_vec()) {
Ok(s) => { string_records.push(s); },
Err(e) => if self.strict_mode {
return Err(e.into());
} else {
string_records.push(String::from_utf8_lossy(record).to_string());
},
}
}
let mut cells = Vec::with_capacity(self.headers.len());
for (record, header) in string_records.iter().zip(self.headers.iter()) {
let record = match record.parse::<i64>() {
Ok(n) => n.to_string(),
_ => match record.parse::<f64>() {
Ok(f) => f.to_string(),
_ => format!("{record:?}"),
},
};
cells.push(format!("{header:?}: {record}"));
}
let row = format!("{}{}{}\n", "{", cells.join(", "), "}");
self.rows.push(AtomicToken::String {
char_len: row.chars().count(),
data: row,
});
},
Some(Err(e)) => if self.strict_mode {
return Err(e.into());
} else {
},
None => {
self.is_exhausted = true;
},
}
Ok(())
}
fn pop_all_tokens(&mut self) -> Result<Vec<AtomicToken>, Error> {
let mut result = vec![];
std::mem::swap(&mut self.rows, &mut result);
Ok(result)
}
fn has_more_to_read(&self) -> bool {
!self.is_exhausted || !self.rows.is_empty()
}
fn key(&self) -> String {
String::from("csv_reader_v0")
}
}