use crate::csv::CsvParser;
use crate::error::{ExcelError, Result};
use crate::fast_writer::StreamingZipReader;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Row-oriented CSV reader for plain files and for CSV files wrapped in an archive.
///
/// Instances are created with `CsvReader::open` and configured through the
/// by-value builder methods `delimiter`, `quote_char` and `has_header`.
pub struct CsvReader {
/// Buffered handle used when the input is a plain file; `None` for archive input.
direct_reader: Option<BufReader<File>>,
/// Bytes of the archive entry loaded by `open`; `None` for plain-file input.
zip_reader_data: Option<Vec<u8>>,
/// Scratch buffer holding the line that is currently being parsed.
line_buffer: String,
/// Number of rows returned by `read_row` so far (a header row is counted too).
row_count: u64,
/// Line iterator over the archive data; created on the first read in archive mode.
lines_iter: Option<Box<dyn Iterator<Item = String>>>,
/// Field separator byte handed to `CsvParser`.
delimiter: u8,
/// Quote byte handed to `CsvParser`.
quote_char: u8,
/// When true, the first row read is remembered as the header row.
has_header: bool,
/// Header fields captured from the first row; empty until that row has been read.
headers: Vec<String>,
}
impl CsvReader {
    /// Opens a CSV source for row-by-row reading.
    ///
    /// Paths ending in `.csv.zst`, `.csv.zip` or `.csv.gz` are opened through
    /// `StreamingZipReader`: the first entry whose name ends in `.csv` (or, failing
    /// that, the first entry of the archive) is loaded completely into memory. Any
    /// other path is opened as a plain file behind a buffered reader.
    ///
    /// The reader starts with `,` as delimiter, `"` as quote character and header
    /// handling disabled; use the builder methods to change these settings.
    ///
    /// # Errors
    ///
    /// Returns `ExcelError::ReadError` when the file or archive cannot be opened,
    /// when the archive contains no entries, or when the chosen entry cannot be read.
    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self> {
        let path_ref = path.as_ref();
        // A lossy conversion keeps the suffix test working for paths that are not
        // valid UTF-8; falling back to "" would route every such path to the
        // plain-file branch regardless of its suffix.
        let path_str = path_ref.to_string_lossy();
        // NOTE(review): `.csv.zst` and `.csv.gz` are handed to the ZIP reader as well;
        // confirm that `StreamingZipReader` really accepts those formats.
        let is_archive = [".csv.zst", ".csv.zip", ".csv.gz"]
            .iter()
            .any(|&suffix| path_str.ends_with(suffix));

        if !is_archive {
            let file = File::open(path_ref)
                .map_err(|e| ExcelError::ReadError(format!("Failed to open CSV file: {}", e)))?;
            return Ok(Self::with_source(Some(BufReader::new(file)), None));
        }

        let mut zip = StreamingZipReader::open(path_ref)
            .map_err(|e| ExcelError::ReadError(format!("Failed to open ZIP: {}", e)))?;
        // Prefer an entry that looks like CSV; otherwise settle for the first entry.
        let entry_name = zip
            .entries()
            .iter()
            .find(|e| e.name.ends_with(".csv"))
            .or_else(|| zip.entries().first())
            .ok_or_else(|| ExcelError::ReadError("No CSV entry found in archive".to_string()))?
            .name
            .clone();
        let data = zip
            .read_entry_by_name(&entry_name)
            .map_err(|e| ExcelError::ReadError(format!("Failed to read ZIP entry: {}", e)))?;
        Ok(Self::with_source(None, Some(data)))
    }

    /// Builds a reader with the default dialect settings around the given data source.
    fn with_source(
        direct_reader: Option<BufReader<File>>,
        zip_reader_data: Option<Vec<u8>>,
    ) -> Self {
        CsvReader {
            direct_reader,
            zip_reader_data,
            line_buffer: String::with_capacity(1024),
            row_count: 0,
            lines_iter: None,
            delimiter: b',',
            quote_char: b'"',
            has_header: false,
            headers: Vec::new(),
        }
    }

    /// Sets the field delimiter byte and returns the reader (builder style).
    pub fn delimiter(mut self, delim: u8) -> Self {
        self.delimiter = delim;
        self
    }

    /// Sets the quote byte and returns the reader (builder style).
    pub fn quote_char(mut self, quote: u8) -> Self {
        self.quote_char = quote;
        self
    }

    /// Enables or disables header handling and returns the reader (builder style).
    ///
    /// When enabled, the first row read is stored and made available through
    /// `headers`, and `rows` no longer yields it.
    pub fn has_header(mut self, has: bool) -> Self {
        self.has_header = has;
        self
    }

    /// Returns the captured header fields, or `None` while no header has been stored.
    ///
    /// Headers are only stored when header handling is enabled and the first row
    /// has already been read.
    pub fn headers(&self) -> Option<&[String]> {
        if self.headers.is_empty() {
            None
        } else {
            Some(&self.headers)
        }
    }

    /// Reads and parses the next line of input.
    ///
    /// Returns `Ok(Some(fields))` for every line, blank lines included, and
    /// `Ok(None)` once the input is exhausted. The line terminator (`\n` or `\r\n`)
    /// is removed before parsing. With header handling enabled, the very first row is
    /// additionally stored for `headers`; it is still returned to the caller.
    ///
    /// Each physical line is parsed on its own, so a quoted field that spans several
    /// lines is not reassembled here.
    ///
    /// # Errors
    ///
    /// Returns `ExcelError::ReadError` when reading from the file fails or when the
    /// reader has no data source at all.
    pub fn read_row(&mut self) -> Result<Option<Vec<String>>> {
        if !self.fill_line_buffer()? {
            return Ok(None);
        }

        let parser = CsvParser::new(self.delimiter, self.quote_char);
        let fields = parser.parse_line(&self.line_buffer);
        if self.has_header && self.row_count == 0 {
            self.headers = fields.clone();
        }
        self.row_count += 1;
        Ok(Some(fields))
    }

    /// Loads the next line, without its terminator, into `line_buffer`.
    ///
    /// Returns `Ok(false)` at end of input. End of input is detected per source
    /// (zero bytes read from the file, or the line iterator running dry) rather than
    /// from the length of the line, so a blank line in archive data is a row and not
    /// a premature end of input.
    fn fill_line_buffer(&mut self) -> Result<bool> {
        self.line_buffer.clear();

        if let Some(reader) = self.direct_reader.as_mut() {
            let bytes_read = reader
                .read_line(&mut self.line_buffer)
                .map_err(|e| ExcelError::ReadError(format!("Failed to read line: {}", e)))?;
            if bytes_read == 0 {
                return Ok(false);
            }
            // `read_line` keeps the terminator; drop "\n" and a preceding "\r".
            if self.line_buffer.ends_with('\n') {
                self.line_buffer.pop();
                if self.line_buffer.ends_with('\r') {
                    self.line_buffer.pop();
                }
            }
            return Ok(true);
        }

        let data = match self.zip_reader_data.as_ref() {
            Some(data) => data,
            None => return Err(ExcelError::ReadError("No reader available".to_string())),
        };
        // The archive contents are split into owned lines once, on the first read.
        // `str::lines` already strips "\n" and "\r\n".
        let lines = self.lines_iter.get_or_insert_with(|| {
            let owned: Vec<String> = String::from_utf8_lossy(data)
                .lines()
                .map(String::from)
                .collect();
            Box::new(owned.into_iter()) as Box<dyn Iterator<Item = String>>
        });
        match lines.next() {
            Some(line) => {
                self.line_buffer = line;
                Ok(true)
            }
            None => Ok(false),
        }
    }

    /// Returns an iterator over the remaining rows.
    ///
    /// With header handling enabled the header row is consumed but not yielded.
    pub fn rows(&mut self) -> CsvRowIterator<'_> {
        CsvRowIterator { reader: self }
    }

    /// Returns how many rows have been read so far, counting a header row as well.
    pub fn row_count(&self) -> u64 {
        self.row_count
    }
}
/// Iterator over the rows of a `CsvReader`, returned by `CsvReader::rows`.
///
/// When header handling is enabled on the reader, the header row is consumed
/// without being yielded and iteration starts with the row after it.
pub struct CsvRowIterator<'a> {
/// Reader the rows are pulled from; borrowed mutably for the iterator's lifetime.
reader: &'a mut CsvReader,
}
impl<'a> Iterator for CsvRowIterator<'a> {
    type Item = Result<Vec<String>>;

    /// Yields the next data row, or `None` at end of input.
    ///
    /// Read failures are passed through as `Some(Err(..))`. With header handling
    /// enabled, the row that brings the reader's row count to one is treated as the
    /// header: it is dropped and the following row is yielded in its place.
    fn next(&mut self) -> Option<Self::Item> {
        let row = match self.reader.read_row().transpose() {
            Some(Ok(row)) => row,
            // End of input or a read error: hand it to the caller unchanged.
            finished_or_failed => return finished_or_failed,
        };

        let just_read_header = self.reader.has_header && self.reader.row_count == 1;
        if just_read_header {
            self.reader.read_row().transpose()
        } else {
            Some(Ok(row))
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::csv_writer::CsvWriter;

    /// Without header handling every written row, the title row included, is yielded.
    #[test]
    fn test_read_plain_csv() -> Result<()> {
        let path = "test_read_plain.csv";
        {
            // Scoped so the writer is gone before the file is read back.
            let mut writer = CsvWriter::new(path)?;
            writer.write_row(["Name", "Age", "City"])?;
            writer.write_row(["Alice", "30", "NYC"])?;
            writer.write_row(["Bob", "25", "SF"])?;
            writer.save()?;
        }

        let mut reader = CsvReader::open(path)?;
        let rows = reader.rows().collect::<Result<Vec<_>>>()?;

        assert_eq!(rows.len(), 3);
        assert_eq!(rows[0], vec!["Name", "Age", "City"]);
        assert_eq!(rows[1], vec!["Alice", "30", "NYC"]);

        std::fs::remove_file(path).ok();
        Ok(())
    }

    /// With header handling the first row is captured as headers and not yielded.
    #[test]
    fn test_read_with_headers() -> Result<()> {
        let path = "test_read_headers.csv";
        {
            // Scoped so the writer is gone before the file is read back.
            let mut writer = CsvWriter::new(path)?;
            writer.write_row(["ID", "Name"])?;
            writer.write_row(["1", "Alice"])?;
            writer.write_row(["2", "Bob"])?;
            writer.save()?;
        }

        let mut reader = CsvReader::open(path)?.has_header(true);
        // Nothing has been read yet, so no header is known.
        assert_eq!(reader.headers(), None);

        let rows = reader.rows().collect::<Result<Vec<_>>>()?;

        assert_eq!(
            reader.headers(),
            Some(&["ID".to_string(), "Name".to_string()][..])
        );
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec!["1", "Alice"]);

        std::fs::remove_file(path).ok();
        Ok(())
    }
}