use std::io::{Read, Seek, SeekFrom};
use crate::sheet::{CellValue, Result as SheetResult};
/// Buffered, row-at-a-time parser for delimited text (CSV/TSV style) read
/// from a seekable source.
///
/// The parser borrows the reader mutably for its whole lifetime and pulls
/// bytes from it in chunks through an internal buffer.
pub struct TextParser<'a, R: Read + Seek> {
/// Borrowed input source; rewound to offset 0 by `new` and `reset`.
reader: &'a mut R,
/// Parsing options. This file reads `buffer_size`, `quote`, `delimiter`,
/// `comment` and `trim_whitespace` from it.
config: super::workbook::TextConfig,
/// Chunk buffer that `reader.read` fills; allocated once in `new`.
buffer: Vec<u8>,
/// Index in `buffer` of the next byte that has not been consumed yet.
buffer_pos: usize,
/// Number of valid bytes in `buffer` as reported by the last read.
buffer_len: usize,
/// NOTE(review): within this file the field is only ever assigned 0 (in
/// `new` and `reset`) and is never read; presumably meant to hold the byte
/// offset where the current row starts — confirm or remove.
line_start_pos: u64,
}
impl<'a, R: Read + Seek> TextParser<'a, R> {
pub fn new(reader: &'a mut R, config: super::workbook::TextConfig) -> Self {
let _ = reader.seek(SeekFrom::Start(0));
let buffer_size = config.buffer_size;
TextParser {
reader,
config,
buffer: vec![0; buffer_size],
buffer_pos: 0,
buffer_len: 0,
line_start_pos: 0,
}
}
pub fn reset(&mut self) -> SheetResult<()> {
self.reader.seek(SeekFrom::Start(0))?;
self.buffer_pos = 0;
self.buffer_len = 0;
self.line_start_pos = 0;
Ok(())
}
pub fn parse_row(&mut self) -> SheetResult<Option<SheetResult<Vec<CellValue>>>> {
let mut fields = Vec::new();
let mut field_start = true;
let mut in_quotes = false;
let mut current_field = Vec::new();
loop {
if self.buffer_pos >= self.buffer_len {
self.buffer_len = self.reader.read(&mut self.buffer)?;
self.buffer_pos = 0;
if self.buffer_len == 0 {
if !fields.is_empty() || !current_field.is_empty() {
self.finish_field(&mut current_field, &mut fields);
return Ok(Some(Ok(fields)));
}
return Ok(None);
}
}
let byte = self.buffer[self.buffer_pos];
self.buffer_pos += 1;
match byte {
b'\n' => {
if in_quotes {
current_field.push(byte);
} else {
self.finish_field(&mut current_field, &mut fields);
return Ok(Some(Ok(fields)));
}
}
b'\r' => {
if !in_quotes {
continue;
} else {
current_field.push(byte);
}
}
quote if quote == self.config.quote => {
if in_quotes {
if self.buffer_pos < self.buffer_len && self.buffer[self.buffer_pos] == self.config.quote {
current_field.push(self.config.quote);
self.buffer_pos += 1;
} else {
in_quotes = false;
}
} else {
in_quotes = true;
field_start = false; }
}
delim if delim == self.config.delimiter && !in_quotes => {
self.finish_field(&mut current_field, &mut fields);
field_start = true;
}
b'\\' if in_quotes => {
if self.buffer_pos < self.buffer_len {
let next_byte = self.buffer[self.buffer_pos];
self.buffer_pos += 1;
match next_byte {
b'n' => current_field.push(b'\n'),
b'r' => current_field.push(b'\r'),
b't' => current_field.push(b'\t'),
b'\\' => current_field.push(b'\\'),
quote if quote == self.config.quote => current_field.push(quote),
_ => {
current_field.push(byte);
current_field.push(next_byte);
}
}
} else {
current_field.push(byte);
}
}
_ => {
if field_start && self.config.comment == Some(byte) && fields.is_empty() && !in_quotes {
while self.buffer_pos < self.buffer_len {
let b = self.buffer[self.buffer_pos];
self.buffer_pos += 1;
if b == b'\n' {
break;
}
}
return self.parse_row();
}
current_field.push(byte);
field_start = false;
}
}
}
}
fn finish_field(&self, current_field: &mut Vec<u8>, fields: &mut Vec<CellValue>) {
let mut field_bytes = std::mem::take(current_field);
if self.config.trim_whitespace {
let start = field_bytes.iter().position(|&b| !b.is_ascii_whitespace()).unwrap_or(field_bytes.len());
let end = field_bytes.iter().rposition(|&b| !b.is_ascii_whitespace()).map(|i| i + 1).unwrap_or(0);
if start < end {
field_bytes = field_bytes[start..end].to_vec();
} else {
field_bytes.clear();
}
}
let field_str = match String::from_utf8(field_bytes) {
Ok(s) => s,
Err(e) => {
let valid_bytes = e.into_bytes();
String::from_utf8_lossy(&valid_bytes).to_string()
}
};
let cell_value = if field_str.is_empty() {
CellValue::Empty
} else if let Ok(int_val) = field_str.parse::<i64>() {
CellValue::Int(int_val)
} else if let Ok(float_val) = fast_float2::parse(&field_str) {
CellValue::Float(float_val)
} else {
match field_str.to_lowercase().as_str() {
"true" | "1" | "yes" | "on" => CellValue::Bool(true),
"false" | "0" | "no" | "off" => CellValue::Bool(false),
_ => CellValue::String(field_str),
}
};
fields.push(cell_value);
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::sheet::CellValue;
    use std::io::Cursor;

    /// Shorthand for the string cell an assertion expects.
    fn text(value: &str) -> CellValue {
        CellValue::String(value.to_string())
    }

    /// Comma-separated input: a header row, two data rows, then end of input.
    #[test]
    fn test_simple_csv_parsing() {
        let input = "name,age,city\nJohn,25,New York\nJane,30,London";
        let options = super::super::workbook::TextConfig::default();
        let mut source = Cursor::new(input.as_bytes());
        let mut parser = TextParser::new(&mut source, options);

        let header = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(header, vec![text("name"), text("age"), text("city")]);

        let john = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(john, vec![text("John"), CellValue::Int(25), text("New York")]);

        let jane = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(jane, vec![text("Jane"), CellValue::Int(30), text("London")]);

        // Nothing is left after the third row.
        assert!(parser.parse_row().unwrap().is_none());
    }

    /// Quoted fields may contain the delimiter and doubled (escaped) quotes.
    #[test]
    fn test_quoted_fields() {
        let input = "\"Hello, World\",\"Value with \"\"quotes\"\"\",\"Normal\"";
        let options = super::super::workbook::TextConfig::default();
        let mut source = Cursor::new(input.as_bytes());
        let mut parser = TextParser::new(&mut source, options);

        let cells = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(
            cells,
            vec![
                text("Hello, World"),
                text("Value with \"quotes\""),
                text("Normal"),
            ]
        );
    }

    /// Tab-separated input parsed with the TSV configuration.
    #[test]
    fn test_tsv_parsing() {
        let input = "name\tage\tcity\nJohn\t25\tNew York";
        let options = super::super::workbook::TextConfig::tsv();
        let mut source = Cursor::new(input.as_bytes());
        let mut parser = TextParser::new(&mut source, options);

        let header = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(header, vec![text("name"), text("age"), text("city")]);

        let record = parser.parse_row().unwrap().unwrap().unwrap();
        assert_eq!(record, vec![text("John"), CellValue::Int(25), text("New York")]);
    }
}