use crate::spath::SPath;
use crate::{Error, Result, open_file};
use std::io::{self, Read};
pub fn csv_row_spans(path: impl AsRef<SPath>) -> Result<Vec<(usize, usize)>> {
let path = path.as_ref();
let mut f = open_file(path)?;
csv_row_spans_from_reader(&mut f).map_err(|err| Error::FileCantRead((path, err).into()))
}
/// Scans a CSV byte stream and returns the byte span of each record.
///
/// Each span is a half-open `(start, end)` pair of byte offsets counted from
/// the first byte produced by `r`: `start` is the first byte of the record and
/// `end` is one past its last byte. The record terminator (`\n`, or `\r\n`) is
/// not part of the span.
///
/// Rules applied while scanning:
/// - A `"` opens a quoted section; inside it, `""` is an escaped quote and any
///   other byte following a `"` closes the section.
/// - A `\n` inside a quoted section does not end the record.
/// - An empty line yields an empty span (`start == end`).
/// - Bytes after the last terminator (if any) form a final record that ends at
///   the end of the stream.
///
/// # Errors
///
/// Returns the first I/O error reported by `r.read`, except for
/// `io::ErrorKind::Interrupted`, which is retried.
fn csv_row_spans_from_reader<R: Read>(r: &mut R) -> io::Result<Vec<(usize, usize)>> {
    let mut spans: Vec<(usize, usize)> = Vec::new();
    let mut buf = [0u8; 64 * 1024];

    // Absolute offset of the first byte of the current chunk.
    let mut file_pos: usize = 0;
    // Absolute offset where the record being scanned starts.
    let mut rec_start: usize = 0;

    // Quote state, carried across chunk boundaries.
    let mut in_quotes = false;
    // True right after a `"` seen inside quotes: the next byte decides whether
    // it was an escaped quote (`""`) or the closing quote.
    let mut quote_pending = false;
    // True when the previously scanned byte was `\r` (also across chunks).
    let mut prev_byte_is_cr = false;

    loop {
        let n = match r.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read is non-fatal per the `Read` contract: retry.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };

        for (i, &b) in buf[..n].iter().enumerate() {
            if quote_pending {
                quote_pending = false;
                if b == b'"' {
                    // Escaped quote (`""`): consume it and stay inside quotes.
                    prev_byte_is_cr = false;
                    continue;
                }
                // The previous `"` was the closing quote.
                in_quotes = false;
            }

            match b {
                b'"' => {
                    if in_quotes {
                        quote_pending = true;
                    } else {
                        in_quotes = true;
                    }
                }
                b'\n' if !in_quotes => {
                    let abs_nl = file_pos + i;
                    // Exclude the `\r` of a `\r\n` terminator. When the flag
                    // is set a previous byte exists, so `abs_nl >= 1`.
                    let end = if prev_byte_is_cr { abs_nl - 1 } else { abs_nl };
                    spans.push((rec_start, end));
                    rec_start = abs_nl + 1;
                }
                _ => {}
            }

            prev_byte_is_cr = b == b'\r';
        }

        file_pos += n;
    }

    // Trailing record without a final newline.
    if rec_start < file_pos {
        spans.push((rec_start, file_pos));
    }

    Ok(spans)
}
#[cfg(test)]
mod tests {
    // Local alias so test functions can use `?` with any error type.
    type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;

    use super::*;

    /// Checks that each span returned for the example CSV fixture reads back
    /// as the expected record text, including quoted and multi-line fields.
    #[test]
    fn test_span_csv_row_spans_simple() -> Result<()> {
        // -- Setup & Fixtures
        let path = SPath::from("tests-data/example.csv");
        let expected = [
            "name,age,comment",
            "Alice,30,\"hello, world\"",
            "Bob,25,\"Line with \"\"quote\"\"\"",
            "Carol,28,\"multi\nline with \"\"quotes\"\" inside\"",
        ];

        // -- Exec
        let spans = csv_row_spans(&path)?;

        // -- Check
        assert_eq!(spans.len(), 4, "should find 4 CSV records (including header)");
        for (i, exp) in expected.iter().enumerate() {
            let (start, end) = match spans.get(i) {
                Some(&span) => span,
                None => return Err("missing expected span".into()),
            };
            let got = crate::read_span(&path, start, end)?;
            assert_eq!(&got, exp);
        }

        Ok(())
    }
}