use crate::spath::SPath;
use crate::{Error, Result, open_file};
use memchr::memchr_iter;
use std::io::{self, Read};
pub fn line_spans(path: impl AsRef<SPath>) -> Result<Vec<(usize, usize)>> {
let path = path.as_ref();
let mut f = open_file(path)?;
let res = line_spans_from_reader(&mut f).map_err(|err| Error::FileCantRead((path, err).into()))?;
Ok(res)
}
/// Reads `r` until EOF and returns the byte span of every physical line.
///
/// Each span is a `(start, end)` pair of byte offsets counted from the first byte
/// read: `start` is the offset of the line's first byte and `end` is the offset one
/// past its last content byte. The terminating `\n` is never part of a span, and a
/// `\r` immediately preceding that `\n` is excluded as well (even when the `\r\n`
/// pair straddles two reads).
///
/// - An empty line yields a span with `start == end`.
/// - A final line without a trailing `\n` is reported up to EOF as-is (a trailing
///   lone `\r` is kept, since only `\n` terminates a line here).
/// - A trailing `\n` at EOF does not produce an extra empty span.
/// - Empty input yields an empty vector.
///
/// # Errors
///
/// Returns the first I/O error reported by `r`, except `ErrorKind::Interrupted`,
/// which is non-fatal per the `Read::read` contract and is retried.
fn line_spans_from_reader<R: Read>(r: &mut R) -> io::Result<Vec<(usize, usize)>> {
    let mut spans: Vec<(usize, usize)> = Vec::new();
    let mut buf = [0u8; 64 * 1024];

    // Absolute offset of the first byte of the chunk currently being scanned.
    let mut file_pos: usize = 0;
    // Absolute offset of the first byte of the line currently being scanned.
    let mut line_start: usize = 0;
    // Whether the last byte of the previous chunk was `\r`; needed to detect a
    // `\r\n` pair split across two reads.
    let mut prev_byte_is_cr = false;

    loop {
        let n = match r.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read transferred no data; retry instead of failing.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        let chunk = &buf[..n];

        for nl_idx in memchr_iter(b'\n', chunk) {
            let abs_nl = file_pos + nl_idx;

            // The byte before the `\n` lives either in this chunk or, when the
            // `\n` is the chunk's first byte, at the end of the previous chunk.
            let preceded_by_cr = match nl_idx.checked_sub(1) {
                Some(prev_idx) => chunk[prev_idx] == b'\r',
                None => prev_byte_is_cr,
            };

            // `prev_byte_is_cr` starts out false, so `abs_nl - 1` cannot underflow.
            let end = if preceded_by_cr { abs_nl - 1 } else { abs_nl };

            spans.push((line_start, end));
            line_start = abs_nl + 1;
        }

        // `n > 0` here, so the chunk has a last byte.
        prev_byte_is_cr = chunk[n - 1] == b'\r';
        file_pos += n;
    }

    // Last line without a terminating `\n`.
    if line_start < file_pos {
        spans.push((line_start, file_pos));
    }

    Ok(spans)
}
#[cfg(test)]
mod tests {
    // Test-local result alias so `?` works with any error type.
    type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;

    use super::*;

    /// Checks that `line_spans` reports one span per physical line of the CSV
    /// fixture and that each span reads back as the expected line content.
    #[test]
    fn test_span_line_span_line_spans_simple() -> Result<()> {
        // -- Setup & Fixtures
        let fixture = SPath::from("tests-data/example.csv");
        // Physical lines: the quoted multi-line CSV field is split in two.
        let wanted_lines = [
            "name,age,comment",
            "Alice,30,\"hello, world\"",
            "Bob,25,\"Line with \"\"quote\"\"\"",
            "Carol,28,\"multi",
            "line with \"\"quotes\"\" inside\"",
        ];

        // -- Exec
        let found = line_spans(&fixture)?;

        // -- Check
        assert_eq!(found.len(), 5, "should find 5 physical lines");

        for (idx, wanted) in wanted_lines.iter().enumerate() {
            let (start, end) = match found.get(idx) {
                Some(&span) => span,
                None => return Err("missing expected line span".into()),
            };
            let text = crate::read_span(&fixture, start, end)?;
            assert_eq!(&text, wanted);
        }

        Ok(())
    }
}