simple_fs/span/
csv_spans.rs1use crate::spath::SPath;
2use crate::{Error, Result, open_file};
3use std::io::{self, Read};
4
5pub fn csv_row_spans(path: impl AsRef<SPath>) -> Result<Vec<(usize, usize)>> {
11 let path = path.as_ref();
12 let mut f = open_file(path)?;
13 csv_row_spans_from_reader(&mut f).map_err(|err| Error::FileCantRead((path, err).into()))
14}
15
/// Scans CSV bytes from `r` and returns the byte span of every record.
///
/// Each span is a `(start, end)` pair of absolute byte offsets counted from the first
/// byte read: `start` is the first byte of the record and `end` is the offset just past
/// its last content byte, so the terminating `\n` (or `\r\n`) is excluded.
///
/// Quoting rules applied while scanning:
/// - A `"` outside quotes opens a quoted section; line breaks inside it do not end
///   the record.
/// - Inside quotes, `""` is an escaped quote; a single `"` followed by any other byte
///   closes the quoted section.
///
/// A final record that is not newline-terminated is still reported and ends at the
/// end of the stream. Empty input yields an empty vector.
///
/// # Errors
///
/// Returns any I/O error produced by `r`, except [`io::ErrorKind::Interrupted`],
/// which is non-fatal and is retried.
fn csv_row_spans_from_reader<R: Read>(r: &mut R) -> io::Result<Vec<(usize, usize)>> {
    let mut spans: Vec<(usize, usize)> = Vec::new();

    // Scratch buffer; the input is scanned chunk by chunk, never loaded whole.
    let mut buf = [0u8; 64 * 1024];

    // Absolute offset of the first byte of the chunk currently being scanned.
    let mut file_pos: usize = 0;
    // Absolute offset at which the record currently being scanned starts.
    let mut rec_start: usize = 0;

    // True while inside a double-quoted section.
    let mut in_quotes = false;
    // True when the previous byte was a `"` seen inside quotes: it is either the first
    // half of an escaped `""` or the closing quote, decided by the next byte.
    let mut quote_pending = false;

    // Whether the previous byte was `\r`. Kept across chunks so a `\r\n` pair split
    // over two reads is still recognized.
    let mut prev_byte_is_cr = false;

    loop {
        let n = match r.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // An interrupted read transferred no data; retrying is the documented remedy.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };

        for (i, &b) in buf[..n].iter().enumerate() {
            if quote_pending {
                quote_pending = false;
                if b == b'"' {
                    // Escaped quote (`""`): consume it and stay inside the quotes.
                    prev_byte_is_cr = false;
                    continue;
                }
                // The pending quote was a closing quote; `b` is handled below as
                // regular (unquoted) content.
                in_quotes = false;
            }

            match b {
                b'"' if in_quotes => quote_pending = true,
                b'"' => in_quotes = true,
                b'\n' if !in_quotes => {
                    let abs_nl = file_pos + i;
                    // Exclude the `\r` of a `\r\n` terminator. No underflow: the flag
                    // is only set after at least one byte has been consumed.
                    let end = if prev_byte_is_cr { abs_nl - 1 } else { abs_nl };
                    spans.push((rec_start, end));
                    rec_start = abs_nl + 1;
                }
                _ => {}
            }

            prev_byte_is_cr = b == b'\r';
        }

        file_pos += n;
    }

    // Trailing record without a final newline.
    if rec_start < file_pos {
        spans.push((rec_start, file_pos));
    }

    Ok(spans)
}
122
#[cfg(test)]
mod tests {
    use super::*;

    // Test-local result alias so `?` can be used on any error type.
    type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;

    /// Checks that the sample CSV file is split into the expected records and that
    /// reading each span back yields the record text without its line terminator.
    #[test]
    fn test_span_csv_row_spans_simple() -> Result<()> {
        // -- Setup & Fixtures
        let csv_path = SPath::from("tests-data/example.csv");
        let expected_rows = [
            "name,age,comment",
            "Alice,30,\"hello, world\"",
            "Bob,25,\"Line with \"\"quote\"\"\"",
            "Carol,28,\"multi\nline with \"\"quotes\"\" inside\"",
        ];

        // -- Exec
        let row_spans = csv_row_spans(&csv_path)?;

        // -- Check
        assert_eq!(row_spans.len(), 4, "should find 4 CSV records (including header)");

        for (idx, want) in expected_rows.iter().enumerate() {
            let (start, end) = match row_spans.get(idx) {
                Some(&span) => span,
                None => return Err("missing expected span".into()),
            };
            let got = crate::read_span(&csv_path, start, end)?;
            assert_eq!(&got, want);
        }

        Ok(())
    }
}
160
161