simple_fs/span/
csv_spans.rs1use crate::spath::SPath;
2use crate::{Error, Result, open_file};
3use std::io::{self, Read};
4
5pub fn csv_row_spans(path: impl AsRef<SPath>) -> Result<Vec<(usize, usize)>> {
11 let path = path.as_ref();
12 let mut f = open_file(path)?;
13 csv_row_spans_from_reader(&mut f).map_err(|err| Error::FileCantRead((path, err).into()))
14}
15
/// Scans a CSV byte stream and returns the byte span of each record.
///
/// The reader is consumed in fixed-size chunks, so the whole input is never
/// held in memory. Every `(start, end)` pair is a half-open byte range
/// relative to the start of the stream; the record terminator (`\n`, or
/// `\r\n`) is not part of the span.
///
/// Quoting rules applied while scanning:
/// - A `"` toggles "inside quotes"; a `\n` seen while inside quotes is data,
///   not a record terminator.
/// - Inside quotes, `""` is an escaped quote and keeps the field open.
///
/// A final record without a trailing newline is still reported, ending at the
/// total number of bytes read. Empty input yields an empty vector.
///
/// # Errors
///
/// Returns the first non-recoverable I/O error from `r.read`. Reads failing
/// with `io::ErrorKind::Interrupted` are retried, as the `Read` contract
/// recommends, instead of aborting the scan.
fn csv_row_spans_from_reader<R: Read>(r: &mut R) -> io::Result<Vec<(usize, usize)>> {
    let mut spans: Vec<(usize, usize)> = Vec::new();

    // Reusable read buffer (64 KiB).
    let mut buf = [0u8; 64 * 1024];

    // Absolute offset of `buf[0]` within the stream.
    let mut file_pos: usize = 0;
    // Absolute offset where the record currently being scanned begins.
    let mut rec_start: usize = 0;

    // True while scanning inside a double-quoted field.
    let mut in_quotes = false;
    // True right after a `"` seen inside quotes: it is either the closing
    // quote or the first half of an escaped `""`; the next byte decides.
    let mut quote_pending = false;

    // Whether the previously processed byte was `\r`. Kept across chunk
    // boundaries so that a `\r\n` split between two reads is still recognized.
    let mut prev_byte_is_cr = false;

    loop {
        let n = match r.read(&mut buf) {
            Ok(0) => break,
            Ok(n) => n,
            // Interrupted reads are non-fatal: simply try again.
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };

        for (i, &b) in buf[..n].iter().enumerate() {
            if quote_pending {
                quote_pending = false;
                if b == b'"' {
                    // Escaped quote (`""`): still inside the quoted field.
                    prev_byte_is_cr = false;
                    continue;
                }
                // The pending quote was the closing one; `b` is handled below
                // as a regular, unquoted byte.
                in_quotes = false;
            }

            match b {
                b'"' => {
                    if in_quotes {
                        quote_pending = true;
                    } else {
                        in_quotes = true;
                    }
                }
                b'\n' if !in_quotes => {
                    let abs_nl = file_pos + i;
                    // Exclude the `\r` of a `\r\n` terminator. The subtraction
                    // cannot underflow: a `\r` byte precedes this `\n`.
                    let end = if prev_byte_is_cr { abs_nl - 1 } else { abs_nl };
                    spans.push((rec_start, end));
                    rec_start = abs_nl + 1;
                }
                _ => {}
            }

            prev_byte_is_cr = b == b'\r';
        }

        file_pos += n;
    }

    // Trailing record without a final newline. The quote state needs no
    // end-of-input cleanup: only `rec_start` and `file_pos` matter here.
    if rec_start < file_pos {
        spans.push((rec_start, file_pos));
    }

    Ok(spans)
}
120
#[cfg(test)]
mod tests {
    use super::*;

    // Test-local result alias so `?` works with any error type.
    type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;

    /// Checks that `csv_row_spans` finds four records in the example fixture
    /// and that reading each span back yields the expected record text,
    /// including a quoted field containing a comma, escaped quotes, and an
    /// embedded newline.
    #[test]
    fn test_span_csv_row_spans_simple() -> Result<()> {
        // -- Setup & Fixtures
        let csv_path = SPath::from("tests-data/example.csv");
        let expected_records = [
            "name,age,comment",
            "Alice,30,\"hello, world\"",
            "Bob,25,\"Line with \"\"quote\"\"\"",
            "Carol,28,\"multi\nline with \"\"quotes\"\" inside\"",
        ];

        // -- Exec
        let row_spans = csv_row_spans(&csv_path)?;

        // -- Check
        assert_eq!(row_spans.len(), 4, "should find 4 CSV records (including header)");

        for (idx, want) in expected_records.iter().enumerate() {
            let (start, end) = row_spans.get(idx).copied().ok_or("missing expected span")?;
            let got = crate::read_span(&csv_path, start, end)?;
            assert_eq!(&got, want);
        }

        Ok(())
    }
}
158
159