Skip to main content

cell_sheet_core/io/
csv.rs

1use crate::model::Sheet;
2use std::io::{Read, Write};
3
/// Upper bound applied to every auto-sized column width computed by `read_csv`.
const MAX_COL_WIDTH: u16 = 40;
/// Lower bound for auto-sized column widths in `read_csv`: a column whose
/// content is narrower than this (including an entirely empty column) still
/// gets this width.
const DEFAULT_COL_WIDTH: u16 = 10;
6
/// Guess the field delimiter of delimited text from its first line.
///
/// Only the first line of `sample` is inspected, and never more than its
/// first 4 KiB. The candidates are `,`, `\t`, `|` and `;`; whichever occurs
/// most often wins, and ties are broken in that order (comma wins ties).
///
/// Counting is purely byte-wise: there is no awareness of quoting, so a
/// candidate byte inside a quoted field is counted like any other.
///
/// Returns `b','` when `sample` is empty or its first line contains none of
/// the candidates.
pub fn sniff_delimiter(sample: &[u8]) -> u8 {
    const CANDIDATE_COUNT: usize = 4;
    // Listed in tie-break order: an earlier entry beats a later one on a tie.
    const CANDIDATES: [u8; CANDIDATE_COUNT] = [b',', b'\t', b'|', b';'];
    // Maximum number of leading bytes that are ever examined.
    const SNIFF_LIMIT: usize = 4096;

    // Apply the cap *before* searching for the newline so that a huge sample
    // with a very long (or unterminated) first line is not scanned end to end.
    let window = &sample[..sample.len().min(SNIFF_LIMIT)];
    // `split` always yields at least one (possibly empty) piece.
    let line = window.split(|&b| b == b'\n').next().unwrap_or(window);

    // Single pass over the line, tallying each candidate in its own slot.
    let mut counts = [0usize; CANDIDATE_COUNT];
    for &byte in line {
        if let Some(slot) = CANDIDATES.iter().position(|&c| c == byte) {
            counts[slot] += 1;
        }
    }

    // Strict `>` keeps the earliest candidate on ties, and leaves index 0
    // (comma) selected when every count is zero.
    let mut best = 0;
    for (idx, &count) in counts.iter().enumerate() {
        if count > counts[best] {
            best = idx;
        }
    }
    CANDIDATES[best]
}
30
31pub fn read_csv<R: Read>(reader: R, delimiter: u8) -> Result<Sheet, Box<dyn std::error::Error>> {
32    let mut sheet = Sheet::new();
33    let mut csv_reader = csv::ReaderBuilder::new()
34        .has_headers(false)
35        .delimiter(delimiter)
36        .from_reader(reader);
37
38    let mut max_col = 0usize;
39    let mut col_content_widths: Vec<usize> = Vec::new();
40
41    for (row_idx, result) in csv_reader.records().enumerate() {
42        let record = result?;
43        if record.len() > max_col {
44            max_col = record.len();
45            col_content_widths.resize(max_col, 0);
46        }
47        for (col_idx, field) in record.iter().enumerate() {
48            if !field.is_empty() {
49                sheet.set_cell((row_idx, col_idx), field);
50                col_content_widths[col_idx] = col_content_widths[col_idx].max(field.len());
51            }
52        }
53        sheet.row_count = row_idx + 1;
54    }
55    sheet.col_count = max_col;
56
57    sheet.col_widths = col_content_widths
58        .iter()
59        .map(|&w| {
60            let width = (w as u16).max(DEFAULT_COL_WIDTH);
61            width.min(MAX_COL_WIDTH)
62        })
63        .collect();
64
65    Ok(sheet)
66}
67
68pub fn write_csv<W: Write>(
69    sheet: &Sheet,
70    writer: W,
71    delimiter: u8,
72) -> Result<(), Box<dyn std::error::Error>> {
73    let mut csv_writer = csv::WriterBuilder::new()
74        .delimiter(delimiter)
75        .from_writer(writer);
76
77    for row in 0..sheet.row_count {
78        let mut record = Vec::new();
79        for col in 0..sheet.col_count {
80            let value = match sheet.get_cell((row, col)) {
81                Some(cell) => cell.value.to_string(),
82                None => String::new(),
83            };
84            record.push(value);
85        }
86        csv_writer.write_record(&record)?;
87    }
88    csv_writer.flush()?;
89    Ok(())
90}
91
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::CellValue;

    /// Parse `text` with `delimiter`, panicking on any reader error.
    fn parse(text: &str, delimiter: u8) -> Sheet {
        read_csv(text.as_bytes(), delimiter).unwrap()
    }

    /// Serialize `sheet` with `delimiter` and return the output as text.
    fn render(sheet: &Sheet, delimiter: u8) -> String {
        let mut out = Vec::new();
        write_csv(sheet, &mut out, delimiter).unwrap();
        String::from_utf8(out).unwrap()
    }

    /// Build a sheet by calling `set_cell` for each entry, in order.
    fn sheet_with(cells: &[((usize, usize), &str)]) -> Sheet {
        let mut sheet = Sheet::new();
        for &(pos, raw) in cells {
            sheet.set_cell(pos, raw);
        }
        sheet
    }

    /// Assert that the cell at `pos` exists and holds `expected`.
    #[track_caller]
    fn assert_value(sheet: &Sheet, pos: (usize, usize), expected: CellValue) {
        assert_eq!(sheet.get_cell(pos).unwrap().value, expected);
    }

    // ---- read_csv -------------------------------------------------------

    #[test]
    fn read_csv_simple() {
        let sheet = parse("Name,Score\nAlice,95\nBob,88\n", b',');
        assert_eq!(sheet.row_count, 3);
        assert_eq!(sheet.col_count, 2);
        assert_value(&sheet, (0, 0), CellValue::Text("Name".into()));
        assert_value(&sheet, (1, 1), CellValue::Number(95.0));
    }

    #[test]
    fn read_tsv() {
        let sheet = parse("A\tB\n1\t2\n", b'\t');
        assert_value(&sheet, (1, 0), CellValue::Number(1.0));
        assert_value(&sheet, (1, 1), CellValue::Number(2.0));
    }

    #[test]
    fn read_csv_empty_cells() {
        let sheet = parse("a,,b\n,,\n", b',');
        assert_value(&sheet, (0, 0), CellValue::Text("a".into()));
        assert!(sheet.get_cell((0, 1)).is_none());
        assert_value(&sheet, (0, 2), CellValue::Text("b".into()));
    }

    #[test]
    fn read_csv_quoted_fields() {
        let sheet = parse("\"hello, world\",42\n", b',');
        assert_value(&sheet, (0, 0), CellValue::Text("hello, world".into()));
    }

    #[test]
    fn read_csv_formula_as_text() {
        let sheet = parse("=SUM(A1:A3)\n", b',');
        assert_eq!(sheet.get_cell((0, 0)).unwrap().raw, "=SUM(A1:A3)");
        assert_value(&sheet, (0, 0), CellValue::Text("=SUM(A1:A3)".into()));
    }

    #[test]
    fn read_pipe_delimited() {
        let sheet = parse("a|b|c\n1|2|3\n", b'|');
        assert_eq!(sheet.row_count, 2);
        assert_eq!(sheet.col_count, 3);
        assert_value(&sheet, (0, 1), CellValue::Text("b".into()));
        assert_value(&sheet, (1, 2), CellValue::Number(3.0));
    }

    #[test]
    fn col_widths_auto_sized() {
        let sheet = parse("Name,Score\nAlice,95\n", b',');
        assert!(sheet.col_widths[0] >= 5);
        assert!(sheet.col_widths[1] >= 5);
    }

    // ---- write_csv ------------------------------------------------------

    #[test]
    fn write_csv_simple() {
        let sheet = sheet_with(&[
            ((0, 0), "Name"),
            ((0, 1), "Score"),
            ((1, 0), "Alice"),
            ((1, 1), "95"),
        ]);
        assert_eq!(render(&sheet, b','), "Name,Score\nAlice,95\n");
    }

    #[test]
    fn write_csv_flattens_formula_values() {
        let mut sheet = sheet_with(&[((0, 0), "=1+2")]);
        // Simulate an evaluated formula: the raw text stays, the value changes.
        sheet.cells.get_mut(&(0, 0)).unwrap().value = CellValue::Number(3.0);
        assert_eq!(render(&sheet, b','), "3\n");
    }

    #[test]
    fn write_csv_empty_cells() {
        let mut sheet = sheet_with(&[((0, 0), "a"), ((0, 2), "b")]);
        sheet.row_count = 1;
        sheet.col_count = 3;
        assert_eq!(render(&sheet, b','), "a,,b\n");
    }

    #[test]
    fn write_csv_needs_quoting() {
        let sheet = sheet_with(&[((0, 0), "hello, world")]);
        assert_eq!(render(&sheet, b','), "\"hello, world\"\n");
    }

    #[test]
    fn write_pipe_delimited() {
        let sheet = sheet_with(&[((0, 0), "a"), ((0, 1), "b")]);
        assert_eq!(render(&sheet, b'|'), "a|b\n");
    }

    // ---- sniff_delimiter ------------------------------------------------

    #[test]
    fn sniff_pipe_delimiter() {
        assert_eq!(sniff_delimiter(b"name|score|grade\nalice|95|A\n"), b'|');
    }

    #[test]
    fn sniff_semicolon_delimiter() {
        assert_eq!(sniff_delimiter(b"a;b;c\n1;2;3\n"), b';');
    }

    #[test]
    fn sniff_tab_delimiter() {
        assert_eq!(sniff_delimiter(b"a\tb\tc\n1\t2\t3\n"), b'\t');
    }

    #[test]
    fn sniff_empty_defaults_to_comma() {
        assert_eq!(sniff_delimiter(b""), b',');
    }

    #[test]
    fn sniff_tie_prefers_comma() {
        // one comma, one pipe — comma wins ties
        assert_eq!(sniff_delimiter(b"a,b|c\n"), b',');
    }

    #[test]
    fn sniff_only_reads_first_line() {
        // First line has pipes; second line has many commas — sniff ignores line 2
        assert_eq!(sniff_delimiter(b"a|b|c\n1,2,3,4,5,6,7,8,9\n"), b'|');
    }

    #[test]
    fn sniff_respects_4kib_cap_on_long_first_line() {
        // First line is 4097 bytes: 4096 pipes followed by one comma.
        // Only the first 4096 bytes are inspected, so pipe wins.
        let mut first_line = vec![b'|'; 4096];
        first_line.extend_from_slice(b",\n");
        assert_eq!(sniff_delimiter(&first_line), b'|');
    }

    #[test]
    fn sniff_crlf_line_ending() {
        // Windows-style CRLF — \r is not a candidate, so it's harmless;
        // sniff should still correctly detect the pipe on the first line.
        assert_eq!(sniff_delimiter(b"a|b|c\r\n1,2,3\n"), b'|');
    }

    #[test]
    fn sniff_then_read_round_trip() {
        let data = b"x|y|z\n1|2|3\n";
        let delim = sniff_delimiter(data);
        assert_eq!(delim, b'|');
        let sheet = read_csv(&data[..], delim).unwrap();
        assert_eq!(sheet.col_count, 3);
        assert_value(&sheet, (1, 1), CellValue::Number(2.0));
    }
}