Skip to main content

haystack_core/codecs/
csv.rs

1// CSV wire format codec — encode-only CSV output for Haystack grids.
2
3use super::{Codec, CodecError};
4use crate::codecs::zinc;
5use crate::data::HGrid;
6use crate::kinds::Kind;
7
/// CSV wire format codec (encode only).
///
/// A stateless unit type. Grids can be encoded to CSV text; decoding a grid
/// always returns an error. Scalar encoding and decoding are delegated to the
/// Zinc codec.
#[derive(Debug, Clone, Copy, Default)]
pub struct CsvCodec;
10
/// Render `val` as a single CSV field.
///
/// The field is unconditionally enclosed in double quotes. Every `"` found
/// in `val` is written as `""`; all other characters (commas and line breaks
/// included) are copied through unchanged.
fn csv_quote(val: &str) -> String {
    // Double the embedded quotes first, then add the surrounding pair.
    let escaped = val.replace('"', "\"\"");
    format!("\"{}\"", escaped)
}
28
29/// Encode an HGrid to CSV format.
30fn encode_grid(grid: &HGrid) -> Result<String, CodecError> {
31    let mut buf = String::new();
32
33    // Header row: quoted column names
34    let headers: Vec<String> = grid.cols.iter().map(|col| csv_quote(&col.name)).collect();
35    buf.push_str(&headers.join(","));
36    buf.push('\n');
37
38    // Data rows: Zinc-encoded scalar values, quoted for CSV
39    for row in &grid.rows {
40        let cells: Result<Vec<String>, CodecError> = grid
41            .cols
42            .iter()
43            .map(|col| {
44                let val = match row.get(&col.name) {
45                    Some(v) => v,
46                    None => &Kind::Null,
47                };
48                let zinc_str = zinc::encode_scalar(val)?;
49                Ok(csv_quote(&zinc_str))
50            })
51            .collect();
52        buf.push_str(&cells?.join(","));
53        buf.push('\n');
54    }
55
56    Ok(buf)
57}
58
59impl Codec for CsvCodec {
60    fn mime_type(&self) -> &str {
61        "text/csv"
62    }
63
64    fn encode_grid(&self, grid: &HGrid) -> Result<String, CodecError> {
65        encode_grid(grid)
66    }
67
68    fn decode_grid(&self, _input: &str) -> Result<HGrid, CodecError> {
69        Err(CodecError::Parse {
70            pos: 0,
71            message: "CSV decode not supported".into(),
72        })
73    }
74
75    fn encode_scalar(&self, val: &Kind) -> Result<String, CodecError> {
76        zinc::encode_scalar(val)
77    }
78
79    fn decode_scalar(&self, input: &str) -> Result<Kind, CodecError> {
80        zinc::decode_scalar(input)
81    }
82}
83
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{HCol, HDict, HGrid};
    use crate::kinds::*;
    use chrono::NaiveDate;

    /// Rows with strings, numbers, dates, markers, bools and a missing cell.
    #[test]
    fn encode_grid_mixed_types() {
        let cols = vec![
            HCol::new("dis"),
            HCol::new("area"),
            HCol::new("built"),
            HCol::new("site"),
        ];

        let mut row1 = HDict::new();
        row1.set("dis", Kind::Str("Alpha".into()));
        row1.set(
            "area",
            Kind::Number(Number::new(3500.0, Some("ft\u{00B2}".into()))),
        );
        row1.set(
            "built",
            Kind::Date(NaiveDate::from_ymd_opt(2020, 6, 15).unwrap()),
        );
        row1.set("site", Kind::Marker);

        let mut row2 = HDict::new();
        row2.set("dis", Kind::Str("Beta".into()));
        row2.set("area", Kind::Number(Number::unitless(2100.0)));
        // built is missing in row2
        row2.set("site", Kind::Bool(false));

        let grid = HGrid::from_parts(HDict::new(), cols, vec![row1, row2]);
        let csv = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = csv.lines().collect();

        assert_eq!(lines[0], r#""dis","area","built","site""#);
        // Zinc encodes Kind::Str("Alpha") as "Alpha" (with quotes).
        // csv_quote doubles the inner " chars: """Alpha"""
        assert_eq!(
            lines[1],
            "\"\"\"Alpha\"\"\",\"3500ft\u{00B2}\",\"2020-06-15\",\"M\""
        );
        // The missing `built` cell is written as the null scalar `N`.
        assert_eq!(lines[2], "\"\"\"Beta\"\"\",\"2100\",\"N\",\"F\"");
    }

    /// A grid without columns still produces the (empty) header line.
    #[test]
    fn encode_empty_grid() {
        let grid = HGrid::new();
        let csv = encode_grid(&grid).unwrap();
        // Empty grid has no columns so the header row is just a newline
        assert_eq!(csv, "\n");
    }

    /// String values containing commas and double quotes stay inside their
    /// own CSV cell.
    #[test]
    fn encode_grid_with_commas_and_quotes_in_strings() {
        let cols = vec![HCol::new("name"), HCol::new("notes")];

        let mut row = HDict::new();
        row.set("name", Kind::Str("O'Brien, James".into()));
        row.set("notes", Kind::Str("He said \"hello\"".into()));

        let grid = HGrid::from_parts(HDict::new(), cols, vec![row]);
        let csv = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = csv.lines().collect();

        assert_eq!(lines[0], "\"name\",\"notes\"");

        // Name cell: Zinc renders the string as "O'Brien, James" (quotes
        // included) and csv_quote doubles those quotes and wraps the result:
        //   """O'Brien, James"""
        let name_cell = "\"\"\"O'Brien, James\"\"\"";

        // The name value itself contains a comma, so the row cannot be split
        // on ','. Peel the known name cell and the separator off the front
        // instead; what remains is exactly the notes cell.
        let rest = lines[1]
            .strip_prefix(name_cell)
            .expect("row should start with the quoted name cell");
        let notes_cell = rest
            .strip_prefix(',')
            .expect("cells should be separated by a comma");

        assert!(notes_cell.contains("He said"));
        assert!(notes_cell.contains("hello"));

        // How the inner quotes are escaped is owned by the Zinc encoder, so
        // the expected cell is derived from it rather than hard-coded.
        let zinc_notes = zinc::encode_scalar(&Kind::Str("He said \"hello\"".into())).unwrap();
        assert_eq!(notes_cell, csv_quote(&zinc_notes));
    }

    /// Grid decoding is rejected with a descriptive error.
    #[test]
    fn decode_grid_not_supported() {
        let codec = CsvCodec;
        let result = codec.decode_grid("anything");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("CSV decode not supported"));
    }

    /// Scalar encode/decode go through the Zinc codec unchanged.
    #[test]
    fn scalar_delegates_to_zinc() {
        let codec = CsvCodec;
        // encode
        let encoded = codec
            .encode_scalar(&Kind::Number(Number::unitless(42.0)))
            .unwrap();
        assert_eq!(encoded, "42");

        // decode
        let decoded = codec.decode_scalar("42").unwrap();
        assert_eq!(decoded, Kind::Number(Number::unitless(42.0)));
    }

    #[test]
    fn mime_type() {
        let codec = CsvCodec;
        assert_eq!(codec.mime_type(), "text/csv");
    }

    /// Columns without rows yield only the header line.
    #[test]
    fn encode_grid_cols_no_rows() {
        let cols = vec![HCol::new("a"), HCol::new("b")];
        let grid = HGrid::from_parts(HDict::new(), cols, vec![]);
        let csv = encode_grid(&grid).unwrap();
        assert_eq!(csv, "\"a\",\"b\"\n");
    }

    #[test]
    fn csv_quote_escapes_double_quotes() {
        assert_eq!(csv_quote("hello"), "\"hello\"");
        assert_eq!(csv_quote("say \"hi\""), "\"say \"\"hi\"\"\"");
        assert_eq!(csv_quote(""), "\"\"");
        assert_eq!(csv_quote("a,b"), "\"a,b\"");
    }
}
222}