Skip to main content

haystack_core/codecs/
csv.rs

1//! CSV wire format codec for Haystack grids.
2//!
3//! This module provides encode-only CSV output (`text/csv`). Each cell is
4//! Zinc-encoded and then quoted for CSV. Decoding is **not** supported —
5//! calling [`CsvCodec::decode_grid`] returns an error. Nested or complex
6//! kinds (grids, lists, dicts) may not roundtrip through CSV because the
7//! Zinc scalar representation is used verbatim inside each cell.
8
9use super::{Codec, CodecError};
10use crate::codecs::zinc;
11use crate::data::HGrid;
12use crate::kinds::Kind;
13
/// CSV wire format codec implementing the [`Codec`] trait for `text/csv`.
///
/// Encoding writes a header row of quoted column names followed by data rows
/// whose cells are Zinc-encoded scalars wrapped in CSV quoting. Decoding
/// is not supported and will return a [`CodecError`].
///
/// The codec is a stateless unit struct, so it is cheap to copy, can be
/// default-constructed, and can be printed in diagnostics.
#[derive(Debug, Clone, Copy, Default)]
pub struct CsvCodec;
20
/// Turn `val` into a quoted CSV cell.
///
/// Quoting is unconditional: the result always starts and ends with a
/// double quote, and every `"` found inside `val` is written as `""`.
/// All other characters (commas, newlines, ...) are copied through as-is.
fn csv_quote(val: &str) -> String {
    // Double the embedded quotes first, then add the surrounding pair.
    let doubled = val.replace('"', "\"\"");
    format!("\"{}\"", doubled)
}
38
39/// Encode an HGrid to CSV format.
40fn encode_grid(grid: &HGrid) -> Result<String, CodecError> {
41    let mut buf = String::new();
42
43    // Header row: quoted column names
44    let headers: Vec<String> = grid.cols.iter().map(|col| csv_quote(&col.name)).collect();
45    buf.push_str(&headers.join(","));
46    buf.push('\n');
47
48    // Data rows: Zinc-encoded scalar values, quoted for CSV
49    for row in &grid.rows {
50        let cells: Result<Vec<String>, CodecError> = grid
51            .cols
52            .iter()
53            .map(|col| {
54                let val = match row.get(&col.name) {
55                    Some(v) => v,
56                    None => &Kind::Null,
57                };
58                let zinc_str = zinc::encode_scalar(val)?;
59                Ok(csv_quote(&zinc_str))
60            })
61            .collect();
62        buf.push_str(&cells?.join(","));
63        buf.push('\n');
64    }
65
66    Ok(buf)
67}
68
69impl Codec for CsvCodec {
70    fn mime_type(&self) -> &str {
71        "text/csv"
72    }
73
74    fn encode_grid(&self, grid: &HGrid) -> Result<String, CodecError> {
75        encode_grid(grid)
76    }
77
78    fn decode_grid(&self, _input: &str) -> Result<HGrid, CodecError> {
79        Err(CodecError::Parse {
80            pos: 0,
81            message: "CSV decode not supported".into(),
82        })
83    }
84
85    fn encode_scalar(&self, val: &Kind) -> Result<String, CodecError> {
86        zinc::encode_scalar(val)
87    }
88
89    fn decode_scalar(&self, input: &str) -> Result<Kind, CodecError> {
90        zinc::decode_scalar(input)
91    }
92}
93
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{HCol, HDict, HGrid};
    use crate::kinds::*;
    use chrono::NaiveDate;

    /// Strings, numbers (with and without unit), dates, markers, bools and a
    /// missing cell are all written in column order.
    #[test]
    fn encode_grid_mixed_types() {
        let cols = vec![
            HCol::new("dis"),
            HCol::new("area"),
            HCol::new("built"),
            HCol::new("site"),
        ];

        let mut row1 = HDict::new();
        row1.set("dis", Kind::Str("Alpha".into()));
        row1.set(
            "area",
            Kind::Number(Number::new(3500.0, Some("ft\u{00B2}".into()))),
        );
        row1.set(
            "built",
            Kind::Date(NaiveDate::from_ymd_opt(2020, 6, 15).unwrap()),
        );
        row1.set("site", Kind::Marker);

        let mut row2 = HDict::new();
        row2.set("dis", Kind::Str("Beta".into()));
        row2.set("area", Kind::Number(Number::unitless(2100.0)));
        // built is missing in row2
        row2.set("site", Kind::Bool(false));

        let grid = HGrid::from_parts(HDict::new(), cols, vec![row1, row2]);
        let csv = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = csv.lines().collect();

        assert_eq!(lines[0], r#""dis","area","built","site""#);
        // Zinc encodes Kind::Str("Alpha") as "Alpha" (with quotes).
        // csv_quote doubles the inner " chars: """Alpha"""
        assert_eq!(
            lines[1],
            "\"\"\"Alpha\"\"\",\"3500ft\u{00B2}\",\"2020-06-15\",\"M\""
        );
        // The missing `built` cell is written as Null ("N").
        assert_eq!(lines[2], "\"\"\"Beta\"\"\",\"2100\",\"N\",\"F\"");
    }

    #[test]
    fn encode_empty_grid() {
        let grid = HGrid::new();
        let csv = encode_grid(&grid).unwrap();
        // Empty grid has no columns so the header row is just a newline
        assert_eq!(csv, "\n");
    }

    /// Commas and double quotes inside string values must survive CSV
    /// quoting: un-quoting each cell has to give back the Zinc encoding.
    #[test]
    fn encode_grid_with_commas_and_quotes_in_strings() {
        let cols = vec![HCol::new("name"), HCol::new("notes")];

        let mut row = HDict::new();
        row.set("name", Kind::Str("O'Brien, James".into()));
        row.set("notes", Kind::Str("He said \"hello\"".into()));

        let grid = HGrid::from_parts(HDict::new(), cols, vec![row]);
        let csv = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = csv.lines().collect();

        assert_eq!(lines[0], "\"name\",\"notes\"");

        // Name cell: Zinc produces "O'Brien, James" (with quotes), and
        // csv_quote doubles those quotes -> """O'Brien, James"""
        let name_cell = "\"\"\"O'Brien, James\"\"\"";

        // The name cell itself contains a comma, so splitting the line on
        // ',' would not isolate the notes cell. Strip the known name cell
        // and the separator instead.
        let notes_cell = lines[1]
            .strip_prefix(name_cell)
            .and_then(|rest| rest.strip_prefix(','))
            .expect("row should start with the quoted name cell and a comma");

        // The exact Zinc escaping is owned by the zinc module, so compare
        // against its output rather than hard-coding it here: removing the
        // outer CSV quotes and collapsing each `""` back to `"` must give
        // back exactly the Zinc-encoded string.
        let inner = notes_cell
            .strip_prefix('"')
            .and_then(|rest| rest.strip_suffix('"'))
            .expect("notes cell should be wrapped in CSV quotes");
        let zinc_notes = zinc::encode_scalar(&Kind::Str("He said \"hello\"".into())).unwrap();
        assert_eq!(inner.replace("\"\"", "\""), zinc_notes);

        // The whole cell is exactly what csv_quote produces for that value.
        assert_eq!(notes_cell, csv_quote(&zinc_notes));
        assert!(notes_cell.contains("He said"));
        assert!(notes_cell.contains("hello"));
    }

    #[test]
    fn decode_grid_not_supported() {
        let codec = CsvCodec;
        let result = codec.decode_grid("anything");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("CSV decode not supported"));
    }

    #[test]
    fn scalar_delegates_to_zinc() {
        let codec = CsvCodec;
        // encode
        let encoded = codec
            .encode_scalar(&Kind::Number(Number::unitless(42.0)))
            .unwrap();
        assert_eq!(encoded, "42");

        // decode
        let decoded = codec.decode_scalar("42").unwrap();
        assert_eq!(decoded, Kind::Number(Number::unitless(42.0)));
    }

    #[test]
    fn mime_type() {
        let codec = CsvCodec;
        assert_eq!(codec.mime_type(), "text/csv");
    }

    #[test]
    fn encode_grid_cols_no_rows() {
        let cols = vec![HCol::new("a"), HCol::new("b")];
        let grid = HGrid::from_parts(HDict::new(), cols, vec![]);
        let csv = encode_grid(&grid).unwrap();
        // Only the header line is written when there are no rows.
        assert_eq!(csv, "\"a\",\"b\"\n");
    }

    #[test]
    fn csv_quote_escapes_double_quotes() {
        assert_eq!(csv_quote("hello"), "\"hello\"");
        assert_eq!(csv_quote("say \"hi\""), "\"say \"\"hi\"\"\"");
        assert_eq!(csv_quote(""), "\"\"");
        assert_eq!(csv_quote("a,b"), "\"a,b\"");
    }
}