Skip to main content

haystack_core/codecs/zinc/
encoder.rs

1// Zinc scalar and grid encoder.
2
3use crate::codecs::CodecError;
4use crate::codecs::shared;
5use crate::data::{HCol, HDict, HGrid};
6use crate::kinds::Kind;
7
8/// Encode a single Kind value to its Zinc string representation.
9pub fn encode_scalar(val: &Kind) -> Result<String, CodecError> {
10    match val {
11        Kind::Null => Ok("N".to_string()),
12        Kind::Bool(true) => Ok("T".to_string()),
13        Kind::Bool(false) => Ok("F".to_string()),
14        Kind::Marker => Ok("M".to_string()),
15        Kind::NA => Ok("NA".to_string()),
16        Kind::Remove => Ok("R".to_string()),
17        Kind::Number(n) => Ok(encode_number(n)),
18        Kind::Str(s) => Ok(encode_str(s)),
19        Kind::Ref(r) => Ok(encode_ref(r)),
20        Kind::Uri(u) => Ok(format!("`{}`", u.val())),
21        Kind::Symbol(s) => Ok(format!("^{}", s.val())),
22        Kind::Date(d) => Ok(d.format("%Y-%m-%d").to_string()),
23        Kind::Time(t) => Ok(encode_time(t)),
24        Kind::DateTime(hdt) => Ok(encode_datetime(hdt)),
25        Kind::Coord(c) => Ok(format!("C({},{})", c.lat, c.lng)),
26        Kind::XStr(x) => Ok(format!("{}(\"{}\")", x.type_name, escape_str(&x.val))),
27        Kind::List(items) => {
28            let mut parts = Vec::with_capacity(items.len());
29            for item in items {
30                parts.push(encode_scalar(item)?);
31            }
32            Ok(format!("[{}]", parts.join(", ")))
33        }
34        Kind::Dict(d) => Ok(encode_dict_inline(d)?),
35        Kind::Grid(_) => Err(CodecError::Encode(
36            "grids cannot be encoded as scalars".to_string(),
37        )),
38    }
39}
40
41/// Encode a Number to its Zinc string representation.
42fn encode_number(n: &crate::kinds::Number) -> String {
43    let s = shared::format_number_val(n.val);
44    match &n.unit {
45        Some(u) => format!("{s}{u}"),
46        None => s,
47    }
48}
49
50/// Encode a time value, always including seconds.
51fn encode_time(t: &chrono::NaiveTime) -> String {
52    shared::format_time(t)
53}
54
55use chrono::Timelike;
56
57/// Encode a Haystack DateTime to Zinc format.
58fn encode_datetime(hdt: &crate::kinds::HDateTime) -> String {
59    let dt_str = hdt.dt.format("%Y-%m-%dT%H:%M:%S").to_string();
60    let frac = shared::format_frac_seconds(hdt.dt.nanosecond());
61    let offset_str = hdt.dt.format("%:z").to_string();
62    let tz = &hdt.tz_name;
63    format!("{dt_str}{frac}{offset_str} {tz}")
64}
65
66/// Encode a string value (with outer quotes).
67fn encode_str(s: &str) -> String {
68    format!("\"{}\"", escape_str(s))
69}
70
/// Escape string content for Zinc format (without outer quotes).
///
/// Backslash, double quote and `$` are backslash-escaped; newline, carriage
/// return, tab, backspace and form feed use their short escapes (`\n`, `\r`,
/// `\t`, `\b`, `\f`). Any other control character below U+0020 is written as
/// a four-digit `\uXXXX` escape, because the Zinc grammar only permits
/// unescaped characters at or above 0x20. All remaining characters are
/// copied through unchanged.
pub fn escape_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '$' => out.push_str("\\$"),
            '\u{0008}' => out.push_str("\\b"),
            '\u{000C}' => out.push_str("\\f"),
            // Remaining C0 control characters have no short escape.
            c if c < ' ' => out.push_str(&format!("\\u{:04x}", u32::from(c))),
            _ => out.push(ch),
        }
    }
    out
}
89
90/// Encode an HRef to Zinc format.
91fn encode_ref(r: &crate::kinds::HRef) -> String {
92    match &r.dis {
93        Some(dis) => format!("@{} \"{}\"", r.val, escape_str(dis)),
94        None => format!("@{}", r.val),
95    }
96}
97
98/// Encode an HDict inline (for use inside grid cells or nested dicts).
99fn encode_dict_inline(d: &HDict) -> Result<String, CodecError> {
100    let mut parts = Vec::new();
101    for (k, v) in d.sorted_iter() {
102        if matches!(v, Kind::Marker) {
103            parts.push(k.to_string());
104        } else {
105            parts.push(format!("{}:{}", k, encode_scalar(v)?));
106        }
107    }
108    Ok(format!("{{{}}}", parts.join(" ")))
109}
110
111/// Encode metadata tags in inline format for grid/column metadata.
112/// Format: `"tag1 tag2:val2 tag3:val3"`
113pub fn encode_meta(d: &HDict) -> Result<String, CodecError> {
114    let mut parts = Vec::new();
115    for (k, v) in d.sorted_iter() {
116        if matches!(v, Kind::Marker) {
117            parts.push(k.to_string());
118        } else {
119            parts.push(format!("{}:{}", k, encode_scalar(v)?));
120        }
121    }
122    Ok(parts.join(" "))
123}
124
125/// Encode an HGrid to the Zinc wire format.
126pub fn encode_grid(grid: &HGrid) -> Result<String, CodecError> {
127    use std::fmt::Write;
128
129    let mut buf = String::new();
130
131    // Line 1: version + grid meta
132    buf.push_str("ver:\"3.0\"");
133    if !grid.meta.is_empty() {
134        buf.push(' ');
135        buf.push_str(&encode_meta(&grid.meta)?);
136    }
137    buf.push('\n');
138
139    // Line 2: columns — write directly, comma-delimited.
140    if grid.cols.is_empty() {
141        buf.push_str("empty\n");
142    } else {
143        for (i, col) in grid.cols.iter().enumerate() {
144            if i > 0 {
145                buf.push(',');
146            }
147            buf.push_str(&col.name);
148            if !col.meta.is_empty() {
149                buf.push(' ');
150                buf.push_str(&encode_meta(&col.meta)?);
151            }
152        }
153        buf.push('\n');
154    }
155
156    // Rows — write cells directly, comma-delimited.
157    for row in &grid.rows {
158        for (i, col) in grid.cols.iter().enumerate() {
159            if i > 0 {
160                buf.push(',');
161            }
162            match row.get(&col.name) {
163                Some(val) => write!(buf, "{}", encode_scalar(val)?).unwrap(),
164                None => buf.push('N'),
165            }
166        }
167        buf.push('\n');
168    }
169
170    Ok(buf)
171}
172
173/// Encode just the grid header: version line + meta + column definitions.
174pub fn encode_grid_header(grid: &HGrid) -> Result<String, CodecError> {
175    let mut buf = String::new();
176
177    // Line 1: version + grid meta
178    buf.push_str("ver:\"3.0\"");
179    if !grid.meta.is_empty() {
180        buf.push(' ');
181        buf.push_str(&encode_meta(&grid.meta)?);
182    }
183    buf.push('\n');
184
185    // Line 2: columns
186    if grid.cols.is_empty() {
187        buf.push_str("empty\n");
188    } else {
189        for (i, col) in grid.cols.iter().enumerate() {
190            if i > 0 {
191                buf.push(',');
192            }
193            buf.push_str(&col.name);
194            if !col.meta.is_empty() {
195                buf.push(' ');
196                buf.push_str(&encode_meta(&col.meta)?);
197            }
198        }
199        buf.push('\n');
200    }
201
202    Ok(buf)
203}
204
205/// Encode a single grid row as comma-delimited values followed by newline.
206pub fn encode_grid_row(cols: &[HCol], row: &HDict) -> Result<String, CodecError> {
207    use std::fmt::Write;
208    let mut buf = String::new();
209    for (i, col) in cols.iter().enumerate() {
210        if i > 0 {
211            buf.push(',');
212        }
213        match row.get(&col.name) {
214            Some(val) => write!(buf, "{}", encode_scalar(val)?).unwrap(),
215            None => buf.push('N'),
216        }
217    }
218    buf.push('\n');
219    Ok(buf)
220}
221
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{HCol, HDict, HGrid};
    use crate::kinds::*;
    use chrono::{FixedOffset, NaiveDate, NaiveTime, TimeZone};

    /// Encode `kind` as a Zinc scalar, panicking if encoding fails.
    fn zinc(kind: Kind) -> String {
        encode_scalar(&kind).unwrap()
    }

    /// Encode a unitless number as a Zinc scalar.
    fn zinc_num(val: f64) -> String {
        zinc(Kind::Number(Number::unitless(val)))
    }

    // ---- Singletons ----

    #[test]
    fn encode_null() {
        assert_eq!(zinc(Kind::Null), "N");
    }

    #[test]
    fn encode_bool_true() {
        assert_eq!(zinc(Kind::Bool(true)), "T");
    }

    #[test]
    fn encode_bool_false() {
        assert_eq!(zinc(Kind::Bool(false)), "F");
    }

    #[test]
    fn encode_marker() {
        assert_eq!(zinc(Kind::Marker), "M");
    }

    #[test]
    fn encode_na() {
        assert_eq!(zinc(Kind::NA), "NA");
    }

    #[test]
    fn encode_remove() {
        assert_eq!(zinc(Kind::Remove), "R");
    }

    // ---- Numbers ----

    #[test]
    fn encode_number_zero() {
        assert_eq!(zinc_num(0.0), "0");
    }

    #[test]
    fn encode_number_integer() {
        assert_eq!(zinc_num(42.0), "42");
    }

    #[test]
    fn encode_number_float() {
        assert_eq!(zinc_num(72.5), "72.5");
    }

    #[test]
    fn encode_number_negative() {
        assert_eq!(zinc_num(-23.45), "-23.45");
    }

    #[test]
    fn encode_number_with_unit() {
        let with_unit = Number::new(72.5, Some("\u{00B0}F".into()));
        assert_eq!(zinc(Kind::Number(with_unit)), "72.5\u{00B0}F");
    }

    #[test]
    fn encode_number_inf() {
        assert_eq!(zinc_num(f64::INFINITY), "INF");
    }

    #[test]
    fn encode_number_neg_inf() {
        assert_eq!(zinc_num(f64::NEG_INFINITY), "-INF");
    }

    #[test]
    fn encode_number_nan() {
        assert_eq!(zinc_num(f64::NAN), "NaN");
    }

    // ---- Strings ----

    #[test]
    fn encode_string_simple() {
        assert_eq!(zinc(Kind::Str("hello".into())), "\"hello\"");
    }

    #[test]
    fn encode_string_empty() {
        assert_eq!(zinc(Kind::Str(String::new())), "\"\"");
    }

    #[test]
    fn encode_string_escapes() {
        let raw = "line1\nline2\ttab\\slash\"quote$dollar";
        assert_eq!(
            zinc(Kind::Str(raw.into())),
            "\"line1\\nline2\\ttab\\\\slash\\\"quote\\$dollar\""
        );
    }

    // ---- Refs, URIs, symbols ----

    #[test]
    fn encode_ref_simple() {
        assert_eq!(zinc(Kind::Ref(HRef::from_val("site-1"))), "@site-1");
    }

    #[test]
    fn encode_ref_with_dis() {
        let href = HRef::new("site-1", Some("Main Site".into()));
        assert_eq!(zinc(Kind::Ref(href)), "@site-1 \"Main Site\"");
    }

    #[test]
    fn encode_uri() {
        let uri = Uri::new("http://example.com");
        assert_eq!(zinc(Kind::Uri(uri)), "`http://example.com`");
    }

    #[test]
    fn encode_symbol() {
        assert_eq!(zinc(Kind::Symbol(Symbol::new("hot-water"))), "^hot-water");
    }

    // ---- Dates and times ----

    #[test]
    fn encode_date() {
        let date = NaiveDate::from_ymd_opt(2024, 3, 13).unwrap();
        assert_eq!(zinc(Kind::Date(date)), "2024-03-13");
    }

    #[test]
    fn encode_time_no_frac() {
        let time = NaiveTime::from_hms_opt(8, 12, 5).unwrap();
        assert_eq!(zinc(Kind::Time(time)), "08:12:05");
    }

    #[test]
    fn encode_time_with_frac() {
        let time = NaiveTime::from_hms_milli_opt(14, 30, 0, 123).unwrap();
        assert_eq!(zinc(Kind::Time(time)), "14:30:00.123");
    }

    #[test]
    fn encode_datetime() {
        let new_york = FixedOffset::west_opt(5 * 3600).unwrap();
        let stamp = new_york.with_ymd_and_hms(2024, 1, 1, 8, 12, 5).unwrap();
        let kind = Kind::DateTime(HDateTime::new(stamp, "New_York"));
        assert_eq!(zinc(kind), "2024-01-01T08:12:05-05:00 New_York");
    }

    #[test]
    fn encode_datetime_utc() {
        let utc = FixedOffset::east_opt(0).unwrap();
        let stamp = utc.with_ymd_and_hms(2024, 6, 15, 12, 0, 0).unwrap();
        let kind = Kind::DateTime(HDateTime::new(stamp, "UTC"));
        assert_eq!(zinc(kind), "2024-06-15T12:00:00+00:00 UTC");
    }

    // ---- Coord and XStr ----

    #[test]
    fn encode_coord() {
        let coord = Coord::new(37.5458266, -77.4491888);
        assert_eq!(zinc(Kind::Coord(coord)), "C(37.5458266,-77.4491888)");
    }

    #[test]
    fn encode_xstr() {
        assert_eq!(zinc(Kind::XStr(XStr::new("Color", "red"))), "Color(\"red\")");
    }

    // ---- Collections ----

    #[test]
    fn encode_list_empty() {
        assert_eq!(zinc(Kind::List(vec![])), "[]");
    }

    #[test]
    fn encode_list_mixed() {
        let items = vec![
            Kind::Number(Number::unitless(1.0)),
            Kind::Str("two".into()),
            Kind::Marker,
        ];
        assert_eq!(zinc(Kind::List(items)), "[1, \"two\", M]");
    }

    #[test]
    fn encode_dict_empty() {
        assert_eq!(zinc(Kind::Dict(Box::new(HDict::new()))), "{}");
    }

    #[test]
    fn encode_dict_with_values() {
        let mut dict = HDict::new();
        dict.set("site", Kind::Marker);
        dict.set("dis", Kind::Str("Main".into()));
        // Keys come out sorted: dis, then site.
        assert_eq!(zinc(Kind::Dict(Box::new(dict))), "{dis:\"Main\" site}");
    }

    // ---- Grids ----

    #[test]
    fn encode_grid_error() {
        let nested = Kind::Grid(Box::new(HGrid::new()));
        assert!(encode_scalar(&nested).is_err());
    }

    #[test]
    fn encode_grid_empty() {
        let encoded = encode_grid(&HGrid::new()).unwrap();
        assert_eq!(encoded, "ver:\"3.0\"\nempty\n");
    }

    #[test]
    fn encode_grid_with_data() {
        let mut first = HDict::new();
        first.set("dis", Kind::Str("Site One".into()));
        first.set("area", Kind::Number(Number::unitless(4500.0)));

        // The second row deliberately has no `area` value.
        let mut second = HDict::new();
        second.set("dis", Kind::Str("Site Two".into()));

        let grid = HGrid::from_parts(
            HDict::new(),
            vec![HCol::new("dis"), HCol::new("area")],
            vec![first, second],
        );
        let encoded = encode_grid(&grid).unwrap();

        let mut lines = encoded.lines();
        assert_eq!(lines.next(), Some("ver:\"3.0\""));
        assert_eq!(lines.next(), Some("dis,area"));
        assert_eq!(lines.next(), Some("\"Site One\",4500"));
        assert_eq!(lines.next(), Some("\"Site Two\",N"));
    }

    #[test]
    fn encode_grid_with_meta() {
        let mut meta = HDict::new();
        meta.set("err", Kind::Marker);
        meta.set("dis", Kind::Str("some error".into()));

        let grid = HGrid::from_parts(meta, vec![], vec![]);
        let encoded = encode_grid(&grid).unwrap();

        let first_line = encoded.lines().next().unwrap();
        assert!(first_line.starts_with("ver:\"3.0\" "));
        for needle in ["err", "dis:\"some error\""] {
            assert!(first_line.contains(needle));
        }
    }

    #[test]
    fn encode_grid_with_col_meta() {
        let mut power_meta = HDict::new();
        power_meta.set("unit", Kind::Str("kW".into()));

        let grid = HGrid::from_parts(
            HDict::new(),
            vec![HCol::new("name"), HCol::with_meta("power", power_meta)],
            vec![],
        );
        let encoded = encode_grid(&grid).unwrap();

        assert_eq!(encoded.lines().nth(1), Some("name,power unit:\"kW\""));
    }

    // ---- escape_str ----

    #[test]
    fn encode_escape_str() {
        // (raw input, expected escaped output)
        let cases = [
            ("hello", "hello"),
            ("a\\b", "a\\\\b"),
            ("a\"b", "a\\\"b"),
            ("a\nb", "a\\nb"),
            ("a\rb", "a\\rb"),
            ("a\tb", "a\\tb"),
            ("a$b", "a\\$b"),
            ("a\u{0008}b", "a\\bb"),
            ("a\u{000C}b", "a\\fb"),
        ];
        for (raw, expected) in cases {
            assert_eq!(escape_str(raw), expected);
        }
    }
}