Skip to main content

haystack_core/codecs/trio/
encoder.rs

1// Trio format encoder — encode HGrid to record-per-entity text format.
2
3use crate::codecs::CodecError;
4use crate::codecs::zinc;
5use crate::data::HGrid;
6use crate::kinds::Kind;
7
8/// Encode an HGrid to Trio format.
9///
10/// Each row in the grid becomes a record. Records are separated by `---`.
11/// For each tag in the row:
12/// - Marker values: just the tag name
13/// - Multi-line strings (containing \n): `name:` then each line indented with 2 spaces
14/// - All other values: `name: ` + zinc-encoded scalar
15pub fn encode_grid(grid: &HGrid) -> Result<String, CodecError> {
16    let mut parts: Vec<String> = Vec::new();
17
18    for row in &grid.rows {
19        parts.push(encode_dict(row)?);
20    }
21
22    Ok(parts.join("\n---\n"))
23}
24
25/// Encode a single dict (row) as Trio-formatted lines.
26fn encode_dict(d: &crate::data::HDict) -> Result<String, CodecError> {
27    let mut lines: Vec<String> = Vec::new();
28
29    // Sort keys for deterministic output
30    let mut keys: Vec<&String> = d.tags().keys().collect();
31    keys.sort();
32
33    for name in keys {
34        let val = &d.tags()[name];
35        match val {
36            Kind::Marker => {
37                lines.push(name.clone());
38            }
39            Kind::Str(s) if s.contains('\n') => {
40                // Multi-line string: name followed by colon, then indented lines
41                lines.push(format!("{name}:"));
42                for line in s.split('\n') {
43                    lines.push(format!("  {line}"));
44                }
45            }
46            _ => {
47                let encoded = zinc::encode_scalar(val)?;
48                lines.push(format!("{name}: {encoded}"));
49            }
50        }
51    }
52
53    Ok(lines.join("\n"))
54}
55
#[cfg(test)]
mod tests {
    use super::*;
    use crate::data::{HCol, HDict, HGrid};
    use crate::kinds::{HRef, Kind, Number};

    /// Build a grid with empty metadata from the given columns and rows.
    fn grid_with(cols: Vec<HCol>, rows: Vec<HDict>) -> HGrid {
        HGrid::from_parts(HDict::new(), cols, rows)
    }

    /// A grid with no rows encodes to the empty string.
    #[test]
    fn encode_empty_grid() {
        let encoded = encode_grid(&HGrid::new()).unwrap();
        assert_eq!(encoded, "");
    }

    /// A marker tag is written as the bare tag name.
    #[test]
    fn encode_single_record_with_marker() {
        let mut rec = HDict::new();
        rec.set("site", Kind::Marker);
        let grid = grid_with(vec![HCol::new("site")], vec![rec]);

        assert_eq!(encode_grid(&grid).unwrap(), "site");
    }

    /// Tags come out sorted by name regardless of insertion order.
    #[test]
    fn encode_single_record_with_values() {
        let mut rec = HDict::new();
        rec.set("dis", Kind::Str("Main Site".into()));
        rec.set("site", Kind::Marker);
        rec.set(
            "area",
            Kind::Number(Number::new(5000.0, Some("ft\u{00B2}".into()))),
        );
        let grid = grid_with(
            vec![HCol::new("dis"), HCol::new("area"), HCol::new("site")],
            vec![rec],
        );

        let encoded = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = encoded.lines().collect();
        // Expected order: area, dis, site
        assert_eq!(
            &lines[..3],
            &["area: 5000ft\u{00B2}", "dis: \"Main Site\"", "site"]
        );
    }

    /// Two rows produce two records split by the `---` separator line.
    #[test]
    fn encode_multiple_records() {
        let mut first = HDict::new();
        first.set("dis", Kind::Str("Site A".into()));
        first.set("site", Kind::Marker);
        let mut second = HDict::new();
        second.set("dis", Kind::Str("Site B".into()));
        second.set("site", Kind::Marker);
        let grid = grid_with(
            vec![HCol::new("dis"), HCol::new("site")],
            vec![first, second],
        );

        let encoded = encode_grid(&grid).unwrap();
        assert!(encoded.contains("---"));
        assert_eq!(encoded.split("\n---\n").count(), 2);
    }

    /// Strings with embedded newlines become a `name:` header plus indented lines.
    #[test]
    fn encode_multiline_string() {
        let mut rec = HDict::new();
        rec.set("doc", Kind::Str("Line 1\nLine 2\nLine 3".into()));
        let grid = grid_with(vec![HCol::new("doc")], vec![rec]);

        let encoded = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = encoded.lines().collect();
        assert_eq!(&lines[..4], &["doc:", "  Line 1", "  Line 2", "  Line 3"]);
    }

    /// Refs are written using their zinc scalar form.
    #[test]
    fn encode_ref_value() {
        let mut rec = HDict::new();
        rec.set("id", Kind::Ref(HRef::from_val("site-1")));
        let grid = grid_with(vec![HCol::new("id")], vec![rec]);

        assert_eq!(encode_grid(&grid).unwrap(), "id: @site-1");
    }

    /// Numbers keep their unit suffix.
    #[test]
    fn encode_number_with_unit() {
        let mut rec = HDict::new();
        rec.set(
            "temp",
            Kind::Number(Number::new(72.5, Some("\u{00B0}F".into()))),
        );
        let grid = grid_with(vec![HCol::new("temp")], vec![rec]);

        assert_eq!(encode_grid(&grid).unwrap(), "temp: 72.5\u{00B0}F");
    }

    /// Booleans are written as `T` / `F`.
    #[test]
    fn encode_bool_values() {
        let mut rec = HDict::new();
        rec.set("active", Kind::Bool(true));
        rec.set("deleted", Kind::Bool(false));
        let grid = grid_with(
            vec![HCol::new("active"), HCol::new("deleted")],
            vec![rec],
        );

        let encoded = encode_grid(&grid).unwrap();
        let lines: Vec<&str> = encoded.lines().collect();
        assert_eq!(&lines[..2], &["active: T", "deleted: F"]);
    }
}