haystack_core/codecs/trio/
parser.rs1use crate::codecs::CodecError;
4use crate::codecs::zinc::ZincParser;
5use crate::data::{HCol, HDict, HGrid};
6use crate::kinds::Kind;
7
8pub fn decode_grid(input: &str) -> Result<HGrid, CodecError> {
14 let records = parse_records(input)?;
15
16 if records.is_empty() {
17 return Ok(HGrid::new());
18 }
19
20 let mut col_names: Vec<String> = Vec::new();
22 let mut seen = std::collections::HashSet::new();
23 for rec in &records {
24 let mut names: Vec<&str> = rec.tag_names().collect();
26 names.sort();
27 for name in names {
28 if seen.insert(name.to_string()) {
29 col_names.push(name.to_string());
30 }
31 }
32 }
33
34 let cols: Vec<HCol> = col_names.iter().map(HCol::new).collect();
35 Ok(HGrid::from_parts(HDict::new(), cols, records))
36}
37
38fn parse_records(input: &str) -> Result<Vec<HDict>, CodecError> {
40 let mut records: Vec<HDict> = Vec::new();
41 let mut current_tags: Vec<(String, Kind)> = Vec::new();
42 let mut multiline_name: Option<String> = None;
43 let mut multiline_lines: Vec<String> = Vec::new();
44
45 for line in input.split('\n') {
46 let stripped = line.trim();
47
48 if is_record_separator(stripped) {
50 if let Some(name) = multiline_name.take() {
52 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
53 multiline_lines.clear();
54 }
55 if !current_tags.is_empty() {
57 records.push(tags_to_dict(current_tags));
58 current_tags = Vec::new();
59 }
60 continue;
61 }
62
63 if stripped.starts_with("//") {
65 continue;
66 }
67
68 if multiline_name.is_some() {
70 if let Some(content) = line.strip_prefix(" ").or_else(|| line.strip_prefix('\t')) {
71 multiline_lines.push(content.to_string());
73 continue;
74 } else {
75 if let Some(name) = multiline_name.take() {
77 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
78 }
79 multiline_lines.clear();
80 }
82 }
83
84 if stripped.is_empty() {
86 continue;
87 }
88
89 match stripped.find(':') {
91 None => {
92 current_tags.push((stripped.to_string(), Kind::Marker));
94 }
95 Some(colon_idx) => {
96 let name = stripped[..colon_idx].trim().to_string();
97 let rest = &stripped[colon_idx + 1..];
98
99 if rest.trim().is_empty() {
100 multiline_name = Some(name);
102 multiline_lines.clear();
103 } else {
104 let val_str = rest.trim();
106 let val = parse_scalar_value(val_str);
107 current_tags.push((name, val));
108 }
109 }
110 }
111 }
112
113 if let Some(name) = multiline_name.take() {
115 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
116 }
117
118 if !current_tags.is_empty() {
120 records.push(tags_to_dict(current_tags));
121 }
122
123 Ok(records)
124}
125
126fn parse_scalar_value(val_str: &str) -> Kind {
129 let mut parser = ZincParser::new(val_str);
130 match parser.parse_scalar() {
131 Ok(val) => {
132 if parser.at_end() {
133 val
134 } else {
135 Kind::Str(val_str.to_string())
137 }
138 }
139 Err(_) => {
140 Kind::Str(val_str.to_string())
142 }
143 }
144}
145
/// Returns `true` when `stripped` is made up solely of `-` characters and
/// there are at least three of them.
///
/// The caller is expected to pass an already-trimmed line; surrounding
/// whitespace is not ignored here.
fn is_record_separator(stripped: &str) -> bool {
    // '-' is ASCII, so comparing bytes is equivalent to comparing chars.
    stripped.len() >= 3 && stripped.bytes().all(|byte| byte == b'-')
}
150
151fn tags_to_dict(tags: Vec<(String, Kind)>) -> HDict {
153 let mut dict = HDict::new();
154 for (name, val) in tags {
155 dict.set(name, val);
156 }
157 dict
158}
159
// Unit tests for the Trio decoder. Most tests feed a Trio snippet to
// `decode_grid` and check the resulting rows; the last ones cover the
// encode/decode round trip and the codec registry.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kinds::{Coord, HRef, Number};
    use chrono::NaiveDate;

    // Empty input decodes to an empty grid with no columns.
    #[test]
    fn parse_empty_input() {
        let grid = decode_grid("").unwrap();
        assert!(grid.is_empty());
        assert_eq!(grid.num_cols(), 0);
    }

    // Input made only of whitespace and newlines yields no records.
    #[test]
    fn parse_whitespace_only() {
        let grid = decode_grid(" \n \n ").unwrap();
        assert!(grid.is_empty());
    }

    // One record mixing a string, a marker and a number with a unit.
    #[test]
    fn parse_single_record_with_markers_and_values() {
        let input = "dis: \"Site 1\"\nsite\narea: 3702ft\u{00B2}\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("dis"), Some(&Kind::Str("Site 1".into())));
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(
            row.get("area"),
            Some(&Kind::Number(Number::new(
                3702.0,
                Some("ft\u{00B2}".into())
            )))
        );
    }

    // A `---` line splits the input into two records.
    #[test]
    fn parse_multiple_records() {
        let input = "dis: \"Site A\"\nsite\n---\ndis: \"Site B\"\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);

        assert_eq!(
            grid.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site A".into()))
        );
        assert_eq!(
            grid.row(1).unwrap().get("dis"),
            Some(&Kind::Str("Site B".into()))
        );
    }

    // `//` lines are ignored and do not produce a tag.
    #[test]
    fn parse_comments_skipped() {
        let input = "// This is a comment\ndis: \"Site\"\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);
        assert_eq!(
            grid.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site".into()))
        );
        assert!(grid.row(0).unwrap().missing("//"));
    }

    // `doc:` followed by indented lines becomes one string joined with `\n`;
    // the unindented `site` line ends the string and is parsed as a marker.
    #[test]
    fn parse_multiline_string() {
        let input = "dis: \"Test\"\ndoc:\n This is line 1\n This is line 2\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("doc"),
            Some(&Kind::Str("This is line 1\nThis is line 2".into()))
        );
        assert_eq!(row.get("site"), Some(&Kind::Marker));
    }

    // Tab-indented continuation lines are accepted as well.
    #[test]
    fn parse_multiline_string_with_tab_indent() {
        let input = "doc:\n\tLine A\n\tLine B\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("doc"), Some(&Kind::Str("Line A\nLine B".into())));
    }

    // A multi-line string still open at end of input (no trailing newline)
    // is emitted.
    #[test]
    fn parse_multiline_string_at_end_of_input() {
        let input = "doc:\n Last line";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("doc"), Some(&Kind::Str("Last line".into())));
    }

    // Lines without a colon are marker tags of the same record.
    #[test]
    fn parse_markers_alone() {
        let input = "site\nequip\nahu\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(row.get("equip"), Some(&Kind::Marker));
        assert_eq!(row.get("ahu"), Some(&Kind::Marker));
    }

    // Blank lines between tags do not start a new record.
    #[test]
    fn parse_blank_lines_between_tags() {
        let input = "dis: \"Test\"\n\nsite\n\narea: 100\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(
            row.get("area"),
            Some(&Kind::Number(Number::unitless(100.0)))
        );
    }

    // `@id` values decode to refs.
    #[test]
    fn parse_ref_values() {
        let input = "id: @site-1\nsiteRef: @alpha\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));
        assert_eq!(
            row.get("siteRef"),
            Some(&Kind::Ref(HRef::from_val("alpha")))
        );
    }

    // `YYYY-MM-DD` values decode to dates.
    #[test]
    fn parse_date_value() {
        let input = "installed: 2024-03-13\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("installed"),
            Some(&Kind::Date(NaiveDate::from_ymd_opt(2024, 3, 13).unwrap()))
        );
    }

    // `C(lat,lng)` values decode to coordinates.
    #[test]
    fn parse_coord_value() {
        let input = "geoCoord: C(37.5458,-77.4491)\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("geoCoord"),
            Some(&Kind::Coord(Coord::new(37.5458, -77.4491)))
        );
    }

    // `T` and `F` decode to booleans.
    #[test]
    fn parse_bool_values() {
        let input = "active: T\ndeleted: F\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(row.get("active"), Some(&Kind::Bool(true)));
        assert_eq!(row.get("deleted"), Some(&Kind::Bool(false)));
    }

    // Numbers keep their unit suffix, including non-ASCII and `/` units.
    #[test]
    fn parse_number_with_unit() {
        let input = "temp: 72.5\u{00B0}F\nflow: 350gal/min\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("temp"),
            Some(&Kind::Number(Number::new(72.5, Some("\u{00B0}F".into()))))
        );
        assert_eq!(
            row.get("flow"),
            Some(&Kind::Number(Number::new(350.0, Some("gal/min".into()))))
        );
    }

    // A separator may be longer than three dashes.
    #[test]
    fn parse_separator_with_more_dashes() {
        let input = "site\n-----\nequip\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);
        assert_eq!(grid.row(0).unwrap().get("site"), Some(&Kind::Marker));
        assert_eq!(grid.row(1).unwrap().get("equip"), Some(&Kind::Marker));
    }

    // Grid columns are the union of the tag names of every record.
    #[test]
    fn parse_columns_derived_from_all_records() {
        let input = "dis: \"A\"\nsite\n---\ndis: \"B\"\narea: 100\n";
        let grid = decode_grid(input).unwrap();

        let col_names: Vec<&str> = grid.col_names().collect();
        assert!(col_names.contains(&"dis"));
        assert!(col_names.contains(&"site"));
        assert!(col_names.contains(&"area"));
    }

    // Three-record document combining comments, refs, coords, numbers and
    // markers.
    #[test]
    fn parse_complex_trio_file() {
        let input = "\
// Alpha Office
id: @alpha
dis: \"Alpha Office\"
site
geoAddr: \"600 N 2nd St, Richmond VA 23219\"
geoCoord: C(37.5407,-77.4360)
area: 120000ft\u{00B2}
---
// Floor 1
id: @floor1
dis: \"Floor 1\"
floor
siteRef: @alpha
---
id: @ahu1
dis: \"AHU-1\"
equip
ahu
siteRef: @alpha
floorRef: @floor1
";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 3);

        let site = grid.row(0).unwrap();
        assert_eq!(site.get("dis"), Some(&Kind::Str("Alpha Office".into())));
        assert_eq!(site.get("site"), Some(&Kind::Marker));
        assert_eq!(site.get("id"), Some(&Kind::Ref(HRef::from_val("alpha"))));
        assert_eq!(
            site.get("area"),
            Some(&Kind::Number(Number::new(
                120000.0,
                Some("ft\u{00B2}".into())
            )))
        );

        let floor = grid.row(1).unwrap();
        assert_eq!(floor.get("dis"), Some(&Kind::Str("Floor 1".into())));
        assert_eq!(floor.get("floor"), Some(&Kind::Marker));

        let ahu = grid.row(2).unwrap();
        assert_eq!(ahu.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(ahu.get("equip"), Some(&Kind::Marker));
        assert_eq!(ahu.get("ahu"), Some(&Kind::Marker));
    }

    // A record separator ends an open multi-line string as well as the
    // record.
    #[test]
    fn parse_multiline_between_records() {
        let input = "dis: \"A\"\ndoc:\n Hello world\n Second line\n---\ndis: \"B\"\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);

        assert_eq!(
            grid.row(0).unwrap().get("doc"),
            Some(&Kind::Str("Hello world\nSecond line".into()))
        );
        assert_eq!(
            grid.row(1).unwrap().get("dis"),
            Some(&Kind::Str("B".into()))
        );
    }

    // Encoding a two-row grid and decoding the result preserves the tag
    // values of both rows.
    #[test]
    fn roundtrip_encode_decode() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let cols = vec![
            HCol::new("area"),
            HCol::new("dis"),
            HCol::new("id"),
            HCol::new("site"),
        ];
        let mut row1 = HDict::new();
        row1.set("dis", Kind::Str("My Site".into()));
        row1.set("site", Kind::Marker);
        row1.set(
            "area",
            Kind::Number(Number::new(1000.0, Some("ft\u{00B2}".into()))),
        );
        row1.set("id", Kind::Ref(HRef::from_val("site-1")));

        let mut row2 = HDict::new();
        row2.set("dis", Kind::Str("AHU-1".into()));
        row2.set("id", Kind::Ref(HRef::from_val("ahu-1")));

        let g = HGrid::from_parts(HDict::new(), cols, vec![row1, row2]);
        let encoded = encode_grid(&g).unwrap();
        let decoded = decode_grid(&encoded).unwrap();

        assert_eq!(decoded.len(), 2);

        let r0 = decoded.row(0).unwrap();
        assert_eq!(r0.get("dis"), Some(&Kind::Str("My Site".into())));
        assert_eq!(r0.get("site"), Some(&Kind::Marker));
        assert_eq!(
            r0.get("area"),
            Some(&Kind::Number(Number::new(
                1000.0,
                Some("ft\u{00B2}".into())
            )))
        );
        assert_eq!(r0.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));

        let r1 = decoded.row(1).unwrap();
        assert_eq!(r1.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(r1.get("id"), Some(&Kind::Ref(HRef::from_val("ahu-1"))));
    }

    // A string containing newlines survives an encode/decode round trip.
    #[test]
    fn roundtrip_multiline_string() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let cols = vec![HCol::new("dis"), HCol::new("doc")];
        let mut row = HDict::new();
        row.set("dis", Kind::Str("Test".into()));
        row.set("doc", Kind::Str("Line 1\nLine 2\nLine 3".into()));

        let g = HGrid::from_parts(HDict::new(), cols, vec![row]);
        let encoded = encode_grid(&g).unwrap();
        let decoded = decode_grid(&encoded).unwrap();

        assert_eq!(decoded.len(), 1);
        let r = decoded.row(0).unwrap();
        assert_eq!(r.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(
            r.get("doc"),
            Some(&Kind::Str("Line 1\nLine 2\nLine 3".into()))
        );
    }

    // Backtick-quoted values decode to URIs.
    #[test]
    fn parse_uri_value() {
        use crate::kinds::Uri;

        let input = "href: `http://example.com/api`\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("href"),
            Some(&Kind::Uri(Uri::new("http://example.com/api")))
        );
    }

    // `codec_for` resolves the trio and zinc MIME types and returns `None`
    // for `text/json`.
    #[test]
    fn codec_for_registry() {
        use crate::codecs::codec_for;

        let trio = codec_for("text/trio").expect("trio codec should be registered");
        assert_eq!(trio.mime_type(), "text/trio");

        let zinc = codec_for("text/zinc").expect("zinc codec should be registered");
        assert_eq!(zinc.mime_type(), "text/zinc");

        assert!(codec_for("text/json").is_none());
    }

    // `TrioCodec` reports its MIME type and round-trips a unitless number
    // through `encode_scalar` / `decode_scalar`.
    #[test]
    fn trio_codec_trait_impl() {
        use crate::codecs::Codec;
        use crate::codecs::trio::TrioCodec;

        let codec = TrioCodec;
        assert_eq!(codec.mime_type(), "text/trio");

        let val = Kind::Number(Number::unitless(42.0));
        let encoded = codec.encode_scalar(&val).unwrap();
        assert_eq!(encoded, "42");
        let decoded = codec.decode_scalar(&encoded).unwrap();
        assert_eq!(decoded, val);
    }
}