// haystack_core/codecs/trio/parser.rs
use crate::codecs::CodecError;
4use crate::codecs::zinc::ZincParser;
5use crate::data::{HCol, HDict, HGrid};
6use crate::kinds::Kind;
7
8pub fn decode_grid(input: &str) -> Result<HGrid, CodecError> {
14 let records = parse_records(input)?;
15
16 if records.is_empty() {
17 return Ok(HGrid::new());
18 }
19
20 let mut col_names: Vec<String> = Vec::new();
22 let mut seen = std::collections::HashSet::new();
23 for rec in &records {
24 let mut names: Vec<&str> = rec.tag_names().collect();
26 names.sort();
27 for name in names {
28 if seen.insert(name.to_string()) {
29 col_names.push(name.to_string());
30 }
31 }
32 }
33
34 let cols: Vec<HCol> = col_names.iter().map(HCol::new).collect();
35 Ok(HGrid::from_parts(HDict::new(), cols, records))
36}
37
38fn parse_records(input: &str) -> Result<Vec<HDict>, CodecError> {
40 let mut records: Vec<HDict> = Vec::new();
41 let mut current_tags: Vec<(String, Kind)> = Vec::new();
42 let mut multiline_name: Option<String> = None;
43 let mut multiline_lines: Vec<String> = Vec::new();
44
45 for line in input.split('\n') {
46 let stripped = line.trim();
47
48 if is_record_separator(stripped) {
50 if let Some(name) = multiline_name.take() {
52 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
53 multiline_lines.clear();
54 }
55 if !current_tags.is_empty() {
57 records.push(tags_to_dict(current_tags));
58 current_tags = Vec::new();
59 }
60 continue;
61 }
62
63 if stripped.starts_with("//") {
65 continue;
66 }
67
68 if multiline_name.is_some() {
70 if let Some(content) = line.strip_prefix(" ").or_else(|| line.strip_prefix('\t')) {
71 multiline_lines.push(content.to_string());
73 continue;
74 } else {
75 if let Some(name) = multiline_name.take() {
77 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
78 }
79 multiline_lines.clear();
80 }
82 }
83
84 if stripped.is_empty() {
86 continue;
87 }
88
89 match stripped.find(':') {
91 None => {
92 current_tags.push((stripped.to_string(), Kind::Marker));
94 }
95 Some(colon_idx) => {
96 let name = stripped[..colon_idx].trim().to_string();
97 let rest = &stripped[colon_idx + 1..];
98
99 if rest.trim().is_empty() {
100 multiline_name = Some(name);
102 multiline_lines.clear();
103 } else {
104 let val_str = rest.trim();
106 let val = parse_scalar_value(val_str);
107 current_tags.push((name, val));
108 }
109 }
110 }
111 }
112
113 if let Some(name) = multiline_name.take() {
115 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
116 }
117
118 if !current_tags.is_empty() {
120 records.push(tags_to_dict(current_tags));
121 }
122
123 Ok(records)
124}
125
126fn parse_scalar_value(val_str: &str) -> Kind {
135 let mut parser = ZincParser::new(val_str);
136 match parser.parse_scalar() {
137 Ok(val) => {
138 if parser.at_end() {
139 val
140 } else {
141 Kind::Str(val_str.to_string())
143 }
144 }
145 Err(_) => {
146 Kind::Str(val_str.to_string())
148 }
149 }
150}
151
/// Returns `true` when `stripped` consists solely of `-` characters and is at
/// least three characters long.
fn is_record_separator(stripped: &str) -> bool {
    // '-' is ASCII, so a byte-wise comparison matches a char-wise one.
    stripped.len() >= 3 && stripped.bytes().all(|b| b == b'-')
}
156
157fn tags_to_dict(tags: Vec<(String, Kind)>) -> HDict {
159 let mut dict = HDict::new();
160 for (name, val) in tags {
161 dict.set(name, val);
162 }
163 dict
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kinds::{Coord, HRef, Number};
    use chrono::NaiveDate;

    /// Empty input decodes to a grid with no rows and no columns.
    #[test]
    fn parse_empty_input() {
        let parsed = decode_grid("").unwrap();
        assert!(parsed.is_empty());
        assert_eq!(parsed.num_cols(), 0);
    }

    /// Input made only of whitespace produces no records.
    #[test]
    fn parse_whitespace_only() {
        let parsed = decode_grid(" \n \n ").unwrap();
        assert!(parsed.is_empty());
    }

    /// A single record may mix a quoted string, a marker and a number with a unit.
    #[test]
    fn parse_single_record_with_markers_and_values() {
        let trio = "dis: \"Site 1\"\nsite\narea: 3702ft\u{00B2}\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("dis"), Some(&Kind::Str("Site 1".into())));
        assert_eq!(rec.get("site"), Some(&Kind::Marker));
        assert_eq!(
            rec.get("area"),
            Some(&Kind::Number(Number::new(
                3702.0,
                Some("ft\u{00B2}".into())
            )))
        );
    }

    /// A `---` line splits the input into separate records.
    #[test]
    fn parse_multiple_records() {
        let trio = "dis: \"Site A\"\nsite\n---\ndis: \"Site B\"\nsite\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 2);

        assert_eq!(
            parsed.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site A".into()))
        );
        assert_eq!(
            parsed.row(1).unwrap().get("dis"),
            Some(&Kind::Str("Site B".into()))
        );
    }

    /// Lines starting with `//` do not contribute tags.
    #[test]
    fn parse_comments_skipped() {
        let trio = "// This is a comment\ndis: \"Site\"\nsite\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);
        assert_eq!(
            parsed.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site".into()))
        );
        assert!(parsed.row(0).unwrap().missing("//"));
    }

    /// Indented lines after `name:` are joined into one string value.
    #[test]
    fn parse_multiline_string() {
        let trio = "dis: \"Test\"\ndoc:\n This is line 1\n This is line 2\nsite\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(
            rec.get("doc"),
            Some(&Kind::Str("This is line 1\nThis is line 2".into()))
        );
        assert_eq!(rec.get("site"), Some(&Kind::Marker));
    }

    /// A tab works as the continuation indent for multi-line strings.
    #[test]
    fn parse_multiline_string_with_tab_indent() {
        let trio = "doc:\n\tLine A\n\tLine B\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("doc"), Some(&Kind::Str("Line A\nLine B".into())));
    }

    /// A multi-line string still open at end of input is emitted.
    #[test]
    fn parse_multiline_string_at_end_of_input() {
        let trio = "doc:\n Last line";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("doc"), Some(&Kind::Str("Last line".into())));
    }

    /// Bare names become marker tags of one record.
    #[test]
    fn parse_markers_alone() {
        let trio = "site\nequip\nahu\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("site"), Some(&Kind::Marker));
        assert_eq!(rec.get("equip"), Some(&Kind::Marker));
        assert_eq!(rec.get("ahu"), Some(&Kind::Marker));
    }

    /// Blank lines between tags do not split the record.
    #[test]
    fn parse_blank_lines_between_tags() {
        let trio = "dis: \"Test\"\n\nsite\n\narea: 100\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(rec.get("site"), Some(&Kind::Marker));
        assert_eq!(
            rec.get("area"),
            Some(&Kind::Number(Number::unitless(100.0)))
        );
    }

    /// `@id` values decode as refs.
    #[test]
    fn parse_ref_values() {
        let trio = "id: @site-1\nsiteRef: @alpha\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 1);

        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));
        assert_eq!(
            rec.get("siteRef"),
            Some(&Kind::Ref(HRef::from_val("alpha")))
        );
    }

    /// An ISO date value decodes as a date.
    #[test]
    fn parse_date_value() {
        let trio = "installed: 2024-03-13\n";
        let parsed = decode_grid(trio).unwrap();
        let rec = parsed.row(0).unwrap();
        assert_eq!(
            rec.get("installed"),
            Some(&Kind::Date(NaiveDate::from_ymd_opt(2024, 3, 13).unwrap()))
        );
    }

    /// A `C(lat,lng)` value decodes as a coordinate.
    #[test]
    fn parse_coord_value() {
        let trio = "geoCoord: C(37.5458,-77.4491)\n";
        let parsed = decode_grid(trio).unwrap();
        let rec = parsed.row(0).unwrap();
        assert_eq!(
            rec.get("geoCoord"),
            Some(&Kind::Coord(Coord::new(37.5458, -77.4491)))
        );
    }

    /// `T` and `F` decode as booleans.
    #[test]
    fn parse_bool_values() {
        let trio = "active: T\ndeleted: F\n";
        let parsed = decode_grid(trio).unwrap();
        let rec = parsed.row(0).unwrap();
        assert_eq!(rec.get("active"), Some(&Kind::Bool(true)));
        assert_eq!(rec.get("deleted"), Some(&Kind::Bool(false)));
    }

    /// Numbers keep their unit suffix.
    #[test]
    fn parse_number_with_unit() {
        let trio = "temp: 72.5\u{00B0}F\nflow: 350gal/min\n";
        let parsed = decode_grid(trio).unwrap();
        let rec = parsed.row(0).unwrap();
        assert_eq!(
            rec.get("temp"),
            Some(&Kind::Number(Number::new(72.5, Some("\u{00B0}F".into()))))
        );
        assert_eq!(
            rec.get("flow"),
            Some(&Kind::Number(Number::new(350.0, Some("gal/min".into()))))
        );
    }

    /// A separator may be longer than three dashes.
    #[test]
    fn parse_separator_with_more_dashes() {
        let trio = "site\n-----\nequip\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(parsed.row(0).unwrap().get("site"), Some(&Kind::Marker));
        assert_eq!(parsed.row(1).unwrap().get("equip"), Some(&Kind::Marker));
    }

    /// Grid columns cover the tags of every record, not just the first.
    #[test]
    fn parse_columns_derived_from_all_records() {
        let trio = "dis: \"A\"\nsite\n---\ndis: \"B\"\narea: 100\n";
        let parsed = decode_grid(trio).unwrap();

        let names: Vec<&str> = parsed.col_names().collect();
        assert!(names.contains(&"dis"));
        assert!(names.contains(&"site"));
        assert!(names.contains(&"area"));
    }

    /// A three-record file with comments, refs, markers and units.
    #[test]
    fn parse_complex_trio_file() {
        let trio = "\
// Alpha Office
id: @alpha
dis: \"Alpha Office\"
site
geoAddr: \"600 N 2nd St, Richmond VA 23219\"
geoCoord: C(37.5407,-77.4360)
area: 120000ft\u{00B2}
---
// Floor 1
id: @floor1
dis: \"Floor 1\"
floor
siteRef: @alpha
---
id: @ahu1
dis: \"AHU-1\"
equip
ahu
siteRef: @alpha
floorRef: @floor1
";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 3);

        let site_rec = parsed.row(0).unwrap();
        assert_eq!(site_rec.get("dis"), Some(&Kind::Str("Alpha Office".into())));
        assert_eq!(site_rec.get("site"), Some(&Kind::Marker));
        assert_eq!(site_rec.get("id"), Some(&Kind::Ref(HRef::from_val("alpha"))));
        assert_eq!(
            site_rec.get("area"),
            Some(&Kind::Number(Number::new(
                120000.0,
                Some("ft\u{00B2}".into())
            )))
        );

        let floor_rec = parsed.row(1).unwrap();
        assert_eq!(floor_rec.get("dis"), Some(&Kind::Str("Floor 1".into())));
        assert_eq!(floor_rec.get("floor"), Some(&Kind::Marker));

        let ahu_rec = parsed.row(2).unwrap();
        assert_eq!(ahu_rec.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(ahu_rec.get("equip"), Some(&Kind::Marker));
        assert_eq!(ahu_rec.get("ahu"), Some(&Kind::Marker));
    }

    /// A separator closes an open multi-line string before starting the next record.
    #[test]
    fn parse_multiline_between_records() {
        let trio = "dis: \"A\"\ndoc:\n Hello world\n Second line\n---\ndis: \"B\"\n";
        let parsed = decode_grid(trio).unwrap();
        assert_eq!(parsed.len(), 2);

        assert_eq!(
            parsed.row(0).unwrap().get("doc"),
            Some(&Kind::Str("Hello world\nSecond line".into()))
        );
        assert_eq!(
            parsed.row(1).unwrap().get("dis"),
            Some(&Kind::Str("B".into()))
        );
    }

    /// Encoding a grid and decoding the result preserves the tag values.
    #[test]
    fn roundtrip_encode_decode() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let columns = vec![
            HCol::new("area"),
            HCol::new("dis"),
            HCol::new("id"),
            HCol::new("site"),
        ];
        let mut site_row = HDict::new();
        site_row.set("dis", Kind::Str("My Site".into()));
        site_row.set("site", Kind::Marker);
        site_row.set(
            "area",
            Kind::Number(Number::new(1000.0, Some("ft\u{00B2}".into()))),
        );
        site_row.set("id", Kind::Ref(HRef::from_val("site-1")));

        let mut ahu_row = HDict::new();
        ahu_row.set("dis", Kind::Str("AHU-1".into()));
        ahu_row.set("id", Kind::Ref(HRef::from_val("ahu-1")));

        let original = HGrid::from_parts(HDict::new(), columns, vec![site_row, ahu_row]);
        let text = encode_grid(&original).unwrap();
        let decoded = decode_grid(&text).unwrap();

        assert_eq!(decoded.len(), 2);

        let first = decoded.row(0).unwrap();
        assert_eq!(first.get("dis"), Some(&Kind::Str("My Site".into())));
        assert_eq!(first.get("site"), Some(&Kind::Marker));
        assert_eq!(
            first.get("area"),
            Some(&Kind::Number(Number::new(
                1000.0,
                Some("ft\u{00B2}".into())
            )))
        );
        assert_eq!(first.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));

        let second = decoded.row(1).unwrap();
        assert_eq!(second.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(second.get("id"), Some(&Kind::Ref(HRef::from_val("ahu-1"))));
    }

    /// A string containing newlines survives an encode/decode round trip.
    #[test]
    fn roundtrip_multiline_string() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let columns = vec![HCol::new("dis"), HCol::new("doc")];
        let mut only_row = HDict::new();
        only_row.set("dis", Kind::Str("Test".into()));
        only_row.set("doc", Kind::Str("Line 1\nLine 2\nLine 3".into()));

        let original = HGrid::from_parts(HDict::new(), columns, vec![only_row]);
        let text = encode_grid(&original).unwrap();
        let decoded = decode_grid(&text).unwrap();

        assert_eq!(decoded.len(), 1);
        let rec = decoded.row(0).unwrap();
        assert_eq!(rec.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(
            rec.get("doc"),
            Some(&Kind::Str("Line 1\nLine 2\nLine 3".into()))
        );
    }

    /// A backtick-quoted value decodes as a URI.
    #[test]
    fn parse_uri_value() {
        use crate::kinds::Uri;

        let trio = "href: `http://example.com/api`\n";
        let parsed = decode_grid(trio).unwrap();
        let rec = parsed.row(0).unwrap();
        assert_eq!(
            rec.get("href"),
            Some(&Kind::Uri(Uri::new("http://example.com/api")))
        );
    }

    /// The codec registry resolves the Trio and Zinc MIME types and rejects `text/json`.
    #[test]
    fn codec_for_registry() {
        use crate::codecs::codec_for;

        let trio_codec = codec_for("text/trio").expect("trio codec should be registered");
        assert_eq!(trio_codec.mime_type(), "text/trio");

        let zinc_codec = codec_for("text/zinc").expect("zinc codec should be registered");
        assert_eq!(zinc_codec.mime_type(), "text/zinc");

        assert!(codec_for("text/json").is_none());
    }

    /// `TrioCodec` reports its MIME type and round-trips a scalar.
    #[test]
    fn trio_codec_trait_impl() {
        use crate::codecs::Codec;
        use crate::codecs::trio::TrioCodec;

        let codec = TrioCodec;
        assert_eq!(codec.mime_type(), "text/trio");

        let scalar = Kind::Number(Number::unitless(42.0));
        let text = codec.encode_scalar(&scalar).unwrap();
        assert_eq!(text, "42");
        let back = codec.decode_scalar(&text).unwrap();
        assert_eq!(back, scalar);
    }
}