haystack_core/codecs/trio/
parser.rs1use crate::codecs::CodecError;
4use crate::codecs::zinc::ZincParser;
5use crate::data::{HCol, HDict, HGrid};
6use crate::kinds::Kind;
7
8pub fn decode_grid(input: &str) -> Result<HGrid, CodecError> {
14 let records = parse_records(input)?;
15
16 if records.is_empty() {
17 return Ok(HGrid::new());
18 }
19
20 let mut col_names: Vec<String> = Vec::new();
22 let mut seen = std::collections::HashSet::new();
23 for rec in &records {
24 let mut names: Vec<&str> = rec.tag_names().collect();
26 names.sort();
27 for name in names {
28 if seen.insert(name.to_string()) {
29 col_names.push(name.to_string());
30 }
31 }
32 }
33
34 let cols: Vec<HCol> = col_names.iter().map(HCol::new).collect();
35 Ok(HGrid::from_parts(HDict::new(), cols, records))
36}
37
38fn parse_records(input: &str) -> Result<Vec<HDict>, CodecError> {
40 let mut records: Vec<HDict> = Vec::new();
41 let mut current_tags: Vec<(String, Kind)> = Vec::new();
42 let mut multiline_name: Option<String> = None;
43 let mut multiline_lines: Vec<String> = Vec::new();
44
45 for line in input.split('\n') {
46 let stripped = line.trim();
47
48 if is_record_separator(stripped) {
50 if let Some(name) = multiline_name.take() {
52 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
53 multiline_lines.clear();
54 }
55 if !current_tags.is_empty() {
57 records.push(tags_to_dict(current_tags));
58 current_tags = Vec::new();
59 }
60 continue;
61 }
62
63 if stripped.starts_with("//") {
65 continue;
66 }
67
68 if multiline_name.is_some() {
70 if let Some(content) = line.strip_prefix(" ").or_else(|| line.strip_prefix('\t')) {
71 multiline_lines.push(content.to_string());
73 continue;
74 } else {
75 let name = multiline_name.take().unwrap();
77 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
78 multiline_lines.clear();
79 }
81 }
82
83 if stripped.is_empty() {
85 continue;
86 }
87
88 match stripped.find(':') {
90 None => {
91 current_tags.push((stripped.to_string(), Kind::Marker));
93 }
94 Some(colon_idx) => {
95 let name = stripped[..colon_idx].trim().to_string();
96 let rest = &stripped[colon_idx + 1..];
97
98 if rest.trim().is_empty() {
99 multiline_name = Some(name);
101 multiline_lines.clear();
102 } else {
103 let val_str = rest.trim();
105 let val = parse_scalar_value(val_str);
106 current_tags.push((name, val));
107 }
108 }
109 }
110 }
111
112 if let Some(name) = multiline_name.take() {
114 current_tags.push((name, Kind::Str(multiline_lines.join("\n"))));
115 }
116
117 if !current_tags.is_empty() {
119 records.push(tags_to_dict(current_tags));
120 }
121
122 Ok(records)
123}
124
125fn parse_scalar_value(val_str: &str) -> Kind {
128 let mut parser = ZincParser::new(val_str);
129 match parser.parse_scalar() {
130 Ok(val) => {
131 if parser.at_end() {
132 val
133 } else {
134 Kind::Str(val_str.to_string())
136 }
137 }
138 Err(_) => {
139 Kind::Str(val_str.to_string())
141 }
142 }
143}
144
/// Returns `true` when `stripped` consists solely of `-` characters and is
/// at least three characters long.
///
/// The caller is expected to pass a line with surrounding whitespace already
/// removed; this function does no trimming of its own.
fn is_record_separator(stripped: &str) -> bool {
    // '-' is a single ASCII byte, so checking bytes is equivalent to
    // checking chars, and the byte length equals the dash count.
    let long_enough = stripped.len() >= 3;
    long_enough && stripped.bytes().all(|b| b == b'-')
}
149
150fn tags_to_dict(tags: Vec<(String, Kind)>) -> HDict {
152 let mut dict = HDict::new();
153 for (name, val) in tags {
154 dict.set(name, val);
155 }
156 dict
157}
158
// Unit tests for the Trio decoder. Most tests feed a small Trio snippet to
// `decode_grid` and assert on the resulting rows; the last tests cover the
// encode/decode round trip and the codec registry.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::kinds::{Coord, HRef, Number};
    use chrono::NaiveDate;

    /// Empty input decodes to a grid with no rows and no columns.
    #[test]
    fn parse_empty_input() {
        let grid = decode_grid("").unwrap();
        assert!(grid.is_empty());
        assert_eq!(grid.num_cols(), 0);
    }

    /// Input made only of whitespace and newlines decodes to an empty grid.
    #[test]
    fn parse_whitespace_only() {
        let grid = decode_grid(" \n \n ").unwrap();
        assert!(grid.is_empty());
    }

    /// One record mixing a quoted string, a marker and a number with a unit.
    #[test]
    fn parse_single_record_with_markers_and_values() {
        let input = "dis: \"Site 1\"\nsite\narea: 3702ft\u{00B2}\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("dis"), Some(&Kind::Str("Site 1".into())));
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(
            row.get("area"),
            Some(&Kind::Number(Number::new(
                3702.0,
                Some("ft\u{00B2}".into())
            )))
        );
    }

    /// A `---` line splits the input into two records.
    #[test]
    fn parse_multiple_records() {
        let input = "dis: \"Site A\"\nsite\n---\ndis: \"Site B\"\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);

        assert_eq!(
            grid.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site A".into()))
        );
        assert_eq!(
            grid.row(1).unwrap().get("dis"),
            Some(&Kind::Str("Site B".into()))
        );
    }

    /// A leading `//` comment line produces no tag.
    #[test]
    fn parse_comments_skipped() {
        let input = "// This is a comment\ndis: \"Site\"\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);
        assert_eq!(
            grid.row(0).unwrap().get("dis"),
            Some(&Kind::Str("Site".into()))
        );
        assert!(grid.row(0).unwrap().missing("//"));
    }

    /// Indented lines after `doc:` are joined with `\n`; the following
    /// unindented `site` line is parsed as a marker.
    #[test]
    fn parse_multiline_string() {
        let input = "dis: \"Test\"\ndoc:\n This is line 1\n This is line 2\nsite\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("doc"),
            Some(&Kind::Str("This is line 1\nThis is line 2".into()))
        );
        assert_eq!(row.get("site"), Some(&Kind::Marker));
    }

    /// Tab-indented continuation lines are accepted as well.
    #[test]
    fn parse_multiline_string_with_tab_indent() {
        let input = "doc:\n\tLine A\n\tLine B\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("doc"), Some(&Kind::Str("Line A\nLine B".into())));
    }

    /// A multi-line string still open when the input ends (no trailing
    /// newline) is emitted.
    #[test]
    fn parse_multiline_string_at_end_of_input() {
        let input = "doc:\n Last line";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("doc"), Some(&Kind::Str("Last line".into())));
    }

    /// A record consisting only of marker tags.
    #[test]
    fn parse_markers_alone() {
        let input = "site\nequip\nahu\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(row.get("equip"), Some(&Kind::Marker));
        assert_eq!(row.get("ahu"), Some(&Kind::Marker));
    }

    /// Blank lines between tags do not start a new record.
    #[test]
    fn parse_blank_lines_between_tags() {
        let input = "dis: \"Test\"\n\nsite\n\narea: 100\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(row.get("site"), Some(&Kind::Marker));
        assert_eq!(
            row.get("area"),
            Some(&Kind::Number(Number::unitless(100.0)))
        );
    }

    /// `@id` values decode to `Kind::Ref`.
    #[test]
    fn parse_ref_values() {
        let input = "id: @site-1\nsiteRef: @alpha\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 1);

        let row = grid.row(0).unwrap();
        assert_eq!(row.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));
        assert_eq!(
            row.get("siteRef"),
            Some(&Kind::Ref(HRef::from_val("alpha")))
        );
    }

    /// A `YYYY-MM-DD` value decodes to `Kind::Date`.
    #[test]
    fn parse_date_value() {
        let input = "installed: 2024-03-13\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("installed"),
            Some(&Kind::Date(NaiveDate::from_ymd_opt(2024, 3, 13).unwrap()))
        );
    }

    /// A `C(lat,lng)` value decodes to `Kind::Coord`.
    #[test]
    fn parse_coord_value() {
        let input = "geoCoord: C(37.5458,-77.4491)\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("geoCoord"),
            Some(&Kind::Coord(Coord::new(37.5458, -77.4491)))
        );
    }

    /// `T` and `F` decode to boolean true and false.
    #[test]
    fn parse_bool_values() {
        let input = "active: T\ndeleted: F\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(row.get("active"), Some(&Kind::Bool(true)));
        assert_eq!(row.get("deleted"), Some(&Kind::Bool(false)));
    }

    /// Numbers keep their unit suffix, including non-ASCII and `/` units.
    #[test]
    fn parse_number_with_unit() {
        let input = "temp: 72.5\u{00B0}F\nflow: 350gal/min\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("temp"),
            Some(&Kind::Number(Number::new(72.5, Some("\u{00B0}F".into()))))
        );
        assert_eq!(
            row.get("flow"),
            Some(&Kind::Number(Number::new(350.0, Some("gal/min".into()))))
        );
    }

    /// A separator longer than three dashes still splits records.
    #[test]
    fn parse_separator_with_more_dashes() {
        let input = "site\n-----\nequip\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);
        assert_eq!(grid.row(0).unwrap().get("site"), Some(&Kind::Marker));
        assert_eq!(grid.row(1).unwrap().get("equip"), Some(&Kind::Marker));
    }

    /// Grid columns include tags that appear in only some of the records.
    #[test]
    fn parse_columns_derived_from_all_records() {
        let input = "dis: \"A\"\nsite\n---\ndis: \"B\"\narea: 100\n";
        let grid = decode_grid(input).unwrap();

        let col_names: Vec<&str> = grid.col_names().collect();
        assert!(col_names.contains(&"dis"));
        assert!(col_names.contains(&"site"));
        assert!(col_names.contains(&"area"));
    }

    /// A three-record document with comments, refs, markers, a coordinate
    /// and a number with a unit.
    #[test]
    fn parse_complex_trio_file() {
        let input = "\
// Alpha Office
id: @alpha
dis: \"Alpha Office\"
site
geoAddr: \"600 N 2nd St, Richmond VA 23219\"
geoCoord: C(37.5407,-77.4360)
area: 120000ft\u{00B2}
---
// Floor 1
id: @floor1
dis: \"Floor 1\"
floor
siteRef: @alpha
---
id: @ahu1
dis: \"AHU-1\"
equip
ahu
siteRef: @alpha
floorRef: @floor1
";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 3);

        let site = grid.row(0).unwrap();
        assert_eq!(site.get("dis"), Some(&Kind::Str("Alpha Office".into())));
        assert_eq!(site.get("site"), Some(&Kind::Marker));
        assert_eq!(site.get("id"), Some(&Kind::Ref(HRef::from_val("alpha"))));
        assert_eq!(
            site.get("area"),
            Some(&Kind::Number(Number::new(
                120000.0,
                Some("ft\u{00B2}".into())
            )))
        );

        let floor = grid.row(1).unwrap();
        assert_eq!(floor.get("dis"), Some(&Kind::Str("Floor 1".into())));
        assert_eq!(floor.get("floor"), Some(&Kind::Marker));

        let ahu = grid.row(2).unwrap();
        assert_eq!(ahu.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(ahu.get("equip"), Some(&Kind::Marker));
        assert_eq!(ahu.get("ahu"), Some(&Kind::Marker));
    }

    /// A record separator closes an open multi-line string and the record.
    #[test]
    fn parse_multiline_between_records() {
        let input = "dis: \"A\"\ndoc:\n Hello world\n Second line\n---\ndis: \"B\"\n";
        let grid = decode_grid(input).unwrap();
        assert_eq!(grid.len(), 2);

        assert_eq!(
            grid.row(0).unwrap().get("doc"),
            Some(&Kind::Str("Hello world\nSecond line".into()))
        );
        assert_eq!(
            grid.row(1).unwrap().get("dis"),
            Some(&Kind::Str("B".into()))
        );
    }

    /// Encoding a two-row grid and decoding the result yields the same tag
    /// values for both rows.
    #[test]
    fn roundtrip_encode_decode() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let cols = vec![
            HCol::new("area"),
            HCol::new("dis"),
            HCol::new("id"),
            HCol::new("site"),
        ];
        let mut row1 = HDict::new();
        row1.set("dis", Kind::Str("My Site".into()));
        row1.set("site", Kind::Marker);
        row1.set(
            "area",
            Kind::Number(Number::new(1000.0, Some("ft\u{00B2}".into()))),
        );
        row1.set("id", Kind::Ref(HRef::from_val("site-1")));

        let mut row2 = HDict::new();
        row2.set("dis", Kind::Str("AHU-1".into()));
        row2.set("id", Kind::Ref(HRef::from_val("ahu-1")));

        let g = HGrid::from_parts(HDict::new(), cols, vec![row1, row2]);
        let encoded = encode_grid(&g).unwrap();
        let decoded = decode_grid(&encoded).unwrap();

        assert_eq!(decoded.len(), 2);

        let r0 = decoded.row(0).unwrap();
        assert_eq!(r0.get("dis"), Some(&Kind::Str("My Site".into())));
        assert_eq!(r0.get("site"), Some(&Kind::Marker));
        assert_eq!(
            r0.get("area"),
            Some(&Kind::Number(Number::new(
                1000.0,
                Some("ft\u{00B2}".into())
            )))
        );
        assert_eq!(r0.get("id"), Some(&Kind::Ref(HRef::from_val("site-1"))));

        let r1 = decoded.row(1).unwrap();
        assert_eq!(r1.get("dis"), Some(&Kind::Str("AHU-1".into())));
        assert_eq!(r1.get("id"), Some(&Kind::Ref(HRef::from_val("ahu-1"))));
    }

    /// A string containing newlines survives an encode/decode round trip.
    #[test]
    fn roundtrip_multiline_string() {
        use crate::codecs::trio::encode_grid;
        use crate::data::HCol;

        let cols = vec![HCol::new("dis"), HCol::new("doc")];
        let mut row = HDict::new();
        row.set("dis", Kind::Str("Test".into()));
        row.set("doc", Kind::Str("Line 1\nLine 2\nLine 3".into()));

        let g = HGrid::from_parts(HDict::new(), cols, vec![row]);
        let encoded = encode_grid(&g).unwrap();
        let decoded = decode_grid(&encoded).unwrap();

        assert_eq!(decoded.len(), 1);
        let r = decoded.row(0).unwrap();
        assert_eq!(r.get("dis"), Some(&Kind::Str("Test".into())));
        assert_eq!(
            r.get("doc"),
            Some(&Kind::Str("Line 1\nLine 2\nLine 3".into()))
        );
    }

    /// A backtick-quoted value decodes to `Kind::Uri`; the `:` inside the
    /// URI is not mistaken for the tag separator.
    #[test]
    fn parse_uri_value() {
        use crate::kinds::Uri;

        let input = "href: `http://example.com/api`\n";
        let grid = decode_grid(input).unwrap();
        let row = grid.row(0).unwrap();
        assert_eq!(
            row.get("href"),
            Some(&Kind::Uri(Uri::new("http://example.com/api")))
        );
    }

    /// `codec_for` resolves `text/trio` and `text/zinc` and returns `None`
    /// for `text/json`.
    #[test]
    fn codec_for_registry() {
        use crate::codecs::codec_for;

        let trio = codec_for("text/trio").expect("trio codec should be registered");
        assert_eq!(trio.mime_type(), "text/trio");

        let zinc = codec_for("text/zinc").expect("zinc codec should be registered");
        assert_eq!(zinc.mime_type(), "text/zinc");

        assert!(codec_for("text/json").is_none());
    }

    /// `TrioCodec` reports its MIME type and round-trips a unitless number
    /// through `encode_scalar` / `decode_scalar`.
    #[test]
    fn trio_codec_trait_impl() {
        use crate::codecs::Codec;
        use crate::codecs::trio::TrioCodec;

        let codec = TrioCodec;
        assert_eq!(codec.mime_type(), "text/trio");

        let val = Kind::Number(Number::unitless(42.0));
        let encoded = codec.encode_scalar(&val).unwrap();
        assert_eq!(encoded, "42");
        let decoded = codec.decode_scalar(&encoded).unwrap();
        assert_eq!(decoded, val);
    }
}