1use std::fmt;
11
/// Origin of the value stored in a dataset or attribute, as named by the
/// `source` XML attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LayoutSource {
    /// `source="detector"`. Accepted on `<dataset>` elements only;
    /// `<attribute>` elements reject it as an invalid source.
    Detector,
    /// `source="constant"`.
    Constant,
    /// `source="ndattribute"`.
    NdAttribute,
    /// The `<dataset>` element carried no `source` attribute at all.
    Unset,
}
26
/// Value type declared by the `type` XML attribute of a dataset or attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LayoutDataType {
    /// `type="int"`.
    Int,
    /// `type="float"`.
    Float,
    /// Any other `type` string.
    String,
}

impl LayoutDataType {
    /// Maps a `type` attribute string to its variant.
    ///
    /// The comparison is exact and case-sensitive; every string other than
    /// `"int"` and `"float"` yields [`LayoutDataType::String`].
    fn parse(s: &str) -> LayoutDataType {
        if s == "int" {
            Self::Int
        } else if s == "float" {
            Self::Float
        } else {
            Self::String
        }
    }
}
44
/// Point in the file life cycle named by the `when` XML attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LayoutWhen {
    /// `when="OnFileOpen"`.
    OnFileOpen,
    /// `when="OnFileClose"`.
    OnFileClose,
    /// `when="OnFileWrite"`.
    OnFileWrite,
    /// Any other `when` string, including an empty one.
    OnFrame,
}

impl LayoutWhen {
    /// Maps a `when` attribute string to its variant.
    ///
    /// Only the three exact, case-sensitive names in the table are
    /// recognised; everything else yields [`LayoutWhen::OnFrame`].
    fn parse(s: &str) -> LayoutWhen {
        const NAMED: [(&str, LayoutWhen); 3] = [
            ("OnFileOpen", LayoutWhen::OnFileOpen),
            ("OnFileClose", LayoutWhen::OnFileClose),
            ("OnFileWrite", LayoutWhen::OnFileWrite),
        ];
        NAMED
            .iter()
            .find(|(label, _)| *label == s)
            .map_or(LayoutWhen::OnFrame, |&(_, when)| when)
    }
}
65
/// One `<attribute>` element attached to a group or dataset.
#[derive(Debug, Clone)]
pub struct LayoutAttribute {
    /// Value of the required `name` XML attribute.
    pub name: String,
    /// Parsed `source` XML attribute; attributes only accept
    /// `constant` and `ndattribute`.
    pub source: LayoutSource,
    /// Parsed `type` XML attribute; treated as `"string"` when absent.
    pub data_type: LayoutDataType,
    /// Value of the `value` XML attribute, or empty when absent.
    pub value: String,
    /// Value of the `ndattribute` XML attribute, or empty when absent.
    pub ndattribute: String,
    /// Parsed `when` XML attribute; `OnFrame` when absent or unrecognised.
    pub when: LayoutWhen,
}
77
/// One `<dataset>` element together with its child `<attribute>` elements.
#[derive(Debug, Clone)]
pub struct LayoutDataset {
    /// Value of the required `name` XML attribute.
    pub name: String,
    /// Parsed `source` XML attribute; `Unset` when the attribute is absent.
    pub source: LayoutSource,
    /// Parsed `type` XML attribute; treated as `"string"` when absent.
    pub data_type: LayoutDataType,
    /// Value of the `value` XML attribute, or empty when absent.
    pub value: String,
    /// Value of the `ndattribute` XML attribute, or empty when absent.
    pub ndattribute: String,
    /// True when the `det_default` XML attribute is `"true"` or `"1"`.
    pub det_default: bool,
    /// Parsed `when` XML attribute; `OnFrame` when absent or unrecognised.
    pub when: LayoutWhen,
    /// Child `<attribute>` elements in document order.
    pub attributes: Vec<LayoutAttribute>,
}
93
/// One `<hardlink>` element inside a group.
#[derive(Debug, Clone)]
pub struct LayoutHardlink {
    /// Value of the required `name` XML attribute.
    pub name: String,
    /// Value of the required `target` XML attribute.
    pub target: String,
}
100
101#[derive(Debug, Clone)]
103pub struct LayoutGroup {
104 pub name: String,
105 pub ndattr_default: bool,
107 pub attributes: Vec<LayoutAttribute>,
108 pub datasets: Vec<LayoutDataset>,
109 pub hardlinks: Vec<LayoutHardlink>,
110 pub groups: Vec<LayoutGroup>,
111}
112
113impl LayoutGroup {
114 fn new(name: String, ndattr_default: bool) -> Self {
115 Self {
116 name,
117 ndattr_default,
118 attributes: Vec::new(),
119 datasets: Vec::new(),
120 hardlinks: Vec::new(),
121 groups: Vec::new(),
122 }
123 }
124}
125
/// Parsed contents of an `<hdf5_layout>` document.
#[derive(Debug, Clone, Default)]
pub struct Hdf5Layout {
    /// Top-level `<group>` elements in document order.
    pub groups: Vec<LayoutGroup>,
    /// The `ndattribute` value of `<global name="detector_data_destination">`,
    /// or `None` when no such element (or no such attribute) was present.
    pub detector_data_destination: Option<String>,
}
134
/// Error raised while reading or parsing a layout; wraps a human-readable
/// message.
#[derive(Debug, Clone)]
pub struct LayoutError(pub String);

impl fmt::Display for LayoutError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let LayoutError(message) = self;
        f.write_str(message)
    }
}

impl std::error::Error for LayoutError {}
146
147impl Hdf5Layout {
148 pub fn from_file(path: &std::path::Path) -> Result<Hdf5Layout, LayoutError> {
150 let text = std::fs::read_to_string(path)
151 .map_err(|e| LayoutError(format!("cannot read layout file: {}", e)))?;
152 Self::parse(&text)
153 }
154
155 pub fn parse(text: &str) -> Result<Hdf5Layout, LayoutError> {
157 let tokens = tokenize(text)?;
158 let mut parser = Parser { tokens, pos: 0 };
159 parser.parse_document()
160 }
161
162 pub fn for_each_dataset<F: FnMut(&str, &LayoutDataset)>(&self, mut f: F) {
164 fn recurse<F: FnMut(&str, &LayoutDataset)>(g: &LayoutGroup, path: &str, f: &mut F) {
165 let here = if path.is_empty() {
166 format!("/{}", g.name)
167 } else {
168 format!("{}/{}", path, g.name)
169 };
170 for d in &g.datasets {
171 f(&here, d);
172 }
173 for sub in &g.groups {
174 recurse(sub, &here, f);
175 }
176 }
177 for g in &self.groups {
178 recurse(g, "", &mut f);
179 }
180 }
181
182 pub fn detector_dataset_path(&self) -> Option<String> {
184 let mut found = None;
185 self.for_each_dataset(|path, d| {
186 if d.det_default && d.source == LayoutSource::Detector && found.is_none() {
187 found = Some(format!("{}/{}", path, d.name));
188 }
189 });
190 if found.is_none() {
191 self.for_each_dataset(|path, d| {
193 if d.source == LayoutSource::Detector && found.is_none() {
194 found = Some(format!("{}/{}", path, d.name));
195 }
196 });
197 }
198 found
199 }
200
201 pub fn dataset_group_path(&self, name: &str) -> Option<String> {
205 let mut found = None;
206 self.for_each_dataset(|path, d| {
207 if d.name == name && found.is_none() {
208 found = Some(path.to_string());
209 }
210 });
211 found
212 }
213
214 pub fn ndattr_default_group(&self) -> Option<String> {
216 fn recurse(g: &LayoutGroup, path: &str) -> Option<String> {
217 let here = if path.is_empty() {
218 format!("/{}", g.name)
219 } else {
220 format!("{}/{}", path, g.name)
221 };
222 if g.ndattr_default {
223 return Some(here.clone());
224 }
225 for sub in &g.groups {
226 if let Some(p) = recurse(sub, &here) {
227 return Some(p);
228 }
229 }
230 None
231 }
232 for g in &self.groups {
233 if let Some(p) = recurse(g, "") {
234 return Some(p);
235 }
236 }
237 None
238 }
239}
240
/// Lexical unit produced by `tokenize`: an opening (or self-closing) tag,
/// or a closing tag.
#[derive(Debug, Clone)]
enum Token {
    /// `<name attr="value" ...>` or `<name ... />`.
    Open {
        /// Tag name.
        name: String,
        /// Attribute name/value pairs in source order.
        attrs: Vec<(String, String)>,
        /// True when the tag ended with `/>`.
        self_closing: bool,
    },
    /// `</name>`; holds the trimmed tag name.
    Close(String),
}
256
257fn tokenize(text: &str) -> Result<Vec<Token>, LayoutError> {
261 let bytes = text.as_bytes();
262 let mut tokens = Vec::new();
263 let mut i = 0;
264 while i < bytes.len() {
265 if bytes[i] != b'<' {
266 i += 1;
267 continue;
268 }
269 if text[i..].starts_with("<!--") {
271 match text[i..].find("-->") {
272 Some(end) => {
273 i += end + 3;
274 continue;
275 }
276 None => return Err(LayoutError("unterminated XML comment".into())),
277 }
278 }
279 if text[i..].starts_with("<?") || text[i..].starts_with("<!") {
281 match text[i..].find('>') {
282 Some(end) => {
283 i += end + 1;
284 continue;
285 }
286 None => return Err(LayoutError("unterminated XML declaration".into())),
287 }
288 }
289 let close = text[i..]
291 .find('>')
292 .ok_or_else(|| LayoutError("unterminated XML tag".into()))?;
293 let inner = &text[i + 1..i + close];
294 i += close + 1;
295
296 let inner_trim = inner.trim();
297 if let Some(rest) = inner_trim.strip_prefix('/') {
298 tokens.push(Token::Close(rest.trim().to_string()));
299 continue;
300 }
301 let self_closing = inner_trim.ends_with('/');
302 let body = if self_closing {
303 inner_trim[..inner_trim.len() - 1].trim()
304 } else {
305 inner_trim
306 };
307 let (name, attrs) = parse_tag_body(body)?;
308 tokens.push(Token::Open {
309 name,
310 attrs,
311 self_closing,
312 });
313 }
314 Ok(tokens)
315}
316
317fn parse_tag_body(body: &str) -> Result<(String, Vec<(String, String)>), LayoutError> {
319 let chars: Vec<char> = body.chars().collect();
320 let mut idx = 0;
321 let name_start = idx;
323 while idx < chars.len() && !chars[idx].is_whitespace() {
324 idx += 1;
325 }
326 let name: String = chars[name_start..idx].iter().collect();
327 if name.is_empty() {
328 return Err(LayoutError("empty XML tag name".into()));
329 }
330 let mut attrs = Vec::new();
331 loop {
332 while idx < chars.len() && chars[idx].is_whitespace() {
333 idx += 1;
334 }
335 if idx >= chars.len() {
336 break;
337 }
338 let attr_start = idx;
340 while idx < chars.len() && chars[idx] != '=' && !chars[idx].is_whitespace() {
341 idx += 1;
342 }
343 let attr_name: String = chars[attr_start..idx].iter().collect();
344 while idx < chars.len() && chars[idx].is_whitespace() {
345 idx += 1;
346 }
347 if idx >= chars.len() || chars[idx] != '=' {
348 return Err(LayoutError(format!(
349 "malformed attribute '{}' in tag '{}'",
350 attr_name, name
351 )));
352 }
353 idx += 1; while idx < chars.len() && chars[idx].is_whitespace() {
355 idx += 1;
356 }
357 if idx >= chars.len() || (chars[idx] != '"' && chars[idx] != '\'') {
358 return Err(LayoutError(format!(
359 "unquoted attribute value for '{}' in tag '{}'",
360 attr_name, name
361 )));
362 }
363 let quote = chars[idx];
364 idx += 1;
365 let val_start = idx;
366 while idx < chars.len() && chars[idx] != quote {
367 idx += 1;
368 }
369 if idx >= chars.len() {
370 return Err(LayoutError(format!(
371 "unterminated attribute value for '{}'",
372 attr_name
373 )));
374 }
375 let raw: String = chars[val_start..idx].iter().collect();
376 idx += 1; attrs.push((attr_name, unescape(&raw)));
378 }
379 Ok((name, attrs))
380}
381
/// Decodes the five predefined XML entity references (`lt`, `gt`, `quot`,
/// `apos`, `amp`) in an attribute value.
///
/// The input is decoded in a single left-to-right pass, so text produced by
/// one replacement is never decoded a second time. An ampersand that does
/// not start one of the five references is copied through unchanged.
fn unescape(s: &str) -> String {
    // Fast path: nothing to decode.
    if !s.contains('&') {
        return s.to_string();
    }
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let after = &rest[amp + 1..];
        // The entity name is the text between this ampersand and the next
        // semicolon; only the five predefined names are recognised.
        let decoded = after.find(';').and_then(|semi| {
            let ch = match &after[..semi] {
                "lt" => '<',
                "gt" => '>',
                "quot" => '"',
                "apos" => '\'',
                "amp" => '&',
                _ => return None,
            };
            Some((ch, semi))
        });
        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &after[semi + 1..];
            }
            None => {
                // Not a known reference: keep the ampersand literally and
                // resume scanning right after it.
                out.push('&');
                rest = after;
            }
        }
    }
    out.push_str(rest);
    out
}
393
394struct Parser {
399 tokens: Vec<Token>,
400 pos: usize,
401}
402
403impl Parser {
404 fn parse_document(&mut self) -> Result<Hdf5Layout, LayoutError> {
405 let mut layout = Hdf5Layout::default();
407 match self.tokens.get(self.pos).cloned() {
408 Some(Token::Open {
409 name, self_closing, ..
410 }) if name == "hdf5_layout" => {
411 self.pos += 1;
412 if self_closing {
413 return Ok(layout);
414 }
415 }
416 _ => return Err(LayoutError("root element <hdf5_layout> not found".into())),
417 }
418
419 loop {
420 match self.tokens.get(self.pos).cloned() {
421 Some(Token::Open {
422 name,
423 attrs,
424 self_closing,
425 }) => {
426 self.pos += 1;
427 match name.as_str() {
428 "group" => {
429 let g = self.parse_group(attrs, self_closing)?;
430 layout.groups.push(g);
431 }
432 "global" => {
433 let n = attr_get(&attrs, "name").unwrap_or_default();
434 if n == "detector_data_destination" {
435 layout.detector_data_destination = attr_get(&attrs, "ndattribute");
436 }
437 if !self_closing {
438 self.skip_to_close("global")?;
439 }
440 }
441 other => {
442 return Err(LayoutError(format!(
443 "unexpected element <{}> in <hdf5_layout>",
444 other
445 )));
446 }
447 }
448 }
449 Some(Token::Close(name)) if name == "hdf5_layout" => {
450 self.pos += 1;
451 break;
452 }
453 Some(Token::Close(other)) => {
454 return Err(LayoutError(format!(
455 "unexpected </{}> at document level",
456 other
457 )));
458 }
459 None => return Err(LayoutError("unterminated <hdf5_layout> element".into())),
460 }
461 }
462 Ok(layout)
463 }
464
465 fn parse_group(
466 &mut self,
467 attrs: Vec<(String, String)>,
468 self_closing: bool,
469 ) -> Result<LayoutGroup, LayoutError> {
470 let name = attr_get(&attrs, "name")
471 .ok_or_else(|| LayoutError("<group> missing required 'name'".into()))?;
472 let ndattr_default = attr_bool(&attrs, "ndattr_default");
473 let mut group = LayoutGroup::new(name.clone(), ndattr_default);
474 if self_closing {
475 return Ok(group);
476 }
477 loop {
478 match self.tokens.get(self.pos).cloned() {
479 Some(Token::Open {
480 name: child,
481 attrs: cattrs,
482 self_closing: sc,
483 }) => {
484 self.pos += 1;
485 match child.as_str() {
486 "group" => group.groups.push(self.parse_group(cattrs, sc)?),
487 "dataset" => group.datasets.push(self.parse_dataset(cattrs, sc)?),
488 "attribute" => {
489 group.attributes.push(parse_attribute(&cattrs)?);
490 if !sc {
491 self.skip_to_close("attribute")?;
492 }
493 }
494 "hardlink" => {
495 group.hardlinks.push(LayoutHardlink {
496 name: attr_get(&cattrs, "name").ok_or_else(|| {
497 LayoutError("<hardlink> missing 'name'".into())
498 })?,
499 target: attr_get(&cattrs, "target").ok_or_else(|| {
500 LayoutError("<hardlink> missing 'target'".into())
501 })?,
502 });
503 if !sc {
504 self.skip_to_close("hardlink")?;
505 }
506 }
507 other => {
508 return Err(LayoutError(format!(
509 "unexpected <{}> inside <group name=\"{}\">",
510 other, name
511 )));
512 }
513 }
514 }
515 Some(Token::Close(close_name)) if close_name == "group" => {
516 self.pos += 1;
517 break;
518 }
519 Some(Token::Close(other)) => {
520 return Err(LayoutError(format!(
521 "mismatched </{}>, expected </group>",
522 other
523 )));
524 }
525 None => {
526 return Err(LayoutError(format!(
527 "unterminated <group name=\"{}\">",
528 name
529 )));
530 }
531 }
532 }
533 Ok(group)
534 }
535
536 fn parse_dataset(
537 &mut self,
538 attrs: Vec<(String, String)>,
539 self_closing: bool,
540 ) -> Result<LayoutDataset, LayoutError> {
541 let name = attr_get(&attrs, "name")
542 .ok_or_else(|| LayoutError("<dataset> missing required 'name'".into()))?;
543 let source = parse_source(&attrs, &name)?;
544 let mut ds = LayoutDataset {
545 name: name.clone(),
546 source,
547 data_type: LayoutDataType::parse(
548 &attr_get(&attrs, "type").unwrap_or_else(|| "string".into()),
549 ),
550 value: attr_get(&attrs, "value").unwrap_or_default(),
551 ndattribute: attr_get(&attrs, "ndattribute").unwrap_or_default(),
552 det_default: attr_bool(&attrs, "det_default"),
553 when: LayoutWhen::parse(&attr_get(&attrs, "when").unwrap_or_default()),
554 attributes: Vec::new(),
555 };
556 if self_closing {
557 return Ok(ds);
558 }
559 loop {
560 match self.tokens.get(self.pos).cloned() {
561 Some(Token::Open {
562 name: child,
563 attrs: cattrs,
564 self_closing: sc,
565 }) => {
566 self.pos += 1;
567 if child == "attribute" {
568 ds.attributes.push(parse_attribute(&cattrs)?);
569 if !sc {
570 self.skip_to_close("attribute")?;
571 }
572 } else {
573 return Err(LayoutError(format!(
574 "unexpected <{}> inside <dataset name=\"{}\">",
575 child, name
576 )));
577 }
578 }
579 Some(Token::Close(close_name)) if close_name == "dataset" => {
580 self.pos += 1;
581 break;
582 }
583 Some(Token::Close(other)) => {
584 return Err(LayoutError(format!(
585 "mismatched </{}>, expected </dataset>",
586 other
587 )));
588 }
589 None => {
590 return Err(LayoutError(format!(
591 "unterminated <dataset name=\"{}\">",
592 name
593 )));
594 }
595 }
596 }
597 Ok(ds)
598 }
599
600 fn skip_to_close(&mut self, tag: &str) -> Result<(), LayoutError> {
603 let mut depth = 1;
604 while let Some(tok) = self.tokens.get(self.pos).cloned() {
605 self.pos += 1;
606 match tok {
607 Token::Open {
608 name, self_closing, ..
609 } if name == tag && !self_closing => depth += 1,
610 Token::Close(name) if name == tag => {
611 depth -= 1;
612 if depth == 0 {
613 return Ok(());
614 }
615 }
616 _ => {}
617 }
618 }
619 Err(LayoutError(format!("unterminated <{}> element", tag)))
620 }
621}
622
623fn parse_attribute(attrs: &[(String, String)]) -> Result<LayoutAttribute, LayoutError> {
624 let name = attr_get(attrs, "name")
625 .ok_or_else(|| LayoutError("<attribute> missing required 'name'".into()))?;
626 let source_str = attr_get(attrs, "source")
627 .ok_or_else(|| LayoutError(format!("<attribute name=\"{}\"> missing 'source'", name)))?;
628 let source = match source_str.as_str() {
629 "constant" => LayoutSource::Constant,
630 "ndattribute" => LayoutSource::NdAttribute,
631 other => {
632 return Err(LayoutError(format!(
633 "<attribute name=\"{}\"> invalid source '{}'",
634 name, other
635 )));
636 }
637 };
638 Ok(LayoutAttribute {
639 name,
640 source,
641 data_type: LayoutDataType::parse(
642 &attr_get(attrs, "type").unwrap_or_else(|| "string".into()),
643 ),
644 value: attr_get(attrs, "value").unwrap_or_default(),
645 ndattribute: attr_get(attrs, "ndattribute").unwrap_or_default(),
646 when: LayoutWhen::parse(&attr_get(attrs, "when").unwrap_or_default()),
647 })
648}
649
650fn parse_source(attrs: &[(String, String)], name: &str) -> Result<LayoutSource, LayoutError> {
651 let s = match attr_get(attrs, "source") {
655 Some(s) => s,
656 None => return Ok(LayoutSource::Unset),
657 };
658 match s.as_str() {
659 "detector" => Ok(LayoutSource::Detector),
660 "constant" => Ok(LayoutSource::Constant),
661 "ndattribute" => Ok(LayoutSource::NdAttribute),
662 other => Err(LayoutError(format!(
663 "<dataset name=\"{}\"> invalid source '{}'",
664 name, other
665 ))),
666 }
667}
668
/// Returns a copy of the value of the first attribute named `key`, or
/// `None` when no attribute has that name.
fn attr_get(attrs: &[(String, String)], key: &str) -> Option<String> {
    for (name, value) in attrs {
        if name == key {
            return Some(value.clone());
        }
    }
    None
}
672
673fn attr_bool(attrs: &[(String, String)], key: &str) -> bool {
674 matches!(attr_get(attrs, key).as_deref(), Some("true") | Some("1"))
675}
676
#[cfg(test)]
mod tests {
    use super::*;

    /// Layout exercising every element kind the parser accepts.
    const SAMPLE: &str = r#"<?xml version="1.0"?>
<hdf5_layout>
  <global name="detector_data_destination" ndattribute="detdest" />
  <group name="entry">
    <attribute name="NX_class" source="constant" value="NXentry" type="string" />
    <group name="data" ndattr_default="true">
      <dataset name="data" source="detector" det_default="true">
        <attribute name="signal" source="constant" value="1" type="int" />
      </dataset>
      <dataset name="exposure" source="ndattribute" ndattribute="AcquireTime" type="float" />
    </group>
    <group name="instrument">
      <dataset name="name" source="constant" value="MyBeamline" type="string" />
      <hardlink name="link_to_data" target="/entry/data/data" />
    </group>
  </group>
</hdf5_layout>"#;

    #[test]
    fn parses_full_tree() {
        let layout = Hdf5Layout::parse(SAMPLE).unwrap();
        assert_eq!(layout.groups.len(), 1);
        assert_eq!(layout.detector_data_destination.as_deref(), Some("detdest"));
        let entry = &layout.groups[0];
        assert_eq!(entry.name, "entry");
        assert_eq!(entry.attributes.len(), 1);
        assert_eq!(entry.attributes[0].name, "NX_class");
        assert_eq!(entry.attributes[0].value, "NXentry");
        assert_eq!(entry.groups.len(), 2);

        let data_group = &entry.groups[0];
        assert!(data_group.ndattr_default);
        assert_eq!(data_group.datasets.len(), 2);
        assert!(data_group.datasets[0].det_default);
        assert_eq!(data_group.datasets[0].source, LayoutSource::Detector);
        assert_eq!(data_group.datasets[0].attributes.len(), 1);
        assert_eq!(
            data_group.datasets[0].attributes[0].data_type,
            LayoutDataType::Int
        );
        assert_eq!(data_group.datasets[1].source, LayoutSource::NdAttribute);
        assert_eq!(data_group.datasets[1].ndattribute, "AcquireTime");
        assert_eq!(data_group.datasets[1].data_type, LayoutDataType::Float);

        let instr = &entry.groups[1];
        assert_eq!(instr.hardlinks.len(), 1);
        assert_eq!(instr.hardlinks[0].target, "/entry/data/data");
    }

    #[test]
    fn detector_path_resolves() {
        let layout = Hdf5Layout::parse(SAMPLE).unwrap();
        assert_eq!(
            layout.detector_dataset_path().as_deref(),
            Some("/entry/data/data")
        );
        assert_eq!(
            layout.ndattr_default_group().as_deref(),
            Some("/entry/data")
        );
    }

    #[test]
    fn rejects_missing_root() {
        let err = Hdf5Layout::parse("<foo/>").unwrap_err();
        assert!(err.0.contains("hdf5_layout"));
    }

    #[test]
    fn rejects_missing_dataset_name() {
        let xml =
            r#"<hdf5_layout><group name="g"><dataset source="detector"/></group></hdf5_layout>"#;
        let err = Hdf5Layout::parse(xml).unwrap_err();
        assert!(err.0.contains("name"));
    }

    #[test]
    fn rejects_bad_source() {
        let xml = r#"<hdf5_layout><group name="g"><dataset name="d" source="bogus"/></group></hdf5_layout>"#;
        let err = Hdf5Layout::parse(xml).unwrap_err();
        assert!(err.0.contains("bogus"));
    }

    #[test]
    fn handles_comments_and_entities() {
        // `\u{26}` is the ampersand, spelled as an escape so that the entity
        // reference in `value` reaches the parser intact even if this source
        // passes through tooling that decodes HTML entities.
        let xml = "<hdf5_layout>\n\
                   <!-- a comment -->\n\
                   <group name=\"g\">\n\
                   <dataset name=\"d\" source=\"constant\" value=\"a \u{26}amp; b\" type=\"string\"/>\n\
                   </group>\n\
                   </hdf5_layout>";
        let layout = Hdf5Layout::parse(xml).unwrap();
        assert_eq!(layout.groups[0].datasets[0].value, "a & b");
    }
}