1use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
15#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct AssembledTree {
18 pub segments: Vec<AssembledSegment>,
19 pub groups: Vec<AssembledGroup>,
20 #[serde(default)]
23 pub post_group_start: usize,
24 #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
29 pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct AssembledSegment {
35 pub tag: String,
36 pub elements: Vec<Vec<String>>,
38 #[serde(default, skip_serializing_if = "Option::is_none")]
42 pub mig_number: Option<String>,
43}
44
45#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct AssembledGroup {
48 pub group_id: String,
49 pub repetitions: Vec<AssembledGroupInstance>,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct AssembledGroupInstance {
55 pub segments: Vec<AssembledSegment>,
56 pub child_groups: Vec<AssembledGroup>,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
59 pub entry_mig_number: Option<String>,
60 #[serde(default, skip_serializing_if = "Vec::is_empty")]
68 pub variant_mig_numbers: Vec<String>,
69 #[serde(default, skip_serializing_if = "Vec::is_empty")]
73 pub skipped_segments: Vec<AssembledSegment>,
74}
75
76impl AssembledGroupInstance {
77 pub fn as_assembled_tree(&self) -> AssembledTree {
84 AssembledTree {
85 segments: self.segments.clone(),
86 groups: self.child_groups.clone(),
87 post_group_start: self.segments.len(),
88 inter_group_segments: std::collections::BTreeMap::new(),
89 }
90 }
91}
92
/// Options controlling how [`Assembler`] treats unexpected input.
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, a segment inside a group that matches neither the group's entry
    /// tag, one of its remaining segment slots, nor the entry of one of its nested
    /// groups is recorded in `skipped_segments` and stepped over instead of ending
    /// the group. Defaults to `false`.
    pub skip_unknown_segments: bool,
}
104
105pub struct Assembler<'a> {
110 mig: &'a MigSchema,
111 config: AssemblerConfig,
112}
113
114impl<'a> Assembler<'a> {
115 pub fn new(mig: &'a MigSchema) -> Self {
116 Self {
117 mig,
118 config: AssemblerConfig::default(),
119 }
120 }
121
122 pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
123 Self { mig, config }
124 }
125
126 pub fn assemble_generic(
128 &self,
129 segments: &[OwnedSegment],
130 ) -> Result<AssembledTree, AssemblyError> {
131 let mut cursor = SegmentCursor::new(segments.len());
132 let mut tree = AssembledTree {
133 segments: Vec::new(),
134 groups: Vec::new(),
135 post_group_start: 0,
136 inter_group_segments: std::collections::BTreeMap::new(),
137 };
138
139 let mut matched_seg_indices = Vec::new();
141
142 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
144 if cursor.is_exhausted() {
145 break;
146 }
147 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
148 tree.segments.push(assembled);
149 matched_seg_indices.push(i);
150 }
151 }
152
153 let mut group_idx = 0;
162 while group_idx < self.mig.segment_groups.len() {
163 if cursor.is_exhausted() {
164 break;
165 }
166
167 let mig_group = &self.mig.segment_groups[group_idx];
168
169 let tree_group_idx = tree.groups.len();
171 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
172 if cursor.is_exhausted() {
173 break;
174 }
175 if matched_seg_indices.contains(&i) {
176 continue;
177 }
178 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
179 tree.inter_group_segments
180 .entry(tree_group_idx)
181 .or_default()
182 .push(assembled);
183 matched_seg_indices.push(i);
184 }
185 }
186
187 if mig_group.variant_code.is_some() {
189 let variant_count = self.mig.segment_groups[group_idx..]
190 .iter()
191 .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
192 .count();
193 let variant_end = group_idx + variant_count;
194
195 let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
196 if let Some(combined) =
197 self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
198 {
199 tree.groups.push(combined);
200 }
201 group_idx = variant_end;
202 } else {
203 if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
204 tree.groups.push(assembled);
205 }
206 group_idx += 1;
207 }
208 }
209
210 tree.post_group_start = tree.segments.len();
212
213 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
215 if cursor.is_exhausted() {
216 break;
217 }
218 if matched_seg_indices.contains(&i) {
219 continue;
220 }
221 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
222 tree.segments.push(assembled);
223 }
224 }
225
226 Ok(tree)
227 }
228
229 fn try_consume_segment(
230 &self,
231 segments: &[OwnedSegment],
232 cursor: &mut SegmentCursor,
233 mig_seg: &MigSegment,
234 ) -> Result<Option<AssembledSegment>, AssemblyError> {
235 if cursor.is_exhausted() {
236 return Ok(None);
237 }
238 let seg = &segments[cursor.position()];
239 if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
240 let mut assembled = owned_to_assembled(seg);
241 assembled.mig_number = mig_seg.number.clone();
242 cursor.advance();
243 Ok(Some(assembled))
244 } else {
245 Ok(None) }
247 }
248
249 fn try_consume_group(
250 &self,
251 segments: &[OwnedSegment],
252 cursor: &mut SegmentCursor,
253 mig_group: &MigSegmentGroup,
254 ) -> Result<Option<AssembledGroup>, AssemblyError> {
255 let mut repetitions = Vec::new();
256 let entry_segment = mig_group.segments.first().ok_or_else(|| {
257 AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
258 })?;
259
260 while !cursor.is_exhausted() {
262 let seg = &segments[cursor.position()];
263 if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
264 break; }
266
267 if !mig_group.variant_codes.is_empty() {
269 let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
270 let actual_qual = seg
271 .elements
272 .get(ei)
273 .and_then(|e| e.get(ci))
274 .map(|s| s.as_str())
275 .unwrap_or("");
276 if !mig_group
277 .variant_codes
278 .iter()
279 .any(|c| actual_qual.eq_ignore_ascii_case(c))
280 {
281 break;
282 }
283 } else if let Some(ref expected_code) = mig_group.variant_code {
284 let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
285 let actual_qual = seg
286 .elements
287 .get(ei)
288 .and_then(|e| e.get(ci))
289 .map(|s| s.as_str())
290 .unwrap_or("");
291 if !actual_qual.eq_ignore_ascii_case(expected_code) {
292 break;
293 }
294 }
295
296 let mut instance = AssembledGroupInstance {
297 segments: Vec::new(),
298 child_groups: Vec::new(),
299 entry_mig_number: entry_segment.number.clone(),
300 variant_mig_numbers: collect_mig_numbers(mig_group),
301 skipped_segments: Vec::new(),
302 };
303
304 let mut slot_idx = 0;
315 let mut is_entry_run = true;
316 while slot_idx < mig_group.segments.len() {
317 if cursor.is_exhausted() {
318 break;
319 }
320 let current_tag = &mig_group.segments[slot_idx].id;
321 let run_len = mig_group.segments[slot_idx..]
322 .iter()
323 .take_while(|s| s.id == *current_tag)
324 .count();
325
326 if is_entry_run {
327 for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
329 if cursor.is_exhausted() {
330 break;
331 }
332 if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
333 instance.segments.push(assembled);
334 }
335 }
336 is_entry_run = false;
337 } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
338 if cursor.is_exhausted() {
356 break;
357 }
358 let seg = &segments[cursor.position()];
359 if !matcher::matches_segment_tag(&seg.id, current_tag) {
360 break;
361 }
362 let has_following_non_entry = if cursor.position() + 1 < segments.len() {
365 let next = &segments[cursor.position() + 1];
366 !matcher::matches_segment_tag(&next.id, &entry_segment.id)
367 && mig_group.segments.iter().any(|s| {
368 matcher::matches_segment_tag(&next.id, &s.id)
369 && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
370 })
371 } else {
372 false
373 };
374 if has_following_non_entry {
375 instance.segments.push(owned_to_assembled(seg));
377 cursor.advance();
378 } else {
379 break;
381 }
382 } else {
383 while !cursor.is_exhausted() {
385 let seg = &segments[cursor.position()];
386 if matcher::matches_segment_tag(&seg.id, current_tag) {
387 instance.segments.push(owned_to_assembled(seg));
388 cursor.advance();
389 } else {
390 break;
391 }
392 }
393 }
394
395 slot_idx += run_len;
396
397 if self.config.skip_unknown_segments {
402 while !cursor.is_exhausted() {
403 let seg = &segments[cursor.position()];
404 if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
406 break;
407 }
408 if mig_group.segments[slot_idx..]
410 .iter()
411 .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
412 {
413 break;
414 }
415 if mig_group.nested_groups.iter().any(|ng| {
417 ng.segments
418 .first()
419 .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
420 }) {
421 break;
422 }
423 instance.skipped_segments.push(owned_to_assembled(seg));
425 cursor.advance();
426 }
427 }
428 }
429
430 let mut nested_idx = 0;
432 while nested_idx < mig_group.nested_groups.len() {
433 if cursor.is_exhausted() {
434 break;
435 }
436 let nested = &mig_group.nested_groups[nested_idx];
437
438 if nested.variant_code.is_some() {
439 let variant_count = mig_group.nested_groups[nested_idx..]
441 .iter()
442 .take_while(|g| g.id == nested.id && g.variant_code.is_some())
443 .count();
444 let variant_end = nested_idx + variant_count;
445 let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
446 if let Some(combined) =
447 self.try_consume_variant_groups(segments, cursor, variant_groups)?
448 {
449 instance.child_groups.push(combined);
450 }
451 nested_idx = variant_end;
452 } else {
453 if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
454 instance.child_groups.push(assembled);
455 }
456 nested_idx += 1;
457 }
458 }
459
460 repetitions.push(instance);
461 }
462
463 if repetitions.is_empty() {
464 Ok(None)
465 } else {
466 Ok(Some(AssembledGroup {
467 group_id: mig_group.id.clone(),
468 repetitions,
469 }))
470 }
471 }
472
473 fn try_consume_variant_groups(
479 &self,
480 segments: &[OwnedSegment],
481 cursor: &mut SegmentCursor,
482 variants: &[MigSegmentGroup],
483 ) -> Result<Option<AssembledGroup>, AssemblyError> {
484 let group_id = variants[0].id.clone();
485 let entry_tag = variants[0]
486 .segments
487 .first()
488 .map(|s| s.id.as_str())
489 .unwrap_or("");
490 let mut all_reps = Vec::new();
491
492 while !cursor.is_exhausted() {
493 let seg = &segments[cursor.position()];
494 if !matcher::matches_segment_tag(&seg.id, entry_tag) {
495 break;
496 }
497
498 let matched = variants.iter().find(|v| {
502 let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
503 let actual_qual = seg
504 .elements
505 .get(ei)
506 .and_then(|e| e.get(ci))
507 .map(|s| s.as_str())
508 .unwrap_or("");
509 if !v.variant_codes.is_empty() {
510 v.variant_codes
511 .iter()
512 .any(|c| actual_qual.eq_ignore_ascii_case(c))
513 } else if let Some(ref expected_code) = v.variant_code {
514 actual_qual.eq_ignore_ascii_case(expected_code)
515 } else {
516 false
517 }
518 });
519
520 if let Some(variant) = matched {
521 if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
522 all_reps.extend(group.repetitions);
523 } else {
524 break;
525 }
526 } else {
527 if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
531 all_reps.extend(group.repetitions);
532 } else {
533 break;
534 }
535 }
536 }
537
538 if all_reps.is_empty() {
539 Ok(None)
540 } else {
541 Ok(Some(AssembledGroup {
542 group_id,
543 repetitions: all_reps,
544 }))
545 }
546 }
547
548 pub fn assemble_with_diagnostics(
553 &self,
554 segments: &[OwnedSegment],
555 ) -> (AssembledTree, Vec<StructureDiagnostic>) {
556 let mut diagnostics = Vec::new();
557
558 let tree = match self.assemble_generic(segments) {
559 Ok(tree) => tree,
560 Err(e) => {
561 diagnostics.push(StructureDiagnostic {
562 kind: StructureDiagnosticKind::UnexpectedSegment,
563 segment_id: String::new(),
564 position: 0,
565 message: format!("Assembly failed: {e}"),
566 });
567 return (
568 AssembledTree {
569 segments: Vec::new(),
570 groups: Vec::new(),
571 post_group_start: 0,
572 inter_group_segments: std::collections::BTreeMap::new(),
573 },
574 diagnostics,
575 );
576 }
577 };
578
579 let consumed = count_tree_segments(&tree);
581
582 for (i, seg) in segments.iter().enumerate().skip(consumed) {
584 diagnostics.push(StructureDiagnostic {
585 kind: StructureDiagnosticKind::UnexpectedSegment,
586 segment_id: seg.id.clone(),
587 position: i,
588 message: format!(
589 "Segment '{}' at position {} was not consumed by MIG-guided assembly",
590 seg.id, i
591 ),
592 });
593 }
594
595 (tree, diagnostics)
596 }
597}
598
599fn count_tree_segments(tree: &AssembledTree) -> usize {
600 let mut count = tree.segments.len();
601 for group in &tree.groups {
602 count += count_group_segments(group);
603 }
604 for segs in tree.inter_group_segments.values() {
606 count += segs.len();
607 }
608 count
609}
610
611fn count_group_segments(group: &AssembledGroup) -> usize {
612 let mut count = 0;
613 for rep in &group.repetitions {
614 count += rep.segments.len();
615 count += rep.skipped_segments.len();
616 for child in &rep.child_groups {
617 count += count_group_segments(child);
618 }
619 }
620 count
621}
622
623fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
628 let mut numbers = Vec::new();
629 for seg in &group.segments {
630 if let Some(ref num) = seg.number {
631 numbers.push(num.clone());
632 }
633 }
634 for nested in &group.nested_groups {
635 numbers.extend(collect_mig_numbers(nested));
636 }
637 numbers
638}
639
640pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
641 AssembledSegment {
642 tag: seg.id.clone(),
643 elements: seg.elements.clone(),
644 mig_number: None,
645 }
646}
647
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};

    /// Builds an `OwnedSegment` from borrowed string data; the segment number is 0.
    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
        let elements = elements
            .into_iter()
            .map(|components| components.into_iter().map(|c| c.to_string()).collect())
            .collect();
        OwnedSegment {
            id: id.to_string(),
            elements,
            segment_number: 0,
        }
    }

    /// Builds a schema with the given top-level segment tags and groups; all other
    /// metadata is fixed test data.
    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
        let top_level = segments.into_iter().map(make_mig_segment).collect();
        MigSchema {
            message_type: "UTILMD".to_string(),
            variant: Some("Strom".to_string()),
            version: "S2.1".to_string(),
            publication_date: "2025-03-20".to_string(),
            author: "BDEW".to_string(),
            format_version: "FV2504".to_string(),
            source_file: "test".to_string(),
            segments: top_level,
            segment_groups: groups,
        }
    }

    /// Tags of the given assembled segments, in order.
    fn tags(segments: &[AssembledSegment]) -> Vec<&str> {
        segments.iter().map(|s| s.tag.as_str()).collect()
    }

    /// Assembler with `skip_unknown_segments` switched on.
    fn skipping_assembler(mig: &MigSchema) -> Assembler<'_> {
        Assembler::with_config(
            mig,
            AssemblerConfig {
                skip_unknown_segments: true,
            },
        )
    }

    /// Variant qualifier (first component of the first element) of each repetition's
    /// entry segment.
    fn entry_qualifiers(group: &AssembledGroup) -> Vec<&str> {
        group
            .repetitions
            .iter()
            .map(|rep| rep.segments[0].elements[0][0].as_str())
            .collect()
    }

    /// A schema with only top-level segments consumes them all in order.
    #[test]
    fn test_assembler_top_level_segments_only() {
        let schema = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
            make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
            make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
            make_owned_seg("UNT", vec![vec!["4", "001"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tags(&tree.segments), ["UNH", "BGM", "DTM", "UNT"]);
        assert!(tree.groups.is_empty());
    }

    /// Repeating and multi-segment groups are assembled after the header segments.
    #[test]
    fn test_assembler_with_segment_group() {
        let schema = make_mig_schema(
            vec!["UNH", "BGM"],
            vec![
                make_mig_group("SG2", vec!["NAD"], vec![]),
                make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
            ],
        );
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tree.segments.len(), 2);
        let [sg2, sg4] = tree.groups.as_slice() else {
            panic!("expected exactly two groups, got {}", tree.groups.len());
        };
        assert_eq!(sg2.group_id, "SG2");
        assert_eq!(sg2.repetitions.len(), 2);
        for rep in &sg2.repetitions {
            assert_eq!(rep.segments[0].tag, "NAD");
        }
        assert_eq!(sg4.group_id, "SG4");
        assert_eq!(sg4.repetitions.len(), 1);
        assert_eq!(sg4.repetitions[0].segments.len(), 2);
    }

    /// A nested group ends up as a child group of its parent's repetition.
    #[test]
    fn test_assembler_nested_groups() {
        let contact = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
        let schema = make_mig_schema(
            vec!["UNH", "BGM"],
            vec![make_mig_group("SG2", vec!["NAD"], vec![contact])],
        );
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
            make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        let parent = &tree.groups[0];
        assert_eq!(parent.group_id, "SG2");
        assert_eq!(parent.repetitions.len(), 1);

        let parent_rep = &parent.repetitions[0];
        assert_eq!(parent_rep.segments[0].tag, "NAD");
        assert_eq!(parent_rep.child_groups.len(), 1);

        let child = &parent_rep.child_groups[0];
        assert_eq!(child.group_id, "SG3");
        assert_eq!(tags(&child.repetitions[0].segments), ["CTA", "COM"]);
    }

    /// Schema segments absent from the input are passed over.
    #[test]
    fn test_assembler_optional_segments_skipped() {
        let schema = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("UNT", vec![vec!["2", "001"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tags(&tree.segments), ["UNH", "BGM", "UNT"]);
    }

    /// Empty input yields an empty tree.
    #[test]
    fn test_assembler_empty_segments() {
        let schema = make_mig_schema(vec!["UNH"], vec![]);

        let tree = Assembler::new(&schema).assemble_generic(&[]).unwrap();

        assert!(tree.segments.is_empty());
        assert!(tree.groups.is_empty());
    }

    /// Element and component data are copied unchanged.
    #[test]
    fn test_assembler_preserves_element_data() {
        let schema = make_mig_schema(vec!["DTM"], vec![]);
        let input = vec![make_owned_seg(
            "DTM",
            vec![vec!["137", "202501010000+01", "303"]],
        )];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        let dtm = &tree.segments[0];
        for (idx, expected) in ["137", "202501010000+01", "303"].into_iter().enumerate() {
            assert_eq!(dtm.elements[0][idx], expected);
        }
    }

    /// `as_assembled_tree` exposes a repetition's segments and child groups.
    #[test]
    fn test_group_instance_as_assembled_tree() {
        let plain_segment = |tag: &str, components: &[&str]| AssembledSegment {
            tag: tag.to_string(),
            elements: vec![components.iter().map(|c| c.to_string()).collect()],
            mig_number: None,
        };
        let plain_instance = |segments, child_groups| AssembledGroupInstance {
            segments,
            child_groups,
            entry_mig_number: None,
            variant_mig_numbers: vec![],
            skipped_segments: vec![],
        };

        let location_group = AssembledGroup {
            group_id: "SG5".to_string(),
            repetitions: vec![plain_instance(
                vec![plain_segment("LOC", &["Z16", "DE000111222333"])],
                vec![],
            )],
        };
        let transaction = plain_instance(
            vec![
                plain_segment("IDE", &["24", "TX001"]),
                plain_segment("STS", &["7"]),
            ],
            vec![location_group],
        );

        let sub_tree = transaction.as_assembled_tree();

        assert_eq!(tags(&sub_tree.segments), ["IDE", "STS"]);
        assert_eq!(sub_tree.groups.len(), 1);
        assert_eq!(sub_tree.groups[0].group_id, "SG5");
        assert_eq!(sub_tree.post_group_start, 2);
    }

    /// Segments produced by the tokenizer can be assembled directly.
    #[test]
    fn test_assembler_from_parsed_edifact() {
        let raw = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
        let parsed = crate::tokenize::parse_to_segments(raw).unwrap();
        let schema = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);

        let tree = Assembler::new(&schema).assemble_generic(&parsed).unwrap();

        for tag in ["UNH", "BGM", "DTM"] {
            assert!(tree.segments.iter().any(|s| s.tag == tag));
        }
    }

    /// Fully consumed input produces no diagnostics.
    #[test]
    fn test_assemble_with_diagnostics_clean_input() {
        let schema = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("UNT", vec![vec!["2", "001"]]),
        ];

        let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

        assert_eq!(tree.segments.len(), 3);
        assert!(
            diagnostics.is_empty(),
            "Clean input should have no diagnostics"
        );
    }

    /// A trailing segment unknown to the schema is reported with its position.
    #[test]
    fn test_assemble_with_diagnostics_unconsumed_segments() {
        let schema = make_mig_schema(vec!["UNH", "BGM"], vec![]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
        ];

        let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

        assert_eq!(tree.segments.len(), 2);
        assert_eq!(diagnostics.len(), 1);
        let only = &diagnostics[0];
        assert_eq!(only.kind, StructureDiagnosticKind::UnexpectedSegment);
        assert_eq!(only.segment_id, "FTX");
        assert_eq!(only.position, 2);
    }

    /// Every unconsumed segment gets its own diagnostic, in input order.
    #[test]
    fn test_assemble_with_diagnostics_multiple_unconsumed() {
        let schema = make_mig_schema(vec!["UNH"], vec![]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("FOO", vec![]),
            make_owned_seg("BAR", vec![]),
            make_owned_seg("BAZ", vec![]),
        ];

        let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

        assert_eq!(tree.segments.len(), 1);
        let reported: Vec<&str> = diagnostics.iter().map(|d| d.segment_id.as_str()).collect();
        assert_eq!(reported, ["FOO", "BAR", "BAZ"]);
    }

    /// An unknown segment between two slots ends the group by default but is
    /// stepped over when skipping is enabled.
    #[test]
    fn test_skip_unknown_segment_between_slots() {
        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let schema = make_mig_schema(vec!["UNH"], vec![sg8]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        let strict_tree = Assembler::new(&schema).assemble_generic(&input).unwrap();
        assert_eq!(
            tags(&strict_tree.groups[0].repetitions[0].segments),
            ["SEQ"]
        );

        let lenient_tree = skipping_assembler(&schema)
            .assemble_generic(&input)
            .unwrap();
        assert_eq!(
            tags(&lenient_tree.groups[0].repetitions[0].segments),
            ["SEQ", "CCI"]
        );
    }

    /// Skipped segments are kept on the repetition that skipped them.
    #[test]
    fn test_skip_preserves_on_instance() {
        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let schema = make_mig_schema(vec!["UNH"], vec![sg8]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        let tree = skipping_assembler(&schema)
            .assemble_generic(&input)
            .unwrap();
        let rep = &tree.groups[0].repetitions[0];

        assert_eq!(rep.segments.len(), 2);
        assert_eq!(tags(&rep.skipped_segments), ["RFF", "DTM"]);
    }

    /// Skipping is disabled unless requested.
    #[test]
    fn test_skip_mode_off_default() {
        let schema = make_mig_schema(vec![], vec![]);
        let default_assembler = Assembler::new(&schema);
        assert!(!default_assembler.config.skip_unknown_segments);
    }

    /// Skipping stops at the entry segment of a nested group.
    #[test]
    fn test_skip_does_not_consume_nested_group_entry() {
        let location = make_mig_group("SG5", vec!["LOC"], vec![]);
        let transaction = make_mig_group("SG4", vec!["IDE", "STS"], vec![location]);
        let schema = make_mig_schema(vec!["UNH"], vec![transaction]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("IDE", vec![vec!["24"]]),
            make_owned_seg("FOO", vec![vec!["unknown"]]),
            make_owned_seg("STS", vec![vec!["7"]]),
            make_owned_seg("LOC", vec![vec!["Z16"]]),
        ];

        let tree = skipping_assembler(&schema)
            .assemble_generic(&input)
            .unwrap();
        let rep = &tree.groups[0].repetitions[0];

        assert_eq!(tags(&rep.segments), ["IDE", "STS"]);
        assert_eq!(tags(&rep.skipped_segments), ["FOO"]);

        assert_eq!(rep.child_groups.len(), 1);
        let nested = &rep.child_groups[0];
        assert_eq!(nested.group_id, "SG5");
        assert_eq!(nested.repetitions[0].segments[0].tag, "LOC");
    }

    /// Skipped segments survive disassembly and rendering; they are emitted after
    /// the regular segments of their group.
    #[test]
    fn test_roundtrip_with_skip() {
        use crate::disassembler::Disassembler;
        use crate::renderer::render_edifact;

        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let schema = make_mig_schema(vec!["UNH", "UNT"], vec![sg8]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("UNT", vec![vec!["4", "001"]]),
        ];

        let tree = skipping_assembler(&schema)
            .assemble_generic(&input)
            .unwrap();

        let dis = Disassembler::new(&schema).disassemble(&tree);
        let delimiters = edifact_primitives::EdifactDelimiters::default();
        let rendered = render_edifact(&dis, &delimiters);

        let expected_order = ["UNH", "SEQ", "CCI", "RFF", "UNT"];
        assert_eq!(dis.len(), 5);
        for (idx, expected_tag) in expected_order.iter().enumerate() {
            assert_eq!(dis[idx].tag, *expected_tag);
        }

        for fragment in [
            "UNH+001",
            "SEQ+Z98",
            "RFF+Z38:REF1",
            "CCI+Z30",
            "UNT+4:001",
        ] {
            assert!(rendered.contains(fragment));
        }
    }

    /// Interleaved repetitions of two variants end up in one group, in input order.
    #[test]
    fn test_variant_groups_interleaved_reps() {
        let zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
        let schema = make_mig_schema(vec!["UNH"], vec![zd7, z98]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z31"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tree.segments.len(), 1);
        assert_eq!(tree.groups.len(), 1);
        let combined = &tree.groups[0];
        assert_eq!(combined.group_id, "SG8");
        assert_eq!(entry_qualifiers(combined), ["ZD7", "Z98", "ZD7", "Z98"]);
        assert_eq!(combined.repetitions[0].segments[1].tag, "CCI");
        assert_eq!(combined.repetitions[1].segments[1].tag, "RFF");
    }

    /// Input using only the second declared variant is still assembled.
    #[test]
    fn test_variant_groups_single_variant_type() {
        let zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
        let schema = make_mig_schema(vec!["UNH"], vec![zd7, z98]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tree.groups.len(), 1);
        assert_eq!(entry_qualifiers(&tree.groups[0]), ["Z98", "Z98"]);
    }

    /// Groups without variant codes behave as before.
    #[test]
    fn test_non_variant_groups_unchanged() {
        let parties = make_mig_group("SG2", vec!["NAD"], vec![]);
        let transactions = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
        let schema = make_mig_schema(vec!["UNH", "BGM"], vec![parties, transactions]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tree.segments.len(), 2);
        let summary: Vec<(&str, usize)> = tree
            .groups
            .iter()
            .map(|g| (g.group_id.as_str(), g.repetitions.len()))
            .collect();
        assert_eq!(summary, [("SG2", 1 + 1), ("SG4", 1)]);
    }

    /// Each variant repetition collects its own nested child group.
    #[test]
    fn test_variant_groups_with_nested_children() {
        let sg10 = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
        let zd7 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
        let z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "Z98");
        let schema = make_mig_schema(vec!["UNH"], vec![zd7, z98]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("CCI", vec![vec!["Z31"]]),
            make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert_eq!(tree.groups.len(), 1);
        let combined = &tree.groups[0];
        assert_eq!(combined.repetitions.len(), 2);

        let first = &combined.repetitions[0];
        assert_eq!(first.child_groups.len(), 1);
        assert_eq!(first.child_groups[0].group_id, "SG10");
        assert_eq!(
            first.child_groups[0].repetitions[0].segments[0].elements[0][0],
            "Z30"
        );

        let second = &combined.repetitions[1];
        assert_eq!(second.child_groups.len(), 1);
        assert_eq!(
            second.child_groups[0].repetitions[0].segments[0].elements[0][0],
            "Z31"
        );
    }

    /// An entry segment whose qualifier matches no declared variant is not consumed.
    #[test]
    fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
        let zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let schema = make_mig_schema(vec!["UNH"], vec![zd7]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        assert!(tree.groups.is_empty());
    }

    /// Variant groups and plain groups can be mixed within one schema.
    #[test]
    fn test_mixed_variant_and_non_variant_groups() {
        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
        let zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
        let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
        let schema = make_mig_schema(vec!["UNH"], vec![sg2, zd7, z98, sg12]);
        let input = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
        ];

        let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

        let summary: Vec<(&str, usize)> = tree
            .groups
            .iter()
            .map(|g| (g.group_id.as_str(), g.repetitions.len()))
            .collect();
        assert_eq!(summary, [("SG2", 1), ("SG8", 2), ("SG12", 1)]);
    }
}
1303}