1use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// Result of assembling a flat segment list against a MIG schema.
///
/// Produced by `Assembler::assemble_generic` for a whole message, and by
/// `AssembledGroupInstance::as_assembled_tree` for a single group repetition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Segments that live outside any group, in the order they were consumed.
    pub segments: Vec<AssembledSegment>,
    /// Segment groups that matched, in the order they were consumed.
    pub groups: Vec<AssembledGroup>,
    /// Index into `segments` at which the segments consumed *after* the
    /// group pass begin; everything before it was consumed ahead of the groups.
    /// Defaults to `0` when absent from serialized input.
    #[serde(default)]
    pub post_group_start: usize,
    /// Top-level segments that were consumed between groups. The key is the
    /// length of `groups` at the moment the segments were consumed, i.e. the
    /// index the next matched group receives. Omitted from serialized output
    /// when empty.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// A single input segment after it has been placed into the assembled tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the input segment's `id`.
    pub tag: String,
    /// Segment payload copied verbatim from the input: the outer vector holds
    /// the data elements, the inner vector the components of each element.
    pub elements: Vec<Vec<String>>,
    /// `number` of the MIG segment slot this segment was matched against.
    /// `None` when the segment was consumed without a specific slot (for
    /// example as a repeated or skipped segment). Omitted from serialized
    /// output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mig_number: Option<String>,
}
44
/// All consecutive repetitions of one segment group found in the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// Identifier of the MIG group the repetitions were matched against.
    pub group_id: String,
    /// One entry per repetition, in input order. Never empty for groups
    /// produced by the assembler (an unmatched group yields no
    /// `AssembledGroup` at all).
    pub repetitions: Vec<AssembledGroupInstance>,
}
51
/// One repetition of a segment group: its own segments plus nested groups.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments consumed for this repetition, starting with the entry segment.
    pub segments: Vec<AssembledSegment>,
    /// Nested groups consumed after this repetition's own segments.
    pub child_groups: Vec<AssembledGroup>,
    /// `number` of the MIG group's first (entry) segment definition, if any.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_mig_number: Option<String>,
    /// Every segment `number` defined by the matched MIG group, including
    /// those of its nested groups (collected recursively). Omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub variant_mig_numbers: Vec<String>,
    /// Segments that were stepped over inside this repetition because
    /// `AssemblerConfig::skip_unknown_segments` was enabled. Omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
}
75
76impl AssembledGroupInstance {
77 pub fn as_assembled_tree(&self) -> AssembledTree {
84 AssembledTree {
85 segments: self.segments.clone(),
86 groups: self.child_groups.clone(),
87 post_group_start: self.segments.len(),
88 inter_group_segments: std::collections::BTreeMap::new(),
89 }
90 }
91}
92
/// Tuning knobs for [`Assembler`]. The derived `Default` leaves every option off.
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, a segment found inside a group repetition that matches
    /// neither the group's entry tag, any remaining segment slot of the group,
    /// nor the entry segment of one of its nested groups is recorded in
    /// `AssembledGroupInstance::skipped_segments` and stepped over. When
    /// `false` (the default) such a segment is left unconsumed.
    pub skip_unknown_segments: bool,
}
104
/// Assembles flat segment lists into an [`AssembledTree`] by walking a
/// borrowed MIG schema.
pub struct Assembler<'a> {
    /// Schema whose segment and group order drives the assembly.
    mig: &'a MigSchema,
    /// Behavior switches; see [`AssemblerConfig`].
    config: AssemblerConfig,
}
113
114impl<'a> Assembler<'a> {
115 pub fn new(mig: &'a MigSchema) -> Self {
116 Self {
117 mig,
118 config: AssemblerConfig::default(),
119 }
120 }
121
122 pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
123 Self { mig, config }
124 }
125
126 pub fn assemble_generic(
128 &self,
129 segments: &[OwnedSegment],
130 ) -> Result<AssembledTree, AssemblyError> {
131 let mut cursor = SegmentCursor::new(segments.len());
132 let mut tree = AssembledTree {
133 segments: Vec::new(),
134 groups: Vec::new(),
135 post_group_start: 0,
136 inter_group_segments: std::collections::BTreeMap::new(),
137 };
138
139 let mut matched_seg_indices = Vec::new();
141
142 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
144 if cursor.is_exhausted() {
145 break;
146 }
147 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
148 tree.segments.push(assembled);
149 matched_seg_indices.push(i);
150 }
151 }
152
153 let mut group_idx = 0;
162 while group_idx < self.mig.segment_groups.len() {
163 if cursor.is_exhausted() {
164 break;
165 }
166
167 let mig_group = &self.mig.segment_groups[group_idx];
168
169 let tree_group_idx = tree.groups.len();
171 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
172 if cursor.is_exhausted() {
173 break;
174 }
175 if matched_seg_indices.contains(&i) {
176 continue;
177 }
178 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
179 tree.inter_group_segments
180 .entry(tree_group_idx)
181 .or_default()
182 .push(assembled);
183 matched_seg_indices.push(i);
184 }
185 }
186
187 if mig_group.variant_code.is_some() {
189 let variant_count = self.mig.segment_groups[group_idx..]
190 .iter()
191 .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
192 .count();
193 let variant_end = group_idx + variant_count;
194
195 let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
196 if let Some(combined) =
197 self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
198 {
199 tree.groups.push(combined);
200 }
201 group_idx = variant_end;
202 } else {
203 if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
204 tree.groups.push(assembled);
205 }
206 group_idx += 1;
207 }
208 }
209
210 tree.post_group_start = tree.segments.len();
212
213 for (i, mig_seg) in self.mig.segments.iter().enumerate() {
215 if cursor.is_exhausted() {
216 break;
217 }
218 if matched_seg_indices.contains(&i) {
219 continue;
220 }
221 if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
222 tree.segments.push(assembled);
223 }
224 }
225
226 Ok(tree)
227 }
228
229 fn try_consume_segment(
230 &self,
231 segments: &[OwnedSegment],
232 cursor: &mut SegmentCursor,
233 mig_seg: &MigSegment,
234 ) -> Result<Option<AssembledSegment>, AssemblyError> {
235 if cursor.is_exhausted() {
236 return Ok(None);
237 }
238 let seg = &segments[cursor.position()];
239 if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
240 let mut assembled = owned_to_assembled(seg);
241 assembled.mig_number = mig_seg.number.clone();
242 cursor.advance();
243 Ok(Some(assembled))
244 } else {
245 Ok(None) }
247 }
248
    /// Consumes as many consecutive repetitions of `mig_group` as the input
    /// offers at the cursor.
    ///
    /// A repetition starts when the current segment matches the group's first
    /// (entry) segment tag and, for variant groups, carries an accepted
    /// qualifier. Returns `Ok(None)` when no repetition was consumed.
    ///
    /// # Errors
    ///
    /// Returns `AssemblyError::ParseError` when `mig_group` defines no
    /// segments, and propagates errors from nested group consumption.
    fn try_consume_group(
        &self,
        segments: &[OwnedSegment],
        cursor: &mut SegmentCursor,
        mig_group: &MigSegmentGroup,
    ) -> Result<Option<AssembledGroup>, AssemblyError> {
        let mut repetitions = Vec::new();
        // The first segment definition of the group acts as its entry marker.
        let entry_segment = mig_group.segments.first().ok_or_else(|| {
            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
        })?;

        // One iteration per repetition of the group.
        while !cursor.is_exhausted() {
            let seg = &segments[cursor.position()];
            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                break;
            }

            // Variant gate: the qualifier is read from the entry segment at
            // `variant_qualifier_position` (element index, component index),
            // defaulting to (0, 0); a missing value is treated as "". The
            // list `variant_codes` takes precedence over the single
            // `variant_code`. Comparison ignores ASCII case.
            if !mig_group.variant_codes.is_empty() {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !mig_group
                    .variant_codes
                    .iter()
                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
                {
                    break;
                }
            } else if let Some(ref expected_code) = mig_group.variant_code {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !actual_qual.eq_ignore_ascii_case(expected_code) {
                    break;
                }
            }

            let mut instance = AssembledGroupInstance {
                segments: Vec::new(),
                child_groups: Vec::new(),
                entry_mig_number: entry_segment.number.clone(),
                variant_mig_numbers: collect_mig_numbers(mig_group),
                skipped_segments: Vec::new(),
            };

            // Walk the group's segment slots in "runs" of consecutive slots
            // that share the same tag.
            let mut slot_idx = 0;
            // True only for the first run, which starts with the entry slot.
            let mut is_entry_run = true;
            while slot_idx < mig_group.segments.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let current_tag = &mig_group.segments[slot_idx].id;
                let run_len = mig_group.segments[slot_idx..]
                    .iter()
                    .take_while(|s| s.id == *current_tag)
                    .count();

                if is_entry_run {
                    // Entry run: at most one input segment per slot, so an
                    // extra entry-tagged segment is left to start the next
                    // repetition.
                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
                        if cursor.is_exhausted() {
                            break;
                        }
                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
                            instance.segments.push(assembled);
                        }
                    }
                    is_entry_run = false;
                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
                    // A later slot reuses the entry tag, which is ambiguous
                    // with the start of the next repetition. The segment is
                    // only taken for this repetition when the segment after
                    // it is not entry-tagged and matches one of this group's
                    // non-entry slots; otherwise slot processing for this
                    // repetition stops here.
                    if cursor.is_exhausted() {
                        break;
                    }
                    let seg = &segments[cursor.position()];
                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
                        break;
                    }
                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
                        let next = &segments[cursor.position() + 1];
                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
                            && mig_group.segments.iter().any(|s| {
                                matcher::matches_segment_tag(&next.id, &s.id)
                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
                            })
                    } else {
                        false
                    };
                    if has_following_non_entry {
                        // Stored without a MIG number (no slot is attached).
                        instance.segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    } else {
                        break;
                    }
                } else {
                    // Regular run: first give each slot of the run one chance
                    // (these segments carry the slot's MIG number) ...
                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
                        if cursor.is_exhausted() {
                            break;
                        }
                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
                            instance.segments.push(assembled);
                        }
                    }
                    // ... then absorb any further consecutive segments with
                    // the same tag; these are stored without a MIG number.
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        if matcher::matches_segment_tag(&seg.id, current_tag) {
                            instance.segments.push(owned_to_assembled(seg));
                            cursor.advance();
                        } else {
                            break;
                        }
                    }
                }

                slot_idx += run_len;

                if self.config.skip_unknown_segments {
                    // Step over segments this group cannot place, stopping at
                    // anything that could still be meaningful below.
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        // Entry tag: start of the next repetition.
                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                            break;
                        }
                        // Matches a slot that has not been processed yet.
                        if mig_group.segments[slot_idx..]
                            .iter()
                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
                        {
                            break;
                        }
                        // Matches the entry segment of a nested group.
                        if mig_group.nested_groups.iter().any(|ng| {
                            ng.segments
                                .first()
                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
                        }) {
                            break;
                        }
                        // Keep the skipped segment on the instance rather
                        // than dropping it.
                        instance.skipped_segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    }
                }
            }

            // Nested groups follow the repetition's own segments, in schema
            // order; consecutive variant definitions with the same id are
            // consumed together.
            let mut nested_idx = 0;
            while nested_idx < mig_group.nested_groups.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let nested = &mig_group.nested_groups[nested_idx];

                if nested.variant_code.is_some() {
                    let variant_count = mig_group.nested_groups[nested_idx..]
                        .iter()
                        .take_while(|g| g.id == nested.id && g.variant_code.is_some())
                        .count();
                    let variant_end = nested_idx + variant_count;
                    let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
                    if let Some(combined) =
                        self.try_consume_variant_groups(segments, cursor, variant_groups)?
                    {
                        instance.child_groups.push(combined);
                    }
                    nested_idx = variant_end;
                } else {
                    if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
                        instance.child_groups.push(assembled);
                    }
                    nested_idx += 1;
                }
            }

            repetitions.push(instance);
        }

        if repetitions.is_empty() {
            Ok(None)
        } else {
            Ok(Some(AssembledGroup {
                group_id: mig_group.id.clone(),
                repetitions,
            }))
        }
    }
485
486 fn try_consume_variant_groups(
492 &self,
493 segments: &[OwnedSegment],
494 cursor: &mut SegmentCursor,
495 variants: &[MigSegmentGroup],
496 ) -> Result<Option<AssembledGroup>, AssemblyError> {
497 let group_id = variants[0].id.clone();
498 let entry_tag = variants[0]
499 .segments
500 .first()
501 .map(|s| s.id.as_str())
502 .unwrap_or("");
503 let mut all_reps = Vec::new();
504
505 while !cursor.is_exhausted() {
506 let seg = &segments[cursor.position()];
507 if !matcher::matches_segment_tag(&seg.id, entry_tag) {
508 break;
509 }
510
511 let matched = variants.iter().find(|v| {
515 let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
516 let actual_qual = seg
517 .elements
518 .get(ei)
519 .and_then(|e| e.get(ci))
520 .map(|s| s.as_str())
521 .unwrap_or("");
522 if !v.variant_codes.is_empty() {
523 v.variant_codes
524 .iter()
525 .any(|c| actual_qual.eq_ignore_ascii_case(c))
526 } else if let Some(ref expected_code) = v.variant_code {
527 actual_qual.eq_ignore_ascii_case(expected_code)
528 } else {
529 false
530 }
531 });
532
533 if let Some(variant) = matched {
534 if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
535 all_reps.extend(group.repetitions);
536 } else {
537 break;
538 }
539 } else {
540 if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
544 all_reps.extend(group.repetitions);
545 } else {
546 break;
547 }
548 }
549 }
550
551 if all_reps.is_empty() {
552 Ok(None)
553 } else {
554 Ok(Some(AssembledGroup {
555 group_id,
556 repetitions: all_reps,
557 }))
558 }
559 }
560
561 pub fn assemble_with_diagnostics(
566 &self,
567 segments: &[OwnedSegment],
568 ) -> (AssembledTree, Vec<StructureDiagnostic>) {
569 let mut diagnostics = Vec::new();
570
571 let tree = match self.assemble_generic(segments) {
572 Ok(tree) => tree,
573 Err(e) => {
574 diagnostics.push(StructureDiagnostic {
575 kind: StructureDiagnosticKind::UnexpectedSegment,
576 segment_id: String::new(),
577 position: 0,
578 message: format!("Assembly failed: {e}"),
579 });
580 return (
581 AssembledTree {
582 segments: Vec::new(),
583 groups: Vec::new(),
584 post_group_start: 0,
585 inter_group_segments: std::collections::BTreeMap::new(),
586 },
587 diagnostics,
588 );
589 }
590 };
591
592 let consumed = count_tree_segments(&tree);
594
595 for (i, seg) in segments.iter().enumerate().skip(consumed) {
597 diagnostics.push(StructureDiagnostic {
598 kind: StructureDiagnosticKind::UnexpectedSegment,
599 segment_id: seg.id.clone(),
600 position: i,
601 message: format!(
602 "Segment '{}' at position {} was not consumed by MIG-guided assembly",
603 seg.id, i
604 ),
605 });
606 }
607
608 (tree, diagnostics)
609 }
610}
611
612fn count_tree_segments(tree: &AssembledTree) -> usize {
613 let mut count = tree.segments.len();
614 for group in &tree.groups {
615 count += count_group_segments(group);
616 }
617 for segs in tree.inter_group_segments.values() {
619 count += segs.len();
620 }
621 count
622}
623
624fn count_group_segments(group: &AssembledGroup) -> usize {
625 let mut count = 0;
626 for rep in &group.repetitions {
627 count += rep.segments.len();
628 count += rep.skipped_segments.len();
629 for child in &rep.child_groups {
630 count += count_group_segments(child);
631 }
632 }
633 count
634}
635
636fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
641 let mut numbers = Vec::new();
642 for seg in &group.segments {
643 if let Some(ref num) = seg.number {
644 numbers.push(num.clone());
645 }
646 }
647 for nested in &group.nested_groups {
648 numbers.extend(collect_mig_numbers(nested));
649 }
650 numbers
651}
652
653pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
654 AssembledSegment {
655 tag: seg.id.clone(),
656 elements: seg.elements.clone(),
657 mig_number: None,
658 }
659}
660
// Unit tests for the assembler: top-level segments, groups, nested groups,
// diagnostics, skip mode and variant groups.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};

    // Builds an input segment from string slices; `segment_number` is fixed at 0.
    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
        OwnedSegment {
            id: id.to_string(),
            elements: elements
                .into_iter()
                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
                .collect(),
            segment_number: 0,
        }
    }

    // Builds a schema with fixed metadata, the given top-level segment tags
    // and the given top-level groups.
    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
        MigSchema {
            message_type: "UTILMD".to_string(),
            variant: Some("Strom".to_string()),
            version: "S2.1".to_string(),
            publication_date: "2025-03-20".to_string(),
            author: "BDEW".to_string(),
            format_version: "FV2504".to_string(),
            source_file: "test".to_string(),
            segments: segments.into_iter().map(make_mig_segment).collect(),
            segment_groups: groups,
        }
    }

    // A schema without groups: every input segment lands in `segments`, in order.
    #[test]
    fn test_assembler_top_level_segments_only() {
        let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
            make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
            make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
            make_owned_seg("UNT", vec![vec!["4", "001"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.segments.len(), 4);
        assert_eq!(result.segments[0].tag, "UNH");
        assert_eq!(result.segments[1].tag, "BGM");
        assert_eq!(result.segments[2].tag, "DTM");
        assert_eq!(result.segments[3].tag, "UNT");
        assert!(result.groups.is_empty());
    }

    // Two sibling groups: SG2 repeats twice (one NAD each), SG4 occurs once.
    #[test]
    fn test_assembler_with_segment_group() {
        let mig = make_mig_schema(
            vec!["UNH", "BGM"],
            vec![
                make_mig_group("SG2", vec!["NAD"], vec![]),
                make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
            ],
        );

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        // UNH and BGM stay at the top level.
        assert_eq!(result.segments.len(), 2);
        assert_eq!(result.groups.len(), 2);
        assert_eq!(result.groups[0].group_id, "SG2");
        assert_eq!(result.groups[0].repetitions.len(), 2);
        assert_eq!(result.groups[0].repetitions[0].segments[0].tag, "NAD");
        assert_eq!(result.groups[0].repetitions[1].segments[0].tag, "NAD");
        assert_eq!(result.groups[1].group_id, "SG4");
        assert_eq!(result.groups[1].repetitions.len(), 1);
        assert_eq!(result.groups[1].repetitions[0].segments.len(), 2);
    }

    // SG3 nested inside SG2 ends up as a child group of the SG2 repetition.
    #[test]
    fn test_assembler_nested_groups() {
        let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
        let mig = make_mig_schema(
            vec!["UNH", "BGM"],
            vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])],
        );

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
            make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        let sg2 = &result.groups[0];
        assert_eq!(sg2.group_id, "SG2");
        assert_eq!(sg2.repetitions.len(), 1);

        let sg2_inst = &sg2.repetitions[0];
        assert_eq!(sg2_inst.segments[0].tag, "NAD");

        assert_eq!(sg2_inst.child_groups.len(), 1);
        let sg3 = &sg2_inst.child_groups[0];
        assert_eq!(sg3.group_id, "SG3");
        assert_eq!(sg3.repetitions[0].segments.len(), 2);
        assert_eq!(sg3.repetitions[0].segments[0].tag, "CTA");
        assert_eq!(sg3.repetitions[0].segments[1].tag, "COM");
    }

    // A schema slot with no matching input (DTM) is simply passed over.
    #[test]
    fn test_assembler_optional_segments_skipped() {
        let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("UNT", vec![vec!["2", "001"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.segments.len(), 3);
        assert_eq!(result.segments[0].tag, "UNH");
        assert_eq!(result.segments[1].tag, "BGM");
        assert_eq!(result.segments[2].tag, "UNT");
    }

    // Empty input yields an empty tree, not an error.
    #[test]
    fn test_assembler_empty_segments() {
        let mig = make_mig_schema(vec!["UNH"], vec![]);
        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&[]).unwrap();
        assert!(result.segments.is_empty());
        assert!(result.groups.is_empty());
    }

    // Element and component values are copied unchanged into the tree.
    #[test]
    fn test_assembler_preserves_element_data() {
        let mig = make_mig_schema(vec!["DTM"], vec![]);

        let segments = vec![make_owned_seg(
            "DTM",
            vec![vec!["137", "202501010000+01", "303"]],
        )];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        let dtm = &result.segments[0];
        assert_eq!(dtm.elements[0][0], "137");
        assert_eq!(dtm.elements[0][1], "202501010000+01");
        assert_eq!(dtm.elements[0][2], "303");
    }

    // `as_assembled_tree` exposes an instance's segments and child groups as
    // a tree whose `post_group_start` equals the segment count.
    #[test]
    fn test_group_instance_as_assembled_tree() {
        let sg5 = AssembledGroup {
            group_id: "SG5".to_string(),
            repetitions: vec![AssembledGroupInstance {
                segments: vec![AssembledSegment {
                    tag: "LOC".to_string(),
                    elements: vec![vec!["Z16".to_string(), "DE000111222333".to_string()]],
                    mig_number: None,
                }],
                child_groups: vec![],
                entry_mig_number: None,
                variant_mig_numbers: vec![],
                skipped_segments: vec![],
            }],
        };

        let sg4_instance = AssembledGroupInstance {
            segments: vec![
                AssembledSegment {
                    tag: "IDE".to_string(),
                    elements: vec![vec!["24".to_string(), "TX001".to_string()]],
                    mig_number: None,
                },
                AssembledSegment {
                    tag: "STS".to_string(),
                    elements: vec![vec!["7".to_string()]],
                    mig_number: None,
                },
            ],
            child_groups: vec![sg5],
            entry_mig_number: None,
            variant_mig_numbers: vec![],
            skipped_segments: vec![],
        };

        let sub_tree = sg4_instance.as_assembled_tree();

        assert_eq!(sub_tree.segments.len(), 2);
        assert_eq!(sub_tree.segments[0].tag, "IDE");
        assert_eq!(sub_tree.segments[1].tag, "STS");

        assert_eq!(sub_tree.groups.len(), 1);
        assert_eq!(sub_tree.groups[0].group_id, "SG5");

        assert_eq!(sub_tree.post_group_start, 2);
    }

    // End-to-end: tokenize a raw EDIFACT interchange, then assemble it.
    #[test]
    fn test_assembler_from_parsed_edifact() {
        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
        let segments = crate::tokenize::parse_to_segments(input).unwrap();

        let mig = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert!(result.segments.iter().any(|s| s.tag == "UNH"));
        assert!(result.segments.iter().any(|s| s.tag == "BGM"));
        assert!(result.segments.iter().any(|s| s.tag == "DTM"));
    }

    // Fully consumed input produces no diagnostics.
    #[test]
    fn test_assemble_with_diagnostics_clean_input() {
        let mig = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("UNT", vec![vec!["2", "001"]]),
        ];
        let assembler = Assembler::new(&mig);
        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
        assert_eq!(tree.segments.len(), 3);
        assert!(
            diagnostics.is_empty(),
            "Clean input should have no diagnostics"
        );
    }

    // A trailing segment unknown to the schema is reported with its tag and
    // input position.
    #[test]
    fn test_assemble_with_diagnostics_unconsumed_segments() {
        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![]);
        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
        ];
        let assembler = Assembler::new(&mig);
        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
        assert_eq!(tree.segments.len(), 2);
        assert_eq!(diagnostics.len(), 1);
        assert_eq!(
            diagnostics[0].kind,
            StructureDiagnosticKind::UnexpectedSegment
        );
        assert_eq!(diagnostics[0].segment_id, "FTX");
        assert_eq!(diagnostics[0].position, 2);
    }

    // Every leftover segment gets its own diagnostic, in input order.
    #[test]
    fn test_assemble_with_diagnostics_multiple_unconsumed() {
        let mig = make_mig_schema(vec!["UNH"], vec![]);
        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("FOO", vec![]),
            make_owned_seg("BAR", vec![]),
            make_owned_seg("BAZ", vec![]),
        ];
        let assembler = Assembler::new(&mig);
        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
        assert_eq!(tree.segments.len(), 1);
        assert_eq!(diagnostics.len(), 3);
        assert_eq!(diagnostics[0].segment_id, "FOO");
        assert_eq!(diagnostics[1].segment_id, "BAR");
        assert_eq!(diagnostics[2].segment_id, "BAZ");
    }

    // An unknown RFF between SEQ and CCI: with skip mode off the repetition
    // ends after SEQ; with skip mode on CCI is still picked up.
    #[test]
    fn test_skip_unknown_segment_between_slots() {
        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let mig = make_mig_schema(vec!["UNH"], vec![sg8.clone()]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        // Default configuration: skip mode off.
        let off = Assembler::new(&mig);
        let tree_off = off.assemble_generic(&segments).unwrap();
        let sg8_off = &tree_off.groups[0];
        assert_eq!(sg8_off.repetitions[0].segments.len(), 1);
        assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");

        // Skip mode on.
        let on = Assembler::with_config(
            &mig,
            AssemblerConfig {
                skip_unknown_segments: true,
            },
        );
        let tree_on = on.assemble_generic(&segments).unwrap();
        let sg8_on = &tree_on.groups[0];
        assert_eq!(sg8_on.repetitions[0].segments.len(), 2);
        assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
        assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
    }

    // Skipped segments are kept on the instance, in input order.
    #[test]
    fn test_skip_preserves_on_instance() {
        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let mig = make_mig_schema(vec!["UNH"], vec![sg8]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        let assembler = Assembler::with_config(
            &mig,
            AssemblerConfig {
                skip_unknown_segments: true,
            },
        );
        let tree = assembler.assemble_generic(&segments).unwrap();
        let instance = &tree.groups[0].repetitions[0];

        assert_eq!(instance.segments.len(), 2);
        assert_eq!(instance.skipped_segments.len(), 2);
        assert_eq!(instance.skipped_segments[0].tag, "RFF");
        assert_eq!(instance.skipped_segments[1].tag, "DTM");
    }

    // `Assembler::new` leaves skip mode disabled.
    #[test]
    fn test_skip_mode_off_default() {
        let mig = make_mig_schema(vec![], vec![]);
        let assembler = Assembler::new(&mig);
        assert!(!assembler.config.skip_unknown_segments);
    }

    // Skip mode must stop at a nested group's entry segment (LOC) so the
    // nested group can still be assembled.
    #[test]
    fn test_skip_does_not_consume_nested_group_entry() {
        let sg5 = make_mig_group("SG5", vec!["LOC"], vec![]);
        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![sg5]);
        let mig = make_mig_schema(vec!["UNH"], vec![sg4]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("IDE", vec![vec!["24"]]),
            make_owned_seg("FOO", vec![vec!["unknown"]]),
            make_owned_seg("STS", vec![vec!["7"]]),
            make_owned_seg("LOC", vec![vec!["Z16"]]),
        ];

        let assembler = Assembler::with_config(
            &mig,
            AssemblerConfig {
                skip_unknown_segments: true,
            },
        );
        let tree = assembler.assemble_generic(&segments).unwrap();
        let sg4 = &tree.groups[0];
        let inst = &sg4.repetitions[0];

        assert_eq!(inst.segments.len(), 2);
        assert_eq!(inst.segments[0].tag, "IDE");
        assert_eq!(inst.segments[1].tag, "STS");
        assert_eq!(inst.skipped_segments.len(), 1);
        assert_eq!(inst.skipped_segments[0].tag, "FOO");

        assert_eq!(inst.child_groups.len(), 1);
        assert_eq!(inst.child_groups[0].group_id, "SG5");
        assert_eq!(inst.child_groups[0].repetitions[0].segments[0].tag, "LOC");
    }

    // Assemble with skip mode, disassemble and render: no segment is lost,
    // but the skipped RFF is emitted after the group's matched segments.
    #[test]
    fn test_roundtrip_with_skip() {
        use crate::disassembler::Disassembler;
        use crate::renderer::render_edifact;

        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
        let mig = make_mig_schema(vec!["UNH", "UNT"], vec![sg8]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("UNT", vec![vec!["4", "001"]]),
        ];

        let assembler = Assembler::with_config(
            &mig,
            AssemblerConfig {
                skip_unknown_segments: true,
            },
        );
        let tree = assembler.assemble_generic(&segments).unwrap();

        let disassembler = Disassembler::new(&mig);
        let dis = disassembler.disassemble(&tree);
        let delimiters = edifact_primitives::EdifactDelimiters::default();
        let rendered = render_edifact(&dis, &delimiters);

        assert_eq!(dis.len(), 5);
        assert_eq!(dis[0].tag, "UNH");
        assert_eq!(dis[1].tag, "SEQ");
        assert_eq!(dis[2].tag, "CCI");
        assert_eq!(dis[3].tag, "RFF");
        assert_eq!(dis[4].tag, "UNT");

        assert!(rendered.contains("UNH+001"));
        assert!(rendered.contains("SEQ+Z98"));
        assert!(rendered.contains("RFF+Z38:REF1"));
        assert!(rendered.contains("CCI+Z30"));
        assert!(rendered.contains("UNT+4:001"));
    }

    // Repetitions of two SG8 variants alternate in the input; they are merged
    // into a single SG8 group that keeps the input order.
    #[test]
    fn test_variant_groups_interleaved_reps() {
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");

        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z31"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.segments.len(), 1);
        assert_eq!(result.groups.len(), 1);
        let sg8 = &result.groups[0];
        assert_eq!(sg8.group_id, "SG8");
        assert_eq!(sg8.repetitions.len(), 4);

        assert_eq!(sg8.repetitions[0].segments[0].elements[0][0], "ZD7");
        assert_eq!(sg8.repetitions[0].segments[1].tag, "CCI");
        assert_eq!(sg8.repetitions[1].segments[0].elements[0][0], "Z98");
        assert_eq!(sg8.repetitions[1].segments[1].tag, "RFF");
        assert_eq!(sg8.repetitions[2].segments[0].elements[0][0], "ZD7");
        assert_eq!(sg8.repetitions[3].segments[0].elements[0][0], "Z98");
    }

    // Only the second declared variant (Z98) occurs in the input.
    #[test]
    fn test_variant_groups_single_variant_type() {
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");

        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.groups.len(), 1);
        assert_eq!(result.groups[0].repetitions.len(), 2);
        assert_eq!(
            result.groups[0].repetitions[0].segments[0].elements[0][0],
            "Z98"
        );
        assert_eq!(
            result.groups[0].repetitions[1].segments[0].elements[0][0],
            "Z98"
        );
    }

    // Groups without variant codes behave as in the plain group test.
    #[test]
    fn test_non_variant_groups_unchanged() {
        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);

        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![sg2, sg4]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("BGM", vec![vec!["E01"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.segments.len(), 2);
        assert_eq!(result.groups.len(), 2);
        assert_eq!(result.groups[0].group_id, "SG2");
        assert_eq!(result.groups[0].repetitions.len(), 2);
        assert_eq!(result.groups[1].group_id, "SG4");
        assert_eq!(result.groups[1].repetitions.len(), 1);
    }

    // Each variant repetition receives its own nested SG10 child group.
    #[test]
    fn test_variant_groups_with_nested_children() {
        let sg10 = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "Z98");

        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("CCI", vec![vec!["Z31"]]),
            make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.groups.len(), 1);
        let sg8 = &result.groups[0];
        assert_eq!(sg8.repetitions.len(), 2);

        assert_eq!(sg8.repetitions[0].child_groups.len(), 1);
        assert_eq!(sg8.repetitions[0].child_groups[0].group_id, "SG10");
        assert_eq!(
            sg8.repetitions[0].child_groups[0].repetitions[0].segments[0].elements[0][0],
            "Z30"
        );

        assert_eq!(sg8.repetitions[1].child_groups.len(), 1);
        assert_eq!(
            sg8.repetitions[1].child_groups[0].repetitions[0].segments[0].elements[0][0],
            "Z31"
        );
    }

    // An entry segment whose qualifier matches no declared variant must not
    // be consumed by the only (ZD7) variant.
    #[test]
    fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");

        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert!(result.groups.is_empty());
    }

    // Variant groups sandwiched between plain groups: the two SG8 variants
    // collapse into one group, giving three groups in total.
    #[test]
    fn test_mixed_variant_and_non_variant_groups() {
        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
        let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);

        let mig = make_mig_schema(vec!["UNH"], vec![sg2, sg8_zd7, sg8_z98, sg12]);

        let segments = vec![
            make_owned_seg("UNH", vec![vec!["001"]]),
            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
            make_owned_seg("CCI", vec![vec!["Z30"]]),
            make_owned_seg("SEQ", vec![vec!["Z98"]]),
            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
            make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
        ];

        let assembler = Assembler::new(&mig);
        let result = assembler.assemble_generic(&segments).unwrap();

        assert_eq!(result.groups.len(), 3);
        assert_eq!(result.groups[0].group_id, "SG2");
        assert_eq!(result.groups[0].repetitions.len(), 1);
        assert_eq!(result.groups[1].group_id, "SG8");
        assert_eq!(result.groups[1].repetitions.len(), 2);
        assert_eq!(result.groups[2].group_id, "SG12");
        assert_eq!(result.groups[2].repetitions.len(), 1);
    }
}