1use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// Result of assembling a flat segment list against a MIG schema.
///
/// Produced by `Assembler::assemble_generic` and by
/// `AssembledGroupInstance::as_assembled_tree`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Root-level segments: those consumed before the groups, followed by
    /// those consumed after the groups (see `post_group_start`).
    pub segments: Vec<AssembledSegment>,
    /// Segment groups in the order they were consumed.
    pub groups: Vec<AssembledGroup>,
    /// Index into `segments` at which the segments consumed after all groups
    /// begin. Defaults to 0 when absent from serialized input.
    #[serde(default)]
    pub post_group_start: usize,
    /// Segments consumed between groups, keyed by the number of groups that
    /// had already been pushed to `groups` when they were consumed.
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// A single segment captured during assembly.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the input segment's `id`.
    pub tag: String,
    /// Segment data as a list of elements, each a list of component strings.
    pub elements: Vec<Vec<String>>,
    /// `number` of the MIG segment this segment was matched against, if it
    /// was matched through a MIG slot. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mig_number: Option<String>,
    /// `segment_number` of the input segment, when the segment was built from
    /// one. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub segment_number: Option<u32>,
}
51
/// All consumed repetitions of one segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// Identifier of the MIG group the repetitions belong to.
    pub group_id: String,
    /// One entry per consumed repetition, in input order.
    pub repetitions: Vec<AssembledGroupInstance>,
}
58
/// One repetition of a segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments that belong directly to this repetition.
    pub segments: Vec<AssembledSegment>,
    /// Nested groups consumed inside this repetition.
    pub child_groups: Vec<AssembledGroup>,
    /// `number` of the MIG group's first (entry) segment, if it has one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_mig_number: Option<String>,
    /// MIG numbers of every segment of the matched MIG group and, recursively,
    /// of its nested groups.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub variant_mig_numbers: Vec<String>,
    /// Segments the assembler advanced past while `skip_unknown_segments`
    /// was enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
    /// Cursor positions of the entries in `skipped_segments`, pushed in the
    /// same order.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_positions: Vec<usize>,
}
87
88impl AssembledGroupInstance {
89 pub fn as_assembled_tree(&self) -> AssembledTree {
96 AssembledTree {
97 segments: self.segments.clone(),
98 groups: self.child_groups.clone(),
99 post_group_start: self.segments.len(),
100 inter_group_segments: std::collections::BTreeMap::new(),
101 }
102 }
103}
104
/// Tuning switches for [`Assembler`]. The `Default` value disables every option.
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When set, segments that fit no expected slot are advanced past and
    /// recorded (as skipped or inter-group segments) instead of stopping
    /// consumption.
    pub skip_unknown_segments: bool,

    /// Extra matching constraint keyed by MIG segment `number`. The value is
    /// `(element index, component index, expected value)`: a segment only
    /// matches that MIG segment when the addressed component equals the
    /// expected value (a missing component is treated as `""`).
    pub qualifier_map: std::collections::HashMap<String, (usize, usize, String)>,

    /// When set, a run of more than one same-tag slots inside a group is
    /// resolved by best match (code-aware) instead of strictly in order.
    pub strict_code_matching: bool,
}
141
/// Assembles flat segment lists into an [`AssembledTree`], guided by a MIG schema.
pub struct Assembler<'a> {
    /// Schema that drives segment and group matching.
    mig: &'a MigSchema,
    /// Matching options.
    config: AssemblerConfig,
}
150
151impl<'a> Assembler<'a> {
152 pub fn new(mig: &'a MigSchema) -> Self {
153 Self {
154 mig,
155 config: AssemblerConfig::default(),
156 }
157 }
158
159 pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
160 Self { mig, config }
161 }
162
163 fn top_level_enclosing_for_group(
177 &self,
178 current_group_idx: usize,
179 matched_seg_indices: &[usize],
180 ) -> std::collections::HashSet<String> {
181 if !self.config.skip_unknown_segments {
182 return std::collections::HashSet::new();
183 }
184 let mut tags: std::collections::HashSet<String> = self
185 .mig
186 .segments
187 .iter()
188 .enumerate()
189 .filter_map(|(i, s)| {
190 if matched_seg_indices.contains(&i) {
191 None
192 } else {
193 Some(s.id.clone())
194 }
195 })
196 .collect();
197 for (idx, group) in self.mig.segment_groups.iter().enumerate() {
198 if idx < current_group_idx {
199 continue;
200 }
201 if let Some(entry) = group.segments.first() {
202 tags.insert(entry.id.clone());
203 }
204 }
205 tags
206 }
207
    /// Assembles `segments` into an [`AssembledTree`] by walking the MIG schema.
    ///
    /// Consumption is strictly left to right through a single cursor:
    /// 1. root MIG segments, in schema order;
    /// 2. top-level segment groups, in schema order (consecutive variant
    ///    definitions of the same group id are handled together);
    /// 3. root MIG segments that were not matched in step 1 or between groups.
    ///
    /// Input segments left over once the schema is exhausted are not part of
    /// the returned tree.
    ///
    /// # Errors
    /// Propagates any [`AssemblyError`] returned by the group and segment
    /// consumers (for example, a MIG group that has no segments).
    pub fn assemble_generic(
        &self,
        segments: &[OwnedSegment],
    ) -> Result<AssembledTree, AssemblyError> {
        let mut cursor = SegmentCursor::new(segments.len());
        let mut tree = AssembledTree {
            segments: Vec::new(),
            groups: Vec::new(),
            post_group_start: 0,
            inter_group_segments: std::collections::BTreeMap::new(),
        };

        // Indices into `self.mig.segments` of root segments already consumed.
        let mut matched_seg_indices = Vec::new();

        // Phase 1: leading root segments. A MIG segment that does not match
        // the current input segment is simply passed over.
        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
            if cursor.is_exhausted() {
                break;
            }
            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
                tree.segments.push(assembled);
                matched_seg_indices.push(i);
            }
        }

        // Phase 2: top-level groups.
        let mut group_idx = 0;
        while group_idx < self.mig.segment_groups.len() {
            if cursor.is_exhausted() {
                break;
            }

            let mig_group = &self.mig.segment_groups[group_idx];

            if self.config.skip_unknown_segments {
                // Advance past segments that are neither a still-unmatched
                // root segment nor the entry segment of any top-level group,
                // recording them as inter-group segments keyed by the number
                // of groups emitted so far.
                let tree_group_idx = tree.groups.len();
                while !cursor.is_exhausted() {
                    let seg = &segments[cursor.position()];
                    let tag = &seg.id;
                    let is_unmatched_root_seg = self
                        .mig
                        .segments
                        .iter()
                        .enumerate()
                        .any(|(i, ms)| !matched_seg_indices.contains(&i) && ms.id == *tag);
                    let is_any_group_entry = self
                        .mig
                        .segment_groups
                        .iter()
                        .any(|g| g.segments.first().is_some_and(|s| s.id == *tag));
                    if is_unmatched_root_seg || is_any_group_entry {
                        break;
                    }
                    tree.inter_group_segments
                        .entry(tree_group_idx)
                        .or_default()
                        .push(owned_to_assembled(seg));
                    cursor.advance();
                }
                if cursor.is_exhausted() {
                    break;
                }
            }

            // Root segments that appear between groups: try every root MIG
            // segment that has not been matched yet and record hits as
            // inter-group segments.
            let tree_group_idx = tree.groups.len();
            for (i, mig_seg) in self.mig.segments.iter().enumerate() {
                if cursor.is_exhausted() {
                    break;
                }
                if matched_seg_indices.contains(&i) {
                    continue;
                }
                if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
                    tree.inter_group_segments
                        .entry(tree_group_idx)
                        .or_default()
                        .push(assembled);
                    matched_seg_indices.push(i);
                }
            }

            // Tags that must stop unknown-segment skipping inside the group.
            let top_enclosing =
                self.top_level_enclosing_for_group(group_idx, &matched_seg_indices);

            if mig_group.variant_code.is_some() {
                // Consecutive definitions sharing this group id that all carry
                // a variant code are consumed together as one combined group.
                let variant_count = self.mig.segment_groups[group_idx..]
                    .iter()
                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
                    .count();
                let variant_end = group_idx + variant_count;

                let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
                if let Some(combined) = self.try_consume_variant_groups(
                    segments,
                    &mut cursor,
                    variant_groups,
                    &top_enclosing,
                )? {
                    tree.groups.push(combined);
                }
                group_idx = variant_end;
            } else {
                if let Some(assembled) = self.try_consume_group(
                    segments,
                    &mut cursor,
                    mig_group,
                    &top_enclosing,
                )? {
                    tree.groups.push(assembled);
                }
                group_idx += 1;
            }
        }

        // Everything pushed to `tree.segments` from here on came after the groups.
        tree.post_group_start = tree.segments.len();

        // Phase 3: trailing root segments that have not been matched so far.
        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
            if cursor.is_exhausted() {
                break;
            }
            if matched_seg_indices.contains(&i) {
                continue;
            }
            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
                tree.segments.push(assembled);
            }
        }

        Ok(tree)
    }
364
365 fn try_consume_segment(
366 &self,
367 segments: &[OwnedSegment],
368 cursor: &mut SegmentCursor,
369 mig_seg: &MigSegment,
370 ) -> Result<Option<AssembledSegment>, AssemblyError> {
371 if cursor.is_exhausted() {
372 return Ok(None);
373 }
374 let seg = &segments[cursor.position()];
375 if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
376 if let Some(ref num) = mig_seg.number {
379 if let Some((el_idx, comp_idx, expected)) = self.config.qualifier_map.get(num) {
380 let actual = seg
381 .elements
382 .get(*el_idx)
383 .and_then(|e| e.get(*comp_idx))
384 .map(|s| s.as_str())
385 .unwrap_or("");
386 if actual != expected {
387 return Ok(None); }
389 }
390 }
391 let mut assembled = owned_to_assembled(seg);
399 assembled.mig_number = mig_seg.number.clone();
400 cursor.advance();
401 Ok(Some(assembled))
402 } else {
403 Ok(None) }
405 }
406
407 fn consume_entry_run_best_match(
417 &self,
418 segments: &[OwnedSegment],
419 cursor: &mut SegmentCursor,
420 entry_slots: &[MigSegment],
421 instance: &mut AssembledGroupInstance,
422 ) -> Result<(), AssemblyError> {
423 let mut used = vec![false; entry_slots.len()];
424 for _ in 0..entry_slots.len() {
425 if cursor.is_exhausted() {
426 break;
427 }
428 let seg = &segments[cursor.position()];
429 let mut strict_match: Option<usize> = None;
430 let mut tag_match: Option<usize> = None;
431 for (i, slot) in entry_slots.iter().enumerate() {
432 if used[i] {
433 continue;
434 }
435 if !matcher::matches_segment_tag(&seg.id, &slot.id) {
436 continue;
437 }
438 if !self.segment_passes_qualifier_map(seg, slot) {
439 continue;
440 }
441 if tag_match.is_none() {
442 tag_match = Some(i);
443 }
444 if strict_match.is_none() && segment_matches_mig_codes(seg, slot) {
445 strict_match = Some(i);
446 }
447 }
448 let Some(i) = strict_match.or(tag_match) else {
449 break;
450 };
451 used[i] = true;
452 let slot = &entry_slots[i];
453 let mut assembled = owned_to_assembled(seg);
454 assembled.mig_number = slot.number.clone();
455 instance.segments.push(assembled);
456 cursor.advance();
457 }
458 Ok(())
459 }
460
461 fn segment_passes_qualifier_map(&self, seg: &OwnedSegment, mig_seg: &MigSegment) -> bool {
462 let Some(ref num) = mig_seg.number else {
463 return true;
464 };
465 let Some((el_idx, comp_idx, expected)) = self.config.qualifier_map.get(num) else {
466 return true;
467 };
468 let actual = seg
469 .elements
470 .get(*el_idx)
471 .and_then(|e| e.get(*comp_idx))
472 .map(|s| s.as_str())
473 .unwrap_or("");
474 actual == expected
475 }
476
    /// Consumes as many consecutive repetitions of `mig_group` as the input
    /// provides, starting at the cursor.
    ///
    /// `enclosing` holds tags owned by outer scopes; in
    /// `skip_unknown_segments` mode a segment with such a tag is never
    /// skipped, so control returns to the caller instead.
    ///
    /// Returns `Ok(None)` when not a single repetition was consumed.
    ///
    /// # Errors
    /// Returns `AssemblyError::ParseError` when `mig_group` has no segments.
    fn try_consume_group(
        &self,
        segments: &[OwnedSegment],
        cursor: &mut SegmentCursor,
        mig_group: &MigSegmentGroup,
        enclosing: &std::collections::HashSet<String>,
    ) -> Result<Option<AssembledGroup>, AssemblyError> {
        let mut repetitions = Vec::new();
        let entry_segment = mig_group.segments.first().ok_or_else(|| {
            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
        })?;

        // Boundary tags handed to nested groups: the caller's set plus this
        // group's own segment tags and nested-group entry tags. Only needed
        // (and only built) in skip mode.
        let nested_enclosing: std::collections::HashSet<String> =
            if self.config.skip_unknown_segments {
                let mut set = enclosing.clone();
                set.extend(group_local_scope(mig_group));
                set
            } else {
                std::collections::HashSet::new()
            };

        // One iteration per repetition of the group.
        while !cursor.is_exhausted() {
            // Remembered to detect a repetition that consumed nothing.
            let iter_start = cursor.position();
            let seg = &segments[cursor.position()];
            // A repetition can only start at the group's entry tag.
            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                break;
            }

            // Variant check: the qualifier component (position defaults to
            // element 0, component 0) must equal one of `variant_codes`, or
            // `variant_code` when the list is empty. Comparison ignores ASCII
            // case; a missing component is treated as "".
            if !mig_group.variant_codes.is_empty() {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !mig_group
                    .variant_codes
                    .iter()
                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
                {
                    break;
                }
            } else if let Some(ref expected_code) = mig_group.variant_code {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !actual_qual.eq_ignore_ascii_case(expected_code) {
                    break;
                }
            }

            let mut instance = AssembledGroupInstance {
                segments: Vec::new(),
                child_groups: Vec::new(),
                entry_mig_number: entry_segment.number.clone(),
                variant_mig_numbers: collect_mig_numbers(mig_group),
                skipped_segments: Vec::new(),
                skipped_positions: Vec::new(),
            };

            // Walk the group's own segment slots in "runs": maximal stretches
            // of consecutive MIG segments sharing the same id.
            let mut slot_idx = 0;
            let mut is_entry_run = true;
            while slot_idx < mig_group.segments.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let current_tag = &mig_group.segments[slot_idx].id;
                let run_len = mig_group.segments[slot_idx..]
                    .iter()
                    .take_while(|s| s.id == *current_tag)
                    .count();

                if is_entry_run {
                    // First run: the entry segment and any same-tag slots
                    // directly after it.
                    let entry_slots = &mig_group.segments[slot_idx..slot_idx + run_len];
                    if self.config.strict_code_matching && run_len > 1 {
                        self.consume_entry_run_best_match(
                            segments,
                            cursor,
                            entry_slots,
                            &mut instance,
                        )?;
                    } else {
                        for slot in entry_slots {
                            if cursor.is_exhausted() {
                                break;
                            }
                            if let Some(assembled) =
                                self.try_consume_segment(segments, cursor, slot)?
                            {
                                instance.segments.push(assembled);
                            }
                        }
                    }
                    is_entry_run = false;
                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
                    // A later slot that reuses the entry tag. Such a segment
                    // is ambiguous with the start of the next repetition, so
                    // it is only taken when the segment after it is a
                    // non-entry segment defined in this group. In every other
                    // case slot processing for this repetition stops.
                    if cursor.is_exhausted() {
                        break;
                    }
                    let seg = &segments[cursor.position()];
                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
                        break;
                    }
                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
                        let next = &segments[cursor.position() + 1];
                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
                            && mig_group.segments.iter().any(|s| {
                                matcher::matches_segment_tag(&next.id, &s.id)
                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
                            })
                    } else {
                        false
                    };
                    if has_following_non_entry {
                        // Pushed without a MIG number.
                        instance.segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    } else {
                        break;
                    }
                } else {
                    // Ordinary run: match against the slots first ...
                    let slots = &mig_group.segments[slot_idx..slot_idx + run_len];
                    if self.config.strict_code_matching && run_len > 1 {
                        self.consume_entry_run_best_match(segments, cursor, slots, &mut instance)?;
                    } else {
                        for slot in slots {
                            if cursor.is_exhausted() {
                                break;
                            }
                            if let Some(assembled) =
                                self.try_consume_segment(segments, cursor, slot)?
                            {
                                instance.segments.push(assembled);
                            }
                        }
                    }
                    // ... then absorb any further consecutive segments with
                    // the same tag; these carry no MIG number.
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        if matcher::matches_segment_tag(&seg.id, current_tag) {
                            instance.segments.push(owned_to_assembled(seg));
                            cursor.advance();
                        } else {
                            break;
                        }
                    }
                }

                slot_idx += run_len;

                if self.config.skip_unknown_segments {
                    // Skip unknown segments until one is found that is: the
                    // group's entry tag, a remaining slot of this group, the
                    // entry of a nested group, or a tag owned by an outer scope.
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                            break;
                        }
                        if mig_group.segments[slot_idx..]
                            .iter()
                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
                        {
                            break;
                        }
                        if mig_group.nested_groups.iter().any(|ng| {
                            ng.segments
                                .first()
                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
                        }) {
                            break;
                        }
                        if enclosing.contains(&seg.id) {
                            break;
                        }
                        instance.skipped_positions.push(cursor.position());
                        instance.skipped_segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    }
                }
            }

            // Nested groups. Without skip mode this runs exactly one pass.
            // In skip mode, after each pass one segment that no outer scope
            // claims is skipped and the nested groups are tried again.
            loop {
                let pass_start = cursor.position();
                let mut nested_idx = 0;
                while nested_idx < mig_group.nested_groups.len() {
                    if cursor.is_exhausted() {
                        break;
                    }
                    let nested = &mig_group.nested_groups[nested_idx];

                    if nested.variant_code.is_some() {
                        // Consecutive variant definitions of the same nested
                        // group id are consumed together.
                        let variant_count = mig_group.nested_groups[nested_idx..]
                            .iter()
                            .take_while(|g| g.id == nested.id && g.variant_code.is_some())
                            .count();
                        let variant_end = nested_idx + variant_count;
                        let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
                        if let Some(combined) = self.try_consume_variant_groups(
                            segments,
                            cursor,
                            variant_groups,
                            &nested_enclosing,
                        )? {
                            push_or_merge_child(&mut instance.child_groups, combined);
                        }
                        nested_idx = variant_end;
                    } else {
                        if let Some(assembled) =
                            self.try_consume_group(segments, cursor, nested, &nested_enclosing)?
                        {
                            push_or_merge_child(&mut instance.child_groups, assembled);
                        }
                        nested_idx += 1;
                    }
                }

                if !self.config.skip_unknown_segments || cursor.is_exhausted() {
                    break;
                }
                let seg = &segments[cursor.position()];
                // A segment owned by an outer scope ends this repetition.
                if enclosing.contains(&seg.id) {
                    break;
                }
                // NOTE(review): this condition can never be true here. The
                // check above already left the loop whenever
                // `skip_unknown_segments` is false. Confirm the intent.
                if cursor.position() == pass_start && !self.config.skip_unknown_segments {
                    break;
                }

                instance.skipped_positions.push(cursor.position());
                instance.skipped_segments.push(owned_to_assembled(seg));
                cursor.advance();
            }

            // A repetition that did not move the cursor is discarded; this
            // also guarantees the outer loop terminates.
            if cursor.position() == iter_start {
                break;
            }
            repetitions.push(instance);
        }

        if repetitions.is_empty() {
            Ok(None)
        } else {
            Ok(Some(AssembledGroup {
                group_id: mig_group.id.clone(),
                repetitions,
            }))
        }
    }
820
821 fn try_consume_variant_groups(
827 &self,
828 segments: &[OwnedSegment],
829 cursor: &mut SegmentCursor,
830 variants: &[MigSegmentGroup],
831 enclosing: &std::collections::HashSet<String>,
832 ) -> Result<Option<AssembledGroup>, AssemblyError> {
833 let group_id = variants[0].id.clone();
834 let entry_tag = variants[0]
835 .segments
836 .first()
837 .map(|s| s.id.as_str())
838 .unwrap_or("");
839 let mut all_reps = Vec::new();
840
841 while !cursor.is_exhausted() {
842 let seg = &segments[cursor.position()];
843 if !matcher::matches_segment_tag(&seg.id, entry_tag) {
844 break;
845 }
846
847 let matched = variants.iter().find(|v| {
851 let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
852 let actual_qual = seg
853 .elements
854 .get(ei)
855 .and_then(|e| e.get(ci))
856 .map(|s| s.as_str())
857 .unwrap_or("");
858 if !v.variant_codes.is_empty() {
859 v.variant_codes
860 .iter()
861 .any(|c| actual_qual.eq_ignore_ascii_case(c))
862 } else if let Some(ref expected_code) = v.variant_code {
863 actual_qual.eq_ignore_ascii_case(expected_code)
864 } else {
865 false
866 }
867 });
868
869 if let Some(variant) = matched {
870 if let Some(group) =
871 self.try_consume_group(segments, cursor, variant, enclosing)?
872 {
873 all_reps.extend(group.repetitions);
874 } else {
875 break;
876 }
877 } else {
878 if let Some(group) =
882 self.try_consume_group(segments, cursor, &variants[0], enclosing)?
883 {
884 all_reps.extend(group.repetitions);
885 } else {
886 break;
887 }
888 }
889 }
890
891 if all_reps.is_empty() {
892 Ok(None)
893 } else {
894 Ok(Some(AssembledGroup {
895 group_id,
896 repetitions: all_reps,
897 }))
898 }
899 }
900
901 pub fn assemble_with_diagnostics(
906 &self,
907 segments: &[OwnedSegment],
908 ) -> (AssembledTree, Vec<StructureDiagnostic>) {
909 let mut diagnostics = Vec::new();
910
911 let tree = match self.assemble_generic(segments) {
912 Ok(tree) => tree,
913 Err(e) => {
914 diagnostics.push(StructureDiagnostic {
915 kind: StructureDiagnosticKind::UnexpectedSegment,
916 segment_id: String::new(),
917 position: 0,
918 message: format!("Assembly failed: {e}"),
919 });
920 return (
921 AssembledTree {
922 segments: Vec::new(),
923 groups: Vec::new(),
924 post_group_start: 0,
925 inter_group_segments: std::collections::BTreeMap::new(),
926 },
927 diagnostics,
928 );
929 }
930 };
931
932 let consumed = count_tree_segments(&tree);
936
937 for (i, seg) in segments.iter().enumerate().skip(consumed) {
940 diagnostics.push(StructureDiagnostic {
941 kind: StructureDiagnosticKind::UnexpectedSegment,
942 segment_id: seg.id.clone(),
943 position: i,
944 message: format!(
945 "Segment '{}' at position {} was not consumed by MIG-guided assembly",
946 seg.id, i
947 ),
948 });
949 }
950
951 let mut skipped: Vec<(usize, String)> = Vec::new();
955 collect_skipped(&tree, &mut skipped);
956 skipped.sort_by_key(|(pos, _)| *pos);
957 for (pos, tag) in skipped {
958 diagnostics.push(StructureDiagnostic {
959 kind: StructureDiagnosticKind::SkippedUnknownSegment,
960 segment_id: tag.clone(),
961 position: pos,
962 message: format!(
963 "Segment '{tag}' at position {pos} is not defined in the PID-filtered MIG; the assembler advanced past it",
964 ),
965 });
966 }
967
968 (tree, diagnostics)
969 }
970}
971
972fn collect_skipped(tree: &AssembledTree, out: &mut Vec<(usize, String)>) {
973 for group in &tree.groups {
974 collect_skipped_from_group(group, out);
975 }
976}
977
978fn collect_skipped_from_group(group: &AssembledGroup, out: &mut Vec<(usize, String)>) {
979 for rep in &group.repetitions {
980 for (i, seg) in rep.skipped_segments.iter().enumerate() {
981 let pos = rep.skipped_positions.get(i).copied().unwrap_or(0);
982 out.push((pos, seg.tag.clone()));
983 }
984 for child in &rep.child_groups {
985 collect_skipped_from_group(child, out);
986 }
987 }
988}
989
990fn count_tree_segments(tree: &AssembledTree) -> usize {
991 let mut count = tree.segments.len();
992 for group in &tree.groups {
993 count += count_group_segments(group);
994 }
995 for segs in tree.inter_group_segments.values() {
997 count += segs.len();
998 }
999 count
1000}
1001
1002fn count_group_segments(group: &AssembledGroup) -> usize {
1003 let mut count = 0;
1004 for rep in &group.repetitions {
1005 count += rep.segments.len();
1006 count += rep.skipped_segments.len();
1007 for child in &rep.child_groups {
1008 count += count_group_segments(child);
1009 }
1010 }
1011 count
1012}
1013
1014fn group_local_scope(mig_group: &MigSegmentGroup) -> std::collections::HashSet<String> {
1020 let mut tags = std::collections::HashSet::new();
1021 for seg in &mig_group.segments {
1022 tags.insert(seg.id.clone());
1023 }
1024 for nested in &mig_group.nested_groups {
1025 if let Some(entry) = nested.segments.first() {
1026 tags.insert(entry.id.clone());
1027 }
1028 }
1029 tags
1030}
1031
1032fn push_or_merge_child(child_groups: &mut Vec<AssembledGroup>, new: AssembledGroup) {
1037 if let Some(existing) = child_groups.iter_mut().find(|g| g.group_id == new.group_id) {
1038 existing.repetitions.extend(new.repetitions);
1039 } else {
1040 child_groups.push(new);
1041 }
1042}
1043
1044fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
1049 let mut numbers = Vec::new();
1050 for seg in &group.segments {
1051 if let Some(ref num) = seg.number {
1052 numbers.push(num.clone());
1053 }
1054 }
1055 for nested in &group.nested_groups {
1056 numbers.extend(collect_mig_numbers(nested));
1057 }
1058 numbers
1059}
1060
1061pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
1062 AssembledSegment {
1063 tag: seg.id.clone(),
1064 elements: seg.elements.clone(),
1065 mig_number: None,
1066 segment_number: Some(seg.segment_number),
1067 }
1068}
1069
1070fn segment_matches_mig_codes(seg: &OwnedSegment, mig_seg: &MigSegment) -> bool {
1078 let actual_at = |el: usize, c: usize| -> &str {
1079 seg.elements
1080 .get(el)
1081 .and_then(|e| e.get(c))
1082 .map(|s| s.as_str())
1083 .unwrap_or("")
1084 };
1085 for de in &mig_seg.data_elements {
1086 if !de.codes.is_empty() {
1087 let actual = actual_at(de.position, 0);
1088 if !actual.is_empty() && !de.codes.iter().any(|c| c.value == actual) {
1089 return false;
1090 }
1091 }
1092 }
1093 for comp in &mig_seg.composites {
1094 for de in &comp.data_elements {
1095 if !de.codes.is_empty() {
1096 let actual = actual_at(comp.position, de.position);
1097 if !actual.is_empty() && !de.codes.iter().any(|c| c.value == actual) {
1098 return false;
1099 }
1100 }
1101 }
1102 }
1103 true
1104}
1105
1106#[cfg(test)]
1107mod tests {
1108 use super::*;
1109 use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
1110
1111 fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
1112 OwnedSegment {
1113 id: id.to_string(),
1114 elements: elements
1115 .into_iter()
1116 .map(|e| e.into_iter().map(|c| c.to_string()).collect())
1117 .collect(),
1118 segment_number: 0,
1119 }
1120 }
1121
1122 fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
1123 MigSchema {
1124 message_type: "UTILMD".to_string(),
1125 variant: Some("Strom".to_string()),
1126 version: "S2.1".to_string(),
1127 publication_date: "2025-03-20".to_string(),
1128 author: "BDEW".to_string(),
1129 format_version: "FV2504".to_string(),
1130 source_file: "test".to_string(),
1131 segments: segments.into_iter().map(make_mig_segment).collect(),
1132 segment_groups: groups,
1133 }
1134 }
1135
1136 #[test]
1137 fn test_assembler_top_level_segments_only() {
1138 let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
1139
1140 let segments = vec![
1141 make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
1142 make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
1143 make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
1144 make_owned_seg("UNT", vec![vec!["4", "001"]]),
1145 ];
1146
1147 let assembler = Assembler::new(&mig);
1148 let result = assembler.assemble_generic(&segments).unwrap();
1149
1150 assert_eq!(result.segments.len(), 4);
1151 assert_eq!(result.segments[0].tag, "UNH");
1152 assert_eq!(result.segments[1].tag, "BGM");
1153 assert_eq!(result.segments[2].tag, "DTM");
1154 assert_eq!(result.segments[3].tag, "UNT");
1155 assert!(result.groups.is_empty());
1156 }
1157
1158 #[test]
1159 fn test_assembler_with_segment_group() {
1160 let mig = make_mig_schema(
1161 vec!["UNH", "BGM"],
1162 vec![
1163 make_mig_group("SG2", vec!["NAD"], vec![]),
1164 make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
1165 ],
1166 );
1167
1168 let segments = vec![
1169 make_owned_seg("UNH", vec![vec!["001"]]),
1170 make_owned_seg("BGM", vec![vec!["E01"]]),
1171 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1172 make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
1173 make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1174 make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1175 ];
1176
1177 let assembler = Assembler::new(&mig);
1178 let result = assembler.assemble_generic(&segments).unwrap();
1179
1180 assert_eq!(result.segments.len(), 2);
1182 assert_eq!(result.groups.len(), 2);
1184 assert_eq!(result.groups[0].group_id, "SG2");
1185 assert_eq!(result.groups[0].repetitions.len(), 2);
1186 assert_eq!(result.groups[0].repetitions[0].segments[0].tag, "NAD");
1187 assert_eq!(result.groups[0].repetitions[1].segments[0].tag, "NAD");
1188 assert_eq!(result.groups[1].group_id, "SG4");
1190 assert_eq!(result.groups[1].repetitions.len(), 1);
1191 assert_eq!(result.groups[1].repetitions[0].segments.len(), 2);
1192 }
1193
1194 #[test]
1195 fn test_assembler_nested_groups() {
1196 let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
1197 let mig = make_mig_schema(
1198 vec!["UNH", "BGM"],
1199 vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])],
1200 );
1201
1202 let segments = vec![
1203 make_owned_seg("UNH", vec![vec!["001"]]),
1204 make_owned_seg("BGM", vec![vec!["E01"]]),
1205 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1206 make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
1207 make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
1208 ];
1209
1210 let assembler = Assembler::new(&mig);
1211 let result = assembler.assemble_generic(&segments).unwrap();
1212
1213 let sg2 = &result.groups[0];
1215 assert_eq!(sg2.group_id, "SG2");
1216 assert_eq!(sg2.repetitions.len(), 1);
1217
1218 let sg2_inst = &sg2.repetitions[0];
1219 assert_eq!(sg2_inst.segments[0].tag, "NAD");
1220
1221 assert_eq!(sg2_inst.child_groups.len(), 1);
1223 let sg3 = &sg2_inst.child_groups[0];
1224 assert_eq!(sg3.group_id, "SG3");
1225 assert_eq!(sg3.repetitions[0].segments.len(), 2);
1226 assert_eq!(sg3.repetitions[0].segments[0].tag, "CTA");
1227 assert_eq!(sg3.repetitions[0].segments[1].tag, "COM");
1228 }
1229
1230 #[test]
1231 fn test_assembler_optional_segments_skipped() {
1232 let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
1234
1235 let segments = vec![
1236 make_owned_seg("UNH", vec![vec!["001"]]),
1237 make_owned_seg("BGM", vec![vec!["E01"]]),
1238 make_owned_seg("UNT", vec![vec!["2", "001"]]),
1239 ];
1240
1241 let assembler = Assembler::new(&mig);
1242 let result = assembler.assemble_generic(&segments).unwrap();
1243
1244 assert_eq!(result.segments.len(), 3);
1246 assert_eq!(result.segments[0].tag, "UNH");
1247 assert_eq!(result.segments[1].tag, "BGM");
1248 assert_eq!(result.segments[2].tag, "UNT");
1249 }
1250
1251 #[test]
1252 fn test_assembler_empty_segments() {
1253 let mig = make_mig_schema(vec!["UNH"], vec![]);
1254 let assembler = Assembler::new(&mig);
1255 let result = assembler.assemble_generic(&[]).unwrap();
1256 assert!(result.segments.is_empty());
1257 assert!(result.groups.is_empty());
1258 }
1259
1260 #[test]
1261 fn test_assembler_preserves_element_data() {
1262 let mig = make_mig_schema(vec!["DTM"], vec![]);
1263
1264 let segments = vec![make_owned_seg(
1265 "DTM",
1266 vec![vec!["137", "202501010000+01", "303"]],
1267 )];
1268
1269 let assembler = Assembler::new(&mig);
1270 let result = assembler.assemble_generic(&segments).unwrap();
1271
1272 let dtm = &result.segments[0];
1273 assert_eq!(dtm.elements[0][0], "137");
1274 assert_eq!(dtm.elements[0][1], "202501010000+01");
1275 assert_eq!(dtm.elements[0][2], "303");
1276 }
1277
1278 #[test]
1279 fn test_group_instance_as_assembled_tree() {
1280 let sg5 = AssembledGroup {
1282 group_id: "SG5".to_string(),
1283 repetitions: vec![AssembledGroupInstance {
1284 segments: vec![AssembledSegment {
1285 tag: "LOC".to_string(),
1286 elements: vec![vec!["Z16".to_string(), "DE000111222333".to_string()]],
1287 mig_number: None,
1288 segment_number: None,
1289 }],
1290 child_groups: vec![],
1291 entry_mig_number: None,
1292 variant_mig_numbers: vec![],
1293 skipped_segments: vec![],
1294 skipped_positions: Vec::new(),
1295 }],
1296 };
1297
1298 let sg4_instance = AssembledGroupInstance {
1299 segments: vec![
1300 AssembledSegment {
1301 tag: "IDE".to_string(),
1302 elements: vec![vec!["24".to_string(), "TX001".to_string()]],
1303 mig_number: None,
1304 segment_number: None,
1305 },
1306 AssembledSegment {
1307 tag: "STS".to_string(),
1308 elements: vec![vec!["7".to_string()]],
1309 mig_number: None,
1310 segment_number: None,
1311 },
1312 ],
1313 child_groups: vec![sg5],
1314 entry_mig_number: None,
1315 variant_mig_numbers: vec![],
1316 skipped_segments: vec![],
1317 skipped_positions: Vec::new(),
1318 };
1319
1320 let sub_tree = sg4_instance.as_assembled_tree();
1321
1322 assert_eq!(sub_tree.segments.len(), 2);
1324 assert_eq!(sub_tree.segments[0].tag, "IDE");
1325 assert_eq!(sub_tree.segments[1].tag, "STS");
1326
1327 assert_eq!(sub_tree.groups.len(), 1);
1329 assert_eq!(sub_tree.groups[0].group_id, "SG5");
1330
1331 assert_eq!(sub_tree.post_group_start, 2);
1333 }
1334
1335 #[test]
1336 fn test_assembler_from_parsed_edifact() {
1337 let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
1339 let segments = crate::tokenize::parse_to_segments(input).unwrap();
1340
1341 let mig = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);
1342
1343 let assembler = Assembler::new(&mig);
1344 let result = assembler.assemble_generic(&segments).unwrap();
1345
1346 assert!(result.segments.iter().any(|s| s.tag == "UNH"));
1347 assert!(result.segments.iter().any(|s| s.tag == "BGM"));
1348 assert!(result.segments.iter().any(|s| s.tag == "DTM"));
1349 }
1350
1351 #[test]
1352 fn test_assemble_with_diagnostics_clean_input() {
1353 let mig = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
1354 let segments = vec![
1355 make_owned_seg("UNH", vec![vec!["001"]]),
1356 make_owned_seg("BGM", vec![vec!["E01"]]),
1357 make_owned_seg("UNT", vec![vec!["2", "001"]]),
1358 ];
1359 let assembler = Assembler::new(&mig);
1360 let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
1361 assert_eq!(tree.segments.len(), 3);
1362 assert!(
1363 diagnostics.is_empty(),
1364 "Clean input should have no diagnostics"
1365 );
1366 }
1367
1368 #[test]
1369 fn test_assemble_with_diagnostics_unconsumed_segments() {
1370 let mig = make_mig_schema(vec!["UNH", "BGM"], vec![]);
1371 let segments = vec![
1372 make_owned_seg("UNH", vec![vec!["001"]]),
1373 make_owned_seg("BGM", vec![vec!["E01"]]),
1374 make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
1375 ];
1376 let assembler = Assembler::new(&mig);
1377 let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
1378 assert_eq!(tree.segments.len(), 2);
1379 assert_eq!(diagnostics.len(), 1);
1380 assert_eq!(
1381 diagnostics[0].kind,
1382 StructureDiagnosticKind::UnexpectedSegment
1383 );
1384 assert_eq!(diagnostics[0].segment_id, "FTX");
1385 assert_eq!(diagnostics[0].position, 2);
1386 }
1387
1388 #[test]
1389 fn test_assemble_with_diagnostics_multiple_unconsumed() {
1390 let mig = make_mig_schema(vec!["UNH"], vec![]);
1391 let segments = vec![
1392 make_owned_seg("UNH", vec![vec!["001"]]),
1393 make_owned_seg("FOO", vec![]),
1394 make_owned_seg("BAR", vec![]),
1395 make_owned_seg("BAZ", vec![]),
1396 ];
1397 let assembler = Assembler::new(&mig);
1398 let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
1399 assert_eq!(tree.segments.len(), 1);
1400 assert_eq!(diagnostics.len(), 3);
1401 assert_eq!(diagnostics[0].segment_id, "FOO");
1402 assert_eq!(diagnostics[1].segment_id, "BAR");
1403 assert_eq!(diagnostics[2].segment_id, "BAZ");
1404 }
1405
1406 #[test]
1409 fn test_non_entry_segments_get_mig_number_from_bounded_slots() {
1410 use crate::test_support::make_mig_segment_numbered;
1414
1415 let sg4 = MigSegmentGroup {
1416 segments: vec![
1417 make_mig_segment_numbered("IDE", "00020"),
1418 make_mig_segment_numbered("DTM", "00023"),
1419 make_mig_segment_numbered("DTM", "00024"),
1420 make_mig_segment_numbered("STS", "00035"),
1421 ],
1422 ..make_mig_group("SG4", vec![], vec![])
1423 };
1424 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1425
1426 let segments = vec![
1427 make_owned_seg("UNH", vec![vec!["001"]]),
1428 make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1429 make_owned_seg("DTM", vec![vec!["92", "202505312200+00", "303"]]),
1430 make_owned_seg("DTM", vec![vec!["93", "202512312300+00", "303"]]),
1431 make_owned_seg("STS", vec![vec!["7"], vec![], vec!["E01"]]),
1432 ];
1433
1434 let assembler = Assembler::new(&mig);
1435 let tree = assembler.assemble_generic(&segments).unwrap();
1436
1437 let sg4_instance = &tree.groups[0].repetitions[0];
1438
1439 assert_eq!(sg4_instance.segments[0].tag, "IDE");
1441 assert_eq!(sg4_instance.segments[0].mig_number.as_deref(), Some("00020"));
1442
1443 assert_eq!(sg4_instance.segments[1].tag, "DTM");
1445 assert_eq!(sg4_instance.segments[1].mig_number.as_deref(), Some("00023"));
1446
1447 assert_eq!(sg4_instance.segments[2].tag, "DTM");
1449 assert_eq!(sg4_instance.segments[2].mig_number.as_deref(), Some("00024"));
1450
1451 assert_eq!(sg4_instance.segments[3].tag, "STS");
1453 assert_eq!(sg4_instance.segments[3].mig_number.as_deref(), Some("00035"));
1454
1455 assert!(sg4_instance.variant_mig_numbers.contains(&"00020".to_string()));
1457 assert!(sg4_instance.variant_mig_numbers.contains(&"00023".to_string()));
1458 assert!(sg4_instance.variant_mig_numbers.contains(&"00024".to_string()));
1459 assert!(sg4_instance.variant_mig_numbers.contains(&"00035".to_string()));
1460 }
1461
1462 #[test]
1463 fn test_greedy_extra_segments_get_no_mig_number() {
1464 use crate::test_support::make_mig_segment_numbered;
1467
1468 let sg4 = MigSegmentGroup {
1469 segments: vec![
1470 make_mig_segment_numbered("IDE", "00020"),
1471 make_mig_segment_numbered("DTM", "00023"),
1472 ],
1473 ..make_mig_group("SG4", vec![], vec![])
1474 };
1475 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1476
1477 let segments = vec![
1478 make_owned_seg("UNH", vec![vec!["001"]]),
1479 make_owned_seg("IDE", vec![vec!["24"]]),
1480 make_owned_seg("DTM", vec![vec!["92", "20250531"]]),
1481 make_owned_seg("DTM", vec![vec!["93", "20251231"]]), ];
1483
1484 let assembler = Assembler::new(&mig);
1485 let tree = assembler.assemble_generic(&segments).unwrap();
1486
1487 let sg4_instance = &tree.groups[0].repetitions[0];
1488 assert_eq!(sg4_instance.segments.len(), 3); assert_eq!(sg4_instance.segments[1].mig_number.as_deref(), Some("00023"));
1492
1493 assert_eq!(sg4_instance.segments[2].mig_number, None);
1495 }
1496
1497 #[test]
1500 fn test_qualifier_map_prevents_wrong_slot_consumption() {
1501 use crate::test_support::make_mig_segment_numbered;
1506 use std::collections::HashMap;
1507
1508 let sg4 = MigSegmentGroup {
1509 segments: vec![
1510 make_mig_segment_numbered("IDE", "00020"),
1511 make_mig_segment_numbered("DTM", "00023"),
1512 make_mig_segment_numbered("DTM", "00024"),
1513 ],
1514 ..make_mig_group("SG4", vec![], vec![])
1515 };
1516 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1517
1518 let segments = vec![
1519 make_owned_seg("UNH", vec![vec!["001"]]),
1520 make_owned_seg("IDE", vec![vec!["24"]]),
1521 make_owned_seg("DTM", vec![vec!["93", "202512312300+00", "303"]]),
1522 ];
1523
1524 let mut qualifier_map = HashMap::new();
1525 qualifier_map.insert("00023".to_string(), (0, 0, "92".to_string()));
1526 qualifier_map.insert("00024".to_string(), (0, 0, "93".to_string()));
1527
1528 let config = AssemblerConfig {
1529 skip_unknown_segments: false,
1530 qualifier_map,
1531 ..Default::default()
1532 };
1533 let assembler = Assembler::with_config(&mig, config);
1534 let tree = assembler.assemble_generic(&segments).unwrap();
1535
1536 let sg4_instance = &tree.groups[0].repetitions[0];
1537
1538 assert_eq!(sg4_instance.segments.len(), 2); let dtm = &sg4_instance.segments[1];
1541 assert_eq!(dtm.tag, "DTM");
1542 assert_eq!(
1543 dtm.mig_number.as_deref(),
1544 Some("00024"),
1545 "DTM+93 should get mig_number 00024 (not 00023)"
1546 );
1547 }
1548
1549 #[test]
1550 fn test_group_entry_qualifier_mismatch_does_not_infinite_loop() {
1551 use crate::test_support::make_mig_segment_numbered;
1561 use std::collections::HashMap;
1562
1563 let sg5 = MigSegmentGroup {
1564 segments: vec![make_mig_segment_numbered("LOC", "00050")],
1565 ..make_mig_group("SG5", vec![], vec![])
1566 };
1567 let mig = make_mig_schema(vec!["UNH"], vec![sg5]);
1568
1569 let segments = vec![
1570 make_owned_seg("UNH", vec![vec!["001"]]),
1571 make_owned_seg("LOC", vec![vec!["172"], vec!["92003964705"]]),
1573 ];
1574
1575 let mut qualifier_map = HashMap::new();
1576 qualifier_map.insert("00050".to_string(), (0, 0, "Z16".to_string()));
1577
1578 let config = AssemblerConfig {
1579 skip_unknown_segments: false,
1580 qualifier_map,
1581 ..Default::default()
1582 };
1583 let assembler = Assembler::with_config(&mig, config);
1584
1585 let start = std::time::Instant::now();
1589 let tree = assembler.assemble_generic(&segments).unwrap();
1590 assert!(
1591 start.elapsed() < std::time::Duration::from_secs(5),
1592 "assembly took {:?} — suspected infinite-loop regression",
1593 start.elapsed()
1594 );
1595
1596 assert!(tree.groups.is_empty());
1600 }
1601
1602 #[test]
1605 fn test_skip_unknown_segment_between_slots() {
1606 let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
1610 let mig = make_mig_schema(vec!["UNH"], vec![sg8.clone()]);
1611
1612 let segments = vec![
1613 make_owned_seg("UNH", vec![vec!["001"]]),
1614 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1615 make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
1616 make_owned_seg("CCI", vec![vec!["Z30"]]),
1617 ];
1618
1619 let off = Assembler::new(&mig);
1621 let tree_off = off.assemble_generic(&segments).unwrap();
1622 let sg8_off = &tree_off.groups[0];
1623 assert_eq!(sg8_off.repetitions[0].segments.len(), 1); assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");
1625
1626 let on = Assembler::with_config(
1628 &mig,
1629 AssemblerConfig {
1630 skip_unknown_segments: true,
1631 ..Default::default()
1632 },
1633 );
1634 let tree_on = on.assemble_generic(&segments).unwrap();
1635 let sg8_on = &tree_on.groups[0];
1636 assert_eq!(sg8_on.repetitions[0].segments.len(), 2); assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
1638 assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
1639 }
1640
1641 #[test]
1642 fn test_skip_preserves_on_instance() {
1643 let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
1645 let mig = make_mig_schema(vec!["UNH"], vec![sg8]);
1646
1647 let segments = vec![
1648 make_owned_seg("UNH", vec![vec!["001"]]),
1649 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1650 make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1651 make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
1652 make_owned_seg("CCI", vec![vec!["Z30"]]),
1653 ];
1654
1655 let assembler = Assembler::with_config(
1656 &mig,
1657 AssemblerConfig {
1658 skip_unknown_segments: true,
1659 ..Default::default()
1660 },
1661 );
1662 let tree = assembler.assemble_generic(&segments).unwrap();
1663 let instance = &tree.groups[0].repetitions[0];
1664
1665 assert_eq!(instance.segments.len(), 2); assert_eq!(instance.skipped_segments.len(), 2); assert_eq!(instance.skipped_segments[0].tag, "RFF");
1668 assert_eq!(instance.skipped_segments[1].tag, "DTM");
1669 }
1670
1671 #[test]
1672 fn test_skip_mode_off_default() {
1673 let mig = make_mig_schema(vec![], vec![]);
1675 let assembler = Assembler::new(&mig);
1676 assert!(!assembler.config.skip_unknown_segments);
1677 }
1678
1679 #[test]
1680 fn test_skip_does_not_consume_nested_group_entry() {
1681 let sg5 = make_mig_group("SG5", vec!["LOC"], vec![]);
1685 let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![sg5]);
1686 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1687
1688 let segments = vec![
1689 make_owned_seg("UNH", vec![vec!["001"]]),
1690 make_owned_seg("IDE", vec![vec!["24"]]),
1691 make_owned_seg("FOO", vec![vec!["unknown"]]),
1692 make_owned_seg("STS", vec![vec!["7"]]),
1693 make_owned_seg("LOC", vec![vec!["Z16"]]),
1694 ];
1695
1696 let assembler = Assembler::with_config(
1697 &mig,
1698 AssemblerConfig {
1699 skip_unknown_segments: true,
1700 ..Default::default()
1701 },
1702 );
1703 let tree = assembler.assemble_generic(&segments).unwrap();
1704 let sg4 = &tree.groups[0];
1705 let inst = &sg4.repetitions[0];
1706
1707 assert_eq!(inst.segments.len(), 2);
1709 assert_eq!(inst.segments[0].tag, "IDE");
1710 assert_eq!(inst.segments[1].tag, "STS");
1711 assert_eq!(inst.skipped_segments.len(), 1);
1712 assert_eq!(inst.skipped_segments[0].tag, "FOO");
1713
1714 assert_eq!(inst.child_groups.len(), 1);
1716 assert_eq!(inst.child_groups[0].group_id, "SG5");
1717 assert_eq!(inst.child_groups[0].repetitions[0].segments[0].tag, "LOC");
1718 }
1719
1720 #[test]
1721 fn test_skip_unknown_between_nested_group_reps() {
1722 let sg10 = make_mig_group("SG10", vec!["CCI"], vec![]);
1740 let sg8_zd7 =
1741 make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
1742 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![], "Z98");
1743 let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
1744 let sg4 = make_mig_group(
1745 "SG4",
1746 vec!["IDE", "STS"],
1747 vec![sg8_zd7, sg8_z98, sg12],
1748 );
1749 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1750
1751 let segments = vec![
1752 make_owned_seg("UNH", vec![vec!["001"]]),
1753 make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1754 make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1755 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1756 make_owned_seg("CCI", vec![vec!["Z30"]]),
1757 make_owned_seg("FOO", vec![vec!["orphan"]]),
1758 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1759 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1760 ];
1761
1762 let assembler = Assembler::with_config(
1763 &mig,
1764 AssemblerConfig {
1765 skip_unknown_segments: true,
1766 ..Default::default()
1767 },
1768 );
1769 let tree = assembler.assemble_generic(&segments).unwrap();
1770
1771 assert_eq!(tree.groups.len(), 1);
1772 let sg4_inst = &tree.groups[0].repetitions[0];
1773 assert_eq!(sg4_inst.child_groups.len(), 2, "expected SG8 + SG12");
1775 let sg8_tree = &sg4_inst.child_groups[0];
1776 assert_eq!(sg8_tree.group_id, "SG8");
1777 assert_eq!(
1778 sg8_tree.repetitions.len(),
1779 2,
1780 "SG8 should have both ZD7 and Z98 reps after orphan skip"
1781 );
1782 assert_eq!(sg8_tree.repetitions[0].segments[0].elements[0][0], "ZD7");
1783 assert_eq!(sg8_tree.repetitions[1].segments[0].elements[0][0], "Z98");
1784
1785 let sg12_tree = &sg4_inst.child_groups[1];
1786 assert_eq!(sg12_tree.group_id, "SG12");
1787 assert_eq!(sg12_tree.repetitions.len(), 1);
1788
1789 fn count_foo(inst: &AssembledGroupInstance) -> usize {
1794 let mut n = inst
1795 .skipped_segments
1796 .iter()
1797 .filter(|s| s.tag == "FOO")
1798 .count();
1799 for child in &inst.child_groups {
1800 for rep in &child.repetitions {
1801 n += count_foo(rep);
1802 }
1803 }
1804 n
1805 }
1806 assert_eq!(count_foo(sg4_inst), 1, "FOO should be recorded exactly once");
1807 }
1808
1809 #[test]
1810 fn test_skip_off_preserves_cascade_behavior() {
1811 let sg10 = make_mig_group("SG10", vec!["CCI"], vec![]);
1815 let sg8_zd7 =
1816 make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "ZD7");
1817 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![], "Z98");
1818 let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
1819 let sg4 = make_mig_group(
1820 "SG4",
1821 vec!["IDE", "STS"],
1822 vec![sg8_zd7, sg8_z98, sg12],
1823 );
1824 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1825
1826 let segments = vec![
1827 make_owned_seg("UNH", vec![vec!["001"]]),
1828 make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1829 make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1830 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1831 make_owned_seg("CCI", vec![vec!["Z30"]]),
1832 make_owned_seg("FOO", vec![vec!["orphan"]]),
1833 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1834 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1835 ];
1836
1837 let assembler = Assembler::new(&mig);
1838 let tree = assembler.assemble_generic(&segments).unwrap();
1839
1840 let sg4_inst = &tree.groups[0].repetitions[0];
1841 let sg8_tree = sg4_inst
1842 .child_groups
1843 .iter()
1844 .find(|g| g.group_id == "SG8")
1845 .expect("SG8 should still be present");
1846 assert_eq!(sg8_tree.repetitions.len(), 1);
1848 assert!(
1849 sg4_inst
1850 .skipped_segments
1851 .iter()
1852 .all(|s| s.tag != "FOO"),
1853 "FOO must not be skipped when skip mode is off"
1854 );
1855 }
1856
1857 #[test]
1858 fn test_roundtrip_with_skip() {
1859 use crate::disassembler::Disassembler;
1862 use crate::renderer::render_edifact;
1863
1864 let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
1865 let mig = make_mig_schema(vec!["UNH", "UNT"], vec![sg8]);
1866
1867 let segments = vec![
1868 make_owned_seg("UNH", vec![vec!["001"]]),
1869 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1870 make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1871 make_owned_seg("CCI", vec![vec!["Z30"]]),
1872 make_owned_seg("UNT", vec![vec!["4", "001"]]),
1873 ];
1874
1875 let assembler = Assembler::with_config(
1876 &mig,
1877 AssemblerConfig {
1878 skip_unknown_segments: true,
1879 ..Default::default()
1880 },
1881 );
1882 let tree = assembler.assemble_generic(&segments).unwrap();
1883
1884 let disassembler = Disassembler::new(&mig);
1885 let dis = disassembler.disassemble(&tree);
1886 let delimiters = edifact_primitives::EdifactDelimiters::default();
1887 let rendered = render_edifact(&dis, &delimiters);
1888
1889 assert_eq!(dis.len(), 5);
1894 assert_eq!(dis[0].tag, "UNH");
1895 assert_eq!(dis[1].tag, "SEQ");
1896 assert_eq!(dis[2].tag, "CCI");
1897 assert_eq!(dis[3].tag, "RFF"); assert_eq!(dis[4].tag, "UNT");
1899
1900 assert!(rendered.contains("UNH+001"));
1902 assert!(rendered.contains("SEQ+Z98"));
1903 assert!(rendered.contains("RFF+Z38:REF1"));
1904 assert!(rendered.contains("CCI+Z30"));
1905 assert!(rendered.contains("UNT+4:001"));
1906 }
1907
1908 #[test]
1911 fn test_variant_groups_interleaved_reps() {
1912 let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1916 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1917
1918 let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1919
1920 let segments = vec![
1921 make_owned_seg("UNH", vec![vec!["001"]]),
1922 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1923 make_owned_seg("CCI", vec![vec!["Z30"]]),
1924 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1925 make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1926 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1927 make_owned_seg("CCI", vec![vec!["Z31"]]),
1928 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1929 make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1930 ];
1931
1932 let assembler = Assembler::new(&mig);
1933 let result = assembler.assemble_generic(&segments).unwrap();
1934
1935 assert_eq!(result.segments.len(), 1); assert_eq!(result.groups.len(), 1); let sg8 = &result.groups[0];
1938 assert_eq!(sg8.group_id, "SG8");
1939 assert_eq!(sg8.repetitions.len(), 4);
1940
1941 assert_eq!(sg8.repetitions[0].segments[0].elements[0][0], "ZD7");
1943 assert_eq!(sg8.repetitions[0].segments[1].tag, "CCI");
1944 assert_eq!(sg8.repetitions[1].segments[0].elements[0][0], "Z98");
1945 assert_eq!(sg8.repetitions[1].segments[1].tag, "RFF");
1946 assert_eq!(sg8.repetitions[2].segments[0].elements[0][0], "ZD7");
1947 assert_eq!(sg8.repetitions[3].segments[0].elements[0][0], "Z98");
1948 }
1949
1950 #[test]
1951 fn test_variant_groups_single_variant_type() {
1952 let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1954 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1955
1956 let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1957
1958 let segments = vec![
1959 make_owned_seg("UNH", vec![vec!["001"]]),
1960 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1961 make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1962 make_owned_seg("SEQ", vec![vec!["Z98"]]),
1963 make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1964 ];
1965
1966 let assembler = Assembler::new(&mig);
1967 let result = assembler.assemble_generic(&segments).unwrap();
1968
1969 assert_eq!(result.groups.len(), 1);
1970 assert_eq!(result.groups[0].repetitions.len(), 2);
1971 assert_eq!(
1972 result.groups[0].repetitions[0].segments[0].elements[0][0],
1973 "Z98"
1974 );
1975 assert_eq!(
1976 result.groups[0].repetitions[1].segments[0].elements[0][0],
1977 "Z98"
1978 );
1979 }
1980
1981 #[test]
1982 fn test_non_variant_groups_unchanged() {
1983 let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1985 let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
1986
1987 let mig = make_mig_schema(vec!["UNH", "BGM"], vec![sg2, sg4]);
1988
1989 let segments = vec![
1990 make_owned_seg("UNH", vec![vec!["001"]]),
1991 make_owned_seg("BGM", vec![vec!["E01"]]),
1992 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1993 make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
1994 make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1995 make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1996 ];
1997
1998 let assembler = Assembler::new(&mig);
1999 let result = assembler.assemble_generic(&segments).unwrap();
2000
2001 assert_eq!(result.segments.len(), 2);
2002 assert_eq!(result.groups.len(), 2);
2003 assert_eq!(result.groups[0].group_id, "SG2");
2004 assert_eq!(result.groups[0].repetitions.len(), 2);
2005 assert_eq!(result.groups[1].group_id, "SG4");
2006 assert_eq!(result.groups[1].repetitions.len(), 1);
2007 }
2008
2009 #[test]
2010 fn test_variant_groups_with_nested_children() {
2011 let sg10 = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
2013 let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
2014 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "Z98");
2015
2016 let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
2017
2018 let segments = vec![
2019 make_owned_seg("UNH", vec![vec!["001"]]),
2020 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
2021 make_owned_seg("CCI", vec![vec!["Z30"]]),
2022 make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
2023 make_owned_seg("SEQ", vec![vec!["Z98"]]),
2024 make_owned_seg("CCI", vec![vec!["Z31"]]),
2025 make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
2026 ];
2027
2028 let assembler = Assembler::new(&mig);
2029 let result = assembler.assemble_generic(&segments).unwrap();
2030
2031 assert_eq!(result.groups.len(), 1);
2032 let sg8 = &result.groups[0];
2033 assert_eq!(sg8.repetitions.len(), 2);
2034
2035 assert_eq!(sg8.repetitions[0].child_groups.len(), 1);
2037 assert_eq!(sg8.repetitions[0].child_groups[0].group_id, "SG10");
2038 assert_eq!(
2039 sg8.repetitions[0].child_groups[0].repetitions[0].segments[0].elements[0][0],
2040 "Z30"
2041 );
2042
2043 assert_eq!(sg8.repetitions[1].child_groups.len(), 1);
2045 assert_eq!(
2046 sg8.repetitions[1].child_groups[0].repetitions[0].segments[0].elements[0][0],
2047 "Z31"
2048 );
2049 }
2050
2051 #[test]
2052 fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
2053 let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
2056
2057 let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7]);
2058
2059 let segments = vec![
2060 make_owned_seg("UNH", vec![vec!["001"]]),
2061 make_owned_seg("SEQ", vec![vec!["Z98"]]), make_owned_seg("CCI", vec![vec!["Z30"]]),
2063 ];
2064
2065 let assembler = Assembler::new(&mig);
2066 let result = assembler.assemble_generic(&segments).unwrap();
2067
2068 assert!(result.groups.is_empty());
2070 }
2071
2072 #[test]
2073 fn test_mixed_variant_and_non_variant_groups() {
2074 let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
2076 let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
2077 let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
2078 let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
2079
2080 let mig = make_mig_schema(vec!["UNH"], vec![sg2, sg8_zd7, sg8_z98, sg12]);
2081
2082 let segments = vec![
2083 make_owned_seg("UNH", vec![vec!["001"]]),
2084 make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
2085 make_owned_seg("SEQ", vec![vec!["ZD7"]]),
2086 make_owned_seg("CCI", vec![vec!["Z30"]]),
2087 make_owned_seg("SEQ", vec![vec!["Z98"]]),
2088 make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
2089 make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
2090 ];
2091
2092 let assembler = Assembler::new(&mig);
2093 let result = assembler.assemble_generic(&segments).unwrap();
2094
2095 assert_eq!(result.groups.len(), 3); assert_eq!(result.groups[0].group_id, "SG2");
2097 assert_eq!(result.groups[0].repetitions.len(), 1);
2098 assert_eq!(result.groups[1].group_id, "SG8");
2099 assert_eq!(result.groups[1].repetitions.len(), 2);
2100 assert_eq!(result.groups[2].group_id, "SG12");
2101 assert_eq!(result.groups[2].repetitions.len(), 1);
2102 }
2103
2104 #[test]
2105 fn test_assembler_disambiguates_shared_qualifier_by_full_code_profile() {
2106 use mig_types::schema::common::CodeDefinition;
2114 use mig_types::schema::mig::{MigComposite, MigDataElement};
2115 use std::collections::HashMap;
2116
2117 fn code(value: &str) -> CodeDefinition {
2118 CodeDefinition {
2119 value: value.to_string(),
2120 name: value.to_string(),
2121 description: None,
2122 }
2123 }
2124
2125 fn pia_slot(number: &str, composite_code: &str) -> MigSegment {
2126 MigSegment {
2127 id: "PIA".to_string(),
2128 name: "PIA".to_string(),
2129 description: None,
2130 counter: None,
2131 level: 1,
2132 number: Some(number.to_string()),
2133 max_rep_std: 1,
2134 max_rep_spec: 1,
2135 status_std: Some("M".to_string()),
2136 status_spec: Some("M".to_string()),
2137 example: None,
2138 data_elements: vec![MigDataElement {
2139 id: "4347".to_string(),
2140 name: "Produkt-ID-Funktion".to_string(),
2141 description: None,
2142 status_std: Some("M".to_string()),
2143 status_spec: Some("M".to_string()),
2144 format_std: None,
2145 format_spec: None,
2146 codes: vec![code("5")],
2147 position: 0,
2148 }],
2149 composites: vec![MigComposite {
2150 id: "C212".to_string(),
2151 name: "Item Identifier".to_string(),
2152 description: None,
2153 status_std: Some("M".to_string()),
2154 status_spec: Some("M".to_string()),
2155 data_elements: vec![MigDataElement {
2156 id: "7143".to_string(),
2157 name: "Artikel/Dienstleistung-ID".to_string(),
2158 description: None,
2159 status_std: Some("M".to_string()),
2160 status_spec: Some("M".to_string()),
2161 format_std: None,
2162 format_spec: None,
2163 codes: vec![code(composite_code)],
2164 position: 0,
2165 }],
2166 position: 1,
2167 }],
2168 }
2169 }
2170
2171 let sg4 = MigSegmentGroup {
2172 segments: vec![
2173 crate::test_support::make_mig_segment_numbered("IDE", "00020"),
2174 pia_slot("00108", "Z12"),
2175 pia_slot("00197", "SRW"),
2176 ],
2177 ..make_mig_group("SG4", vec![], vec![])
2178 };
2179 let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
2180
2181 let segments = vec![
2182 make_owned_seg("UNH", vec![vec!["001"]]),
2183 make_owned_seg("IDE", vec![vec!["24"]]),
2184 make_owned_seg("PIA", vec![vec!["5"], vec!["SRW"]]),
2186 ];
2187
2188 let mut qualifier_map = HashMap::new();
2192 qualifier_map.insert("00108".to_string(), (0, 0, "5".to_string()));
2193 qualifier_map.insert("00197".to_string(), (0, 0, "5".to_string()));
2194
2195 let config = AssemblerConfig {
2196 skip_unknown_segments: false,
2197 qualifier_map,
2198 strict_code_matching: true,
2199 };
2200 let assembler = Assembler::with_config(&mig, config);
2201 let tree = assembler.assemble_generic(&segments).unwrap();
2202
2203 let sg4_instance = &tree.groups[0].repetitions[0];
2204 let pia = sg4_instance
2205 .segments
2206 .iter()
2207 .find(|s| s.tag == "PIA")
2208 .expect("PIA consumed into SG4");
2209 assert_eq!(
2210 pia.mig_number.as_deref(),
2211 Some("00197"),
2212 "PIA+5+:::SRW must be assigned the SRW variant (mig=00197), not the Z12 variant"
2213 );
2214 }
2215}