Skip to main content

mig_assembly/
assembler.rs

1//! Recursive descent assembler — MIG-guided segment consumption.
2//!
3//! The assembler walks the MIG tree structure and consumes matching
4//! segments from the input. It produces a generic tree representation
5//! that can be converted to typed PID structs.
6
7use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// A generic assembled tree node (before PID-specific typing).
///
/// Holds the root-level segments and top-level segment groups produced by
/// the assembler, together with the bookkeeping needed to tell where each
/// root segment sat relative to the groups.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Root-level segments: the ones consumed before any group, followed by
    /// the ones consumed after all groups (the split is `post_group_start`).
    pub segments: Vec<AssembledSegment>,
    /// Top-level segment groups, in the order they were assembled.
    pub groups: Vec<AssembledGroup>,
    /// Index in `segments` where post-group segments start (e.g., UNT, UNZ).
    /// Segments before this index appear before groups in EDIFACT order.
    #[serde(default)]
    pub post_group_start: usize,
    /// Root segments consumed between groups during assembly (e.g., UNS
    /// section separator in MSCONS). Key = index into `groups` vec; value =
    /// segments that appear immediately before that group in the EDIFACT
    /// stream. Empty for messages without inter-group root segments.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// An assembled segment with its data elements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the `id` of the input segment (e.g. `NAD`).
    pub tag: String,
    /// `elements[i][j]` = component `j` of element `i`
    pub elements: Vec<Vec<String>>,
}
39
/// An assembled segment group (may repeat).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// Identifier of the MIG group this was assembled from (e.g. `SG2`).
    pub group_id: String,
    /// One entry per consumed repetition of the group, in input order.
    pub repetitions: Vec<AssembledGroupInstance>,
}
46
/// One repetition of a segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments consumed directly by this repetition, in input order.
    pub segments: Vec<AssembledSegment>,
    /// Nested groups consumed inside this repetition.
    pub child_groups: Vec<AssembledGroup>,
    /// Segments that were present in the EDIFACT input but not defined in
    /// the PID-filtered MIG for this group. Only populated when the assembler
    /// runs with [`AssemblerConfig::skip_unknown_segments`] enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
}
58
59impl AssembledGroupInstance {
60    /// Create a virtual `AssembledTree` scoped to this group instance.
61    ///
62    /// The instance's own segments become the tree's root segments,
63    /// and its child groups become the tree's groups. This enables
64    /// running `MappingEngine::map_all_forward()` on a single
65    /// transaction group as if it were a complete message.
66    pub fn as_assembled_tree(&self) -> AssembledTree {
67        AssembledTree {
68            segments: self.segments.clone(),
69            groups: self.child_groups.clone(),
70            post_group_start: self.segments.len(),
71            inter_group_segments: std::collections::BTreeMap::new(),
72        }
73    }
74}
75
/// Configuration for the assembler.
///
/// The derived `Default` leaves every option switched off.
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, the assembler skips segments inside a group instance that
    /// don't match any remaining MIG slot, nested-group entry, or the group's
    /// entry tag (next repetition). Skipped segments are preserved on
    /// [`AssembledGroupInstance::skipped_segments`] for roundtrip re-emission.
    ///
    /// Default: `false` (strict AHB — unknown segments stall the cursor).
    pub skip_unknown_segments: bool,
}
87
/// MIG-guided assembler.
///
/// Takes a MIG schema and uses it as a grammar to guide consumption
/// of parsed EDIFACT segments. Produces a generic `AssembledTree`.
pub struct Assembler<'a> {
    /// Borrowed MIG schema whose segments and segment groups drive assembly.
    mig: &'a MigSchema,
    /// Behaviour switches; see [`AssemblerConfig`].
    config: AssemblerConfig,
}
96
97impl<'a> Assembler<'a> {
98    pub fn new(mig: &'a MigSchema) -> Self {
99        Self {
100            mig,
101            config: AssemblerConfig::default(),
102        }
103    }
104
105    pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
106        Self { mig, config }
107    }
108
109    /// Assemble segments into a generic tree following MIG structure.
110    pub fn assemble_generic(
111        &self,
112        segments: &[OwnedSegment],
113    ) -> Result<AssembledTree, AssemblyError> {
114        let mut cursor = SegmentCursor::new(segments.len());
115        let mut tree = AssembledTree {
116            segments: Vec::new(),
117            groups: Vec::new(),
118            post_group_start: 0,
119            inter_group_segments: std::collections::BTreeMap::new(),
120        };
121
122        // Track which MIG segment indices were matched in the first pass
123        let mut matched_seg_indices = Vec::new();
124
125        // Process top-level segments (first pass — before groups)
126        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
127            if cursor.is_exhausted() {
128                break;
129            }
130            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
131                tree.segments.push(assembled);
132                matched_seg_indices.push(i);
133            }
134        }
135
136        // Process segment groups, interleaving root segment consumption.
137        // Some message types (e.g., MSCONS) have root segments like UNS
138        // between groups (SG2 and SG5). Before trying each group, consume
139        // any unmatched root segments at the current cursor position.
140        //
141        // When consecutive same-ID groups have variant_code set (e.g., 3 SG8
142        // entries for ZD7, Z98, ZF3), the assembler tries ALL variants at each
143        // cursor position to handle interleaved reps.
144        let mut group_idx = 0;
145        while group_idx < self.mig.segment_groups.len() {
146            if cursor.is_exhausted() {
147                break;
148            }
149
150            let mig_group = &self.mig.segment_groups[group_idx];
151
152            // Try consuming unmatched root segments before this group
153            let tree_group_idx = tree.groups.len();
154            for (i, mig_seg) in self.mig.segments.iter().enumerate() {
155                if cursor.is_exhausted() {
156                    break;
157                }
158                if matched_seg_indices.contains(&i) {
159                    continue;
160                }
161                if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
162                    tree.inter_group_segments
163                        .entry(tree_group_idx)
164                        .or_default()
165                        .push(assembled);
166                    matched_seg_indices.push(i);
167                }
168            }
169
170            // Check if this starts a variant set (consecutive same-ID groups with variant_code)
171            if mig_group.variant_code.is_some() {
172                let variant_count = self.mig.segment_groups[group_idx..]
173                    .iter()
174                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
175                    .count();
176                let variant_end = group_idx + variant_count;
177
178                let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
179                if let Some(combined) =
180                    self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
181                {
182                    tree.groups.push(combined);
183                }
184                group_idx = variant_end;
185            } else {
186                if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
187                    tree.groups.push(assembled);
188                }
189                group_idx += 1;
190            }
191        }
192
193        // Mark where post-group segments start
194        tree.post_group_start = tree.segments.len();
195
196        // Second pass: try unmatched top-level segments (e.g., UNT, UNZ after groups)
197        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
198            if cursor.is_exhausted() {
199                break;
200            }
201            if matched_seg_indices.contains(&i) {
202                continue;
203            }
204            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
205                tree.segments.push(assembled);
206            }
207        }
208
209        Ok(tree)
210    }
211
212    fn try_consume_segment(
213        &self,
214        segments: &[OwnedSegment],
215        cursor: &mut SegmentCursor,
216        mig_seg: &MigSegment,
217    ) -> Result<Option<AssembledSegment>, AssemblyError> {
218        if cursor.is_exhausted() {
219            return Ok(None);
220        }
221        let seg = &segments[cursor.position()];
222        if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
223            let assembled = owned_to_assembled(seg);
224            cursor.advance();
225            Ok(Some(assembled))
226        } else {
227            Ok(None) // Segment not present (optional)
228        }
229    }
230
231    fn try_consume_group(
232        &self,
233        segments: &[OwnedSegment],
234        cursor: &mut SegmentCursor,
235        mig_group: &MigSegmentGroup,
236    ) -> Result<Option<AssembledGroup>, AssemblyError> {
237        let mut repetitions = Vec::new();
238        let entry_segment = mig_group.segments.first().ok_or_else(|| {
239            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
240        })?;
241
242        // Loop for repeating groups
243        while !cursor.is_exhausted() {
244            let seg = &segments[cursor.position()];
245            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
246                break; // Current segment doesn't match group entry — stop repeating
247            }
248
249            // Check variant qualifier if set — tag matches but wrong variant
250            if !mig_group.variant_codes.is_empty() {
251                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
252                let actual_qual = seg
253                    .elements
254                    .get(ei)
255                    .and_then(|e| e.get(ci))
256                    .map(|s| s.as_str())
257                    .unwrap_or("");
258                if !mig_group
259                    .variant_codes
260                    .iter()
261                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
262                {
263                    break;
264                }
265            } else if let Some(ref expected_code) = mig_group.variant_code {
266                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
267                let actual_qual = seg
268                    .elements
269                    .get(ei)
270                    .and_then(|e| e.get(ci))
271                    .map(|s| s.as_str())
272                    .unwrap_or("");
273                if !actual_qual.eq_ignore_ascii_case(expected_code) {
274                    break;
275                }
276            }
277
278            let mut instance = AssembledGroupInstance {
279                segments: Vec::new(),
280                child_groups: Vec::new(),
281                skipped_segments: Vec::new(),
282            };
283
284            // Consume segments within this group instance.
285            // Process MIG slots in tag runs: for consecutive slots with the
286            // same tag, consume ALL matching input segments — not just the
287            // defined count. This handles real-world fixtures with more
288            // repetitions than the merged MIG predicts (e.g., 6 RFFs when
289            // the schema defines max 4).
290            //
291            // The entry segment (first tag run) is consumed bounded — one per
292            // defined slot — because the outer while loop uses the entry tag
293            // to delineate group repetitions.
294            let mut slot_idx = 0;
295            let mut is_entry_run = true;
296            while slot_idx < mig_group.segments.len() {
297                if cursor.is_exhausted() {
298                    break;
299                }
300                let current_tag = &mig_group.segments[slot_idx].id;
301                let run_len = mig_group.segments[slot_idx..]
302                    .iter()
303                    .take_while(|s| s.id == *current_tag)
304                    .count();
305
306                if is_entry_run {
307                    // Entry tag: consume at most run_len (preserves group boundaries)
308                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
309                        if cursor.is_exhausted() {
310                            break;
311                        }
312                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
313                            instance.segments.push(assembled);
314                        }
315                    }
316                    is_entry_run = false;
317                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
318                    // Non-entry slot with SAME tag as entry (e.g., CCI appears as
319                    // both entry and non-entry in merged SG30).
320                    //
321                    // Only consume if we haven't yet consumed any NON-entry-tag
322                    // segments (i.e., we're still in a consecutive entry-tag run).
323                    // Once we've consumed a different tag (like CAV), seeing the
324                    // entry tag again means a new rep boundary.
325                    //
326                    // z35: entry CCI → CAV CAV → sees CCI → has_other=true → break ✓
327                    // z39: entry CCI → (no CAV) → sees CCI → has_other=false → consume ✓
328                    //      then CCI CCI → CAV → sees CCI → has_other=true → break
329                    //      BUT: z39 needs CCI-CAV-CCI-CAV structure
330                    //
331                    // Better heuristic: check if ALL remaining slots from here are
332                    // entry-tag + non-entry pairs. If the current slot is entry-tag
333                    // and the NEXT input segment after it would be a non-entry tag,
334                    // consume — it's a continuation. Otherwise break.
335                    if cursor.is_exhausted() {
336                        break;
337                    }
338                    let seg = &segments[cursor.position()];
339                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
340                        break;
341                    }
342                    // Check: is there a non-entry segment AFTER this entry-tag?
343                    // If so, this CCI+CAV pair is part of the current rep.
344                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
345                        let next = &segments[cursor.position() + 1];
346                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
347                            && mig_group.segments.iter().any(|s| {
348                                matcher::matches_segment_tag(&next.id, &s.id)
349                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
350                            })
351                    } else {
352                        false
353                    };
354                    if has_following_non_entry {
355                        // CCI followed by CAV → consume as continuation pair
356                        instance.segments.push(owned_to_assembled(seg));
357                        cursor.advance();
358                    } else {
359                        // CCI followed by CCI or unknown → let outer loop decide
360                        break;
361                    }
362                } else {
363                    // Non-entry tag: greedily consume all matching segments
364                    while !cursor.is_exhausted() {
365                        let seg = &segments[cursor.position()];
366                        if matcher::matches_segment_tag(&seg.id, current_tag) {
367                            instance.segments.push(owned_to_assembled(seg));
368                            cursor.advance();
369                        } else {
370                            break;
371                        }
372                    }
373                }
374
375                slot_idx += run_len;
376
377                // Point A: Skip unknown segments between MIG slot runs.
378                // When skip mode is ON and we just finished a slot run but the
379                // current segment doesn't match any remaining MIG slot, nested
380                // group entry, or the entry tag, skip it.
381                if self.config.skip_unknown_segments {
382                    while !cursor.is_exhausted() {
383                        let seg = &segments[cursor.position()];
384                        // Stop if it matches the entry tag (next group repetition)
385                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
386                            break;
387                        }
388                        // Stop if it matches any remaining MIG slot
389                        if mig_group.segments[slot_idx..]
390                            .iter()
391                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
392                        {
393                            break;
394                        }
395                        // Stop if it matches any nested group entry
396                        if mig_group.nested_groups.iter().any(|ng| {
397                            ng.segments
398                                .first()
399                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
400                        }) {
401                            break;
402                        }
403                        // Unknown segment — skip it
404                        instance.skipped_segments.push(owned_to_assembled(seg));
405                        cursor.advance();
406                    }
407                }
408            }
409
410            // Consume nested groups (variant-aware for same-ID groups)
411            let mut nested_idx = 0;
412            while nested_idx < mig_group.nested_groups.len() {
413                if cursor.is_exhausted() {
414                    break;
415                }
416                let nested = &mig_group.nested_groups[nested_idx];
417
418                if nested.variant_code.is_some() {
419                    // Variant set: collect consecutive same-ID groups with variant_code
420                    let variant_count = mig_group.nested_groups[nested_idx..]
421                        .iter()
422                        .take_while(|g| g.id == nested.id && g.variant_code.is_some())
423                        .count();
424                    let variant_end = nested_idx + variant_count;
425                    let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
426                    if let Some(combined) =
427                        self.try_consume_variant_groups(segments, cursor, variant_groups)?
428                    {
429                        instance.child_groups.push(combined);
430                    }
431                    nested_idx = variant_end;
432                } else {
433                    if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
434                        instance.child_groups.push(assembled);
435                    }
436                    nested_idx += 1;
437                }
438            }
439
440            repetitions.push(instance);
441        }
442
443        if repetitions.is_empty() {
444            Ok(None)
445        } else {
446            Ok(Some(AssembledGroup {
447                group_id: mig_group.id.clone(),
448                repetitions,
449            }))
450        }
451    }
452
453    /// Consume interleaved repetitions of variant groups.
454    ///
455    /// At each cursor position, tries all variant definitions to find which one
456    /// matches the entry segment's qualifier. Collects all reps into one
457    /// `AssembledGroup` with the shared group_id.
458    fn try_consume_variant_groups(
459        &self,
460        segments: &[OwnedSegment],
461        cursor: &mut SegmentCursor,
462        variants: &[MigSegmentGroup],
463    ) -> Result<Option<AssembledGroup>, AssemblyError> {
464        let group_id = variants[0].id.clone();
465        let entry_tag = variants[0]
466            .segments
467            .first()
468            .map(|s| s.id.as_str())
469            .unwrap_or("");
470        let mut all_reps = Vec::new();
471
472        while !cursor.is_exhausted() {
473            let seg = &segments[cursor.position()];
474            if !matcher::matches_segment_tag(&seg.id, entry_tag) {
475                break;
476            }
477
478            // Find which variant matches this segment's qualifier.
479            // Each variant may have its qualifier at a different element position
480            // (e.g., CCI+Z19 has qualifier at [0][0], but CCI+++Z15 at [2][0]).
481            let matched = variants.iter().find(|v| {
482                let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
483                let actual_qual = seg
484                    .elements
485                    .get(ei)
486                    .and_then(|e| e.get(ci))
487                    .map(|s| s.as_str())
488                    .unwrap_or("");
489                if !v.variant_codes.is_empty() {
490                    v.variant_codes
491                        .iter()
492                        .any(|c| actual_qual.eq_ignore_ascii_case(c))
493                } else if let Some(ref expected_code) = v.variant_code {
494                    actual_qual.eq_ignore_ascii_case(expected_code)
495                } else {
496                    false
497                }
498            });
499
500            if let Some(variant) = matched {
501                if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
502                    all_reps.extend(group.repetitions);
503                } else {
504                    break;
505                }
506            } else {
507                // No variant matches — try consuming with the first variant as
508                // fallback to avoid getting stuck. This handles edge cases where
509                // the qualifier doesn't exactly match any variant code.
510                if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
511                    all_reps.extend(group.repetitions);
512                } else {
513                    break;
514                }
515            }
516        }
517
518        if all_reps.is_empty() {
519            Ok(None)
520        } else {
521            Ok(Some(AssembledGroup {
522                group_id,
523                repetitions: all_reps,
524            }))
525        }
526    }
527
528    /// Assemble segments with diagnostic collection.
529    ///
530    /// Returns the assembled tree plus diagnostics for segments not consumed
531    /// by the MIG-guided assembly. Existing `assemble_generic()` is unchanged.
532    pub fn assemble_with_diagnostics(
533        &self,
534        segments: &[OwnedSegment],
535    ) -> (AssembledTree, Vec<StructureDiagnostic>) {
536        let mut diagnostics = Vec::new();
537
538        let tree = match self.assemble_generic(segments) {
539            Ok(tree) => tree,
540            Err(e) => {
541                diagnostics.push(StructureDiagnostic {
542                    kind: StructureDiagnosticKind::UnexpectedSegment,
543                    segment_id: String::new(),
544                    position: 0,
545                    message: format!("Assembly failed: {e}"),
546                });
547                return (
548                    AssembledTree {
549                        segments: Vec::new(),
550                        groups: Vec::new(),
551                        post_group_start: 0,
552                        inter_group_segments: std::collections::BTreeMap::new(),
553                    },
554                    diagnostics,
555                );
556            }
557        };
558
559        // Count consumed segments in the assembled tree
560        let consumed = count_tree_segments(&tree);
561
562        // Segments beyond consumed count are unconsumed
563        for (i, seg) in segments.iter().enumerate().skip(consumed) {
564            diagnostics.push(StructureDiagnostic {
565                kind: StructureDiagnosticKind::UnexpectedSegment,
566                segment_id: seg.id.clone(),
567                position: i,
568                message: format!(
569                    "Segment '{}' at position {} was not consumed by MIG-guided assembly",
570                    seg.id, i
571                ),
572            });
573        }
574
575        (tree, diagnostics)
576    }
577}
578
579fn count_tree_segments(tree: &AssembledTree) -> usize {
580    let mut count = tree.segments.len();
581    for group in &tree.groups {
582        count += count_group_segments(group);
583    }
584    // Count inter-group segments (e.g., UNS+D between groups)
585    for segs in tree.inter_group_segments.values() {
586        count += segs.len();
587    }
588    count
589}
590
591fn count_group_segments(group: &AssembledGroup) -> usize {
592    let mut count = 0;
593    for rep in &group.repetitions {
594        count += rep.segments.len();
595        count += rep.skipped_segments.len();
596        for child in &rep.child_groups {
597            count += count_group_segments(child);
598        }
599    }
600    count
601}
602
603pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
604    AssembledSegment {
605        tag: seg.id.clone(),
606        elements: seg.elements.clone(),
607    }
608}
609
610#[cfg(test)]
611mod tests {
612    use super::*;
613    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
614
615    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
616        OwnedSegment {
617            id: id.to_string(),
618            elements: elements
619                .into_iter()
620                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
621                .collect(),
622            segment_number: 0,
623        }
624    }
625
626    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
627        MigSchema {
628            message_type: "UTILMD".to_string(),
629            variant: Some("Strom".to_string()),
630            version: "S2.1".to_string(),
631            publication_date: "2025-03-20".to_string(),
632            author: "BDEW".to_string(),
633            format_version: "FV2504".to_string(),
634            source_file: "test".to_string(),
635            segments: segments.into_iter().map(make_mig_segment).collect(),
636            segment_groups: groups,
637        }
638    }
639
/// A MIG with only root-level segments yields those segments in order and
/// no groups.
#[test]
fn test_assembler_top_level_segments_only() {
    let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
        make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
        make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
        make_owned_seg("UNT", vec![vec!["4", "001"]]),
    ];

    let tree = Assembler::new(&mig).assemble_generic(&input).unwrap();

    let tags: Vec<&str> = tree.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(tags, ["UNH", "BGM", "DTM", "UNT"]);
    assert!(tree.groups.is_empty());
}
661
/// Two top-level groups: SG2 repeats once per NAD, SG4 holds IDE + STS.
#[test]
fn test_assembler_with_segment_group() {
    let mig_groups = vec![
        make_mig_group("SG2", vec!["NAD"], vec![]),
        make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
    ];
    let mig = make_mig_schema(vec!["UNH", "BGM"], mig_groups);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
    ];

    let tree = Assembler::new(&mig).assemble_generic(&input).unwrap();

    // Root level: UNH, BGM; both groups were assembled.
    assert_eq!(tree.segments.len(), 2);
    assert_eq!(tree.groups.len(), 2);

    // SG2: one repetition per NAD segment.
    let sg2 = &tree.groups[0];
    assert_eq!(sg2.group_id, "SG2");
    assert_eq!(sg2.repetitions.len(), 2);
    for rep in &sg2.repetitions {
        assert_eq!(rep.segments[0].tag, "NAD");
    }

    // SG4: a single repetition containing IDE + STS.
    let sg4 = &tree.groups[1];
    assert_eq!(sg4.group_id, "SG4");
    assert_eq!(sg4.repetitions.len(), 1);
    assert_eq!(sg4.repetitions[0].segments.len(), 2);
}
697
/// SG3 (CTA, COM) nested inside SG2 (NAD) ends up as a child group of the
/// SG2 repetition.
#[test]
fn test_assembler_nested_groups() {
    let sg3_def = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
    let sg2_def = make_mig_group("SG2", vec!["NAD"], vec![sg3_def]);
    let mig = make_mig_schema(vec!["UNH", "BGM"], vec![sg2_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
        make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
    ];

    let tree = Assembler::new(&mig).assemble_generic(&input).unwrap();

    // Outer group: a single SG2 repetition starting with NAD.
    let outer = &tree.groups[0];
    assert_eq!(outer.group_id, "SG2");
    assert_eq!(outer.repetitions.len(), 1);

    let outer_rep = &outer.repetitions[0];
    assert_eq!(outer_rep.segments[0].tag, "NAD");

    // Inner group: SG3 with CTA followed by COM.
    assert_eq!(outer_rep.child_groups.len(), 1);
    let inner = &outer_rep.child_groups[0];
    assert_eq!(inner.group_id, "SG3");

    let inner_segments = &inner.repetitions[0].segments;
    assert_eq!(inner_segments.len(), 2);
    assert_eq!(inner_segments[0].tag, "CTA");
    assert_eq!(inner_segments[1].tag, "COM");
}
733
/// A MIG segment missing from the input (DTM here) is passed over and the
/// following segments are still consumed.
#[test]
fn test_assembler_optional_segments_skipped() {
    let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);

    // No DTM in the input.
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("UNT", vec![vec!["2", "001"]]),
    ];

    let tree = Assembler::new(&mig).assemble_generic(&input).unwrap();

    let tags: Vec<&str> = tree.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(tags, ["UNH", "BGM", "UNT"]);
}
754
755    #[test]
756    fn test_assembler_empty_segments() {
757        let mig = make_mig_schema(vec!["UNH"], vec![]);
758        let assembler = Assembler::new(&mig);
759        let result = assembler.assemble_generic(&[]).unwrap();
760        assert!(result.segments.is_empty());
761        assert!(result.groups.is_empty());
762    }
763
764    #[test]
765    fn test_assembler_preserves_element_data() {
766        let mig = make_mig_schema(vec!["DTM"], vec![]);
767
768        let segments = vec![make_owned_seg(
769            "DTM",
770            vec![vec!["137", "202501010000+01", "303"]],
771        )];
772
773        let assembler = Assembler::new(&mig);
774        let result = assembler.assemble_generic(&segments).unwrap();
775
776        let dtm = &result.segments[0];
777        assert_eq!(dtm.elements[0][0], "137");
778        assert_eq!(dtm.elements[0][1], "202501010000+01");
779        assert_eq!(dtm.elements[0][2], "303");
780    }
781
782    #[test]
783    fn test_group_instance_as_assembled_tree() {
784        // Build an SG4 instance with root segments (IDE, STS) and child groups (SG5)
785        let sg5 = AssembledGroup {
786            group_id: "SG5".to_string(),
787            repetitions: vec![AssembledGroupInstance {
788                segments: vec![AssembledSegment {
789                    tag: "LOC".to_string(),
790                    elements: vec![vec!["Z16".to_string(), "DE000111222333".to_string()]],
791                }],
792                child_groups: vec![],
793                skipped_segments: vec![],
794            }],
795        };
796
797        let sg4_instance = AssembledGroupInstance {
798            segments: vec![
799                AssembledSegment {
800                    tag: "IDE".to_string(),
801                    elements: vec![vec!["24".to_string(), "TX001".to_string()]],
802                },
803                AssembledSegment {
804                    tag: "STS".to_string(),
805                    elements: vec![vec!["7".to_string()]],
806                },
807            ],
808            child_groups: vec![sg5],
809            skipped_segments: vec![],
810        };
811
812        let sub_tree = sg4_instance.as_assembled_tree();
813
814        // Root segments of sub-tree are the SG4 instance's segments
815        assert_eq!(sub_tree.segments.len(), 2);
816        assert_eq!(sub_tree.segments[0].tag, "IDE");
817        assert_eq!(sub_tree.segments[1].tag, "STS");
818
819        // Groups of sub-tree are the SG4 instance's child groups
820        assert_eq!(sub_tree.groups.len(), 1);
821        assert_eq!(sub_tree.groups[0].group_id, "SG5");
822
823        // post_group_start marks where root segments end
824        assert_eq!(sub_tree.post_group_start, 2);
825    }
826
827    #[test]
828    fn test_assembler_from_parsed_edifact() {
829        // End-to-end: parse raw EDIFACT, then assemble
830        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
831        let segments = crate::tokenize::parse_to_segments(input).unwrap();
832
833        let mig = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);
834
835        let assembler = Assembler::new(&mig);
836        let result = assembler.assemble_generic(&segments).unwrap();
837
838        assert!(result.segments.iter().any(|s| s.tag == "UNH"));
839        assert!(result.segments.iter().any(|s| s.tag == "BGM"));
840        assert!(result.segments.iter().any(|s| s.tag == "DTM"));
841    }
842
843    #[test]
844    fn test_assemble_with_diagnostics_clean_input() {
845        let mig = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
846        let segments = vec![
847            make_owned_seg("UNH", vec![vec!["001"]]),
848            make_owned_seg("BGM", vec![vec!["E01"]]),
849            make_owned_seg("UNT", vec![vec!["2", "001"]]),
850        ];
851        let assembler = Assembler::new(&mig);
852        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
853        assert_eq!(tree.segments.len(), 3);
854        assert!(
855            diagnostics.is_empty(),
856            "Clean input should have no diagnostics"
857        );
858    }
859
860    #[test]
861    fn test_assemble_with_diagnostics_unconsumed_segments() {
862        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![]);
863        let segments = vec![
864            make_owned_seg("UNH", vec![vec!["001"]]),
865            make_owned_seg("BGM", vec![vec!["E01"]]),
866            make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
867        ];
868        let assembler = Assembler::new(&mig);
869        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
870        assert_eq!(tree.segments.len(), 2);
871        assert_eq!(diagnostics.len(), 1);
872        assert_eq!(
873            diagnostics[0].kind,
874            StructureDiagnosticKind::UnexpectedSegment
875        );
876        assert_eq!(diagnostics[0].segment_id, "FTX");
877        assert_eq!(diagnostics[0].position, 2);
878    }
879
880    #[test]
881    fn test_assemble_with_diagnostics_multiple_unconsumed() {
882        let mig = make_mig_schema(vec!["UNH"], vec![]);
883        let segments = vec![
884            make_owned_seg("UNH", vec![vec!["001"]]),
885            make_owned_seg("FOO", vec![]),
886            make_owned_seg("BAR", vec![]),
887            make_owned_seg("BAZ", vec![]),
888        ];
889        let assembler = Assembler::new(&mig);
890        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
891        assert_eq!(tree.segments.len(), 1);
892        assert_eq!(diagnostics.len(), 3);
893        assert_eq!(diagnostics[0].segment_id, "FOO");
894        assert_eq!(diagnostics[1].segment_id, "BAR");
895        assert_eq!(diagnostics[2].segment_id, "BAZ");
896    }
897
898    // ── Skip-unknown-segments tests ──
899
900    #[test]
901    fn test_skip_unknown_segment_between_slots() {
902        // MIG group expects [SEQ, CCI], input has [SEQ, RFF, CCI].
903        // With skip ON, RFF is skipped and CCI is consumed.
904        // With skip OFF (default), CCI is lost because RFF stalls the cursor.
905        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
906        let mig = make_mig_schema(vec!["UNH"], vec![sg8.clone()]);
907
908        let segments = vec![
909            make_owned_seg("UNH", vec![vec!["001"]]),
910            make_owned_seg("SEQ", vec![vec!["Z98"]]),
911            make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
912            make_owned_seg("CCI", vec![vec!["Z30"]]),
913        ];
914
915        // Skip OFF: CCI not consumed (RFF stalls cursor after SEQ)
916        let off = Assembler::new(&mig);
917        let tree_off = off.assemble_generic(&segments).unwrap();
918        let sg8_off = &tree_off.groups[0];
919        assert_eq!(sg8_off.repetitions[0].segments.len(), 1); // Only SEQ
920        assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");
921
922        // Skip ON: RFF skipped, CCI consumed
923        let on = Assembler::with_config(
924            &mig,
925            AssemblerConfig {
926                skip_unknown_segments: true,
927            },
928        );
929        let tree_on = on.assemble_generic(&segments).unwrap();
930        let sg8_on = &tree_on.groups[0];
931        assert_eq!(sg8_on.repetitions[0].segments.len(), 2); // SEQ + CCI
932        assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
933        assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
934    }
935
936    #[test]
937    fn test_skip_preserves_on_instance() {
938        // Skipped segments are stored in instance.skipped_segments
939        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
940        let mig = make_mig_schema(vec!["UNH"], vec![sg8]);
941
942        let segments = vec![
943            make_owned_seg("UNH", vec![vec!["001"]]),
944            make_owned_seg("SEQ", vec![vec!["Z98"]]),
945            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
946            make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
947            make_owned_seg("CCI", vec![vec!["Z30"]]),
948        ];
949
950        let assembler = Assembler::with_config(
951            &mig,
952            AssemblerConfig {
953                skip_unknown_segments: true,
954            },
955        );
956        let tree = assembler.assemble_generic(&segments).unwrap();
957        let instance = &tree.groups[0].repetitions[0];
958
959        assert_eq!(instance.segments.len(), 2); // SEQ + CCI
960        assert_eq!(instance.skipped_segments.len(), 2); // RFF + DTM
961        assert_eq!(instance.skipped_segments[0].tag, "RFF");
962        assert_eq!(instance.skipped_segments[1].tag, "DTM");
963    }
964
965    #[test]
966    fn test_skip_mode_off_default() {
967        // Assembler::new() doesn't skip (backwards compat)
968        let mig = make_mig_schema(vec![], vec![]);
969        let assembler = Assembler::new(&mig);
970        assert!(!assembler.config.skip_unknown_segments);
971    }
972
973    #[test]
974    fn test_skip_does_not_consume_nested_group_entry() {
975        // Skip must NOT consume segments that are nested group entries.
976        // SG4 expects [IDE, STS], nested SG5 expects [LOC].
977        // Input: IDE, FOO, STS, LOC. FOO should be skipped, LOC goes to SG5.
978        let sg5 = make_mig_group("SG5", vec!["LOC"], vec![]);
979        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![sg5]);
980        let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
981
982        let segments = vec![
983            make_owned_seg("UNH", vec![vec!["001"]]),
984            make_owned_seg("IDE", vec![vec!["24"]]),
985            make_owned_seg("FOO", vec![vec!["unknown"]]),
986            make_owned_seg("STS", vec![vec!["7"]]),
987            make_owned_seg("LOC", vec![vec!["Z16"]]),
988        ];
989
990        let assembler = Assembler::with_config(
991            &mig,
992            AssemblerConfig {
993                skip_unknown_segments: true,
994            },
995        );
996        let tree = assembler.assemble_generic(&segments).unwrap();
997        let sg4 = &tree.groups[0];
998        let inst = &sg4.repetitions[0];
999
1000        // IDE + STS consumed, FOO skipped
1001        assert_eq!(inst.segments.len(), 2);
1002        assert_eq!(inst.segments[0].tag, "IDE");
1003        assert_eq!(inst.segments[1].tag, "STS");
1004        assert_eq!(inst.skipped_segments.len(), 1);
1005        assert_eq!(inst.skipped_segments[0].tag, "FOO");
1006
1007        // LOC went to nested SG5
1008        assert_eq!(inst.child_groups.len(), 1);
1009        assert_eq!(inst.child_groups[0].group_id, "SG5");
1010        assert_eq!(inst.child_groups[0].repetitions[0].segments[0].tag, "LOC");
1011    }
1012
1013    #[test]
1014    fn test_roundtrip_with_skip() {
1015        // Full roundtrip: assemble with skip → disassemble → byte-identical
1016        // including skipped segments in the output.
1017        use crate::disassembler::Disassembler;
1018        use crate::renderer::render_edifact;
1019
1020        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
1021        let mig = make_mig_schema(vec!["UNH", "UNT"], vec![sg8]);
1022
1023        let segments = vec![
1024            make_owned_seg("UNH", vec![vec!["001"]]),
1025            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1026            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1027            make_owned_seg("CCI", vec![vec!["Z30"]]),
1028            make_owned_seg("UNT", vec![vec!["4", "001"]]),
1029        ];
1030
1031        let assembler = Assembler::with_config(
1032            &mig,
1033            AssemblerConfig {
1034                skip_unknown_segments: true,
1035            },
1036        );
1037        let tree = assembler.assemble_generic(&segments).unwrap();
1038
1039        let disassembler = Disassembler::new(&mig);
1040        let dis = disassembler.disassemble(&tree);
1041        let delimiters = edifact_primitives::EdifactDelimiters::default();
1042        let rendered = render_edifact(&dis, &delimiters);
1043
1044        // All 5 segments should appear in output (including skipped RFF).
1045        // Disassembler emits MIG-guided segments first (SEQ, CCI),
1046        // then skipped segments (RFF) — so order within the group differs
1047        // from the original input, but all content is preserved.
1048        assert_eq!(dis.len(), 5);
1049        assert_eq!(dis[0].tag, "UNH");
1050        assert_eq!(dis[1].tag, "SEQ");
1051        assert_eq!(dis[2].tag, "CCI");
1052        assert_eq!(dis[3].tag, "RFF"); // skipped → emitted after MIG segments
1053        assert_eq!(dis[4].tag, "UNT");
1054
1055        // Rendered output contains all segments
1056        assert!(rendered.contains("UNH+001"));
1057        assert!(rendered.contains("SEQ+Z98"));
1058        assert!(rendered.contains("RFF+Z38:REF1"));
1059        assert!(rendered.contains("CCI+Z30"));
1060        assert!(rendered.contains("UNT+4:001"));
1061    }
1062
1063    // ── Variant-aware assembly tests ──
1064
1065    #[test]
1066    fn test_variant_groups_interleaved_reps() {
1067        // Two SG8 variant definitions: one for SEQ+ZD7, one for SEQ+Z98.
1068        // Input has interleaved reps: ZD7, Z98, ZD7, Z98.
1069        // All should be collected into one SG8 group with 4 reps.
1070        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1071        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1072
1073        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1074
1075        let segments = vec![
1076            make_owned_seg("UNH", vec![vec!["001"]]),
1077            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1078            make_owned_seg("CCI", vec![vec!["Z30"]]),
1079            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1080            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1081            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1082            make_owned_seg("CCI", vec![vec!["Z31"]]),
1083            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1084            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1085        ];
1086
1087        let assembler = Assembler::new(&mig);
1088        let result = assembler.assemble_generic(&segments).unwrap();
1089
1090        assert_eq!(result.segments.len(), 1); // UNH
1091        assert_eq!(result.groups.len(), 1); // One combined SG8
1092        let sg8 = &result.groups[0];
1093        assert_eq!(sg8.group_id, "SG8");
1094        assert_eq!(sg8.repetitions.len(), 4);
1095
1096        // ZD7 reps have SEQ+CCI, Z98 reps have SEQ+RFF
1097        assert_eq!(sg8.repetitions[0].segments[0].elements[0][0], "ZD7");
1098        assert_eq!(sg8.repetitions[0].segments[1].tag, "CCI");
1099        assert_eq!(sg8.repetitions[1].segments[0].elements[0][0], "Z98");
1100        assert_eq!(sg8.repetitions[1].segments[1].tag, "RFF");
1101        assert_eq!(sg8.repetitions[2].segments[0].elements[0][0], "ZD7");
1102        assert_eq!(sg8.repetitions[3].segments[0].elements[0][0], "Z98");
1103    }
1104
1105    #[test]
1106    fn test_variant_groups_single_variant_type() {
1107        // Only Z98 reps, no ZD7 — still works with variant matching
1108        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1109        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1110
1111        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1112
1113        let segments = vec![
1114            make_owned_seg("UNH", vec![vec!["001"]]),
1115            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1116            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1117            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1118            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1119        ];
1120
1121        let assembler = Assembler::new(&mig);
1122        let result = assembler.assemble_generic(&segments).unwrap();
1123
1124        assert_eq!(result.groups.len(), 1);
1125        assert_eq!(result.groups[0].repetitions.len(), 2);
1126        assert_eq!(
1127            result.groups[0].repetitions[0].segments[0].elements[0][0],
1128            "Z98"
1129        );
1130        assert_eq!(
1131            result.groups[0].repetitions[1].segments[0].elements[0][0],
1132            "Z98"
1133        );
1134    }
1135
1136    #[test]
1137    fn test_non_variant_groups_unchanged() {
1138        // Groups without variant_code behave exactly as before
1139        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1140        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
1141
1142        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![sg2, sg4]);
1143
1144        let segments = vec![
1145            make_owned_seg("UNH", vec![vec!["001"]]),
1146            make_owned_seg("BGM", vec![vec!["E01"]]),
1147            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1148            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
1149            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1150            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1151        ];
1152
1153        let assembler = Assembler::new(&mig);
1154        let result = assembler.assemble_generic(&segments).unwrap();
1155
1156        assert_eq!(result.segments.len(), 2);
1157        assert_eq!(result.groups.len(), 2);
1158        assert_eq!(result.groups[0].group_id, "SG2");
1159        assert_eq!(result.groups[0].repetitions.len(), 2);
1160        assert_eq!(result.groups[1].group_id, "SG4");
1161        assert_eq!(result.groups[1].repetitions.len(), 1);
1162    }
1163
1164    #[test]
1165    fn test_variant_groups_with_nested_children() {
1166        // Variant groups can have nested child groups
1167        let sg10 = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
1168        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
1169        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "Z98");
1170
1171        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1172
1173        let segments = vec![
1174            make_owned_seg("UNH", vec![vec!["001"]]),
1175            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1176            make_owned_seg("CCI", vec![vec!["Z30"]]),
1177            make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
1178            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1179            make_owned_seg("CCI", vec![vec!["Z31"]]),
1180            make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
1181        ];
1182
1183        let assembler = Assembler::new(&mig);
1184        let result = assembler.assemble_generic(&segments).unwrap();
1185
1186        assert_eq!(result.groups.len(), 1);
1187        let sg8 = &result.groups[0];
1188        assert_eq!(sg8.repetitions.len(), 2);
1189
1190        // First rep (ZD7) has nested SG10
1191        assert_eq!(sg8.repetitions[0].child_groups.len(), 1);
1192        assert_eq!(sg8.repetitions[0].child_groups[0].group_id, "SG10");
1193        assert_eq!(
1194            sg8.repetitions[0].child_groups[0].repetitions[0].segments[0].elements[0][0],
1195            "Z30"
1196        );
1197
1198        // Second rep (Z98) has nested SG10
1199        assert_eq!(sg8.repetitions[1].child_groups.len(), 1);
1200        assert_eq!(
1201            sg8.repetitions[1].child_groups[0].repetitions[0].segments[0].elements[0][0],
1202            "Z31"
1203        );
1204    }
1205
1206    #[test]
1207    fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
1208        // try_consume_group with variant_code set should NOT consume a segment
1209        // whose qualifier doesn't match, even if the tag matches.
1210        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1211
1212        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7]);
1213
1214        let segments = vec![
1215            make_owned_seg("UNH", vec![vec!["001"]]),
1216            make_owned_seg("SEQ", vec![vec!["Z98"]]), // Wrong qualifier
1217            make_owned_seg("CCI", vec![vec!["Z30"]]),
1218        ];
1219
1220        let assembler = Assembler::new(&mig);
1221        let result = assembler.assemble_generic(&segments).unwrap();
1222
1223        // SG8 should have no reps because Z98 != ZD7
1224        assert!(result.groups.is_empty());
1225    }
1226
1227    #[test]
1228    fn test_mixed_variant_and_non_variant_groups() {
1229        // SG2 (no variant), then variant SG8s, then SG12 (no variant)
1230        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1231        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1232        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1233        let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
1234
1235        let mig = make_mig_schema(vec!["UNH"], vec![sg2, sg8_zd7, sg8_z98, sg12]);
1236
1237        let segments = vec![
1238            make_owned_seg("UNH", vec![vec!["001"]]),
1239            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1240            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1241            make_owned_seg("CCI", vec![vec!["Z30"]]),
1242            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1243            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1244            make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
1245        ];
1246
1247        let assembler = Assembler::new(&mig);
1248        let result = assembler.assemble_generic(&segments).unwrap();
1249
1250        assert_eq!(result.groups.len(), 3); // SG2, SG8 (combined), SG12
1251        assert_eq!(result.groups[0].group_id, "SG2");
1252        assert_eq!(result.groups[0].repetitions.len(), 1);
1253        assert_eq!(result.groups[1].group_id, "SG8");
1254        assert_eq!(result.groups[1].repetitions.len(), 2);
1255        assert_eq!(result.groups[2].group_id, "SG12");
1256        assert_eq!(result.groups[2].repetitions.len(), 1);
1257    }
1258}