Skip to main content

mig_assembly/
assembler.rs

1//! Recursive descent assembler — MIG-guided segment consumption.
2//!
3//! The assembler walks the MIG tree structure and consumes matching
4//! segments from the input. It produces a generic tree representation
5//! that can be converted to typed PID structs.
6
7use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// A generic assembled tree node (before PID-specific typing).
///
/// Built by `Assembler::assemble_generic`, and also used as a scoped view of
/// a single group repetition via `AssembledGroupInstance::as_assembled_tree`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Root-level segments in consumption order: those matched before any
    /// group, followed by those matched after all groups. The boundary
    /// between the two runs is `post_group_start`.
    pub segments: Vec<AssembledSegment>,
    /// Top-level segment groups, in the order they were consumed.
    pub groups: Vec<AssembledGroup>,
    /// Index in `segments` where post-group segments start (e.g., UNT, UNZ).
    /// Segments before this index appear before groups in EDIFACT order.
    #[serde(default)]
    pub post_group_start: usize,
    /// Root segments consumed between groups during assembly (e.g., UNS
    /// section separator in MSCONS). Key = index into `groups` vec; value =
    /// segments that appear immediately before that group in the EDIFACT
    /// stream. Empty for messages without inter-group root segments.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// An assembled segment with its data elements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the input segment's `id` (e.g., `NAD`, `DTM`).
    pub tag: String,
    /// `elements[i][j]` = component `j` of element `i`
    pub elements: Vec<Vec<String>>,
    /// MIG `Number` attribute identifying this segment variant.
    /// Two segments with the same tag (e.g., DTM) but different roles
    /// (DTM+92 vs DTM+93) have distinct MIG numbers.
    ///
    /// `None` when the segment was consumed without being bound to a
    /// specific MIG slot (greedy extras, skipped segments).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mig_number: Option<String>,
}
44
/// An assembled segment group (may repeat).
///
/// Interleaved repetitions of several MIG variants that share one group ID
/// are collected into a single `AssembledGroup`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// ID of the MIG group definition this was assembled from (e.g., `SG2`).
    pub group_id: String,
    /// Repetitions in the order they were consumed from the input.
    pub repetitions: Vec<AssembledGroupInstance>,
}
51
/// One repetition of a segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments that belong directly to this repetition (entry segment
    /// first), in consumption order. Segments of nested groups are stored
    /// under `child_groups`, not here.
    pub segments: Vec<AssembledSegment>,
    /// Nested groups consumed after this repetition's own segments.
    pub child_groups: Vec<AssembledGroup>,
    /// MIG `Number` of the entry segment that identified this group instance's variant.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_mig_number: Option<String>,
    /// All MIG `Number`s defined for this group variant — includes segments that
    /// may be absent in the EDIFACT but are defined in the MIG for this variant.
    ///
    /// Used by the validator to determine which AHB rules belong to this instance:
    /// a rule with `mig_number` in this set applies here, even if the segment is
    /// missing (which is then a missing-field error). Without this, rules for
    /// absent-but-required segments would be incorrectly filtered out.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub variant_mig_numbers: Vec<String>,
    /// Segments that were present in the EDIFACT input but not defined in
    /// the PID-filtered MIG for this group. Only populated when the assembler
    /// runs with [`AssemblerConfig::skip_unknown_segments`] enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
}
75
76impl AssembledGroupInstance {
77    /// Create a virtual `AssembledTree` scoped to this group instance.
78    ///
79    /// The instance's own segments become the tree's root segments,
80    /// and its child groups become the tree's groups. This enables
81    /// running `MappingEngine::map_all_forward()` on a single
82    /// transaction group as if it were a complete message.
83    pub fn as_assembled_tree(&self) -> AssembledTree {
84        AssembledTree {
85            segments: self.segments.clone(),
86            groups: self.child_groups.clone(),
87            post_group_start: self.segments.len(),
88            inter_group_segments: std::collections::BTreeMap::new(),
89        }
90    }
91}
92
/// Configuration for the assembler.
///
/// `Default` yields strict mode (`skip_unknown_segments == false`).
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, the assembler skips segments inside a group instance that
    /// don't match any remaining MIG slot, nested-group entry, or the group's
    /// entry tag (next repetition). Skipped segments are preserved on
    /// [`AssembledGroupInstance::skipped_segments`] for roundtrip re-emission.
    ///
    /// Default: `false` (strict AHB — unknown segments stall the cursor).
    pub skip_unknown_segments: bool,
}
104
/// MIG-guided assembler.
///
/// Takes a MIG schema and uses it as a grammar to guide consumption
/// of parsed EDIFACT segments. Produces a generic `AssembledTree`.
pub struct Assembler<'a> {
    /// Borrowed MIG schema whose root segments and segment groups drive
    /// the assembly.
    mig: &'a MigSchema,
    /// Behaviour switches (currently only unknown-segment skipping).
    config: AssemblerConfig,
}
113
114impl<'a> Assembler<'a> {
115    pub fn new(mig: &'a MigSchema) -> Self {
116        Self {
117            mig,
118            config: AssemblerConfig::default(),
119        }
120    }
121
122    pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
123        Self { mig, config }
124    }
125
126    /// Assemble segments into a generic tree following MIG structure.
127    pub fn assemble_generic(
128        &self,
129        segments: &[OwnedSegment],
130    ) -> Result<AssembledTree, AssemblyError> {
131        let mut cursor = SegmentCursor::new(segments.len());
132        let mut tree = AssembledTree {
133            segments: Vec::new(),
134            groups: Vec::new(),
135            post_group_start: 0,
136            inter_group_segments: std::collections::BTreeMap::new(),
137        };
138
139        // Track which MIG segment indices were matched in the first pass
140        let mut matched_seg_indices = Vec::new();
141
142        // Process top-level segments (first pass — before groups)
143        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
144            if cursor.is_exhausted() {
145                break;
146            }
147            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
148                tree.segments.push(assembled);
149                matched_seg_indices.push(i);
150            }
151        }
152
153        // Process segment groups, interleaving root segment consumption.
154        // Some message types (e.g., MSCONS) have root segments like UNS
155        // between groups (SG2 and SG5). Before trying each group, consume
156        // any unmatched root segments at the current cursor position.
157        //
158        // When consecutive same-ID groups have variant_code set (e.g., 3 SG8
159        // entries for ZD7, Z98, ZF3), the assembler tries ALL variants at each
160        // cursor position to handle interleaved reps.
161        let mut group_idx = 0;
162        while group_idx < self.mig.segment_groups.len() {
163            if cursor.is_exhausted() {
164                break;
165            }
166
167            let mig_group = &self.mig.segment_groups[group_idx];
168
169            // Try consuming unmatched root segments before this group
170            let tree_group_idx = tree.groups.len();
171            for (i, mig_seg) in self.mig.segments.iter().enumerate() {
172                if cursor.is_exhausted() {
173                    break;
174                }
175                if matched_seg_indices.contains(&i) {
176                    continue;
177                }
178                if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
179                    tree.inter_group_segments
180                        .entry(tree_group_idx)
181                        .or_default()
182                        .push(assembled);
183                    matched_seg_indices.push(i);
184                }
185            }
186
187            // Check if this starts a variant set (consecutive same-ID groups with variant_code)
188            if mig_group.variant_code.is_some() {
189                let variant_count = self.mig.segment_groups[group_idx..]
190                    .iter()
191                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
192                    .count();
193                let variant_end = group_idx + variant_count;
194
195                let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
196                if let Some(combined) =
197                    self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
198                {
199                    tree.groups.push(combined);
200                }
201                group_idx = variant_end;
202            } else {
203                if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
204                    tree.groups.push(assembled);
205                }
206                group_idx += 1;
207            }
208        }
209
210        // Mark where post-group segments start
211        tree.post_group_start = tree.segments.len();
212
213        // Second pass: try unmatched top-level segments (e.g., UNT, UNZ after groups)
214        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
215            if cursor.is_exhausted() {
216                break;
217            }
218            if matched_seg_indices.contains(&i) {
219                continue;
220            }
221            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
222                tree.segments.push(assembled);
223            }
224        }
225
226        Ok(tree)
227    }
228
229    fn try_consume_segment(
230        &self,
231        segments: &[OwnedSegment],
232        cursor: &mut SegmentCursor,
233        mig_seg: &MigSegment,
234    ) -> Result<Option<AssembledSegment>, AssemblyError> {
235        if cursor.is_exhausted() {
236            return Ok(None);
237        }
238        let seg = &segments[cursor.position()];
239        if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
240            let mut assembled = owned_to_assembled(seg);
241            assembled.mig_number = mig_seg.number.clone();
242            cursor.advance();
243            Ok(Some(assembled))
244        } else {
245            Ok(None) // Segment not present (optional)
246        }
247    }
248
    /// Consume zero or more consecutive repetitions of `mig_group`, starting
    /// at the cursor.
    ///
    /// A repetition starts when the segment at the cursor matches the tag of
    /// the group's first (entry) segment and, if the group declares
    /// `variant_codes` or `variant_code`, the qualifier read at
    /// `variant_qualifier_position` (default `(0, 0)`) matches
    /// case-insensitively. For each repetition the group's own segment slots
    /// are consumed first, then its nested groups.
    ///
    /// Returns `Ok(None)` when no repetition was consumed; the cursor is not
    /// moved in that case.
    ///
    /// # Errors
    ///
    /// Returns `AssemblyError::ParseError` when `mig_group` defines no
    /// segments, and propagates errors from nested group consumption.
    fn try_consume_group(
        &self,
        segments: &[OwnedSegment],
        cursor: &mut SegmentCursor,
        mig_group: &MigSegmentGroup,
    ) -> Result<Option<AssembledGroup>, AssemblyError> {
        let mut repetitions = Vec::new();
        // The first MIG segment of a group is its entry segment: its tag
        // delimits repetitions.
        let entry_segment = mig_group.segments.first().ok_or_else(|| {
            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
        })?;

        // Loop for repeating groups
        while !cursor.is_exhausted() {
            let seg = &segments[cursor.position()];
            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                break; // Current segment doesn't match group entry — stop repeating
            }

            // Check variant qualifier if set — tag matches but wrong variant.
            // `variant_codes` (a list) takes precedence over the single
            // `variant_code`; a group with neither accepts any qualifier.
            if !mig_group.variant_codes.is_empty() {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                // A missing element/component is treated as an empty qualifier.
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !mig_group
                    .variant_codes
                    .iter()
                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
                {
                    break;
                }
            } else if let Some(ref expected_code) = mig_group.variant_code {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !actual_qual.eq_ignore_ascii_case(expected_code) {
                    break;
                }
            }

            let mut instance = AssembledGroupInstance {
                segments: Vec::new(),
                child_groups: Vec::new(),
                entry_mig_number: entry_segment.number.clone(),
                variant_mig_numbers: collect_mig_numbers(mig_group),
                skipped_segments: Vec::new(),
            };

            // Consume segments within this group instance.
            // Process MIG slots in tag runs: for consecutive slots with the
            // same tag, consume ALL matching input segments — not just the
            // defined count. This handles real-world fixtures with more
            // repetitions than the merged MIG predicts (e.g., 6 RFFs when
            // the schema defines max 4).
            //
            // The entry segment (first tag run) is consumed bounded — one per
            // defined slot — because the outer while loop uses the entry tag
            // to delineate group repetitions.
            let mut slot_idx = 0;
            let mut is_entry_run = true;
            while slot_idx < mig_group.segments.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let current_tag = &mig_group.segments[slot_idx].id;
                // Number of consecutive MIG slots (from `slot_idx`) sharing this tag.
                let run_len = mig_group.segments[slot_idx..]
                    .iter()
                    .take_while(|s| s.id == *current_tag)
                    .count();

                if is_entry_run {
                    // Entry tag: consume at most run_len (preserves group boundaries)
                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
                        if cursor.is_exhausted() {
                            break;
                        }
                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
                            instance.segments.push(assembled);
                        }
                    }
                    is_entry_run = false;
                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
                    // Non-entry slot with the SAME tag as the entry (e.g., CCI
                    // appears as both entry and non-entry in merged SG30).
                    //
                    // Seeing the entry tag again is ambiguous: it may continue
                    // this repetition or start the next one. Disambiguate with
                    // a one-segment lookahead: if the segment AFTER this one is
                    // a non-entry tag defined by this group (e.g., CCI followed
                    // by CAV), the pair is a continuation and the entry-tag
                    // segment is consumed here. Otherwise stop processing slots
                    // for this instance; nested groups are still attempted and
                    // the outer loop then decides whether a new repetition
                    // starts.
                    if cursor.is_exhausted() {
                        break;
                    }
                    let seg = &segments[cursor.position()];
                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
                        break;
                    }
                    // Check: is there a non-entry segment AFTER this entry-tag?
                    // If so, this CCI+CAV pair is part of the current rep.
                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
                        let next = &segments[cursor.position() + 1];
                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
                            && mig_group.segments.iter().any(|s| {
                                matcher::matches_segment_tag(&next.id, &s.id)
                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
                            })
                    } else {
                        false
                    };
                    if has_following_non_entry {
                        // CCI followed by CAV → consume as continuation pair.
                        // Only this one segment is taken, and it is stored
                        // without a mig_number.
                        instance.segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    } else {
                        // CCI followed by CCI or unknown → let outer loop decide
                        break;
                    }
                } else {
                    // Non-entry tag: consume bounded slots first (with mig_number),
                    // then greedily consume extras (without mig_number).
                    // The bounded slots get mig_number from the MIG definition so
                    // the validator can distinguish same-tag segments (e.g., DTM+92
                    // vs DTM+93 both in SG4).
                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
                        if cursor.is_exhausted() {
                            break;
                        }
                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
                            instance.segments.push(assembled);
                        }
                    }
                    // Greedily consume any remaining same-tag segments beyond the MIG count
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        if matcher::matches_segment_tag(&seg.id, current_tag) {
                            instance.segments.push(owned_to_assembled(seg));
                            cursor.advance();
                        } else {
                            break;
                        }
                    }
                }

                // Move past the whole tag run, regardless of how many of its
                // slots actually matched.
                slot_idx += run_len;

                // Point A: Skip unknown segments between MIG slot runs.
                // When skip mode is ON and we just finished a slot run but the
                // current segment doesn't match any remaining MIG slot, nested
                // group entry, or the entry tag, skip it.
                if self.config.skip_unknown_segments {
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        // Stop if it matches the entry tag (next group repetition)
                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                            break;
                        }
                        // Stop if it matches any remaining MIG slot
                        if mig_group.segments[slot_idx..]
                            .iter()
                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
                        {
                            break;
                        }
                        // Stop if it matches any nested group entry
                        if mig_group.nested_groups.iter().any(|ng| {
                            ng.segments
                                .first()
                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
                        }) {
                            break;
                        }
                        // Unknown segment — skip it (kept on the instance so
                        // it is not lost).
                        instance.skipped_segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    }
                }
            }

            // Consume nested groups (variant-aware for same-ID groups)
            let mut nested_idx = 0;
            while nested_idx < mig_group.nested_groups.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let nested = &mig_group.nested_groups[nested_idx];

                if nested.variant_code.is_some() {
                    // Variant set: collect consecutive same-ID groups with variant_code
                    let variant_count = mig_group.nested_groups[nested_idx..]
                        .iter()
                        .take_while(|g| g.id == nested.id && g.variant_code.is_some())
                        .count();
                    let variant_end = nested_idx + variant_count;
                    let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
                    if let Some(combined) =
                        self.try_consume_variant_groups(segments, cursor, variant_groups)?
                    {
                        instance.child_groups.push(combined);
                    }
                    nested_idx = variant_end;
                } else {
                    if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
                        instance.child_groups.push(assembled);
                    }
                    nested_idx += 1;
                }
            }

            // The entry run always consumes the entry segment, so every
            // iteration of the outer loop makes progress.
            repetitions.push(instance);
        }

        if repetitions.is_empty() {
            Ok(None)
        } else {
            Ok(Some(AssembledGroup {
                group_id: mig_group.id.clone(),
                repetitions,
            }))
        }
    }
485
486    /// Consume interleaved repetitions of variant groups.
487    ///
488    /// At each cursor position, tries all variant definitions to find which one
489    /// matches the entry segment's qualifier. Collects all reps into one
490    /// `AssembledGroup` with the shared group_id.
491    fn try_consume_variant_groups(
492        &self,
493        segments: &[OwnedSegment],
494        cursor: &mut SegmentCursor,
495        variants: &[MigSegmentGroup],
496    ) -> Result<Option<AssembledGroup>, AssemblyError> {
497        let group_id = variants[0].id.clone();
498        let entry_tag = variants[0]
499            .segments
500            .first()
501            .map(|s| s.id.as_str())
502            .unwrap_or("");
503        let mut all_reps = Vec::new();
504
505        while !cursor.is_exhausted() {
506            let seg = &segments[cursor.position()];
507            if !matcher::matches_segment_tag(&seg.id, entry_tag) {
508                break;
509            }
510
511            // Find which variant matches this segment's qualifier.
512            // Each variant may have its qualifier at a different element position
513            // (e.g., CCI+Z19 has qualifier at [0][0], but CCI+++Z15 at [2][0]).
514            let matched = variants.iter().find(|v| {
515                let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
516                let actual_qual = seg
517                    .elements
518                    .get(ei)
519                    .and_then(|e| e.get(ci))
520                    .map(|s| s.as_str())
521                    .unwrap_or("");
522                if !v.variant_codes.is_empty() {
523                    v.variant_codes
524                        .iter()
525                        .any(|c| actual_qual.eq_ignore_ascii_case(c))
526                } else if let Some(ref expected_code) = v.variant_code {
527                    actual_qual.eq_ignore_ascii_case(expected_code)
528                } else {
529                    false
530                }
531            });
532
533            if let Some(variant) = matched {
534                if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
535                    all_reps.extend(group.repetitions);
536                } else {
537                    break;
538                }
539            } else {
540                // No variant matches — try consuming with the first variant as
541                // fallback to avoid getting stuck. This handles edge cases where
542                // the qualifier doesn't exactly match any variant code.
543                if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
544                    all_reps.extend(group.repetitions);
545                } else {
546                    break;
547                }
548            }
549        }
550
551        if all_reps.is_empty() {
552            Ok(None)
553        } else {
554            Ok(Some(AssembledGroup {
555                group_id,
556                repetitions: all_reps,
557            }))
558        }
559    }
560
561    /// Assemble segments with diagnostic collection.
562    ///
563    /// Returns the assembled tree plus diagnostics for segments not consumed
564    /// by the MIG-guided assembly. Existing `assemble_generic()` is unchanged.
565    pub fn assemble_with_diagnostics(
566        &self,
567        segments: &[OwnedSegment],
568    ) -> (AssembledTree, Vec<StructureDiagnostic>) {
569        let mut diagnostics = Vec::new();
570
571        let tree = match self.assemble_generic(segments) {
572            Ok(tree) => tree,
573            Err(e) => {
574                diagnostics.push(StructureDiagnostic {
575                    kind: StructureDiagnosticKind::UnexpectedSegment,
576                    segment_id: String::new(),
577                    position: 0,
578                    message: format!("Assembly failed: {e}"),
579                });
580                return (
581                    AssembledTree {
582                        segments: Vec::new(),
583                        groups: Vec::new(),
584                        post_group_start: 0,
585                        inter_group_segments: std::collections::BTreeMap::new(),
586                    },
587                    diagnostics,
588                );
589            }
590        };
591
592        // Count consumed segments in the assembled tree
593        let consumed = count_tree_segments(&tree);
594
595        // Segments beyond consumed count are unconsumed
596        for (i, seg) in segments.iter().enumerate().skip(consumed) {
597            diagnostics.push(StructureDiagnostic {
598                kind: StructureDiagnosticKind::UnexpectedSegment,
599                segment_id: seg.id.clone(),
600                position: i,
601                message: format!(
602                    "Segment '{}' at position {} was not consumed by MIG-guided assembly",
603                    seg.id, i
604                ),
605            });
606        }
607
608        (tree, diagnostics)
609    }
610}
611
612fn count_tree_segments(tree: &AssembledTree) -> usize {
613    let mut count = tree.segments.len();
614    for group in &tree.groups {
615        count += count_group_segments(group);
616    }
617    // Count inter-group segments (e.g., UNS+D between groups)
618    for segs in tree.inter_group_segments.values() {
619        count += segs.len();
620    }
621    count
622}
623
624fn count_group_segments(group: &AssembledGroup) -> usize {
625    let mut count = 0;
626    for rep in &group.repetitions {
627        count += rep.segments.len();
628        count += rep.skipped_segments.len();
629        for child in &rep.child_groups {
630            count += count_group_segments(child);
631        }
632    }
633    count
634}
635
636/// Collect all MIG `Number`s from a segment group definition, recursively.
637///
638/// This includes numbers from direct segments and from nested groups.
639/// Used to populate `AssembledGroupInstance::variant_mig_numbers`.
640fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
641    let mut numbers = Vec::new();
642    for seg in &group.segments {
643        if let Some(ref num) = seg.number {
644            numbers.push(num.clone());
645        }
646    }
647    for nested in &group.nested_groups {
648        numbers.extend(collect_mig_numbers(nested));
649    }
650    numbers
651}
652
653pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
654    AssembledSegment {
655        tag: seg.id.clone(),
656        elements: seg.elements.clone(),
657        mig_number: None,
658    }
659}
660
661#[cfg(test)]
662mod tests {
663    use super::*;
664    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
665
666    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
667        OwnedSegment {
668            id: id.to_string(),
669            elements: elements
670                .into_iter()
671                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
672                .collect(),
673            segment_number: 0,
674        }
675    }
676
677    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
678        MigSchema {
679            message_type: "UTILMD".to_string(),
680            variant: Some("Strom".to_string()),
681            version: "S2.1".to_string(),
682            publication_date: "2025-03-20".to_string(),
683            author: "BDEW".to_string(),
684            format_version: "FV2504".to_string(),
685            source_file: "test".to_string(),
686            segments: segments.into_iter().map(make_mig_segment).collect(),
687            segment_groups: groups,
688        }
689    }
690
691    #[test]
692    fn test_assembler_top_level_segments_only() {
693        let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
694
695        let segments = vec![
696            make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
697            make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
698            make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
699            make_owned_seg("UNT", vec![vec!["4", "001"]]),
700        ];
701
702        let assembler = Assembler::new(&mig);
703        let result = assembler.assemble_generic(&segments).unwrap();
704
705        assert_eq!(result.segments.len(), 4);
706        assert_eq!(result.segments[0].tag, "UNH");
707        assert_eq!(result.segments[1].tag, "BGM");
708        assert_eq!(result.segments[2].tag, "DTM");
709        assert_eq!(result.segments[3].tag, "UNT");
710        assert!(result.groups.is_empty());
711    }
712
/// Two sibling top-level groups: repeated NAD segments become repetitions of
/// SG2, and the IDE/STS pair becomes a single SG4 repetition.
#[test]
fn test_assembler_with_segment_group() {
    let groups = vec![
        make_mig_group("SG2", vec!["NAD"], vec![]),
        make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
    ];
    let schema = make_mig_schema(vec!["UNH", "BGM"], groups);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // Root level holds only UNH and BGM.
    assert_eq!(tree.segments.len(), 2);
    // Two distinct groups were produced: SG2 and SG4.
    assert_eq!(tree.groups.len(), 2);
    let (sg2, sg4) = (&tree.groups[0], &tree.groups[1]);

    // SG2: one repetition per NAD segment, each starting with that NAD.
    assert_eq!(sg2.group_id, "SG2");
    assert_eq!(sg2.repetitions.len(), 2);
    for repetition in &sg2.repetitions {
        assert_eq!(repetition.segments[0].tag, "NAD");
    }

    // SG4: a single repetition carrying IDE + STS.
    assert_eq!(sg4.group_id, "SG4");
    assert_eq!(sg4.repetitions.len(), 1);
    assert_eq!(sg4.repetitions[0].segments.len(), 2);
}
748
/// A child group (SG3) declared inside SG2 must be attached to the SG2
/// repetition that precedes it, not to the root of the tree.
#[test]
fn test_assembler_nested_groups() {
    let contact_group = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
    let party_group = make_mig_group("SG2", vec!["NAD"], vec![contact_group]);
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![party_group]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
        make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // Exactly one SG2 repetition, opened by the NAD segment.
    let outer = &tree.groups[0];
    assert_eq!(outer.group_id, "SG2");
    assert_eq!(outer.repetitions.len(), 1);
    let outer_rep = &outer.repetitions[0];
    assert_eq!(outer_rep.segments[0].tag, "NAD");

    // SG3 hangs below that repetition and holds CTA followed by COM.
    assert_eq!(outer_rep.child_groups.len(), 1);
    let inner = &outer_rep.child_groups[0];
    assert_eq!(inner.group_id, "SG3");
    let inner_tags: Vec<&str> = inner.repetitions[0]
        .segments
        .iter()
        .map(|seg| seg.tag.as_str())
        .collect();
    assert_eq!(inner_tags, ["CTA", "COM"]);
}
784
/// The MIG lists UNH, BGM, DTM, UNT but the input omits DTM: the assembler
/// must step over the missing slot and still consume UNT.
#[test]
fn test_assembler_optional_segments_skipped() {
    let schema = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("UNT", vec![vec!["2", "001"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // Three segments, in order, with no DTM among them.
    let tags: Vec<&str> = tree.segments.iter().map(|seg| seg.tag.as_str()).collect();
    assert_eq!(tags, ["UNH", "BGM", "UNT"]);
}
805
/// Assembling an empty input succeeds and yields an empty tree.
#[test]
fn test_assembler_empty_segments() {
    let schema = make_mig_schema(vec!["UNH"], vec![]);

    let tree = Assembler::new(&schema).assemble_generic(&[]).unwrap();

    assert!(tree.segments.is_empty());
    assert!(tree.groups.is_empty());
}
814
/// Component values of a consumed segment are carried into the assembled
/// segment unchanged (including the `+` inside the timestamp component).
#[test]
fn test_assembler_preserves_element_data() {
    let schema = make_mig_schema(vec!["DTM"], vec![]);
    let input = vec![make_owned_seg(
        "DTM",
        vec![vec!["137", "202501010000+01", "303"]],
    )];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    let components = &tree.segments[0].elements[0];
    for (idx, expected) in ["137", "202501010000+01", "303"].iter().enumerate() {
        assert_eq!(components[idx], *expected);
    }
}
832
/// `as_assembled_tree` on a group instance: the instance's own segments
/// become the sub-tree's root segments, its child groups become the sub-tree's
/// groups, and `post_group_start` equals the number of root segments.
#[test]
fn test_group_instance_as_assembled_tree() {
    // Local factory for a single-element segment without a MIG number.
    let seg = |tag: &str, components: &[&str]| AssembledSegment {
        tag: tag.to_string(),
        elements: vec![components.iter().map(|c| String::from(*c)).collect()],
        mig_number: None,
    };

    // Nested SG5 with one LOC repetition.
    let location_rep = AssembledGroupInstance {
        segments: vec![seg("LOC", &["Z16", "DE000111222333"])],
        child_groups: vec![],
        entry_mig_number: None,
        variant_mig_numbers: vec![],
        skipped_segments: vec![],
    };
    let location_group = AssembledGroup {
        group_id: "SG5".to_string(),
        repetitions: vec![location_rep],
    };

    // SG4 instance: IDE + STS as its own segments, SG5 as its child.
    let transaction = AssembledGroupInstance {
        segments: vec![seg("IDE", &["24", "TX001"]), seg("STS", &["7"])],
        child_groups: vec![location_group],
        entry_mig_number: None,
        variant_mig_numbers: vec![],
        skipped_segments: vec![],
    };

    let sub_tree = transaction.as_assembled_tree();

    // Root segments of the sub-tree are the instance's segments.
    let root_tags: Vec<&str> = sub_tree
        .segments
        .iter()
        .map(|s| s.tag.as_str())
        .collect();
    assert_eq!(root_tags, ["IDE", "STS"]);

    // Groups of the sub-tree are the instance's child groups.
    assert_eq!(sub_tree.groups.len(), 1);
    assert_eq!(sub_tree.groups[0].group_id, "SG5");

    // All root segments precede the groups.
    assert_eq!(sub_tree.post_group_start, 2);
}
884
/// End-to-end smoke test: tokenize a raw EDIFACT interchange, assemble it,
/// and check that the message-level segments made it into the tree.
#[test]
fn test_assembler_from_parsed_edifact() {
    let raw = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
    let parsed = crate::tokenize::parse_to_segments(raw).unwrap();
    let schema = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);

    let tree = Assembler::new(&schema).assemble_generic(&parsed).unwrap();

    for expected in ["UNH", "BGM", "DTM"] {
        assert!(tree.segments.iter().any(|seg| seg.tag == expected));
    }
}
900
/// Input that matches the MIG exactly must produce no diagnostics.
#[test]
fn test_assemble_with_diagnostics_clean_input() {
    let schema = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("UNT", vec![vec!["2", "001"]]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    assert_eq!(tree.segments.len(), 3);
    assert!(
        diagnostics.is_empty(),
        "Clean input should have no diagnostics"
    );
}
917
/// A trailing segment the MIG does not know is reported as a single
/// `UnexpectedSegment` diagnostic carrying its tag and input position.
#[test]
fn test_assemble_with_diagnostics_unconsumed_segments() {
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    // UNH and BGM are consumed; FTX is left over.
    assert_eq!(tree.segments.len(), 2);
    assert_eq!(diagnostics.len(), 1);

    let leftover = &diagnostics[0];
    assert_eq!(leftover.kind, StructureDiagnosticKind::UnexpectedSegment);
    assert_eq!(leftover.segment_id, "FTX");
    assert_eq!(leftover.position, 2);
}
937
/// Every unconsumed segment gets its own diagnostic, reported in input order.
#[test]
fn test_assemble_with_diagnostics_multiple_unconsumed() {
    let schema = make_mig_schema(vec!["UNH"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("FOO", vec![]),
        make_owned_seg("BAR", vec![]),
        make_owned_seg("BAZ", vec![]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    assert_eq!(tree.segments.len(), 1);
    // The length check guards the zip below against silent truncation.
    assert_eq!(diagnostics.len(), 3);
    for (diagnostic, expected) in diagnostics.iter().zip(["FOO", "BAR", "BAZ"]) {
        assert_eq!(diagnostic.segment_id, expected);
    }
}
955
956    // ── Skip-unknown-segments tests ──
957
/// Compares default assembly against `skip_unknown_segments: true` for a
/// group whose input contains a segment the MIG does not list.
///
/// The MIG group expects `[SEQ, CCI]`; the input carries `[SEQ, RFF, CCI]`.
/// * Skip OFF (default): RFF stalls the cursor after SEQ, so CCI is lost.
/// * Skip ON: RFF is stepped over and CCI is consumed.
#[test]
fn test_skip_unknown_segment_between_slots() {
    let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
    // `sg8` is not needed afterwards, so it is moved into the schema rather
    // than cloned.
    let mig = make_mig_schema(vec!["UNH"], vec![sg8]);

    let segments = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];

    // Skip OFF: CCI not consumed (RFF stalls cursor after SEQ)
    let off = Assembler::new(&mig);
    let tree_off = off.assemble_generic(&segments).unwrap();
    let sg8_off = &tree_off.groups[0];
    assert_eq!(sg8_off.repetitions[0].segments.len(), 1); // Only SEQ
    assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");

    // Skip ON: RFF skipped, CCI consumed
    let on = Assembler::with_config(
        &mig,
        AssemblerConfig {
            skip_unknown_segments: true,
        },
    );
    let tree_on = on.assemble_generic(&segments).unwrap();
    let sg8_on = &tree_on.groups[0];
    assert_eq!(sg8_on.repetitions[0].segments.len(), 2); // SEQ + CCI
    assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
    assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
}
993
/// With skipping enabled, segments that were stepped over are not dropped:
/// they are recorded, in input order, in the instance's `skipped_segments`.
#[test]
fn test_skip_preserves_on_instance() {
    let schema = make_mig_schema(
        vec!["UNH"],
        vec![make_mig_group("SG8", vec!["SEQ", "CCI"], vec![])],
    );
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];
    let skipping = AssemblerConfig {
        skip_unknown_segments: true,
    };

    let tree = Assembler::with_config(&schema, skipping)
        .assemble_generic(&input)
        .unwrap();
    let repetition = &tree.groups[0].repetitions[0];

    // SEQ + CCI were matched against the MIG.
    assert_eq!(repetition.segments.len(), 2);

    // RFF and DTM were skipped and kept aside.
    let skipped_tags: Vec<&str> = repetition
        .skipped_segments
        .iter()
        .map(|seg| seg.tag.as_str())
        .collect();
    assert_eq!(skipped_tags, ["RFF", "DTM"]);
}
1022
/// `Assembler::new` must leave `skip_unknown_segments` disabled so existing
/// callers keep the previous behaviour.
#[test]
fn test_skip_mode_off_default() {
    let schema = make_mig_schema(vec![], vec![]);

    let skip_enabled = Assembler::new(&schema).config.skip_unknown_segments;

    assert!(!skip_enabled);
}
1030
/// Skipping must stop at segments that open a nested group.
///
/// SG4 expects `[IDE, STS]` and contains SG5 expecting `[LOC]`. For the input
/// `IDE, FOO, STS, LOC` only FOO may be skipped; LOC has to open SG5.
#[test]
fn test_skip_does_not_consume_nested_group_entry() {
    let location_group = make_mig_group("SG5", vec!["LOC"], vec![]);
    let transaction_group = make_mig_group("SG4", vec!["IDE", "STS"], vec![location_group]);
    let schema = make_mig_schema(vec!["UNH"], vec![transaction_group]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("IDE", vec![vec!["24"]]),
        make_owned_seg("FOO", vec![vec!["unknown"]]),
        make_owned_seg("STS", vec![vec!["7"]]),
        make_owned_seg("LOC", vec![vec!["Z16"]]),
    ];
    let skipping = AssemblerConfig {
        skip_unknown_segments: true,
    };

    let tree = Assembler::with_config(&schema, skipping)
        .assemble_generic(&input)
        .unwrap();
    let repetition = &tree.groups[0].repetitions[0];

    // IDE + STS consumed by SG4 itself.
    let consumed: Vec<&str> = repetition
        .segments
        .iter()
        .map(|seg| seg.tag.as_str())
        .collect();
    assert_eq!(consumed, ["IDE", "STS"]);

    // FOO is the only skipped segment.
    let skipped: Vec<&str> = repetition
        .skipped_segments
        .iter()
        .map(|seg| seg.tag.as_str())
        .collect();
    assert_eq!(skipped, ["FOO"]);

    // LOC opened the nested SG5 instead of being skipped.
    assert_eq!(repetition.child_groups.len(), 1);
    let nested = &repetition.child_groups[0];
    assert_eq!(nested.group_id, "SG5");
    assert_eq!(nested.repetitions[0].segments[0].tag, "LOC");
}
1070
/// Assemble with skipping enabled, disassemble, and render: every input
/// segment — including the skipped RFF — must be present in the output.
///
/// The output is content-preserving, not order-preserving: the disassembler
/// emits the MIG-guided segments of a group first (SEQ, CCI) and the skipped
/// ones afterwards (RFF), so RFF moves behind CCI.
#[test]
fn test_roundtrip_with_skip() {
    use crate::disassembler::Disassembler;
    use crate::renderer::render_edifact;

    let schema = make_mig_schema(
        vec!["UNH", "UNT"],
        vec![make_mig_group("SG8", vec!["SEQ", "CCI"], vec![])],
    );
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("UNT", vec![vec!["4", "001"]]),
    ];
    let skipping = AssemblerConfig {
        skip_unknown_segments: true,
    };

    let tree = Assembler::with_config(&schema, skipping)
        .assemble_generic(&input)
        .unwrap();
    let dis = Disassembler::new(&schema).disassemble(&tree);
    let delimiters = edifact_primitives::EdifactDelimiters::default();
    let rendered = render_edifact(&dis, &delimiters);

    // All 5 segments come back; RFF (skipped) is emitted after the
    // MIG-guided SEQ and CCI.
    assert_eq!(dis.len(), 5);
    for (idx, expected) in ["UNH", "SEQ", "CCI", "RFF", "UNT"].iter().enumerate() {
        assert_eq!(dis[idx].tag, *expected);
    }

    // The rendered text contains every segment's content.
    for fragment in [
        "UNH+001",
        "SEQ+Z98",
        "RFF+Z38:REF1",
        "CCI+Z30",
        "UNT+4:001",
    ] {
        assert!(rendered.contains(fragment));
    }
}
1120
1121    // ── Variant-aware assembly tests ──
1122
/// Two variant definitions of SG8 (SEQ+ZD7 and SEQ+Z98) with input that
/// alternates between them: all four repetitions must be collected into a
/// single SG8 group, in input order.
#[test]
fn test_variant_groups_interleaved_reps() {
    let variants = vec![
        make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7"),
        make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98"),
    ];
    let schema = make_mig_schema(vec!["UNH"], variants);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z31"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.segments.len(), 1); // UNH
    assert_eq!(tree.groups.len(), 1); // One combined SG8
    let combined = &tree.groups[0];
    assert_eq!(combined.group_id, "SG8");
    assert_eq!(combined.repetitions.len(), 4);

    // The SEQ qualifier of each repetition follows the input's alternation.
    for (idx, qualifier) in ["ZD7", "Z98", "ZD7", "Z98"].iter().enumerate() {
        assert_eq!(
            combined.repetitions[idx].segments[0].elements[0][0],
            *qualifier
        );
    }

    // ZD7 reps carry SEQ+CCI, Z98 reps carry SEQ+RFF.
    assert_eq!(combined.repetitions[0].segments[1].tag, "CCI");
    assert_eq!(combined.repetitions[1].segments[1].tag, "RFF");
}
1162
/// Both SG8 variants are defined but the input only uses Z98: variant
/// matching must still yield one SG8 group holding both Z98 repetitions.
#[test]
fn test_variant_groups_single_variant_type() {
    let variants = vec![
        make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7"),
        make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98"),
    ];
    let schema = make_mig_schema(vec!["UNH"], variants);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.groups.len(), 1);
    let repetitions = &tree.groups[0].repetitions;
    assert_eq!(repetitions.len(), 2);
    for repetition in repetitions {
        assert_eq!(repetition.segments[0].elements[0][0], "Z98");
    }
}
1193
/// Regression guard: groups that carry no variant code are assembled the
/// same way as before variant support existed.
#[test]
fn test_non_variant_groups_unchanged() {
    let party_group = make_mig_group("SG2", vec!["NAD"], vec![]);
    let transaction_group = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![party_group, transaction_group]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.segments.len(), 2);
    assert_eq!(tree.groups.len(), 2);
    // (group id, expected repetition count), in tree order.
    for (idx, (group_id, repetition_count)) in [("SG2", 2), ("SG4", 1)].iter().enumerate() {
        assert_eq!(tree.groups[idx].group_id, *group_id);
        assert_eq!(tree.groups[idx].repetitions.len(), *repetition_count);
    }
}
1221
/// Variant groups may contain child groups: each SG8 repetition, whatever
/// its variant, must receive the SG10 segments that follow its SEQ.
#[test]
fn test_variant_groups_with_nested_children() {
    let characteristic_group = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
    let variants = vec![
        make_mig_group_with_variant(
            "SG8",
            vec!["SEQ"],
            vec![characteristic_group.clone()],
            "ZD7",
        ),
        make_mig_group_with_variant("SG8", vec!["SEQ"], vec![characteristic_group], "Z98"),
    ];
    let schema = make_mig_schema(vec!["UNH"], variants);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("CCI", vec![vec!["Z31"]]),
        make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.groups.len(), 1);
    let repetitions = &tree.groups[0].repetitions;
    assert_eq!(repetitions.len(), 2);

    // First rep (ZD7): one nested SG10 whose CCI carries Z30.
    let first_children = &repetitions[0].child_groups;
    assert_eq!(first_children.len(), 1);
    assert_eq!(first_children[0].group_id, "SG10");
    assert_eq!(
        first_children[0].repetitions[0].segments[0].elements[0][0],
        "Z30"
    );

    // Second rep (Z98): one nested child group whose CCI carries Z31.
    let second_children = &repetitions[1].child_groups;
    assert_eq!(second_children.len(), 1);
    assert_eq!(
        second_children[0].repetitions[0].segments[0].elements[0][0],
        "Z31"
    );
}
1263
/// A variant group must not consume an entry segment whose qualifier differs
/// from its variant code, even though the segment tag matches.
#[test]
fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
    let only_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
    let schema = make_mig_schema(vec!["UNH"], vec![only_zd7]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        // Qualifier Z98 does not match the only defined variant (ZD7).
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // No SG8 repetition was opened, so no group appears in the tree.
    assert!(tree.groups.is_empty());
}
1284
/// A plain group (SG2), two SG8 variants, and another plain group (SG12) in
/// one schema: the variants merge into a single SG8 entry while the plain
/// groups before and after them are assembled as usual.
#[test]
fn test_mixed_variant_and_non_variant_groups() {
    let groups = vec![
        make_mig_group("SG2", vec!["NAD"], vec![]),
        make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7"),
        make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98"),
        make_mig_group("SG12", vec!["NAD"], vec![]),
    ];
    let schema = make_mig_schema(vec!["UNH"], groups);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // SG2, SG8 (combined), SG12
    assert_eq!(tree.groups.len(), 3);
    // (group id, expected repetition count), in tree order.
    let expected = [("SG2", 1), ("SG8", 2), ("SG12", 1)];
    for (idx, (group_id, repetition_count)) in expected.iter().enumerate() {
        assert_eq!(tree.groups[idx].group_id, *group_id);
        assert_eq!(tree.groups[idx].repetitions.len(), *repetition_count);
    }
}
1316}