Skip to main content

mig_assembly/
assembler.rs

1//! Recursive descent assembler — MIG-guided segment consumption.
2//!
3//! The assembler walks the MIG tree structure and consumes matching
4//! segments from the input. It produces a generic tree representation
5//! that can be converted to typed PID structs.
6
7use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// A generic assembled tree node (before PID-specific typing).
///
/// Holds the root-level segments and the top-level segment groups of one
/// message. Built by `Assembler::assemble_generic` and, for a single group
/// instance, by `AssembledGroupInstance::as_assembled_tree`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Root-level segments. Segments consumed before the groups come first,
    /// followed by those consumed after the groups; `post_group_start` marks
    /// the boundary between the two runs.
    pub segments: Vec<AssembledSegment>,
    /// Top-level segment groups in the order they were consumed.
    pub groups: Vec<AssembledGroup>,
    /// Index in `segments` where post-group segments start (e.g., UNT, UNZ).
    /// Segments before this index appear before groups in EDIFACT order.
    #[serde(default)]
    pub post_group_start: usize,
    /// Root segments consumed between groups during assembly (e.g., UNS
    /// section separator in MSCONS). Key = index into `groups` vec; value =
    /// segments that appear immediately before that group in the EDIFACT
    /// stream. Empty for messages without inter-group root segments.
    ///
    /// The key is the number of groups already assembled when the segments
    /// were consumed, so it equals `groups.len()` when no further group was
    /// assembled afterwards.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// An assembled segment with its data elements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the input segment's `id`.
    pub tag: String,
    /// `elements[i][j]` = component `j` of element `i`
    pub elements: Vec<Vec<String>>,
    /// MIG `Number` attribute identifying this segment variant.
    /// Two segments with the same tag (e.g., DTM) but different roles
    /// (DTM+92 vs DTM+93) have distinct MIG numbers.
    ///
    /// `None` when the segment was converted with `owned_to_assembled`
    /// without a MIG slot number being attached afterwards; the field is
    /// then omitted from serialized output.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mig_number: Option<String>,
}
44
/// An assembled segment group (may repeat).
///
/// All repetitions found for one group ID are collected here; when the MIG
/// defines several variants under the same ID, their repetitions share this
/// single group in input order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// ID of the MIG segment group this was assembled from (e.g., `SG2`).
    pub group_id: String,
    /// One entry per occurrence of the group in the input, in input order.
    pub repetitions: Vec<AssembledGroupInstance>,
}
51
/// One repetition of a segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments consumed directly by this repetition, in input order.
    /// The first one is the group's entry segment.
    pub segments: Vec<AssembledSegment>,
    /// Nested groups consumed after this repetition's own segments.
    pub child_groups: Vec<AssembledGroup>,
    /// MIG `Number` of the entry segment that identified this group instance's variant.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_mig_number: Option<String>,
    /// All MIG `Number`s defined for this group variant — includes segments that
    /// may be absent in the EDIFACT but are defined in the MIG for this variant.
    ///
    /// Used by the validator to determine which AHB rules belong to this instance:
    /// a rule with `mig_number` in this set applies here, even if the segment is
    /// missing (which is then a missing-field error). Without this, rules for
    /// absent-but-required segments would be incorrectly filtered out.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub variant_mig_numbers: Vec<String>,
    /// Segments that were present in the EDIFACT input but not defined in
    /// the PID-filtered MIG for this group. Only populated when the assembler
    /// runs with [`AssemblerConfig::skip_unknown_segments`] enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
}
75
76impl AssembledGroupInstance {
77    /// Create a virtual `AssembledTree` scoped to this group instance.
78    ///
79    /// The instance's own segments become the tree's root segments,
80    /// and its child groups become the tree's groups. This enables
81    /// running `MappingEngine::map_all_forward()` on a single
82    /// transaction group as if it were a complete message.
83    pub fn as_assembled_tree(&self) -> AssembledTree {
84        AssembledTree {
85            segments: self.segments.clone(),
86            groups: self.child_groups.clone(),
87            post_group_start: self.segments.len(),
88            inter_group_segments: std::collections::BTreeMap::new(),
89        }
90    }
91}
92
/// Configuration for the assembler.
///
/// `Default` yields the strict configuration (all flags `false`).
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, the assembler skips segments inside a group instance that
    /// don't match any remaining MIG slot, nested-group entry, or the group's
    /// entry tag (next repetition). Skipped segments are preserved on
    /// [`AssembledGroupInstance::skipped_segments`] for roundtrip re-emission.
    ///
    /// Default: `false` (strict AHB — unknown segments stall the cursor).
    pub skip_unknown_segments: bool,
}
104
/// MIG-guided assembler.
///
/// Takes a MIG schema and uses it as a grammar to guide consumption
/// of parsed EDIFACT segments. Produces a generic `AssembledTree`.
///
/// The schema is borrowed for the lifetime `'a`; the assembler itself holds
/// no per-run state, so each `assemble_*` call starts from a fresh cursor.
pub struct Assembler<'a> {
    /// Schema whose root segments and segment groups drive consumption.
    mig: &'a MigSchema,
    /// Behaviour switches (e.g., whether unknown segments are skipped).
    config: AssemblerConfig,
}
113
114impl<'a> Assembler<'a> {
115    pub fn new(mig: &'a MigSchema) -> Self {
116        Self {
117            mig,
118            config: AssemblerConfig::default(),
119        }
120    }
121
122    pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
123        Self { mig, config }
124    }
125
126    /// Assemble segments into a generic tree following MIG structure.
127    pub fn assemble_generic(
128        &self,
129        segments: &[OwnedSegment],
130    ) -> Result<AssembledTree, AssemblyError> {
131        let mut cursor = SegmentCursor::new(segments.len());
132        let mut tree = AssembledTree {
133            segments: Vec::new(),
134            groups: Vec::new(),
135            post_group_start: 0,
136            inter_group_segments: std::collections::BTreeMap::new(),
137        };
138
139        // Track which MIG segment indices were matched in the first pass
140        let mut matched_seg_indices = Vec::new();
141
142        // Process top-level segments (first pass — before groups)
143        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
144            if cursor.is_exhausted() {
145                break;
146            }
147            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
148                tree.segments.push(assembled);
149                matched_seg_indices.push(i);
150            }
151        }
152
153        // Process segment groups, interleaving root segment consumption.
154        // Some message types (e.g., MSCONS) have root segments like UNS
155        // between groups (SG2 and SG5). Before trying each group, consume
156        // any unmatched root segments at the current cursor position.
157        //
158        // When consecutive same-ID groups have variant_code set (e.g., 3 SG8
159        // entries for ZD7, Z98, ZF3), the assembler tries ALL variants at each
160        // cursor position to handle interleaved reps.
161        let mut group_idx = 0;
162        while group_idx < self.mig.segment_groups.len() {
163            if cursor.is_exhausted() {
164                break;
165            }
166
167            let mig_group = &self.mig.segment_groups[group_idx];
168
169            // Try consuming unmatched root segments before this group
170            let tree_group_idx = tree.groups.len();
171            for (i, mig_seg) in self.mig.segments.iter().enumerate() {
172                if cursor.is_exhausted() {
173                    break;
174                }
175                if matched_seg_indices.contains(&i) {
176                    continue;
177                }
178                if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
179                    tree.inter_group_segments
180                        .entry(tree_group_idx)
181                        .or_default()
182                        .push(assembled);
183                    matched_seg_indices.push(i);
184                }
185            }
186
187            // Check if this starts a variant set (consecutive same-ID groups with variant_code)
188            if mig_group.variant_code.is_some() {
189                let variant_count = self.mig.segment_groups[group_idx..]
190                    .iter()
191                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
192                    .count();
193                let variant_end = group_idx + variant_count;
194
195                let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
196                if let Some(combined) =
197                    self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
198                {
199                    tree.groups.push(combined);
200                }
201                group_idx = variant_end;
202            } else {
203                if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
204                    tree.groups.push(assembled);
205                }
206                group_idx += 1;
207            }
208        }
209
210        // Mark where post-group segments start
211        tree.post_group_start = tree.segments.len();
212
213        // Second pass: try unmatched top-level segments (e.g., UNT, UNZ after groups)
214        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
215            if cursor.is_exhausted() {
216                break;
217            }
218            if matched_seg_indices.contains(&i) {
219                continue;
220            }
221            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
222                tree.segments.push(assembled);
223            }
224        }
225
226        Ok(tree)
227    }
228
229    fn try_consume_segment(
230        &self,
231        segments: &[OwnedSegment],
232        cursor: &mut SegmentCursor,
233        mig_seg: &MigSegment,
234    ) -> Result<Option<AssembledSegment>, AssemblyError> {
235        if cursor.is_exhausted() {
236            return Ok(None);
237        }
238        let seg = &segments[cursor.position()];
239        if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
240            let mut assembled = owned_to_assembled(seg);
241            assembled.mig_number = mig_seg.number.clone();
242            cursor.advance();
243            Ok(Some(assembled))
244        } else {
245            Ok(None) // Segment not present (optional)
246        }
247    }
248
    /// Consume all consecutive repetitions of `mig_group` starting at the cursor.
    ///
    /// A repetition starts when the segment at the cursor matches the tag of
    /// the group's first MIG segment (the entry segment) and, if the group
    /// defines `variant_codes` or a `variant_code`, also carries a matching
    /// qualifier. Each repetition consumes the group's own segments slot run
    /// by slot run and then its nested groups (recursively).
    ///
    /// Returns `Ok(None)` without moving the cursor when no repetition
    /// starts at the current position.
    ///
    /// # Errors
    ///
    /// Returns `AssemblyError::ParseError` when `mig_group` (or a nested group
    /// that is reached) defines no segments.
    fn try_consume_group(
        &self,
        segments: &[OwnedSegment],
        cursor: &mut SegmentCursor,
        mig_group: &MigSegmentGroup,
    ) -> Result<Option<AssembledGroup>, AssemblyError> {
        let mut repetitions = Vec::new();
        let entry_segment = mig_group.segments.first().ok_or_else(|| {
            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
        })?;

        // One iteration per repetition of the group. Every iteration consumes
        // at least the entry segment, so the loop always makes progress.
        while !cursor.is_exhausted() {
            let seg = &segments[cursor.position()];
            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                break; // Current segment doesn't match group entry — stop repeating
            }

            // Check variant qualifier if set — tag matches but wrong variant.
            // `variant_codes` (a list) takes precedence over the single
            // `variant_code`. The qualifier is read from the position given by
            // `variant_qualifier_position`, defaulting to element 0,
            // component 0; a missing element/component reads as "".
            // Comparison ignores ASCII case.
            if !mig_group.variant_codes.is_empty() {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !mig_group
                    .variant_codes
                    .iter()
                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
                {
                    break;
                }
            } else if let Some(ref expected_code) = mig_group.variant_code {
                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
                let actual_qual = seg
                    .elements
                    .get(ei)
                    .and_then(|e| e.get(ci))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if !actual_qual.eq_ignore_ascii_case(expected_code) {
                    break;
                }
            }

            let mut instance = AssembledGroupInstance {
                segments: Vec::new(),
                child_groups: Vec::new(),
                entry_mig_number: entry_segment.number.clone(),
                variant_mig_numbers: collect_mig_numbers(mig_group),
                skipped_segments: Vec::new(),
            };

            // Consume segments within this group instance.
            // Process MIG slots in tag runs: for consecutive slots with the
            // same tag, consume ALL matching input segments — not just the
            // defined count. This handles real-world fixtures with more
            // repetitions than the merged MIG predicts (e.g., 6 RFFs when
            // the schema defines max 4).
            //
            // The entry segment (first tag run) is consumed bounded — one per
            // defined slot — because the outer while loop uses the entry tag
            // to delineate group repetitions.
            let mut slot_idx = 0;
            let mut is_entry_run = true;
            while slot_idx < mig_group.segments.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let current_tag = &mig_group.segments[slot_idx].id;
                // Number of consecutive MIG slots sharing `current_tag`.
                let run_len = mig_group.segments[slot_idx..]
                    .iter()
                    .take_while(|s| s.id == *current_tag)
                    .count();

                if is_entry_run {
                    // Entry tag: consume at most run_len (preserves group boundaries).
                    // Going through `try_consume_segment` attaches each slot's
                    // MIG number to the consumed segment.
                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
                        if cursor.is_exhausted() {
                            break;
                        }
                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
                            instance.segments.push(assembled);
                        }
                    }
                    is_entry_run = false;
                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
                    // Non-entry slot with SAME tag as entry (e.g., CCI appears as
                    // both entry and non-entry in merged SG30).
                    //
                    // An entry-tag segment here is ambiguous: it can continue
                    // the current repetition (CCI-CAV-CCI-CAV) or start the
                    // next one. The heuristic looks one input segment ahead:
                    // if the segment AFTER this one is not the entry tag and
                    // matches one of this group's non-entry slots, the current
                    // segment is consumed as a continuation. In every other
                    // case the slot loop is abandoned (remaining slots are not
                    // tried) and the outer loop decides whether a new
                    // repetition starts.
                    if cursor.is_exhausted() {
                        break;
                    }
                    let seg = &segments[cursor.position()];
                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
                        break;
                    }
                    // Check: is there a non-entry segment AFTER this entry-tag?
                    // If so, this CCI+CAV pair is part of the current rep.
                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
                        let next = &segments[cursor.position() + 1];
                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
                            && mig_group.segments.iter().any(|s| {
                                matcher::matches_segment_tag(&next.id, &s.id)
                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
                            })
                    } else {
                        // Last input segment: nothing can follow it.
                        false
                    };
                    if has_following_non_entry {
                        // CCI followed by CAV → consume as continuation pair.
                        // Only this one segment is taken for the whole slot run.
                        // NOTE(review): `owned_to_assembled` leaves `mig_number`
                        // as `None` here — confirm that is intended.
                        instance.segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    } else {
                        // CCI followed by CCI or unknown → let outer loop decide
                        break;
                    }
                } else {
                    // Non-entry tag: greedily consume all matching segments,
                    // regardless of how many slots the MIG defines for the tag.
                    // NOTE(review): these segments also keep `mig_number`
                    // as `None` — confirm that is intended.
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        if matcher::matches_segment_tag(&seg.id, current_tag) {
                            instance.segments.push(owned_to_assembled(seg));
                            cursor.advance();
                        } else {
                            break;
                        }
                    }
                }

                // Move past the whole tag run, whether or not anything matched.
                slot_idx += run_len;

                // Point A: Skip unknown segments between MIG slot runs.
                // When skip mode is ON and we just finished a slot run but the
                // current segment doesn't match any remaining MIG slot, nested
                // group entry, or the entry tag, skip it.
                if self.config.skip_unknown_segments {
                    while !cursor.is_exhausted() {
                        let seg = &segments[cursor.position()];
                        // Stop if it matches the entry tag (next group repetition)
                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
                            break;
                        }
                        // Stop if it matches any remaining MIG slot
                        if mig_group.segments[slot_idx..]
                            .iter()
                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
                        {
                            break;
                        }
                        // Stop if it matches any nested group entry
                        // (direct children only; deeper levels are not checked).
                        if mig_group.nested_groups.iter().any(|ng| {
                            ng.segments
                                .first()
                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
                        }) {
                            break;
                        }
                        // Unknown segment — skip it, but keep it on the instance
                        // so it is not lost.
                        instance.skipped_segments.push(owned_to_assembled(seg));
                        cursor.advance();
                    }
                }
            }

            // Consume nested groups (variant-aware for same-ID groups).
            // Each nested MIG group (or variant set) is tried once, in MIG order.
            let mut nested_idx = 0;
            while nested_idx < mig_group.nested_groups.len() {
                if cursor.is_exhausted() {
                    break;
                }
                let nested = &mig_group.nested_groups[nested_idx];

                if nested.variant_code.is_some() {
                    // Variant set: collect consecutive same-ID groups with variant_code
                    let variant_count = mig_group.nested_groups[nested_idx..]
                        .iter()
                        .take_while(|g| g.id == nested.id && g.variant_code.is_some())
                        .count();
                    let variant_end = nested_idx + variant_count;
                    let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
                    if let Some(combined) =
                        self.try_consume_variant_groups(segments, cursor, variant_groups)?
                    {
                        instance.child_groups.push(combined);
                    }
                    nested_idx = variant_end;
                } else {
                    if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
                        instance.child_groups.push(assembled);
                    }
                    nested_idx += 1;
                }
            }

            repetitions.push(instance);
        }

        // No repetition matched: report the group as absent rather than empty.
        if repetitions.is_empty() {
            Ok(None)
        } else {
            Ok(Some(AssembledGroup {
                group_id: mig_group.id.clone(),
                repetitions,
            }))
        }
    }
472
473    /// Consume interleaved repetitions of variant groups.
474    ///
475    /// At each cursor position, tries all variant definitions to find which one
476    /// matches the entry segment's qualifier. Collects all reps into one
477    /// `AssembledGroup` with the shared group_id.
478    fn try_consume_variant_groups(
479        &self,
480        segments: &[OwnedSegment],
481        cursor: &mut SegmentCursor,
482        variants: &[MigSegmentGroup],
483    ) -> Result<Option<AssembledGroup>, AssemblyError> {
484        let group_id = variants[0].id.clone();
485        let entry_tag = variants[0]
486            .segments
487            .first()
488            .map(|s| s.id.as_str())
489            .unwrap_or("");
490        let mut all_reps = Vec::new();
491
492        while !cursor.is_exhausted() {
493            let seg = &segments[cursor.position()];
494            if !matcher::matches_segment_tag(&seg.id, entry_tag) {
495                break;
496            }
497
498            // Find which variant matches this segment's qualifier.
499            // Each variant may have its qualifier at a different element position
500            // (e.g., CCI+Z19 has qualifier at [0][0], but CCI+++Z15 at [2][0]).
501            let matched = variants.iter().find(|v| {
502                let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
503                let actual_qual = seg
504                    .elements
505                    .get(ei)
506                    .and_then(|e| e.get(ci))
507                    .map(|s| s.as_str())
508                    .unwrap_or("");
509                if !v.variant_codes.is_empty() {
510                    v.variant_codes
511                        .iter()
512                        .any(|c| actual_qual.eq_ignore_ascii_case(c))
513                } else if let Some(ref expected_code) = v.variant_code {
514                    actual_qual.eq_ignore_ascii_case(expected_code)
515                } else {
516                    false
517                }
518            });
519
520            if let Some(variant) = matched {
521                if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
522                    all_reps.extend(group.repetitions);
523                } else {
524                    break;
525                }
526            } else {
527                // No variant matches — try consuming with the first variant as
528                // fallback to avoid getting stuck. This handles edge cases where
529                // the qualifier doesn't exactly match any variant code.
530                if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
531                    all_reps.extend(group.repetitions);
532                } else {
533                    break;
534                }
535            }
536        }
537
538        if all_reps.is_empty() {
539            Ok(None)
540        } else {
541            Ok(Some(AssembledGroup {
542                group_id,
543                repetitions: all_reps,
544            }))
545        }
546    }
547
548    /// Assemble segments with diagnostic collection.
549    ///
550    /// Returns the assembled tree plus diagnostics for segments not consumed
551    /// by the MIG-guided assembly. Existing `assemble_generic()` is unchanged.
552    pub fn assemble_with_diagnostics(
553        &self,
554        segments: &[OwnedSegment],
555    ) -> (AssembledTree, Vec<StructureDiagnostic>) {
556        let mut diagnostics = Vec::new();
557
558        let tree = match self.assemble_generic(segments) {
559            Ok(tree) => tree,
560            Err(e) => {
561                diagnostics.push(StructureDiagnostic {
562                    kind: StructureDiagnosticKind::UnexpectedSegment,
563                    segment_id: String::new(),
564                    position: 0,
565                    message: format!("Assembly failed: {e}"),
566                });
567                return (
568                    AssembledTree {
569                        segments: Vec::new(),
570                        groups: Vec::new(),
571                        post_group_start: 0,
572                        inter_group_segments: std::collections::BTreeMap::new(),
573                    },
574                    diagnostics,
575                );
576            }
577        };
578
579        // Count consumed segments in the assembled tree
580        let consumed = count_tree_segments(&tree);
581
582        // Segments beyond consumed count are unconsumed
583        for (i, seg) in segments.iter().enumerate().skip(consumed) {
584            diagnostics.push(StructureDiagnostic {
585                kind: StructureDiagnosticKind::UnexpectedSegment,
586                segment_id: seg.id.clone(),
587                position: i,
588                message: format!(
589                    "Segment '{}' at position {} was not consumed by MIG-guided assembly",
590                    seg.id, i
591                ),
592            });
593        }
594
595        (tree, diagnostics)
596    }
597}
598
599fn count_tree_segments(tree: &AssembledTree) -> usize {
600    let mut count = tree.segments.len();
601    for group in &tree.groups {
602        count += count_group_segments(group);
603    }
604    // Count inter-group segments (e.g., UNS+D between groups)
605    for segs in tree.inter_group_segments.values() {
606        count += segs.len();
607    }
608    count
609}
610
611fn count_group_segments(group: &AssembledGroup) -> usize {
612    let mut count = 0;
613    for rep in &group.repetitions {
614        count += rep.segments.len();
615        count += rep.skipped_segments.len();
616        for child in &rep.child_groups {
617            count += count_group_segments(child);
618        }
619    }
620    count
621}
622
623/// Collect all MIG `Number`s from a segment group definition, recursively.
624///
625/// This includes numbers from direct segments and from nested groups.
626/// Used to populate `AssembledGroupInstance::variant_mig_numbers`.
627fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
628    let mut numbers = Vec::new();
629    for seg in &group.segments {
630        if let Some(ref num) = seg.number {
631            numbers.push(num.clone());
632        }
633    }
634    for nested in &group.nested_groups {
635        numbers.extend(collect_mig_numbers(nested));
636    }
637    numbers
638}
639
640pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
641    AssembledSegment {
642        tag: seg.id.clone(),
643        elements: seg.elements.clone(),
644        mig_number: None,
645    }
646}
647
648#[cfg(test)]
649mod tests {
650    use super::*;
651    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
652
653    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
654        OwnedSegment {
655            id: id.to_string(),
656            elements: elements
657                .into_iter()
658                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
659                .collect(),
660            segment_number: 0,
661        }
662    }
663
664    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
665        MigSchema {
666            message_type: "UTILMD".to_string(),
667            variant: Some("Strom".to_string()),
668            version: "S2.1".to_string(),
669            publication_date: "2025-03-20".to_string(),
670            author: "BDEW".to_string(),
671            format_version: "FV2504".to_string(),
672            source_file: "test".to_string(),
673            segments: segments.into_iter().map(make_mig_segment).collect(),
674            segment_groups: groups,
675        }
676    }
677
678    #[test]
679    fn test_assembler_top_level_segments_only() {
680        let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
681
682        let segments = vec![
683            make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
684            make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
685            make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
686            make_owned_seg("UNT", vec![vec!["4", "001"]]),
687        ];
688
689        let assembler = Assembler::new(&mig);
690        let result = assembler.assemble_generic(&segments).unwrap();
691
692        assert_eq!(result.segments.len(), 4);
693        assert_eq!(result.segments[0].tag, "UNH");
694        assert_eq!(result.segments[1].tag, "BGM");
695        assert_eq!(result.segments[2].tag, "DTM");
696        assert_eq!(result.segments[3].tag, "UNT");
697        assert!(result.groups.is_empty());
698    }
699
700    #[test]
701    fn test_assembler_with_segment_group() {
702        let mig = make_mig_schema(
703            vec!["UNH", "BGM"],
704            vec![
705                make_mig_group("SG2", vec!["NAD"], vec![]),
706                make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
707            ],
708        );
709
710        let segments = vec![
711            make_owned_seg("UNH", vec![vec!["001"]]),
712            make_owned_seg("BGM", vec![vec!["E01"]]),
713            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
714            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
715            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
716            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
717        ];
718
719        let assembler = Assembler::new(&mig);
720        let result = assembler.assemble_generic(&segments).unwrap();
721
722        // Top-level: UNH, BGM
723        assert_eq!(result.segments.len(), 2);
724        // SG2: 2 repetitions (two NAD segments)
725        assert_eq!(result.groups.len(), 2);
726        assert_eq!(result.groups[0].group_id, "SG2");
727        assert_eq!(result.groups[0].repetitions.len(), 2);
728        assert_eq!(result.groups[0].repetitions[0].segments[0].tag, "NAD");
729        assert_eq!(result.groups[0].repetitions[1].segments[0].tag, "NAD");
730        // SG4: 1 repetition (IDE + STS)
731        assert_eq!(result.groups[1].group_id, "SG4");
732        assert_eq!(result.groups[1].repetitions.len(), 1);
733        assert_eq!(result.groups[1].repetitions[0].segments.len(), 2);
734    }
735
736    #[test]
737    fn test_assembler_nested_groups() {
738        let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
739        let mig = make_mig_schema(
740            vec!["UNH", "BGM"],
741            vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])],
742        );
743
744        let segments = vec![
745            make_owned_seg("UNH", vec![vec!["001"]]),
746            make_owned_seg("BGM", vec![vec!["E01"]]),
747            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
748            make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
749            make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
750        ];
751
752        let assembler = Assembler::new(&mig);
753        let result = assembler.assemble_generic(&segments).unwrap();
754
755        // SG2 has 1 repetition
756        let sg2 = &result.groups[0];
757        assert_eq!(sg2.group_id, "SG2");
758        assert_eq!(sg2.repetitions.len(), 1);
759
760        let sg2_inst = &sg2.repetitions[0];
761        assert_eq!(sg2_inst.segments[0].tag, "NAD");
762
763        // SG3 nested inside SG2
764        assert_eq!(sg2_inst.child_groups.len(), 1);
765        let sg3 = &sg2_inst.child_groups[0];
766        assert_eq!(sg3.group_id, "SG3");
767        assert_eq!(sg3.repetitions[0].segments.len(), 2);
768        assert_eq!(sg3.repetitions[0].segments[0].tag, "CTA");
769        assert_eq!(sg3.repetitions[0].segments[1].tag, "COM");
770    }
771
772    #[test]
773    fn test_assembler_optional_segments_skipped() {
774        // MIG expects UNH, BGM, DTM, UNT but input has no DTM
775        let mig = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
776
777        let segments = vec![
778            make_owned_seg("UNH", vec![vec!["001"]]),
779            make_owned_seg("BGM", vec![vec!["E01"]]),
780            make_owned_seg("UNT", vec![vec!["2", "001"]]),
781        ];
782
783        let assembler = Assembler::new(&mig);
784        let result = assembler.assemble_generic(&segments).unwrap();
785
786        // DTM is skipped (optional), UNT consumed
787        assert_eq!(result.segments.len(), 3);
788        assert_eq!(result.segments[0].tag, "UNH");
789        assert_eq!(result.segments[1].tag, "BGM");
790        assert_eq!(result.segments[2].tag, "UNT");
791    }
792
793    #[test]
794    fn test_assembler_empty_segments() {
795        let mig = make_mig_schema(vec!["UNH"], vec![]);
796        let assembler = Assembler::new(&mig);
797        let result = assembler.assemble_generic(&[]).unwrap();
798        assert!(result.segments.is_empty());
799        assert!(result.groups.is_empty());
800    }
801
802    #[test]
803    fn test_assembler_preserves_element_data() {
804        let mig = make_mig_schema(vec!["DTM"], vec![]);
805
806        let segments = vec![make_owned_seg(
807            "DTM",
808            vec![vec!["137", "202501010000+01", "303"]],
809        )];
810
811        let assembler = Assembler::new(&mig);
812        let result = assembler.assemble_generic(&segments).unwrap();
813
814        let dtm = &result.segments[0];
815        assert_eq!(dtm.elements[0][0], "137");
816        assert_eq!(dtm.elements[0][1], "202501010000+01");
817        assert_eq!(dtm.elements[0][2], "303");
818    }
819
820    #[test]
821    fn test_group_instance_as_assembled_tree() {
822        // Build an SG4 instance with root segments (IDE, STS) and child groups (SG5)
823        let sg5 = AssembledGroup {
824            group_id: "SG5".to_string(),
825            repetitions: vec![AssembledGroupInstance {
826                segments: vec![AssembledSegment {
827                    tag: "LOC".to_string(),
828                    elements: vec![vec!["Z16".to_string(), "DE000111222333".to_string()]],
829                    mig_number: None,
830                }],
831                child_groups: vec![],
832                entry_mig_number: None,
833                variant_mig_numbers: vec![],
834                skipped_segments: vec![],
835            }],
836        };
837
838        let sg4_instance = AssembledGroupInstance {
839            segments: vec![
840                AssembledSegment {
841                    tag: "IDE".to_string(),
842                    elements: vec![vec!["24".to_string(), "TX001".to_string()]],
843                    mig_number: None,
844                },
845                AssembledSegment {
846                    tag: "STS".to_string(),
847                    elements: vec![vec!["7".to_string()]],
848                    mig_number: None,
849                },
850            ],
851            child_groups: vec![sg5],
852            entry_mig_number: None,
853            variant_mig_numbers: vec![],
854            skipped_segments: vec![],
855        };
856
857        let sub_tree = sg4_instance.as_assembled_tree();
858
859        // Root segments of sub-tree are the SG4 instance's segments
860        assert_eq!(sub_tree.segments.len(), 2);
861        assert_eq!(sub_tree.segments[0].tag, "IDE");
862        assert_eq!(sub_tree.segments[1].tag, "STS");
863
864        // Groups of sub-tree are the SG4 instance's child groups
865        assert_eq!(sub_tree.groups.len(), 1);
866        assert_eq!(sub_tree.groups[0].group_id, "SG5");
867
868        // post_group_start marks where root segments end
869        assert_eq!(sub_tree.post_group_start, 2);
870    }
871
872    #[test]
873    fn test_assembler_from_parsed_edifact() {
874        // End-to-end: parse raw EDIFACT, then assemble
875        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
876        let segments = crate::tokenize::parse_to_segments(input).unwrap();
877
878        let mig = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);
879
880        let assembler = Assembler::new(&mig);
881        let result = assembler.assemble_generic(&segments).unwrap();
882
883        assert!(result.segments.iter().any(|s| s.tag == "UNH"));
884        assert!(result.segments.iter().any(|s| s.tag == "BGM"));
885        assert!(result.segments.iter().any(|s| s.tag == "DTM"));
886    }
887
888    #[test]
889    fn test_assemble_with_diagnostics_clean_input() {
890        let mig = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
891        let segments = vec![
892            make_owned_seg("UNH", vec![vec!["001"]]),
893            make_owned_seg("BGM", vec![vec!["E01"]]),
894            make_owned_seg("UNT", vec![vec!["2", "001"]]),
895        ];
896        let assembler = Assembler::new(&mig);
897        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
898        assert_eq!(tree.segments.len(), 3);
899        assert!(
900            diagnostics.is_empty(),
901            "Clean input should have no diagnostics"
902        );
903    }
904
905    #[test]
906    fn test_assemble_with_diagnostics_unconsumed_segments() {
907        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![]);
908        let segments = vec![
909            make_owned_seg("UNH", vec![vec!["001"]]),
910            make_owned_seg("BGM", vec![vec!["E01"]]),
911            make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
912        ];
913        let assembler = Assembler::new(&mig);
914        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
915        assert_eq!(tree.segments.len(), 2);
916        assert_eq!(diagnostics.len(), 1);
917        assert_eq!(
918            diagnostics[0].kind,
919            StructureDiagnosticKind::UnexpectedSegment
920        );
921        assert_eq!(diagnostics[0].segment_id, "FTX");
922        assert_eq!(diagnostics[0].position, 2);
923    }
924
925    #[test]
926    fn test_assemble_with_diagnostics_multiple_unconsumed() {
927        let mig = make_mig_schema(vec!["UNH"], vec![]);
928        let segments = vec![
929            make_owned_seg("UNH", vec![vec!["001"]]),
930            make_owned_seg("FOO", vec![]),
931            make_owned_seg("BAR", vec![]),
932            make_owned_seg("BAZ", vec![]),
933        ];
934        let assembler = Assembler::new(&mig);
935        let (tree, diagnostics) = assembler.assemble_with_diagnostics(&segments);
936        assert_eq!(tree.segments.len(), 1);
937        assert_eq!(diagnostics.len(), 3);
938        assert_eq!(diagnostics[0].segment_id, "FOO");
939        assert_eq!(diagnostics[1].segment_id, "BAR");
940        assert_eq!(diagnostics[2].segment_id, "BAZ");
941    }
942
943    // ── Skip-unknown-segments tests ──
944
945    #[test]
946    fn test_skip_unknown_segment_between_slots() {
947        // MIG group expects [SEQ, CCI], input has [SEQ, RFF, CCI].
948        // With skip ON, RFF is skipped and CCI is consumed.
949        // With skip OFF (default), CCI is lost because RFF stalls the cursor.
950        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
951        let mig = make_mig_schema(vec!["UNH"], vec![sg8.clone()]);
952
953        let segments = vec![
954            make_owned_seg("UNH", vec![vec!["001"]]),
955            make_owned_seg("SEQ", vec![vec!["Z98"]]),
956            make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
957            make_owned_seg("CCI", vec![vec!["Z30"]]),
958        ];
959
960        // Skip OFF: CCI not consumed (RFF stalls cursor after SEQ)
961        let off = Assembler::new(&mig);
962        let tree_off = off.assemble_generic(&segments).unwrap();
963        let sg8_off = &tree_off.groups[0];
964        assert_eq!(sg8_off.repetitions[0].segments.len(), 1); // Only SEQ
965        assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");
966
967        // Skip ON: RFF skipped, CCI consumed
968        let on = Assembler::with_config(
969            &mig,
970            AssemblerConfig {
971                skip_unknown_segments: true,
972            },
973        );
974        let tree_on = on.assemble_generic(&segments).unwrap();
975        let sg8_on = &tree_on.groups[0];
976        assert_eq!(sg8_on.repetitions[0].segments.len(), 2); // SEQ + CCI
977        assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
978        assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
979    }
980
981    #[test]
982    fn test_skip_preserves_on_instance() {
983        // Skipped segments are stored in instance.skipped_segments
984        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
985        let mig = make_mig_schema(vec!["UNH"], vec![sg8]);
986
987        let segments = vec![
988            make_owned_seg("UNH", vec![vec!["001"]]),
989            make_owned_seg("SEQ", vec![vec!["Z98"]]),
990            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
991            make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
992            make_owned_seg("CCI", vec![vec!["Z30"]]),
993        ];
994
995        let assembler = Assembler::with_config(
996            &mig,
997            AssemblerConfig {
998                skip_unknown_segments: true,
999            },
1000        );
1001        let tree = assembler.assemble_generic(&segments).unwrap();
1002        let instance = &tree.groups[0].repetitions[0];
1003
1004        assert_eq!(instance.segments.len(), 2); // SEQ + CCI
1005        assert_eq!(instance.skipped_segments.len(), 2); // RFF + DTM
1006        assert_eq!(instance.skipped_segments[0].tag, "RFF");
1007        assert_eq!(instance.skipped_segments[1].tag, "DTM");
1008    }
1009
1010    #[test]
1011    fn test_skip_mode_off_default() {
1012        // Assembler::new() doesn't skip (backwards compat)
1013        let mig = make_mig_schema(vec![], vec![]);
1014        let assembler = Assembler::new(&mig);
1015        assert!(!assembler.config.skip_unknown_segments);
1016    }
1017
1018    #[test]
1019    fn test_skip_does_not_consume_nested_group_entry() {
1020        // Skip must NOT consume segments that are nested group entries.
1021        // SG4 expects [IDE, STS], nested SG5 expects [LOC].
1022        // Input: IDE, FOO, STS, LOC. FOO should be skipped, LOC goes to SG5.
1023        let sg5 = make_mig_group("SG5", vec!["LOC"], vec![]);
1024        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![sg5]);
1025        let mig = make_mig_schema(vec!["UNH"], vec![sg4]);
1026
1027        let segments = vec![
1028            make_owned_seg("UNH", vec![vec!["001"]]),
1029            make_owned_seg("IDE", vec![vec!["24"]]),
1030            make_owned_seg("FOO", vec![vec!["unknown"]]),
1031            make_owned_seg("STS", vec![vec!["7"]]),
1032            make_owned_seg("LOC", vec![vec!["Z16"]]),
1033        ];
1034
1035        let assembler = Assembler::with_config(
1036            &mig,
1037            AssemblerConfig {
1038                skip_unknown_segments: true,
1039            },
1040        );
1041        let tree = assembler.assemble_generic(&segments).unwrap();
1042        let sg4 = &tree.groups[0];
1043        let inst = &sg4.repetitions[0];
1044
1045        // IDE + STS consumed, FOO skipped
1046        assert_eq!(inst.segments.len(), 2);
1047        assert_eq!(inst.segments[0].tag, "IDE");
1048        assert_eq!(inst.segments[1].tag, "STS");
1049        assert_eq!(inst.skipped_segments.len(), 1);
1050        assert_eq!(inst.skipped_segments[0].tag, "FOO");
1051
1052        // LOC went to nested SG5
1053        assert_eq!(inst.child_groups.len(), 1);
1054        assert_eq!(inst.child_groups[0].group_id, "SG5");
1055        assert_eq!(inst.child_groups[0].repetitions[0].segments[0].tag, "LOC");
1056    }
1057
1058    #[test]
1059    fn test_roundtrip_with_skip() {
1060        // Full roundtrip: assemble with skip → disassemble → byte-identical
1061        // including skipped segments in the output.
1062        use crate::disassembler::Disassembler;
1063        use crate::renderer::render_edifact;
1064
1065        let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
1066        let mig = make_mig_schema(vec!["UNH", "UNT"], vec![sg8]);
1067
1068        let segments = vec![
1069            make_owned_seg("UNH", vec![vec!["001"]]),
1070            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1071            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1072            make_owned_seg("CCI", vec![vec!["Z30"]]),
1073            make_owned_seg("UNT", vec![vec!["4", "001"]]),
1074        ];
1075
1076        let assembler = Assembler::with_config(
1077            &mig,
1078            AssemblerConfig {
1079                skip_unknown_segments: true,
1080            },
1081        );
1082        let tree = assembler.assemble_generic(&segments).unwrap();
1083
1084        let disassembler = Disassembler::new(&mig);
1085        let dis = disassembler.disassemble(&tree);
1086        let delimiters = edifact_primitives::EdifactDelimiters::default();
1087        let rendered = render_edifact(&dis, &delimiters);
1088
1089        // All 5 segments should appear in output (including skipped RFF).
1090        // Disassembler emits MIG-guided segments first (SEQ, CCI),
1091        // then skipped segments (RFF) — so order within the group differs
1092        // from the original input, but all content is preserved.
1093        assert_eq!(dis.len(), 5);
1094        assert_eq!(dis[0].tag, "UNH");
1095        assert_eq!(dis[1].tag, "SEQ");
1096        assert_eq!(dis[2].tag, "CCI");
1097        assert_eq!(dis[3].tag, "RFF"); // skipped → emitted after MIG segments
1098        assert_eq!(dis[4].tag, "UNT");
1099
1100        // Rendered output contains all segments
1101        assert!(rendered.contains("UNH+001"));
1102        assert!(rendered.contains("SEQ+Z98"));
1103        assert!(rendered.contains("RFF+Z38:REF1"));
1104        assert!(rendered.contains("CCI+Z30"));
1105        assert!(rendered.contains("UNT+4:001"));
1106    }
1107
1108    // ── Variant-aware assembly tests ──
1109
1110    #[test]
1111    fn test_variant_groups_interleaved_reps() {
1112        // Two SG8 variant definitions: one for SEQ+ZD7, one for SEQ+Z98.
1113        // Input has interleaved reps: ZD7, Z98, ZD7, Z98.
1114        // All should be collected into one SG8 group with 4 reps.
1115        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1116        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1117
1118        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1119
1120        let segments = vec![
1121            make_owned_seg("UNH", vec![vec!["001"]]),
1122            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1123            make_owned_seg("CCI", vec![vec!["Z30"]]),
1124            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1125            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1126            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1127            make_owned_seg("CCI", vec![vec!["Z31"]]),
1128            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1129            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1130        ];
1131
1132        let assembler = Assembler::new(&mig);
1133        let result = assembler.assemble_generic(&segments).unwrap();
1134
1135        assert_eq!(result.segments.len(), 1); // UNH
1136        assert_eq!(result.groups.len(), 1); // One combined SG8
1137        let sg8 = &result.groups[0];
1138        assert_eq!(sg8.group_id, "SG8");
1139        assert_eq!(sg8.repetitions.len(), 4);
1140
1141        // ZD7 reps have SEQ+CCI, Z98 reps have SEQ+RFF
1142        assert_eq!(sg8.repetitions[0].segments[0].elements[0][0], "ZD7");
1143        assert_eq!(sg8.repetitions[0].segments[1].tag, "CCI");
1144        assert_eq!(sg8.repetitions[1].segments[0].elements[0][0], "Z98");
1145        assert_eq!(sg8.repetitions[1].segments[1].tag, "RFF");
1146        assert_eq!(sg8.repetitions[2].segments[0].elements[0][0], "ZD7");
1147        assert_eq!(sg8.repetitions[3].segments[0].elements[0][0], "Z98");
1148    }
1149
1150    #[test]
1151    fn test_variant_groups_single_variant_type() {
1152        // Only Z98 reps, no ZD7 — still works with variant matching
1153        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1154        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1155
1156        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1157
1158        let segments = vec![
1159            make_owned_seg("UNH", vec![vec!["001"]]),
1160            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1161            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1162            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1163            make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
1164        ];
1165
1166        let assembler = Assembler::new(&mig);
1167        let result = assembler.assemble_generic(&segments).unwrap();
1168
1169        assert_eq!(result.groups.len(), 1);
1170        assert_eq!(result.groups[0].repetitions.len(), 2);
1171        assert_eq!(
1172            result.groups[0].repetitions[0].segments[0].elements[0][0],
1173            "Z98"
1174        );
1175        assert_eq!(
1176            result.groups[0].repetitions[1].segments[0].elements[0][0],
1177            "Z98"
1178        );
1179    }
1180
1181    #[test]
1182    fn test_non_variant_groups_unchanged() {
1183        // Groups without variant_code behave exactly as before
1184        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1185        let sg4 = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
1186
1187        let mig = make_mig_schema(vec!["UNH", "BGM"], vec![sg2, sg4]);
1188
1189        let segments = vec![
1190            make_owned_seg("UNH", vec![vec!["001"]]),
1191            make_owned_seg("BGM", vec![vec!["E01"]]),
1192            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1193            make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
1194            make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
1195            make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
1196        ];
1197
1198        let assembler = Assembler::new(&mig);
1199        let result = assembler.assemble_generic(&segments).unwrap();
1200
1201        assert_eq!(result.segments.len(), 2);
1202        assert_eq!(result.groups.len(), 2);
1203        assert_eq!(result.groups[0].group_id, "SG2");
1204        assert_eq!(result.groups[0].repetitions.len(), 2);
1205        assert_eq!(result.groups[1].group_id, "SG4");
1206        assert_eq!(result.groups[1].repetitions.len(), 1);
1207    }
1208
1209    #[test]
1210    fn test_variant_groups_with_nested_children() {
1211        // Variant groups can have nested child groups
1212        let sg10 = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
1213        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10.clone()], "ZD7");
1214        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10], "Z98");
1215
1216        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7, sg8_z98]);
1217
1218        let segments = vec![
1219            make_owned_seg("UNH", vec![vec!["001"]]),
1220            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1221            make_owned_seg("CCI", vec![vec!["Z30"]]),
1222            make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
1223            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1224            make_owned_seg("CCI", vec![vec!["Z31"]]),
1225            make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
1226        ];
1227
1228        let assembler = Assembler::new(&mig);
1229        let result = assembler.assemble_generic(&segments).unwrap();
1230
1231        assert_eq!(result.groups.len(), 1);
1232        let sg8 = &result.groups[0];
1233        assert_eq!(sg8.repetitions.len(), 2);
1234
1235        // First rep (ZD7) has nested SG10
1236        assert_eq!(sg8.repetitions[0].child_groups.len(), 1);
1237        assert_eq!(sg8.repetitions[0].child_groups[0].group_id, "SG10");
1238        assert_eq!(
1239            sg8.repetitions[0].child_groups[0].repetitions[0].segments[0].elements[0][0],
1240            "Z30"
1241        );
1242
1243        // Second rep (Z98) has nested SG10
1244        assert_eq!(sg8.repetitions[1].child_groups.len(), 1);
1245        assert_eq!(
1246            sg8.repetitions[1].child_groups[0].repetitions[0].segments[0].elements[0][0],
1247            "Z31"
1248        );
1249    }
1250
1251    #[test]
1252    fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
1253        // try_consume_group with variant_code set should NOT consume a segment
1254        // whose qualifier doesn't match, even if the tag matches.
1255        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1256
1257        let mig = make_mig_schema(vec!["UNH"], vec![sg8_zd7]);
1258
1259        let segments = vec![
1260            make_owned_seg("UNH", vec![vec!["001"]]),
1261            make_owned_seg("SEQ", vec![vec!["Z98"]]), // Wrong qualifier
1262            make_owned_seg("CCI", vec![vec!["Z30"]]),
1263        ];
1264
1265        let assembler = Assembler::new(&mig);
1266        let result = assembler.assemble_generic(&segments).unwrap();
1267
1268        // SG8 should have no reps because Z98 != ZD7
1269        assert!(result.groups.is_empty());
1270    }
1271
1272    #[test]
1273    fn test_mixed_variant_and_non_variant_groups() {
1274        // SG2 (no variant), then variant SG8s, then SG12 (no variant)
1275        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1276        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1277        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1278        let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
1279
1280        let mig = make_mig_schema(vec!["UNH"], vec![sg2, sg8_zd7, sg8_z98, sg12]);
1281
1282        let segments = vec![
1283            make_owned_seg("UNH", vec![vec!["001"]]),
1284            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1285            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1286            make_owned_seg("CCI", vec![vec!["Z30"]]),
1287            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1288            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1289            make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
1290        ];
1291
1292        let assembler = Assembler::new(&mig);
1293        let result = assembler.assemble_generic(&segments).unwrap();
1294
1295        assert_eq!(result.groups.len(), 3); // SG2, SG8 (combined), SG12
1296        assert_eq!(result.groups[0].group_id, "SG2");
1297        assert_eq!(result.groups[0].repetitions.len(), 1);
1298        assert_eq!(result.groups[1].group_id, "SG8");
1299        assert_eq!(result.groups[1].repetitions.len(), 2);
1300        assert_eq!(result.groups[2].group_id, "SG12");
1301        assert_eq!(result.groups[2].repetitions.len(), 1);
1302    }
1303}