Skip to main content

mig_assembly/
assembler.rs

1//! Recursive descent assembler — MIG-guided segment consumption.
2//!
3//! The assembler walks the MIG tree structure and consumes matching
4//! segments from the input. It produces a generic tree representation
5//! that can be converted to typed PID structs.
6
7use crate::cursor::SegmentCursor;
8use crate::diagnostic::{StructureDiagnostic, StructureDiagnosticKind};
9use crate::matcher;
10use crate::tokenize::OwnedSegment;
11use crate::AssemblyError;
12use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
13use serde::{Deserialize, Serialize};
14
/// A generic assembled tree node (before PID-specific typing).
///
/// Produced by [`Assembler::assemble_generic`]: root-level segments are kept
/// in `segments`, top-level segment groups in `groups`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledTree {
    /// Root-level segments in consumption order: those matched before any
    /// group first, followed by those matched after all groups
    /// (see `post_group_start` for the split point).
    pub segments: Vec<AssembledSegment>,
    /// Top-level segment groups, in the order they were assembled.
    pub groups: Vec<AssembledGroup>,
    /// Index in `segments` where post-group segments start (e.g., UNT, UNZ).
    /// Segments before this index appear before groups in EDIFACT order.
    ///
    /// Falls back to `0` when the field is absent from deserialized input.
    #[serde(default)]
    pub post_group_start: usize,
    /// Root segments consumed between groups during assembly (e.g., UNS
    /// section separator in MSCONS). Key = index into `groups` vec; value =
    /// segments that appear immediately before that group in the EDIFACT
    /// stream. Empty for messages without inter-group root segments.
    ///
    /// Omitted from serialized output when empty.
    #[serde(default, skip_serializing_if = "std::collections::BTreeMap::is_empty")]
    pub inter_group_segments: std::collections::BTreeMap<usize, Vec<AssembledSegment>>,
}
31
/// An assembled segment with its data elements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledSegment {
    /// Segment tag, copied from the input segment's `id`.
    pub tag: String,
    /// `elements[i][j]` = component `j` of element `i`
    pub elements: Vec<Vec<String>>,
    /// MIG `Number` attribute identifying this segment variant.
    /// Two segments with the same tag (e.g., DTM) but different roles
    /// (DTM+92 vs DTM+93) have distinct MIG numbers.
    ///
    /// `None` for segments that were not matched against a specific MIG slot
    /// (greedily consumed extras and skipped segments); omitted from
    /// serialized output in that case.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mig_number: Option<String>,
}
44
/// An assembled segment group (may repeat).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroup {
    /// Identifier of the MIG group definition this group was assembled from.
    pub group_id: String,
    /// One entry per repetition found in the input, in input order.
    pub repetitions: Vec<AssembledGroupInstance>,
}
51
/// One repetition of a segment group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssembledGroupInstance {
    /// Segments consumed directly by this instance (not by nested groups),
    /// in input order.
    pub segments: Vec<AssembledSegment>,
    /// Nested segment groups consumed inside this instance.
    pub child_groups: Vec<AssembledGroup>,
    /// MIG `Number` of the entry segment that identified this group instance's variant.
    ///
    /// Taken from the first segment slot of the MIG group definition.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub entry_mig_number: Option<String>,
    /// All MIG `Number`s defined for this group variant — includes segments that
    /// may be absent in the EDIFACT but are defined in the MIG for this variant.
    /// Numbers of nested groups are included as well.
    ///
    /// Used by the validator to determine which AHB rules belong to this instance:
    /// a rule with `mig_number` in this set applies here, even if the segment is
    /// missing (which is then a missing-field error). Without this, rules for
    /// absent-but-required segments would be incorrectly filtered out.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub variant_mig_numbers: Vec<String>,
    /// Segments that were present in the EDIFACT input but not defined in
    /// the PID-filtered MIG for this group. Only populated when the assembler
    /// runs with [`AssemblerConfig::skip_unknown_segments`] enabled.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub skipped_segments: Vec<AssembledSegment>,
}
75
76impl AssembledGroupInstance {
77    /// Create a virtual `AssembledTree` scoped to this group instance.
78    ///
79    /// The instance's own segments become the tree's root segments,
80    /// and its child groups become the tree's groups. This enables
81    /// running `MappingEngine::map_all_forward()` on a single
82    /// transaction group as if it were a complete message.
83    pub fn as_assembled_tree(&self) -> AssembledTree {
84        AssembledTree {
85            segments: self.segments.clone(),
86            groups: self.child_groups.clone(),
87            post_group_start: self.segments.len(),
88            inter_group_segments: std::collections::BTreeMap::new(),
89        }
90    }
91}
92
/// Configuration for the assembler.
///
/// The derived `Default` yields strict, positional assembly: no unknown
/// segments are skipped and no qualifier checks are performed.
#[derive(Debug, Clone, Default)]
pub struct AssemblerConfig {
    /// When `true`, the assembler skips segments inside a group instance that
    /// don't match any remaining MIG slot, nested-group entry, or the group's
    /// entry tag (next repetition). Skipped segments are preserved on
    /// [`AssembledGroupInstance::skipped_segments`] for roundtrip re-emission.
    ///
    /// Default: `false` (strict AHB — unknown segments stall the cursor).
    pub skip_unknown_segments: bool,

    /// Qualifier-aware assembly: maps MIG `Number` to `(element_index, component_index, expected_value)`.
    ///
    /// When a bounded slot has a `number` with an entry in this map,
    /// `try_consume_segment` checks the input segment's value at the
    /// specified position. If it doesn't match, the slot is skipped (segment
    /// is for a different qualifier variant). A missing element or component
    /// is compared as the empty string.
    ///
    /// Build from the PID schema JSON, or construct manually:
    /// `{ "00023" => (0, 0, "92".to_string()), "00024" => (0, 0, "93".to_string()) }`.
    ///
    /// Default: empty (positional assembly, no qualifier checking).
    pub qualifier_map: std::collections::HashMap<String, (usize, usize, String)>,
}
117
/// MIG-guided assembler.
///
/// Takes a MIG schema and uses it as a grammar to guide consumption
/// of parsed EDIFACT segments. Produces a generic `AssembledTree`.
pub struct Assembler<'a> {
    /// Borrowed MIG schema whose segments and segment groups drive assembly.
    mig: &'a MigSchema,
    /// Options controlling unknown-segment skipping and qualifier checks.
    config: AssemblerConfig,
}
126
127impl<'a> Assembler<'a> {
128    pub fn new(mig: &'a MigSchema) -> Self {
129        Self {
130            mig,
131            config: AssemblerConfig::default(),
132        }
133    }
134
135    pub fn with_config(mig: &'a MigSchema, config: AssemblerConfig) -> Self {
136        Self { mig, config }
137    }
138
139    /// Assemble segments into a generic tree following MIG structure.
140    pub fn assemble_generic(
141        &self,
142        segments: &[OwnedSegment],
143    ) -> Result<AssembledTree, AssemblyError> {
144        let mut cursor = SegmentCursor::new(segments.len());
145        let mut tree = AssembledTree {
146            segments: Vec::new(),
147            groups: Vec::new(),
148            post_group_start: 0,
149            inter_group_segments: std::collections::BTreeMap::new(),
150        };
151
152        // Track which MIG segment indices were matched in the first pass
153        let mut matched_seg_indices = Vec::new();
154
155        // Process top-level segments (first pass — before groups)
156        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
157            if cursor.is_exhausted() {
158                break;
159            }
160            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
161                tree.segments.push(assembled);
162                matched_seg_indices.push(i);
163            }
164        }
165
166        // Process segment groups, interleaving root segment consumption.
167        // Some message types (e.g., MSCONS) have root segments like UNS
168        // between groups (SG2 and SG5). Before trying each group, consume
169        // any unmatched root segments at the current cursor position.
170        //
171        // When consecutive same-ID groups have variant_code set (e.g., 3 SG8
172        // entries for ZD7, Z98, ZF3), the assembler tries ALL variants at each
173        // cursor position to handle interleaved reps.
174        let mut group_idx = 0;
175        while group_idx < self.mig.segment_groups.len() {
176            if cursor.is_exhausted() {
177                break;
178            }
179
180            let mig_group = &self.mig.segment_groups[group_idx];
181
182            // Try consuming unmatched root segments before this group
183            let tree_group_idx = tree.groups.len();
184            for (i, mig_seg) in self.mig.segments.iter().enumerate() {
185                if cursor.is_exhausted() {
186                    break;
187                }
188                if matched_seg_indices.contains(&i) {
189                    continue;
190                }
191                if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
192                    tree.inter_group_segments
193                        .entry(tree_group_idx)
194                        .or_default()
195                        .push(assembled);
196                    matched_seg_indices.push(i);
197                }
198            }
199
200            // Check if this starts a variant set (consecutive same-ID groups with variant_code)
201            if mig_group.variant_code.is_some() {
202                let variant_count = self.mig.segment_groups[group_idx..]
203                    .iter()
204                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
205                    .count();
206                let variant_end = group_idx + variant_count;
207
208                let variant_groups = &self.mig.segment_groups[group_idx..variant_end];
209                if let Some(combined) =
210                    self.try_consume_variant_groups(segments, &mut cursor, variant_groups)?
211                {
212                    tree.groups.push(combined);
213                }
214                group_idx = variant_end;
215            } else {
216                if let Some(assembled) = self.try_consume_group(segments, &mut cursor, mig_group)? {
217                    tree.groups.push(assembled);
218                }
219                group_idx += 1;
220            }
221        }
222
223        // Mark where post-group segments start
224        tree.post_group_start = tree.segments.len();
225
226        // Second pass: try unmatched top-level segments (e.g., UNT, UNZ after groups)
227        for (i, mig_seg) in self.mig.segments.iter().enumerate() {
228            if cursor.is_exhausted() {
229                break;
230            }
231            if matched_seg_indices.contains(&i) {
232                continue;
233            }
234            if let Some(assembled) = self.try_consume_segment(segments, &mut cursor, mig_seg)? {
235                tree.segments.push(assembled);
236            }
237        }
238
239        Ok(tree)
240    }
241
242    fn try_consume_segment(
243        &self,
244        segments: &[OwnedSegment],
245        cursor: &mut SegmentCursor,
246        mig_seg: &MigSegment,
247    ) -> Result<Option<AssembledSegment>, AssemblyError> {
248        if cursor.is_exhausted() {
249            return Ok(None);
250        }
251        let seg = &segments[cursor.position()];
252        if matcher::matches_segment_tag(&seg.id, &mig_seg.id) {
253            // Qualifier check: if the MIG slot has a qualifier_map entry,
254            // verify the input segment's qualifier matches before consuming.
255            if let Some(ref num) = mig_seg.number {
256                if let Some((el_idx, comp_idx, expected)) = self.config.qualifier_map.get(num) {
257                    let actual = seg
258                        .elements
259                        .get(*el_idx)
260                        .and_then(|e| e.get(*comp_idx))
261                        .map(|s| s.as_str())
262                        .unwrap_or("");
263                    if actual != expected {
264                        return Ok(None); // Wrong qualifier — skip this slot
265                    }
266                }
267            }
268            let mut assembled = owned_to_assembled(seg);
269            assembled.mig_number = mig_seg.number.clone();
270            cursor.advance();
271            Ok(Some(assembled))
272        } else {
273            Ok(None) // Segment not present (optional)
274        }
275    }
276
277    fn try_consume_group(
278        &self,
279        segments: &[OwnedSegment],
280        cursor: &mut SegmentCursor,
281        mig_group: &MigSegmentGroup,
282    ) -> Result<Option<AssembledGroup>, AssemblyError> {
283        let mut repetitions = Vec::new();
284        let entry_segment = mig_group.segments.first().ok_or_else(|| {
285            AssemblyError::ParseError(format!("Group {} has no segments", mig_group.id))
286        })?;
287
288        // Loop for repeating groups
289        while !cursor.is_exhausted() {
290            let seg = &segments[cursor.position()];
291            if !matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
292                break; // Current segment doesn't match group entry — stop repeating
293            }
294
295            // Check variant qualifier if set — tag matches but wrong variant
296            if !mig_group.variant_codes.is_empty() {
297                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
298                let actual_qual = seg
299                    .elements
300                    .get(ei)
301                    .and_then(|e| e.get(ci))
302                    .map(|s| s.as_str())
303                    .unwrap_or("");
304                if !mig_group
305                    .variant_codes
306                    .iter()
307                    .any(|c| actual_qual.eq_ignore_ascii_case(c))
308                {
309                    break;
310                }
311            } else if let Some(ref expected_code) = mig_group.variant_code {
312                let (ei, ci) = mig_group.variant_qualifier_position.unwrap_or((0, 0));
313                let actual_qual = seg
314                    .elements
315                    .get(ei)
316                    .and_then(|e| e.get(ci))
317                    .map(|s| s.as_str())
318                    .unwrap_or("");
319                if !actual_qual.eq_ignore_ascii_case(expected_code) {
320                    break;
321                }
322            }
323
324            let mut instance = AssembledGroupInstance {
325                segments: Vec::new(),
326                child_groups: Vec::new(),
327                entry_mig_number: entry_segment.number.clone(),
328                variant_mig_numbers: collect_mig_numbers(mig_group),
329                skipped_segments: Vec::new(),
330            };
331
332            // Consume segments within this group instance.
333            // Process MIG slots in tag runs: for consecutive slots with the
334            // same tag, consume ALL matching input segments — not just the
335            // defined count. This handles real-world fixtures with more
336            // repetitions than the merged MIG predicts (e.g., 6 RFFs when
337            // the schema defines max 4).
338            //
339            // The entry segment (first tag run) is consumed bounded — one per
340            // defined slot — because the outer while loop uses the entry tag
341            // to delineate group repetitions.
342            let mut slot_idx = 0;
343            let mut is_entry_run = true;
344            while slot_idx < mig_group.segments.len() {
345                if cursor.is_exhausted() {
346                    break;
347                }
348                let current_tag = &mig_group.segments[slot_idx].id;
349                let run_len = mig_group.segments[slot_idx..]
350                    .iter()
351                    .take_while(|s| s.id == *current_tag)
352                    .count();
353
354                if is_entry_run {
355                    // Entry tag: consume at most run_len (preserves group boundaries)
356                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
357                        if cursor.is_exhausted() {
358                            break;
359                        }
360                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
361                            instance.segments.push(assembled);
362                        }
363                    }
364                    is_entry_run = false;
365                } else if matcher::matches_segment_tag(current_tag, &entry_segment.id) {
366                    // Non-entry slot with SAME tag as entry (e.g., CCI appears as
367                    // both entry and non-entry in merged SG30).
368                    //
369                    // Only consume if we haven't yet consumed any NON-entry-tag
370                    // segments (i.e., we're still in a consecutive entry-tag run).
371                    // Once we've consumed a different tag (like CAV), seeing the
372                    // entry tag again means a new rep boundary.
373                    //
374                    // z35: entry CCI → CAV CAV → sees CCI → has_other=true → break ✓
375                    // z39: entry CCI → (no CAV) → sees CCI → has_other=false → consume ✓
376                    //      then CCI CCI → CAV → sees CCI → has_other=true → break
377                    //      BUT: z39 needs CCI-CAV-CCI-CAV structure
378                    //
379                    // Better heuristic: check if ALL remaining slots from here are
380                    // entry-tag + non-entry pairs. If the current slot is entry-tag
381                    // and the NEXT input segment after it would be a non-entry tag,
382                    // consume — it's a continuation. Otherwise break.
383                    if cursor.is_exhausted() {
384                        break;
385                    }
386                    let seg = &segments[cursor.position()];
387                    if !matcher::matches_segment_tag(&seg.id, current_tag) {
388                        break;
389                    }
390                    // Check: is there a non-entry segment AFTER this entry-tag?
391                    // If so, this CCI+CAV pair is part of the current rep.
392                    let has_following_non_entry = if cursor.position() + 1 < segments.len() {
393                        let next = &segments[cursor.position() + 1];
394                        !matcher::matches_segment_tag(&next.id, &entry_segment.id)
395                            && mig_group.segments.iter().any(|s| {
396                                matcher::matches_segment_tag(&next.id, &s.id)
397                                    && !matcher::matches_segment_tag(&s.id, &entry_segment.id)
398                            })
399                    } else {
400                        false
401                    };
402                    if has_following_non_entry {
403                        // CCI followed by CAV → consume as continuation pair
404                        instance.segments.push(owned_to_assembled(seg));
405                        cursor.advance();
406                    } else {
407                        // CCI followed by CCI or unknown → let outer loop decide
408                        break;
409                    }
410                } else {
411                    // Non-entry tag: consume bounded slots first (with mig_number),
412                    // then greedily consume extras (without mig_number).
413                    // The bounded slots get mig_number from the MIG definition so
414                    // the validator can distinguish same-tag segments (e.g., DTM+92
415                    // vs DTM+93 both in SG4).
416                    for slot in &mig_group.segments[slot_idx..slot_idx + run_len] {
417                        if cursor.is_exhausted() {
418                            break;
419                        }
420                        if let Some(assembled) = self.try_consume_segment(segments, cursor, slot)? {
421                            instance.segments.push(assembled);
422                        }
423                    }
424                    // Greedily consume any remaining same-tag segments beyond the MIG count
425                    while !cursor.is_exhausted() {
426                        let seg = &segments[cursor.position()];
427                        if matcher::matches_segment_tag(&seg.id, current_tag) {
428                            instance.segments.push(owned_to_assembled(seg));
429                            cursor.advance();
430                        } else {
431                            break;
432                        }
433                    }
434                }
435
436                slot_idx += run_len;
437
438                // Point A: Skip unknown segments between MIG slot runs.
439                // When skip mode is ON and we just finished a slot run but the
440                // current segment doesn't match any remaining MIG slot, nested
441                // group entry, or the entry tag, skip it.
442                if self.config.skip_unknown_segments {
443                    while !cursor.is_exhausted() {
444                        let seg = &segments[cursor.position()];
445                        // Stop if it matches the entry tag (next group repetition)
446                        if matcher::matches_segment_tag(&seg.id, &entry_segment.id) {
447                            break;
448                        }
449                        // Stop if it matches any remaining MIG slot
450                        if mig_group.segments[slot_idx..]
451                            .iter()
452                            .any(|s| matcher::matches_segment_tag(&seg.id, &s.id))
453                        {
454                            break;
455                        }
456                        // Stop if it matches any nested group entry
457                        if mig_group.nested_groups.iter().any(|ng| {
458                            ng.segments
459                                .first()
460                                .is_some_and(|es| matcher::matches_segment_tag(&seg.id, &es.id))
461                        }) {
462                            break;
463                        }
464                        // Unknown segment — skip it
465                        instance.skipped_segments.push(owned_to_assembled(seg));
466                        cursor.advance();
467                    }
468                }
469            }
470
471            // Consume nested groups (variant-aware for same-ID groups)
472            let mut nested_idx = 0;
473            while nested_idx < mig_group.nested_groups.len() {
474                if cursor.is_exhausted() {
475                    break;
476                }
477                let nested = &mig_group.nested_groups[nested_idx];
478
479                if nested.variant_code.is_some() {
480                    // Variant set: collect consecutive same-ID groups with variant_code
481                    let variant_count = mig_group.nested_groups[nested_idx..]
482                        .iter()
483                        .take_while(|g| g.id == nested.id && g.variant_code.is_some())
484                        .count();
485                    let variant_end = nested_idx + variant_count;
486                    let variant_groups = &mig_group.nested_groups[nested_idx..variant_end];
487                    if let Some(combined) =
488                        self.try_consume_variant_groups(segments, cursor, variant_groups)?
489                    {
490                        instance.child_groups.push(combined);
491                    }
492                    nested_idx = variant_end;
493                } else {
494                    if let Some(assembled) = self.try_consume_group(segments, cursor, nested)? {
495                        instance.child_groups.push(assembled);
496                    }
497                    nested_idx += 1;
498                }
499            }
500
501            repetitions.push(instance);
502        }
503
504        if repetitions.is_empty() {
505            Ok(None)
506        } else {
507            Ok(Some(AssembledGroup {
508                group_id: mig_group.id.clone(),
509                repetitions,
510            }))
511        }
512    }
513
514    /// Consume interleaved repetitions of variant groups.
515    ///
516    /// At each cursor position, tries all variant definitions to find which one
517    /// matches the entry segment's qualifier. Collects all reps into one
518    /// `AssembledGroup` with the shared group_id.
519    fn try_consume_variant_groups(
520        &self,
521        segments: &[OwnedSegment],
522        cursor: &mut SegmentCursor,
523        variants: &[MigSegmentGroup],
524    ) -> Result<Option<AssembledGroup>, AssemblyError> {
525        let group_id = variants[0].id.clone();
526        let entry_tag = variants[0]
527            .segments
528            .first()
529            .map(|s| s.id.as_str())
530            .unwrap_or("");
531        let mut all_reps = Vec::new();
532
533        while !cursor.is_exhausted() {
534            let seg = &segments[cursor.position()];
535            if !matcher::matches_segment_tag(&seg.id, entry_tag) {
536                break;
537            }
538
539            // Find which variant matches this segment's qualifier.
540            // Each variant may have its qualifier at a different element position
541            // (e.g., CCI+Z19 has qualifier at [0][0], but CCI+++Z15 at [2][0]).
542            let matched = variants.iter().find(|v| {
543                let (ei, ci) = v.variant_qualifier_position.unwrap_or((0, 0));
544                let actual_qual = seg
545                    .elements
546                    .get(ei)
547                    .and_then(|e| e.get(ci))
548                    .map(|s| s.as_str())
549                    .unwrap_or("");
550                if !v.variant_codes.is_empty() {
551                    v.variant_codes
552                        .iter()
553                        .any(|c| actual_qual.eq_ignore_ascii_case(c))
554                } else if let Some(ref expected_code) = v.variant_code {
555                    actual_qual.eq_ignore_ascii_case(expected_code)
556                } else {
557                    false
558                }
559            });
560
561            if let Some(variant) = matched {
562                if let Some(group) = self.try_consume_group(segments, cursor, variant)? {
563                    all_reps.extend(group.repetitions);
564                } else {
565                    break;
566                }
567            } else {
568                // No variant matches — try consuming with the first variant as
569                // fallback to avoid getting stuck. This handles edge cases where
570                // the qualifier doesn't exactly match any variant code.
571                if let Some(group) = self.try_consume_group(segments, cursor, &variants[0])? {
572                    all_reps.extend(group.repetitions);
573                } else {
574                    break;
575                }
576            }
577        }
578
579        if all_reps.is_empty() {
580            Ok(None)
581        } else {
582            Ok(Some(AssembledGroup {
583                group_id,
584                repetitions: all_reps,
585            }))
586        }
587    }
588
589    /// Assemble segments with diagnostic collection.
590    ///
591    /// Returns the assembled tree plus diagnostics for segments not consumed
592    /// by the MIG-guided assembly. Existing `assemble_generic()` is unchanged.
593    pub fn assemble_with_diagnostics(
594        &self,
595        segments: &[OwnedSegment],
596    ) -> (AssembledTree, Vec<StructureDiagnostic>) {
597        let mut diagnostics = Vec::new();
598
599        let tree = match self.assemble_generic(segments) {
600            Ok(tree) => tree,
601            Err(e) => {
602                diagnostics.push(StructureDiagnostic {
603                    kind: StructureDiagnosticKind::UnexpectedSegment,
604                    segment_id: String::new(),
605                    position: 0,
606                    message: format!("Assembly failed: {e}"),
607                });
608                return (
609                    AssembledTree {
610                        segments: Vec::new(),
611                        groups: Vec::new(),
612                        post_group_start: 0,
613                        inter_group_segments: std::collections::BTreeMap::new(),
614                    },
615                    diagnostics,
616                );
617            }
618        };
619
620        // Count consumed segments in the assembled tree
621        let consumed = count_tree_segments(&tree);
622
623        // Segments beyond consumed count are unconsumed
624        for (i, seg) in segments.iter().enumerate().skip(consumed) {
625            diagnostics.push(StructureDiagnostic {
626                kind: StructureDiagnosticKind::UnexpectedSegment,
627                segment_id: seg.id.clone(),
628                position: i,
629                message: format!(
630                    "Segment '{}' at position {} was not consumed by MIG-guided assembly",
631                    seg.id, i
632                ),
633            });
634        }
635
636        (tree, diagnostics)
637    }
638}
639
640fn count_tree_segments(tree: &AssembledTree) -> usize {
641    let mut count = tree.segments.len();
642    for group in &tree.groups {
643        count += count_group_segments(group);
644    }
645    // Count inter-group segments (e.g., UNS+D between groups)
646    for segs in tree.inter_group_segments.values() {
647        count += segs.len();
648    }
649    count
650}
651
652fn count_group_segments(group: &AssembledGroup) -> usize {
653    let mut count = 0;
654    for rep in &group.repetitions {
655        count += rep.segments.len();
656        count += rep.skipped_segments.len();
657        for child in &rep.child_groups {
658            count += count_group_segments(child);
659        }
660    }
661    count
662}
663
664/// Collect all MIG `Number`s from a segment group definition, recursively.
665///
666/// This includes numbers from direct segments and from nested groups.
667/// Used to populate `AssembledGroupInstance::variant_mig_numbers`.
668fn collect_mig_numbers(group: &MigSegmentGroup) -> Vec<String> {
669    let mut numbers = Vec::new();
670    for seg in &group.segments {
671        if let Some(ref num) = seg.number {
672            numbers.push(num.clone());
673        }
674    }
675    for nested in &group.nested_groups {
676        numbers.extend(collect_mig_numbers(nested));
677    }
678    numbers
679}
680
681pub fn owned_to_assembled(seg: &OwnedSegment) -> AssembledSegment {
682    AssembledSegment {
683        tag: seg.id.clone(),
684        elements: seg.elements.clone(),
685        mig_number: None,
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
693
694    fn make_owned_seg(id: &str, elements: Vec<Vec<&str>>) -> OwnedSegment {
695        OwnedSegment {
696            id: id.to_string(),
697            elements: elements
698                .into_iter()
699                .map(|e| e.into_iter().map(|c| c.to_string()).collect())
700                .collect(),
701            segment_number: 0,
702        }
703    }
704
705    fn make_mig_schema(segments: Vec<&str>, groups: Vec<MigSegmentGroup>) -> MigSchema {
706        MigSchema {
707            message_type: "UTILMD".to_string(),
708            variant: Some("Strom".to_string()),
709            version: "S2.1".to_string(),
710            publication_date: "2025-03-20".to_string(),
711            author: "BDEW".to_string(),
712            format_version: "FV2504".to_string(),
713            source_file: "test".to_string(),
714            segments: segments.into_iter().map(make_mig_segment).collect(),
715            segment_groups: groups,
716        }
717    }
718
#[test]
fn test_assembler_top_level_segments_only() {
    // Schema with four root segments and no groups.
    let schema = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001", "UTILMD:D:11A:UN:S2.1"]]),
        make_owned_seg("BGM", vec![vec!["E01", "DOC001"]]),
        make_owned_seg("DTM", vec![vec!["137", "20250101", "102"]]),
        make_owned_seg("UNT", vec![vec!["4", "001"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // All four segments are at the root, in input order, and no group exists.
    let tags: Vec<&str> = tree.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(tags, ["UNH", "BGM", "DTM", "UNT"]);
    assert!(tree.groups.is_empty());
}
740
#[test]
fn test_assembler_with_segment_group() {
    let sg2_def = make_mig_group("SG2", vec!["NAD"], vec![]);
    let sg4_def = make_mig_group("SG4", vec!["IDE", "STS"], vec![]);
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![sg2_def, sg4_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // Root level holds UNH and BGM; both groups were assembled.
    assert_eq!(tree.segments.len(), 2);
    assert_eq!(tree.groups.len(), 2);

    // SG2 repeats once per NAD segment.
    let sg2 = &tree.groups[0];
    assert_eq!(sg2.group_id, "SG2");
    assert_eq!(sg2.repetitions.len(), 2);
    for rep in &sg2.repetitions {
        assert_eq!(rep.segments[0].tag, "NAD");
    }

    // SG4 has a single repetition made of IDE + STS.
    let sg4 = &tree.groups[1];
    assert_eq!(sg4.group_id, "SG4");
    assert_eq!(sg4.repetitions.len(), 1);
    assert_eq!(sg4.repetitions[0].segments.len(), 2);
}
776
#[test]
fn test_assembler_nested_groups() {
    // SG3 (CTA, COM) is declared as a child of SG2 (NAD).
    let sg3_def = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
    let sg2_def = make_mig_group("SG2", vec!["NAD"], vec![sg3_def]);
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![sg2_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("CTA", vec![vec!["IC", "Kontakt"]]),
        make_owned_seg("COM", vec![vec!["040@example.com", "EM"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // One SG2 repetition, opened by NAD.
    let outer = &tree.groups[0];
    assert_eq!(outer.group_id, "SG2");
    assert_eq!(outer.repetitions.len(), 1);
    let outer_rep = &outer.repetitions[0];
    assert_eq!(outer_rep.segments[0].tag, "NAD");

    // SG3 sits inside that repetition and holds CTA followed by COM.
    assert_eq!(outer_rep.child_groups.len(), 1);
    let inner = &outer_rep.child_groups[0];
    assert_eq!(inner.group_id, "SG3");
    let inner_tags: Vec<&str> = inner.repetitions[0]
        .segments
        .iter()
        .map(|s| s.tag.as_str())
        .collect();
    assert_eq!(inner_tags, ["CTA", "COM"]);
}
812
#[test]
fn test_assembler_optional_segments_skipped() {
    // The schema lists UNH, BGM, DTM, UNT, but the input omits DTM.
    let schema = make_mig_schema(vec!["UNH", "BGM", "DTM", "UNT"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("UNT", vec![vec!["2", "001"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // The missing DTM does not block UNT from being consumed.
    let tags: Vec<&str> = tree.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(tags, ["UNH", "BGM", "UNT"]);
}
833
#[test]
fn test_assembler_empty_segments() {
    // Assembling an empty input succeeds and yields an empty tree.
    let schema = make_mig_schema(vec!["UNH"], vec![]);
    let tree = Assembler::new(&schema).assemble_generic(&[]).unwrap();
    assert!(tree.segments.is_empty());
    assert!(tree.groups.is_empty());
}
842
#[test]
fn test_assembler_preserves_element_data() {
    let schema = make_mig_schema(vec!["DTM"], vec![]);
    let input = vec![make_owned_seg(
        "DTM",
        vec![vec!["137", "202501010000+01", "303"]],
    )];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // The first three components of the first element come through unchanged.
    let first_element = &tree.segments[0].elements[0];
    assert_eq!(first_element[..3], ["137", "202501010000+01", "303"]);
}
860
#[test]
fn test_group_instance_as_assembled_tree() {
    // Shorthand for a single-element segment without a MIG number.
    let seg = |tag: &str, components: &[&str]| AssembledSegment {
        tag: tag.to_string(),
        elements: vec![components.iter().map(|c| String::from(*c)).collect()],
        mig_number: None,
    };

    // SG5 child group with one LOC repetition.
    let sg5 = AssembledGroup {
        group_id: "SG5".to_string(),
        repetitions: vec![AssembledGroupInstance {
            segments: vec![seg("LOC", &["Z16", "DE000111222333"])],
            child_groups: vec![],
            entry_mig_number: None,
            variant_mig_numbers: vec![],
            skipped_segments: vec![],
        }],
    };

    // SG4 instance: root segments IDE + STS, plus SG5 as its child group.
    let sg4_instance = AssembledGroupInstance {
        segments: vec![seg("IDE", &["24", "TX001"]), seg("STS", &["7"])],
        child_groups: vec![sg5],
        entry_mig_number: None,
        variant_mig_numbers: vec![],
        skipped_segments: vec![],
    };

    let sub_tree = sg4_instance.as_assembled_tree();

    // The instance's own segments become the sub-tree's root segments.
    let root_tags: Vec<&str> = sub_tree.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(root_tags, ["IDE", "STS"]);

    // The instance's child groups become the sub-tree's groups.
    assert_eq!(sub_tree.groups.len(), 1);
    assert_eq!(sub_tree.groups[0].group_id, "SG5");

    // post_group_start equals the number of root segments.
    assert_eq!(sub_tree.post_group_start, 2);
}
912
#[test]
fn test_assembler_from_parsed_edifact() {
    // End-to-end: tokenize a raw EDIFACT interchange, then assemble it.
    let raw = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001+9'DTM+137:20250101:102'UNT+3+MSG001'UNZ+1+REF001'";
    let parsed = crate::tokenize::parse_to_segments(raw).unwrap();

    let schema = make_mig_schema(vec!["UNB", "UNH", "BGM", "DTM", "UNT", "UNZ"], vec![]);
    let tree = Assembler::new(&schema).assemble_generic(&parsed).unwrap();

    // The message-level segments must all be present at the root.
    for expected_tag in ["UNH", "BGM", "DTM"] {
        assert!(tree.segments.iter().any(|s| s.tag == expected_tag));
    }
}
928
#[test]
fn test_assemble_with_diagnostics_clean_input() {
    // Input matches the schema exactly, so nothing should be reported.
    let schema = make_mig_schema(vec!["UNH", "BGM", "UNT"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("UNT", vec![vec!["2", "001"]]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    assert_eq!(tree.segments.len(), 3);
    assert!(
        diagnostics.is_empty(),
        "Clean input should have no diagnostics"
    );
}
945
#[test]
fn test_assemble_with_diagnostics_unconsumed_segments() {
    // FTX is not part of the schema and trails the known segments.
    let schema = make_mig_schema(vec!["UNH", "BGM"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("FTX", vec![vec!["AAA", "extra text"]]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    assert_eq!(tree.segments.len(), 2);

    // Exactly one diagnostic, pointing at FTX at input position 2.
    assert_eq!(diagnostics.len(), 1);
    let diag = &diagnostics[0];
    assert_eq!(diag.kind, StructureDiagnosticKind::UnexpectedSegment);
    assert_eq!(diag.segment_id, "FTX");
    assert_eq!(diag.position, 2);
}
965
#[test]
fn test_assemble_with_diagnostics_multiple_unconsumed() {
    // Three unknown segments follow the only segment the schema knows.
    let schema = make_mig_schema(vec!["UNH"], vec![]);
    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("FOO", vec![]),
        make_owned_seg("BAR", vec![]),
        make_owned_seg("BAZ", vec![]),
    ];

    let (tree, diagnostics) = Assembler::new(&schema).assemble_with_diagnostics(&input);

    assert_eq!(tree.segments.len(), 1);

    // One diagnostic per leftover segment, in input order.
    let expected_ids = ["FOO", "BAR", "BAZ"];
    assert_eq!(diagnostics.len(), 3);
    for (diag, expected) in diagnostics.iter().zip(expected_ids.iter()) {
        assert_eq!(diag.segment_id, *expected);
    }
}
983
984    // ── Non-entry segment mig_number assignment tests ──
985
#[test]
fn test_non_entry_segments_get_mig_number_from_bounded_slots() {
    // SG4 is defined as entry IDE, two numbered DTM slots, then STS.
    // Every consumed segment should carry the MIG number of the slot
    // it was matched against.
    use crate::test_support::make_mig_segment_numbered;

    let mut sg4_def = make_mig_group("SG4", vec![], vec![]);
    sg4_def.segments = vec![
        make_mig_segment_numbered("IDE", "00020"),
        make_mig_segment_numbered("DTM", "00023"),
        make_mig_segment_numbered("DTM", "00024"),
        make_mig_segment_numbered("STS", "00035"),
    ];
    let schema = make_mig_schema(vec!["UNH"], vec![sg4_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("DTM", vec![vec!["92", "202505312200+00", "303"]]),
        make_owned_seg("DTM", vec![vec!["93", "202512312300+00", "303"]]),
        make_owned_seg("STS", vec![vec!["7"], vec![], vec!["E01"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();
    let rep = &tree.groups[0].repetitions[0];

    // (tag, MIG number) expected at each position: the entry IDE, the
    // DTM+92 and DTM+93 in their respective slots, and finally STS.
    let expected = [
        ("IDE", "00020"),
        ("DTM", "00023"),
        ("DTM", "00024"),
        ("STS", "00035"),
    ];
    for (idx, (tag, number)) in expected.iter().enumerate() {
        assert_eq!(rep.segments[idx].tag, *tag);
        assert_eq!(rep.segments[idx].mig_number.as_deref(), Some(*number));
    }

    // variant_mig_numbers lists all four slot numbers.
    for (_, number) in expected.iter() {
        assert!(rep.variant_mig_numbers.contains(&number.to_string()));
    }
}
1039
#[test]
fn test_greedy_extra_segments_get_no_mig_number() {
    // The MIG offers a single DTM slot while the input carries two DTMs:
    // the first takes the slot's number, the surplus one stays unnumbered.
    use crate::test_support::make_mig_segment_numbered;

    let mut sg4_def = make_mig_group("SG4", vec![], vec![]);
    sg4_def.segments = vec![
        make_mig_segment_numbered("IDE", "00020"),
        make_mig_segment_numbered("DTM", "00023"),
    ];
    let schema = make_mig_schema(vec!["UNH"], vec![sg4_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("IDE", vec![vec!["24"]]),
        make_owned_seg("DTM", vec![vec!["92", "20250531"]]),
        // One DTM more than the MIG defines.
        make_owned_seg("DTM", vec![vec!["93", "20251231"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();
    let rep = &tree.groups[0].repetitions[0];

    // IDE plus both DTMs end up in the repetition.
    assert_eq!(rep.segments.len(), 3);

    // DTM matched against the MIG slot carries the slot number.
    assert_eq!(rep.segments[1].mig_number.as_deref(), Some("00023"));

    // Extra DTM beyond the slots has no MIG number.
    assert!(rep.segments[2].mig_number.is_none());
}
1074
1075    // ── Qualifier-aware assembly tests ──
1076
#[test]
fn test_qualifier_map_prevents_wrong_slot_consumption() {
    // The MIG defines DTM slots 00023 and 00024; the input has only DTM+93.
    // The qualifier map says slot 00023 expects "92" and slot 00024
    // expects "93", so DTM+93 must be matched to slot 00024 rather than
    // to the first DTM slot.
    use crate::test_support::make_mig_segment_numbered;
    use std::collections::HashMap;

    let mut sg4_def = make_mig_group("SG4", vec![], vec![]);
    sg4_def.segments = vec![
        make_mig_segment_numbered("IDE", "00020"),
        make_mig_segment_numbered("DTM", "00023"),
        make_mig_segment_numbered("DTM", "00024"),
    ];
    let schema = make_mig_schema(vec!["UNH"], vec![sg4_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("IDE", vec![vec!["24"]]),
        make_owned_seg("DTM", vec![vec!["93", "202512312300+00", "303"]]),
    ];

    // MIG number -> (0, 0, expected qualifier value).
    let mut qualifier_map = HashMap::new();
    for (slot, qualifier) in [("00023", "92"), ("00024", "93")].iter() {
        qualifier_map.insert(slot.to_string(), (0, 0, qualifier.to_string()));
    }

    let assembler = Assembler::with_config(
        &schema,
        AssemblerConfig {
            skip_unknown_segments: false,
            qualifier_map,
        },
    );
    let tree = assembler.assemble_generic(&input).unwrap();
    let rep = &tree.groups[0].repetitions[0];

    // The repetition holds IDE and the single DTM, tagged with slot 00024.
    assert_eq!(rep.segments.len(), 2);
    let dtm = &rep.segments[1];
    assert_eq!(dtm.tag, "DTM");
    assert_eq!(
        dtm.mig_number.as_deref(),
        Some("00024"),
        "DTM+93 should get mig_number 00024 (not 00023)"
    );
}
1125
1126    // ── Skip-unknown-segments tests ──
1127
#[test]
fn test_skip_unknown_segment_between_slots() {
    // MIG group expects [SEQ, CCI], input has [SEQ, RFF, CCI].
    // With skip ON, RFF is skipped and CCI is consumed.
    // With skip OFF (default), CCI is lost because RFF stalls the cursor.
    let sg8 = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
    // `sg8` is not used again, so it is moved into the schema (no clone needed).
    let mig = make_mig_schema(vec!["UNH"], vec![sg8]);

    let segments = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "CROSSREF"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];

    // Skip OFF: CCI not consumed (RFF stalls cursor after SEQ)
    let off = Assembler::new(&mig);
    let tree_off = off.assemble_generic(&segments).unwrap();
    let sg8_off = &tree_off.groups[0];
    assert_eq!(sg8_off.repetitions[0].segments.len(), 1); // Only SEQ
    assert_eq!(sg8_off.repetitions[0].segments[0].tag, "SEQ");

    // Skip ON: RFF skipped, CCI consumed
    let on = Assembler::with_config(
        &mig,
        AssemblerConfig {
            skip_unknown_segments: true,
            ..Default::default()
        },
    );
    let tree_on = on.assemble_generic(&segments).unwrap();
    let sg8_on = &tree_on.groups[0];
    assert_eq!(sg8_on.repetitions[0].segments.len(), 2); // SEQ + CCI
    assert_eq!(sg8_on.repetitions[0].segments[0].tag, "SEQ");
    assert_eq!(sg8_on.repetitions[0].segments[1].tag, "CCI");
}
1164
#[test]
fn test_skip_preserves_on_instance() {
    // Segments skipped during assembly are kept on the group instance
    // in `skipped_segments`.
    let sg8_def = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
    let schema = make_mig_schema(vec!["UNH"], vec![sg8_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("DTM", vec![vec!["92", "20250101"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];

    let skip_config = AssemblerConfig {
        skip_unknown_segments: true,
        ..Default::default()
    };
    let tree = Assembler::with_config(&schema, skip_config)
        .assemble_generic(&input)
        .unwrap();
    let rep = &tree.groups[0].repetitions[0];

    // SEQ and CCI are consumed; RFF and DTM are recorded as skipped, in order.
    assert_eq!(rep.segments.len(), 2);
    let skipped_tags: Vec<&str> = rep
        .skipped_segments
        .iter()
        .map(|s| s.tag.as_str())
        .collect();
    assert_eq!(skipped_tags, ["RFF", "DTM"]);
}
1194
#[test]
fn test_skip_mode_off_default() {
    // An assembler built with `Assembler::new()` has skipping disabled
    // (backwards compatibility).
    let empty_schema = make_mig_schema(vec![], vec![]);
    let skip_enabled = Assembler::new(&empty_schema).config.skip_unknown_segments;
    assert!(!skip_enabled);
}
1202
#[test]
fn test_skip_does_not_consume_nested_group_entry() {
    // Skipping must stop short of segments that open a nested group.
    // SG4 = [IDE, STS] with nested SG5 = [LOC]; input is IDE, FOO, STS, LOC.
    // Only FOO may be skipped; LOC has to reach SG5.
    let sg5_def = make_mig_group("SG5", vec!["LOC"], vec![]);
    let sg4_def = make_mig_group("SG4", vec!["IDE", "STS"], vec![sg5_def]);
    let schema = make_mig_schema(vec!["UNH"], vec![sg4_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("IDE", vec![vec!["24"]]),
        make_owned_seg("FOO", vec![vec!["unknown"]]),
        make_owned_seg("STS", vec![vec!["7"]]),
        make_owned_seg("LOC", vec![vec!["Z16"]]),
    ];

    let skip_config = AssemblerConfig {
        skip_unknown_segments: true,
        ..Default::default()
    };
    let tree = Assembler::with_config(&schema, skip_config)
        .assemble_generic(&input)
        .unwrap();
    let rep = &tree.groups[0].repetitions[0];

    // IDE and STS are consumed; FOO is the only skipped segment.
    let consumed: Vec<&str> = rep.segments.iter().map(|s| s.tag.as_str()).collect();
    assert_eq!(consumed, ["IDE", "STS"]);
    let skipped: Vec<&str> = rep
        .skipped_segments
        .iter()
        .map(|s| s.tag.as_str())
        .collect();
    assert_eq!(skipped, ["FOO"]);

    // LOC was handed to the nested SG5.
    assert_eq!(rep.child_groups.len(), 1);
    let nested = &rep.child_groups[0];
    assert_eq!(nested.group_id, "SG5");
    assert_eq!(nested.repetitions[0].segments[0].tag, "LOC");
}
1243
#[test]
fn test_roundtrip_with_skip() {
    // Roundtrip: assemble with skipping enabled, disassemble, render.
    // Every input segment, including the skipped one, must reappear.
    use crate::disassembler::Disassembler;
    use crate::renderer::render_edifact;

    let sg8_def = make_mig_group("SG8", vec!["SEQ", "CCI"], vec![]);
    let schema = make_mig_schema(vec!["UNH", "UNT"], vec![sg8_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("UNT", vec![vec!["4", "001"]]),
    ];

    let skip_config = AssemblerConfig {
        skip_unknown_segments: true,
        ..Default::default()
    };
    let tree = Assembler::with_config(&schema, skip_config)
        .assemble_generic(&input)
        .unwrap();

    let dis = Disassembler::new(&schema).disassemble(&tree);
    let delimiters = edifact_primitives::EdifactDelimiters::default();
    let rendered = render_edifact(&dis, &delimiters);

    // All five segments come back. Within the group the expected order is
    // the MIG-guided segments (SEQ, CCI) first and the skipped RFF after
    // them, so the order differs from the input while content is kept.
    let expected_order = ["UNH", "SEQ", "CCI", "RFF", "UNT"];
    assert_eq!(dis.len(), 5);
    for (seg, expected_tag) in dis.iter().zip(expected_order.iter()) {
        assert_eq!(seg.tag, *expected_tag);
    }

    // The rendered text contains every segment.
    for fragment in [
        "UNH+001",
        "SEQ+Z98",
        "RFF+Z38:REF1",
        "CCI+Z30",
        "UNT+4:001",
    ]
    .iter()
    {
        assert!(rendered.contains(fragment));
    }
}
1294
1295    // ── Variant-aware assembly tests ──
1296
#[test]
fn test_variant_groups_interleaved_reps() {
    // SG8 has two variant definitions (SEQ+ZD7 and SEQ+Z98). The input
    // alternates between them: ZD7, Z98, ZD7, Z98. All four repetitions
    // must be collected into a single SG8 group.
    let zd7_def = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
    let z98_def = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
    let schema = make_mig_schema(vec!["UNH"], vec![zd7_def, z98_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z31"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // Only UNH at the root, and one combined SG8 with four repetitions.
    assert_eq!(tree.segments.len(), 1);
    assert_eq!(tree.groups.len(), 1);
    let sg8 = &tree.groups[0];
    assert_eq!(sg8.group_id, "SG8");
    assert_eq!(sg8.repetitions.len(), 4);

    // The entry qualifiers alternate exactly as in the input.
    let expected_qualifiers = ["ZD7", "Z98", "ZD7", "Z98"];
    for (rep, expected) in sg8.repetitions.iter().zip(expected_qualifiers.iter()) {
        assert_eq!(rep.segments[0].elements[0][0], *expected);
    }

    // A ZD7 repetition carries CCI, a Z98 repetition carries RFF.
    assert_eq!(sg8.repetitions[0].segments[1].tag, "CCI");
    assert_eq!(sg8.repetitions[1].segments[1].tag, "RFF");
}
1336
#[test]
fn test_variant_groups_single_variant_type() {
    // Both variants are defined, but the input only contains Z98
    // repetitions; variant matching must still work.
    let zd7_def = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
    let z98_def = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
    let schema = make_mig_schema(vec!["UNH"], vec![zd7_def, z98_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("RFF", vec![vec!["Z38", "REF2"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // One group with two repetitions, each opened by SEQ+Z98.
    assert_eq!(tree.groups.len(), 1);
    let reps = &tree.groups[0].repetitions;
    assert_eq!(reps.len(), 2);
    for rep in reps {
        assert_eq!(rep.segments[0].elements[0][0], "Z98");
    }
}
1367
#[test]
fn test_non_variant_groups_unchanged() {
    // Groups defined without a variant code are assembled as plain
    // repeating groups.
    let schema = make_mig_schema(
        vec!["UNH", "BGM"],
        vec![
            make_mig_group("SG2", vec!["NAD"], vec![]),
            make_mig_group("SG4", vec!["IDE", "STS"], vec![]),
        ],
    );

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("BGM", vec![vec!["E01"]]),
        make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
        make_owned_seg("NAD", vec![vec!["MR", "9900456"]]),
        make_owned_seg("IDE", vec![vec!["24", "TX001"]]),
        make_owned_seg("STS", vec![vec!["7"], vec!["Z33"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.segments.len(), 2);
    assert_eq!(tree.groups.len(), 2);

    // (group id, repetition count) expected for each assembled group.
    let expected = [("SG2", 2), ("SG4", 1)];
    for (group, (id, reps)) in tree.groups.iter().zip(expected.iter()) {
        assert_eq!(group.group_id, *id);
        assert_eq!(group.repetitions.len(), *reps);
    }
}
1395
#[test]
fn test_variant_groups_with_nested_children() {
    // Both SG8 variants declare the same nested SG10 (CCI, CAV).
    let sg10_def = make_mig_group("SG10", vec!["CCI", "CAV"], vec![]);
    let zd7_def = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10_def.clone()], "ZD7");
    let z98_def = make_mig_group_with_variant("SG8", vec!["SEQ"], vec![sg10_def], "Z98");
    let schema = make_mig_schema(vec!["UNH"], vec![zd7_def, z98_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        make_owned_seg("SEQ", vec![vec!["ZD7"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
        make_owned_seg("CAV", vec![vec!["Z91", "Y"]]),
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("CCI", vec![vec!["Z31"]]),
        make_owned_seg("CAV", vec![vec!["Z91", "N"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    assert_eq!(tree.groups.len(), 1);
    let reps = &tree.groups[0].repetitions;
    assert_eq!(reps.len(), 2);

    // The ZD7 repetition owns an SG10 whose first segment starts with Z30.
    let zd7_children = &reps[0].child_groups;
    assert_eq!(zd7_children.len(), 1);
    assert_eq!(zd7_children[0].group_id, "SG10");
    assert_eq!(
        zd7_children[0].repetitions[0].segments[0].elements[0][0],
        "Z30"
    );

    // The Z98 repetition owns its own nested group, starting with Z31.
    let z98_children = &reps[1].child_groups;
    assert_eq!(z98_children.len(), 1);
    assert_eq!(
        z98_children[0].repetitions[0].segments[0].elements[0][0],
        "Z31"
    );
}
1437
#[test]
fn test_variant_qualifier_check_prevents_wrong_variant_consumption() {
    // A group definition with a variant code must not accept an entry
    // segment whose qualifier differs, even when the tag matches.
    let zd7_def = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
    let schema = make_mig_schema(vec!["UNH"], vec![zd7_def]);

    let input = vec![
        make_owned_seg("UNH", vec![vec!["001"]]),
        // Qualifier Z98 does not match the ZD7 variant.
        make_owned_seg("SEQ", vec![vec!["Z98"]]),
        make_owned_seg("CCI", vec![vec!["Z30"]]),
    ];

    let tree = Assembler::new(&schema).assemble_generic(&input).unwrap();

    // No SG8 repetition is created because Z98 != ZD7.
    assert!(tree.groups.is_empty());
}
1458
1459    #[test]
1460    fn test_mixed_variant_and_non_variant_groups() {
1461        // SG2 (no variant), then variant SG8s, then SG12 (no variant)
1462        let sg2 = make_mig_group("SG2", vec!["NAD"], vec![]);
1463        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI"], vec![], "ZD7");
1464        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF"], vec![], "Z98");
1465        let sg12 = make_mig_group("SG12", vec!["NAD"], vec![]);
1466
1467        let mig = make_mig_schema(vec!["UNH"], vec![sg2, sg8_zd7, sg8_z98, sg12]);
1468
1469        let segments = vec![
1470            make_owned_seg("UNH", vec![vec!["001"]]),
1471            make_owned_seg("NAD", vec![vec!["MS", "9900123"]]),
1472            make_owned_seg("SEQ", vec![vec!["ZD7"]]),
1473            make_owned_seg("CCI", vec![vec!["Z30"]]),
1474            make_owned_seg("SEQ", vec![vec!["Z98"]]),
1475            make_owned_seg("RFF", vec![vec!["Z38", "REF1"]]),
1476            make_owned_seg("NAD", vec![vec!["Z65", "ID001"]]),
1477        ];
1478
1479        let assembler = Assembler::new(&mig);
1480        let result = assembler.assemble_generic(&segments).unwrap();
1481
1482        assert_eq!(result.groups.len(), 3); // SG2, SG8 (combined), SG12
1483        assert_eq!(result.groups[0].group_id, "SG2");
1484        assert_eq!(result.groups[0].repetitions.len(), 1);
1485        assert_eq!(result.groups[1].group_id, "SG8");
1486        assert_eq!(result.groups[1].repetitions.len(), 2);
1487        assert_eq!(result.groups[2].group_id, "SG12");
1488        assert_eq!(result.groups[2].repetitions.len(), 1);
1489    }
1490}