Skip to main content

mig_assembly/
disassembler.rs

1//! Tree disassembler — converts AssembledTree back to ordered segments.
2//!
3//! Walks the MIG schema tree in order. For each MIG node that has
4//! corresponding data in the assembled tree, emits segments in MIG order.
5
6use crate::assembler::{AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree};
7use mig_types::schema::mig::{MigSchema, MigSegmentGroup};
8
/// Output segment from disassembly (owned data, ready for rendering).
///
/// The type owns all of its strings, so it stays valid after the assembled
/// tree it was produced from is dropped. `PartialEq`/`Eq` are derived so that
/// whole segments (and vectors of segments) can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DisassembledSegment {
    /// Segment tag, e.g. `"NAD"` or `"UNH"`.
    pub tag: String,
    /// Segment payload: the outer vector holds the elements in order, each
    /// inner vector holds the values stored for that element.
    pub elements: Vec<Vec<String>>,
}
15
16/// MIG-guided disassembler — walks the MIG tree to emit segments in correct order.
17pub struct Disassembler<'a> {
18    mig: &'a MigSchema,
19}
20
21impl<'a> Disassembler<'a> {
22    pub fn new(mig: &'a MigSchema) -> Self {
23        Self { mig }
24    }
25
26    /// Disassemble a tree into ordered segments following MIG sequence.
27    ///
28    /// Emits segments in correct EDIFACT order:
29    /// 1. Pre-group top-level segments (e.g., UNB, UNH, BGM, DTM)
30    /// 2. Groups (recursively, in MIG order)
31    /// 3. Post-group top-level segments (e.g., UNT, UNZ)
32    ///
33    /// Uses MIG-guided ordering: walks the MIG schema tree and looks up
34    /// matching data in the assembled tree. This handles both assembler output
35    /// (already in MIG order) and reverse-mapped trees (may be in arbitrary order).
36    pub fn disassemble(&self, tree: &AssembledTree) -> Vec<DisassembledSegment> {
37        let mut output = Vec::new();
38
39        // 1. Emit pre-group segments in MIG order
40        let pre_group = &tree.segments[..tree.post_group_start];
41        let mut consumed = vec![false; pre_group.len()];
42        for mig_seg in &self.mig.segments {
43            if let Some(idx) = pre_group
44                .iter()
45                .enumerate()
46                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
47            {
48                output.push(assembled_to_disassembled(&pre_group[idx]));
49                consumed[idx] = true;
50            }
51        }
52
53        // 2. Emit groups in MIG order (lookup by group ID with consumption tracking).
54        //    Between groups, emit any inter-group root segments (e.g., UNS in MSCONS).
55        //    For variant groups (consecutive same-ID with variant_code), collect all
56        //    variant definitions and match each rep to its correct variant.
57        let mut consumed_groups = vec![false; tree.groups.len()];
58        let mut mig_group_idx = 0;
59        while mig_group_idx < self.mig.segment_groups.len() {
60            let mig_group = &self.mig.segment_groups[mig_group_idx];
61
62            // Check if this starts a variant set
63            if mig_group.variant_code.is_some() {
64                let variant_count = self.mig.segment_groups[mig_group_idx..]
65                    .iter()
66                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
67                    .count();
68                let variant_defs =
69                    &self.mig.segment_groups[mig_group_idx..mig_group_idx + variant_count];
70
71                if let Some(idx) = tree
72                    .groups
73                    .iter()
74                    .enumerate()
75                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
76                {
77                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
78                        for seg in inter_segs {
79                            output.push(assembled_to_disassembled(seg));
80                        }
81                    }
82                    self.emit_variant_group(&tree.groups[idx], variant_defs, &mut output);
83                    consumed_groups[idx] = true;
84                }
85                mig_group_idx += variant_count;
86            } else {
87                if let Some(idx) = tree
88                    .groups
89                    .iter()
90                    .enumerate()
91                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
92                {
93                    // Emit any inter-group segments that precede this group
94                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
95                        for seg in inter_segs {
96                            output.push(assembled_to_disassembled(seg));
97                        }
98                    }
99                    self.emit_group(&tree.groups[idx], mig_group, &mut output);
100                    consumed_groups[idx] = true;
101                }
102                mig_group_idx += 1;
103            }
104        }
105
106        // 2b. Emit trailing inter-group segments (after the last group).
107        //     For ORDERS, UNS+S comes after SG29 (detail/summary boundary).
108        let trailing_idx = tree.groups.len();
109        if let Some(inter_segs) = tree.inter_group_segments.get(&trailing_idx) {
110            for seg in inter_segs {
111                output.push(assembled_to_disassembled(seg));
112            }
113        }
114
115        // 3. Emit post-group segments (e.g., UNT, UNZ)
116        for seg in &tree.segments[tree.post_group_start..] {
117            output.push(assembled_to_disassembled(seg));
118        }
119
120        output
121    }
122
123    fn emit_group(
124        &self,
125        group: &AssembledGroup,
126        mig_group: &MigSegmentGroup,
127        output: &mut Vec<DisassembledSegment>,
128    ) {
129        for instance in &group.repetitions {
130            self.emit_group_instance(instance, mig_group, output);
131        }
132    }
133
134    /// Emit a group whose reps come from multiple variant definitions.
135    ///
136    /// For each rep, determine which variant it belongs to by checking the
137    /// entry segment's qualifier against each variant's `variant_code`.
138    /// Then emit the rep using that variant's segment/nested-group ordering.
139    fn emit_variant_group(
140        &self,
141        group: &AssembledGroup,
142        variant_defs: &[MigSegmentGroup],
143        output: &mut Vec<DisassembledSegment>,
144    ) {
145        for instance in &group.repetitions {
146            let entry_qual = instance
147                .segments
148                .first()
149                .and_then(|s| s.elements.first())
150                .and_then(|e| e.first())
151                .map(|v| v.as_str())
152                .unwrap_or("");
153
154            let variant_def = variant_defs
155                .iter()
156                .find(|v| {
157                    v.variant_code
158                        .as_deref()
159                        .map(|vc| vc.eq_ignore_ascii_case(entry_qual))
160                        .unwrap_or(false)
161                })
162                .unwrap_or(&variant_defs[0]); // fallback to first variant
163
164            self.emit_group_instance(instance, variant_def, output);
165        }
166    }
167
168    fn emit_group_instance(
169        &self,
170        instance: &AssembledGroupInstance,
171        mig_group: &MigSegmentGroup,
172        output: &mut Vec<DisassembledSegment>,
173    ) {
174        // Emit segments in MIG order using tag-based lookup with consumption tracking.
175        // This handles both assembler output (in MIG order) and reverse-mapped trees
176        // (may be in arbitrary order).
177        //
178        // After MIG-guided emission, any remaining unconsumed segments are appended.
179        // This handles cases where the assembler captured more segments than the MIG
180        // defines (e.g., 6 RFFs when the merged MIG only has 4 slots).
181        let mut consumed = vec![false; instance.segments.len()];
182        for mig_seg in &mig_group.segments {
183            if let Some(idx) = instance
184                .segments
185                .iter()
186                .enumerate()
187                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
188            {
189                output.push(assembled_to_disassembled(&instance.segments[idx]));
190                consumed[idx] = true;
191            }
192        }
193        // Emit any remaining segments not matched by MIG slots
194        for (i, seg) in instance.segments.iter().enumerate() {
195            if !consumed[i] {
196                output.push(assembled_to_disassembled(seg));
197            }
198        }
199
200        // Re-emit skipped segments (unknown segments preserved for roundtrip fidelity)
201        for skipped in &instance.skipped_segments {
202            output.push(assembled_to_disassembled(skipped));
203        }
204
205        // Child groups — lookup by group ID with consumption tracking.
206        // Applies variant-aware logic recursively for nested variant groups.
207        let mut consumed_child = vec![false; instance.child_groups.len()];
208        let mut nested_idx = 0;
209        while nested_idx < mig_group.nested_groups.len() {
210            let nested_mig = &mig_group.nested_groups[nested_idx];
211
212            if nested_mig.variant_code.is_some() {
213                let variant_count = mig_group.nested_groups[nested_idx..]
214                    .iter()
215                    .take_while(|g| g.id == nested_mig.id && g.variant_code.is_some())
216                    .count();
217                let variant_defs = &mig_group.nested_groups[nested_idx..nested_idx + variant_count];
218
219                if let Some(idx) = instance
220                    .child_groups
221                    .iter()
222                    .enumerate()
223                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
224                {
225                    self.emit_variant_group(&instance.child_groups[idx], variant_defs, output);
226                    consumed_child[idx] = true;
227                }
228                nested_idx += variant_count;
229            } else {
230                if let Some(idx) = instance
231                    .child_groups
232                    .iter()
233                    .enumerate()
234                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
235                {
236                    self.emit_group(&instance.child_groups[idx], nested_mig, output);
237                    consumed_child[idx] = true;
238                }
239                nested_idx += 1;
240            }
241        }
242    }
243}
244
245fn assembled_to_disassembled(seg: &AssembledSegment) -> DisassembledSegment {
246    DisassembledSegment {
247        tag: seg.tag.clone(),
248        elements: seg.elements.clone(),
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assembler::{
        AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree,
    };
    use crate::test_support::{make_mig_group, make_mig_segment};
    use mig_types::schema::mig::MigSchema;

    /// Assembled segment with the given tag and element values, no MIG metadata.
    fn seg(tag: &str, elements: Vec<Vec<&str>>) -> AssembledSegment {
        AssembledSegment {
            tag: tag.to_string(),
            elements: elements
                .into_iter()
                .map(|values| values.into_iter().map(String::from).collect())
                .collect(),
            mig_number: None,
            segment_number: None,
        }
    }

    /// Group repetition holding `segments` and `child_groups`, nothing skipped.
    fn instance(
        segments: Vec<AssembledSegment>,
        child_groups: Vec<AssembledGroup>,
    ) -> AssembledGroupInstance {
        AssembledGroupInstance {
            segments,
            child_groups,
            entry_mig_number: None,
            variant_mig_numbers: vec![],
            skipped_segments: vec![],
            skipped_positions: Vec::new(),
        }
    }

    /// Assembled group with the given ID and repetitions.
    fn group(id: &str, repetitions: Vec<AssembledGroupInstance>) -> AssembledGroup {
        AssembledGroup {
            group_id: id.to_string(),
            repetitions,
        }
    }

    /// Assembled tree without inter-group segments.
    fn tree(
        segments: Vec<AssembledSegment>,
        post_group_start: usize,
        groups: Vec<AssembledGroup>,
    ) -> AssembledTree {
        AssembledTree {
            segments,
            groups,
            post_group_start,
            inter_group_segments: std::collections::BTreeMap::new(),
        }
    }

    /// UTILMD schema with the given top-level segment tags and no groups;
    /// tests override individual fields with struct-update syntax.
    fn base_mig(segment_tags: &[&'static str]) -> MigSchema {
        MigSchema {
            message_type: "UTILMD".to_string(),
            variant: None,
            version: "S2.1".to_string(),
            publication_date: String::new(),
            author: String::new(),
            format_version: "FV2504".to_string(),
            source_file: "test".to_string(),
            segments: segment_tags.iter().map(|&t| make_mig_segment(t)).collect(),
            segment_groups: vec![],
        }
    }

    /// Tags of the disassembled segments, in output order.
    fn tags(segments: &[DisassembledSegment]) -> Vec<&str> {
        segments.iter().map(|s| s.tag.as_str()).collect()
    }

    #[test]
    fn test_disassemble_top_level_only() {
        let mig = MigSchema {
            variant: Some("Strom".to_string()),
            publication_date: "2025-03-20".to_string(),
            author: "BDEW".to_string(),
            ..base_mig(&["UNH", "BGM"])
        };

        let input = tree(
            vec![
                seg(
                    "UNH",
                    vec![vec!["1"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]],
                ),
                seg("BGM", vec![vec!["E01"], vec!["MSG001"], vec!["9"]]),
            ],
            2,
            vec![],
        );

        let segments = Disassembler::new(&mig).disassemble(&input);

        assert_eq!(tags(&segments), ["UNH", "BGM"]);
        assert_eq!(segments[0].elements[0], vec!["1"]);
    }

    #[test]
    fn test_disassemble_with_groups() {
        let mig = MigSchema {
            segment_groups: vec![make_mig_group("SG2", vec!["NAD", "LOC"], vec![])],
            ..base_mig(&["UNH", "BGM"])
        };

        let input = tree(
            vec![seg("UNH", vec![vec!["1"]]), seg("BGM", vec![vec!["E01"]])],
            2,
            vec![group(
                "SG2",
                vec![
                    instance(vec![seg("NAD", vec![vec!["MS"]])], vec![]),
                    instance(vec![seg("NAD", vec![vec!["MR"]])], vec![]),
                ],
            )],
        );

        let segments = Disassembler::new(&mig).disassemble(&input);

        // UNH, BGM, NAD(MS), NAD(MR)
        assert_eq!(tags(&segments), ["UNH", "BGM", "NAD", "NAD"]);
        assert_eq!(segments[2].elements[0][0], "MS");
        assert_eq!(segments[3].elements[0][0], "MR");
    }

    #[test]
    fn test_disassemble_nested_groups() {
        let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
        let mig = MigSchema {
            segment_groups: vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])],
            ..base_mig(&["UNH"])
        };

        let contact = group(
            "SG3",
            vec![instance(
                vec![
                    seg("CTA", vec![vec!["IC"]]),
                    seg("COM", vec![vec!["040@ex.com", "EM"]]),
                ],
                vec![],
            )],
        );
        let input = tree(
            vec![seg("UNH", vec![vec!["1"]])],
            1,
            vec![group(
                "SG2",
                vec![instance(vec![seg("NAD", vec![vec!["MS"]])], vec![contact])],
            )],
        );

        let segments = Disassembler::new(&mig).disassemble(&input);

        // UNH, NAD, CTA, COM
        assert_eq!(tags(&segments), ["UNH", "NAD", "CTA", "COM"]);
    }

    #[test]
    fn test_disassemble_variant_groups_uses_per_variant_ordering() {
        use crate::test_support::make_mig_group_with_variant;

        // Two SG8 variant definitions with different segment ordering:
        // - ZD7 variant: SEQ, RFF, DTM
        // - Z98 variant: SEQ, CCI, CAV
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF", "DTM"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI", "CAV"], vec![], "Z98");

        let mig = MigSchema {
            segment_groups: vec![sg8_zd7, sg8_z98],
            ..base_mig(&["UNH"])
        };

        // Three interleaved reps: ZD7, Z98, ZD7.
        let reps = vec![
            // Rep 0: ZD7 variant — stored as SEQ, DTM, RFF (out of MIG order)
            instance(
                vec![
                    seg("SEQ", vec![vec!["ZD7"]]),
                    seg("DTM", vec![vec!["303"]]),
                    seg("RFF", vec![vec!["Z13"]]),
                ],
                vec![],
            ),
            // Rep 1: Z98 variant — stored as SEQ, CAV, CCI (out of MIG order)
            instance(
                vec![
                    seg("SEQ", vec![vec!["Z98"]]),
                    seg("CAV", vec![vec!["Z91"]]),
                    seg("CCI", vec![vec![""]]),
                ],
                vec![],
            ),
            // Rep 2: another ZD7 variant, without DTM
            instance(
                vec![seg("SEQ", vec![vec!["ZD7"]]), seg("RFF", vec![vec!["Z34"]])],
                vec![],
            ),
        ];
        let input = tree(
            vec![seg("UNH", vec![vec!["1"]])],
            1,
            vec![group("SG8", reps)],
        );

        let segments = Disassembler::new(&mig).disassemble(&input);

        // UNH, then each rep re-ordered according to its own variant definition.
        assert_eq!(
            tags(&segments),
            [
                "UNH", // top level
                "SEQ", "RFF", "DTM", // rep 0 (ZD7): RFF and DTM swapped back
                "SEQ", "CCI", "CAV", // rep 1 (Z98): CCI and CAV swapped back
                "SEQ", "RFF", // rep 2 (ZD7): no DTM
            ]
        );
        assert_eq!(segments[1].elements[0][0], "ZD7");
        assert_eq!(segments[4].elements[0][0], "Z98");
        assert_eq!(segments[7].elements[0][0], "ZD7");
    }

    #[test]
    fn test_disassemble_empty_tree() {
        let mig = base_mig(&["UNH"]);
        let input = tree(vec![], 0, vec![]);

        let segments = Disassembler::new(&mig).disassemble(&input);
        assert!(segments.is_empty());
    }
}