Skip to main content

mig_assembly/
disassembler.rs

1//! Tree disassembler — converts AssembledTree back to ordered segments.
2//!
3//! Walks the MIG schema tree in order. For each MIG node that has
4//! corresponding data in the assembled tree, emits segments in MIG order.
5
6use crate::assembler::{AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree};
7use mig_types::schema::mig::{MigSchema, MigSegmentGroup};
8
/// Output segment from disassembly (owned data, ready for rendering).
///
/// `elements` holds one inner vector per data element; each inner vector holds
/// that element's component values in order.
///
/// Implements `PartialEq`/`Eq` so whole segments (and vectors of segments) can
/// be compared directly, e.g. in roundtrip assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DisassembledSegment {
    /// Segment tag, copied verbatim from the assembled segment.
    pub tag: String,
    /// Data elements, each a list of component strings.
    pub elements: Vec<Vec<String>>,
}
15
16/// MIG-guided disassembler — walks the MIG tree to emit segments in correct order.
17pub struct Disassembler<'a> {
18    mig: &'a MigSchema,
19}
20
21impl<'a> Disassembler<'a> {
22    pub fn new(mig: &'a MigSchema) -> Self {
23        Self { mig }
24    }
25
26    /// Disassemble a tree into ordered segments following MIG sequence.
27    ///
28    /// Emits segments in correct EDIFACT order:
29    /// 1. Pre-group top-level segments (e.g., UNB, UNH, BGM, DTM)
30    /// 2. Groups (recursively, in MIG order)
31    /// 3. Post-group top-level segments (e.g., UNT, UNZ)
32    ///
33    /// Uses MIG-guided ordering: walks the MIG schema tree and looks up
34    /// matching data in the assembled tree. This handles both assembler output
35    /// (already in MIG order) and reverse-mapped trees (may be in arbitrary order).
36    pub fn disassemble(&self, tree: &AssembledTree) -> Vec<DisassembledSegment> {
37        let mut output = Vec::new();
38
39        // 1. Emit pre-group segments in MIG order
40        let pre_group = &tree.segments[..tree.post_group_start];
41        let mut consumed = vec![false; pre_group.len()];
42        for mig_seg in &self.mig.segments {
43            if let Some(idx) = pre_group
44                .iter()
45                .enumerate()
46                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
47            {
48                output.push(assembled_to_disassembled(&pre_group[idx]));
49                consumed[idx] = true;
50            }
51        }
52
53        // 2. Emit groups in MIG order (lookup by group ID with consumption tracking).
54        //    Between groups, emit any inter-group root segments (e.g., UNS in MSCONS).
55        //    For variant groups (consecutive same-ID with variant_code), collect all
56        //    variant definitions and match each rep to its correct variant.
57        let mut consumed_groups = vec![false; tree.groups.len()];
58        let mut mig_group_idx = 0;
59        while mig_group_idx < self.mig.segment_groups.len() {
60            let mig_group = &self.mig.segment_groups[mig_group_idx];
61
62            // Check if this starts a variant set
63            if mig_group.variant_code.is_some() {
64                let variant_count = self.mig.segment_groups[mig_group_idx..]
65                    .iter()
66                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
67                    .count();
68                let variant_defs =
69                    &self.mig.segment_groups[mig_group_idx..mig_group_idx + variant_count];
70
71                if let Some(idx) = tree
72                    .groups
73                    .iter()
74                    .enumerate()
75                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
76                {
77                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
78                        for seg in inter_segs {
79                            output.push(assembled_to_disassembled(seg));
80                        }
81                    }
82                    self.emit_variant_group(&tree.groups[idx], variant_defs, &mut output);
83                    consumed_groups[idx] = true;
84                }
85                mig_group_idx += variant_count;
86            } else {
87                if let Some(idx) = tree
88                    .groups
89                    .iter()
90                    .enumerate()
91                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
92                {
93                    // Emit any inter-group segments that precede this group
94                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
95                        for seg in inter_segs {
96                            output.push(assembled_to_disassembled(seg));
97                        }
98                    }
99                    self.emit_group(&tree.groups[idx], mig_group, &mut output);
100                    consumed_groups[idx] = true;
101                }
102                mig_group_idx += 1;
103            }
104        }
105
106        // 2b. Emit trailing inter-group segments (after the last group).
107        //     For ORDERS, UNS+S comes after SG29 (detail/summary boundary).
108        let trailing_idx = tree.groups.len();
109        if let Some(inter_segs) = tree.inter_group_segments.get(&trailing_idx) {
110            for seg in inter_segs {
111                output.push(assembled_to_disassembled(seg));
112            }
113        }
114
115        // 3. Emit post-group segments (e.g., UNT, UNZ)
116        for seg in &tree.segments[tree.post_group_start..] {
117            output.push(assembled_to_disassembled(seg));
118        }
119
120        output
121    }
122
123    fn emit_group(
124        &self,
125        group: &AssembledGroup,
126        mig_group: &MigSegmentGroup,
127        output: &mut Vec<DisassembledSegment>,
128    ) {
129        for instance in &group.repetitions {
130            self.emit_group_instance(instance, mig_group, output);
131        }
132    }
133
134    /// Emit a group whose reps come from multiple variant definitions.
135    ///
136    /// For each rep, determine which variant it belongs to by checking the
137    /// entry segment's qualifier against each variant's `variant_code`.
138    /// Then emit the rep using that variant's segment/nested-group ordering.
139    fn emit_variant_group(
140        &self,
141        group: &AssembledGroup,
142        variant_defs: &[MigSegmentGroup],
143        output: &mut Vec<DisassembledSegment>,
144    ) {
145        for instance in &group.repetitions {
146            let entry_qual = instance
147                .segments
148                .first()
149                .and_then(|s| s.elements.first())
150                .and_then(|e| e.first())
151                .map(|v| v.as_str())
152                .unwrap_or("");
153
154            let variant_def = variant_defs
155                .iter()
156                .find(|v| {
157                    v.variant_code
158                        .as_deref()
159                        .map(|vc| vc.eq_ignore_ascii_case(entry_qual))
160                        .unwrap_or(false)
161                })
162                .unwrap_or(&variant_defs[0]); // fallback to first variant
163
164            self.emit_group_instance(instance, variant_def, output);
165        }
166    }
167
168    fn emit_group_instance(
169        &self,
170        instance: &AssembledGroupInstance,
171        mig_group: &MigSegmentGroup,
172        output: &mut Vec<DisassembledSegment>,
173    ) {
174        // Emit segments in MIG order using tag-based lookup with consumption tracking.
175        // This handles both assembler output (in MIG order) and reverse-mapped trees
176        // (may be in arbitrary order).
177        //
178        // After MIG-guided emission, any remaining unconsumed segments are appended.
179        // This handles cases where the assembler captured more segments than the MIG
180        // defines (e.g., 6 RFFs when the merged MIG only has 4 slots).
181        let mut consumed = vec![false; instance.segments.len()];
182        for mig_seg in &mig_group.segments {
183            if let Some(idx) = instance
184                .segments
185                .iter()
186                .enumerate()
187                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
188            {
189                output.push(assembled_to_disassembled(&instance.segments[idx]));
190                consumed[idx] = true;
191            }
192        }
193        // Emit any remaining segments not matched by MIG slots
194        for (i, seg) in instance.segments.iter().enumerate() {
195            if !consumed[i] {
196                output.push(assembled_to_disassembled(seg));
197            }
198        }
199
200        // Re-emit skipped segments (unknown segments preserved for roundtrip fidelity)
201        for skipped in &instance.skipped_segments {
202            output.push(assembled_to_disassembled(skipped));
203        }
204
205        // Child groups — lookup by group ID with consumption tracking.
206        // Applies variant-aware logic recursively for nested variant groups.
207        let mut consumed_child = vec![false; instance.child_groups.len()];
208        let mut nested_idx = 0;
209        while nested_idx < mig_group.nested_groups.len() {
210            let nested_mig = &mig_group.nested_groups[nested_idx];
211
212            if nested_mig.variant_code.is_some() {
213                let variant_count = mig_group.nested_groups[nested_idx..]
214                    .iter()
215                    .take_while(|g| g.id == nested_mig.id && g.variant_code.is_some())
216                    .count();
217                let variant_defs = &mig_group.nested_groups[nested_idx..nested_idx + variant_count];
218
219                if let Some(idx) = instance
220                    .child_groups
221                    .iter()
222                    .enumerate()
223                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
224                {
225                    self.emit_variant_group(&instance.child_groups[idx], variant_defs, output);
226                    consumed_child[idx] = true;
227                }
228                nested_idx += variant_count;
229            } else {
230                if let Some(idx) = instance
231                    .child_groups
232                    .iter()
233                    .enumerate()
234                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
235                {
236                    self.emit_group(&instance.child_groups[idx], nested_mig, output);
237                    consumed_child[idx] = true;
238                }
239                nested_idx += 1;
240            }
241        }
242    }
243}
244
245fn assembled_to_disassembled(seg: &AssembledSegment) -> DisassembledSegment {
246    DisassembledSegment {
247        tag: seg.tag.clone(),
248        elements: seg.elements.clone(),
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assembler::{
        AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree,
    };
    use crate::test_support::{make_mig_group, make_mig_segment};
    use mig_types::schema::mig::{MigSchema, MigSegmentGroup};

    // --- Fixture helpers -------------------------------------------------

    /// Assembled segment built from string literals.
    fn seg(tag: &str, elements: Vec<Vec<&str>>) -> AssembledSegment {
        AssembledSegment {
            tag: tag.to_string(),
            elements: elements
                .into_iter()
                .map(|components| components.into_iter().map(String::from).collect())
                .collect(),
        }
    }

    /// Group repetition without skipped segments.
    fn rep(
        segments: Vec<AssembledSegment>,
        child_groups: Vec<AssembledGroup>,
    ) -> AssembledGroupInstance {
        AssembledGroupInstance {
            segments,
            child_groups,
            skipped_segments: vec![],
        }
    }

    /// Assembled group with the given repetitions.
    fn group(group_id: &str, repetitions: Vec<AssembledGroupInstance>) -> AssembledGroup {
        AssembledGroup {
            group_id: group_id.to_string(),
            repetitions,
        }
    }

    /// Assembled tree without inter-group segments.
    fn tree(
        segments: Vec<AssembledSegment>,
        post_group_start: usize,
        groups: Vec<AssembledGroup>,
    ) -> AssembledTree {
        AssembledTree {
            segments,
            groups,
            post_group_start,
            inter_group_segments: std::collections::BTreeMap::new(),
        }
    }

    /// Minimal UTILMD schema with the given top-level segment tags and groups.
    fn schema(segment_tags: &[&'static str], segment_groups: Vec<MigSegmentGroup>) -> MigSchema {
        MigSchema {
            message_type: "UTILMD".to_string(),
            variant: None,
            version: "S2.1".to_string(),
            publication_date: "".to_string(),
            author: "".to_string(),
            format_version: "FV2504".to_string(),
            source_file: "test".to_string(),
            segments: segment_tags
                .iter()
                .copied()
                .map(|tag| make_mig_segment(tag))
                .collect(),
            segment_groups,
        }
    }

    /// Tags of the emitted segments, in output order.
    fn tags(segments: &[DisassembledSegment]) -> Vec<&str> {
        segments.iter().map(|s| s.tag.as_str()).collect()
    }

    // --- Tests -----------------------------------------------------------

    #[test]
    fn test_disassemble_top_level_only() {
        let mig = MigSchema {
            variant: Some("Strom".to_string()),
            publication_date: "2025-03-20".to_string(),
            author: "BDEW".to_string(),
            ..schema(&["UNH", "BGM"], vec![])
        };

        let assembled = tree(
            vec![
                seg(
                    "UNH",
                    vec![vec!["1"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]],
                ),
                seg("BGM", vec![vec!["E01"], vec!["MSG001"], vec!["9"]]),
            ],
            2,
            vec![],
        );

        let segments = Disassembler::new(&mig).disassemble(&assembled);

        assert_eq!(tags(&segments), ["UNH", "BGM"]);
        assert_eq!(segments[0].elements[0], vec!["1"]);
    }

    #[test]
    fn test_disassemble_with_groups() {
        let mig = schema(
            &["UNH", "BGM"],
            vec![make_mig_group("SG2", vec!["NAD", "LOC"], vec![])],
        );

        let assembled = tree(
            vec![seg("UNH", vec![vec!["1"]]), seg("BGM", vec![vec!["E01"]])],
            2,
            vec![group(
                "SG2",
                vec![
                    rep(vec![seg("NAD", vec![vec!["MS"]])], vec![]),
                    rep(vec![seg("NAD", vec![vec!["MR"]])], vec![]),
                ],
            )],
        );

        let segments = Disassembler::new(&mig).disassemble(&assembled);

        // UNH, BGM, NAD(MS), NAD(MR)
        assert_eq!(tags(&segments), ["UNH", "BGM", "NAD", "NAD"]);
        assert_eq!(segments[2].elements[0][0], "MS");
        assert_eq!(segments[3].elements[0][0], "MR");
    }

    #[test]
    fn test_disassemble_nested_groups() {
        let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
        let mig = schema(&["UNH"], vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])]);

        let contact = group(
            "SG3",
            vec![rep(
                vec![
                    seg("CTA", vec![vec!["IC"]]),
                    seg("COM", vec![vec!["040@ex.com", "EM"]]),
                ],
                vec![],
            )],
        );
        let assembled = tree(
            vec![seg("UNH", vec![vec!["1"]])],
            1,
            vec![group(
                "SG2",
                vec![rep(vec![seg("NAD", vec![vec!["MS"]])], vec![contact])],
            )],
        );

        let segments = Disassembler::new(&mig).disassemble(&assembled);

        assert_eq!(tags(&segments), ["UNH", "NAD", "CTA", "COM"]);
    }

    #[test]
    fn test_disassemble_variant_groups_uses_per_variant_ordering() {
        use crate::test_support::make_mig_group_with_variant;

        // Two SG8 variant definitions with different segment ordering:
        // - ZD7 variant: SEQ, RFF, DTM
        // - Z98 variant: SEQ, CCI, CAV
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF", "DTM"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI", "CAV"], vec![], "Z98");
        let mig = schema(&["UNH"], vec![sg8_zd7, sg8_z98]);

        // Three interleaved repetitions: ZD7, Z98, ZD7. The first two store
        // their segments out of MIG order.
        let assembled = tree(
            vec![seg("UNH", vec![vec!["1"]])],
            1,
            vec![group(
                "SG8",
                vec![
                    rep(
                        vec![
                            seg("SEQ", vec![vec!["ZD7"]]),
                            seg("DTM", vec![vec!["303"]]),
                            seg("RFF", vec![vec!["Z13"]]),
                        ],
                        vec![],
                    ),
                    rep(
                        vec![
                            seg("SEQ", vec![vec!["Z98"]]),
                            seg("CAV", vec![vec!["Z91"]]),
                            seg("CCI", vec![vec![""]]),
                        ],
                        vec![],
                    ),
                    rep(
                        vec![
                            seg("SEQ", vec![vec!["ZD7"]]),
                            seg("RFF", vec![vec!["Z34"]]),
                        ],
                        vec![],
                    ),
                ],
            )],
        );

        let segments = Disassembler::new(&mig).disassemble(&assembled);

        assert_eq!(
            tags(&segments),
            [
                "UNH", // top level
                "SEQ", "RFF", "DTM", // rep 0 (ZD7): RFF and DTM swapped into MIG order
                "SEQ", "CCI", "CAV", // rep 1 (Z98): CCI and CAV swapped into MIG order
                "SEQ", "RFF", // rep 2 (ZD7): no DTM present
            ]
        );
        assert_eq!(segments[1].elements[0][0], "ZD7");
        assert_eq!(segments[4].elements[0][0], "Z98");
        assert_eq!(segments[7].elements[0][0], "ZD7");
    }

    #[test]
    fn test_disassemble_empty_tree() {
        let mig = schema(&["UNH"], vec![]);
        let assembled = tree(vec![], 0, vec![]);

        let segments = Disassembler::new(&mig).disassemble(&assembled);

        assert!(segments.is_empty());
    }
}