Skip to main content

mig_assembly/
disassembler.rs

//! Tree disassembler — converts AssembledTree back to ordered segments.
//!
//! Walks the MIG schema tree in order. For each MIG node that has
//! corresponding data in the assembled tree, emits segments in MIG order.
5
6use crate::assembler::{AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree};
7use mig_types::schema::mig::{MigSchema, MigSegmentGroup};
8
/// Output segment from disassembly (owned data, ready for rendering).
///
/// Segments compare by value (`PartialEq`/`Eq`), so whole segments can be
/// checked with `assert_eq!` instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DisassembledSegment {
    /// Segment tag (e.g. `UNH`, `NAD`).
    pub tag: String,
    /// Element data: the outer vector holds the elements, each inner vector
    /// holds that element's components.
    pub elements: Vec<Vec<String>>,
}
15
16/// MIG-guided disassembler — walks the MIG tree to emit segments in correct order.
17pub struct Disassembler<'a> {
18    mig: &'a MigSchema,
19}
20
21impl<'a> Disassembler<'a> {
22    pub fn new(mig: &'a MigSchema) -> Self {
23        Self { mig }
24    }
25
26    /// Disassemble a tree into ordered segments following MIG sequence.
27    ///
28    /// Emits segments in correct EDIFACT order:
29    /// 1. Pre-group top-level segments (e.g., UNB, UNH, BGM, DTM)
30    /// 2. Groups (recursively, in MIG order)
31    /// 3. Post-group top-level segments (e.g., UNT, UNZ)
32    ///
33    /// Uses MIG-guided ordering: walks the MIG schema tree and looks up
34    /// matching data in the assembled tree. This handles both assembler output
35    /// (already in MIG order) and reverse-mapped trees (may be in arbitrary order).
36    pub fn disassemble(&self, tree: &AssembledTree) -> Vec<DisassembledSegment> {
37        let mut output = Vec::new();
38
39        // 1. Emit pre-group segments in MIG order
40        let pre_group = &tree.segments[..tree.post_group_start];
41        let mut consumed = vec![false; pre_group.len()];
42        for mig_seg in &self.mig.segments {
43            if let Some(idx) = pre_group
44                .iter()
45                .enumerate()
46                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
47            {
48                output.push(assembled_to_disassembled(&pre_group[idx]));
49                consumed[idx] = true;
50            }
51        }
52
53        // 2. Emit groups in MIG order (lookup by group ID with consumption tracking).
54        //    Between groups, emit any inter-group root segments (e.g., UNS in MSCONS).
55        //    For variant groups (consecutive same-ID with variant_code), collect all
56        //    variant definitions and match each rep to its correct variant.
57        let mut consumed_groups = vec![false; tree.groups.len()];
58        let mut mig_group_idx = 0;
59        while mig_group_idx < self.mig.segment_groups.len() {
60            let mig_group = &self.mig.segment_groups[mig_group_idx];
61
62            // Check if this starts a variant set
63            if mig_group.variant_code.is_some() {
64                let variant_count = self.mig.segment_groups[mig_group_idx..]
65                    .iter()
66                    .take_while(|g| g.id == mig_group.id && g.variant_code.is_some())
67                    .count();
68                let variant_defs =
69                    &self.mig.segment_groups[mig_group_idx..mig_group_idx + variant_count];
70
71                if let Some(idx) = tree
72                    .groups
73                    .iter()
74                    .enumerate()
75                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
76                {
77                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
78                        for seg in inter_segs {
79                            output.push(assembled_to_disassembled(seg));
80                        }
81                    }
82                    self.emit_variant_group(&tree.groups[idx], variant_defs, &mut output);
83                    consumed_groups[idx] = true;
84                }
85                mig_group_idx += variant_count;
86            } else {
87                if let Some(idx) = tree
88                    .groups
89                    .iter()
90                    .enumerate()
91                    .position(|(i, g)| !consumed_groups[i] && g.group_id == mig_group.id)
92                {
93                    // Emit any inter-group segments that precede this group
94                    if let Some(inter_segs) = tree.inter_group_segments.get(&idx) {
95                        for seg in inter_segs {
96                            output.push(assembled_to_disassembled(seg));
97                        }
98                    }
99                    self.emit_group(&tree.groups[idx], mig_group, &mut output);
100                    consumed_groups[idx] = true;
101                }
102                mig_group_idx += 1;
103            }
104        }
105
106        // 2b. Emit trailing inter-group segments (after the last group).
107        //     For ORDERS, UNS+S comes after SG29 (detail/summary boundary).
108        let trailing_idx = tree.groups.len();
109        if let Some(inter_segs) = tree.inter_group_segments.get(&trailing_idx) {
110            for seg in inter_segs {
111                output.push(assembled_to_disassembled(seg));
112            }
113        }
114
115        // 3. Emit post-group segments (e.g., UNT, UNZ)
116        for seg in &tree.segments[tree.post_group_start..] {
117            output.push(assembled_to_disassembled(seg));
118        }
119
120        output
121    }
122
123    fn emit_group(
124        &self,
125        group: &AssembledGroup,
126        mig_group: &MigSegmentGroup,
127        output: &mut Vec<DisassembledSegment>,
128    ) {
129        for instance in &group.repetitions {
130            self.emit_group_instance(instance, mig_group, output);
131        }
132    }
133
134    /// Emit a group whose reps come from multiple variant definitions.
135    ///
136    /// For each rep, determine which variant it belongs to by checking the
137    /// entry segment's qualifier against each variant's `variant_code`.
138    /// Then emit the rep using that variant's segment/nested-group ordering.
139    fn emit_variant_group(
140        &self,
141        group: &AssembledGroup,
142        variant_defs: &[MigSegmentGroup],
143        output: &mut Vec<DisassembledSegment>,
144    ) {
145        for instance in &group.repetitions {
146            let entry_qual = instance
147                .segments
148                .first()
149                .and_then(|s| s.elements.first())
150                .and_then(|e| e.first())
151                .map(|v| v.as_str())
152                .unwrap_or("");
153
154            let variant_def = variant_defs
155                .iter()
156                .find(|v| {
157                    v.variant_code
158                        .as_deref()
159                        .map(|vc| vc.eq_ignore_ascii_case(entry_qual))
160                        .unwrap_or(false)
161                })
162                .unwrap_or(&variant_defs[0]); // fallback to first variant
163
164            self.emit_group_instance(instance, variant_def, output);
165        }
166    }
167
168    fn emit_group_instance(
169        &self,
170        instance: &AssembledGroupInstance,
171        mig_group: &MigSegmentGroup,
172        output: &mut Vec<DisassembledSegment>,
173    ) {
174        // Emit segments in MIG order using tag-based lookup with consumption tracking.
175        // This handles both assembler output (in MIG order) and reverse-mapped trees
176        // (may be in arbitrary order).
177        //
178        // After MIG-guided emission, any remaining unconsumed segments are appended.
179        // This handles cases where the assembler captured more segments than the MIG
180        // defines (e.g., 6 RFFs when the merged MIG only has 4 slots).
181        let mut consumed = vec![false; instance.segments.len()];
182        for mig_seg in &mig_group.segments {
183            if let Some(idx) = instance
184                .segments
185                .iter()
186                .enumerate()
187                .position(|(i, s)| !consumed[i] && s.tag == mig_seg.id)
188            {
189                output.push(assembled_to_disassembled(&instance.segments[idx]));
190                consumed[idx] = true;
191            }
192        }
193        // Emit any remaining segments not matched by MIG slots
194        for (i, seg) in instance.segments.iter().enumerate() {
195            if !consumed[i] {
196                output.push(assembled_to_disassembled(seg));
197            }
198        }
199
200        // Re-emit skipped segments (unknown segments preserved for roundtrip fidelity)
201        for skipped in &instance.skipped_segments {
202            output.push(assembled_to_disassembled(skipped));
203        }
204
205        // Child groups — lookup by group ID with consumption tracking.
206        // Applies variant-aware logic recursively for nested variant groups.
207        let mut consumed_child = vec![false; instance.child_groups.len()];
208        let mut nested_idx = 0;
209        while nested_idx < mig_group.nested_groups.len() {
210            let nested_mig = &mig_group.nested_groups[nested_idx];
211
212            if nested_mig.variant_code.is_some() {
213                let variant_count = mig_group.nested_groups[nested_idx..]
214                    .iter()
215                    .take_while(|g| g.id == nested_mig.id && g.variant_code.is_some())
216                    .count();
217                let variant_defs = &mig_group.nested_groups[nested_idx..nested_idx + variant_count];
218
219                if let Some(idx) = instance
220                    .child_groups
221                    .iter()
222                    .enumerate()
223                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
224                {
225                    self.emit_variant_group(&instance.child_groups[idx], variant_defs, output);
226                    consumed_child[idx] = true;
227                }
228                nested_idx += variant_count;
229            } else {
230                if let Some(idx) = instance
231                    .child_groups
232                    .iter()
233                    .enumerate()
234                    .position(|(i, g)| !consumed_child[i] && g.group_id == nested_mig.id)
235                {
236                    self.emit_group(&instance.child_groups[idx], nested_mig, output);
237                    consumed_child[idx] = true;
238                }
239                nested_idx += 1;
240            }
241        }
242    }
243}
244
245fn assembled_to_disassembled(seg: &AssembledSegment) -> DisassembledSegment {
246    DisassembledSegment {
247        tag: seg.tag.clone(),
248        elements: seg.elements.clone(),
249    }
250}
251
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assembler::{
        AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree,
    };
    use crate::test_support::{make_mig_group, make_mig_group_with_variant, make_mig_segment};
    use mig_types::schema::mig::{MigSchema, MigSegmentGroup};
    use std::collections::BTreeMap;

    // ---- fixture builders -------------------------------------------------

    /// Builds a segment; each inner vector is one element's components.
    fn segment(tag: &str, elements: Vec<Vec<&str>>) -> AssembledSegment {
        AssembledSegment {
            tag: tag.to_string(),
            elements: elements
                .into_iter()
                .map(|components| components.into_iter().map(str::to_string).collect())
                .collect(),
            mig_number: None,
        }
    }

    /// Builds a group repetition with no MIG numbers and no skipped segments.
    fn instance(
        segments: Vec<AssembledSegment>,
        child_groups: Vec<AssembledGroup>,
    ) -> AssembledGroupInstance {
        AssembledGroupInstance {
            segments,
            child_groups,
            entry_mig_number: None,
            variant_mig_numbers: vec![],
            skipped_segments: vec![],
        }
    }

    /// Builds a group with the given ID and repetitions.
    fn group(id: &str, repetitions: Vec<AssembledGroupInstance>) -> AssembledGroup {
        AssembledGroup {
            group_id: id.to_string(),
            repetitions,
        }
    }

    /// Builds a tree in which every top-level segment is a pre-group segment
    /// and there are no inter-group segments.
    fn assembled_tree(
        segments: Vec<AssembledSegment>,
        groups: Vec<AssembledGroup>,
    ) -> AssembledTree {
        AssembledTree {
            post_group_start: segments.len(),
            segments,
            groups,
            inter_group_segments: BTreeMap::new(),
        }
    }

    /// Builds a UTILMD schema with the given top-level segment tags and groups.
    fn schema(segment_tags: &[&str], segment_groups: Vec<MigSegmentGroup>) -> MigSchema {
        MigSchema {
            message_type: "UTILMD".to_string(),
            variant: None,
            version: "S2.1".to_string(),
            publication_date: "".to_string(),
            author: "".to_string(),
            format_version: "FV2504".to_string(),
            source_file: "test".to_string(),
            segments: segment_tags.iter().copied().map(make_mig_segment).collect(),
            segment_groups,
        }
    }

    /// Tags of the emitted segments, in output order.
    fn tags(segments: &[DisassembledSegment]) -> Vec<&str> {
        segments.iter().map(|s| s.tag.as_str()).collect()
    }

    // ---- tests ------------------------------------------------------------

    #[test]
    fn test_disassemble_top_level_only() {
        let mig = MigSchema {
            variant: Some("Strom".to_string()),
            publication_date: "2025-03-20".to_string(),
            author: "BDEW".to_string(),
            ..schema(&["UNH", "BGM"], vec![])
        };

        let tree = assembled_tree(
            vec![
                segment(
                    "UNH",
                    vec![vec!["1"], vec!["UTILMD", "D", "11A", "UN", "S2.1"]],
                ),
                segment("BGM", vec![vec!["E01"], vec!["MSG001"], vec!["9"]]),
            ],
            vec![],
        );

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);

        assert_eq!(tags(&segments), ["UNH", "BGM"]);
        assert_eq!(segments[0].elements[0], vec!["1"]);
    }

    #[test]
    fn test_disassemble_with_groups() {
        let mig = schema(
            &["UNH", "BGM"],
            vec![make_mig_group("SG2", vec!["NAD", "LOC"], vec![])],
        );

        let tree = assembled_tree(
            vec![
                segment("UNH", vec![vec!["1"]]),
                segment("BGM", vec![vec!["E01"]]),
            ],
            vec![group(
                "SG2",
                vec![
                    instance(vec![segment("NAD", vec![vec!["MS"]])], vec![]),
                    instance(vec![segment("NAD", vec![vec!["MR"]])], vec![]),
                ],
            )],
        );

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);

        // UNH, BGM, NAD(MS), NAD(MR)
        assert_eq!(tags(&segments), ["UNH", "BGM", "NAD", "NAD"]);
        assert_eq!(segments[2].elements[0][0], "MS");
        assert_eq!(segments[3].elements[0][0], "MR");
    }

    #[test]
    fn test_disassemble_nested_groups() {
        let sg3 = make_mig_group("SG3", vec!["CTA", "COM"], vec![]);
        let mig = schema(&["UNH"], vec![make_mig_group("SG2", vec!["NAD"], vec![sg3])]);

        let sg3_data = group(
            "SG3",
            vec![instance(
                vec![
                    segment("CTA", vec![vec!["IC"]]),
                    segment("COM", vec![vec!["040@ex.com", "EM"]]),
                ],
                vec![],
            )],
        );
        let tree = assembled_tree(
            vec![segment("UNH", vec![vec!["1"]])],
            vec![group(
                "SG2",
                vec![instance(
                    vec![segment("NAD", vec![vec!["MS"]])],
                    vec![sg3_data],
                )],
            )],
        );

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);

        assert_eq!(tags(&segments), ["UNH", "NAD", "CTA", "COM"]);
    }

    #[test]
    fn test_disassemble_variant_groups_uses_per_variant_ordering() {
        // Two SG8 variant definitions with different segment ordering:
        // - ZD7 variant: SEQ, RFF, DTM
        // - Z98 variant: SEQ, CCI, CAV
        let sg8_zd7 = make_mig_group_with_variant("SG8", vec!["SEQ", "RFF", "DTM"], vec![], "ZD7");
        let sg8_z98 = make_mig_group_with_variant("SG8", vec!["SEQ", "CCI", "CAV"], vec![], "Z98");
        let mig = schema(&["UNH"], vec![sg8_zd7, sg8_z98]);

        // Assembled tree has 3 reps: ZD7, Z98, ZD7 (interleaved)
        let tree = assembled_tree(
            vec![segment("UNH", vec![vec!["1"]])],
            vec![group(
                "SG8",
                vec![
                    // Rep 0: ZD7 variant — has SEQ, DTM, RFF (out of MIG order)
                    instance(
                        vec![
                            segment("SEQ", vec![vec!["ZD7"]]),
                            segment("DTM", vec![vec!["303"]]),
                            segment("RFF", vec![vec!["Z13"]]),
                        ],
                        vec![],
                    ),
                    // Rep 1: Z98 variant — has SEQ, CAV, CCI (out of MIG order)
                    instance(
                        vec![
                            segment("SEQ", vec![vec!["Z98"]]),
                            segment("CAV", vec![vec!["Z91"]]),
                            segment("CCI", vec![vec![""]]),
                        ],
                        vec![],
                    ),
                    // Rep 2: another ZD7 variant (no DTM)
                    instance(
                        vec![
                            segment("SEQ", vec![vec!["ZD7"]]),
                            segment("RFF", vec![vec!["Z34"]]),
                        ],
                        vec![],
                    ),
                ],
            )],
        );

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);

        assert_eq!(
            tags(&segments),
            [
                "UNH", // top level
                "SEQ", "RFF", "DTM", // rep 0 (ZD7): RFF and DTM swapped into MIG order
                "SEQ", "CCI", "CAV", // rep 1 (Z98): CCI and CAV swapped into MIG order
                "SEQ", "RFF", // rep 2 (ZD7)
            ]
        );
        assert_eq!(segments[1].elements[0][0], "ZD7");
        assert_eq!(segments[4].elements[0][0], "Z98");
        assert_eq!(segments[7].elements[0][0], "ZD7");
    }

    #[test]
    fn test_disassemble_appends_unmatched_and_skipped_segments() {
        // The MIG only defines a NAD slot for SG2; the instance also carries
        // an RFF without a slot and a skipped FTX.
        let mig = schema(&["UNH"], vec![make_mig_group("SG2", vec!["NAD"], vec![])]);

        let rep = AssembledGroupInstance {
            skipped_segments: vec![segment("FTX", vec![vec!["ACB"]])],
            ..instance(
                vec![
                    segment("RFF", vec![vec!["Z13"]]),
                    segment("NAD", vec![vec!["MS"]]),
                ],
                vec![],
            )
        };
        let tree = assembled_tree(
            vec![segment("UNH", vec![vec!["1"]])],
            vec![group("SG2", vec![rep])],
        );

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);

        // MIG-matched segments first, then leftovers, then skipped segments.
        assert_eq!(tags(&segments), ["UNH", "NAD", "RFF", "FTX"]);
    }

    #[test]
    fn test_disassemble_empty_tree() {
        let mig = schema(&["UNH"], vec![]);
        let tree = assembled_tree(vec![], vec![]);

        let disassembler = Disassembler::new(&mig);
        let segments = disassembler.disassemble(&tree);
        assert!(segments.is_empty());
    }
}