Skip to main content

automapper_validation/validator/
tree.rs

1//! Merged AHB + EDIFACT tree for validation.
2//!
3//! [`build_validated_tree`] joins an [`AhbWorkflow`] (what should be there)
4//! with an [`AssembledTree`] (what is there) into a single [`ValidatedTree`]
5//! where each node carries both the AHB rule and the resolved EDIFACT value.
6
7use std::collections::HashMap;
8
9use crate::expr::ConditionExpr;
10
11use super::validate::{AhbFieldRule, AhbWorkflow};
12use mig_assembly::assembler::{
13    AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree,
14};
15
16/// A single AHB field matched against EDIFACT data.
17#[derive(Debug)]
18pub struct AhbNode<'a> {
19    /// The AHB field rule (segment path, ahb_status, codes, etc.)
20    pub rule: &'a AhbFieldRule,
21    /// The actual value found in the EDIFACT at this position. `None` if absent.
22    pub value: Option<&'a str>,
23    /// The full segment elements for cross-element access. `None` if segment absent.
24    pub segment_elements: Option<&'a [Vec<String>]>,
25}
26
27/// A segment group instance matched against EDIFACT data.
28#[derive(Debug)]
29pub struct AhbGroupNode<'a> {
30    /// Group ID (e.g., "SG4", "SG8").
31    pub group_id: &'a str,
32    /// AHB status of this group.
33    pub ahb_status: Option<&'a str>,
34    /// Fields in this group instance, resolved against EDIFACT segments.
35    pub fields: Vec<AhbNode<'a>>,
36    /// Child group instances.
37    pub children: Vec<AhbGroupNode<'a>>,
38}
39
40/// AHB workflow merged with assembled EDIFACT data.
41#[derive(Debug)]
42pub struct ValidatedTree<'a> {
43    /// The Pruefidentifikator.
44    pub pruefidentifikator: &'a str,
45    /// UB definitions for condition expansion.
46    pub ub_definitions: &'a HashMap<String, ConditionExpr>,
47    /// Root-level fields (outside segment groups).
48    pub root_fields: Vec<AhbNode<'a>>,
49    /// Top-level group instances.
50    pub groups: Vec<AhbGroupNode<'a>>,
51}
52
53/// Build a [`ValidatedTree`] by merging an AHB workflow with an assembled EDIFACT tree.
54///
55/// The join key is `mig_number`: both [`AhbFieldRule::mig_number`] and
56/// [`AssembledSegment::mig_number`] carry the MIG `Number` attribute that
57/// uniquely identifies a segment variant.
58pub fn build_validated_tree<'a>(
59    workflow: &'a AhbWorkflow,
60    tree: &'a AssembledTree,
61) -> ValidatedTree<'a> {
62    // Partition AHB rules into root-level and per-group buckets.
63    let mut root_rules: Vec<&'a AhbFieldRule> = Vec::new();
64    // Map from top-level group prefix (e.g., "SG2", "SG4") to rules.
65    let mut group_rules: HashMap<String, Vec<&'a AhbFieldRule>> = HashMap::new();
66
67    for rule in &workflow.fields {
68        match extract_top_group(&rule.segment_path) {
69            Some(group_id) => {
70                group_rules
71                    .entry(group_id.to_owned())
72                    .or_default()
73                    .push(rule);
74            }
75            None => {
76                root_rules.push(rule);
77            }
78        }
79    }
80
81    // Resolve root-level fields against root segments.
82    let root_fields = resolve_fields(&root_rules, &tree.segments);
83
84    // Resolve groups recursively (depth 0 = top-level groups).
85    let groups = resolve_groups(&tree.groups, &group_rules, 0);
86
87    ValidatedTree {
88        pruefidentifikator: &workflow.pruefidentifikator,
89        ub_definitions: &workflow.ub_definitions,
90        root_fields,
91        groups,
92    }
93}
94
/// Return the leading path component when it names a segment group.
///
/// A component counts as a group when it starts with `"SG"`.
///
/// `"SG4/DTM/C507/2380"` -> `Some("SG4")`
/// `"SG4/SG5/LOC/3225"` -> `Some("SG4")`
/// `"BGM/1004"` -> `None`
fn extract_top_group(segment_path: &str) -> Option<&str> {
    segment_path
        .split('/')
        .next()
        .filter(|head| head.starts_with("SG"))
}
108
/// Return the nested group a parent-stripped path descends into, if any.
///
/// The input is a path whose enclosing group prefixes were already removed.
/// Its first component is a child group when it starts with `"SG"`.
///
/// `"SG5/LOC/3225"` -> `Some("SG5")`
/// `"DTM/C507/2380"` -> `None`
fn extract_child_group(stripped_path: &str) -> Option<&str> {
    // Everything up to the first '/' (or the whole string when there is none).
    let head = match stripped_path.find('/') {
        Some(cut) => &stripped_path[..cut],
        None => stripped_path,
    };

    if head.starts_with("SG") {
        Some(head)
    } else {
        None
    }
}
121
122/// Resolve AHB fields against a list of assembled segments.
123///
124/// For each rule, tries to find a matching segment by `mig_number` first,
125/// then falls back to segment tag matching. Rules that don't match any
126/// segment get `value: None` — condition evaluation decides if that's an error.
127fn resolve_fields<'a>(
128    rules: &[&'a AhbFieldRule],
129    segments: &'a [AssembledSegment],
130) -> Vec<AhbNode<'a>> {
131    rules
132        .iter()
133        .map(|rule| {
134            let matched_segment = find_segment(rule, segments);
135            let (value, elements) = match matched_segment {
136                Some(seg) => {
137                    let val = extract_value(seg, rule);
138                    (val, Some(seg.elements.as_slice()))
139                }
140                None => (None, None),
141            };
142            AhbNode {
143                rule,
144                value,
145                segment_elements: elements,
146            }
147        })
148        .collect()
149}
150
151/// Find the assembled segment matching an AHB field rule.
152///
153/// Prefers `mig_number` matching; falls back to segment tag from the path.
154fn find_segment<'a>(
155    rule: &AhbFieldRule,
156    segments: &'a [AssembledSegment],
157) -> Option<&'a AssembledSegment> {
158    // Try mig_number match first.
159    if let Some(ref mig_num) = rule.mig_number {
160        if let Some(seg) = segments
161            .iter()
162            .find(|s| s.mig_number.as_deref() == Some(mig_num.as_str()))
163        {
164            return Some(seg);
165        }
166    }
167
168    // Fall back to segment tag from the path.
169    let tag = extract_segment_tag(&rule.segment_path)?;
170    segments.iter().find(|s| s.tag == tag)
171}
172
/// Return the first path component that is not a segment group.
///
/// Components starting with `"SG"` are skipped. `None` is returned when
/// every component is a group.
///
/// `"SG4/DTM/C507/2380"` -> `Some("DTM")`
/// `"BGM/1004"` -> `Some("BGM")`
fn extract_segment_tag(segment_path: &str) -> Option<&str> {
    for component in segment_path.split('/') {
        if !component.starts_with("SG") {
            return Some(component);
        }
    }
    None
}
182
183/// Extract a field value from an assembled segment using element/component indices.
184fn extract_value<'a>(segment: &'a AssembledSegment, rule: &AhbFieldRule) -> Option<&'a str> {
185    let elem_idx = rule.element_index.unwrap_or(0);
186    let comp_idx = rule.component_index.unwrap_or(0);
187
188    let element = segment.elements.get(elem_idx)?;
189    let component = element.get(comp_idx)?;
190
191    if component.is_empty() {
192        None
193    } else {
194        Some(component.as_str())
195    }
196}
197
198/// Resolve assembled groups against grouped AHB rules, recursively.
199///
200/// `depth` is the number of group prefixes to strip from each rule's
201/// `segment_path` before classifying it as direct or child.
202fn resolve_groups<'a>(
203    assembled_groups: &'a [AssembledGroup],
204    group_rules: &HashMap<String, Vec<&'a AhbFieldRule>>,
205    depth: usize,
206) -> Vec<AhbGroupNode<'a>> {
207    let mut result = Vec::new();
208
209    for assembled_group in assembled_groups {
210        let rules = group_rules.get(&assembled_group.group_id);
211
212        for instance in &assembled_group.repetitions {
213            let node =
214                resolve_group_instance(&assembled_group.group_id, instance, rules, depth);
215            result.push(node);
216        }
217    }
218
219    result
220}
221
/// Drop up to `n` leading `/`-separated components from a path.
///
/// Components are removed regardless of their content; callers use this to
/// remove group prefixes. If the path has fewer than `n` separators, the
/// last component is returned.
///
/// `strip_n_groups("SG4/SG5/LOC/3225", 2)` -> `"LOC/3225"`
fn strip_n_groups(path: &str, n: usize) -> &str {
    // Splitting into at most n + 1 pieces leaves the unsplit remainder last.
    path.splitn(n.saturating_add(1), '/')
        .last()
        .unwrap_or(path)
}
235
236/// Resolve a single group instance.
237///
238/// `depth` is how many group prefixes have been consumed so far (0 for top-level groups).
239///
240/// Rules are filtered to this instance by `mig_number`: a rule only applies if
241/// its `mig_number` matches a segment in this instance (or a segment in a child
242/// group instance). This prevents rules for SG8/SEQ+Z79 from generating false
243/// missing-field errors against an SG8/SEQ+Z01 rep.
244fn resolve_group_instance<'a>(
245    group_id: &'a str,
246    instance: &'a AssembledGroupInstance,
247    rules: Option<&Vec<&'a AhbFieldRule>>,
248    depth: usize,
249) -> AhbGroupNode<'a> {
250    // We need to strip (depth + 1) group prefixes to get below this group level.
251    let strip_count = depth + 1;
252
253    // Use the variant's full set of mig_numbers (from MIG definition) to
254    // determine which rules belong to this instance. This includes numbers
255    // for segments that may be absent — so missing-field detection still works.
256    // Falls back to collecting from present segments if variant_mig_numbers is empty.
257    let variant_numbers: std::collections::HashSet<&str> = if !instance.variant_mig_numbers.is_empty() {
258        instance.variant_mig_numbers.iter().map(|s| s.as_str()).collect()
259    } else {
260        collect_instance_mig_numbers(instance)
261    };
262
263    let mut direct_rules: Vec<&'a AhbFieldRule> = Vec::new();
264    let mut child_group_rules: HashMap<String, Vec<&'a AhbFieldRule>> = HashMap::new();
265    let mut ahb_status: Option<&'a str> = None;
266
267    if let Some(rules) = rules {
268        for rule in rules {
269            // Skip rules whose mig_number doesn't belong to this variant.
270            // Rules without mig_number pass through (tag-based fallback).
271            if let Some(ref rule_mig) = rule.mig_number {
272                if !variant_numbers.contains(rule_mig.as_str()) {
273                    continue;
274                }
275            }
276
277            // Strip all parent group prefixes plus this group to get the relative path.
278            let stripped = strip_n_groups(&rule.segment_path, strip_count);
279
280            if let Some(child_group_id) = extract_child_group(stripped) {
281                child_group_rules
282                    .entry(child_group_id.to_owned())
283                    .or_default()
284                    .push(rule);
285            } else {
286                direct_rules.push(rule);
287            }
288
289            // Capture the parent group AHB status if present.
290            if ahb_status.is_none() {
291                if let Some(ref status) = rule.parent_group_ahb_status {
292                    ahb_status = Some(status.as_str());
293                }
294            }
295        }
296    }
297
298    // Resolve direct fields against instance segments.
299    let fields = resolve_fields(&direct_rules, &instance.segments);
300
301    // Recurse into child groups one level deeper.
302    let children = resolve_groups(&instance.child_groups, &child_group_rules, strip_count);
303
304    AhbGroupNode {
305        group_id,
306        ahb_status,
307        fields,
308        children,
309    }
310}
311
312/// Collect all mig_numbers present in a group instance, including child groups recursively.
313fn collect_instance_mig_numbers(instance: &AssembledGroupInstance) -> std::collections::HashSet<&str> {
314    let mut numbers = std::collections::HashSet::new();
315    for seg in &instance.segments {
316        if let Some(ref num) = seg.mig_number {
317            numbers.insert(num.as_str());
318        }
319    }
320    for child_group in &instance.child_groups {
321        for child_instance in &child_group.repetitions {
322            numbers.extend(collect_instance_mig_numbers(child_instance));
323        }
324    }
325    numbers
326}
327
#[cfg(test)]
mod tests {
    use super::*;
    use crate::validator::validate::{AhbFieldRule, AhbWorkflow};
    use mig_assembly::assembler::{
        AssembledGroup, AssembledGroupInstance, AssembledSegment, AssembledTree,
    };
    use std::collections::{BTreeMap, HashMap};

    // ---- fixtures -------------------------------------------------------

    /// Workflow "11001" without any rules or UB definitions.
    fn empty_workflow() -> AhbWorkflow {
        AhbWorkflow {
            pruefidentifikator: String::from("11001"),
            description: String::new(),
            communication_direction: None,
            fields: Vec::new(),
            ub_definitions: HashMap::new(),
        }
    }

    /// Workflow "11001" carrying exactly the given rules.
    fn workflow_with(fields: Vec<AhbFieldRule>) -> AhbWorkflow {
        AhbWorkflow {
            fields,
            ..empty_workflow()
        }
    }

    /// Tree with no segments and no groups.
    fn empty_tree() -> AssembledTree {
        AssembledTree {
            segments: Vec::new(),
            groups: Vec::new(),
            post_group_start: 0,
            inter_group_segments: BTreeMap::new(),
        }
    }

    /// Tree that only has root segments; groups would start after the first one.
    fn root_only_tree(segments: Vec<AssembledSegment>) -> AssembledTree {
        AssembledTree {
            segments,
            post_group_start: 1,
            ..empty_tree()
        }
    }

    /// Tree that only has segment groups.
    fn grouped_tree(groups: Vec<AssembledGroup>) -> AssembledTree {
        AssembledTree {
            groups,
            ..empty_tree()
        }
    }

    /// Group with a single repetition.
    fn single_rep_group(group_id: &str, instance: AssembledGroupInstance) -> AssembledGroup {
        AssembledGroup {
            group_id: group_id.to_string(),
            repetitions: vec![instance],
        }
    }

    /// Group repetition without skipped segments.
    fn make_instance(
        segments: Vec<AssembledSegment>,
        child_groups: Vec<AssembledGroup>,
        entry_mig_number: Option<&str>,
        variant_mig_numbers: &[&str],
    ) -> AssembledGroupInstance {
        AssembledGroupInstance {
            segments,
            child_groups,
            entry_mig_number: entry_mig_number.map(|n| n.to_string()),
            variant_mig_numbers: variant_mig_numbers.iter().map(|n| n.to_string()).collect(),
            skipped_segments: vec![],
        }
    }

    fn make_segment(tag: &str, elements: Vec<Vec<&str>>, mig_number: Option<&str>) -> AssembledSegment {
        let owned_elements = elements
            .into_iter()
            .map(|components| components.into_iter().map(String::from).collect())
            .collect();
        AssembledSegment {
            tag: tag.to_string(),
            elements: owned_elements,
            mig_number: mig_number.map(String::from),
        }
    }

    fn make_rule(
        segment_path: &str,
        name: &str,
        ahb_status: &str,
        mig_number: Option<&str>,
        element_index: Option<usize>,
        component_index: Option<usize>,
    ) -> AhbFieldRule {
        AhbFieldRule {
            segment_path: segment_path.to_string(),
            name: name.to_string(),
            ahb_status: ahb_status.to_string(),
            codes: vec![],
            parent_group_ahb_status: None,
            element_index,
            component_index,
            mig_number: mig_number.map(String::from),
        }
    }

    // ---- tests ----------------------------------------------------------

    #[test]
    fn test_empty_workflow_empty_tree() {
        let workflow = empty_workflow();
        let tree = empty_tree();

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.pruefidentifikator, "11001");
        assert!(validated.root_fields.is_empty());
        assert!(validated.groups.is_empty());
    }

    #[test]
    fn test_root_field_matches_root_segment() {
        let workflow = workflow_with(vec![make_rule(
            "BGM/C002/1001",
            "Nachrichtentyp",
            "X",
            Some("0001"),
            Some(0),
            Some(0),
        )]);
        let tree = root_only_tree(vec![make_segment("BGM", vec![vec!["E01"]], Some("0001"))]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.root_fields.len(), 1);
        let node = &validated.root_fields[0];
        assert_eq!(node.value, Some("E01"));
        assert!(node.segment_elements.is_some());
        assert_eq!(node.rule.name, "Nachrichtentyp");
    }

    #[test]
    fn test_sg4_dtm_mig_number_matching() {
        // Two DTM segments that differ only in mig_number; each rule must pick
        // up the date from the DTM carrying its own number. Element 0 holds the
        // qualifier, element 1 the date value.
        let workflow = workflow_with(vec![
            // DTM+92
            make_rule(
                "SG4/DTM/C507/2380",
                "Eingangsdatum",
                "X",
                Some("0082"),
                Some(1),
                Some(0),
            ),
            // DTM+137
            make_rule(
                "SG4/DTM/C507/2380",
                "Dokumentendatum",
                "X",
                Some("0083"),
                Some(1),
                Some(0),
            ),
        ]);
        let sg4_instance = make_instance(
            vec![
                make_segment("DTM", vec![vec!["92"], vec!["20260101"]], Some("0082")),
                make_segment("DTM", vec![vec!["137"], vec!["20260401"]], Some("0083")),
            ],
            vec![],
            None,
            &[],
        );
        let tree = grouped_tree(vec![single_rep_group("SG4", sg4_instance)]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.groups.len(), 1);
        let sg4 = &validated.groups[0];
        assert_eq!(sg4.group_id, "SG4");
        assert_eq!(sg4.fields.len(), 2);

        let resolved: Vec<(&str, Option<&str>)> = sg4
            .fields
            .iter()
            .map(|field| (field.rule.name.as_str(), field.value))
            .collect();
        assert_eq!(
            resolved,
            vec![
                ("Eingangsdatum", Some("20260101")),
                ("Dokumentendatum", Some("20260401")),
            ]
        );
    }

    #[test]
    fn test_rule_filtered_to_correct_variant() {
        // The instance has no segments and declares no variant numbers, so a
        // rule with mig_number "0099" cannot belong to it and must be dropped.
        // This is what prevents false missing-field errors for the wrong SG8
        // variant.
        let workflow = workflow_with(vec![make_rule(
            "SG4/RFF/C506/1154",
            "Referenz",
            "X",
            Some("0099"),
            Some(0),
            Some(1),
        )]);
        let wrong_variant = make_instance(vec![], vec![], None, &[]);
        let tree = grouped_tree(vec![single_rep_group("SG4", wrong_variant)]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.groups.len(), 1);
        assert_eq!(validated.groups[0].fields.len(), 0);
    }

    #[test]
    fn test_missing_segment_within_correct_variant() {
        // variant_mig_numbers lists "0099", so the RFF rule is kept even
        // though the RFF segment itself is absent. That is what makes
        // missing-field detection possible inside the correct variant.
        let workflow = workflow_with(vec![
            // Entry segment, present in the message.
            make_rule(
                "SG4/SEQ/1229",
                "Qualifier",
                "X",
                Some("0098"),
                Some(0),
                Some(0),
            ),
            // Second segment, absent from the message.
            make_rule(
                "SG4/RFF/C506/1154",
                "Referenz",
                "X",
                Some("0099"),
                Some(0),
                Some(1),
            ),
        ]);
        let seq_only = make_instance(
            vec![make_segment("SEQ", vec![vec!["Z01"]], Some("0098"))],
            vec![],
            Some("0098"),
            // "0098" is SEQ, "0099" is RFF.
            &["0098", "0099"],
        );
        let tree = grouped_tree(vec![single_rep_group("SG4", seq_only)]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.groups.len(), 1);
        let fields = &validated.groups[0].fields;
        assert_eq!(fields.len(), 2);
        assert_eq!(fields[0].rule.name, "Qualifier");
        assert_eq!(fields[0].value, Some("Z01"));
        // RFF is missing: the node exists but has no value.
        assert_eq!(fields[1].rule.name, "Referenz");
        assert_eq!(fields[1].value, None);
    }

    #[test]
    fn test_fallback_to_tag_when_no_mig_number() {
        // Neither the rule nor the segment carries a mig_number, so the match
        // has to come from the segment tag.
        let workflow = workflow_with(vec![make_rule(
            "BGM/C002/1001",
            "Nachrichtentyp",
            "X",
            None,
            Some(0),
            Some(0),
        )]);
        let tree = root_only_tree(vec![make_segment("BGM", vec![vec!["E01"]], None)]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.root_fields.len(), 1);
        assert_eq!(validated.root_fields[0].value, Some("E01"));
    }

    #[test]
    fn test_nested_child_groups() {
        // A rule located in SG5 nested inside SG4.
        let workflow = workflow_with(vec![make_rule(
            "SG4/SG5/LOC/C517/3225",
            "Marktlokations-ID",
            "X",
            Some("0050"),
            Some(0),
            Some(0),
        )]);
        let sg5_instance = make_instance(
            vec![make_segment("LOC", vec![vec!["DE00012345678"]], Some("0050"))],
            vec![],
            None,
            &[],
        );
        let sg4_instance = make_instance(
            vec![],
            vec![single_rep_group("SG5", sg5_instance)],
            None,
            &[],
        );
        let tree = grouped_tree(vec![single_rep_group("SG4", sg4_instance)]);

        let validated = build_validated_tree(&workflow, &tree);

        assert_eq!(validated.groups.len(), 1);
        let sg4 = &validated.groups[0];
        assert_eq!(sg4.children.len(), 1);

        let sg5 = &sg4.children[0];
        assert_eq!(sg5.group_id, "SG5");
        assert_eq!(sg5.fields.len(), 1);
        assert_eq!(sg5.fields[0].value, Some("DE00012345678"));
        assert_eq!(sg5.fields[0].rule.name, "Marktlokations-ID");
    }
}