Skip to main content

mig_bo4e/
error_mapping.rs

//! Maps EDIFACT validation paths to BO4E field paths.
//!
//! When validation runs on EDIFACT produced by reverse-mapping BO4E JSON,
//! the resulting `ValidationIssue`s contain EDIFACT segment paths like
//! `SG4/SG5/LOC/C517/3225`. This module resolves those back to BO4E paths
//! like `stammdaten.Marktlokation.marktlokationsId` so users can find
//! the source of the problem in their BO4E input.
8
9use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
10
11use crate::definition::{FieldMapping, MappingDefinition};
12use crate::path_resolver::ReversePathResolver;
13
/// Maps EDIFACT segment paths from validation errors to BO4E field paths.
///
/// Construct it with [`Bo4eFieldIndex::build`] or
/// [`Bo4eFieldIndex::build_with_resolver`], then translate individual
/// EDIFACT paths with [`Bo4eFieldIndex::resolve`].
pub struct Bo4eFieldIndex {
    /// Index entries, one per mapping definition that yielded at least one
    /// resolvable field, kept in definition order.
    entries: Vec<IndexEntry>,
}
18
/// All indexed fields that originate from one mapping definition.
struct IndexEntry {
    /// EDIFACT group path of the definition in slash notation, derived from
    /// its `source_group`: "SG4/SG5", "SG2", "SG4", etc. Empty for
    /// definitions that live at the message root.
    edifact_prefix: String,
    /// BO4E entity name from TOML meta: "Marktlokation", "Prozessdaten"
    entity: String,
    /// Top-level BO4E section the entity lives in (currently always stammdaten).
    location: FieldLocation,
    /// Optional companion type (for companion_fields entries).
    companion_type: Option<String>,
    /// Individual field mappings within this definition.
    fields: Vec<FieldEntry>,
}
31
/// Top-level BO4E section an entity is placed under.
///
/// Only `Stammdaten` exists today; the enum is kept so that path building
/// stays an exhaustive `match` if further sections are introduced.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FieldLocation {
    /// The entity is emitted under the `stammdaten` key.
    Stammdaten,
}
36
/// A single EDIFACT path → BO4E field association inside an [`IndexEntry`].
struct FieldEntry {
    /// Full EDIFACT field_path this matches (e.g., "SG4/SG5/LOC/C517/3225").
    edifact_path: String,
    /// BO4E target field name (e.g., "marktlokationsId"). For qualifier-only
    /// TOML paths this is the target of the sibling data field that shares
    /// the same segment tag and qualifier.
    bo4e_field: String,
    /// Whether this is a companion field.
    is_companion: bool,
    /// Optional qualifier from the TOML path (e.g., "93" from "dtm[93].0.1").
    /// Used to disambiguate fields that share the same EDIFACT path.
    qualifier: Option<String>,
}
48
49impl Bo4eFieldIndex {
50    /// Build the index from TOML mapping definitions and a MIG schema.
51    ///
52    /// For each field in each definition, resolves the TOML numeric path
53    /// (e.g., `loc.1.0`) to an AHB-style EDIFACT path (e.g., `SG4/SG5/LOC/C517/3225`)
54    /// using the MIG schema for element ID lookup.
55    pub fn build(definitions: &[MappingDefinition], mig: &MigSchema) -> Self {
56        Self::build_inner(definitions, mig, None)
57    }
58
59    /// Build the index using a `ReversePathResolver` for element ID lookup.
60    ///
61    /// This is more accurate than `build` because the resolver is built from
62    /// unmerged PID schema JSONs, avoiding data loss from MIG group merging
63    /// (e.g., CCI composites lost when merging SG10 variants).
64    pub fn build_with_resolver(
65        definitions: &[MappingDefinition],
66        mig: &MigSchema,
67        resolver: &ReversePathResolver,
68    ) -> Self {
69        Self::build_inner(definitions, mig, Some(resolver))
70    }
71
72    fn build_inner(
73        definitions: &[MappingDefinition],
74        mig: &MigSchema,
75        resolver: Option<&ReversePathResolver>,
76    ) -> Self {
77        let mut entries = Vec::new();
78
79        for def in definitions {
80            let group_path = source_group_to_slash(&def.meta.source_group);
81            let location = classify_entity(&def.meta.entity);
82            let companion_type = def.meta.companion_type.clone();
83
84            let mut fields = Vec::new();
85
86            // Process [fields]
87            Self::collect_fields_inner(
88                &def.fields,
89                &group_path,
90                mig,
91                resolver,
92                false,
93                &mut fields,
94            );
95
96            // Process [companion_fields]
97            if let Some(ref companion) = def.companion_fields {
98                Self::collect_fields_inner(
99                    companion,
100                    &group_path,
101                    mig,
102                    resolver,
103                    true,
104                    &mut fields,
105                );
106            }
107
108            if !fields.is_empty() {
109                entries.push(IndexEntry {
110                    edifact_prefix: group_path.clone(),
111                    entity: def.meta.entity.clone(),
112                    location,
113                    companion_type,
114                    fields,
115                });
116            }
117        }
118
119        Self { entries }
120    }
121
122    /// Given an EDIFACT field_path from a ValidationIssue, return the BO4E path.
123    ///
124    /// `hint` is an optional disambiguation string (e.g., expected value or AHB rule)
125    /// that helps pick the right entry when multiple TOML definitions map to the same
126    /// EDIFACT path (e.g., DTM+92 and DTM+93 both map to SG4/DTM/C507/2005).
127    pub fn resolve(&self, edifact_field_path: &str, hint: Option<&str>) -> Option<String> {
128        // Exact match on field entries — prefer qualifier-matching entries when hint is available
129        let mut exact_matches: Vec<(&IndexEntry, &FieldEntry)> = Vec::new();
130        for entry in &self.entries {
131            for field in &entry.fields {
132                if field.edifact_path == edifact_field_path {
133                    exact_matches.push((entry, field));
134                }
135            }
136        }
137
138        if !exact_matches.is_empty() {
139            // If we have a hint, try to match it against field qualifiers
140            if let Some(hint) = hint {
141                if let Some((entry, field)) = exact_matches
142                    .iter()
143                    .find(|(_, f)| f.qualifier.as_deref() == Some(hint))
144                {
145                    return Some(self.build_bo4e_path(entry, field));
146                }
147                // Also try matching hint as a substring of qualifier or vice versa
148                if let Some((entry, field)) = exact_matches
149                    .iter()
150                    .find(|(_, f)| {
151                        f.qualifier
152                            .as_deref()
153                            .is_some_and(|q| hint.contains(q) || q.contains(hint))
154                    })
155                {
156                    return Some(self.build_bo4e_path(entry, field));
157                }
158            }
159            // No hint match — return first exact match
160            let (entry, field) = exact_matches[0];
161            return Some(self.build_bo4e_path(entry, field));
162        }
163
164        // Sibling match: for qualifier fields like SG4/DTM/C507/2005, find entries
165        // for the same segment composite (SG4/DTM/C507/*) that match the hint qualifier.
166        // This handles cases where the qualifier element (2005) doesn't have its own
167        // index entry but its sibling data field (2380) does.
168        if let Some(hint) = hint {
169            let composite_prefix = edifact_field_path.rsplit_once('/').map(|(p, _)| p);
170            if let Some(prefix) = composite_prefix {
171                for entry in &self.entries {
172                    for field in &entry.fields {
173                        if field.edifact_path.starts_with(prefix)
174                            && field.qualifier.as_deref() == Some(hint)
175                        {
176                            return Some(self.build_bo4e_path(entry, field));
177                        }
178                    }
179                }
180            }
181        }
182
183        // Prefix match for code/qualifier paths — longest prefix wins
184        let mut best: Option<&IndexEntry> = None;
185        for entry in &self.entries {
186            if !entry.edifact_prefix.is_empty()
187                && edifact_field_path.starts_with(&entry.edifact_prefix)
188                && best
189                    .map(|b| entry.edifact_prefix.len() > b.edifact_prefix.len())
190                    .unwrap_or(true)
191            {
192                best = Some(entry);
193            }
194        }
195        best.map(|entry| self.build_entity_path(entry))
196    }
197
198    /// Debug: return all entries as (edifact_path, entity, bo4e_field) tuples.
199    pub fn debug_entries(&self) -> Vec<(String, String, String)> {
200        let mut out = Vec::new();
201        for entry in &self.entries {
202            for field in &entry.fields {
203                out.push((
204                    field.edifact_path.clone(),
205                    entry.entity.clone(),
206                    field.bo4e_field.clone(),
207                ));
208            }
209        }
210        out
211    }
212
213    fn collect_fields_inner(
214        field_map: &indexmap::IndexMap<String, FieldMapping>,
215        group_path: &str,
216        mig: &MigSchema,
217        resolver: Option<&ReversePathResolver>,
218        is_companion: bool,
219        out: &mut Vec<FieldEntry>,
220    ) {
221        // First pass: collect qualifier paths (empty target) and data field paths.
222        // Qualifier paths like dtm[92].c507.d2005 have empty target but carry a
223        // default value (the qualifier code). We'll create entries for them in
224        // a second pass, pointing to their sibling data field.
225        struct QualifierPath {
226            parsed: ParsedTomlPath,
227        }
228        let mut qualifier_paths: Vec<QualifierPath> = Vec::new();
229        // Map from (tag, qualifier) → first data field BO4E name for sibling lookup
230        let mut tag_qualifier_to_field: std::collections::HashMap<(String, String), String> =
231            std::collections::HashMap::new();
232
233        for (toml_path, mapping) in field_map {
234            let target = match mapping {
235                FieldMapping::Simple(s) => s.as_str(),
236                FieldMapping::Structured(s) => s.target.as_str(),
237                FieldMapping::Nested(_) => continue,
238            };
239
240            let parsed = match parse_toml_path(toml_path) {
241                Some(p) => p,
242                None => continue,
243            };
244
245            if target.is_empty() {
246                // Qualifier/default field — collect for second pass
247                qualifier_paths.push(QualifierPath { parsed });
248                continue;
249            }
250
251            // Track first data field per (tag, qualifier) for sibling lookup
252            if let Some(ref q) = parsed.qualifier {
253                tag_qualifier_to_field
254                    .entry((parsed.segment_tag.clone(), q.clone()))
255                    .or_insert_with(|| target.to_string());
256            }
257
258            // Resolve and add the data field entry
259            let edifact_path = resolver
260                .and_then(|r| resolve_edifact_path_via_resolver(group_path, &parsed, r))
261                .or_else(|| resolve_edifact_path(group_path, &parsed, mig));
262
263            if let Some(edifact_path) = edifact_path {
264                out.push(FieldEntry {
265                    edifact_path,
266                    bo4e_field: target.to_string(),
267                    is_companion,
268                    qualifier: parsed.qualifier.clone(),
269                });
270            }
271        }
272
273        // Second pass: create entries for qualifier paths that reference their
274        // sibling data field. E.g., dtm[93].c507.d2005 (qualifier for gueltigBis)
275        // gets an entry pointing to "gueltigBis" so the missing-qualifier error
276        // resolves to stammdaten.Prozessdaten.gueltigBis instead of just Prozessdaten.
277        for qp in &qualifier_paths {
278            if let Some(ref q) = qp.parsed.qualifier {
279                let key = (qp.parsed.segment_tag.clone(), q.clone());
280                if let Some(sibling_field) = tag_qualifier_to_field.get(&key)
281                {
282                    let edifact_path = resolver
283                        .and_then(|r| {
284                            resolve_edifact_path_via_resolver(group_path, &qp.parsed, r)
285                        })
286                        .or_else(|| resolve_edifact_path(group_path, &qp.parsed, mig));
287
288                    if let Some(edifact_path) = edifact_path {
289                        out.push(FieldEntry {
290                            edifact_path,
291                            bo4e_field: sibling_field.clone(),
292                            is_companion,
293                            qualifier: qp.parsed.qualifier.clone(),
294                        });
295                    }
296                }
297            }
298        }
299    }
300
301    fn build_bo4e_path(&self, entry: &IndexEntry, field: &FieldEntry) -> String {
302        let location = match entry.location {
303            FieldLocation::Stammdaten => "stammdaten",
304        };
305        if field.is_companion {
306            if let Some(ref ct) = entry.companion_type {
307                format!(
308                    "{}.{}.{}.{}",
309                    location,
310                    entry.entity,
311                    to_camel_first_lower(ct),
312                    field.bo4e_field
313                )
314            } else {
315                format!("{}.{}.{}", location, entry.entity, field.bo4e_field)
316            }
317        } else {
318            format!("{}.{}.{}", location, entry.entity, field.bo4e_field)
319        }
320    }
321
322    fn build_entity_path(&self, entry: &IndexEntry) -> String {
323        let location = match entry.location {
324            FieldLocation::Stammdaten => "stammdaten",
325        };
326        format!("{}.{}", location, entry.entity)
327    }
328}
329
/// Parsed TOML field path components.
struct ParsedTomlPath {
    /// Segment tag in uppercase (e.g., "LOC", "DTM").
    segment_tag: String,
    /// Element index (e.g., 1 in "loc.1.0").
    element_idx: usize,
    /// Optional component sub-index (e.g., 0 in "loc.1.0").
    component_idx: Option<usize>,
    /// Optional qualifier from the tag (e.g., "93" from "dtm[93]").
    qualifier: Option<String>,
}

/// Parse a TOML field path like "loc.1.0" or "dtm[92].0.1".
///
/// The path is split on `.`: the first part is the segment tag (optionally
/// followed by a bracketed qualifier), the second the element index and the
/// optional third the component index. Further parts are ignored.
///
/// Returns `None` when there are fewer than two parts or when an index is
/// not an unsigned integer. Malformed brackets never panic: an unterminated
/// `[` takes the remainder of the tag part as qualifier, and a `]` is only
/// honoured when it follows the `[`.
fn parse_toml_path(path: &str) -> Option<ParsedTomlPath> {
    let mut parts = path.split('.');
    let raw_tag = parts.next()?;
    let element_idx: usize = parts.next()?.parse().ok()?;
    let component_idx = match parts.next() {
        Some(part) => Some(part.parse::<usize>().ok()?),
        None => None,
    };

    // Extract qualifier from tag: "dtm[92]" → tag="dtm", qualifier=Some("92")
    let (tag, qualifier) = match raw_tag.split_once('[') {
        Some((tag, rest)) => {
            // Look for the closing bracket only after the opening one, so a
            // stray earlier ']' cannot produce an inverted slice range.
            let inner = rest.split_once(']').map_or(rest, |(inner, _)| inner);
            // Strip occurrence suffix: "Z34,1" → "Z34"
            let qualifier = inner.split(',').next().unwrap_or(inner);
            (tag, Some(qualifier.to_string()))
        }
        None => (raw_tag, None),
    };

    Some(ParsedTomlPath {
        segment_tag: tag.to_uppercase(),
        element_idx,
        component_idx,
        qualifier,
    })
}
375
/// Turn a dotted `source_group` into a slash-separated group path.
///
/// Each dot-separated part is cut at its first `:` so that repetition
/// suffixes disappear: "SG4.SG5" → "SG4/SG5", "SG8:1.SG10" → "SG8/SG10".
/// An empty input yields an empty string.
fn source_group_to_slash(source_group: &str) -> String {
    let mut slash_path = String::with_capacity(source_group.len());
    for (idx, part) in source_group.split('.').enumerate() {
        if idx > 0 {
            slash_path.push('/');
        }
        let group_id = part.split_once(':').map_or(part, |(id, _)| id);
        slash_path.push_str(group_id);
    }
    slash_path
}
391
/// Classify entity location. All entities are now in stammdaten
/// (the transaktionsdaten split has been removed).
///
/// The entity name is accepted but not inspected, so every name yields
/// `FieldLocation::Stammdaten`.
fn classify_entity(_entity: &str) -> FieldLocation {
    FieldLocation::Stammdaten
}
397
398/// Resolve a parsed TOML path to an AHB-style EDIFACT path using the ReversePathResolver.
399///
400/// This is more accurate than MIG-based resolution because the resolver is built from
401/// unmerged PID schema JSONs, preserving composites that get lost during MIG merging.
402fn resolve_edifact_path_via_resolver(
403    group_path: &str,
404    parsed: &ParsedTomlPath,
405    resolver: &ReversePathResolver,
406) -> Option<String> {
407    // Build the numeric path: "cci.2.0" from tag=CCI, elem=2, comp=0
408    let numeric_path = if let Some(ci) = parsed.component_idx {
409        format!(
410            "{}.{}.{}",
411            parsed.segment_tag.to_lowercase(),
412            parsed.element_idx,
413            ci
414        )
415    } else {
416        format!(
417            "{}.{}",
418            parsed.segment_tag.to_lowercase(),
419            parsed.element_idx
420        )
421    };
422
423    // Use ReversePathResolver to get named path: "cci.c240.d7037"
424    let named = resolver.reverse_path(&numeric_path);
425    if named == numeric_path {
426        // Resolver couldn't resolve — not in schema
427        return None;
428    }
429
430    // Convert named path to AHB-style: "cci.c240.d7037" → "CCI/C240/7037"
431    let parts: Vec<&str> = named.split('.').collect();
432    let ahb_parts: Vec<String> = parts
433        .iter()
434        .map(|p| {
435            // Strip qualifier suffix: "dtm[92]" → "DTM"
436            let clean = if let Some(bracket) = p.find('[') {
437                &p[..bracket]
438            } else {
439                p
440            };
441            // Strip edifact id prefix (c/d/s): "c240" → "C240", "d7037" → "7037"
442            if clean.len() > 1
443                && (clean.starts_with('c') || clean.starts_with('C'))
444                && clean[1..].chars().next().is_some_and(|c| c.is_ascii_digit())
445            {
446                clean.to_uppercase()
447            } else if clean.len() > 1
448                && (clean.starts_with('d') || clean.starts_with('D'))
449                && clean[1..].chars().next().is_some_and(|c| c.is_ascii_digit())
450            {
451                // Data element: strip 'd' prefix → "7037"
452                clean[1..].to_string()
453            } else {
454                clean.to_uppercase()
455            }
456        })
457        .collect();
458
459    let edifact_suffix = ahb_parts.join("/");
460
461    if group_path.is_empty() {
462        Some(edifact_suffix)
463    } else {
464        Some(format!("{}/{}", group_path, edifact_suffix))
465    }
466}
467
468/// Resolve a parsed TOML path to an AHB-style EDIFACT path using the MIG.
469fn resolve_edifact_path(
470    group_path: &str,
471    parsed: &ParsedTomlPath,
472    mig: &MigSchema,
473) -> Option<String> {
474    // Find the segment in the MIG
475    let segment = find_segment_in_mig(mig, group_path, &parsed.segment_tag)?;
476
477    // Build a unified list of (position, element_kind) sorted by position
478    let resolved = resolve_element_at_position(segment, parsed.element_idx, parsed.component_idx)?;
479
480    let prefix = if group_path.is_empty() {
481        parsed.segment_tag.clone()
482    } else {
483        format!("{}/{}", group_path, parsed.segment_tag)
484    };
485
486    match resolved {
487        ResolvedElement::DataElement(id) => Some(format!("{}/{}", prefix, id)),
488        ResolvedElement::CompositeElement(composite_id, element_id) => {
489            Some(format!("{}/{}/{}", prefix, composite_id, element_id))
490        }
491    }
492}
493
/// Outcome of looking up an element position inside a MIG segment.
enum ResolvedElement {
    /// A standalone data element: just the element ID.
    DataElement(String),
    /// A component within a composite: (composite_id, data_element_id).
    CompositeElement(String, String),
}
500
501/// Find a segment by tag within a group path in the MIG.
502fn find_segment_in_mig<'a>(
503    mig: &'a MigSchema,
504    group_path: &str,
505    segment_tag: &str,
506) -> Option<&'a MigSegment> {
507    if group_path.is_empty() {
508        // Root-level segment
509        return mig
510            .segments
511            .iter()
512            .find(|s| s.id.eq_ignore_ascii_case(segment_tag));
513    }
514
515    let parts: Vec<&str> = group_path.split('/').collect();
516
517    // Find the first group
518    let mut current_group = mig
519        .segment_groups
520        .iter()
521        .find(|g| g.id.eq_ignore_ascii_case(parts[0]))?;
522
523    // Navigate nested groups
524    for &part in &parts[1..] {
525        current_group = current_group
526            .nested_groups
527            .iter()
528            .find(|g| g.id.eq_ignore_ascii_case(part))?;
529    }
530
531    find_segment_in_group(current_group, segment_tag)
532}
533
534/// Find a segment by tag within a group (checking the group and its nested groups).
535fn find_segment_in_group<'a>(
536    group: &'a MigSegmentGroup,
537    segment_tag: &str,
538) -> Option<&'a MigSegment> {
539    group
540        .segments
541        .iter()
542        .find(|s| s.id.eq_ignore_ascii_case(segment_tag))
543}
544
545/// Resolve an element at a given position within a MIG segment.
546///
547/// Builds a unified position list from data_elements and composites,
548/// then finds what's at element_idx. If it's a composite and component_idx
549/// is provided, returns the sub-element.
550fn resolve_element_at_position(
551    segment: &MigSegment,
552    element_idx: usize,
553    component_idx: Option<usize>,
554) -> Option<ResolvedElement> {
555    // Check composites first — they have a position field
556    if let Some(composite) = segment
557        .composites
558        .iter()
559        .find(|c| c.position == element_idx)
560    {
561        let comp_idx = component_idx.unwrap_or(0);
562        // Find the data element at the component sub-index by sorting by position
563        let mut sub_elements: Vec<_> = composite.data_elements.iter().collect();
564        sub_elements.sort_by_key(|de| de.position);
565        let de = sub_elements.get(comp_idx)?;
566        return Some(ResolvedElement::CompositeElement(
567            composite.id.clone(),
568            de.id.clone(),
569        ));
570    }
571
572    // Check standalone data elements
573    if let Some(de) = segment
574        .data_elements
575        .iter()
576        .find(|d| d.position == element_idx)
577    {
578        return Some(ResolvedElement::DataElement(de.id.clone()));
579    }
580
581    None
582}
583
/// Lowercase the first character of `s`, leaving the rest untouched
/// (PascalCase → camelCase). An empty input yields an empty string.
fn to_camel_first_lower(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };
    // `to_lowercase` may expand to several chars, hence the chained iterator.
    first.to_lowercase().chain(rest).collect()
}
592
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a field entry for a hand-assembled index.
    fn field(
        edifact_path: &str,
        bo4e_field: &str,
        qualifier: Option<&str>,
        is_companion: bool,
    ) -> FieldEntry {
        FieldEntry {
            edifact_path: edifact_path.to_string(),
            bo4e_field: bo4e_field.to_string(),
            is_companion,
            qualifier: qualifier.map(str::to_string),
        }
    }

    /// Build an index entry for a hand-assembled index.
    fn entry(
        edifact_prefix: &str,
        entity: &str,
        companion_type: Option<&str>,
        fields: Vec<FieldEntry>,
    ) -> IndexEntry {
        IndexEntry {
            edifact_prefix: edifact_prefix.to_string(),
            entity: entity.to_string(),
            location: FieldLocation::Stammdaten,
            companion_type: companion_type.map(str::to_string),
            fields,
        }
    }

    /// Index with two DTM data fields that share one EDIFACT path.
    fn dtm_index() -> Bo4eFieldIndex {
        Bo4eFieldIndex {
            entries: vec![entry(
                "SG4",
                "Prozessdaten",
                None,
                vec![
                    field("SG4/DTM/C507/2380", "gueltigAb", Some("92"), false),
                    field("SG4/DTM/C507/2380", "gueltigBis", Some("93"), false),
                ],
            )],
        }
    }

    #[test]
    fn test_source_group_to_slash() {
        assert_eq!(source_group_to_slash("SG4.SG5"), "SG4/SG5");
        assert_eq!(source_group_to_slash("SG4"), "SG4");
        assert_eq!(source_group_to_slash("SG8:1.SG10"), "SG8/SG10");
        assert_eq!(source_group_to_slash(""), "");
    }

    #[test]
    fn test_parse_toml_path() {
        let p = parse_toml_path("loc.1.0").unwrap();
        assert_eq!(p.segment_tag, "LOC");
        assert_eq!(p.element_idx, 1);
        assert_eq!(p.component_idx, Some(0));
        assert_eq!(p.qualifier, None);

        let p = parse_toml_path("ide.1").unwrap();
        assert_eq!(p.segment_tag, "IDE");
        assert_eq!(p.element_idx, 1);
        assert_eq!(p.component_idx, None);
        assert_eq!(p.qualifier, None);

        let p = parse_toml_path("dtm[92].0.1").unwrap();
        assert_eq!(p.segment_tag, "DTM");
        assert_eq!(p.element_idx, 0);
        assert_eq!(p.component_idx, Some(1));
        assert_eq!(p.qualifier.as_deref(), Some("92"));

        assert!(parse_toml_path("loc").is_none());
    }

    #[test]
    fn test_parse_toml_path_strips_occurrence_suffix() {
        let p = parse_toml_path("rff[Z34,1].1.0").unwrap();
        assert_eq!(p.segment_tag, "RFF");
        assert_eq!(p.qualifier.as_deref(), Some("Z34"));
    }

    #[test]
    fn test_parse_toml_path_rejects_non_numeric_indices() {
        assert!(parse_toml_path("loc.x").is_none());
        assert!(parse_toml_path("loc.1.y").is_none());
    }

    #[test]
    fn test_classify_entity() {
        // All entities are now classified as Stammdaten (no more transaktionsdaten split)
        for entity in ["Prozessdaten", "Nachricht", "Marktlokation", "Marktteilnehmer"] {
            assert!(matches!(
                classify_entity(entity),
                FieldLocation::Stammdaten
            ));
        }
    }

    #[test]
    fn test_to_camel_first_lower() {
        assert_eq!(
            to_camel_first_lower("MarktlokationEdifact"),
            "marktlokationEdifact"
        );
        assert_eq!(to_camel_first_lower("Foo"), "foo");
        assert_eq!(to_camel_first_lower(""), "");
    }

    #[test]
    fn test_resolve_returns_none_for_unknown_path() {
        let index = Bo4eFieldIndex { entries: vec![] };
        assert!(index.resolve("SG99/UNKNOWN/9999", None).is_none());
    }

    #[test]
    fn test_resolve_exact_match_uses_hint_to_disambiguate() {
        let index = dtm_index();
        assert_eq!(
            index.resolve("SG4/DTM/C507/2380", Some("93")).as_deref(),
            Some("stammdaten.Prozessdaten.gueltigBis")
        );
        // Without a hint the first exact match wins.
        assert_eq!(
            index.resolve("SG4/DTM/C507/2380", None).as_deref(),
            Some("stammdaten.Prozessdaten.gueltigAb")
        );
    }

    #[test]
    fn test_resolve_sibling_match_for_unindexed_qualifier_element() {
        let index = dtm_index();
        assert_eq!(
            index.resolve("SG4/DTM/C507/2005", Some("93")).as_deref(),
            Some("stammdaten.Prozessdaten.gueltigBis")
        );
    }

    #[test]
    fn test_resolve_falls_back_to_entity_prefix() {
        let index = dtm_index();
        assert_eq!(
            index.resolve("SG4/STS/C556/9013", None).as_deref(),
            Some("stammdaten.Prozessdaten")
        );
    }

    #[test]
    fn test_resolve_companion_field_includes_companion_type() {
        let index = Bo4eFieldIndex {
            entries: vec![entry(
                "SG4/SG5",
                "Marktlokation",
                Some("MarktlokationEdifact"),
                vec![field("SG4/SG5/LOC/3227", "lokationsTyp", None, true)],
            )],
        };
        assert_eq!(
            index.resolve("SG4/SG5/LOC/3227", None).as_deref(),
            Some("stammdaten.Marktlokation.marktlokationEdifact.lokationsTyp")
        );
    }

    #[test]
    fn test_debug_entries_lists_every_field() {
        let entries = dtm_index().debug_entries();
        assert_eq!(
            entries,
            vec![
                (
                    "SG4/DTM/C507/2380".to_string(),
                    "Prozessdaten".to_string(),
                    "gueltigAb".to_string()
                ),
                (
                    "SG4/DTM/C507/2380".to_string(),
                    "Prozessdaten".to_string(),
                    "gueltigBis".to_string()
                ),
            ]
        );
    }
}
661}