Skip to main content

mig_bo4e/
code_lookup.rs

//! Code enrichment lookup — maps EDIFACT companion field codes to human-readable meanings.
//!
//! Built from PID schema JSON files. Used by the mapping engine to automatically
//! enrich companion field values during forward mapping (EDIFACT → BO4E).
5
6use serde_json::Value;
7use std::collections::{BTreeMap, HashMap};
8use std::path::Path;
9
/// Position of a code-bearing field inside a PID schema, expressed as
/// `(source_path, segment_tag, element_index, component_index)`.
///
/// * `source_path` matches the TOML `source_path` field (e.g., "sg4.sg8_z01.sg10");
///   root-level segments are registered under the empty string.
/// * `segment_tag` is uppercase (e.g., "CCI", "CAV").
/// * `element_index` is the element's `index` in the schema.
/// * `component_index` is the component's `sub_index`; simple (non-composite)
///   elements are registered with `0`.
pub type CodeLookupKey = (String, String, usize, usize);
15
16/// Enrichment data for a single EDIFACT code value.
17#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
18pub struct CodeEnrichment {
19    pub meaning: String,
20    pub enum_key: Option<String>,
21}
22
23/// Maps EDIFACT code values to their enrichment data (meaning + optional enum key).
24/// E.g., "Z15" → CodeEnrichment { meaning: "Haushaltskunde gem. EnWG", enum_key: Some("HAUSHALTSKUNDE_ENWG") }.
25pub type CodeMeanings = BTreeMap<String, CodeEnrichment>;
26
27/// Complete code lookup table built from a PID schema JSON.
28#[derive(Debug, Clone, Default)]
29pub struct CodeLookup {
30    entries: HashMap<CodeLookupKey, CodeMeanings>,
31}
32
33// Custom serialization: convert tuple keys to "source_path|segment_tag|elem|comp" strings
34impl serde::Serialize for CodeLookup {
35    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
36        use serde::ser::SerializeMap;
37        let mut map = serializer.serialize_map(Some(self.entries.len()))?;
38        for ((path, tag, elem, comp), meanings) in &self.entries {
39            let key = format!("{path}|{tag}|{elem}|{comp}");
40            map.serialize_entry(&key, meanings)?;
41        }
42        map.end()
43    }
44}
45
46impl<'de> serde::Deserialize<'de> for CodeLookup {
47    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
48        let raw: HashMap<String, CodeMeanings> = HashMap::deserialize(deserializer)?;
49        let mut entries = HashMap::with_capacity(raw.len());
50        for (key_str, meanings) in raw {
51            let parts: Vec<&str> = key_str.splitn(4, '|').collect();
52            if parts.len() == 4 {
53                let elem: usize = parts[2].parse().map_err(serde::de::Error::custom)?;
54                let comp: usize = parts[3].parse().map_err(serde::de::Error::custom)?;
55                entries.insert(
56                    (parts[0].to_string(), parts[1].to_string(), elem, comp),
57                    meanings,
58                );
59            }
60        }
61        Ok(Self { entries })
62    }
63}
64
65impl CodeLookup {
66    /// Build a CodeLookup from a PID schema JSON file.
67    pub fn from_schema_file(path: &Path) -> Result<Self, std::io::Error> {
68        let content = std::fs::read_to_string(path)?;
69        let schema: Value = serde_json::from_str(&content)
70            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
71        Ok(Self::from_schema_value(&schema))
72    }
73
74    /// Build a CodeLookup from an already-parsed PID schema JSON value.
75    pub fn from_schema_value(schema: &Value) -> Self {
76        let mut entries = HashMap::new();
77        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
78            for (group_key, group_value) in fields {
79                Self::walk_group(group_key, group_value, &mut entries);
80            }
81        }
82        // Root-level segments (BGM, DTM, etc.) use empty source_path.
83        if let Some(root_segments) = schema.get("root_segments").and_then(|s| s.as_array()) {
84            for segment in root_segments {
85                let seg_id = segment
86                    .get("id")
87                    .and_then(|v| v.as_str())
88                    .unwrap_or("")
89                    .to_uppercase();
90                Self::process_segment("", &seg_id, segment, &mut entries);
91            }
92        }
93        Self { entries }
94    }
95
96    /// Check if a companion field at the given position is a code-type field.
97    pub fn is_code_field(
98        &self,
99        source_path: &str,
100        segment_tag: &str,
101        element_index: usize,
102        component_index: usize,
103    ) -> bool {
104        let key = (
105            source_path.to_string(),
106            segment_tag.to_string(),
107            element_index,
108            component_index,
109        );
110        self.entries.contains_key(&key)
111    }
112
113    /// Get the full enrichment data for a code value at the given position.
114    /// Returns `None` if the position is not a code field or the value is unknown.
115    pub fn enrichment_for(
116        &self,
117        source_path: &str,
118        segment_tag: &str,
119        element_index: usize,
120        component_index: usize,
121        value: &str,
122    ) -> Option<&CodeEnrichment> {
123        let key = (
124            source_path.to_string(),
125            segment_tag.to_string(),
126            element_index,
127            component_index,
128        );
129        self.entries
130            .get(&key)
131            .and_then(|meanings| meanings.get(value))
132    }
133
134    /// Get the human-readable meaning for a code value at the given position.
135    /// Returns `None` if the position is not a code field or the value is unknown.
136    pub fn meaning_for(
137        &self,
138        source_path: &str,
139        segment_tag: &str,
140        element_index: usize,
141        component_index: usize,
142        value: &str,
143    ) -> Option<&str> {
144        self.enrichment_for(
145            source_path,
146            segment_tag,
147            element_index,
148            component_index,
149            value,
150        )
151        .map(|e| e.meaning.as_str())
152    }
153
154    /// Walk a group node recursively, collecting code entries.
155    fn walk_group(
156        path_prefix: &str,
157        group: &Value,
158        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
159    ) {
160        if let Some(segments) = group.get("segments").and_then(|s| s.as_array()) {
161            for segment in segments {
162                let seg_id = segment
163                    .get("id")
164                    .and_then(|v| v.as_str())
165                    .unwrap_or("")
166                    .to_uppercase();
167                Self::process_segment(path_prefix, &seg_id, segment, entries);
168            }
169        }
170        if let Some(children) = group.get("children").and_then(|c| c.as_object()) {
171            for (child_key, child_value) in children {
172                let child_path = format!("{}.{}", path_prefix, child_key);
173                Self::walk_group(&child_path, child_value, entries);
174            }
175            // Create aggregate entries at the base path for discriminated variants.
176            // E.g., sg12_z63, sg12_z65, sg12_z66 → also register at sg12 (unioned codes).
177            // This supports TOMLs using non-discriminated source_path (e.g., "sg4.sg12").
178            Self::merge_variant_entries(path_prefix, children, entries);
179        }
180    }
181
182    /// Process a single segment, collecting code entries for its elements/components.
183    fn process_segment(
184        source_path: &str,
185        segment_tag: &str,
186        segment: &Value,
187        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
188    ) {
189        let Some(elements) = segment.get("elements").and_then(|e| e.as_array()) else {
190            return;
191        };
192        for element in elements {
193            let element_index = element.get("index").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
194
195            // Simple element (no composite) with codes
196            if let Some("code") = element.get("type").and_then(|v| v.as_str()) {
197                if let Some(codes) = element.get("codes").and_then(|c| c.as_array()) {
198                    let meanings = Self::extract_codes(codes);
199                    if !meanings.is_empty() {
200                        let key = (
201                            source_path.to_string(),
202                            segment_tag.to_string(),
203                            element_index,
204                            0,
205                        );
206                        entries.entry(key).or_default().extend(meanings);
207                    }
208                }
209            }
210
211            // Composite components
212            if let Some(components) = element.get("components").and_then(|c| c.as_array()) {
213                for component in components {
214                    if let Some("code") = component.get("type").and_then(|v| v.as_str()) {
215                        let sub_index = component
216                            .get("sub_index")
217                            .and_then(|v| v.as_u64())
218                            .unwrap_or(0) as usize;
219                        if let Some(codes) = component.get("codes").and_then(|c| c.as_array()) {
220                            let meanings = Self::extract_codes(codes);
221                            if !meanings.is_empty() {
222                                let key = (
223                                    source_path.to_string(),
224                                    segment_tag.to_string(),
225                                    element_index,
226                                    sub_index,
227                                );
228                                entries.entry(key).or_default().extend(meanings);
229                            }
230                        }
231                    }
232                }
233            }
234        }
235    }
236
237    /// Merge code entries from discriminated variant children into aggregate base-path entries.
238    ///
239    /// When the schema has `sg12_z63`, `sg12_z65`, etc., each gets its own CodeLookup entries
240    /// at `prefix.sg12_z63`, `prefix.sg12_z65`. This method also creates entries at the
241    /// base path `prefix.sg12` by unioning all codes from the variants. This supports
242    /// TOMLs that use a non-discriminated `source_path` (e.g., the Geschaeftspartner pattern).
243    fn merge_variant_entries(
244        path_prefix: &str,
245        children: &serde_json::Map<String, Value>,
246        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
247    ) {
248        // Group children by base name (part before '_'): sg12_z63 → sg12
249        let mut bases: HashMap<&str, Vec<&str>> = HashMap::new();
250        for child_key in children.keys() {
251            if let Some(underscore_pos) = child_key.find('_') {
252                let base = &child_key[..underscore_pos];
253                bases.entry(base).or_default().push(child_key);
254            }
255        }
256
257        for (base, variant_keys) in &bases {
258            if variant_keys.len() < 2 {
259                continue; // Not a discriminated group
260            }
261            let base_path = format!("{}.{}", path_prefix, base);
262            // Collect all variant-path entries and merge into base-path entries
263            let mut merged: HashMap<(String, usize, usize), CodeMeanings> = HashMap::new();
264            for variant_key in variant_keys {
265                let variant_path = format!("{}.{}", path_prefix, variant_key);
266                for (key, meanings) in entries.iter() {
267                    if key.0 == variant_path {
268                        let agg_key = (key.1.clone(), key.2, key.3);
269                        let target = merged.entry(agg_key).or_default();
270                        for (k, v) in meanings {
271                            target.insert(k.clone(), v.clone());
272                        }
273                    }
274                }
275            }
276            for ((seg_tag, elem_idx, comp_idx), meanings) in merged {
277                let key = (base_path.clone(), seg_tag, elem_idx, comp_idx);
278                entries.entry(key).or_default().extend(meanings);
279            }
280        }
281    }
282
283    /// Extract code value→enrichment mappings from a JSON codes array.
284    fn extract_codes(codes: &[Value]) -> CodeMeanings {
285        let mut meanings = BTreeMap::new();
286        for code in codes {
287            if let (Some(value), Some(name)) = (
288                code.get("value").and_then(|v| v.as_str()),
289                code.get("name").and_then(|v| v.as_str()),
290            ) {
291                let enum_key = code
292                    .get("enum")
293                    .and_then(|v| v.as_str())
294                    .map(|s| s.to_string());
295                meanings.insert(
296                    value.to_string(),
297                    CodeEnrichment {
298                        meaning: name.to_string(),
299                        enum_key,
300                    },
301                );
302            }
303        }
304        meanings
305    }
306}
307
#[cfg(test)]
mod tests {
    use super::*;

    /// Load a generated PID schema for a test.
    ///
    /// Returns `None` (after printing a note) when the schema file does not
    /// exist, so the calling test can return early instead of failing.
    fn load_schema_or_skip(schema_path: &Path) -> Option<CodeLookup> {
        if !schema_path.exists() {
            eprintln!("Skipping: PID schema not found");
            return None;
        }
        Some(CodeLookup::from_schema_file(schema_path).unwrap())
    }

    #[test]
    fn test_parse_pid_55001_schema() {
        let schema_path = Path::new(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55001_schema.json"
        ));
        let Some(lookup) = load_schema_or_skip(schema_path) else {
            return;
        };

        // CCI element 2 component 0 in sg4.sg8_z01.sg10 — Haushaltskunde codes
        assert!(lookup.is_code_field("sg4.sg8_z01.sg10", "CCI", 2, 0));
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z01.sg10", "CCI", 2, 0, "Z15"),
            Some("Haushaltskunde gem. EnWG")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z01.sg10", "CCI", 2, 0, "Z18"),
            Some("Kein Haushaltskunde gem. EnWG")
        );

        // CCI element 0 in sg4.sg8_z79.sg10 — Produkteigenschaft
        assert!(lookup.is_code_field("sg4.sg8_z79.sg10", "CCI", 0, 0));
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z79.sg10", "CCI", 0, 0, "Z66"),
            Some("Produkteigenschaft")
        );

        // CAV element 0 component 0 — code field
        assert!(lookup.is_code_field("sg4.sg8_z79.sg10", "CAV", 0, 0));

        // CAV element 0 component 3 — data field, NOT a code
        assert!(!lookup.is_code_field("sg4.sg8_z79.sg10", "CAV", 0, 3));

        // LOC element 1 — data field
        assert!(!lookup.is_code_field("sg4.sg5_z16", "LOC", 1, 0));
    }

    #[test]
    fn test_from_inline_schema() {
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg8_test": {
                            "children": {
                                "sg10": {
                                    "segments": [{
                                        "id": "CCI",
                                        "elements": [{
                                            "index": 2,
                                            "components": [{
                                                "sub_index": 0,
                                                "type": "code",
                                                "codes": [
                                                    {"value": "A1", "name": "Alpha"},
                                                    {"value": "B2", "name": "Beta"}
                                                ]
                                            }]
                                        }]
                                    }],
                                    "source_group": "SG10"
                                }
                            },
                            "segments": [],
                            "source_group": "SG8"
                        }
                    },
                    "segments": [],
                    "source_group": "SG4"
                }
            }
        });

        let lookup = CodeLookup::from_schema_value(&schema);

        assert!(lookup.is_code_field("sg4.sg8_test.sg10", "CCI", 2, 0));
        assert_eq!(
            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "A1"),
            Some("Alpha")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "B2"),
            Some("Beta")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "XX"),
            None
        );
        assert!(!lookup.is_code_field("sg4.sg8_test.sg10", "CCI", 0, 0));
    }

    #[test]
    fn test_discriminated_variant_merge() {
        // Schema with discriminated SG12 variants (sg12_z63, sg12_z65)
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg12_z63": {
                            "segments": [{
                                "id": "NAD",
                                "elements": [{
                                    "index": 0,
                                    "type": "code",
                                    "codes": [{"value": "Z63", "name": "Standortadresse"}]
                                }]
                            }],
                            "source_group": "SG12"
                        },
                        "sg12_z65": {
                            "segments": [{
                                "id": "NAD",
                                "elements": [
                                    {
                                        "index": 0,
                                        "type": "code",
                                        "codes": [{"value": "Z65", "name": "Kunde des LF"}]
                                    },
                                    {
                                        "index": 3,
                                        "components": [{
                                            "sub_index": 5,
                                            "type": "code",
                                            "codes": [
                                                {"value": "Z01", "name": "Herr"},
                                                {"value": "Z02", "name": "Frau"}
                                            ]
                                        }]
                                    }
                                ]
                            }],
                            "source_group": "SG12"
                        }
                    },
                    "segments": [],
                    "source_group": "SG4"
                }
            }
        });

        let lookup = CodeLookup::from_schema_value(&schema);

        // Variant-specific paths still work
        assert!(lookup.is_code_field("sg4.sg12_z63", "NAD", 0, 0));
        assert!(lookup.is_code_field("sg4.sg12_z65", "NAD", 0, 0));

        // Base path also works (merged from variants)
        assert!(lookup.is_code_field("sg4.sg12", "NAD", 0, 0));
        assert_eq!(
            lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z63"),
            Some("Standortadresse")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z65"),
            Some("Kunde des LF")
        );

        // Anrede code from z65 also available at base path
        assert!(lookup.is_code_field("sg4.sg12", "NAD", 3, 5));
        assert_eq!(
            lookup.meaning_for("sg4.sg12", "NAD", 3, 5, "Z01"),
            Some("Herr")
        );
    }

    #[test]
    fn test_pid_55013_sg12_base_path() {
        let schema_path = Path::new(concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55013_schema.json"
        ));
        let Some(lookup) = load_schema_or_skip(schema_path) else {
            return;
        };

        // Base path "sg4.sg12" should have merged NAD qualifier codes from all variants
        assert!(lookup.is_code_field("sg4.sg12", "NAD", 0, 0));
        // Z67 meaning comes from sg12_z67 variant
        assert!(lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z67").is_some());
        // All 7 SG12 qualifiers should be present
        for code in &["Z63", "Z65", "Z66", "Z67", "Z68", "Z69", "Z70"] {
            assert!(
                lookup.meaning_for("sg4.sg12", "NAD", 0, 0, code).is_some(),
                "Missing meaning for NAD qualifier {code} at base path sg4.sg12"
            );
        }
    }

    #[test]
    fn test_multi_segment_code_merge() {
        // SG10 with 3 CCI segments at same element position but different codes.
        // All codes should be merged, not overwritten by last CCI.
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg8_z98": {
                            "children": {
                                "sg10": {
                                    "segments": [
                                        {
                                            "id": "CCI",
                                            "elements": [{"index": 2, "components": [{
                                                "sub_index": 0, "type": "code",
                                                "codes": [{"value": "ZB3", "name": "Zugeordneter Marktpartner"}]
                                            }]}]
                                        },
                                        {
                                            "id": "CAV",
                                            "elements": [{"index": 0, "components": [{
                                                "sub_index": 0, "type": "code",
                                                "codes": [{"value": "Z91", "name": "MSB"}]
                                            }]}]
                                        },
                                        {
                                            "id": "CCI",
                                            "elements": [{"index": 2, "components": [{
                                                "sub_index": 0, "type": "code",
                                                "codes": [{"value": "E03", "name": "Spannungsebene"}]
                                            }]}]
                                        },
                                        {
                                            "id": "CAV",
                                            "elements": [{"index": 0, "components": [{
                                                "sub_index": 0, "type": "code",
                                                "codes": [
                                                    {"value": "E05", "name": "Mittelspannung"},
                                                    {"value": "E06", "name": "Niederspannung"}
                                                ]
                                            }]}]
                                        },
                                        {
                                            "id": "CCI",
                                            "elements": [{"index": 2, "components": [{
                                                "sub_index": 0, "type": "code",
                                                "codes": [
                                                    {"value": "Z15", "name": "Haushaltskunde"},
                                                    {"value": "Z18", "name": "Kein Haushaltskunde"}
                                                ]
                                            }]}]
                                        }
                                    ],
                                    "source_group": "SG10"
                                }
                            },
                            "segments": [],
                            "source_group": "SG8"
                        }
                    },
                    "segments": [],
                    "source_group": "SG4"
                }
            }
        });

        let lookup = CodeLookup::from_schema_value(&schema);

        // All CCI codes at (2,0) should be present (merged, not overwritten)
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "ZB3"),
            Some("Zugeordneter Marktpartner")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "E03"),
            Some("Spannungsebene")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "Z15"),
            Some("Haushaltskunde")
        );

        // All CAV codes at (0,0) should be present
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z98.sg10", "CAV", 0, 0, "Z91"),
            Some("MSB")
        );
        assert_eq!(
            lookup.meaning_for("sg4.sg8_z98.sg10", "CAV", 0, 0, "E06"),
            Some("Niederspannung")
        );
    }

    #[test]
    fn test_enrichment_for_with_enum() {
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg10": {
                            "segments": [{
                                "id": "CCI",
                                "elements": [{
                                    "index": 2,
                                    "components": [{
                                        "sub_index": 0,
                                        "type": "code",
                                        "codes": [
                                            {"value": "Z15", "name": "Haushaltskunde", "enum": "HAUSHALTSKUNDE"},
                                            {"value": "Z18", "name": "Kein Haushaltskunde", "enum": "KEIN_HAUSHALTSKUNDE"}
                                        ]
                                    }]
                                }]
                            }],
                            "source_group": "SG10"
                        }
                    },
                    "segments": [],
                    "source_group": "SG4"
                }
            }
        });

        let lookup = CodeLookup::from_schema_value(&schema);

        let enrichment = lookup.enrichment_for("sg4.sg10", "CCI", 2, 0, "Z15");
        assert!(enrichment.is_some());
        let e = enrichment.unwrap();
        assert_eq!(e.meaning, "Haushaltskunde");
        assert_eq!(e.enum_key.as_deref(), Some("HAUSHALTSKUNDE"));

        let e2 = lookup
            .enrichment_for("sg4.sg10", "CCI", 2, 0, "Z18")
            .unwrap();
        assert_eq!(e2.enum_key.as_deref(), Some("KEIN_HAUSHALTSKUNDE"));

        // meaning_for still works
        assert_eq!(
            lookup.meaning_for("sg4.sg10", "CCI", 2, 0, "Z15"),
            Some("Haushaltskunde")
        );
    }

    #[test]
    fn test_backward_compat_no_enum() {
        // Old schema format without "enum" field — should still work, enum_key is None
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg10": {
                            "segments": [{
                                "id": "CCI",
                                "elements": [{
                                    "index": 2,
                                    "components": [{
                                        "sub_index": 0,
                                        "type": "code",
                                        "codes": [
                                            {"value": "Z15", "name": "Haushaltskunde"}
                                        ]
                                    }]
                                }]
                            }],
                            "source_group": "SG10"
                        }
                    },
                    "segments": [],
                    "source_group": "SG4"
                }
            }
        });

        let lookup = CodeLookup::from_schema_value(&schema);
        let enrichment = lookup.enrichment_for("sg4.sg10", "CCI", 2, 0, "Z15");
        assert!(enrichment.is_some());
        let e = enrichment.unwrap();
        assert_eq!(e.meaning, "Haushaltskunde");
        assert_eq!(e.enum_key, None); // No enum in old schema
    }

    #[test]
    fn test_serde_round_trip() {
        // The hand-written Serialize/Deserialize impls flatten tuple keys into
        // "source_path|segment_tag|elem|comp" strings; a round trip must give
        // back the same lookups, including the empty source path used for
        // root-level segments.
        let schema = serde_json::json!({
            "fields": {
                "sg4": {
                    "children": {
                        "sg10": {
                            "segments": [{
                                "id": "CCI",
                                "elements": [{
                                    "index": 2,
                                    "components": [{
                                        "sub_index": 0,
                                        "type": "code",
                                        "codes": [
                                            {"value": "Z15", "name": "Haushaltskunde", "enum": "HAUSHALTSKUNDE"},
                                            {"value": "Z18", "name": "Kein Haushaltskunde"}
                                        ]
                                    }]
                                }]
                            }]
                        }
                    }
                }
            },
            "root_segments": [{
                "id": "bgm",
                "elements": [{
                    "index": 0,
                    "type": "code",
                    "codes": [{"value": "E01", "name": "Anmeldung"}]
                }]
            }]
        });

        let original = CodeLookup::from_schema_value(&schema);
        let json = serde_json::to_string(&original).unwrap();
        let restored: CodeLookup = serde_json::from_str(&json).unwrap();

        // Nested group entry, with and without enum key
        let z15 = restored
            .enrichment_for("sg4.sg10", "CCI", 2, 0, "Z15")
            .unwrap();
        assert_eq!(z15.meaning, "Haushaltskunde");
        assert_eq!(z15.enum_key.as_deref(), Some("HAUSHALTSKUNDE"));
        let z18 = restored
            .enrichment_for("sg4.sg10", "CCI", 2, 0, "Z18")
            .unwrap();
        assert_eq!(z18.meaning, "Kein Haushaltskunde");
        assert_eq!(z18.enum_key, None);

        // Root-level segment: empty source path, uppercased tag
        assert!(restored.is_code_field("", "BGM", 0, 0));
        assert_eq!(restored.meaning_for("", "BGM", 0, 0, "E01"), Some("Anmeldung"));

        // Nothing extra appeared
        assert!(!restored.is_code_field("sg4.sg10", "CCI", 0, 0));
    }
}