Skip to main content

mig_bo4e/
code_lookup.rs

1//! Code enrichment lookup — maps EDIFACT companion field codes to human-readable meanings.
2//!
3//! Built from PID schema JSON files. Used by the mapping engine to automatically
4//! enrich companion field values during forward mapping (EDIFACT → BO4E).
5
6use serde_json::Value;
7use std::collections::{BTreeMap, HashMap};
8use std::path::Path;
9
/// Lookup key: `(source_path, segment_tag, qualifier, element_index, component_index)`.
///
/// - `source_path` matches the TOML `source_path` field (e.g., "sg4.sg8_z01.sg10").
/// - `segment_tag` is uppercase (e.g., "CCI", "CAV").
/// - `qualifier` is the segment's discriminating qualifier when one applies
///   (RFF/STS/CCI: element 0 component 0; DTM: c507.d2005) and `None` for
///   segments without a qualifier convention.
/// - `element_index` / `component_index` locate the field inside the segment.
///
/// The qualifier slot scopes lookups so that, for instance, RFF+TN's type=data
/// d1154 (free-text Vorgangsnummer) is not confused with RFF+Z13's type=code
/// d1154 (PID-identifier) at the same path/element/component.
pub type CodeLookupKey = (String, String, Option<String>, usize, usize);
20
/// Enrichment data for a single EDIFACT code value.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CodeEnrichment {
    /// Human-readable meaning of the code (taken from the schema code's `name`).
    pub meaning: String,
    /// Optional enum key for the code (taken from the schema code's `enum`, if present).
    pub enum_key: Option<String>,
}
27
/// Maps EDIFACT code values to their enrichment data (meaning + optional enum key),
/// ordered by code value.
///
/// E.g., "Z15" → CodeEnrichment { meaning: "Haushaltskunde gem. EnWG", enum_key: Some("HAUSHALTSKUNDE_ENWG") }.
pub type CodeMeanings = BTreeMap<String, CodeEnrichment>;
31
/// Complete code lookup table built from a PID schema JSON.
///
/// Built via [`CodeLookup::from_schema_file`] or [`CodeLookup::from_schema_value`];
/// the derived `Default` yields an empty table.
#[derive(Debug, Clone, Default)]
pub struct CodeLookup {
    /// Known code values per (source_path, segment_tag, qualifier, element, component) position.
    entries: HashMap<CodeLookupKey, CodeMeanings>,
}
37
38// Custom serialization: convert tuple keys to "source_path|segment_tag|qualifier|elem|comp"
39// strings. An empty qualifier slot serializes as the empty string between the surrounding
40// pipes (e.g., "sg4|DTM||0|0").
41impl serde::Serialize for CodeLookup {
42    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
43        use serde::ser::SerializeMap;
44        let mut map = serializer.serialize_map(Some(self.entries.len()))?;
45        for ((path, tag, qual, elem, comp), meanings) in &self.entries {
46            let q = qual.as_deref().unwrap_or("");
47            let key = format!("{path}|{tag}|{q}|{elem}|{comp}");
48            map.serialize_entry(&key, meanings)?;
49        }
50        map.end()
51    }
52}
53
54impl<'de> serde::Deserialize<'de> for CodeLookup {
55    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
56        let raw: HashMap<String, CodeMeanings> = HashMap::deserialize(deserializer)?;
57        let mut entries = HashMap::with_capacity(raw.len());
58        for (key_str, meanings) in raw {
59            let parts: Vec<&str> = key_str.splitn(5, '|').collect();
60            if parts.len() == 5 {
61                let qual = if parts[2].is_empty() {
62                    None
63                } else {
64                    Some(parts[2].to_string())
65                };
66                let elem: usize = parts[3].parse().map_err(serde::de::Error::custom)?;
67                let comp: usize = parts[4].parse().map_err(serde::de::Error::custom)?;
68                entries.insert(
69                    (parts[0].to_string(), parts[1].to_string(), qual, elem, comp),
70                    meanings,
71                );
72            }
73        }
74        Ok(Self { entries })
75    }
76}
77
78impl CodeLookup {
79    /// Build a CodeLookup from a PID schema JSON file.
80    pub fn from_schema_file(path: &Path) -> Result<Self, std::io::Error> {
81        let content = std::fs::read_to_string(path)?;
82        let schema: Value = serde_json::from_str(&content)
83            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
84        Ok(Self::from_schema_value(&schema))
85    }
86
87    /// Build a CodeLookup from an already-parsed PID schema JSON value.
88    pub fn from_schema_value(schema: &Value) -> Self {
89        let mut entries = HashMap::new();
90        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
91            for (group_key, group_value) in fields {
92                Self::walk_group(group_key, group_value, &mut entries);
93            }
94        }
95        // Root-level segments (BGM, DTM, etc.) use empty source_path.
96        if let Some(root_segments) = schema.get("root_segments").and_then(|s| s.as_array()) {
97            for segment in root_segments {
98                let seg_id = segment
99                    .get("id")
100                    .and_then(|v| v.as_str())
101                    .unwrap_or("")
102                    .to_uppercase();
103                Self::process_segment("", &seg_id, segment, &mut entries);
104            }
105        }
106        Self { entries }
107    }
108
109    /// Check if a companion field at the given position is a code-type field.
110    ///
111    /// Legacy shim — scans across all qualifier slots and returns true if ANY
112    /// matching entry exists for the (path, tag, elem, comp) tuple. This drifts
113    /// from the original "call _q with None" prescription but is more useful
114    /// for tests that don't have a qualifier handy. Production code paths use
115    /// [`is_code_field_q`] with the discriminator qualifier and a `None`
116    /// fallback for tags without a stored qualifier convention.
117    #[deprecated(
118        note = "use is_code_field_q with the discriminator qualifier; this shim scans across all qualifiers"
119    )]
120    pub fn is_code_field(
121        &self,
122        source_path: &str,
123        segment_tag: &str,
124        element_index: usize,
125        component_index: usize,
126    ) -> bool {
127        // Match if either the unqualified entry exists or any qualifier-scoped
128        // entry matches the path/tag/elem/comp.
129        self.entries.iter().any(|((p, t, _q, e, c), _)| {
130            p == source_path && t == segment_tag && *e == element_index && *c == component_index
131        })
132    }
133
134    /// Qualifier-aware variant: check if the position is a code field for the
135    /// given qualifier. The schema's stored qualifier slot is `Some` only for
136    /// tags with a qualifier convention (RFF/STS/CCI/DTM); other tags (NAD,
137    /// SEQ, LOC, BGM, COM, MOA, …) store entries under `None`. When the strict
138    /// match misses for `Some(q)`, this falls back to the `None` entry so that
139    /// TOMLs discriminated on those tags (e.g. `discriminator = "NAD.d3035=Z09"`)
140    /// still resolve enrichment correctly.
141    ///
142    /// Class A discrimination still works: for RFF, both Z13 (type=code) and
143    /// TN (type=data) carry stored qualifiers, so the strict `Some("Z13")`
144    /// match finds Z13 and the strict `Some("TN")` match finds nothing — the
145    /// `None` fallback also misses (RFF entries are all qualifier-scoped),
146    /// so TN free-text returns false.
147    pub fn is_code_field_q(
148        &self,
149        source_path: &str,
150        segment_tag: &str,
151        qualifier: Option<&str>,
152        element_index: usize,
153        component_index: usize,
154    ) -> bool {
155        // Strict match first.
156        if self.entries.contains_key(&(
157            source_path.to_string(),
158            segment_tag.to_string(),
159            qualifier.map(String::from),
160            element_index,
161            component_index,
162        )) {
163            return true;
164        }
165        // Fall back to None-qualifier entry for tags without a stored qualifier
166        // convention. Only fires when the caller passed a qualifier (otherwise
167        // we already attempted the None lookup above).
168        if qualifier.is_some() {
169            return self.entries.contains_key(&(
170                source_path.to_string(),
171                segment_tag.to_string(),
172                None,
173                element_index,
174                component_index,
175            ));
176        }
177        false
178    }
179
180    /// Get the full enrichment data for a code value at the given position.
181    ///
182    /// Legacy shim — scans across all qualifier slots. See [`enrichment_for_q`]
183    /// for the qualifier-aware version used by the engine. Kept for tests.
184    #[deprecated(
185        note = "use enrichment_for_q with the discriminator qualifier; this shim scans across all qualifiers"
186    )]
187    pub fn enrichment_for(
188        &self,
189        source_path: &str,
190        segment_tag: &str,
191        element_index: usize,
192        component_index: usize,
193        value: &str,
194    ) -> Option<&CodeEnrichment> {
195        // Try unqualified first, then any qualifier-scoped match.
196        let unqualified_key = (
197            source_path.to_string(),
198            segment_tag.to_string(),
199            None,
200            element_index,
201            component_index,
202        );
203        if let Some(e) = self
204            .entries
205            .get(&unqualified_key)
206            .and_then(|meanings| meanings.get(value))
207        {
208            return Some(e);
209        }
210        self.entries
211            .iter()
212            .filter(|((p, t, q, e, c), _)| {
213                p == source_path
214                    && t == segment_tag
215                    && q.is_some()
216                    && *e == element_index
217                    && *c == component_index
218            })
219            .find_map(|(_, meanings)| meanings.get(value))
220    }
221
222    /// Qualifier-aware enrichment lookup.
223    ///
224    /// Strict match on the stored `qualifier` slot first; on miss with
225    /// `qualifier.is_some()`, falls back to the `None` entry. This mirrors
226    /// [`is_code_field_q`]'s lookup shape — segments like NAD/SEQ/LOC store
227    /// their entries under `None` even when the engine queries with the
228    /// discriminator's qualifier value (e.g. `Some("Z09")` from a
229    /// `NAD.d3035=Z09` discriminator).
230    pub fn enrichment_for_q(
231        &self,
232        source_path: &str,
233        segment_tag: &str,
234        qualifier: Option<&str>,
235        element_index: usize,
236        component_index: usize,
237        value: &str,
238    ) -> Option<&CodeEnrichment> {
239        // Strict match first.
240        let strict_key = (
241            source_path.to_string(),
242            segment_tag.to_string(),
243            qualifier.map(String::from),
244            element_index,
245            component_index,
246        );
247        if let Some(e) = self
248            .entries
249            .get(&strict_key)
250            .and_then(|meanings| meanings.get(value))
251        {
252            return Some(e);
253        }
254        // None-fallback for tags without a stored qualifier convention.
255        if qualifier.is_some() {
256            let fallback_key = (
257                source_path.to_string(),
258                segment_tag.to_string(),
259                None,
260                element_index,
261                component_index,
262            );
263            return self
264                .entries
265                .get(&fallback_key)
266                .and_then(|meanings| meanings.get(value));
267        }
268        None
269    }
270
271    /// Get the human-readable meaning for a code value at the given position.
272    /// Returns `None` if the position is not a code field or the value is unknown.
273    ///
274    /// Legacy shim — scans across all qualifier slots via [`enrichment_for`].
275    /// Kept for tests; production code paths use the qualifier-aware
276    /// `enrichment_for_q`.
277    #[deprecated(
278        note = "use enrichment_for_q with the discriminator qualifier; this shim scans across all qualifiers"
279    )]
280    pub fn meaning_for(
281        &self,
282        source_path: &str,
283        segment_tag: &str,
284        element_index: usize,
285        component_index: usize,
286        value: &str,
287    ) -> Option<&str> {
288        #[allow(deprecated)]
289        self.enrichment_for(
290            source_path,
291            segment_tag,
292            element_index,
293            component_index,
294            value,
295        )
296        .map(|e| e.meaning.as_str())
297    }
298
299    /// Whether this code-field's only allowed value equals the given PID.
300    /// Used to suppress decoration for self-referential PID-identifier fields
301    /// (Class C in the 2026-04-28 audit). The qualifier scopes the lookup —
302    /// e.g., RFF+Z13's d1154 in PID 55002 has `value=55002` as the lone code,
303    /// so calling with `qualifier=Some("Z13"), pid="55002"` returns true.
304    pub fn is_pid_self_reference(
305        &self,
306        source_path: &str,
307        segment_tag: &str,
308        qualifier: Option<&str>,
309        element_index: usize,
310        component_index: usize,
311        pid: &str,
312    ) -> bool {
313        let key = (
314            source_path.to_string(),
315            segment_tag.to_string(),
316            qualifier.map(String::from),
317            element_index,
318            component_index,
319        );
320        if let Some(meanings) = self.entries.get(&key) {
321            meanings.len() == 1 && meanings.contains_key(pid)
322        } else {
323            false
324        }
325    }
326
327    /// Walk a group node recursively, collecting code entries.
328    fn walk_group(
329        path_prefix: &str,
330        group: &Value,
331        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
332    ) {
333        if let Some(segments) = group.get("segments").and_then(|s| s.as_array()) {
334            for segment in segments {
335                let seg_id = segment
336                    .get("id")
337                    .and_then(|v| v.as_str())
338                    .unwrap_or("")
339                    .to_uppercase();
340                Self::process_segment(path_prefix, &seg_id, segment, entries);
341            }
342        }
343        if let Some(children) = group.get("children").and_then(|c| c.as_object()) {
344            for (child_key, child_value) in children {
345                let child_path = format!("{}.{}", path_prefix, child_key);
346                Self::walk_group(&child_path, child_value, entries);
347            }
348            // Create aggregate entries at the base path for discriminated variants.
349            // E.g., sg12_z63, sg12_z65, sg12_z66 → also register at sg12 (unioned codes).
350            // This supports TOMLs using non-discriminated source_path (e.g., "sg4.sg12").
351            Self::merge_variant_entries(path_prefix, children, entries);
352        }
353    }
354
355    /// Process a single segment, collecting code entries for its elements/components.
356    ///
357    /// Extracts the segment's qualifier (per-tag convention) and uses it to scope
358    /// the entries. This avoids the (path, tag, elem, comp) collision between
359    /// type=code and type=data segments at the same position (e.g., RFF+Z13's
360    /// PID-identifier d1154 vs RFF+TN's free-text Vorgangsnummer d1154).
361    fn process_segment(
362        source_path: &str,
363        segment_tag: &str,
364        segment: &Value,
365        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
366    ) {
367        let Some(elements) = segment.get("elements").and_then(|e| e.as_array()) else {
368            return;
369        };
370
371        let qualifier = Self::extract_qualifier(segment_tag, elements);
372
373        for element in elements {
374            let element_index = element.get("index").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
375
376            // Simple element (no composite) with codes
377            if let Some("code") = element.get("type").and_then(|v| v.as_str()) {
378                if let Some(codes) = element.get("codes").and_then(|c| c.as_array()) {
379                    let meanings = Self::extract_codes(codes);
380                    if !meanings.is_empty() {
381                        let key = (
382                            source_path.to_string(),
383                            segment_tag.to_string(),
384                            qualifier.clone(),
385                            element_index,
386                            0,
387                        );
388                        entries.entry(key).or_default().extend(meanings);
389                    }
390                }
391            }
392
393            // Composite components
394            if let Some(components) = element.get("components").and_then(|c| c.as_array()) {
395                for component in components {
396                    if let Some("code") = component.get("type").and_then(|v| v.as_str()) {
397                        let sub_index = component
398                            .get("sub_index")
399                            .and_then(|v| v.as_u64())
400                            .unwrap_or(0) as usize;
401                        if let Some(codes) = component.get("codes").and_then(|c| c.as_array()) {
402                            let meanings = Self::extract_codes(codes);
403                            if !meanings.is_empty() {
404                                let key = (
405                                    source_path.to_string(),
406                                    segment_tag.to_string(),
407                                    qualifier.clone(),
408                                    element_index,
409                                    sub_index,
410                                );
411                                entries.entry(key).or_default().extend(meanings);
412                            }
413                        }
414                    }
415                }
416            }
417        }
418    }
419
420    /// Extract a segment's discriminating qualifier from its schema element list.
421    ///
422    /// Conventions:
423    /// - `RFF`, `STS`, `CCI`: qualifier is the type=code value at element 0,
424    ///   component 0 (RFF d1153, STS d9013, CCI d7059).
425    /// - `DTM`: qualifier is at composite c507's component 0 (d2005). This is
426    ///   the same physical position (element 0 component 0) — DTM's element 0
427    ///   IS the c507 composite — so the same lookup applies.
428    /// - All other tags: no qualifier convention; returns `None`.
429    ///
430    /// Only single-value enumerations count as a qualifier (the schema lists
431    /// exactly one allowed code at that position). Segments whose first
432    /// component lists multiple codes don't have a discriminating qualifier
433    /// at the schema level and fall back to `None`.
434    fn extract_qualifier(segment_tag: &str, elements: &[Value]) -> Option<String> {
435        if !matches!(segment_tag, "RFF" | "STS" | "CCI" | "DTM") {
436            return None;
437        }
438        // Find element index 0 (or the first element if no index 0 is set).
439        let element0 = elements
440            .iter()
441            .find(|el| el.get("index").and_then(|v| v.as_u64()) == Some(0))
442            .or_else(|| elements.first())?;
443
444        // Inspect component sub_index 0.
445        let component0 = element0
446            .get("components")
447            .and_then(|c| c.as_array())
448            .and_then(|comps| {
449                comps
450                    .iter()
451                    .find(|c| c.get("sub_index").and_then(|v| v.as_u64()) == Some(0))
452                    .or_else(|| comps.first())
453            });
454
455        let codes_node = if let Some(comp) = component0 {
456            // Composite case (RFF/DTM/CCI/STS — qualifier nested inside composite).
457            if comp.get("type").and_then(|v| v.as_str()) == Some("code") {
458                comp.get("codes").and_then(|c| c.as_array())
459            } else {
460                None
461            }
462        } else if element0.get("type").and_then(|v| v.as_str()) == Some("code") {
463            // Simple-element case.
464            element0.get("codes").and_then(|c| c.as_array())
465        } else {
466            None
467        };
468
469        let codes = codes_node?;
470        if codes.len() != 1 {
471            return None; // Multiple allowed qualifiers — not a single discriminator.
472        }
473        codes[0]
474            .get("value")
475            .and_then(|v| v.as_str())
476            .map(|s| s.to_string())
477    }
478
479    /// Merge code entries from discriminated variant children into aggregate base-path entries.
480    ///
481    /// When the schema has `sg12_z63`, `sg12_z65`, etc., each gets its own CodeLookup entries
482    /// at `prefix.sg12_z63`, `prefix.sg12_z65`. This method also creates entries at the
483    /// base path `prefix.sg12` by unioning all codes from the variants. This supports
484    /// TOMLs that use a non-discriminated `source_path` (e.g., the Geschaeftspartner pattern).
485    fn merge_variant_entries(
486        path_prefix: &str,
487        children: &serde_json::Map<String, Value>,
488        entries: &mut HashMap<CodeLookupKey, CodeMeanings>,
489    ) {
490        // Group children by base name (part before '_'): sg12_z63 → sg12
491        let mut bases: HashMap<&str, Vec<&str>> = HashMap::new();
492        for child_key in children.keys() {
493            if let Some(underscore_pos) = child_key.find('_') {
494                let base = &child_key[..underscore_pos];
495                bases.entry(base).or_default().push(child_key);
496            }
497        }
498
499        for (base, variant_keys) in &bases {
500            if variant_keys.len() < 2 {
501                continue; // Not a discriminated group
502            }
503            let base_path = format!("{}.{}", path_prefix, base);
504            // Collect all variant-path entries and merge into base-path entries.
505            // Aggregation key keeps the qualifier slot so that, e.g., NAD+Z63 vs
506            // NAD+Z65 don't collapse into one entry at the merged base path.
507            let mut merged: HashMap<(String, Option<String>, usize, usize), CodeMeanings> =
508                HashMap::new();
509            for variant_key in variant_keys {
510                let variant_path = format!("{}.{}", path_prefix, variant_key);
511                for (key, meanings) in entries.iter() {
512                    if key.0 == variant_path {
513                        let agg_key = (key.1.clone(), key.2.clone(), key.3, key.4);
514                        let target = merged.entry(agg_key).or_default();
515                        for (k, v) in meanings {
516                            target.insert(k.clone(), v.clone());
517                        }
518                    }
519                }
520            }
521            for ((seg_tag, qual, elem_idx, comp_idx), meanings) in merged {
522                let key = (base_path.clone(), seg_tag, qual, elem_idx, comp_idx);
523                entries.entry(key).or_default().extend(meanings);
524            }
525        }
526    }
527
528    /// Extract code value→enrichment mappings from a JSON codes array.
529    fn extract_codes(codes: &[Value]) -> CodeMeanings {
530        let mut meanings = BTreeMap::new();
531        for code in codes {
532            if let (Some(value), Some(name)) = (
533                code.get("value").and_then(|v| v.as_str()),
534                code.get("name").and_then(|v| v.as_str()),
535            ) {
536                let enum_key = code
537                    .get("enum")
538                    .and_then(|v| v.as_str())
539                    .map(|s| s.to_string());
540                meanings.insert(
541                    value.to_string(),
542                    CodeEnrichment {
543                        meaning: name.to_string(),
544                        enum_key,
545                    },
546                );
547            }
548        }
549        meanings
550    }
551}
552
553#[cfg(test)]
554#[allow(deprecated)]
555mod tests {
556    use super::*;
557
558    #[test]
559    fn test_parse_pid_55001_schema() {
560        let schema_path = Path::new(concat!(
561            env!("CARGO_MANIFEST_DIR"),
562            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55001_schema.json"
563        ));
564        if !schema_path.exists() {
565            eprintln!("Skipping: PID schema not found");
566            return;
567        }
568
569        let lookup = CodeLookup::from_schema_file(schema_path).unwrap();
570
571        // CCI element 2 component 0 in sg4.sg8_z01.sg10 — Haushaltskunde codes
572        assert!(lookup.is_code_field("sg4.sg8_z01.sg10", "CCI", 2, 0));
573        assert_eq!(
574            lookup.meaning_for("sg4.sg8_z01.sg10", "CCI", 2, 0, "Z15"),
575            Some("Haushaltskunde gem. EnWG")
576        );
577        assert_eq!(
578            lookup.meaning_for("sg4.sg8_z01.sg10", "CCI", 2, 0, "Z18"),
579            Some("Kein Haushaltskunde gem. EnWG")
580        );
581
582        // CCI element 0 in sg4.sg8_z79.sg10 — Produkteigenschaft
583        assert!(lookup.is_code_field("sg4.sg8_z79.sg10", "CCI", 0, 0));
584        assert_eq!(
585            lookup.meaning_for("sg4.sg8_z79.sg10", "CCI", 0, 0, "Z66"),
586            Some("Produkteigenschaft")
587        );
588
589        // CAV element 0 component 0 — code field
590        assert!(lookup.is_code_field("sg4.sg8_z79.sg10", "CAV", 0, 0));
591
592        // CAV element 0 component 3 — data field, NOT a code
593        assert!(!lookup.is_code_field("sg4.sg8_z79.sg10", "CAV", 0, 3));
594
595        // LOC element 1 — data field
596        assert!(!lookup.is_code_field("sg4.sg5_z16", "LOC", 1, 0));
597    }
598
599    #[test]
600    fn test_from_inline_schema() {
601        let schema = serde_json::json!({
602            "fields": {
603                "sg4": {
604                    "children": {
605                        "sg8_test": {
606                            "children": {
607                                "sg10": {
608                                    "segments": [{
609                                        "id": "CCI",
610                                        "elements": [{
611                                            "index": 2,
612                                            "components": [{
613                                                "sub_index": 0,
614                                                "type": "code",
615                                                "codes": [
616                                                    {"value": "A1", "name": "Alpha"},
617                                                    {"value": "B2", "name": "Beta"}
618                                                ]
619                                            }]
620                                        }]
621                                    }],
622                                    "source_group": "SG10"
623                                }
624                            },
625                            "segments": [],
626                            "source_group": "SG8"
627                        }
628                    },
629                    "segments": [],
630                    "source_group": "SG4"
631                }
632            }
633        });
634
635        let lookup = CodeLookup::from_schema_value(&schema);
636
637        assert!(lookup.is_code_field("sg4.sg8_test.sg10", "CCI", 2, 0));
638        assert_eq!(
639            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "A1"),
640            Some("Alpha")
641        );
642        assert_eq!(
643            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "B2"),
644            Some("Beta")
645        );
646        assert_eq!(
647            lookup.meaning_for("sg4.sg8_test.sg10", "CCI", 2, 0, "XX"),
648            None
649        );
650        assert!(!lookup.is_code_field("sg4.sg8_test.sg10", "CCI", 0, 0));
651    }
652
653    #[test]
654    fn test_discriminated_variant_merge() {
655        // Schema with discriminated SG12 variants (sg12_z63, sg12_z65)
656        let schema = serde_json::json!({
657            "fields": {
658                "sg4": {
659                    "children": {
660                        "sg12_z63": {
661                            "segments": [{
662                                "id": "NAD",
663                                "elements": [{
664                                    "index": 0,
665                                    "type": "code",
666                                    "codes": [{"value": "Z63", "name": "Standortadresse"}]
667                                }]
668                            }],
669                            "source_group": "SG12"
670                        },
671                        "sg12_z65": {
672                            "segments": [{
673                                "id": "NAD",
674                                "elements": [
675                                    {
676                                        "index": 0,
677                                        "type": "code",
678                                        "codes": [{"value": "Z65", "name": "Kunde des LF"}]
679                                    },
680                                    {
681                                        "index": 3,
682                                        "components": [{
683                                            "sub_index": 5,
684                                            "type": "code",
685                                            "codes": [
686                                                {"value": "Z01", "name": "Herr"},
687                                                {"value": "Z02", "name": "Frau"}
688                                            ]
689                                        }]
690                                    }
691                                ]
692                            }],
693                            "source_group": "SG12"
694                        }
695                    },
696                    "segments": [],
697                    "source_group": "SG4"
698                }
699            }
700        });
701
702        let lookup = CodeLookup::from_schema_value(&schema);
703
704        // Variant-specific paths still work
705        assert!(lookup.is_code_field("sg4.sg12_z63", "NAD", 0, 0));
706        assert!(lookup.is_code_field("sg4.sg12_z65", "NAD", 0, 0));
707
708        // Base path also works (merged from variants)
709        assert!(lookup.is_code_field("sg4.sg12", "NAD", 0, 0));
710        assert_eq!(
711            lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z63"),
712            Some("Standortadresse")
713        );
714        assert_eq!(
715            lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z65"),
716            Some("Kunde des LF")
717        );
718
719        // Anrede code from z65 also available at base path
720        assert!(lookup.is_code_field("sg4.sg12", "NAD", 3, 5));
721        assert_eq!(
722            lookup.meaning_for("sg4.sg12", "NAD", 3, 5, "Z01"),
723            Some("Herr")
724        );
725    }
726
727    #[test]
728    fn test_pid_55013_sg12_base_path() {
729        let schema_path = Path::new(concat!(
730            env!("CARGO_MANIFEST_DIR"),
731            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55013_schema.json"
732        ));
733        if !schema_path.exists() {
734            eprintln!("Skipping: PID schema not found");
735            return;
736        }
737
738        let lookup = CodeLookup::from_schema_file(schema_path).unwrap();
739
740        // Base path "sg4.sg12" should have merged NAD qualifier codes from all variants
741        assert!(lookup.is_code_field("sg4.sg12", "NAD", 0, 0));
742        // Z67 meaning comes from sg12_z67 variant
743        assert!(lookup.meaning_for("sg4.sg12", "NAD", 0, 0, "Z67").is_some());
744        // All 7 SG12 qualifiers should be present
745        for code in &["Z63", "Z65", "Z66", "Z67", "Z68", "Z69", "Z70"] {
746            assert!(
747                lookup.meaning_for("sg4.sg12", "NAD", 0, 0, code).is_some(),
748                "Missing meaning for NAD qualifier {code} at base path sg4.sg12"
749            );
750        }
751    }
752
753    #[test]
754    fn test_multi_segment_code_merge() {
755        // SG10 with 3 CCI segments at same element position but different codes.
756        // All codes should be merged, not overwritten by last CCI.
757        let schema = serde_json::json!({
758            "fields": {
759                "sg4": {
760                    "children": {
761                        "sg8_z98": {
762                            "children": {
763                                "sg10": {
764                                    "segments": [
765                                        {
766                                            "id": "CCI",
767                                            "elements": [{"index": 2, "components": [{
768                                                "sub_index": 0, "type": "code",
769                                                "codes": [{"value": "ZB3", "name": "Zugeordneter Marktpartner"}]
770                                            }]}]
771                                        },
772                                        {
773                                            "id": "CAV",
774                                            "elements": [{"index": 0, "components": [{
775                                                "sub_index": 0, "type": "code",
776                                                "codes": [{"value": "Z91", "name": "MSB"}]
777                                            }]}]
778                                        },
779                                        {
780                                            "id": "CCI",
781                                            "elements": [{"index": 2, "components": [{
782                                                "sub_index": 0, "type": "code",
783                                                "codes": [{"value": "E03", "name": "Spannungsebene"}]
784                                            }]}]
785                                        },
786                                        {
787                                            "id": "CAV",
788                                            "elements": [{"index": 0, "components": [{
789                                                "sub_index": 0, "type": "code",
790                                                "codes": [
791                                                    {"value": "E05", "name": "Mittelspannung"},
792                                                    {"value": "E06", "name": "Niederspannung"}
793                                                ]
794                                            }]}]
795                                        },
796                                        {
797                                            "id": "CCI",
798                                            "elements": [{"index": 2, "components": [{
799                                                "sub_index": 0, "type": "code",
800                                                "codes": [
801                                                    {"value": "Z15", "name": "Haushaltskunde"},
802                                                    {"value": "Z18", "name": "Kein Haushaltskunde"}
803                                                ]
804                                            }]}]
805                                        }
806                                    ],
807                                    "source_group": "SG10"
808                                }
809                            },
810                            "segments": [],
811                            "source_group": "SG8"
812                        }
813                    },
814                    "segments": [],
815                    "source_group": "SG4"
816                }
817            }
818        });
819
820        let lookup = CodeLookup::from_schema_value(&schema);
821
822        // All CCI codes at (2,0) should be present (merged, not overwritten)
823        assert_eq!(
824            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "ZB3"),
825            Some("Zugeordneter Marktpartner")
826        );
827        assert_eq!(
828            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "E03"),
829            Some("Spannungsebene")
830        );
831        assert_eq!(
832            lookup.meaning_for("sg4.sg8_z98.sg10", "CCI", 2, 0, "Z15"),
833            Some("Haushaltskunde")
834        );
835
836        // All CAV codes at (0,0) should be present
837        assert_eq!(
838            lookup.meaning_for("sg4.sg8_z98.sg10", "CAV", 0, 0, "Z91"),
839            Some("MSB")
840        );
841        assert_eq!(
842            lookup.meaning_for("sg4.sg8_z98.sg10", "CAV", 0, 0, "E06"),
843            Some("Niederspannung")
844        );
845    }
846
847    #[test]
848    fn test_enrichment_for_with_enum() {
849        let schema = serde_json::json!({
850            "fields": {
851                "sg4": {
852                    "children": {
853                        "sg10": {
854                            "segments": [{
855                                "id": "CCI",
856                                "elements": [{
857                                    "index": 2,
858                                    "components": [{
859                                        "sub_index": 0,
860                                        "type": "code",
861                                        "codes": [
862                                            {"value": "Z15", "name": "Haushaltskunde", "enum": "HAUSHALTSKUNDE"},
863                                            {"value": "Z18", "name": "Kein Haushaltskunde", "enum": "KEIN_HAUSHALTSKUNDE"}
864                                        ]
865                                    }]
866                                }]
867                            }],
868                            "source_group": "SG10"
869                        }
870                    },
871                    "segments": [],
872                    "source_group": "SG4"
873                }
874            }
875        });
876
877        let lookup = CodeLookup::from_schema_value(&schema);
878
879        let enrichment = lookup.enrichment_for("sg4.sg10", "CCI", 2, 0, "Z15");
880        assert!(enrichment.is_some());
881        let e = enrichment.unwrap();
882        assert_eq!(e.meaning, "Haushaltskunde");
883        assert_eq!(e.enum_key.as_deref(), Some("HAUSHALTSKUNDE"));
884
885        let e2 = lookup
886            .enrichment_for("sg4.sg10", "CCI", 2, 0, "Z18")
887            .unwrap();
888        assert_eq!(e2.enum_key.as_deref(), Some("KEIN_HAUSHALTSKUNDE"));
889
890        // meaning_for still works
891        assert_eq!(
892            lookup.meaning_for("sg4.sg10", "CCI", 2, 0, "Z15"),
893            Some("Haushaltskunde")
894        );
895    }
896
897    #[test]
898    fn test_backward_compat_no_enum() {
899        // Old schema format without "enum" field — should still work, enum_key is None
900        let schema = serde_json::json!({
901            "fields": {
902                "sg4": {
903                    "children": {
904                        "sg10": {
905                            "segments": [{
906                                "id": "CCI",
907                                "elements": [{
908                                    "index": 2,
909                                    "components": [{
910                                        "sub_index": 0,
911                                        "type": "code",
912                                        "codes": [
913                                            {"value": "Z15", "name": "Haushaltskunde"}
914                                        ]
915                                    }]
916                                }]
917                            }],
918                            "source_group": "SG10"
919                        }
920                    },
921                    "segments": [],
922                    "source_group": "SG4"
923                }
924            }
925        });
926
927        let lookup = CodeLookup::from_schema_value(&schema);
928        let enrichment = lookup.enrichment_for("sg4.sg10", "CCI", 2, 0, "Z15");
929        assert!(enrichment.is_some());
930        let e = enrichment.unwrap();
931        assert_eq!(e.meaning, "Haushaltskunde");
932        assert_eq!(e.enum_key, None); // No enum in old schema
933    }
934
935    #[test]
936    fn rff_tn_in_55002_is_not_a_code_field() {
937        let schema_path = Path::new(concat!(
938            env!("CARGO_MANIFEST_DIR"),
939            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55002_schema.json"
940        ));
941        if !schema_path.exists() {
942            return;
943        }
944        let lookup = CodeLookup::from_schema_file(schema_path).unwrap();
945
946        // RFF+TN component 1 is type=data (Vorgangsnummer), must NOT be a code field.
947        assert!(
948            !lookup.is_code_field_q("sg4.sg6", "RFF", Some("TN"), 0, 1),
949            "RFF+TN d1154 is free-text Vorgangsnummer, must not be classified as code"
950        );
951
952        // RFF+Z13 component 1 IS a code field with the PID value (Class C; suppression
953        // happens elsewhere — here we just confirm the lookup classifies it as code).
954        assert!(
955            lookup.is_code_field_q("sg4.sg6", "RFF", Some("Z13"), 0, 1),
956            "RFF+Z13 d1154 is type=code with PID-identifier value"
957        );
958    }
959
960    #[test]
961    fn pid_self_reference_detection() {
962        let schema_path = Path::new(concat!(
963            env!("CARGO_MANIFEST_DIR"),
964            "/../../crates/mig-types/src/generated/fv2504/utilmd/pids/pid_55002_schema.json"
965        ));
966        if !schema_path.exists() {
967            return;
968        }
969        let lookup = CodeLookup::from_schema_file(schema_path).unwrap();
970
971        // RFF+Z13 d1154's only allowed value is "55002" — the PID itself.
972        assert!(
973            lookup.is_pid_self_reference("sg4.sg6", "RFF", Some("Z13"), 0, 1, "55002"),
974            "Z13 d1154 with single value '55002' must be detected as PID self-ref"
975        );
976        // Same field for a different PID should NOT count as self-reference.
977        assert!(
978            !lookup.is_pid_self_reference("sg4.sg6", "RFF", Some("Z13"), 0, 1, "55001"),
979            "Z13 d1154's '55002' should not count as self-ref for PID 55001"
980        );
981    }
982}