Skip to main content

mig_bo4e/
path_resolver.rs

1//! Resolves EDIFACT ID paths to numeric element indices (and vice versa).
2//!
3//! Built from a PID schema JSON. Used at TOML load time to normalize
4//! named paths (e.g., `loc.c517.d3225`) to numeric paths (`loc.1.0`).
5//! This keeps the engine hot path unchanged — all resolution happens once at load time.
6//!
7//! Also provides [`ReversePathResolver`] for converting numeric paths back to
8//! self-documenting EDIFACT ID paths (used by the `migrate-paths` CLI).
9
10use std::collections::HashMap;
11use std::path::Path;
12
13// ── Forward resolver: named → numeric ──
14
/// Forward lookup tables that translate named EDIFACT ID paths into numeric
/// element indices.
///
/// Built from a PID schema JSON. Used at TOML load time to normalize
/// named paths (e.g., "loc.c517.d3225") to numeric paths ("loc.1.0").
///
/// Supports ordinal suffixes for duplicate IDs:
/// - Duplicate composites per segment: `c556` (first), `c556_2` (second), `c556_3` (third)
/// - Duplicate data elements per composite: `d3036` (first), `d3036_2` (second), etc.
///
/// `Default` yields a resolver with empty tables, which leaves every named
/// path unresolved (returned as-is).
#[derive(Debug, Clone, Default)]
pub struct PathResolver {
    /// (segment_tag_upper, composite_id_lower, data_element_id_lower) → (element_index, sub_index)
    composite_elements: HashMap<(String, String, String), (usize, usize)>,
    /// (segment_tag_upper, element_id_lower) → element_index for simple data elements
    simple_elements: HashMap<(String, String), usize>,
}
30
31impl PathResolver {
32    /// Build from a PID schema JSON (`serde_json::Value`).
33    ///
34    /// Walks all groups recursively, collecting segment element indices.
35    pub fn from_schema(schema: &serde_json::Value) -> Self {
36        let mut simple_elements = HashMap::new();
37        let mut composite_elements = HashMap::new();
38
39        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
40            for (_group_key, group_val) in fields {
41                collect_from_group(group_val, &mut simple_elements, &mut composite_elements);
42            }
43        }
44
45        // Also collect from root_segments (used by CONTRL, APERAK, etc.)
46        if let Some(root_segs) = schema.get("root_segments") {
47            let wrapper = serde_json::json!({ "segments": root_segs });
48            collect_from_group(&wrapper, &mut simple_elements, &mut composite_elements);
49        }
50
51        Self {
52            simple_elements,
53            composite_elements,
54        }
55    }
56
57    /// Build from all PID schema JSON files in a directory.
58    ///
59    /// Loads every `pid_*_schema.json` file and merges their element mappings.
60    /// This ensures comprehensive coverage across all PIDs.
61    pub fn from_schema_dir(dir: &Path) -> Self {
62        let mut resolver = Self {
63            simple_elements: HashMap::new(),
64            composite_elements: HashMap::new(),
65        };
66
67        let mut entries: Vec<_> = std::fs::read_dir(dir)
68            .into_iter()
69            .flatten()
70            .filter_map(|e| e.ok())
71            .collect();
72        entries.sort_by_key(|e| e.file_name());
73
74        for entry in entries {
75            let path = entry.path();
76            let is_schema = path
77                .file_name()
78                .and_then(|n| n.to_str())
79                .map(|n| n.starts_with("pid_") && n.ends_with("_schema.json"))
80                .unwrap_or(false);
81            if is_schema {
82                if let Ok(content) = std::fs::read_to_string(&path) {
83                    if let Ok(schema) = serde_json::from_str::<serde_json::Value>(&content) {
84                        resolver.merge_schema(&schema);
85                    }
86                }
87            }
88        }
89
90        resolver
91    }
92
93    /// Merge another PID schema into this resolver.
94    pub fn merge_schema(&mut self, schema: &serde_json::Value) {
95        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
96            for (_group_key, group_val) in fields {
97                collect_from_group(
98                    group_val,
99                    &mut self.simple_elements,
100                    &mut self.composite_elements,
101                );
102            }
103        }
104        if let Some(root_segs) = schema.get("root_segments") {
105            let wrapper = serde_json::json!({ "segments": root_segs });
106            collect_from_group(
107                &wrapper,
108                &mut self.simple_elements,
109                &mut self.composite_elements,
110            );
111        }
112    }
113
114    /// Resolve a single field path. Returns the numeric path if the input
115    /// is a named path; returns the input unchanged if already numeric.
116    ///
117    /// Examples:
118    /// - `"loc.c517.d3225"` → `"loc.1.0"`
119    /// - `"seq.d1229"` → `"seq.0"`
120    /// - `"cav[Z91].c889.d7111"` → `"cav[Z91].0.0"`
121    /// - `"sts.c556_2.d9013"` → `"sts.3.0"` (ordinal suffix for duplicate composite)
122    /// - `"nad.c080.d3036_2"` → `"nad.3.1"` (ordinal suffix for duplicate data element)
123    /// - `"loc.1.0"` → `"loc.1.0"` (unchanged)
124    pub fn resolve_path(&self, path: &str) -> String {
125        let parts: Vec<&str> = path.split('.').collect();
126        if parts.len() < 2 {
127            return path.to_string();
128        }
129
130        // Parse segment tag and optional qualifier: "cav[Z91]" → ("cav", "[Z91]")
131        let (seg_raw, qualifier_suffix) = split_qualifier(parts[0]);
132        let seg_upper = seg_raw.to_ascii_uppercase();
133
134        let rest = &parts[1..];
135
136        // Check if already numeric: first rest part starts with a digit
137        if rest[0]
138            .chars()
139            .next()
140            .map(|c| c.is_ascii_digit())
141            .unwrap_or(false)
142        {
143            return path.to_string();
144        }
145
146        // Try resolving as composite path: seg.cNNN.dNNN (with optional ordinal suffixes)
147        if rest.len() == 2 && is_edifact_id(rest[0]) && is_edifact_id(rest[1]) {
148            let composite_id = rest[0].to_ascii_lowercase();
149            let data_element_id = rest[1].to_ascii_lowercase();
150
151            if let Some(&(elem_idx, sub_idx)) =
152                self.composite_elements
153                    .get(&(seg_upper.clone(), composite_id, data_element_id))
154            {
155                return format!("{}{}.{}.{}", seg_raw, qualifier_suffix, elem_idx, sub_idx);
156            }
157        }
158
159        // Try resolving as simple element: seg.dNNN
160        if rest.len() == 1 && is_edifact_id(rest[0]) {
161            let element_id = rest[0].to_ascii_lowercase();
162
163            if let Some(&elem_idx) = self.simple_elements.get(&(seg_upper, element_id)) {
164                return format!("{}{}.{}", seg_raw, qualifier_suffix, elem_idx);
165            }
166        }
167
168        // Unresolved — return as-is
169        path.to_string()
170    }
171
172    /// Resolve a discriminator string to 3-part numeric format.
173    ///
174    /// The engine's `resolve_repetition` requires `TAG.N.M=VALUE` (3-part).
175    ///
176    /// Input formats:
177    /// - Named simple: `"SEQ.d1229=ZF0"` → `"SEQ.0.0=ZF0"`
178    /// - Named composite: `"STS.c556.d9013=E01"` → `"STS.2.0=E01"`
179    /// - Numeric 3-part: `"LOC.0.0=Z16"` → `"LOC.0.0=Z16"` (unchanged)
180    /// - Numeric 2-part: `"SEQ.0=ZF0"` → `"SEQ.0.0=ZF0"` (upgraded)
181    pub fn resolve_discriminator(&self, disc: &str) -> String {
182        let Some((path_part, value_part)) = disc.split_once('=') else {
183            return disc.to_string();
184        };
185
186        let parts: Vec<&str> = path_part.split('.').collect();
187
188        match parts.len() {
189            2 => {
190                let seg_upper = parts[0].to_ascii_uppercase();
191                let element_ref = parts[1];
192
193                // Check if already numeric — upgrade 2-part to 3-part
194                if element_ref
195                    .chars()
196                    .next()
197                    .map(|c| c.is_ascii_digit())
198                    .unwrap_or(false)
199                {
200                    return format!("{}.{}.0={}", parts[0], element_ref, value_part);
201                }
202
203                // Try simple element resolution
204                if is_edifact_id(element_ref) {
205                    let element_id = element_ref.to_ascii_lowercase();
206                    if let Some(&elem_idx) = self.simple_elements.get(&(seg_upper, element_id)) {
207                        return format!("{}.{}.0={}", parts[0], elem_idx, value_part);
208                    }
209                }
210
211                disc.to_string()
212            }
213            3 => {
214                let seg_upper = parts[0].to_ascii_uppercase();
215
216                // Check if already numeric
217                if parts[1]
218                    .chars()
219                    .next()
220                    .map(|c| c.is_ascii_digit())
221                    .unwrap_or(false)
222                {
223                    return disc.to_string();
224                }
225
226                // Try composite resolution: TAG.cNNN.dNNN=VALUE
227                if is_edifact_id(parts[1]) && is_edifact_id(parts[2]) {
228                    let composite_id = parts[1].to_ascii_lowercase();
229                    let data_element_id = parts[2].to_ascii_lowercase();
230
231                    if let Some(&(elem_idx, sub_idx)) =
232                        self.composite_elements
233                            .get(&(seg_upper, composite_id, data_element_id))
234                    {
235                        return format!("{}.{}.{}={}", parts[0], elem_idx, sub_idx, value_part);
236                    }
237                }
238
239                disc.to_string()
240            }
241            _ => disc.to_string(),
242        }
243    }
244}
245
246// ── Reverse resolver: numeric → named ──
247
/// Reverse lookup tables that turn numeric element paths back into
/// self-documenting EDIFACT ID paths.
///
/// Used by the `migrate-paths` CLI to convert existing TOML files from
/// opaque numeric paths (`loc.1.0`) to readable named paths (`loc.c517.d3225`).
///
/// `Default` yields a resolver with empty tables, which returns every
/// numeric path unchanged.
#[derive(Debug, Clone, Default)]
pub struct ReversePathResolver {
    /// (seg_upper, elem_idx, sub_idx) → named suffix like "c517.d3225"
    composite_reverse: HashMap<(String, usize, usize), String>,
    /// (seg_upper, elem_idx) → named id like "d3227"
    simple_reverse: HashMap<(String, usize), String>,
    /// (seg_upper, elem_idx) → true if composite element
    is_composite: HashMap<(String, usize), bool>,
}
261
262impl ReversePathResolver {
263    /// Build from a PID schema JSON.
264    pub fn from_schema(schema: &serde_json::Value) -> Self {
265        let mut composite_reverse = HashMap::new();
266        let mut simple_reverse = HashMap::new();
267        let mut is_composite = HashMap::new();
268
269        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
270            for (_group_key, group_val) in fields {
271                collect_reverse_from_group(
272                    group_val,
273                    &mut composite_reverse,
274                    &mut simple_reverse,
275                    &mut is_composite,
276                );
277            }
278        }
279
280        if let Some(root_segs) = schema.get("root_segments") {
281            let wrapper = serde_json::json!({ "segments": root_segs });
282            collect_reverse_from_group(
283                &wrapper,
284                &mut composite_reverse,
285                &mut simple_reverse,
286                &mut is_composite,
287            );
288        }
289
290        Self {
291            composite_reverse,
292            simple_reverse,
293            is_composite,
294        }
295    }
296
297    /// Build from all PID schema JSON files in a directory.
298    pub fn from_schema_dir(dir: &Path) -> Self {
299        let mut resolver = Self {
300            composite_reverse: HashMap::new(),
301            simple_reverse: HashMap::new(),
302            is_composite: HashMap::new(),
303        };
304
305        let mut entries: Vec<_> = std::fs::read_dir(dir)
306            .into_iter()
307            .flatten()
308            .filter_map(|e| e.ok())
309            .collect();
310        entries.sort_by_key(|e| e.file_name());
311
312        for entry in entries {
313            let path = entry.path();
314            let is_schema = path
315                .file_name()
316                .and_then(|n| n.to_str())
317                .map(|n| n.starts_with("pid_") && n.ends_with("_schema.json"))
318                .unwrap_or(false);
319            if is_schema {
320                if let Ok(content) = std::fs::read_to_string(&path) {
321                    if let Ok(schema) = serde_json::from_str::<serde_json::Value>(&content) {
322                        resolver.merge_schema(&schema);
323                    }
324                }
325            }
326        }
327
328        resolver
329    }
330
331    /// Merge another PID schema into this resolver.
332    pub fn merge_schema(&mut self, schema: &serde_json::Value) {
333        if let Some(fields) = schema.get("fields").and_then(|f| f.as_object()) {
334            for (_group_key, group_val) in fields {
335                collect_reverse_from_group(
336                    group_val,
337                    &mut self.composite_reverse,
338                    &mut self.simple_reverse,
339                    &mut self.is_composite,
340                );
341            }
342        }
343        if let Some(root_segs) = schema.get("root_segments") {
344            let wrapper = serde_json::json!({ "segments": root_segs });
345            collect_reverse_from_group(
346                &wrapper,
347                &mut self.composite_reverse,
348                &mut self.simple_reverse,
349                &mut self.is_composite,
350            );
351        }
352    }
353
354    /// Convert a numeric path to a named EDIFACT ID path.
355    ///
356    /// Examples:
357    /// - `"loc.1.0"` → `"loc.c517.d3225"`
358    /// - `"loc.0"` → `"loc.d3227"` (simple element)
359    /// - `"sts.2"` → `"sts.c556.d9013"` (2-part → expands to first component)
360    /// - `"sts.3.0"` → `"sts.c556_2.d9013"` (ordinal suffix for duplicate composite)
361    /// - `"cav[Z91].0.1"` → `"cav[Z91].c889.d7110"` (preserves qualifier)
362    /// - `"loc.c517.d3225"` → `"loc.c517.d3225"` (already named, unchanged)
363    pub fn reverse_path(&self, path: &str) -> String {
364        let parts: Vec<&str> = path.split('.').collect();
365        if parts.len() < 2 {
366            return path.to_string();
367        }
368
369        let (seg_raw, qualifier_suffix) = split_qualifier(parts[0]);
370        let seg_upper = seg_raw.to_ascii_uppercase();
371        let rest = &parts[1..];
372
373        // If not numeric, already named — return as-is
374        if !rest[0]
375            .chars()
376            .next()
377            .map(|c| c.is_ascii_digit())
378            .unwrap_or(false)
379        {
380            return path.to_string();
381        }
382
383        match rest.len() {
384            1 => {
385                // 2-part: seg.N
386                let Ok(elem_idx) = rest[0].parse::<usize>() else {
387                    return path.to_string();
388                };
389
390                match self.is_composite.get(&(seg_upper.clone(), elem_idx)) {
391                    Some(true) => {
392                        // Composite — expand to first component: seg.cNNN.dNNN
393                        if let Some(named) = self.composite_reverse.get(&(seg_upper, elem_idx, 0)) {
394                            format!("{}{}.{}", seg_raw, qualifier_suffix, named)
395                        } else {
396                            path.to_string()
397                        }
398                    }
399                    Some(false) => {
400                        // Simple element: seg.dNNN
401                        if let Some(named) = self.simple_reverse.get(&(seg_upper, elem_idx)) {
402                            format!("{}{}.{}", seg_raw, qualifier_suffix, named)
403                        } else {
404                            path.to_string()
405                        }
406                    }
407                    None => path.to_string(),
408                }
409            }
410            2 => {
411                // 3-part: seg.N.M
412                let Ok(elem_idx) = rest[0].parse::<usize>() else {
413                    return path.to_string();
414                };
415                let Ok(sub_idx) = rest[1].parse::<usize>() else {
416                    return path.to_string();
417                };
418
419                if let Some(named) = self.composite_reverse.get(&(seg_upper, elem_idx, sub_idx)) {
420                    format!("{}{}.{}", seg_raw, qualifier_suffix, named)
421                } else {
422                    path.to_string()
423                }
424            }
425            _ => path.to_string(),
426        }
427    }
428
429    /// Convert a 3-part numeric discriminator to named EDIFACT ID format.
430    ///
431    /// Examples:
432    /// - `"LOC.0.0=Z16"` → `"LOC.d3227=Z16"` (simple element)
433    /// - `"STS.2.0=E01"` → `"STS.c556.d9013=E01"` (composite element)
434    /// - `"LOC.d3227=Z16"` → `"LOC.d3227=Z16"` (already named, unchanged)
435    pub fn reverse_discriminator(&self, disc: &str) -> String {
436        let Some((path_part, value_part)) = disc.split_once('=') else {
437            return disc.to_string();
438        };
439
440        let parts: Vec<&str> = path_part.split('.').collect();
441        if parts.len() != 3 {
442            return disc.to_string();
443        }
444
445        let seg_raw = parts[0];
446        let seg_upper = seg_raw.to_ascii_uppercase();
447
448        // Check if numeric
449        let Ok(elem_idx) = parts[1].parse::<usize>() else {
450            return disc.to_string(); // Already named
451        };
452        let Ok(sub_idx) = parts[2].parse::<usize>() else {
453            return disc.to_string();
454        };
455
456        // Check if it's a simple element (sub_idx 0 and element is not composite)
457        if sub_idx == 0 {
458            if let Some(false) = self.is_composite.get(&(seg_upper.clone(), elem_idx)) {
459                if let Some(named) = self.simple_reverse.get(&(seg_upper.clone(), elem_idx)) {
460                    return format!("{}.{}={}", seg_raw, named, value_part);
461                }
462            }
463        }
464
465        // Composite element
466        if let Some(named) = self
467            .composite_reverse
468            .get(&(seg_upper.clone(), elem_idx, sub_idx))
469        {
470            return format!("{}.{}={}", seg_raw, named, value_part);
471        }
472
473        disc.to_string()
474    }
475}
476
477// ── Helpers ──
478
/// Check if a string looks like an EDIFACT ID: a single leading `c`, `d` or `s`
/// (either case) followed by one or more ASCII digits, with an optional
/// ordinal suffix (`_N`, where `N` is one or more ASCII digits).
///
/// Matches: `c517`, `d3225`, `c556_2`, `d3036_3`
fn is_edifact_id(s: &str) -> bool {
    let Some(body) = s.strip_prefix(|c: char| matches!(c, 'c' | 'd' | 's' | 'C' | 'D' | 'S'))
    else {
        return false;
    };

    // Non-empty and made up solely of ASCII digits.
    let is_number = |part: &str| !part.is_empty() && part.bytes().all(|b| b.is_ascii_digit());

    match body.split_once('_') {
        Some((base, ordinal)) => is_number(base) && is_number(ordinal),
        None => is_number(body),
    }
}
503
/// Separate a segment tag from its bracketed qualifier, splitting at the first `[`:
/// `"cav[Z91]"` → `("cav", "[Z91]")`, `"loc"` → `("loc", "")`
fn split_qualifier(tag: &str) -> (&str, &str) {
    match tag.find('[') {
        Some(bracket_at) => tag.split_at(bracket_at),
        None => (tag, ""),
    }
}
512
513/// Recursively collect forward element mappings from a group in the schema.
514///
515/// Tracks ordinal suffixes for duplicate composite IDs per segment and
516/// duplicate data element IDs per composite.
517fn collect_from_group(
518    group: &serde_json::Value,
519    simple: &mut HashMap<(String, String), usize>,
520    composite: &mut HashMap<(String, String, String), (usize, usize)>,
521) {
522    if let Some(segments) = group.get("segments").and_then(|s| s.as_array()) {
523        for seg in segments {
524            let seg_tag = seg
525                .get("id")
526                .and_then(|v| v.as_str())
527                .unwrap_or("")
528                .to_ascii_uppercase();
529
530            if let Some(elements) = seg.get("elements").and_then(|e| e.as_array()) {
531                // Track composite ID occurrences for ordinal suffixes
532                let mut composite_id_count: HashMap<String, usize> = HashMap::new();
533
534                for elem in elements {
535                    let elem_index =
536                        elem.get("index").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
537
538                    if let Some(composite_id) = elem.get("composite").and_then(|v| v.as_str()) {
539                        let base_composite = composite_id.to_ascii_lowercase();
540
541                        // Track occurrence for ordinal suffix
542                        let count = composite_id_count
543                            .entry(base_composite.clone())
544                            .or_insert(0);
545                        *count += 1;
546
547                        let composite_key = if *count == 1 {
548                            base_composite
549                        } else {
550                            format!("{}_{}", base_composite, count)
551                        };
552
553                        if let Some(components) = elem.get("components").and_then(|c| c.as_array())
554                        {
555                            // Track data element ID occurrences within this composite
556                            let mut data_elem_count: HashMap<String, usize> = HashMap::new();
557
558                            for comp in components {
559                                let comp_id = comp.get("id").and_then(|v| v.as_str()).unwrap_or("");
560                                let sub_index =
561                                    comp.get("sub_index").and_then(|v| v.as_u64()).unwrap_or(0)
562                                        as usize;
563                                let base_data = format!("d{}", comp_id).to_ascii_lowercase();
564
565                                let dcount = data_elem_count.entry(base_data.clone()).or_insert(0);
566                                *dcount += 1;
567
568                                let data_key = if *dcount == 1 {
569                                    base_data
570                                } else {
571                                    format!("{}_{}", base_data, dcount)
572                                };
573
574                                composite
575                                    .entry((seg_tag.clone(), composite_key.clone(), data_key))
576                                    .or_insert((elem_index, sub_index));
577                            }
578                        }
579                    } else {
580                        // Simple data element
581                        let elem_id = elem.get("id").and_then(|v| v.as_str()).unwrap_or("");
582                        let elem_id_lower = format!("d{}", elem_id).to_ascii_lowercase();
583                        simple
584                            .entry((seg_tag.clone(), elem_id_lower))
585                            .or_insert(elem_index);
586                    }
587                }
588            }
589        }
590    }
591
592    // Recurse into children
593    if let Some(children) = group.get("children").and_then(|c| c.as_object()) {
594        for (_child_key, child_val) in children {
595            collect_from_group(child_val, simple, composite);
596        }
597    }
598}
599
600/// Recursively collect reverse element mappings from a group in the schema.
601///
602/// Builds (seg, elem_idx, sub_idx) → named path mappings, with ordinal suffixes
603/// for duplicates.
604fn collect_reverse_from_group(
605    group: &serde_json::Value,
606    composite_reverse: &mut HashMap<(String, usize, usize), String>,
607    simple_reverse: &mut HashMap<(String, usize), String>,
608    is_composite: &mut HashMap<(String, usize), bool>,
609) {
610    if let Some(segments) = group.get("segments").and_then(|s| s.as_array()) {
611        for seg in segments {
612            let seg_tag = seg
613                .get("id")
614                .and_then(|v| v.as_str())
615                .unwrap_or("")
616                .to_ascii_uppercase();
617
618            if let Some(elements) = seg.get("elements").and_then(|e| e.as_array()) {
619                let mut composite_id_count: HashMap<String, usize> = HashMap::new();
620
621                for elem in elements {
622                    let elem_index =
623                        elem.get("index").and_then(|v| v.as_u64()).unwrap_or(0) as usize;
624
625                    if let Some(composite_id) = elem.get("composite").and_then(|v| v.as_str()) {
626                        let base = composite_id.to_ascii_lowercase();
627
628                        let count = composite_id_count.entry(base.clone()).or_insert(0);
629                        *count += 1;
630
631                        let comp_key = if *count == 1 {
632                            base
633                        } else {
634                            format!("{}_{}", base, count)
635                        };
636
637                        is_composite
638                            .entry((seg_tag.clone(), elem_index))
639                            .or_insert(true);
640
641                        if let Some(components) = elem.get("components").and_then(|c| c.as_array())
642                        {
643                            let mut data_elem_count: HashMap<String, usize> = HashMap::new();
644
645                            for comp in components {
646                                let comp_id = comp.get("id").and_then(|v| v.as_str()).unwrap_or("");
647                                let sub_index =
648                                    comp.get("sub_index").and_then(|v| v.as_u64()).unwrap_or(0)
649                                        as usize;
650                                let base_data = format!("d{}", comp_id).to_ascii_lowercase();
651
652                                let dcount = data_elem_count.entry(base_data.clone()).or_insert(0);
653                                *dcount += 1;
654
655                                let data_key = if *dcount == 1 {
656                                    base_data
657                                } else {
658                                    format!("{}_{}", base_data, dcount)
659                                };
660
661                                composite_reverse
662                                    .entry((seg_tag.clone(), elem_index, sub_index))
663                                    .or_insert(format!("{}.{}", comp_key, data_key));
664                            }
665                        }
666                    } else {
667                        let elem_id = elem.get("id").and_then(|v| v.as_str()).unwrap_or("");
668                        let elem_id_lower = format!("d{}", elem_id).to_ascii_lowercase();
669
670                        is_composite
671                            .entry((seg_tag.clone(), elem_index))
672                            .or_insert(false);
673                        simple_reverse
674                            .entry((seg_tag.clone(), elem_index))
675                            .or_insert(elem_id_lower);
676                    }
677                }
678            }
679        }
680    }
681
682    if let Some(children) = group.get("children").and_then(|c| c.as_object()) {
683        for (_child_key, child_val) in children {
684            collect_reverse_from_group(child_val, composite_reverse, simple_reverse, is_composite);
685        }
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692
693    fn test_schema() -> serde_json::Value {
694        serde_json::json!({
695            "beschreibung": "Test PID",
696            "fields": {
697                "sg4": {
698                    "segments": [
699                        {
700                            "id": "LOC",
701                            "name": "Lokation",
702                            "elements": [
703                                {
704                                    "id": "3227",
705                                    "index": 0,
706                                    "name": "Lokation, Qualifier",
707                                    "type": "code"
708                                },
709                                {
710                                    "composite": "C517",
711                                    "index": 1,
712                                    "name": "Lokationsidentifikation",
713                                    "components": [
714                                        {
715                                            "id": "3225",
716                                            "sub_index": 0,
717                                            "name": "MaLo-ID",
718                                            "type": "data"
719                                        },
720                                        {
721                                            "id": "1131",
722                                            "sub_index": 1,
723                                            "name": "Codeliste, Code",
724                                            "type": "data"
725                                        }
726                                    ]
727                                }
728                            ]
729                        },
730                        {
731                            "id": "SEQ",
732                            "name": "Reihenfolge",
733                            "elements": [
734                                {
735                                    "id": "1229",
736                                    "index": 0,
737                                    "name": "Handlung, Code",
738                                    "type": "code"
739                                },
740                                {
741                                    "composite": "C286",
742                                    "index": 1,
743                                    "name": "Information über eine Folge",
744                                    "components": [
745                                        {
746                                            "id": "1050",
747                                            "sub_index": 0,
748                                            "name": "Referenz auf Zeitraum-ID",
749                                            "type": "data"
750                                        }
751                                    ]
752                                }
753                            ]
754                        }
755                    ],
756                    "source_group": "SG4",
757                    "children": {
758                        "sg8_zf0": {
759                            "segments": [
760                                {
761                                    "id": "CAV",
762                                    "name": "Merkmal",
763                                    "elements": [
764                                        {
765                                            "composite": "C889",
766                                            "index": 0,
767                                            "name": "Merkmalswert",
768                                            "components": [
769                                                {
770                                                    "id": "7111",
771                                                    "sub_index": 0,
772                                                    "name": "Merkmalswert, Code",
773                                                    "type": "code"
774                                                },
775                                                {
776                                                    "id": "7110",
777                                                    "sub_index": 1,
778                                                    "name": "Merkmalswert",
779                                                    "type": "data"
780                                                }
781                                            ]
782                                        }
783                                    ]
784                                }
785                            ],
786                            "source_group": "SG8"
787                        }
788                    }
789                }
790            }
791        })
792    }
793
794    fn sts_schema() -> serde_json::Value {
795        serde_json::json!({
796            "beschreibung": "STS ordinal test",
797            "fields": {
798                "sg4": {
799                    "segments": [{
800                        "id": "STS",
801                        "name": "Status",
802                        "elements": [
803                            {
804                                "composite": "C601",
805                                "index": 0,
806                                "name": "Statuskategorie",
807                                "components": [{
808                                    "id": "9015",
809                                    "sub_index": 0,
810                                    "type": "code"
811                                }]
812                            },
813                            {
814                                "composite": "C555",
815                                "index": 1,
816                                "name": "Status",
817                                "components": [{
818                                    "id": "4405",
819                                    "sub_index": 0,
820                                    "type": "data"
821                                }]
822                            },
823                            {
824                                "composite": "C556",
825                                "index": 2,
826                                "name": "Statusanlaß",
827                                "components": [{
828                                    "id": "9013",
829                                    "sub_index": 0,
830                                    "type": "code"
831                                }]
832                            },
833                            {
834                                "composite": "C556",
835                                "index": 3,
836                                "name": "Statusanlaß",
837                                "components": [{
838                                    "id": "9013",
839                                    "sub_index": 0,
840                                    "type": "code"
841                                }]
842                            },
843                            {
844                                "composite": "C556",
845                                "index": 4,
846                                "name": "Statusanlaß",
847                                "components": [{
848                                    "id": "9013",
849                                    "sub_index": 0,
850                                    "type": "code"
851                                }]
852                            }
853                        ]
854                    }],
855                    "source_group": "SG4"
856                }
857            }
858        })
859    }
860
861    fn nad_schema() -> serde_json::Value {
862        serde_json::json!({
863            "beschreibung": "NAD ordinal test",
864            "fields": {
865                "sg12_z04": {
866                    "segments": [{
867                        "id": "NAD",
868                        "name": "Geschäftspartner",
869                        "elements": [
870                            {
871                                "id": "3229",
872                                "index": 0,
873                                "type": "code"
874                            },
875                            {
876                                "composite": "C082",
877                                "index": 1,
878                                "name": "Identifikation",
879                                "components": [
880                                    { "id": "3039", "sub_index": 0, "type": "data" },
881                                    { "id": "1131", "sub_index": 1, "type": "data" },
882                                    { "id": "3055", "sub_index": 2, "type": "code" }
883                                ]
884                            },
885                            {
886                                "composite": "C058",
887                                "index": 2,
888                                "name": "Zusatzinfo",
889                                "components": [
890                                    { "id": "3124", "sub_index": 0, "type": "data" },
891                                    { "id": "3124", "sub_index": 1, "type": "data" },
892                                    { "id": "3124", "sub_index": 2, "type": "data" }
893                                ]
894                            },
895                            {
896                                "composite": "C080",
897                                "index": 3,
898                                "name": "Name",
899                                "components": [
900                                    { "id": "3036", "sub_index": 0, "type": "data" },
901                                    { "id": "3036", "sub_index": 1, "type": "data" },
902                                    { "id": "3036", "sub_index": 2, "type": "data" },
903                                    { "id": "3036", "sub_index": 3, "type": "data" },
904                                    { "id": "3036", "sub_index": 4, "type": "data" },
905                                    { "id": "3045", "sub_index": 5, "type": "code" }
906                                ]
907                            }
908                        ]
909                    }],
910                    "source_group": "SG12"
911                }
912            }
913        })
914    }
915
916    // ── Forward resolver tests ──
917
918    #[test]
919    fn resolve_composite_path() {
920        let resolver = PathResolver::from_schema(&test_schema());
921        assert_eq!(resolver.resolve_path("loc.c517.d3225"), "loc.1.0");
922        assert_eq!(resolver.resolve_path("loc.c517.d1131"), "loc.1.1");
923    }
924
925    #[test]
926    fn resolve_simple_element_path() {
927        let resolver = PathResolver::from_schema(&test_schema());
928        assert_eq!(resolver.resolve_path("loc.d3227"), "loc.0");
929        assert_eq!(resolver.resolve_path("seq.d1229"), "seq.0");
930    }
931
932    #[test]
933    fn resolve_nested_group_paths() {
934        let resolver = PathResolver::from_schema(&test_schema());
935        assert_eq!(resolver.resolve_path("cav.c889.d7111"), "cav.0.0");
936        assert_eq!(resolver.resolve_path("cav.c889.d7110"), "cav.0.1");
937    }
938
939    #[test]
940    fn numeric_paths_unchanged() {
941        let resolver = PathResolver::from_schema(&test_schema());
942        assert_eq!(resolver.resolve_path("loc.1.0"), "loc.1.0");
943        assert_eq!(resolver.resolve_path("loc.0"), "loc.0");
944        assert_eq!(resolver.resolve_path("seq.0"), "seq.0");
945    }
946
947    #[test]
948    fn qualifier_paths() {
949        let resolver = PathResolver::from_schema(&test_schema());
950        assert_eq!(resolver.resolve_path("cav[Z91].c889.d7111"), "cav[Z91].0.0");
951        assert_eq!(resolver.resolve_path("cav[Z91].0.1"), "cav[Z91].0.1");
952    }
953
954    #[test]
955    fn resolve_discriminator_named() {
956        let resolver = PathResolver::from_schema(&test_schema());
957        // Named → 3-part numeric
958        assert_eq!(
959            resolver.resolve_discriminator("SEQ.d1229=ZF0"),
960            "SEQ.0.0=ZF0"
961        );
962        assert_eq!(
963            resolver.resolve_discriminator("LOC.d3227=Z16"),
964            "LOC.0.0=Z16"
965        );
966    }
967
968    #[test]
969    fn resolve_discriminator_numeric() {
970        let resolver = PathResolver::from_schema(&test_schema());
971        // Already 3-part numeric — unchanged
972        assert_eq!(resolver.resolve_discriminator("SEQ.0.0=ZF0"), "SEQ.0.0=ZF0");
973        // 2-part numeric — upgraded to 3-part
974        assert_eq!(resolver.resolve_discriminator("SEQ.0=ZF0"), "SEQ.0.0=ZF0");
975    }
976
977    #[test]
978    fn resolve_discriminator_composite() {
979        let resolver = PathResolver::from_schema(&sts_schema());
980        // Composite discriminator: TAG.cNNN.dNNN=VALUE → TAG.N.M=VALUE
981        assert_eq!(
982            resolver.resolve_discriminator("STS.c556.d9013=E01"),
983            "STS.2.0=E01"
984        );
985        assert_eq!(
986            resolver.resolve_discriminator("STS.c556_2.d9013=ZW4"),
987            "STS.3.0=ZW4"
988        );
989    }
990
991    #[test]
992    fn unresolved_paths_unchanged() {
993        let resolver = PathResolver::from_schema(&test_schema());
994        assert_eq!(resolver.resolve_path("xyz.d9999"), "xyz.d9999");
995        assert_eq!(resolver.resolve_path("loc"), "loc");
996    }
997
998    #[test]
999    fn composite_id_case_insensitive() {
1000        let resolver = PathResolver::from_schema(&test_schema());
1001        assert_eq!(resolver.resolve_path("loc.c517.d3225"), "loc.1.0");
1002        assert_eq!(resolver.resolve_path("LOC.c517.d3225"), "LOC.1.0");
1003    }
1004
1005    #[test]
1006    fn seq_composite_path() {
1007        let resolver = PathResolver::from_schema(&test_schema());
1008        assert_eq!(resolver.resolve_path("seq.c286.d1050"), "seq.1.0");
1009    }
1010
1011    // ── Ordinal suffix tests ──
1012
1013    #[test]
1014    fn ordinal_suffix_duplicate_composites() {
1015        let resolver = PathResolver::from_schema(&sts_schema());
1016        // First C556 at index 2
1017        assert_eq!(resolver.resolve_path("sts.c556.d9013"), "sts.2.0");
1018        // Second C556 at index 3
1019        assert_eq!(resolver.resolve_path("sts.c556_2.d9013"), "sts.3.0");
1020        // Third C556 at index 4
1021        assert_eq!(resolver.resolve_path("sts.c556_3.d9013"), "sts.4.0");
1022        // Non-duplicate composites still work
1023        assert_eq!(resolver.resolve_path("sts.c601.d9015"), "sts.0.0");
1024        assert_eq!(resolver.resolve_path("sts.c555.d4405"), "sts.1.0");
1025    }
1026
1027    #[test]
1028    fn ordinal_suffix_duplicate_data_elements() {
1029        let resolver = PathResolver::from_schema(&nad_schema());
1030        // NAD C080: d3036×5 + d3045×1
1031        assert_eq!(resolver.resolve_path("nad.c080.d3036"), "nad.3.0");
1032        assert_eq!(resolver.resolve_path("nad.c080.d3036_2"), "nad.3.1");
1033        assert_eq!(resolver.resolve_path("nad.c080.d3036_3"), "nad.3.2");
1034        assert_eq!(resolver.resolve_path("nad.c080.d3036_4"), "nad.3.3");
1035        assert_eq!(resolver.resolve_path("nad.c080.d3036_5"), "nad.3.4");
1036        assert_eq!(resolver.resolve_path("nad.c080.d3045"), "nad.3.5");
1037        // C058: d3124×3
1038        assert_eq!(resolver.resolve_path("nad.c058.d3124"), "nad.2.0");
1039        assert_eq!(resolver.resolve_path("nad.c058.d3124_2"), "nad.2.1");
1040        assert_eq!(resolver.resolve_path("nad.c058.d3124_3"), "nad.2.2");
1041    }
1042
1043    #[test]
1044    fn is_edifact_id_with_suffix() {
1045        assert!(is_edifact_id("c556"));
1046        assert!(is_edifact_id("c556_2"));
1047        assert!(is_edifact_id("c556_3"));
1048        assert!(is_edifact_id("d3036"));
1049        assert!(is_edifact_id("d3036_2"));
1050        assert!(is_edifact_id("D3036_5"));
1051        assert!(is_edifact_id("s002"));
1052        assert!(is_edifact_id("S009"));
1053        assert!(is_edifact_id("s011"));
1054        assert!(!is_edifact_id("c"));
1055        assert!(!is_edifact_id("c_2"));
1056        assert!(!is_edifact_id("c556_"));
1057        assert!(!is_edifact_id("c556_a"));
1058        assert!(!is_edifact_id("abc"));
1059        assert!(!is_edifact_id("123"));
1060    }
1061
1062    // ── Reverse resolver tests ──
1063
1064    #[test]
1065    fn reverse_path_composite() {
1066        let resolver = ReversePathResolver::from_schema(&test_schema());
1067        assert_eq!(resolver.reverse_path("loc.1.0"), "loc.c517.d3225");
1068        assert_eq!(resolver.reverse_path("loc.1.1"), "loc.c517.d1131");
1069        assert_eq!(resolver.reverse_path("seq.1.0"), "seq.c286.d1050");
1070        assert_eq!(resolver.reverse_path("cav.0.0"), "cav.c889.d7111");
1071        assert_eq!(resolver.reverse_path("cav.0.1"), "cav.c889.d7110");
1072    }
1073
1074    #[test]
1075    fn reverse_path_simple() {
1076        let resolver = ReversePathResolver::from_schema(&test_schema());
1077        assert_eq!(resolver.reverse_path("loc.0"), "loc.d3227");
1078        assert_eq!(resolver.reverse_path("seq.0"), "seq.d1229");
1079    }
1080
1081    #[test]
1082    fn reverse_path_two_part_composite() {
1083        let resolver = ReversePathResolver::from_schema(&sts_schema());
1084        // 2-part numeric for composite → expands to first component
1085        assert_eq!(resolver.reverse_path("sts.0"), "sts.c601.d9015");
1086        assert_eq!(resolver.reverse_path("sts.1"), "sts.c555.d4405");
1087        assert_eq!(resolver.reverse_path("sts.2"), "sts.c556.d9013");
1088    }
1089
1090    #[test]
1091    fn reverse_path_ordinal_composites() {
1092        let resolver = ReversePathResolver::from_schema(&sts_schema());
1093        assert_eq!(resolver.reverse_path("sts.2.0"), "sts.c556.d9013");
1094        assert_eq!(resolver.reverse_path("sts.3.0"), "sts.c556_2.d9013");
1095        assert_eq!(resolver.reverse_path("sts.4.0"), "sts.c556_3.d9013");
1096    }
1097
1098    #[test]
1099    fn reverse_path_ordinal_data_elements() {
1100        let resolver = ReversePathResolver::from_schema(&nad_schema());
1101        assert_eq!(resolver.reverse_path("nad.3.0"), "nad.c080.d3036");
1102        assert_eq!(resolver.reverse_path("nad.3.1"), "nad.c080.d3036_2");
1103        assert_eq!(resolver.reverse_path("nad.3.2"), "nad.c080.d3036_3");
1104        assert_eq!(resolver.reverse_path("nad.3.3"), "nad.c080.d3036_4");
1105        assert_eq!(resolver.reverse_path("nad.3.4"), "nad.c080.d3036_5");
1106        assert_eq!(resolver.reverse_path("nad.3.5"), "nad.c080.d3045");
1107    }
1108
1109    #[test]
1110    fn reverse_path_qualifier() {
1111        let resolver = ReversePathResolver::from_schema(&test_schema());
1112        assert_eq!(resolver.reverse_path("cav[Z91].0.0"), "cav[Z91].c889.d7111");
1113        assert_eq!(resolver.reverse_path("cav[Z91].0.1"), "cav[Z91].c889.d7110");
1114    }
1115
1116    #[test]
1117    fn reverse_path_already_named() {
1118        let resolver = ReversePathResolver::from_schema(&test_schema());
1119        assert_eq!(resolver.reverse_path("loc.c517.d3225"), "loc.c517.d3225");
1120        assert_eq!(resolver.reverse_path("loc.d3227"), "loc.d3227");
1121    }
1122
1123    #[test]
1124    fn reverse_discriminator_simple() {
1125        let resolver = ReversePathResolver::from_schema(&test_schema());
1126        assert_eq!(
1127            resolver.reverse_discriminator("LOC.0.0=Z16"),
1128            "LOC.d3227=Z16"
1129        );
1130        assert_eq!(
1131            resolver.reverse_discriminator("SEQ.0.0=ZF0"),
1132            "SEQ.d1229=ZF0"
1133        );
1134    }
1135
1136    #[test]
1137    fn reverse_discriminator_composite() {
1138        let resolver = ReversePathResolver::from_schema(&sts_schema());
1139        assert_eq!(
1140            resolver.reverse_discriminator("STS.2.0=E01"),
1141            "STS.c556.d9013=E01"
1142        );
1143        assert_eq!(
1144            resolver.reverse_discriminator("STS.3.0=ZW4"),
1145            "STS.c556_2.d9013=ZW4"
1146        );
1147    }
1148
1149    #[test]
1150    fn reverse_discriminator_already_named() {
1151        let resolver = ReversePathResolver::from_schema(&test_schema());
1152        // Not numeric → unchanged
1153        assert_eq!(
1154            resolver.reverse_discriminator("LOC.d3227=Z16"),
1155            "LOC.d3227=Z16"
1156        );
1157    }
1158
1159    fn s_prefix_schema() -> serde_json::Value {
1160        serde_json::json!({
1161            "beschreibung": "CONTRL S-prefix test",
1162            "fields": {},
1163            "root_segments": [{
1164                "id": "UCI",
1165                "name": "Übertragungsprüfung",
1166                "elements": [
1167                    { "id": "0020", "index": 0, "type": "data" },
1168                    {
1169                        "composite": "S002",
1170                        "index": 1,
1171                        "name": "Absender",
1172                        "components": [
1173                            { "id": "0004", "sub_index": 0, "type": "data" },
1174                            { "id": "0007", "sub_index": 1, "type": "code" }
1175                        ]
1176                    },
1177                    {
1178                        "composite": "S003",
1179                        "index": 2,
1180                        "name": "Empfänger",
1181                        "components": [
1182                            { "id": "0010", "sub_index": 0, "type": "data" },
1183                            { "id": "0007", "sub_index": 1, "type": "code" }
1184                        ]
1185                    },
1186                    { "id": "0083", "index": 3, "type": "code" },
1187                    {
1188                        "composite": "S011",
1189                        "index": 6,
1190                        "name": "Datenelement-Identifikation",
1191                        "components": [
1192                            { "id": "0098", "sub_index": 0, "type": "data" },
1193                            { "id": "0104", "sub_index": 1, "type": "data" }
1194                        ]
1195                    }
1196                ]
1197            }]
1198        })
1199    }
1200
1201    #[test]
1202    fn resolve_s_prefix_composites() {
1203        let resolver = PathResolver::from_schema(&s_prefix_schema());
1204        assert_eq!(resolver.resolve_path("uci.s002.d0004"), "uci.1.0");
1205        assert_eq!(resolver.resolve_path("uci.s002.d0007"), "uci.1.1");
1206        assert_eq!(resolver.resolve_path("uci.s003.d0010"), "uci.2.0");
1207        assert_eq!(resolver.resolve_path("uci.s003.d0007"), "uci.2.1");
1208        assert_eq!(resolver.resolve_path("uci.s011.d0098"), "uci.6.0");
1209        assert_eq!(resolver.resolve_path("uci.s011.d0104"), "uci.6.1");
1210        assert_eq!(resolver.resolve_path("uci.d0020"), "uci.0");
1211        assert_eq!(resolver.resolve_path("uci.d0083"), "uci.3");
1212    }
1213
1214    #[test]
1215    fn reverse_s_prefix_composites() {
1216        let resolver = ReversePathResolver::from_schema(&s_prefix_schema());
1217        assert_eq!(resolver.reverse_path("uci.1.0"), "uci.s002.d0004");
1218        assert_eq!(resolver.reverse_path("uci.1.1"), "uci.s002.d0007");
1219        assert_eq!(resolver.reverse_path("uci.2.0"), "uci.s003.d0010");
1220        assert_eq!(resolver.reverse_path("uci.6.0"), "uci.s011.d0098");
1221        assert_eq!(resolver.reverse_path("uci.0"), "uci.d0020");
1222    }
1223
1224    #[test]
1225    fn forward_reverse_roundtrip() {
1226        let fwd = PathResolver::from_schema(&sts_schema());
1227        let rev = ReversePathResolver::from_schema(&sts_schema());
1228
1229        // Named → numeric → named
1230        let named = "sts.c556_2.d9013";
1231        let numeric = fwd.resolve_path(named);
1232        assert_eq!(numeric, "sts.3.0");
1233        let back = rev.reverse_path(&numeric);
1234        assert_eq!(back, named);
1235
1236        // Simple element roundtrip
1237        let named_simple = "nad.d3229";
1238        let fwd_nad = PathResolver::from_schema(&nad_schema());
1239        let rev_nad = ReversePathResolver::from_schema(&nad_schema());
1240        let numeric_simple = fwd_nad.resolve_path(named_simple);
1241        assert_eq!(numeric_simple, "nad.0");
1242        let back_simple = rev_nad.reverse_path(&numeric_simple);
1243        assert_eq!(back_simple, named_simple);
1244    }
1245
1246    #[test]
1247    fn discriminator_forward_reverse_roundtrip() {
1248        let fwd = PathResolver::from_schema(&test_schema());
1249        let rev = ReversePathResolver::from_schema(&test_schema());
1250
1251        let named = "LOC.d3227=Z16";
1252        let numeric = fwd.resolve_discriminator(named);
1253        assert_eq!(numeric, "LOC.0.0=Z16");
1254        let back = rev.reverse_discriminator(&numeric);
1255        assert_eq!(back, named);
1256    }
1257}