Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Requirement level of a data element inside a segment definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// The element has to be supplied.
    Mandatory,
    /// The element may be left out unless some further rule demands it.
    Conditional,
}
15
16/// Reference to a data element within a segment definition.
17#[derive(Debug, Clone, Copy)]
18pub struct ElementRef {
19    /// One-based element position in the segment definition.
20    pub position: u8,
21    /// UN/EDIFACT data element identifier.
22    pub data_element: &'static str,
23    /// Requirement status of the element.
24    pub status: Status,
25    /// Maximum repetition count for this element.
26    pub max_repeat: u8,
27}
28
29/// Definition of an EDIFACT segment (tag + element structure).
30#[derive(Debug)]
31pub struct SegmentDefinition {
32    /// Segment tag.
33    pub tag: &'static str,
34    /// Human-readable segment name.
35    pub name: &'static str,
36    /// Ordered element definitions.
37    pub elements: &'static [ElementRef],
38}
39
40/// Owned runtime equivalent of [`ElementRef`].
41///
42/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
43/// to construct validators from data that is not available at compile time (e.g. loaded
44/// from JSON or a database at startup).
45///
46/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
47/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
48/// comes from an external source and you need a `Result`. Fields are private to prevent
49/// bypassing the position invariant through struct-literal syntax.
50#[derive(Debug, Clone)]
51pub struct OwnedElementRef {
52    /// One-based element position.
53    position: u8,
54    /// UN/EDIFACT data element identifier.
55    data_element: String,
56    /// Requirement status.
57    status: Status,
58    /// Maximum repetition count.
59    max_repeat: u8,
60}
61
62/// Owned runtime equivalent of [`SegmentDefinition`].
63///
64/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
65///
66/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
67/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
68/// an external source and you need a `Result`. Fields are private to prevent bypassing
69/// the tag invariant through struct-literal syntax.
70#[derive(Debug, Clone)]
71pub struct OwnedSegmentDef {
72    /// Segment tag (e.g. `"BGM"`).
73    tag: String,
74    /// Human-readable segment name.
75    name: String,
76    /// Ordered element definitions.
77    elements: Vec<OwnedElementRef>,
78}
79
80impl OwnedSegmentDef {
81    /// Construct an owned segment definition.
82    ///
83    /// This is the ergonomic constructor for compile-time-known tags (e.g.
84    /// `"BGM"`, `"UNH"`).  It panics immediately on invalid input so that
85    /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86    /// boilerplate.
87    ///
88    /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89    /// external source (user input, config file, database) and you need a
90    /// `Result` to propagate errors gracefully.
91    ///
92    /// # Panics
93    ///
94    /// Panics if `tag` is not exactly three ASCII uppercase letters.
95    pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96        assert!(
97            tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98            "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99        );
100        Self {
101            tag,
102            name,
103            elements,
104        }
105    }
106
107    /// Construct an owned segment definition, returning an error for invalid tags.
108    ///
109    /// Prefer this over [`new`][Self::new] when the tag comes from an external
110    /// source (user input, config file, database) and you want to handle the
111    /// error without panicking.
112    ///
113    /// # Errors
114    ///
115    /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116    /// ASCII uppercase letters.
117    pub fn try_new(
118        tag: String,
119        name: String,
120        elements: Vec<OwnedElementRef>,
121    ) -> Result<Self, EdifactError> {
122        if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123            return Err(EdifactError::InvalidSegmentTag(tag));
124        }
125        Ok(Self {
126            tag,
127            name,
128            elements,
129        })
130    }
131
132    /// Segment tag (e.g. `"BGM"`).
133    #[inline]
134    pub fn tag(&self) -> &str {
135        &self.tag
136    }
137
138    /// Human-readable segment name.
139    #[inline]
140    pub fn name(&self) -> &str {
141        &self.name
142    }
143
144    /// Element definitions for this segment.
145    #[inline]
146    pub fn elements(&self) -> &[OwnedElementRef] {
147        &self.elements
148    }
149}
150
151impl OwnedElementRef {
152    /// Construct an owned element reference.
153    ///
154    /// This is the ergonomic constructor for compile-time-known positions.
155    /// It panics immediately on invalid input so that call sites with literal
156    /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157    ///
158    /// Use [`try_new`][Self::try_new] instead when the position originates from
159    /// an external source (user input, config file, database) and you need a
160    /// `Result` to propagate errors gracefully.
161    ///
162    /// # Panics
163    ///
164    /// Panics if `position` is `0` (positions are one-based).
165    pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166        assert!(
167            position != 0,
168            "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169        );
170        Self {
171            position,
172            data_element,
173            status,
174            max_repeat,
175        }
176    }
177
178    /// Construct an owned element reference, returning an error for position `0`.
179    ///
180    /// Prefer this over [`new`][Self::new] when the position comes from an
181    /// external source (user input, config file, database) and you want to
182    /// handle the error without panicking.
183    ///
184    /// # Errors
185    ///
186    /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187    pub fn try_new(
188        position: u8,
189        data_element: String,
190        status: Status,
191        max_repeat: u8,
192    ) -> Result<Self, EdifactError> {
193        if position == 0 {
194            return Err(EdifactError::InvalidElementPosition);
195        }
196        Ok(Self {
197            position,
198            data_element,
199            status,
200            max_repeat,
201        })
202    }
203
204    /// One-based element position (always >= 1).
205    #[inline]
206    pub fn position(&self) -> u8 {
207        self.position
208    }
209
210    /// UN/EDIFACT data element identifier.
211    #[inline]
212    pub fn data_element(&self) -> &str {
213        &self.data_element
214    }
215
216    /// Requirement status of this element.
217    #[inline]
218    pub fn status(&self) -> Status {
219        self.status
220    }
221
222    /// Maximum repetition count for this element.
223    #[inline]
224    pub fn max_repeat(&self) -> u8 {
225        self.max_repeat
226    }
227}
228
229type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
230type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
231type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
232type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
233type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
234type AdditionalStructureRuleFn =
235    Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
236/// Returns the `(element_index, component_index, data_element_id)` tuples to
237/// validate against a code list for the given segment tag.
238type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
239/// Returns the mandatory segment tags for a given EDIFACT message type.
240///
241/// The slice should contain every tag that must appear at least once in a
242/// conformant message of the given type.  The tags are also used to check
243/// canonical ordering — their relative order in the returned slice is taken
244/// as the expected order in the message.
245type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
247/// Internal enum that unifies lookup results from static and owned segment definitions.
248///
249/// Allows `validate_segment` to handle both code-generated (`&'static`) and
250/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
251enum SegmentDefRef<'a> {
252    Static(&'static SegmentDefinition),
253    Owned(&'a OwnedSegmentDef),
254}
255
256impl<'a> SegmentDefRef<'a> {
257    /// Returns the highest defined element position (one-based → used directly as
258    /// the maximum zero-based slot count for element-count validation).
259    ///
260    /// For owned definitions the highest `position` value may exceed the number
261    /// of entries in the `elements` vec when positions are non-consecutive.
262    fn max_element_position(&self) -> usize {
263        match self {
264            Self::Static(d) => d
265                .elements
266                .iter()
267                .map(|e| e.position as usize)
268                .max()
269                .unwrap_or(0),
270            Self::Owned(d) => d
271                .elements
272                .iter()
273                .map(|e| e.position as usize)
274                .max()
275                .unwrap_or(0),
276        }
277    }
278
279    /// Returns the highest position number among mandatory elements (one-based).
280    ///
281    /// This equals the minimum number of elements that must be present in a
282    /// segment: if the highest-positioned mandatory element is at position 5,
283    /// the segment must supply at least 5 elements.
284    fn last_mandatory_position(&self) -> usize {
285        match self {
286            Self::Static(d) => d
287                .elements
288                .iter()
289                .filter(|e| e.status == Status::Mandatory)
290                .map(|e| e.position as usize)
291                .max()
292                .unwrap_or(0),
293            Self::Owned(d) => d
294                .elements
295                .iter()
296                .filter(|e| e.status == Status::Mandatory)
297                .map(|e| e.position as usize)
298                .max()
299                .unwrap_or(0),
300        }
301    }
302
303    /// Iterate over mandatory element positions without heap allocation.
304    ///
305    /// Calls `f(zero_based_index, data_element_id)` for each element whose
306    /// status is [`Status::Mandatory`].  Returns `Err` immediately if `f`
307    /// returns `Err`, short-circuiting the remaining elements.
308    fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
309    where
310        F: FnMut(usize, &str) -> Result<(), E>,
311    {
312        match self {
313            Self::Static(d) => {
314                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
315                    f((e.position as usize).saturating_sub(1), e.data_element)?;
316                }
317            }
318            Self::Owned(d) => {
319                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
320                    f(
321                        (e.position as usize).saturating_sub(1),
322                        e.data_element.as_str(),
323                    )?;
324                }
325            }
326        }
327        Ok(())
328    }
329}
330
/// Built-in required-segments mapping, used unless a custom one is installed.
///
/// `UTILMD`, `ORDERS` and `INVOIC` require `UNH`, `BGM` and `UNT`; every other
/// message type only requires the `UNH` / `UNT` envelope.
fn default_required_segments(message_type: &str) -> &'static [&'static str] {
    const HEADER_BGM_TRAILER: &[&str] = &["UNH", "BGM", "UNT"];
    const HEADER_TRAILER: &[&str] = &["UNH", "UNT"];

    if matches!(message_type, "UTILMD" | "ORDERS" | "INVOIC") {
        HEADER_BGM_TRAILER
    } else {
        HEADER_TRAILER
    }
}
338
/// Code-list rules shared by every UN/EDIFACT directory release.
///
/// The result lists `(element_index, component_index, data_element_id)`
/// triples for `tag`; both indices are zero-based. Tags without a rule yield an
/// empty slice.
///
/// The table covers the qualifier/code elements that are validated most often
/// across ORDERS, INVOIC, UTILMD and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // One entry per segment tag; each currently checks the first component of
    // the first element against the named code list.
    const RULES_BY_TAG: &[(&str, &[(usize, usize, &str)])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    RULES_BY_TAG
        .iter()
        .find(|(known_tag, _)| *known_tag == tag)
        .map(|(_, rules)| *rules)
        .unwrap_or(&[])
}
359
360/// Shared validator implementation that is configured per UN/EDIFACT directory release.
361///
362/// # Scope and limitations
363///
364/// `DirectoryValidator` validates individual segment *content* (element counts,
365/// component counts, code-list values, and conditional rules) and checks that
366/// every *mandatory* segment type is present at least once.  It does **not**
367/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
368/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
369/// appears in the wrong position.  Full sequence validation requires a
370/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
371/// the scope of this implementation.
372#[derive(Clone)]
373pub struct DirectoryValidator {
374    directory_id: String,
375    segment_lookup: SegmentLookupFn,
376    /// Runtime-owned segment definitions (from builder / JSON / DB).
377    ///
378    /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
379    owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
380    is_code_valid: IsCodeValidFn,
381    suggest_code: SuggestCodeFn,
382    expected_components: ExpectedComponentsFn,
383    code_list_rules: CodeListRulesFn,
384    additional_structure_rule: Option<AdditionalStructureRuleFn>,
385    /// Configurable mapping from message type to required segment tags.
386    required_segments: RequiredSegmentsFn,
387    message_type: Option<String>,
388    enforce_known_tags: bool,
389    structure_checks: bool,
390    code_list_checks: bool,
391}
392
393impl std::fmt::Debug for DirectoryValidator {
394    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
395        f.debug_struct("DirectoryValidator")
396            .field("directory_id", &self.directory_id)
397            .field("message_type", &self.message_type)
398            .field("enforce_known_tags", &self.enforce_known_tags)
399            .field("structure_checks", &self.structure_checks)
400            .field("code_list_checks", &self.code_list_checks)
401            .finish_non_exhaustive()
402    }
403}
404
405impl DirectoryValidator {
406    /// Create a validator for a specific directory release with injected lookup/check hooks.
407    pub fn new(
408        directory_id: &'static str,
409        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
410        is_code_valid: fn(&str, &str) -> bool,
411        suggest_code: fn(&str, &str) -> Option<&'static str>,
412        expected_components: fn(&str, usize) -> Option<u8>,
413        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
414    ) -> Self {
415        Self {
416            directory_id: directory_id.to_owned(),
417            segment_lookup: Arc::new(segment_lookup),
418            owned_defs: None,
419            is_code_valid: Arc::new(is_code_valid),
420            suggest_code: Arc::new(suggest_code),
421            expected_components: Arc::new(expected_components),
422            code_list_rules: Arc::new(base_code_list_rules),
423            additional_structure_rule: additional_structure_rule
424                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
425            required_segments: Arc::new(default_required_segments),
426            message_type: None,
427            enforce_known_tags: true,
428            structure_checks: true,
429            code_list_checks: true,
430        }
431    }
432
433    /// Create a validator from a static slice of [`SegmentDefinition`]s.
434    ///
435    /// This is the preferred constructor when code-generating directory data as
436    /// a `static` array: no manual fn-pointer boilerplate is required.
437    ///
438    /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
439    /// always returns `true`).  Call [`with_code_list_rules`][Self::with_code_list_rules]
440    /// to register directory-specific rules that actually validate code values.
441    ///
442    /// # Example
443    ///
444    /// ```rust,ignore
445    /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
446    ///
447    /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
448    ///     .with_code_list_rules(my_code_list_rules);
449    /// ```
450    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
451        let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
452            definitions.iter().map(|d| (d.tag, d)).collect();
453        let lookup_map = Arc::new(lookup_map);
454        Self {
455            directory_id: "custom".to_owned(),
456            segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
457            owned_defs: None,
458            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
459            suggest_code: Arc::new(|_de: &str, _code: &str| None),
460            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
461            code_list_rules: Arc::new(base_code_list_rules),
462            additional_structure_rule: None,
463            required_segments: Arc::new(default_required_segments),
464            message_type: None,
465            enforce_known_tags: true,
466            structure_checks: true,
467            code_list_checks: false,
468        }
469    }
470
471    /// Create a validator from a runtime-owned collection of segment definitions.
472    ///
473    /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
474    /// loaded from an external source at startup (JSON, database, YAML, …) rather
475    /// than being known at compile time.
476    ///
477    /// Code-list checks are **disabled** by default; enable them by chaining
478    /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
479    /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
480    /// the builder.
481    ///
482    /// # Example
483    ///
484    /// ```rust,ignore
485    /// let defs = vec![
486    ///     OwnedSegmentDef::new(
487    ///         "BGM".to_owned(),
488    ///         "Beginning of message".to_owned(),
489    ///         vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
490    ///     ),
491    /// ];
492    /// let validator = DirectoryValidator::from_owned_definitions(defs)
493    ///     .with_directory_id("runtime-profile");
494    /// ```
495    pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
496        Self {
497            directory_id: "custom".to_owned(),
498            // The static lookup is never consulted when `owned_defs` is `Some`.
499            segment_lookup: Arc::new(|_| None),
500            owned_defs: Some(Arc::new(definitions)),
501            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
502            suggest_code: Arc::new(|_de: &str, _code: &str| None),
503            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
504            code_list_rules: Arc::new(base_code_list_rules),
505            additional_structure_rule: None,
506            required_segments: Arc::new(default_required_segments),
507            message_type: None,
508            enforce_known_tags: true,
509            structure_checks: true,
510            code_list_checks: false,
511        }
512    }
513
514    /// Set the directory identifier string (used in error messages).
515    pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
516        self.directory_id = id.into();
517        self
518    }
519
520    /// Override the code-list rules function.
521    ///
522    /// Directories can supply a directory-specific implementation that extends or
523    /// replaces the base rules from `base_code_list_rules`.
524    pub fn with_code_list_rules(
525        mut self,
526        f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
527    ) -> Self {
528        self.code_list_rules = Arc::new(f);
529        self
530    }
531
532    /// Enable only structure checks and disable code-list checks.
533    pub fn structure_only(mut self) -> Self {
534        self.structure_checks = true;
535        self.code_list_checks = false;
536        self
537    }
538
539    /// Enable only code-list checks and disable structure checks.
540    pub fn code_list_only(mut self) -> Self {
541        self.structure_checks = false;
542        self.code_list_checks = true;
543        self
544    }
545
546    /// Configure whether unknown segment tags should be rejected.
547    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
548        self.enforce_known_tags = enforce;
549        self
550    }
551
552    /// Override the required-segments mapping used for structural validation.
553    ///
554    /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
555    /// and must return a `'static` slice of segment tags that are mandatory for that
556    /// type.  The tags are checked both for *presence* and for *canonical ordering*
557    /// within the message.
558    ///
559    /// # Example
560    ///
561    /// ```rust,ignore
562    /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
563    ///     match msg_type {
564    ///         "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
565    ///         "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
566    ///         _ => &["UNH", "UNT"],
567    ///     }
568    /// }
569    ///
570    /// let validator = DirectoryValidator::from_definitions(DEFS)
571    ///     .with_required_segments(my_required_segments);
572    /// ```
573    pub fn with_required_segments(
574        mut self,
575        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
576    ) -> Self {
577        self.required_segments = Arc::new(f);
578        self
579    }
580
581    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
582        if let Some(explicit) = self.message_type.as_deref() {
583            return Some(explicit.to_owned());
584        }
585
586        segments
587            .iter()
588            .find(|s| s.tag == "UNH")
589            .and_then(|s| s.get_element(1))
590            .and_then(|e| e.get_component(0))
591            .map(str::to_owned)
592    }
593
594    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
595    ///
596    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
597    /// a sender is not required to transmit trailing empty components; this function
598    /// therefore strips them before checking against the expected count so that
599    /// conformant messages with omitted trailing components are still accepted.
600    ///
601    /// # Examples
602    ///
603    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
604    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
605    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
606        let elem = seg.elements.get(element_idx)?;
607        let mut count = elem.components.len();
608        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
609            count -= 1;
610        }
611        u8::try_from(count).ok()
612    }
613
614    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
615        for idx in 0..seg.elements.len() {
616            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
617                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
618                if actual != expected {
619                    return Err(EdifactError::InvalidComponentCount {
620                        tag: seg.tag.to_owned(),
621                        element_index: idx,
622                        expected,
623                        actual,
624                        offset: seg.span.start,
625                    });
626                }
627            }
628        }
629        Ok(())
630    }
631
632    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
633        let rules = (self.code_list_rules)(seg.tag);
634
635        for (elem_idx, comp_idx, de) in rules {
636            let value = seg
637                .get_element(*elem_idx)
638                .and_then(|e| e.get_component(*comp_idx))
639                .unwrap_or("");
640            if !value.is_empty() && !(self.is_code_valid)(de, value) {
641                let suggestion = (self.suggest_code)(de, value);
642                return Err(EdifactError::InvalidCodeValue {
643                    tag: seg.tag.to_owned(),
644                    element_index: *elem_idx,
645                    value: value.to_owned(),
646                    code_list: (*de).to_owned(),
647                    offset: seg.span.start,
648                    suggestion,
649                });
650            }
651        }
652
653        Ok(())
654    }
655}
656
657impl DirectoryValidator {
658    fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
659        if let Some(owned) = &self.owned_defs {
660            owned
661                .iter()
662                .find(|d| d.tag == tag)
663                .map(SegmentDefRef::Owned)
664        } else {
665            (self.segment_lookup)(tag).map(SegmentDefRef::Static)
666        }
667    }
668
669    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
670        if !self.structure_checks && !self.code_list_checks {
671            return Ok(());
672        }
673
674        let Some(def) = self.resolve_def(seg.tag) else {
675            if self.structure_checks && self.enforce_known_tags {
676                return Err(EdifactError::InvalidSegmentForMessage {
677                    tag: seg.tag.to_owned(),
678                    message_type: self
679                        .message_type
680                        .clone()
681                        .unwrap_or_else(|| self.directory_id.clone()),
682                    offset: seg.tag_span.start,
683                });
684            }
685            return Ok(());
686        };
687
688        let max_elements = def.max_element_position();
689        let min_elements = def.last_mandatory_position();
690        let actual = seg.elements.len();
691
692        if self.structure_checks && (actual < min_elements || actual > max_elements) {
693            return Err(EdifactError::InvalidElementCount {
694                tag: seg.tag.to_owned(),
695                min: min_elements,
696                max: max_elements,
697                actual,
698                offset: seg.span.start,
699            });
700        }
701
702        if self.structure_checks {
703            def.for_each_mandatory_position(|idx, _de| {
704                let is_present = seg
705                    .elements
706                    .get(idx)
707                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
708                if !is_present {
709                    return Err(EdifactError::MissingRequiredElement {
710                        tag: seg.tag.to_owned(),
711                        element_index: idx,
712                    });
713                }
714                Ok(())
715            })?;
716            self.validate_component_counts(seg)?;
717
718            if let Some(rule) = &self.additional_structure_rule {
719                rule(seg)?;
720            }
721        }
722
723        if self.code_list_checks {
724            self.validate_code_lists(seg)?;
725        }
726
727        Ok(())
728    }
729}
730
731impl Validator for DirectoryValidator {
732    fn set_message_type(&mut self, message_type: Option<&str>) {
733        self.message_type = message_type.map(str::to_owned);
734    }
735
736    fn validate_batch(
737        &self,
738        segments: &[Segment<'_>],
739        report: &mut ValidationReport,
740        _context: &ValidationRuleContext<'_>,
741    ) {
742        for seg in segments {
743            if let Err(err) = self.validate_segment(seg) {
744                report_error(report, err);
745            }
746        }
747
748        if self.structure_checks {
749            if let Some(message_type) = self.detect_message_type(segments) {
750                for required_tag in (self.required_segments)(&message_type) {
751                    if segments.iter().all(|s| s.tag != *required_tag) {
752                        report.add_error(
753                            ValidationIssue::new(
754                                ValidationSeverity::Error,
755                                format!(
756                                    "required segment {} missing for message type {}",
757                                    required_tag, message_type
758                                ),
759                            )
760                            .with_segment(*required_tag)
761                            .with_suggestion("Add the mandatory segment at the correct position"),
762                        );
763                    }
764                }
765
766                let seq = (self.required_segments)(&message_type);
767                let mut last_idx = None;
768                for tag in seq {
769                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
770                        if let Some(prev) = last_idx {
771                            if idx < prev {
772                                report.add_error(
773                                    ValidationIssue::new(
774                                        ValidationSeverity::Error,
775                                        format!(
776                                            "segment sequence violation for message type {}: '{}' appears out of order",
777                                            message_type, tag
778                                        ),
779                                    )
780                                    .with_segment(*tag)
781                                    .with_suggestion(
782                                        "Ensure required segments follow UN/EDIFACT canonical order",
783                                    ),
784                                );
785                            }
786                        }
787                        last_idx = Some(idx);
788                    }
789                }
790            }
791        }
792    }
793}
794
795// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
796
797/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
798///
799/// Use this when segment definitions are loaded from an external source at
800/// startup (JSON, database, YAML, …) rather than being available as `static`
801/// arrays at compile time.
802///
803/// # Example
804///
805/// ```rust,ignore
806/// let validator = DirectoryValidatorBuilder::new("my-profile")
807///     .add_segment(
808///         OwnedSegmentDef::new(
809///             "BGM".to_owned(),
810///             "Beginning of message".to_owned(),
811///             vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
812///         ),
813///     )
814///     .build();
815/// ```
816#[derive(Debug, Default)]
817pub struct DirectoryValidatorBuilder {
818    directory_id: Option<String>,
819    segments: Vec<OwnedSegmentDef>,
820}
821
822impl DirectoryValidatorBuilder {
823    /// Create a new builder with the given directory identifier.
824    ///
825    /// The identifier is used in error messages; set a human-readable value
826    /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
827    pub fn new(directory_id: impl Into<String>) -> Self {
828        Self {
829            directory_id: Some(directory_id.into()),
830            segments: Vec::new(),
831        }
832    }
833
834    /// Add a segment definition to the builder.
835    ///
836    /// Definitions can be added in any order; the resulting validator looks
837    /// them up by tag at validation time.
838    pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
839        self.segments.push(def);
840        self
841    }
842
843    /// Extend the builder with multiple segment definitions at once.
844    pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
845        self.segments.extend(defs);
846        self
847    }
848
849    /// Build the [`DirectoryValidator`].
850    ///
851    /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
852    /// Code-list checks are disabled by default; chain
853    /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
854    /// enable them.
855    pub fn build(self) -> DirectoryValidator {
856        let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
857        if let Some(id) = self.directory_id {
858            validator.directory_id = id;
859        }
860        validator
861    }
862}
863
#[cfg(test)]
mod tests {
    use super::*;

    // ── fixtures ─────────────────────────────────────────────────────────────

    /// Element layout of the fixture segment: a single mandatory `C507`.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// The only segment definition known to `segment_lookup`.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Resolves `"TST"` to the fixture definition; every other tag is unknown.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code check stub that accepts every value.
    fn any_code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion stub that never proposes a replacement code.
    fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count stub that always returns `None`.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Validator over the `TST` fixture wired up with the permissive stubs.
    fn base_validator() -> DirectoryValidator {
        DirectoryValidator::new(
            "TEST",
            segment_lookup,
            any_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
    }

    /// Parses `input` and returns the first segment, panicking on any failure.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let parsed =
            crate::from_reader(std::io::Cursor::new(input)).expect("parse should succeed");
        parsed
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    // ── mandatory element presence ───────────────────────────────────────────

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The composite `:ABC` has an empty first component but a non-empty
        // second one; validation is expected to report no errors.
        let parsed: Result<Vec<_>, _> = crate::from_bytes(b"TST+:ABC'").collect();
        let segments = parsed.expect("parse should succeed");

        let context = crate::validator::ValidationRuleContext::empty();
        let mut report = ValidationReport::default();
        base_validator().validate_batch(&segments, &mut report, &context);

        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the last
        // of which is empty. ISO 9735-1 §3.3 allows trailing empty components
        // to be omitted, so the effective count should be 2.
        let owned = parse_single(b"DTM+137:20200101:'");
        let borrowed = owned.as_borrowed();

        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components — so nothing
        // remains after trimming and the effective count is 0.
        let owned = parse_single(b"NAD+MS++:'");
        let borrowed = owned.as_borrowed();

        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Every one of the three components in `DTM+137:20200101:102` is
        // populated, so none may be trimmed.
        let owned = parse_single(b"DTM+137:20200101:102'");
        let borrowed = owned.as_borrowed();

        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    // ── code-list rule overrides ─────────────────────────────────────────────

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rule table: element 0, component 0 of TST is checked against
        // the "CUSTOM_DE" code list.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            const TST_RULES: &[(usize, usize, &str)] = &[(0, 0, "CUSTOM_DE")];
            if tag == "TST" {
                TST_RULES
            } else {
                &[]
            }
        }
        // Only the literal code "VALID" passes the custom code list.
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }

        let parsed: Result<Vec<_>, _> = crate::from_bytes(b"TST+INVALID'").collect();
        let segments = parsed.expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let context = crate::validator::ValidationRuleContext::empty();
        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report, &context);

        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}