Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Requirement level that a directory assigns to a data element of a segment.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Status {
    /// The element has to be supplied.
    Mandatory,
    /// The element may be left out unless an additional rule demands it.
    Conditional,
}
15
16/// Reference to a data element within a segment definition.
17///
18/// Fields are private to enforce the one-based position invariant through the
19/// [`ElementRef::new`] constructor.  Use [`ElementRef::new`] for compile-time
20/// literals (panics at compile time when `position == 0`).
21///
22/// Use [`OwnedElementRef`] for runtime-constructed element refs.
23#[derive(Debug, Clone, Copy)]
24pub struct ElementRef {
25    /// One-based element position in the segment definition.
26    position: u8,
27    /// UN/EDIFACT data element identifier.
28    data_element: &'static str,
29    /// Requirement status of the element.
30    status: Status,
31    /// Maximum repetition count for this element.
32    max_repeat: u8,
33}
34
35impl ElementRef {
36    /// Construct an `ElementRef` with compile-time position validation.
37    ///
38    /// `position` must be ≥ 1 (one-based).  When called in a `const` context
39    /// (e.g. inside a `static` array initialiser), a zero `position` causes a
40    /// **compile-time error**.  At runtime it panics.
41    ///
42    /// # Panics
43    ///
44    /// Panics if `position == 0`.
45    ///
46    /// # Example
47    ///
48    /// ```rust
49    /// use edifact_rs::{ElementRef, Status};
50    ///
51    /// const BGM_1001: ElementRef = ElementRef::new(1, "1001", Status::Mandatory, 1);
52    /// ```
53    #[must_use]
54    pub const fn new(
55        position: u8,
56        data_element: &'static str,
57        status: Status,
58        max_repeat: u8,
59    ) -> Self {
60        assert!(
61            position != 0,
62            "ElementRef position must be >= 1 (one-based)"
63        );
64        Self {
65            position,
66            data_element,
67            status,
68            max_repeat,
69        }
70    }
71
72    /// One-based element position in the segment definition.
73    #[must_use]
74    #[inline]
75    pub const fn position(&self) -> u8 {
76        self.position
77    }
78
79    /// UN/EDIFACT data element identifier.
80    #[must_use]
81    #[inline]
82    pub const fn data_element(&self) -> &'static str {
83        self.data_element
84    }
85
86    /// Requirement status of the element.
87    #[must_use]
88    #[inline]
89    pub const fn status(&self) -> Status {
90        self.status
91    }
92
93    /// Maximum repetition count for this element.
94    #[must_use]
95    #[inline]
96    pub const fn max_repeat(&self) -> u8 {
97        self.max_repeat
98    }
99}
100
101/// Definition of an EDIFACT segment (tag + element structure).
102#[derive(Debug)]
103pub struct SegmentDefinition {
104    /// Segment tag.
105    pub tag: &'static str,
106    /// Human-readable segment name.
107    pub name: &'static str,
108    /// Ordered element definitions.
109    pub elements: &'static [ElementRef],
110}
111
112/// Owned runtime equivalent of [`ElementRef`].
113///
114/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
115/// to construct validators from data that is not available at compile time (e.g. loaded
116/// from JSON or a database at startup).
117///
118/// Use [`OwnedElementRef::new_unchecked`] for compile-time-known positions (panics on invalid
119/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
120/// comes from an external source and you need a `Result`. Fields are private to prevent
121/// bypassing the position invariant through struct-literal syntax.
122#[derive(Debug, Clone)]
123pub struct OwnedElementRef {
124    /// One-based element position.
125    position: u8,
126    /// UN/EDIFACT data element identifier.
127    data_element: String,
128    /// Requirement status.
129    status: Status,
130    /// Maximum repetition count.
131    max_repeat: u8,
132}
133
134/// Owned runtime equivalent of [`SegmentDefinition`].
135///
136/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
137///
138/// Use [`OwnedSegmentDef::new_unchecked`] for compile-time-known tags (panics on invalid input,
139/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
140/// an external source and you need a `Result`. Fields are private to prevent bypassing
141/// the tag invariant through struct-literal syntax.
142#[derive(Debug, Clone)]
143pub struct OwnedSegmentDef {
144    /// Segment tag (e.g. `"BGM"`).
145    tag: String,
146    /// Human-readable segment name.
147    name: String,
148    /// Ordered element definitions.
149    elements: Vec<OwnedElementRef>,
150}
151
152impl OwnedSegmentDef {
153    /// Construct an owned segment definition.
154    ///
155    /// This is the ergonomic constructor for compile-time-known tags (e.g.
156    /// `"BGM"`, `"UNH"`).  It panics immediately on invalid input so that
157    /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
158    /// boilerplate.
159    ///
160    /// Use [`try_new`][Self::try_new] instead when the tag originates from an
161    /// external source (user input, config file, database) and you need a
162    /// `Result` to propagate errors gracefully.
163    ///
164    /// # Panics
165    ///
166    /// Panics if `tag` is not exactly three ASCII uppercase letters.
167    pub fn new_unchecked(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
168        assert!(
169            tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
170            "OwnedSegmentDef::new_unchecked: tag must be exactly three ASCII uppercase letters, got {tag:?}"
171        );
172        Self {
173            tag,
174            name,
175            elements,
176        }
177    }
178
179    /// Construct an owned segment definition, returning an error for invalid tags.
180    ///
181    /// Prefer this over [`new_unchecked`][Self::new_unchecked] when the tag comes from an external
182    /// source (user input, config file, database) and you want to handle the
183    /// error without panicking.
184    ///
185    /// # Errors
186    ///
187    /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
188    /// ASCII uppercase letters.
189    pub fn try_new(
190        tag: String,
191        name: String,
192        elements: Vec<OwnedElementRef>,
193    ) -> Result<Self, EdifactError> {
194        if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
195            return Err(EdifactError::InvalidSegmentTag(tag));
196        }
197        Ok(Self {
198            tag,
199            name,
200            elements,
201        })
202    }
203
204    /// Segment tag (e.g. `"BGM"`).
205    #[inline]
206    pub fn tag(&self) -> &str {
207        &self.tag
208    }
209
210    /// Human-readable segment name.
211    #[inline]
212    pub fn name(&self) -> &str {
213        &self.name
214    }
215
216    /// Element definitions for this segment.
217    #[inline]
218    pub fn elements(&self) -> &[OwnedElementRef] {
219        &self.elements
220    }
221}
222
223impl OwnedElementRef {
224    /// Construct an owned element reference.
225    ///
226    /// This is the ergonomic constructor for compile-time-known positions.
227    /// It panics immediately on invalid input so that call sites with literal
228    /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
229    ///
230    /// Use [`try_new`][Self::try_new] instead when the position originates from
231    /// an external source (user input, config file, database) and you need a
232    /// `Result` to propagate errors gracefully.
233    ///
234    /// # Panics
235    ///
236    /// Panics if `position` is `0` (positions are one-based).
237    pub fn new_unchecked(
238        position: u8,
239        data_element: String,
240        status: Status,
241        max_repeat: u8,
242    ) -> Self {
243        assert!(
244            position != 0,
245            "OwnedElementRef::new_unchecked: position must be >= 1 (one-based), got 0"
246        );
247        Self {
248            position,
249            data_element,
250            status,
251            max_repeat,
252        }
253    }
254
255    /// Construct an owned element reference, returning an error for position `0`.
256    ///
257    /// Prefer this over [`new_unchecked`][Self::new_unchecked] when the position comes from an
258    /// external source (user input, config file, database) and you want to
259    /// handle the error without panicking.
260    ///
261    /// # Errors
262    ///
263    /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
264    pub fn try_new(
265        position: u8,
266        data_element: String,
267        status: Status,
268        max_repeat: u8,
269    ) -> Result<Self, EdifactError> {
270        if position == 0 {
271            return Err(EdifactError::InvalidElementPosition);
272        }
273        Ok(Self {
274            position,
275            data_element,
276            status,
277            max_repeat,
278        })
279    }
280
281    /// One-based element position (always >= 1).
282    #[inline]
283    pub fn position(&self) -> u8 {
284        self.position
285    }
286
287    /// UN/EDIFACT data element identifier.
288    #[inline]
289    pub fn data_element(&self) -> &str {
290        &self.data_element
291    }
292
293    /// Requirement status of this element.
294    #[inline]
295    pub fn status(&self) -> Status {
296        self.status
297    }
298
299    /// Maximum repetition count for this element.
300    #[inline]
301    pub fn max_repeat(&self) -> u8 {
302        self.max_repeat
303    }
304}
305
306type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
307type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
308type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
309type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
310type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
311type AdditionalStructureRuleFn =
312    Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
313/// Returns the `(element_index, component_index, data_element_id)` tuples to
314/// validate against a code list for the given segment tag.
315type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
316/// Returns the mandatory segment tags for a given EDIFACT message type.
317///
318/// The slice should contain every tag that must appear at least once in a
319/// conformant message of the given type.  The tags are also used to check
320/// canonical ordering — their relative order in the returned slice is taken
321/// as the expected order in the message.
322type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
323
324/// Internal enum that unifies lookup results from static and owned segment definitions.
325///
326/// Allows `validate_segment` to handle both code-generated (`&'static`) and
327/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
328enum SegmentDefRef<'a> {
329    Static(&'static SegmentDefinition),
330    Owned(&'a OwnedSegmentDef),
331}
332
333impl SegmentDefRef<'_> {
334    /// Returns the highest defined element position (one-based → used directly as
335    /// the maximum zero-based slot count for element-count validation).
336    ///
337    /// For owned definitions the highest `position` value may exceed the number
338    /// of entries in the `elements` vec when positions are non-consecutive.
339    fn max_element_position(&self) -> usize {
340        match self {
341            Self::Static(d) => d
342                .elements
343                .iter()
344                .map(|e| e.position as usize)
345                .max()
346                .unwrap_or(0),
347            Self::Owned(d) => d
348                .elements
349                .iter()
350                .map(|e| e.position as usize)
351                .max()
352                .unwrap_or(0),
353        }
354    }
355
356    /// Returns the highest position number among mandatory elements (one-based).
357    ///
358    /// This equals the minimum number of elements that must be present in a
359    /// segment: if the highest-positioned mandatory element is at position 5,
360    /// the segment must supply at least 5 elements.
361    fn last_mandatory_position(&self) -> usize {
362        match self {
363            Self::Static(d) => d
364                .elements
365                .iter()
366                .filter(|e| e.status == Status::Mandatory)
367                .map(|e| e.position as usize)
368                .max()
369                .unwrap_or(0),
370            Self::Owned(d) => d
371                .elements
372                .iter()
373                .filter(|e| e.status == Status::Mandatory)
374                .map(|e| e.position as usize)
375                .max()
376                .unwrap_or(0),
377        }
378    }
379
380    /// Iterate over mandatory element positions without heap allocation.
381    ///
382    /// Calls `f(zero_based_index, data_element_id)` for each element whose
383    /// status is [`Status::Mandatory`].  Returns `Err` immediately if `f`
384    /// returns `Err`, short-circuiting the remaining elements.
385    fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
386    where
387        F: FnMut(usize, &str) -> Result<(), E>,
388    {
389        match self {
390            Self::Static(d) => {
391                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
392                    f((e.position as usize).saturating_sub(1), e.data_element)?;
393                }
394            }
395            Self::Owned(d) => {
396                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
397                    f(
398                        (e.position as usize).saturating_sub(1),
399                        e.data_element.as_str(),
400                    )?;
401                }
402            }
403        }
404        Ok(())
405    }
406}
407
/// Required-segments mapping that applies when the caller supplies none.
///
/// The message type is ignored: the result is always the universal minimum of
/// an opening `UNH` and a closing `UNT`.  Segments that are mandatory only for
/// particular message types (such as `BGM` for ORDERS/INVOIC) are left to a
/// [`ProfileRulePack`][crate::ProfileRulePack] or to a custom function given to
/// [`DirectoryValidatorBuilder::with_required_segments`], so that message
/// types which do not require `BGM` produce no false positives.
fn default_required_segments(_message_type: &str) -> &'static [&'static str] {
    const UNIVERSAL_ENVELOPE: &[&str] = &["UNH", "UNT"];
    UNIVERSAL_ENVELOPE
}
419
/// Code-list validation rules shared by all UN/EDIFACT directory releases.
///
/// The result lists `(element_index, component_index, data_element_id)`
/// entries for `tag`; both indices are zero-based.  Tags without an entry in
/// the table yield an empty slice.
///
/// The table covers the qualifier/code elements that are validated most often
/// across ORDERS, INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    type Rule = (usize, usize, &'static str);

    const NO_RULES: &[Rule] = &[];
    const RULES_BY_TAG: &[(&str, &[Rule])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    RULES_BY_TAG
        .iter()
        .find(|(known_tag, _)| *known_tag == tag)
        .map_or(NO_RULES, |(_, rules)| *rules)
}
440
441/// Shared validator implementation that is configured per UN/EDIFACT directory release.
442///
443/// # Scope and limitations
444///
445/// `DirectoryValidator` validates individual segment *content* (element counts,
446/// component counts, code-list values, and conditional rules) and checks that
447/// every *mandatory* segment type is present at least once.  It does **not**
448/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
449/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
450/// appears in the wrong position.  Full sequence validation requires a
451/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
452/// the scope of this implementation.
453#[derive(Clone)]
454pub struct DirectoryValidator {
455    directory_id: String,
456    segment_lookup: SegmentLookupFn,
457    /// Runtime-owned segment definitions (from builder / JSON / DB).
458    ///
459    /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
460    owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
461    is_code_valid: IsCodeValidFn,
462    suggest_code: SuggestCodeFn,
463    expected_components: ExpectedComponentsFn,
464    code_list_rules: CodeListRulesFn,
465    additional_structure_rule: Option<AdditionalStructureRuleFn>,
466    /// Configurable mapping from message type to required segment tags.
467    required_segments: RequiredSegmentsFn,
468    message_type: Option<String>,
469    enforce_known_tags: bool,
470    structure_checks: bool,
471    code_list_checks: bool,
472}
473
474impl std::fmt::Debug for DirectoryValidator {
475    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
476        f.debug_struct("DirectoryValidator")
477            .field("directory_id", &self.directory_id)
478            .field("message_type", &self.message_type)
479            .field("enforce_known_tags", &self.enforce_known_tags)
480            .field("structure_checks", &self.structure_checks)
481            .field("code_list_checks", &self.code_list_checks)
482            .finish_non_exhaustive()
483    }
484}
485
486impl DirectoryValidator {
487    /// Create a validator for a specific directory release with injected lookup/check hooks.
488    pub fn new(
489        directory_id: &'static str,
490        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
491        is_code_valid: fn(&str, &str) -> bool,
492        suggest_code: fn(&str, &str) -> Option<&'static str>,
493        expected_components: fn(&str, usize) -> Option<u8>,
494        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
495    ) -> Self {
496        Self {
497            directory_id: directory_id.to_owned(),
498            segment_lookup: Arc::new(segment_lookup),
499            owned_defs: None,
500            is_code_valid: Arc::new(is_code_valid),
501            suggest_code: Arc::new(suggest_code),
502            expected_components: Arc::new(expected_components),
503            code_list_rules: Arc::new(base_code_list_rules),
504            additional_structure_rule: additional_structure_rule
505                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
506            required_segments: Arc::new(default_required_segments),
507            message_type: None,
508            enforce_known_tags: true,
509            structure_checks: true,
510            code_list_checks: true,
511        }
512    }
513
514    /// Create a validator from a static slice of [`SegmentDefinition`]s.
515    ///
516    /// This is the preferred constructor when code-generating directory data as
517    /// a `static` array: no manual fn-pointer boilerplate is required.
518    ///
519    /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
520    /// always returns `true`).  Call [`with_code_list_rules`][Self::with_code_list_rules]
521    /// to register directory-specific rules that actually validate code values.
522    ///
523    /// # Example
524    ///
525    /// ```rust,ignore
526    /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
527    ///
528    /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
529    ///     .with_code_list_rules(my_code_list_rules);
530    /// ```
531    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
532        let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
533            definitions.iter().map(|d| (d.tag, d)).collect();
534        let lookup_map = Arc::new(lookup_map);
535        Self {
536            directory_id: "custom".to_owned(),
537            segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
538            owned_defs: None,
539            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
540            suggest_code: Arc::new(|_de: &str, _code: &str| None),
541            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
542            code_list_rules: Arc::new(base_code_list_rules),
543            additional_structure_rule: None,
544            required_segments: Arc::new(default_required_segments),
545            message_type: None,
546            enforce_known_tags: true,
547            structure_checks: true,
548            code_list_checks: false,
549        }
550    }
551
552    /// Create a validator from a runtime-owned collection of segment definitions.
553    ///
554    /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
555    /// loaded from an external source at startup (JSON, database, YAML, …) rather
556    /// than being known at compile time.
557    ///
558    /// Code-list checks are **disabled** by default; enable them by chaining
559    /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
560    /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
561    /// the builder.
562    ///
563    /// # Example
564    ///
565    /// ```rust,ignore
566    /// let defs = vec![
567    ///     OwnedSegmentDef::new_unchecked(
568    ///         "BGM".to_owned(),
569    ///         "Beginning of message".to_owned(),
570    ///         vec![OwnedElementRef::new_unchecked(1, "C002".to_owned(), Status::Mandatory, 1)],
571    ///     ),
572    /// ];
573    /// let validator = DirectoryValidator::from_owned_definitions(defs)
574    ///     .with_directory_id("runtime-profile");
575    /// ```
576    pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
577        Self {
578            directory_id: "custom".to_owned(),
579            // The static lookup is never consulted when `owned_defs` is `Some`.
580            segment_lookup: Arc::new(|_| None),
581            owned_defs: Some(Arc::new(definitions)),
582            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
583            suggest_code: Arc::new(|_de: &str, _code: &str| None),
584            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
585            code_list_rules: Arc::new(base_code_list_rules),
586            additional_structure_rule: None,
587            required_segments: Arc::new(default_required_segments),
588            message_type: None,
589            enforce_known_tags: true,
590            structure_checks: true,
591            code_list_checks: false,
592        }
593    }
594
595    /// Set the directory identifier string (used in error messages).
596    pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
597        self.directory_id = id.into();
598        self
599    }
600
601    /// Override the code-list rules function.
602    ///
603    /// Directories can supply a directory-specific implementation that extends or
604    /// replaces the base rules from `base_code_list_rules`.
605    pub fn with_code_list_rules(
606        mut self,
607        f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
608    ) -> Self {
609        self.code_list_rules = Arc::new(f);
610        self
611    }
612
613    /// Enable only structure checks and disable code-list checks.
614    pub fn structure_only(mut self) -> Self {
615        self.structure_checks = true;
616        self.code_list_checks = false;
617        self
618    }
619
620    /// Enable only code-list checks and disable structure checks.
621    pub fn code_list_only(mut self) -> Self {
622        self.structure_checks = false;
623        self.code_list_checks = true;
624        self
625    }
626
627    /// Configure whether unknown segment tags should be rejected.
628    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
629        self.enforce_known_tags = enforce;
630        self
631    }
632
633    /// Override the required-segments mapping used for structural validation.
634    ///
635    /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
636    /// and must return a `'static` slice of segment tags that are mandatory for that
637    /// type.  The tags are checked both for *presence* and for *canonical ordering*
638    /// within the message.
639    ///
640    /// # Example
641    ///
642    /// ```rust,ignore
643    /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
644    ///     match msg_type {
645    ///         "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
646    ///         "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
647    ///         _ => &["UNH", "UNT"],
648    ///     }
649    /// }
650    ///
651    /// let validator = DirectoryValidator::from_definitions(DEFS)
652    ///     .with_required_segments(my_required_segments);
653    /// ```
654    pub fn with_required_segments(
655        mut self,
656        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
657    ) -> Self {
658        self.required_segments = Arc::new(f);
659        self
660    }
661
662    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
663        if let Some(explicit) = self.message_type.as_deref() {
664            return Some(explicit.to_owned());
665        }
666
667        segments
668            .iter()
669            .find(|s| s.tag == "UNH")
670            .and_then(|s| s.get_element(1))
671            .and_then(|e| e.get_component(0))
672            .map(str::to_owned)
673    }
674
675    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
676    ///
677    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
678    /// a sender is not required to transmit trailing empty components; this function
679    /// therefore strips them before checking against the expected count so that
680    /// conformant messages with omitted trailing components are still accepted.
681    ///
682    /// # Examples
683    ///
684    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
685    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
686    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
687        let elem = seg.elements.get(element_idx)?;
688        let mut count = elem.components.len();
689        while count > 0 && elem.components[count - 1].0.as_ref().is_empty() {
690            count -= 1;
691        }
692        u8::try_from(count).ok()
693    }
694
695    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
696        for idx in 0..seg.elements.len() {
697            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
698                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
699                if actual != expected {
700                    return Err(EdifactError::InvalidComponentCount {
701                        tag: seg.tag.to_owned(),
702                        element_index: idx,
703                        expected,
704                        actual,
705                        offset: seg.span.start,
706                    });
707                }
708            }
709        }
710        Ok(())
711    }
712
713    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
714        let rules = (self.code_list_rules)(seg.tag);
715
716        for (elem_idx, comp_idx, de) in rules {
717            let value = seg
718                .get_element(*elem_idx)
719                .and_then(|e| e.get_component(*comp_idx))
720                .unwrap_or("");
721            if !value.is_empty() && !(self.is_code_valid)(de, value) {
722                let suggestion = (self.suggest_code)(de, value);
723                return Err(EdifactError::InvalidCodeValue {
724                    tag: seg.tag.to_owned(),
725                    element_index: *elem_idx,
726                    value: value.to_owned(),
727                    code_list: (*de).to_owned(),
728                    offset: seg.span.start,
729                    suggestion,
730                });
731            }
732        }
733
734        Ok(())
735    }
736}
737
738impl DirectoryValidator {
739    fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
740        if let Some(owned) = &self.owned_defs {
741            owned
742                .iter()
743                .find(|d| d.tag == tag)
744                .map(SegmentDefRef::Owned)
745        } else {
746            (self.segment_lookup)(tag).map(SegmentDefRef::Static)
747        }
748    }
749
750    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
751        if !self.structure_checks && !self.code_list_checks {
752            return Ok(());
753        }
754
755        let Some(def) = self.resolve_def(seg.tag) else {
756            if self.structure_checks && self.enforce_known_tags {
757                return Err(EdifactError::InvalidSegmentForMessage {
758                    tag: seg.tag.to_owned(),
759                    message_type: self
760                        .message_type
761                        .clone()
762                        .unwrap_or_else(|| self.directory_id.clone()),
763                    offset: seg.tag_span.start,
764                });
765            }
766            return Ok(());
767        };
768
769        let max_elements = def.max_element_position();
770        let min_elements = def.last_mandatory_position();
771        let actual = seg.elements.len();
772
773        if self.structure_checks && (actual < min_elements || actual > max_elements) {
774            return Err(EdifactError::InvalidElementCount {
775                tag: seg.tag.to_owned(),
776                min: min_elements,
777                max: max_elements,
778                actual,
779                offset: seg.span.start,
780            });
781        }
782
783        if self.structure_checks {
784            def.for_each_mandatory_position(|idx, _de| {
785                let is_present = seg.elements.get(idx).is_some_and(|elem| {
786                    elem.components.iter().any(|(c, _)| !c.as_ref().is_empty())
787                });
788                if !is_present {
789                    return Err(EdifactError::MissingRequiredElement {
790                        tag: seg.tag.to_owned(),
791                        element_index: idx,
792                    });
793                }
794                Ok(())
795            })?;
796            self.validate_component_counts(seg)?;
797
798            if let Some(rule) = &self.additional_structure_rule {
799                rule(seg)?;
800            }
801        }
802
803        if self.code_list_checks {
804            self.validate_code_lists(seg)?;
805        }
806
807        Ok(())
808    }
809}
810
811impl Validator for DirectoryValidator {
812    fn set_message_type(&mut self, message_type: Option<&str>) {
813        self.message_type = message_type.map(str::to_owned);
814    }
815
816    fn validate_batch(
817        &self,
818        segments: &[Segment<'_>],
819        report: &mut ValidationReport,
820        _context: &ValidationRuleContext<'_>,
821    ) {
822        for seg in segments {
823            if let Err(err) = self.validate_segment(seg) {
824                report_error(report, err);
825            }
826        }
827
828        if self.structure_checks {
829            if let Some(message_type) = self.detect_message_type(segments) {
830                for required_tag in (self.required_segments)(&message_type) {
831                    if segments.iter().all(|s| s.tag != *required_tag) {
832                        report.add_error(
833                            ValidationIssue::new(
834                                ValidationSeverity::Error,
835                                format!(
836                                    "required segment {} missing for message type {}",
837                                    required_tag, message_type
838                                ),
839                            )
840                            .with_segment(*required_tag)
841                            .with_suggestion("Add the mandatory segment at the correct position"),
842                        );
843                    }
844                }
845
846                let seq = (self.required_segments)(&message_type);
847                let mut last_idx = None;
848                for tag in seq {
849                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
850                        if let Some(prev) = last_idx {
851                            if idx < prev {
852                                report.add_error(
853                                    ValidationIssue::new(
854                                        ValidationSeverity::Error,
855                                        format!(
856                                            "segment sequence violation for message type {}: '{}' appears out of order",
857                                            message_type, tag
858                                        ),
859                                    )
860                                    .with_segment(*tag)
861                                    .with_suggestion(
862                                        "Ensure required segments follow UN/EDIFACT canonical order",
863                                    ),
864                                );
865                            }
866                        }
867                        last_idx = Some(idx);
868                    }
869                }
870            }
871        }
872    }
873}
874
875// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
876
/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
///
/// Use this when segment definitions are loaded from an external source at
/// startup (JSON, database, YAML, …) rather than being available as `static`
/// arrays at compile time.
///
/// # Example
///
/// ```rust,ignore
/// let validator = DirectoryValidatorBuilder::new("my-profile")
///     .add_segment(
///         OwnedSegmentDef::new_unchecked(
///             "BGM".to_owned(),
///             "Beginning of message".to_owned(),
///             vec![OwnedElementRef::new_unchecked(1, "C002".to_owned(), Status::Mandatory, 1)],
///         ),
///     )
///     .build();
/// ```
#[derive(Debug, Default)]
pub struct DirectoryValidatorBuilder {
    /// Directory identifier copied onto the built validator.  `None` only
    /// when the builder was created through `Default`; in that case `build`
    /// leaves the validator's own identifier untouched.
    directory_id: Option<String>,
    /// Segment definitions accumulated so far, in the order they were added.
    segments: Vec<OwnedSegmentDef>,
}
901
902impl DirectoryValidatorBuilder {
903    /// Create a new builder with the given directory identifier.
904    ///
905    /// The identifier is used in error messages; set a human-readable value
906    /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
907    pub fn new(directory_id: impl Into<String>) -> Self {
908        Self {
909            directory_id: Some(directory_id.into()),
910            segments: Vec::new(),
911        }
912    }
913
914    /// Add a segment definition to the builder.
915    ///
916    /// Definitions can be added in any order; the resulting validator looks
917    /// them up by tag at validation time.
918    pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
919        self.segments.push(def);
920        self
921    }
922
923    /// Extend the builder with multiple segment definitions at once.
924    pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
925        self.segments.extend(defs);
926        self
927    }
928
929    /// Build the [`DirectoryValidator`].
930    ///
931    /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
932    /// Code-list checks are disabled by default; chain
933    /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
934    /// enable them.
935    pub fn build(self) -> DirectoryValidator {
936        let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
937        if let Some(id) = self.directory_id {
938            validator.directory_id = id;
939        }
940        validator
941    }
942}
943
#[cfg(test)]
mod tests {
    use super::*;

    /// Element list of the `TST` fixture: one mandatory composite at position 1.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef::new(1, "C507", Status::Mandatory, 1)];

    /// The only segment definition known to [`segment_lookup`].
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Segment lookup that resolves `TST` and nothing else.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        (tag == "TST").then_some(&TEST_SEGMENT)
    }

    /// Code check that accepts every code.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never offers a replacement code.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that imposes no expectation.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Parse `input` into borrowed segments, run `validator` over the whole
    /// batch with an empty rule context, and hand back the resulting report.
    fn run_batch(validator: &DirectoryValidator, input: &[u8]) -> ValidationReport {
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let context = crate::validator::ValidationRuleContext::empty();
        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report, &context);
        report
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        // The first component is empty but the second is not.
        let report = run_batch(&validator, b"TST+:ABC'");
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` through the reader API and return its first segment.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let mut parsed = crate::from_reader_collect(std::io::Cursor::new(input))
            .expect("parse should succeed")
            .into_iter();
        parsed.next().expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the last
        // one empty.  ISO 9735-1 §3.3 allows trailing empty components to be
        // omitted, so the effective count is expected to be 2.
        let owned = parse_single(b"DTM+137:20200101:'");
        let effective = DirectoryValidator::effective_component_count(&owned.as_borrowed(), 0);
        assert_eq!(
            effective,
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // In `NAD+MS++:` element 2 is ":" — two empty components — so the
        // effective count is expected to be 0.
        let owned = parse_single(b"NAD+MS++:'");
        let effective = DirectoryValidator::effective_component_count(&owned.as_borrowed(), 2);
        assert_eq!(
            effective,
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // All three components of `DTM+137:20200101:102` are non-empty.
        let owned = parse_single(b"DTM+137:20200101:102'");
        let effective = DirectoryValidator::effective_component_count(&owned.as_borrowed(), 0);
        assert_eq!(
            effective,
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rule table: element 0 / component 0 of TST is checked
        // against the "CUSTOM_DE" code list.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            match tag {
                "TST" => &[(0, 0, "CUSTOM_DE")],
                _ => &[],
            }
        }
        // Only the literal code "VALID" is accepted.
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            suggest_code,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let report = run_batch(&validator, b"TST+INVALID'");
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}