Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Mandatory/Conditional status of a data element within a segment.
///
/// Segment validation treats [`Status::Mandatory`] entries as required: they
/// determine the minimum element count and must carry at least one non-empty
/// component.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Element must be present.
    Mandatory,
    /// Element is optional unless additional rules require it.
    Conditional,
}
15
/// Reference to a data element within a segment definition.
///
/// This is the compile-time (`'static`) form; [`OwnedElementRef`] is the
/// runtime-owned equivalent.
#[derive(Debug, Clone, Copy)]
pub struct ElementRef {
    /// One-based element position in the segment definition.
    pub position: u8,
    /// UN/EDIFACT data element identifier.
    pub data_element: &'static str,
    /// Requirement status of the element.
    pub status: Status,
    /// Maximum repetition count for this element.
    ///
    /// NOTE(review): the segment validation visible in this file does not read
    /// this field — confirm whether repetition limits are enforced elsewhere.
    pub max_repeat: u8,
}
28
/// Definition of an EDIFACT segment (tag + element structure).
///
/// This is the compile-time (`'static`) form; [`OwnedSegmentDef`] is the
/// runtime-owned equivalent.
#[derive(Debug)]
pub struct SegmentDefinition {
    /// Segment tag.
    pub tag: &'static str,
    /// Human-readable segment name.
    pub name: &'static str,
    /// Ordered element definitions.
    pub elements: &'static [ElementRef],
}
39
/// Owned runtime equivalent of [`ElementRef`].
///
/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
/// to construct validators from data that is not available at compile time (e.g. loaded
/// from JSON or a database at startup).
///
/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
/// comes from an external source and you need a `Result`. Fields are private to prevent
/// bypassing the position invariant (`position >= 1`) through struct-literal syntax;
/// read them through the accessor methods of the same names.
#[derive(Debug, Clone)]
pub struct OwnedElementRef {
    /// One-based element position.
    position: u8,
    /// UN/EDIFACT data element identifier.
    data_element: String,
    /// Requirement status.
    status: Status,
    /// Maximum repetition count.
    max_repeat: u8,
}
61
/// Owned runtime equivalent of [`SegmentDefinition`].
///
/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
///
/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
/// an external source and you need a `Result`. Fields are private to prevent bypassing
/// the tag invariant (exactly three ASCII uppercase letters) through struct-literal
/// syntax; read them through the accessor methods of the same names.
#[derive(Debug, Clone)]
pub struct OwnedSegmentDef {
    /// Segment tag (e.g. `"BGM"`).
    tag: String,
    /// Human-readable segment name.
    name: String,
    /// Ordered element definitions.
    elements: Vec<OwnedElementRef>,
}
79
80impl OwnedSegmentDef {
81    /// Construct an owned segment definition.
82    ///
83    /// This is the ergonomic constructor for compile-time-known tags (e.g.
84    /// `"BGM"`, `"UNH"`).  It panics immediately on invalid input so that
85    /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86    /// boilerplate.
87    ///
88    /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89    /// external source (user input, config file, database) and you need a
90    /// `Result` to propagate errors gracefully.
91    ///
92    /// # Panics
93    ///
94    /// Panics if `tag` is not exactly three ASCII uppercase letters.
95    pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96        assert!(
97            tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98            "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99        );
100        Self {
101            tag,
102            name,
103            elements,
104        }
105    }
106
107    /// Construct an owned segment definition, returning an error for invalid tags.
108    ///
109    /// Prefer this over [`new`][Self::new] when the tag comes from an external
110    /// source (user input, config file, database) and you want to handle the
111    /// error without panicking.
112    ///
113    /// # Errors
114    ///
115    /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116    /// ASCII uppercase letters.
117    pub fn try_new(
118        tag: String,
119        name: String,
120        elements: Vec<OwnedElementRef>,
121    ) -> Result<Self, EdifactError> {
122        if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123            return Err(EdifactError::InvalidSegmentTag(tag));
124        }
125        Ok(Self {
126            tag,
127            name,
128            elements,
129        })
130    }
131
132    /// Segment tag (e.g. `"BGM"`).
133    #[inline]
134    pub fn tag(&self) -> &str {
135        &self.tag
136    }
137
138    /// Human-readable segment name.
139    #[inline]
140    pub fn name(&self) -> &str {
141        &self.name
142    }
143
144    /// Element definitions for this segment.
145    #[inline]
146    pub fn elements(&self) -> &[OwnedElementRef] {
147        &self.elements
148    }
149}
150
151impl OwnedElementRef {
152    /// Construct an owned element reference.
153    ///
154    /// This is the ergonomic constructor for compile-time-known positions.
155    /// It panics immediately on invalid input so that call sites with literal
156    /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157    ///
158    /// Use [`try_new`][Self::try_new] instead when the position originates from
159    /// an external source (user input, config file, database) and you need a
160    /// `Result` to propagate errors gracefully.
161    ///
162    /// # Panics
163    ///
164    /// Panics if `position` is `0` (positions are one-based).
165    pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166        assert!(
167            position != 0,
168            "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169        );
170        Self {
171            position,
172            data_element,
173            status,
174            max_repeat,
175        }
176    }
177
178    /// Construct an owned element reference, returning an error for position `0`.
179    ///
180    /// Prefer this over [`new`][Self::new] when the position comes from an
181    /// external source (user input, config file, database) and you want to
182    /// handle the error without panicking.
183    ///
184    /// # Errors
185    ///
186    /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187    pub fn try_new(
188        position: u8,
189        data_element: String,
190        status: Status,
191        max_repeat: u8,
192    ) -> Result<Self, EdifactError> {
193        if position == 0 {
194            return Err(EdifactError::InvalidElementPosition);
195        }
196        Ok(Self {
197            position,
198            data_element,
199            status,
200            max_repeat,
201        })
202    }
203
204    /// One-based element position (always >= 1).
205    #[inline]
206    pub fn position(&self) -> u8 {
207        self.position
208    }
209
210    /// UN/EDIFACT data element identifier.
211    #[inline]
212    pub fn data_element(&self) -> &str {
213        &self.data_element
214    }
215
216    /// Requirement status of this element.
217    #[inline]
218    pub fn status(&self) -> Status {
219        self.status
220    }
221
222    /// Maximum repetition count for this element.
223    #[inline]
224    pub fn max_repeat(&self) -> u8 {
225        self.max_repeat
226    }
227}
228
/// Resolves a segment tag to its compile-time definition, or `None` when the
/// tag is unknown to the directory.
type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
/// Called as `(data_element_id, value)`; returns whether `value` is an
/// accepted code for that data element.
type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
/// Called as `(data_element_id, value)` after a value was rejected; may
/// return a replacement suggestion that is attached to the error.
type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
/// Called as `(segment_tag, element_index)`; returns the component count the
/// element must have, or `None` when the element is not checked.
type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
/// Plain function-pointer form of the extra per-segment structure rule, as
/// accepted by [`DirectoryValidator::new`].
type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
/// Shared, type-erased form of the extra per-segment structure rule, as
/// stored inside the validator.
type AdditionalStructureRuleFn =
    Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
/// Returns the `(element_index, component_index, data_element_id)` tuples to
/// validate against a code list for the given segment tag.
type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
/// Returns the mandatory segment tags for a given EDIFACT message type.
///
/// The slice should contain every tag that must appear at least once in a
/// conformant message of the given type.  The tags are also used to check
/// canonical ordering — their relative order in the returned slice is taken
/// as the expected order in the message.
type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
/// Internal enum that unifies lookup results from static and owned segment definitions.
///
/// Allows `validate_segment` to handle both code-generated (`&'static`) and
/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
enum SegmentDefRef<'a> {
    /// Definition produced by the validator's static lookup hook.
    Static(&'static SegmentDefinition),
    /// Definition borrowed from the validator's runtime-owned definition list.
    Owned(&'a OwnedSegmentDef),
}
255
256impl<'a> SegmentDefRef<'a> {
257    /// Returns the highest defined element position (one-based → used directly as
258    /// the maximum zero-based slot count for element-count validation).
259    ///
260    /// For owned definitions the highest `position` value may exceed the number
261    /// of entries in the `elements` vec when positions are non-consecutive.
262    fn max_element_position(&self) -> usize {
263        match self {
264            Self::Static(d) => d
265                .elements
266                .iter()
267                .map(|e| e.position as usize)
268                .max()
269                .unwrap_or(0),
270            Self::Owned(d) => d
271                .elements
272                .iter()
273                .map(|e| e.position as usize)
274                .max()
275                .unwrap_or(0),
276        }
277    }
278
279    /// Returns the highest position number among mandatory elements (one-based).
280    ///
281    /// This equals the minimum number of elements that must be present in a
282    /// segment: if the highest-positioned mandatory element is at position 5,
283    /// the segment must supply at least 5 elements.
284    fn last_mandatory_position(&self) -> usize {
285        match self {
286            Self::Static(d) => d
287                .elements
288                .iter()
289                .filter(|e| e.status == Status::Mandatory)
290                .map(|e| e.position as usize)
291                .max()
292                .unwrap_or(0),
293            Self::Owned(d) => d
294                .elements
295                .iter()
296                .filter(|e| e.status == Status::Mandatory)
297                .map(|e| e.position as usize)
298                .max()
299                .unwrap_or(0),
300        }
301    }
302
303    fn mandatory_positions(&self) -> impl Iterator<Item = (usize, &str)> {
304        enum E<A, B> {
305            A(A),
306            B(B),
307        }
308        impl<A, B, I> Iterator for E<A, B>
309        where
310            A: Iterator<Item = I>,
311            B: Iterator<Item = I>,
312        {
313            type Item = I;
314            fn next(&mut self) -> Option<I> {
315                match self {
316                    E::A(a) => a.next(),
317                    E::B(b) => b.next(),
318                }
319            }
320        }
321        match self {
322            Self::Static(d) => E::A(
323                d.elements
324                    .iter()
325                    .filter(|e| e.status == Status::Mandatory)
326                    .map(|e| ((e.position as usize).saturating_sub(1), e.data_element)),
327            ),
328            Self::Owned(d) => E::B(
329                d.elements
330                    .iter()
331                    .filter(|e| e.status == Status::Mandatory)
332                    .map(|e| {
333                        (
334                            (e.position as usize).saturating_sub(1),
335                            e.data_element.as_str(),
336                        )
337                    }),
338            ),
339        }
340    }
341}
342
/// Built-in required-segments mapping, used until a custom one is supplied.
///
/// `UTILMD`, `ORDERS` and `INVOIC` require `UNH`, `BGM` and `UNT`; every other
/// message type only requires the `UNH`/`UNT` envelope.
fn default_required_segments(message_type: &str) -> &'static [&'static str] {
    const WITH_BGM: &[&str] = &["UNH", "BGM", "UNT"];
    const ENVELOPE_ONLY: &[&str] = &["UNH", "UNT"];

    if matches!(message_type, "UTILMD" | "ORDERS" | "INVOIC") {
        WITH_BGM
    } else {
        ENVELOPE_ONLY
    }
}
350
/// Code-list validation rules common to all UN/EDIFACT directory releases.
///
/// Each rule is `(element_index, component_index, data_element_id)` with both
/// indices zero-based. Every built-in rule targets the first component of the
/// first element (the qualifier/code slot) of its segment. Tags without an
/// entry yield an empty slice.
///
/// Covers the most frequently validated qualifier/code elements across ORDERS,
/// INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // Tag → rules table; tags are unique, so the first match is the only match.
    const RULES_BY_TAG: &[(&str, &[(usize, usize, &str)])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    match RULES_BY_TAG.iter().find(|(known, _)| *known == tag) {
        Some((_, rules)) => *rules,
        None => &[],
    }
}
371
/// Shared validator implementation that is configured per UN/EDIFACT directory release.
///
/// # Scope and limitations
///
/// `DirectoryValidator` validates individual segment *content* (element counts,
/// component counts, code-list values, and conditional rules) and checks that
/// every *mandatory* segment type is present at least once.  Its only ordering
/// check is coarse: the *first occurrences* of the mandatory segment tags must
/// appear in the relative order given by the required-segments mapping.  It
/// does **not** validate the full segment *sequence* or *repetition
/// cardinality* — i.e., it cannot tell you that a `BGM` segment appears more
/// than once, or that a `RFF` group appears in the wrong position.  Full
/// sequence validation requires a state-machine per message type (UN/EDIFACT
/// Segment Tables) which is outside the scope of this implementation.
#[derive(Clone)]
pub struct DirectoryValidator {
    /// Identifier of the directory release; used as the message-type fallback
    /// in unknown-segment errors.
    directory_id: String,
    /// Hook resolving a tag to a compile-time segment definition.
    segment_lookup: SegmentLookupFn,
    /// Runtime-owned segment definitions (from builder / JSON / DB).
    ///
    /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
    owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
    /// Hook deciding whether a value is an accepted code for a data element.
    is_code_valid: IsCodeValidFn,
    /// Hook proposing a replacement for a rejected code value.
    suggest_code: SuggestCodeFn,
    /// Hook giving the expected component count of an element, if checked.
    expected_components: ExpectedComponentsFn,
    /// Hook listing which element/component slots of a segment are code-checked.
    code_list_rules: CodeListRulesFn,
    /// Optional extra per-segment rule run after the built-in structure checks.
    additional_structure_rule: Option<AdditionalStructureRuleFn>,
    /// Configurable mapping from message type to required segment tags.
    required_segments: RequiredSegmentsFn,
    /// Explicit message type; when `None` it is read from the `UNH` segment.
    message_type: Option<String>,
    /// Whether segments whose tag has no definition are rejected.
    enforce_known_tags: bool,
    /// Whether structural checks run.
    structure_checks: bool,
    /// Whether code-list checks run.
    code_list_checks: bool,
}
404
405impl std::fmt::Debug for DirectoryValidator {
406    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407        f.debug_struct("DirectoryValidator")
408            .field("directory_id", &self.directory_id)
409            .field("message_type", &self.message_type)
410            .field("enforce_known_tags", &self.enforce_known_tags)
411            .field("structure_checks", &self.structure_checks)
412            .field("code_list_checks", &self.code_list_checks)
413            .finish_non_exhaustive()
414    }
415}
416
417impl DirectoryValidator {
418    /// Create a validator for a specific directory release with injected lookup/check hooks.
419    pub fn new(
420        directory_id: &'static str,
421        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
422        is_code_valid: fn(&str, &str) -> bool,
423        suggest_code: fn(&str, &str) -> Option<&'static str>,
424        expected_components: fn(&str, usize) -> Option<u8>,
425        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
426    ) -> Self {
427        Self {
428            directory_id: directory_id.to_owned(),
429            segment_lookup: Arc::new(segment_lookup),
430            owned_defs: None,
431            is_code_valid: Arc::new(is_code_valid),
432            suggest_code: Arc::new(suggest_code),
433            expected_components: Arc::new(expected_components),
434            code_list_rules: Arc::new(base_code_list_rules),
435            additional_structure_rule: additional_structure_rule
436                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
437            required_segments: Arc::new(default_required_segments),
438            message_type: None,
439            enforce_known_tags: true,
440            structure_checks: true,
441            code_list_checks: true,
442        }
443    }
444
    /// Create a validator from a static slice of [`SegmentDefinition`]s.
    ///
    /// This is the preferred constructor when code-generating directory data as
    /// a `static` array: no manual fn-pointer boilerplate is required. Tags are
    /// resolved by a linear search over `definitions`; the directory id is
    /// `"custom"`.
    ///
    /// Code-list checks are **disabled** by default (`code_list_checks` is
    /// `false`), and the built-in `is_code_valid` placeholder accepts every
    /// value.  [`with_code_list_rules`][Self::with_code_list_rules] only
    /// replaces the table of element/component slots to inspect; on its own it
    /// neither turns the checks on nor makes any value invalid.
    ///
    /// NOTE(review): no method visible in this part of the file installs a real
    /// code validator on a validator built this way — confirm whether one exists
    /// elsewhere, otherwise use [`new`][Self::new] when code values must be checked.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
    ///
    /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
    ///     .with_code_list_rules(my_code_list_rules);
    /// ```
    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
        Self {
            directory_id: "custom".to_owned(),
            segment_lookup: Arc::new(move |tag: &str| definitions.iter().find(|d| d.tag == tag)),
            owned_defs: None,
            // Placeholder hooks: accept every code, suggest nothing, check no component counts.
            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
            suggest_code: Arc::new(|_de: &str, _code: &str| None),
            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
            code_list_rules: Arc::new(base_code_list_rules),
            additional_structure_rule: None,
            required_segments: Arc::new(default_required_segments),
            message_type: None,
            enforce_known_tags: true,
            structure_checks: true,
            code_list_checks: false,
        }
    }
479
480    /// Create a validator from a runtime-owned collection of segment definitions.
481    ///
482    /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
483    /// loaded from an external source at startup (JSON, database, YAML, …) rather
484    /// than being known at compile time.
485    ///
486    /// Code-list checks are **disabled** by default; enable them by chaining
487    /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
488    /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
489    /// the builder.
490    ///
491    /// # Example
492    ///
493    /// ```rust,ignore
494    /// let defs = vec![
495    ///     OwnedSegmentDef::new(
496    ///         "BGM".to_owned(),
497    ///         "Beginning of message".to_owned(),
498    ///         vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
499    ///     ),
500    /// ];
501    /// let validator = DirectoryValidator::from_owned_definitions(defs)
502    ///     .with_directory_id("runtime-profile");
503    /// ```
504    pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
505        Self {
506            directory_id: "custom".to_owned(),
507            // The static lookup is never consulted when `owned_defs` is `Some`.
508            segment_lookup: Arc::new(|_| None),
509            owned_defs: Some(Arc::new(definitions)),
510            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
511            suggest_code: Arc::new(|_de: &str, _code: &str| None),
512            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
513            code_list_rules: Arc::new(base_code_list_rules),
514            additional_structure_rule: None,
515            required_segments: Arc::new(default_required_segments),
516            message_type: None,
517            enforce_known_tags: true,
518            structure_checks: true,
519            code_list_checks: false,
520        }
521    }
522
523    /// Set the directory identifier string (used in error messages).
524    pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
525        self.directory_id = id.into();
526        self
527    }
528
529    /// Override the code-list rules function.
530    ///
531    /// Directories can supply a directory-specific implementation that extends or
532    /// replaces the base rules from `base_code_list_rules`.
533    pub fn with_code_list_rules(
534        mut self,
535        f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
536    ) -> Self {
537        self.code_list_rules = Arc::new(f);
538        self
539    }
540
541    /// Enable only structure checks and disable code-list checks.
542    pub fn structure_only(mut self) -> Self {
543        self.structure_checks = true;
544        self.code_list_checks = false;
545        self
546    }
547
548    /// Enable only code-list checks and disable structure checks.
549    pub fn code_list_only(mut self) -> Self {
550        self.structure_checks = false;
551        self.code_list_checks = true;
552        self
553    }
554
555    /// Configure whether unknown segment tags should be rejected.
556    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
557        self.enforce_known_tags = enforce;
558        self
559    }
560
561    /// Override the required-segments mapping used for structural validation.
562    ///
563    /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
564    /// and must return a `'static` slice of segment tags that are mandatory for that
565    /// type.  The tags are checked both for *presence* and for *canonical ordering*
566    /// within the message.
567    ///
568    /// # Example
569    ///
570    /// ```rust,ignore
571    /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
572    ///     match msg_type {
573    ///         "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
574    ///         "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
575    ///         _ => &["UNH", "UNT"],
576    ///     }
577    /// }
578    ///
579    /// let validator = DirectoryValidator::from_definitions(DEFS)
580    ///     .with_required_segments(my_required_segments);
581    /// ```
582    pub fn with_required_segments(
583        mut self,
584        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
585    ) -> Self {
586        self.required_segments = Arc::new(f);
587        self
588    }
589
590    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
591        if let Some(explicit) = self.message_type.as_deref() {
592            return Some(explicit.to_owned());
593        }
594
595        segments
596            .iter()
597            .find(|s| s.tag == "UNH")
598            .and_then(|s| s.get_element(1))
599            .and_then(|e| e.get_component(0))
600            .map(str::to_owned)
601    }
602
    /// Count the components of element `element_idx` in `seg`, ignoring trailing
    /// empty ones.
    ///
    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
    /// a sender is not required to transmit trailing empty components; this function
    /// therefore strips them before checking against the expected count so that
    /// conformant messages with omitted trailing components are still accepted.
    ///
    /// Only *trailing* empties are stripped: an empty component that is followed
    /// by a non-empty one still counts towards the result.
    ///
    /// Returns `None` when `element_idx` is out of range, or when the resulting
    /// count does not fit in a `u8`.
    ///
    /// # Examples
    ///
    /// (assuming the parser yields one component per `:`-separated slot)
    ///
    /// - `DTM+137:20200101:` — components `137`, `20200101`, `` → the trailing
    ///   empty one is dropped → effective=2
    /// - `NAD+MS++::293` — the composite `::293` has components ``, ``, `293`;
    ///   the last one is non-empty, so nothing is dropped → effective=3
    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
        let elem = seg.elements.get(element_idx)?;
        let mut count = elem.components.len();
        // Walk back from the end until the last non-empty component is found.
        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
            count -= 1;
        }
        u8::try_from(count).ok()
    }
622
623    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
624        for idx in 0..seg.elements.len() {
625            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
626                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
627                if actual != expected {
628                    return Err(EdifactError::InvalidComponentCount {
629                        tag: seg.tag.to_owned(),
630                        element_index: idx,
631                        expected,
632                        actual,
633                        offset: seg.span.start,
634                    });
635                }
636            }
637        }
638        Ok(())
639    }
640
641    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
642        let rules = (self.code_list_rules)(seg.tag);
643
644        for (elem_idx, comp_idx, de) in rules {
645            let value = seg
646                .get_element(*elem_idx)
647                .and_then(|e| e.get_component(*comp_idx))
648                .unwrap_or("");
649            if !value.is_empty() && !(self.is_code_valid)(de, value) {
650                let suggestion = (self.suggest_code)(de, value);
651                return Err(EdifactError::InvalidCodeValue {
652                    tag: seg.tag.to_owned(),
653                    element_index: *elem_idx,
654                    value: value.to_owned(),
655                    code_list: (*de).to_owned(),
656                    offset: seg.span.start,
657                    suggestion,
658                });
659            }
660        }
661
662        Ok(())
663    }
664}
665
666impl DirectoryValidator {
667    fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
668        if let Some(owned) = &self.owned_defs {
669            owned
670                .iter()
671                .find(|d| d.tag == tag)
672                .map(SegmentDefRef::Owned)
673        } else {
674            (self.segment_lookup)(tag).map(SegmentDefRef::Static)
675        }
676    }
677
678    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
679        if !self.structure_checks && !self.code_list_checks {
680            return Ok(());
681        }
682
683        let Some(def) = self.resolve_def(seg.tag) else {
684            if self.structure_checks && self.enforce_known_tags {
685                return Err(EdifactError::InvalidSegmentForMessage {
686                    tag: seg.tag.to_owned(),
687                    message_type: self
688                        .message_type
689                        .clone()
690                        .unwrap_or_else(|| self.directory_id.clone()),
691                    offset: seg.tag_span.start,
692                });
693            }
694            return Ok(());
695        };
696
697        let max_elements = def.max_element_position();
698        let min_elements = def.last_mandatory_position();
699        let actual = seg.elements.len();
700
701        if self.structure_checks && (actual < min_elements || actual > max_elements) {
702            return Err(EdifactError::InvalidElementCount {
703                tag: seg.tag.to_owned(),
704                min: min_elements,
705                max: max_elements,
706                actual,
707                offset: seg.span.start,
708            });
709        }
710
711        if self.structure_checks {
712            for (idx, _de) in def.mandatory_positions() {
713                let is_present = seg
714                    .elements
715                    .get(idx)
716                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
717                if !is_present {
718                    return Err(EdifactError::MissingRequiredElement {
719                        tag: seg.tag.to_owned(),
720                        element_index: idx,
721                    });
722                }
723            }
724            self.validate_component_counts(seg)?;
725
726            if let Some(rule) = &self.additional_structure_rule {
727                rule(seg)?;
728            }
729        }
730
731        if self.code_list_checks {
732            self.validate_code_lists(seg)?;
733        }
734
735        Ok(())
736    }
737}
738
739impl Validator for DirectoryValidator {
740    fn set_message_type(&mut self, message_type: Option<&str>) {
741        self.message_type = message_type.map(str::to_owned);
742    }
743
744    fn validate_batch(
745        &self,
746        segments: &[Segment<'_>],
747        report: &mut ValidationReport,
748        _context: &ValidationRuleContext<'_>,
749    ) {
750        for seg in segments {
751            if let Err(err) = self.validate_segment(seg) {
752                report_error(report, err);
753            }
754        }
755
756        if self.structure_checks {
757            if let Some(message_type) = self.detect_message_type(segments) {
758                for required_tag in (self.required_segments)(&message_type) {
759                    if segments.iter().all(|s| s.tag != *required_tag) {
760                        report.add_error(
761                            ValidationIssue::new(
762                                ValidationSeverity::Error,
763                                format!(
764                                    "required segment {} missing for message type {}",
765                                    required_tag, message_type
766                                ),
767                            )
768                            .with_segment(*required_tag)
769                            .with_suggestion("Add the mandatory segment at the correct position"),
770                        );
771                    }
772                }
773
774                let seq = (self.required_segments)(&message_type);
775                let mut last_idx = None;
776                for tag in seq {
777                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
778                        if let Some(prev) = last_idx {
779                            if idx < prev {
780                                report.add_error(
781                                    ValidationIssue::new(
782                                        ValidationSeverity::Error,
783                                        format!(
784                                            "segment sequence violation for message type {}: '{}' appears out of order",
785                                            message_type, tag
786                                        ),
787                                    )
788                                    .with_segment(*tag)
789                                    .with_suggestion(
790                                        "Ensure required segments follow UN/EDIFACT canonical order",
791                                    ),
792                                );
793                            }
794                        }
795                        last_idx = Some(idx);
796                    }
797                }
798            }
799        }
800    }
801}
802
803// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
804
805/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
806///
807/// Use this when segment definitions are loaded from an external source at
808/// startup (JSON, database, YAML, …) rather than being available as `static`
809/// arrays at compile time.
810///
811/// # Example
812///
813/// ```rust,ignore
814/// let validator = DirectoryValidatorBuilder::new("my-profile")
815///     .add_segment(
816///         OwnedSegmentDef::new(
817///             "BGM".to_owned(),
818///             "Beginning of message".to_owned(),
819///             vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
820///         ),
821///     )
822///     .build();
823/// ```
824#[derive(Debug, Default)]
825pub struct DirectoryValidatorBuilder {
826    directory_id: Option<String>,
827    segments: Vec<OwnedSegmentDef>,
828}
829
830impl DirectoryValidatorBuilder {
831    /// Create a new builder with the given directory identifier.
832    ///
833    /// The identifier is used in error messages; set a human-readable value
834    /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
835    pub fn new(directory_id: impl Into<String>) -> Self {
836        Self {
837            directory_id: Some(directory_id.into()),
838            segments: Vec::new(),
839        }
840    }
841
842    /// Add a segment definition to the builder.
843    ///
844    /// Definitions can be added in any order; the resulting validator looks
845    /// them up by tag at validation time.
846    pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
847        self.segments.push(def);
848        self
849    }
850
851    /// Extend the builder with multiple segment definitions at once.
852    pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
853        self.segments.extend(defs);
854        self
855    }
856
857    /// Build the [`DirectoryValidator`].
858    ///
859    /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
860    /// Code-list checks are disabled by default; chain
861    /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
862    /// enable them.
863    pub fn build(self) -> DirectoryValidator {
864        let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
865        if let Some(id) = self.directory_id {
866            validator.directory_id = id;
867        }
868        validator
869    }
870}
871
#[cfg(test)]
mod tests {
    use super::*;

    /// Element layout of the synthetic `TST` segment: a single mandatory
    /// `C507` at position 1.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// Synthetic segment definition used by the validation tests below.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Segment lookup that only knows the synthetic `TST` segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code-list predicate that accepts every code.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion callback that never proposes a replacement code.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count callback that imposes no expectation.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Parse `input`, run `validator` over all parsed segments with an empty
    /// rule context, and hand back the resulting report.
    fn run_validator(validator: &DirectoryValidator, input: &[u8]) -> ValidationReport {
        let segments = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report, &ValidationRuleContext::empty());
        report
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The first component of the composite is empty, the second is not.
        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let report = run_validator(&validator, b"TST+:ABC'");
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` through the reader API and return its first segment.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let parsed = crate::from_reader(std::io::Cursor::new(input)).expect("parse should succeed");
        parsed
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` holds three components, the last
        // of which is empty. ISO 9735-1 §3.3 allows trailing empty components
        // to be omitted, so the effective count is expected to be 2.
        let owned = parse_single(b"DTM+137:20200101:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&owned.as_borrowed(), 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components — so nothing
        // is left once trailing empties are trimmed.
        let owned = parse_single(b"NAD+MS++:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&owned.as_borrowed(), 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Every one of the three components in `DTM+137:20200101:102` is filled.
        let owned = parse_single(b"DTM+137:20200101:102'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&owned.as_borrowed(), 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom code-list rules: element 0, component 0 of TST is checked
        // against the `CUSTOM_DE` code list.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            const TST_RULES: &[(usize, usize, &str)] = &[(0, 0, "CUSTOM_DE")];
            if tag == "TST" { TST_RULES } else { &[] }
        }
        // Only the literal code `VALID` is accepted.
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            suggest_code,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let report = run_validator(&validator, b"TST+INVALID'");
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}