Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Mandatory/Conditional status of a data element within a segment.
///
/// The validator in this module only enforces presence for
/// [`Status::Mandatory`] elements; [`Status::Conditional`] elements are never
/// reported as missing by the built-in checks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Element must be present.
    Mandatory,
    /// Element is optional unless additional rules require it.
    Conditional,
}
15
/// Reference to a data element within a segment definition.
///
/// This is the compile-time (`'static`) form used inside
/// [`SegmentDefinition`]; see [`OwnedElementRef`] for the runtime-owned
/// equivalent. Fields are public and not validated on construction.
#[derive(Debug, Clone, Copy)]
pub struct ElementRef {
    /// One-based element position in the segment definition.
    ///
    /// The validator converts this to a zero-based index with a saturating
    /// subtraction, so a (malformed) position of `0` is treated as index `0`.
    pub position: u8,
    /// UN/EDIFACT data element identifier.
    pub data_element: &'static str,
    /// Requirement status of the element.
    pub status: Status,
    /// Maximum repetition count for this element.
    pub max_repeat: u8,
}
28
/// Definition of an EDIFACT segment (tag + element structure).
///
/// All data is `'static`, so definitions can be stored in `static` tables and
/// handed to [`DirectoryValidator::from_definitions`] or returned from a
/// segment-lookup function.
#[derive(Debug)]
pub struct SegmentDefinition {
    /// Segment tag.
    pub tag: &'static str,
    /// Human-readable segment name.
    pub name: &'static str,
    /// Ordered element definitions.
    ///
    /// The highest `position` found here bounds the number of elements a
    /// segment may carry; the highest mandatory `position` is the minimum.
    pub elements: &'static [ElementRef],
}
39
/// Owned runtime equivalent of [`ElementRef`].
///
/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
/// to construct validators from data that is not available at compile time (e.g. loaded
/// from JSON or a database at startup).
///
/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
/// comes from an external source and you need a `Result`. Fields are private to prevent
/// bypassing the position invariant through struct-literal syntax.
///
/// # Invariant
///
/// `position` is never `0`: both constructors reject it.
#[derive(Debug, Clone)]
pub struct OwnedElementRef {
    /// One-based element position (always >= 1).
    position: u8,
    /// UN/EDIFACT data element identifier.
    data_element: String,
    /// Requirement status.
    status: Status,
    /// Maximum repetition count.
    max_repeat: u8,
}
61
/// Owned runtime equivalent of [`SegmentDefinition`].
///
/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
///
/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
/// an external source and you need a `Result`. Fields are private to prevent bypassing
/// the tag invariant through struct-literal syntax.
///
/// # Invariant
///
/// `tag` is always exactly three ASCII uppercase letters: both constructors
/// reject anything else.
#[derive(Debug, Clone)]
pub struct OwnedSegmentDef {
    /// Segment tag (e.g. `"BGM"`).
    tag: String,
    /// Human-readable segment name.
    name: String,
    /// Ordered element definitions.
    elements: Vec<OwnedElementRef>,
}
79
80impl OwnedSegmentDef {
81    /// Construct an owned segment definition.
82    ///
83    /// This is the ergonomic constructor for compile-time-known tags (e.g.
84    /// `"BGM"`, `"UNH"`).  It panics immediately on invalid input so that
85    /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86    /// boilerplate.
87    ///
88    /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89    /// external source (user input, config file, database) and you need a
90    /// `Result` to propagate errors gracefully.
91    ///
92    /// # Panics
93    ///
94    /// Panics if `tag` is not exactly three ASCII uppercase letters.
95    pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96        assert!(
97            tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98            "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99        );
100        Self {
101            tag,
102            name,
103            elements,
104        }
105    }
106
107    /// Construct an owned segment definition, returning an error for invalid tags.
108    ///
109    /// Prefer this over [`new`][Self::new] when the tag comes from an external
110    /// source (user input, config file, database) and you want to handle the
111    /// error without panicking.
112    ///
113    /// # Errors
114    ///
115    /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116    /// ASCII uppercase letters.
117    pub fn try_new(
118        tag: String,
119        name: String,
120        elements: Vec<OwnedElementRef>,
121    ) -> Result<Self, EdifactError> {
122        if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123            return Err(EdifactError::InvalidSegmentTag(tag));
124        }
125        Ok(Self {
126            tag,
127            name,
128            elements,
129        })
130    }
131
132    /// Segment tag (e.g. `"BGM"`).
133    #[inline]
134    pub fn tag(&self) -> &str {
135        &self.tag
136    }
137
138    /// Human-readable segment name.
139    #[inline]
140    pub fn name(&self) -> &str {
141        &self.name
142    }
143
144    /// Element definitions for this segment.
145    #[inline]
146    pub fn elements(&self) -> &[OwnedElementRef] {
147        &self.elements
148    }
149}
150
151impl OwnedElementRef {
152    /// Construct an owned element reference.
153    ///
154    /// This is the ergonomic constructor for compile-time-known positions.
155    /// It panics immediately on invalid input so that call sites with literal
156    /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157    ///
158    /// Use [`try_new`][Self::try_new] instead when the position originates from
159    /// an external source (user input, config file, database) and you need a
160    /// `Result` to propagate errors gracefully.
161    ///
162    /// # Panics
163    ///
164    /// Panics if `position` is `0` (positions are one-based).
165    pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166        assert!(
167            position != 0,
168            "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169        );
170        Self {
171            position,
172            data_element,
173            status,
174            max_repeat,
175        }
176    }
177
178    /// Construct an owned element reference, returning an error for position `0`.
179    ///
180    /// Prefer this over [`new`][Self::new] when the position comes from an
181    /// external source (user input, config file, database) and you want to
182    /// handle the error without panicking.
183    ///
184    /// # Errors
185    ///
186    /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187    pub fn try_new(
188        position: u8,
189        data_element: String,
190        status: Status,
191        max_repeat: u8,
192    ) -> Result<Self, EdifactError> {
193        if position == 0 {
194            return Err(EdifactError::InvalidElementPosition);
195        }
196        Ok(Self {
197            position,
198            data_element,
199            status,
200            max_repeat,
201        })
202    }
203
204    /// One-based element position (always >= 1).
205    #[inline]
206    pub fn position(&self) -> u8 {
207        self.position
208    }
209
210    /// UN/EDIFACT data element identifier.
211    #[inline]
212    pub fn data_element(&self) -> &str {
213        &self.data_element
214    }
215
216    /// Requirement status of this element.
217    #[inline]
218    pub fn status(&self) -> Status {
219        self.status
220    }
221
222    /// Maximum repetition count for this element.
223    #[inline]
224    pub fn max_repeat(&self) -> u8 {
225        self.max_repeat
226    }
227}
228
/// Resolves a segment tag to its compile-time definition; `None` means the
/// tag is unknown to the directory.
type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
/// Code-list membership test, invoked as `(data_element_id, value)`.
type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
/// Optional replacement suggestion for a rejected code value, invoked as
/// `(data_element_id, value)`.
type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
/// Expected component count for `(segment_tag, zero_based_element_index)`;
/// `None` means no component-count check is performed for that element.
type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
/// Plain `fn`-pointer form of the extra per-segment structure rule accepted by
/// [`DirectoryValidator::new`].
type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
/// Shared, type-erased form of the extra per-segment structure rule as stored
/// inside the validator.
type AdditionalStructureRuleFn =
    Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
/// Returns the `(element_index, component_index, data_element_id)` tuples to
/// validate against a code list for the given segment tag.
type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
/// Returns the mandatory segment tags for a given EDIFACT message type.
///
/// The slice should contain every tag that must appear at least once in a
/// conformant message of the given type.  The tags are also used to check
/// canonical ordering — their relative order in the returned slice is taken
/// as the expected order in the message.
type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
/// Internal enum that unifies lookup results from static and owned segment definitions.
///
/// Allows `validate_segment` to handle both code-generated (`&'static`) and
/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
///
/// The lifetime `'a` is the borrow of the validator that owns the
/// [`OwnedSegmentDef`]; the `Static` variant does not depend on it.
enum SegmentDefRef<'a> {
    /// Definition that lives for the whole program (code-generated tables).
    Static(&'static SegmentDefinition),
    /// Definition borrowed from the validator's runtime-owned collection.
    Owned(&'a OwnedSegmentDef),
}
255
256impl<'a> SegmentDefRef<'a> {
257    /// Returns the highest defined element position (one-based → used directly as
258    /// the maximum zero-based slot count for element-count validation).
259    ///
260    /// For owned definitions the highest `position` value may exceed the number
261    /// of entries in the `elements` vec when positions are non-consecutive.
262    fn max_element_position(&self) -> usize {
263        match self {
264            Self::Static(d) => d
265                .elements
266                .iter()
267                .map(|e| e.position as usize)
268                .max()
269                .unwrap_or(0),
270            Self::Owned(d) => d
271                .elements
272                .iter()
273                .map(|e| e.position as usize)
274                .max()
275                .unwrap_or(0),
276        }
277    }
278
279    /// Returns the highest position number among mandatory elements (one-based).
280    ///
281    /// This equals the minimum number of elements that must be present in a
282    /// segment: if the highest-positioned mandatory element is at position 5,
283    /// the segment must supply at least 5 elements.
284    fn last_mandatory_position(&self) -> usize {
285        match self {
286            Self::Static(d) => d
287                .elements
288                .iter()
289                .filter(|e| e.status == Status::Mandatory)
290                .map(|e| e.position as usize)
291                .max()
292                .unwrap_or(0),
293            Self::Owned(d) => d
294                .elements
295                .iter()
296                .filter(|e| e.status == Status::Mandatory)
297                .map(|e| e.position as usize)
298                .max()
299                .unwrap_or(0),
300        }
301    }
302
303    /// Iterate over mandatory element positions without heap allocation.
304    ///
305    /// Calls `f(zero_based_index, data_element_id)` for each element whose
306    /// status is [`Status::Mandatory`].  Returns `Err` immediately if `f`
307    /// returns `Err`, short-circuiting the remaining elements.
308    fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
309    where
310        F: FnMut(usize, &str) -> Result<(), E>,
311    {
312        match self {
313            Self::Static(d) => {
314                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
315                    f((e.position as usize).saturating_sub(1), e.data_element)?;
316                }
317            }
318            Self::Owned(d) => {
319                for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
320                    f(
321                        (e.position as usize).saturating_sub(1),
322                        e.data_element.as_str(),
323                    )?;
324                }
325            }
326        }
327        Ok(())
328    }
329}
330
/// Fallback required-segments mapping, used when the caller supplies none.
///
/// The message type is ignored: the result is always the universal envelope
/// `UNH` … `UNT` that every EDIFACT message must have.  Anything specific to a
/// message type (for example `BGM` in ORDERS/INVOIC) has to come from a
/// [`ProfileRulePack`][crate::ProfileRulePack] or from a custom function
/// registered via [`DirectoryValidatorBuilder::with_required_segments`], so
/// that message types which do not require `BGM` are not flagged by default.
fn default_required_segments(_message_type: &str) -> &'static [&'static str] {
    const ENVELOPE: &[&str] = &["UNH", "UNT"];
    ENVELOPE
}
342
/// Code-list validation rules shared by all UN/EDIFACT directory releases.
///
/// For a segment tag, returns the `(element_index, component_index,
/// data_element_id)` triples whose values should be checked against a code
/// list.  Both indices are zero-based.  Tags without a rule (including tags in
/// the wrong case) yield an empty slice.
///
/// The table covers the qualifier/code elements most commonly validated in
/// ORDERS, INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // One row per segment tag: the leading qualifier/code of each segment.
    const RULES_BY_TAG: &[(&str, &[(usize, usize, &str)])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    for &(known_tag, rules) in RULES_BY_TAG {
        if known_tag == tag {
            return rules;
        }
    }
    &[]
}
363
/// Shared validator implementation that is configured per UN/EDIFACT directory release.
///
/// # Scope and limitations
///
/// `DirectoryValidator` validates individual segment *content* (element counts,
/// component counts, code-list values, and conditional rules) and checks that
/// every *mandatory* segment type is present at least once.  For the mandatory
/// tags only, it additionally checks that their *first occurrences* appear in
/// the order given by the required-segments mapping.
///
/// It does **not** validate the full segment *sequence* or *repetition
/// cardinality* — i.e., it cannot tell you that a `BGM` segment appears more
/// than once, or that a `RFF` group appears in the wrong position.  Full
/// sequence validation requires a state-machine per message type (UN/EDIFACT
/// Segment Tables) which is outside the scope of this implementation.
#[derive(Clone)]
pub struct DirectoryValidator {
    /// Directory identifier; used as the fallback "message type" in
    /// unknown-segment errors when no explicit message type is set.
    directory_id: String,
    /// Lookup for compile-time segment definitions.
    segment_lookup: SegmentLookupFn,
    /// Runtime-owned segment definitions (from builder / JSON / DB).
    ///
    /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
    owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
    /// Code-list membership test `(data_element_id, value) -> bool`.
    is_code_valid: IsCodeValidFn,
    /// Suggestion provider for rejected code values.
    suggest_code: SuggestCodeFn,
    /// Expected component count per `(tag, element_index)`, if any.
    expected_components: ExpectedComponentsFn,
    /// Per-tag list of element/component positions to check against code lists.
    code_list_rules: CodeListRulesFn,
    /// Optional extra structure rule, run last among the structure checks.
    additional_structure_rule: Option<AdditionalStructureRuleFn>,
    /// Configurable mapping from message type to required segment tags.
    required_segments: RequiredSegmentsFn,
    /// Explicit message type; when `None` it is detected from the `UNH` segment.
    message_type: Option<String>,
    /// Whether a segment tag without a definition is reported as an error
    /// (only effective while structure checks are enabled).
    enforce_known_tags: bool,
    /// Enables element/component/required-segment checks.
    structure_checks: bool,
    /// Enables code-list value checks.
    code_list_checks: bool,
}
396
397impl std::fmt::Debug for DirectoryValidator {
398    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
399        f.debug_struct("DirectoryValidator")
400            .field("directory_id", &self.directory_id)
401            .field("message_type", &self.message_type)
402            .field("enforce_known_tags", &self.enforce_known_tags)
403            .field("structure_checks", &self.structure_checks)
404            .field("code_list_checks", &self.code_list_checks)
405            .finish_non_exhaustive()
406    }
407}
408
409impl DirectoryValidator {
410    /// Create a validator for a specific directory release with injected lookup/check hooks.
411    pub fn new(
412        directory_id: &'static str,
413        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
414        is_code_valid: fn(&str, &str) -> bool,
415        suggest_code: fn(&str, &str) -> Option<&'static str>,
416        expected_components: fn(&str, usize) -> Option<u8>,
417        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
418    ) -> Self {
419        Self {
420            directory_id: directory_id.to_owned(),
421            segment_lookup: Arc::new(segment_lookup),
422            owned_defs: None,
423            is_code_valid: Arc::new(is_code_valid),
424            suggest_code: Arc::new(suggest_code),
425            expected_components: Arc::new(expected_components),
426            code_list_rules: Arc::new(base_code_list_rules),
427            additional_structure_rule: additional_structure_rule
428                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
429            required_segments: Arc::new(default_required_segments),
430            message_type: None,
431            enforce_known_tags: true,
432            structure_checks: true,
433            code_list_checks: true,
434        }
435    }
436
437    /// Create a validator from a static slice of [`SegmentDefinition`]s.
438    ///
439    /// This is the preferred constructor when code-generating directory data as
440    /// a `static` array: no manual fn-pointer boilerplate is required.
441    ///
442    /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
443    /// always returns `true`).  Call [`with_code_list_rules`][Self::with_code_list_rules]
444    /// to register directory-specific rules that actually validate code values.
445    ///
446    /// # Example
447    ///
448    /// ```rust,ignore
449    /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
450    ///
451    /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
452    ///     .with_code_list_rules(my_code_list_rules);
453    /// ```
454    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
455        let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
456            definitions.iter().map(|d| (d.tag, d)).collect();
457        let lookup_map = Arc::new(lookup_map);
458        Self {
459            directory_id: "custom".to_owned(),
460            segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
461            owned_defs: None,
462            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
463            suggest_code: Arc::new(|_de: &str, _code: &str| None),
464            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
465            code_list_rules: Arc::new(base_code_list_rules),
466            additional_structure_rule: None,
467            required_segments: Arc::new(default_required_segments),
468            message_type: None,
469            enforce_known_tags: true,
470            structure_checks: true,
471            code_list_checks: false,
472        }
473    }
474
475    /// Create a validator from a runtime-owned collection of segment definitions.
476    ///
477    /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
478    /// loaded from an external source at startup (JSON, database, YAML, …) rather
479    /// than being known at compile time.
480    ///
481    /// Code-list checks are **disabled** by default; enable them by chaining
482    /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
483    /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
484    /// the builder.
485    ///
486    /// # Example
487    ///
488    /// ```rust,ignore
489    /// let defs = vec![
490    ///     OwnedSegmentDef::new(
491    ///         "BGM".to_owned(),
492    ///         "Beginning of message".to_owned(),
493    ///         vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
494    ///     ),
495    /// ];
496    /// let validator = DirectoryValidator::from_owned_definitions(defs)
497    ///     .with_directory_id("runtime-profile");
498    /// ```
499    pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
500        Self {
501            directory_id: "custom".to_owned(),
502            // The static lookup is never consulted when `owned_defs` is `Some`.
503            segment_lookup: Arc::new(|_| None),
504            owned_defs: Some(Arc::new(definitions)),
505            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
506            suggest_code: Arc::new(|_de: &str, _code: &str| None),
507            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
508            code_list_rules: Arc::new(base_code_list_rules),
509            additional_structure_rule: None,
510            required_segments: Arc::new(default_required_segments),
511            message_type: None,
512            enforce_known_tags: true,
513            structure_checks: true,
514            code_list_checks: false,
515        }
516    }
517
518    /// Set the directory identifier string (used in error messages).
519    pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
520        self.directory_id = id.into();
521        self
522    }
523
524    /// Override the code-list rules function.
525    ///
526    /// Directories can supply a directory-specific implementation that extends or
527    /// replaces the base rules from `base_code_list_rules`.
528    pub fn with_code_list_rules(
529        mut self,
530        f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
531    ) -> Self {
532        self.code_list_rules = Arc::new(f);
533        self
534    }
535
536    /// Enable only structure checks and disable code-list checks.
537    pub fn structure_only(mut self) -> Self {
538        self.structure_checks = true;
539        self.code_list_checks = false;
540        self
541    }
542
543    /// Enable only code-list checks and disable structure checks.
544    pub fn code_list_only(mut self) -> Self {
545        self.structure_checks = false;
546        self.code_list_checks = true;
547        self
548    }
549
550    /// Configure whether unknown segment tags should be rejected.
551    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
552        self.enforce_known_tags = enforce;
553        self
554    }
555
556    /// Override the required-segments mapping used for structural validation.
557    ///
558    /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
559    /// and must return a `'static` slice of segment tags that are mandatory for that
560    /// type.  The tags are checked both for *presence* and for *canonical ordering*
561    /// within the message.
562    ///
563    /// # Example
564    ///
565    /// ```rust,ignore
566    /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
567    ///     match msg_type {
568    ///         "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
569    ///         "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
570    ///         _ => &["UNH", "UNT"],
571    ///     }
572    /// }
573    ///
574    /// let validator = DirectoryValidator::from_definitions(DEFS)
575    ///     .with_required_segments(my_required_segments);
576    /// ```
577    pub fn with_required_segments(
578        mut self,
579        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
580    ) -> Self {
581        self.required_segments = Arc::new(f);
582        self
583    }
584
585    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
586        if let Some(explicit) = self.message_type.as_deref() {
587            return Some(explicit.to_owned());
588        }
589
590        segments
591            .iter()
592            .find(|s| s.tag == "UNH")
593            .and_then(|s| s.get_element(1))
594            .and_then(|e| e.get_component(0))
595            .map(str::to_owned)
596    }
597
598    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
599    ///
600    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
601    /// a sender is not required to transmit trailing empty components; this function
602    /// therefore strips them before checking against the expected count so that
603    /// conformant messages with omitted trailing components are still accepted.
604    ///
605    /// # Examples
606    ///
607    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
608    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
609    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
610        let elem = seg.elements.get(element_idx)?;
611        let mut count = elem.components.len();
612        while count > 0 && elem.components[count - 1].0.as_ref().is_empty() {
613            count -= 1;
614        }
615        u8::try_from(count).ok()
616    }
617
618    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
619        for idx in 0..seg.elements.len() {
620            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
621                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
622                if actual != expected {
623                    return Err(EdifactError::InvalidComponentCount {
624                        tag: seg.tag.to_owned(),
625                        element_index: idx,
626                        expected,
627                        actual,
628                        offset: seg.span.start,
629                    });
630                }
631            }
632        }
633        Ok(())
634    }
635
636    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
637        let rules = (self.code_list_rules)(seg.tag);
638
639        for (elem_idx, comp_idx, de) in rules {
640            let value = seg
641                .get_element(*elem_idx)
642                .and_then(|e| e.get_component(*comp_idx))
643                .unwrap_or("");
644            if !value.is_empty() && !(self.is_code_valid)(de, value) {
645                let suggestion = (self.suggest_code)(de, value);
646                return Err(EdifactError::InvalidCodeValue {
647                    tag: seg.tag.to_owned(),
648                    element_index: *elem_idx,
649                    value: value.to_owned(),
650                    code_list: (*de).to_owned(),
651                    offset: seg.span.start,
652                    suggestion,
653                });
654            }
655        }
656
657        Ok(())
658    }
659}
660
661impl DirectoryValidator {
662    fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
663        if let Some(owned) = &self.owned_defs {
664            owned
665                .iter()
666                .find(|d| d.tag == tag)
667                .map(SegmentDefRef::Owned)
668        } else {
669            (self.segment_lookup)(tag).map(SegmentDefRef::Static)
670        }
671    }
672
673    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
674        if !self.structure_checks && !self.code_list_checks {
675            return Ok(());
676        }
677
678        let Some(def) = self.resolve_def(seg.tag) else {
679            if self.structure_checks && self.enforce_known_tags {
680                return Err(EdifactError::InvalidSegmentForMessage {
681                    tag: seg.tag.to_owned(),
682                    message_type: self
683                        .message_type
684                        .clone()
685                        .unwrap_or_else(|| self.directory_id.clone()),
686                    offset: seg.tag_span.start,
687                });
688            }
689            return Ok(());
690        };
691
692        let max_elements = def.max_element_position();
693        let min_elements = def.last_mandatory_position();
694        let actual = seg.elements.len();
695
696        if self.structure_checks && (actual < min_elements || actual > max_elements) {
697            return Err(EdifactError::InvalidElementCount {
698                tag: seg.tag.to_owned(),
699                min: min_elements,
700                max: max_elements,
701                actual,
702                offset: seg.span.start,
703            });
704        }
705
706        if self.structure_checks {
707            def.for_each_mandatory_position(|idx, _de| {
708                let is_present = seg.elements.get(idx).is_some_and(|elem| {
709                    elem.components.iter().any(|(c, _)| !c.as_ref().is_empty())
710                });
711                if !is_present {
712                    return Err(EdifactError::MissingRequiredElement {
713                        tag: seg.tag.to_owned(),
714                        element_index: idx,
715                    });
716                }
717                Ok(())
718            })?;
719            self.validate_component_counts(seg)?;
720
721            if let Some(rule) = &self.additional_structure_rule {
722                rule(seg)?;
723            }
724        }
725
726        if self.code_list_checks {
727            self.validate_code_lists(seg)?;
728        }
729
730        Ok(())
731    }
732}
733
734impl Validator for DirectoryValidator {
735    fn set_message_type(&mut self, message_type: Option<&str>) {
736        self.message_type = message_type.map(str::to_owned);
737    }
738
739    fn validate_batch(
740        &self,
741        segments: &[Segment<'_>],
742        report: &mut ValidationReport,
743        _context: &ValidationRuleContext<'_>,
744    ) {
745        for seg in segments {
746            if let Err(err) = self.validate_segment(seg) {
747                report_error(report, err);
748            }
749        }
750
751        if self.structure_checks {
752            if let Some(message_type) = self.detect_message_type(segments) {
753                for required_tag in (self.required_segments)(&message_type) {
754                    if segments.iter().all(|s| s.tag != *required_tag) {
755                        report.add_error(
756                            ValidationIssue::new(
757                                ValidationSeverity::Error,
758                                format!(
759                                    "required segment {} missing for message type {}",
760                                    required_tag, message_type
761                                ),
762                            )
763                            .with_segment(*required_tag)
764                            .with_suggestion("Add the mandatory segment at the correct position"),
765                        );
766                    }
767                }
768
769                let seq = (self.required_segments)(&message_type);
770                let mut last_idx = None;
771                for tag in seq {
772                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
773                        if let Some(prev) = last_idx {
774                            if idx < prev {
775                                report.add_error(
776                                    ValidationIssue::new(
777                                        ValidationSeverity::Error,
778                                        format!(
779                                            "segment sequence violation for message type {}: '{}' appears out of order",
780                                            message_type, tag
781                                        ),
782                                    )
783                                    .with_segment(*tag)
784                                    .with_suggestion(
785                                        "Ensure required segments follow UN/EDIFACT canonical order",
786                                    ),
787                                );
788                            }
789                        }
790                        last_idx = Some(idx);
791                    }
792                }
793            }
794        }
795    }
796}
797
798// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
799
800/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
801///
802/// Use this when segment definitions are loaded from an external source at
803/// startup (JSON, database, YAML, …) rather than being available as `static`
804/// arrays at compile time.
805///
806/// # Example
807///
808/// ```rust,ignore
809/// let validator = DirectoryValidatorBuilder::new("my-profile")
810///     .add_segment(
811///         OwnedSegmentDef::new(
812///             "BGM".to_owned(),
813///             "Beginning of message".to_owned(),
814///             vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
815///         ),
816///     )
817///     .build();
818/// ```
819#[derive(Debug, Default)]
820pub struct DirectoryValidatorBuilder {
821    directory_id: Option<String>,
822    segments: Vec<OwnedSegmentDef>,
823}
824
825impl DirectoryValidatorBuilder {
826    /// Create a new builder with the given directory identifier.
827    ///
828    /// The identifier is used in error messages; set a human-readable value
829    /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
830    pub fn new(directory_id: impl Into<String>) -> Self {
831        Self {
832            directory_id: Some(directory_id.into()),
833            segments: Vec::new(),
834        }
835    }
836
837    /// Add a segment definition to the builder.
838    ///
839    /// Definitions can be added in any order; the resulting validator looks
840    /// them up by tag at validation time.
841    pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
842        self.segments.push(def);
843        self
844    }
845
846    /// Extend the builder with multiple segment definitions at once.
847    pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
848        self.segments.extend(defs);
849        self
850    }
851
852    /// Build the [`DirectoryValidator`].
853    ///
854    /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
855    /// Code-list checks are disabled by default; chain
856    /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
857    /// enable them.
858    pub fn build(self) -> DirectoryValidator {
859        let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
860        if let Some(id) = self.directory_id {
861            validator.directory_id = id;
862        }
863        validator
864    }
865}
866
#[cfg(test)]
mod tests {
    use super::*;

    /// Element layout of the synthetic `TST` segment: a single mandatory
    /// `C507` at position 1.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// Synthetic segment definition used by the validator tests below.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Segment lookup that only knows the synthetic `TST` segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code-list predicate that accepts every code.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion callback that never proposes a replacement code.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count callback that reports no expectation for any element.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The first component of the composite is empty, the second (`ABC`) is not.
        let parsed = crate::from_bytes(b"TST+:ABC'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let engine = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let ctx = ValidationRuleContext::empty();
        let mut outcome = ValidationReport::default();
        engine.validate_batch(&parsed, &mut outcome, &ctx);

        assert!(!outcome.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` and return the first segment it contains.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let reader = std::io::Cursor::new(input);
        let collected = crate::from_reader_collect(reader).expect("parse should succeed");
        collected
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the last
        // of which is empty. ISO 9735-1 §3.3 allows trailing empty components
        // to be omitted, so only two should be counted.
        let dtm = parse_single(b"DTM+137:20200101:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&dtm.as_borrowed(), 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components — so nothing
        // remains after trimming.
        let nad = parse_single(b"NAD+MS++:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&nad.as_borrowed(), 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Every one of the three components in `DTM+137:20200101:102` has a value.
        let dtm = parse_single(b"DTM+137:20200101:102'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&dtm.as_borrowed(), 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom code-list rules: the first component of TST's first element
        // is checked against the "CUSTOM_DE" list.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            const TST_RULES: &[(usize, usize, &str)] = &[(0, 0, "CUSTOM_DE")];
            if tag == "TST" {
                TST_RULES
            } else {
                &[]
            }
        }
        // Only the literal code "VALID" is accepted.
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let parsed = crate::from_bytes(b"TST+INVALID'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let engine = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let ctx = ValidationRuleContext::empty();
        let mut outcome = ValidationReport::default();
        engine.validate_batch(&parsed, &mut outcome, &ctx);

        assert!(
            outcome.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}
1021}