Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
7/// Requirement status (Mandatory or Conditional) of a data element within a segment.
///
/// The validator in this file uses the status in two ways: the position of the
/// last `Mandatory` element of a definition gives the minimum element count of
/// a segment, and every `Mandatory` element must carry at least one non-empty
/// component.
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Status {
10    /// Element must be present.
11    Mandatory,
12    /// Element is optional unless additional rules require it.
13    Conditional,
14}
15
16/// Reference to a data element within a segment definition.
///
/// Instances are plain `Copy` data intended to live in `static` tables.
17#[derive(Debug, Clone, Copy)]
18pub struct ElementRef {
19    /// One-based element position in the segment definition.
    ///
    /// The validator converts this to a zero-based index (`position - 1`,
    /// saturating at 0) when checking that a mandatory element is present.
20    pub position: u8,
21    /// UN/EDIFACT data element identifier.
    ///
    /// Not consulted by the validation code in this file.
22    pub data_element: &'static str,
23    /// Requirement status of the element.
24    pub status: Status,
25    /// Maximum repetition count for this element.
    ///
    /// NOTE(review): not read by any check in this file — repetition is
    /// currently not enforced here.
26    pub max_repeat: u8,
27}
28
29/// Definition of an EDIFACT segment (tag + element structure).
///
/// All fields are `'static`, so definitions can be declared as `static` items
/// and handed out by reference from a segment-lookup hook.
30#[derive(Debug)]
31pub struct SegmentDefinition {
32    /// Segment tag.
33    pub tag: &'static str,
34    /// Human-readable segment name.
35    pub name: &'static str,
36    /// Ordered element definitions.
    ///
    /// The slice length is used as the maximum element count of the segment;
    /// the index of the last `Status::Mandatory` entry (plus one) is used as
    /// the minimum element count.
37    pub elements: &'static [ElementRef],
38}
39
/// Shared hook that resolves a segment tag to its static definition, or `None`
/// when the tag is unknown to the directory.
40type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
/// Shared hook called as `(data_element_id, value)`; returns `true` when the
/// value is acceptable for that code list.
41type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
/// Shared hook called as `(data_element_id, rejected_value)`; may return a
/// replacement suggestion that is attached to the resulting error.
42type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
/// Shared hook called as `(segment_tag, zero_based_element_index)`; returns the
/// exact component count expected there, or `None` to skip the check.
43type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
/// Plain function-pointer form of an extra per-segment structure rule, as
/// accepted by `DirectoryValidator::new`.
44type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
/// Shared (reference-counted) form of the extra per-segment structure rule, as
/// stored inside the validator.
45type AdditionalStructureRuleFn = Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
46/// Returns the `(element_index, component_index, data_element_id)` tuples to
47/// validate against a code list for the given segment tag.
48type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
49/// Returns the mandatory segment tags for a given EDIFACT message type.
50///
51/// The slice should contain every tag that must appear at least once in a
52/// conformant message of the given type.  The tags are also used to check
53/// canonical ordering — the first occurrence of each tag in the message must
54/// follow the relative order of the returned slice.
55type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
56
/// Built-in required-segments mapping, used until a custom one is installed.
///
/// `UTILMD`, `ORDERS` and `INVOIC` require `UNH`, `BGM` and `UNT`; every other
/// message type only requires the `UNH`/`UNT` envelope.
fn default_required_segments(message_type: &str) -> &'static [&'static str] {
    const WITH_BGM: &[&str] = &["UNH", "BGM", "UNT"];
    const ENVELOPE_ONLY: &[&str] = &["UNH", "UNT"];

    if matches!(message_type, "UTILMD" | "ORDERS" | "INVOIC") {
        WITH_BGM
    } else {
        ENVELOPE_ONLY
    }
}
64
/// Code-list validation rules shared by all UN/EDIFACT directory releases.
///
/// Each rule is `(element_index, component_index, data_element_id)` with
/// zero-based indices.  For every tag listed here the rule targets the first
/// component of the first element (the qualifier/code position).  Tags without
/// an entry yield an empty slice; matching is case-sensitive.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    type Rules = &'static [(usize, usize, &'static str)];

    // Segment tag → rules for that tag.
    const TABLE: &[(&str, Rules)] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    match TABLE.iter().find(|(known, _)| *known == tag) {
        Some(&(_, rules)) => rules,
        None => &[],
    }
}
85
86/// Shared validator implementation that is configured per UN/EDIFACT directory release.
87///
88/// # Scope and limitations
89///
90/// `DirectoryValidator` validates individual segment *content* (element counts,
91/// component counts, code-list values, and an optional additional structure
92/// rule) and checks that every *mandatory* segment type is present at least
93/// once.  Ordering is checked only for those mandatory tags: the first
94/// occurrence of each must follow the order of the required-segments slice.
95/// It does **not** validate the full segment *sequence* or *repetition
96/// cardinality* — i.e., it cannot tell you that a `BGM` segment appears more
97/// than once, or that a `RFF` group appears in the wrong position.  Full
/// sequence validation requires a state-machine per message type (UN/EDIFACT
/// Segment Tables) which is outside the scope of this implementation.
98#[derive(Clone)]
99pub struct DirectoryValidator {
    /// Directory label; used as the message-type fallback in unknown-tag errors.
100    directory_id: &'static str,
    /// Resolves a segment tag to its definition.
101    segment_lookup: SegmentLookupFn,
    /// Decides whether a value is acceptable for a data element's code list.
102    is_code_valid: IsCodeValidFn,
    /// Optional replacement suggestion for a rejected code value.
103    suggest_code: SuggestCodeFn,
    /// Exact component count expected per `(tag, element index)`, if any.
104    expected_components: ExpectedComponentsFn,
    /// Which `(element, component)` positions of a segment are code-list checked.
105    code_list_rules: CodeListRulesFn,
    /// Extra per-segment rule, run as part of the structure checks.
106    additional_structure_rule: Option<AdditionalStructureRuleFn>,
107    /// Configurable mapping from message type to required segment tags.
108    required_segments: RequiredSegmentsFn,
    /// Explicit message type; when `None` it is read from the `UNH` segment.
109    message_type: Option<String>,
    /// When set (and structure checks are on), unknown tags are errors.
110    enforce_known_tags: bool,
    /// Enables element/component/required-segment checks.
111    structure_checks: bool,
    /// Enables code-list checks.
112    code_list_checks: bool,
113}
114
115impl std::fmt::Debug for DirectoryValidator {
116    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
117        f.debug_struct("DirectoryValidator")
118            .field("directory_id", &self.directory_id)
119            .field("message_type", &self.message_type)
120            .field("enforce_known_tags", &self.enforce_known_tags)
121            .field("structure_checks", &self.structure_checks)
122            .field("code_list_checks", &self.code_list_checks)
123            .finish_non_exhaustive()
124    }
125}
126
127impl DirectoryValidator {
128    /// Create a validator for a specific directory release with injected lookup/check hooks.
129    pub fn new(
130        directory_id: &'static str,
131        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
132        is_code_valid: fn(&str, &str) -> bool,
133        suggest_code: fn(&str, &str) -> Option<&'static str>,
134        expected_components: fn(&str, usize) -> Option<u8>,
135        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
136    ) -> Self {
137        Self {
138            directory_id,
139            segment_lookup: Arc::new(segment_lookup),
140            is_code_valid: Arc::new(is_code_valid),
141            suggest_code: Arc::new(suggest_code),
142            expected_components: Arc::new(expected_components),
143            code_list_rules: Arc::new(base_code_list_rules),
144            additional_structure_rule: additional_structure_rule
145                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
146            required_segments: Arc::new(default_required_segments),
147            message_type: None,
148            enforce_known_tags: true,
149            structure_checks: true,
150            code_list_checks: true,
151        }
152    }
153
154    /// Create a validator from a static slice of [`SegmentDefinition`]s.
155    ///
156    /// This is the preferred constructor when code-generating directory data as
157    /// a `static` array: no manual fn-pointer boilerplate is required.
158    ///
159    /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
160    /// always returns `true`).  Call [`with_code_list_rules`][Self::with_code_list_rules]
161    /// to register directory-specific rules that actually validate code values.
162    ///
163    /// # Example
164    ///
165    /// ```rust,ignore
166    /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
167    ///
168    /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
169    ///     .with_code_list_rules(my_code_list_rules);
170    /// ```
171    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
172        Self {
173            directory_id: "custom",
174            segment_lookup: Arc::new(move |tag: &str| {
175                definitions.iter().find(|d| d.tag == tag)
176            }),
177            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
178            suggest_code: Arc::new(|_de: &str, _code: &str| None),
179            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
180            code_list_rules: Arc::new(base_code_list_rules),
181            additional_structure_rule: None,
182            required_segments: Arc::new(default_required_segments),
183            message_type: None,
184            enforce_known_tags: true,
185            structure_checks: true,
186            code_list_checks: false,
187        }
188    }
189
190    /// Set the directory identifier string (used in error messages).
191    pub fn with_directory_id(mut self, id: &'static str) -> Self {
192        self.directory_id = id;
193        self
194    }
195
196    /// Override the code-list rules function.
197    ///
198    /// Directories can supply a directory-specific implementation that extends or
199    /// replaces the base rules from `base_code_list_rules`.
200    pub fn with_code_list_rules(mut self, f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static) -> Self {
201        self.code_list_rules = Arc::new(f);
202        self
203    }
204
205    /// Enable only structure checks and disable code-list checks.
206    pub fn structure_only(mut self) -> Self {
207        self.structure_checks = true;
208        self.code_list_checks = false;
209        self
210    }
211
212    /// Enable only code-list checks and disable structure checks.
213    pub fn code_list_only(mut self) -> Self {
214        self.structure_checks = false;
215        self.code_list_checks = true;
216        self
217    }
218
219    /// Configure whether unknown segment tags should be rejected.
220    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
221        self.enforce_known_tags = enforce;
222        self
223    }
224
225    /// Override the required-segments mapping used for structural validation.
226    ///
227    /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
228    /// and must return a `'static` slice of segment tags that are mandatory for that
229    /// type.  The tags are checked both for *presence* and for *canonical ordering*
230    /// within the message.
231    ///
232    /// # Example
233    ///
234    /// ```rust,ignore
235    /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
236    ///     match msg_type {
237    ///         "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
238    ///         "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
239    ///         _ => &["UNH", "UNT"],
240    ///     }
241    /// }
242    ///
243    /// let validator = DirectoryValidator::from_definitions(DEFS)
244    ///     .with_required_segments(my_required_segments);
245    /// ```
246    pub fn with_required_segments(
247        mut self,
248        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
249    ) -> Self {
250        self.required_segments = Arc::new(f);
251        self
252    }
253
254    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
255        if let Some(explicit) = self.message_type.as_deref() {
256            return Some(explicit.to_owned());
257        }
258
259        segments
260            .iter()
261            .find(|s| s.tag == "UNH")
262            .and_then(|s| s.get_element(1))
263            .and_then(|e| e.get_component(0))
264            .map(str::to_owned)
265    }
266
267    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
268    ///
269    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
270    /// a sender is not required to transmit trailing empty components; this function
271    /// therefore strips them before checking against the expected count so that
272    /// conformant messages with omitted trailing components are still accepted.
273    ///
274    /// # Examples
275    ///
276    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
277    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
278    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
279        let elem = seg.elements.get(element_idx)?;
280        let mut count = elem.components.len();
281        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
282            count -= 1;
283        }
284        u8::try_from(count).ok()
285    }
286
287    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
288        for idx in 0..seg.elements.len() {
289            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
290                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
291                if actual != expected {
292                    return Err(EdifactError::InvalidComponentCount {
293                        tag: seg.tag.to_owned(),
294                        element_index: idx,
295                        expected,
296                        actual,
297                        offset: seg.span.start,
298                    });
299                }
300            }
301        }
302        Ok(())
303    }
304
305    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
306        let rules = (self.code_list_rules)(seg.tag);
307
308        for (elem_idx, comp_idx, de) in rules {
309            let value = seg
310                .get_element(*elem_idx)
311                .and_then(|e| e.get_component(*comp_idx))
312                .unwrap_or("");
313            if !value.is_empty() && !(self.is_code_valid)(de, value) {
314                let suggestion = (self.suggest_code)(de, value);
315                return Err(EdifactError::InvalidCodeValue {
316                    tag: seg.tag.to_owned(),
317                    element_index: *elem_idx,
318                    value: value.to_owned(),
319                    code_list: (*de).to_owned(),
320                    offset: seg.span.start,
321                    suggestion,
322                });
323            }
324        }
325
326        Ok(())
327    }
328}
329
330impl DirectoryValidator {
331    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
332        if !self.structure_checks && !self.code_list_checks {
333            return Ok(());
334        }
335
336        let Some(def) = (self.segment_lookup)(seg.tag) else {
337            if self.structure_checks && self.enforce_known_tags {
338                return Err(EdifactError::InvalidSegmentForMessage {
339                    tag: seg.tag.to_owned(),
340                    message_type: self
341                        .message_type
342                        .clone()
343                        .unwrap_or_else(|| self.directory_id.to_owned()),
344                    offset: seg.tag_span.start,
345                });
346            }
347            return Ok(());
348        };
349
350        let max_elements = def.elements.len();
351        let min_elements = def
352            .elements
353            .iter()
354            .rposition(|e| e.status == Status::Mandatory)
355            .map(|idx| idx + 1)
356            .unwrap_or(0);
357        let actual = seg.elements.len();
358
359        if self.structure_checks && (actual < min_elements || actual > max_elements) {
360            return Err(EdifactError::InvalidElementCount {
361                tag: seg.tag.to_owned(),
362                min: min_elements,
363                max: max_elements,
364                actual,
365                offset: seg.span.start,
366            });
367        }
368
369        if self.structure_checks {
370            for element in def
371                .elements
372                .iter()
373                .filter(|e| e.status == Status::Mandatory)
374            {
375                let idx = (element.position as usize).saturating_sub(1);
376                let is_present = seg
377                    .elements
378                    .get(idx)
379                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
380                if !is_present {
381                    return Err(EdifactError::MissingRequiredElement {
382                        tag: seg.tag.to_owned(),
383                        element_index: idx,
384                    });
385                }
386            }
387            self.validate_component_counts(seg)?;
388
389            if let Some(rule) = &self.additional_structure_rule {
390                rule(seg)?;
391            }
392        }
393
394        if self.code_list_checks {
395            self.validate_code_lists(seg)?;
396        }
397
398        Ok(())
399    }
400}
401
402impl Validator for DirectoryValidator {
403    fn set_message_type(&mut self, message_type: Option<&str>) {
404        self.message_type = message_type.map(str::to_owned);
405    }
406
407    fn validate_batch(&self, segments: &[Segment<'_>], report: &mut ValidationReport, _context: &ValidationRuleContext<'_>) {
408        for seg in segments {
409            if let Err(err) = self.validate_segment(seg) {
410                report_error(report, err);
411            }
412        }
413
414        if self.structure_checks {
415            if let Some(message_type) = self.detect_message_type(segments) {
416                for required_tag in (self.required_segments)(&message_type) {
417                    if segments.iter().all(|s| s.tag != *required_tag) {
418                        report.add_error(
419                            ValidationIssue::new(
420                                ValidationSeverity::Error,
421                                format!(
422                                    "required segment {} missing for message type {}",
423                                    required_tag, message_type
424                                ),
425                            )
426                            .with_segment(*required_tag)
427                            .with_suggestion("Add the mandatory segment at the correct position"),
428                        );
429                    }
430                }
431
432                let seq = (self.required_segments)(&message_type);
433                let mut last_idx = None;
434                for tag in seq {
435                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
436                        if let Some(prev) = last_idx {
437                            if idx < prev {
438                                report.add_error(
439                                    ValidationIssue::new(
440                                        ValidationSeverity::Error,
441                                        format!(
442                                            "segment sequence violation for message type {}: '{}' appears out of order",
443                                            message_type, tag
444                                        ),
445                                    )
446                                    .with_segment(*tag)
447                                    .with_suggestion(
448                                        "Ensure required segments follow UN/EDIFACT canonical order",
449                                    ),
450                                );
451                            }
452                        }
453                        last_idx = Some(idx);
454                    }
455                }
456            }
457        }
458    }
459}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// The `TST` fixture segment has exactly one mandatory (composite) element.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Lookup hook that only knows the `TST` fixture segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        (tag == "TST").then_some(&TEST_SEGMENT)
    }

    /// Code validator that accepts everything.
    fn code_valid(_: &str, _: &str) -> bool {
        true
    }

    /// Suggestion hook that never suggests anything.
    fn suggest_code(_: &str, _: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that never constrains the count.
    fn expected_components(_: &str, _: usize) -> Option<u8> {
        None
    }

    /// Run `validate_batch` with an empty rule context and hand back the report.
    fn run_batch(validator: &DirectoryValidator, segments: &[Segment<'_>]) -> ValidationReport {
        let mut report = ValidationReport::default();
        validator.validate_batch(
            segments,
            &mut report,
            &crate::validator::ValidationRuleContext::empty(),
        );
        report
    }

    /// Parse `input` via the reader API and return its first segment.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let parsed = crate::from_reader(std::io::Cursor::new(input)).expect("parse should succeed");
        parsed.into_iter().next().expect("at least one segment")
    }

    /// Effective component count of element `element_idx` in the first segment of `input`.
    fn effective_count(input: &[u8], element_idx: usize) -> Option<u8> {
        let owned = parse_single(input);
        let seg = owned.as_borrowed();
        DirectoryValidator::effective_component_count(&seg, element_idx)
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // First component is empty, second is not: the composite still counts as present.
        let input = b"TST+:ABC'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let report = run_batch(&validator, &segments);
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 is `137:20200101:` — three components, the third empty —
        // so the trailing one is dropped and the effective count is 2.
        assert_eq!(
            effective_count(b"DTM+137:20200101:'", 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 is `:` — two empty components — so the effective count is 0.
        assert_eq!(
            effective_count(b"NAD+MS++:'", 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Element 0 is `137:20200101:102` — every component is non-empty.
        assert_eq!(
            effective_count(b"DTM+137:20200101:102'", 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rules make element 0 / component 0 of TST a coded value, and
        // the custom validator only accepts the literal code "VALID".
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            if tag == "TST" {
                &[(0, 0, "CUSTOM_DE")]
            } else {
                &[]
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let input = b"TST+INVALID'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let report = run_batch(&validator, &segments);
        // NOTE(review): this relies on `report_error` recording an invalid code
        // value as a warning rather than an error — confirm against its definition.
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}