Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5
/// Requirement level of a data element inside a segment definition.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Status {
    /// The element has to be supplied.
    Mandatory,
    /// The element may be left out unless a further rule demands it.
    Conditional,
}
14
15/// Reference to a data element within a segment definition.
16#[derive(Debug, Clone, Copy)]
17pub struct ElementRef {
18    /// One-based element position in the segment definition.
19    pub position: u8,
20    /// UN/EDIFACT data element identifier.
21    pub data_element: &'static str,
22    /// Requirement status of the element.
23    pub status: Status,
24    /// Maximum repetition count for this element.
25    pub max_repeat: u8,
26}
27
28/// Definition of an EDIFACT segment (tag + element structure).
29#[derive(Debug)]
30pub struct SegmentDefinition {
31    /// Segment tag.
32    pub tag: &'static str,
33    /// Human-readable segment name.
34    pub name: &'static str,
35    /// Ordered element definitions.
36    pub elements: &'static [ElementRef],
37}
38
39type SegmentLookupFn = fn(&str) -> Option<&'static SegmentDefinition>;
40type IsCodeValidFn = fn(&str, &str) -> bool;
41type SuggestCodeFn = fn(&str, &str) -> Option<&'static str>;
42type ExpectedComponentsFn = fn(&str, usize) -> Option<u8>;
43type AdditionalStructureRuleFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
44/// Returns the `(element_index, component_index, data_element_id)` tuples to
45/// validate against a code list for the given segment tag.
46type CodeListRulesFn = fn(tag: &str) -> &'static [(usize, usize, &'static str)];
47
/// Default code-list rules shared by every UN/EDIFACT directory release.
///
/// The result is a slice of `(element_index, component_index, data_element_id)`
/// triples; both indices are zero-based. Tags without a rule yield an empty
/// slice.
///
/// The table covers the qualifier/code elements most commonly validated in
/// ORDERS, INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // Tag → rules table. Tags are unique, so the first match is the only match.
    const RULES: &[(&str, &[(usize, usize, &str)])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    RULES
        .iter()
        .find(|(known, _)| *known == tag)
        .map(|&(_, rules)| rules)
        .unwrap_or(&[])
}
68
69/// Shared validator implementation that is configured per UN/EDIFACT directory release.
70///
71/// # Scope and limitations
72///
73/// `DirectoryValidator` validates individual segment *content* (element counts,
74/// component counts, code-list values, and conditional rules) and checks that
75/// every *mandatory* segment type is present at least once.  It does **not**
76/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
77/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
78/// appears in the wrong position.  Full sequence validation requires a
79/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
80/// the scope of this implementation.
81#[derive(Clone)]
82pub struct DirectoryValidator {
83    directory_id: &'static str,
84    segment_lookup: SegmentLookupFn,
85    is_code_valid: IsCodeValidFn,
86    suggest_code: SuggestCodeFn,
87    expected_components: ExpectedComponentsFn,
88    code_list_rules: CodeListRulesFn,
89    additional_structure_rule: Option<AdditionalStructureRuleFn>,
90    message_type: Option<String>,
91    enforce_known_tags: bool,
92    structure_checks: bool,
93    code_list_checks: bool,
94}
95
96impl std::fmt::Debug for DirectoryValidator {
97    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98        f.debug_struct("DirectoryValidator")
99            .field("directory_id", &self.directory_id)
100            .field("message_type", &self.message_type)
101            .field("enforce_known_tags", &self.enforce_known_tags)
102            .field("structure_checks", &self.structure_checks)
103            .field("code_list_checks", &self.code_list_checks)
104            .finish_non_exhaustive()
105    }
106}
107
108impl DirectoryValidator {
109    /// Create a validator for a specific directory release with injected lookup/check hooks.
110    pub fn new(
111        directory_id: &'static str,
112        segment_lookup: SegmentLookupFn,
113        is_code_valid: IsCodeValidFn,
114        suggest_code: SuggestCodeFn,
115        expected_components: ExpectedComponentsFn,
116        additional_structure_rule: Option<AdditionalStructureRuleFn>,
117    ) -> Self {
118        Self {
119            directory_id,
120            segment_lookup,
121            is_code_valid,
122            suggest_code,
123            expected_components,
124            code_list_rules: base_code_list_rules,
125            additional_structure_rule,
126            message_type: None,
127            enforce_known_tags: true,
128            structure_checks: true,
129            code_list_checks: true,
130        }
131    }
132
133    /// Override the code-list rules function.
134    ///
135    /// Directories can supply a directory-specific implementation that extends or
136    /// replaces the base rules from `base_code_list_rules`.
137    pub fn with_code_list_rules(mut self, f: CodeListRulesFn) -> Self {
138        self.code_list_rules = f;
139        self
140    }
141
142    /// Enable only structure checks and disable code-list checks.
143    pub fn structure_only(mut self) -> Self {
144        self.structure_checks = true;
145        self.code_list_checks = false;
146        self
147    }
148
149    /// Enable only code-list checks and disable structure checks.
150    pub fn code_list_only(mut self) -> Self {
151        self.structure_checks = false;
152        self.code_list_checks = true;
153        self
154    }
155
156    /// Configure whether unknown segment tags should be rejected.
157    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
158        self.enforce_known_tags = enforce;
159        self
160    }
161
162    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
163        if let Some(explicit) = self.message_type.as_deref() {
164            return Some(explicit.to_owned());
165        }
166
167        segments
168            .iter()
169            .find(|s| s.tag == "UNH")
170            .and_then(|s| s.get_element(1))
171            .and_then(|e| e.get_component(0))
172            .map(str::to_owned)
173    }
174
175    /// Return the list of segment tags that are mandatory for `message_type`.
176    ///
177    /// **Coverage**: only `UTILMD`, `ORDERS`, and `INVOIC` have message-type-specific
178    /// mandatory segments hard-coded.  All other message types fall back to the
179    /// generic set `["UNH", "UNT"]`.
180    ///
181    /// The returned tags are checked via a presence test only — ordering and
182    /// repetition constraints are *not* validated.  Unknown message types always
183    /// return the generic set, never an empty slice, so envelope segments are
184    /// always required regardless of message type.
185    fn required_segments_for(message_type: &str) -> &'static [&'static str] {
186        match message_type {
187            "UTILMD" | "ORDERS" | "INVOIC" => &["UNH", "BGM", "UNT"],
188            _ => &["UNH", "UNT"],
189        }
190    }
191
192    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
193    ///
194    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
195    /// a sender is not required to transmit trailing empty components; this function
196    /// therefore strips them before checking against the expected count so that
197    /// conformant messages with omitted trailing components are still accepted.
198    ///
199    /// # Examples
200    ///
201    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
202    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
203    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
204        let elem = seg.elements.get(element_idx)?;
205        let mut count = elem.components.len();
206        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
207            count -= 1;
208        }
209        u8::try_from(count).ok()
210    }
211
212    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
213        for idx in 0..seg.elements.len() {
214            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
215                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
216                if actual != expected {
217                    return Err(EdifactError::InvalidComponentCount {
218                        tag: seg.tag.to_owned(),
219                        element_index: idx,
220                        expected,
221                        actual,
222                        offset: seg.span.start,
223                    });
224                }
225            }
226        }
227        Ok(())
228    }
229
230    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
231        let rules = (self.code_list_rules)(seg.tag);
232
233        for (elem_idx, comp_idx, de) in rules {
234            let value = seg
235                .get_element(*elem_idx)
236                .and_then(|e| e.get_component(*comp_idx))
237                .unwrap_or("");
238            if !value.is_empty() && !(self.is_code_valid)(de, value) {
239                let suggestion = (self.suggest_code)(de, value);
240                return Err(EdifactError::InvalidCodeValue {
241                    tag: seg.tag.to_owned(),
242                    element_index: *elem_idx,
243                    value: value.to_owned(),
244                    code_list: (*de).to_owned(),
245                    offset: seg.span.start,
246                    suggestion,
247                });
248            }
249        }
250
251        Ok(())
252    }
253}
254
255impl DirectoryValidator {
256    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
257        if !self.structure_checks && !self.code_list_checks {
258            return Ok(());
259        }
260
261        let Some(def) = (self.segment_lookup)(seg.tag) else {
262            if self.structure_checks && self.enforce_known_tags {
263                return Err(EdifactError::InvalidSegmentForMessage {
264                    tag: seg.tag.to_owned(),
265                    message_type: self
266                        .message_type
267                        .clone()
268                        .unwrap_or_else(|| self.directory_id.to_owned()),
269                    offset: seg.tag_span.start,
270                });
271            }
272            return Ok(());
273        };
274
275        let max_elements = def.elements.len();
276        let min_elements = def
277            .elements
278            .iter()
279            .rposition(|e| e.status == Status::Mandatory)
280            .map(|idx| idx + 1)
281            .unwrap_or(0);
282        let actual = seg.elements.len();
283
284        if self.structure_checks && (actual < min_elements || actual > max_elements) {
285            return Err(EdifactError::InvalidElementCount {
286                tag: seg.tag.to_owned(),
287                min: min_elements,
288                max: max_elements,
289                actual,
290                offset: seg.span.start,
291            });
292        }
293
294        if self.structure_checks {
295            for element in def
296                .elements
297                .iter()
298                .filter(|e| e.status == Status::Mandatory)
299            {
300                let idx = (element.position as usize).saturating_sub(1);
301                let is_present = seg
302                    .elements
303                    .get(idx)
304                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
305                if !is_present {
306                    return Err(EdifactError::MissingRequiredElement {
307                        tag: seg.tag.to_owned(),
308                        element_index: idx,
309                    });
310                }
311            }
312            self.validate_component_counts(seg)?;
313
314            if let Some(rule) = self.additional_structure_rule {
315                rule(seg)?;
316            }
317        }
318
319        if self.code_list_checks {
320            self.validate_code_lists(seg)?;
321        }
322
323        Ok(())
324    }
325}
326
327impl Validator for DirectoryValidator {
328    fn set_message_type(&mut self, message_type: Option<&str>) {
329        self.message_type = message_type.map(str::to_owned);
330    }
331
332    fn validate_batch(&self, segments: &[Segment<'_>], report: &mut ValidationReport) {
333        for seg in segments {
334            if let Err(err) = self.validate_segment(seg) {
335                report_error(report, err);
336            }
337        }
338
339        if self.structure_checks {
340            if let Some(message_type) = self.detect_message_type(segments) {
341                for required_tag in Self::required_segments_for(&message_type) {
342                    if segments.iter().all(|s| s.tag != *required_tag) {
343                        report.add_error(
344                            ValidationIssue::new(
345                                ValidationSeverity::Error,
346                                format!(
347                                    "required segment {} missing for message type {}",
348                                    required_tag, message_type
349                                ),
350                            )
351                            .with_segment(*required_tag)
352                            .with_suggestion("Add the mandatory segment at the correct position"),
353                        );
354                    }
355                }
356
357                let seq = Self::required_segments_for(&message_type);
358                let mut last_idx = None;
359                for tag in seq {
360                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
361                        if let Some(prev) = last_idx {
362                            if idx < prev {
363                                report.add_error(
364                                    ValidationIssue::new(
365                                        ValidationSeverity::Error,
366                                        format!(
367                                            "segment sequence violation for message type {}: '{}' appears out of order",
368                                            message_type, tag
369                                        ),
370                                    )
371                                    .with_segment(*tag)
372                                    .with_suggestion(
373                                        "Ensure required segments follow UN/EDIFACT canonical order",
374                                    ),
375                                );
376                            }
377                        }
378                        last_idx = Some(idx);
379                    }
380                }
381            }
382        }
383    }
384}
385
#[cfg(test)]
mod tests {
    use super::*;

    /// Element layout of the fixture segment: one mandatory composite.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// Fixture segment `TST`, the only tag known to the test lookup.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Lookup hook that knows `TST` and nothing else.
    fn lookup_fixture(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code-list hook that accepts every value.
    fn accept_any_code(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never has a suggestion.
    fn never_suggest(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that leaves every element unchecked.
    fn no_component_rule(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Build a `TEST` directory validator around the fixture lookup with the
    /// given code-validity hook and no additional structure rule.
    fn fixture_validator(is_code_valid: IsCodeValidFn) -> DirectoryValidator {
        DirectoryValidator::new(
            "TEST",
            lookup_fixture,
            is_code_valid,
            never_suggest,
            no_component_rule,
            None,
        )
    }

    /// Parse `input` and hand back its first segment.
    fn parse_single(input: &[u8]) -> crate::model::Segment<'static> {
        // NOTE: the buffer is leaked on purpose so the returned `Segment<'static>`
        // is not tied to a local; test inputs are tiny and the number of calls is
        // bounded by the number of tests.
        let leaked: &'static [u8] = Box::leak(input.to_vec().into_boxed_slice());
        let parsed = crate::from_bytes(leaked)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");
        parsed.into_iter().next().expect("at least one segment")
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The first component is empty, but the second carries a value.
        let segments = [parse_single(b"TST+:ABC'")];

        let mut report = ValidationReport::default();
        fixture_validator(accept_any_code).validate_batch(&segments, &mut report);

        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` has three components, the last one
        // empty; trailing empty components may be omitted, so only two count.
        let seg = parse_single(b"DTM+137:20200101:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components, none counts.
        let seg = parse_single(b"NAD+MS++:'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Every component of `DTM+137:20200101:102` carries a value.
        let seg = parse_single(b"DTM+137:20200101:102'");
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rules: element 0 / component 0 of TST is checked against CUSTOM_DE.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            if tag == "TST" {
                &[(0, 0, "CUSTOM_DE")]
            } else {
                &[]
            }
        }
        // Only the literal code "VALID" is accepted.
        fn only_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }

        let segments = [parse_single(b"TST+INVALID'")];
        let validator = fixture_validator(only_valid).with_code_list_rules(custom_rules);

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);

        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}