Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5
/// Mandatory/Conditional status of a data element within a segment.
///
/// `DirectoryValidator::validate_segment` uses this to derive the minimum
/// element count of a segment and to decide which elements must carry at
/// least one non-empty component.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Element must be present.
    Mandatory,
    /// Element is optional unless additional rules require it.
    Conditional,
}
14
/// Reference to a data element within a segment definition.
///
/// Only `position` and `status` are consulted by the validation code in this
/// file; `data_element` and `max_repeat` are carried as metadata.
#[derive(Debug, Clone, Copy)]
pub struct ElementRef {
    /// One-based element position in the segment definition.
    ///
    /// Converted to a zero-based index (`position - 1`, saturating) when the
    /// validator looks the element up in a parsed segment.
    pub position: u8,
    /// UN/EDIFACT data element identifier.
    pub data_element: &'static str,
    /// Requirement status of the element.
    pub status: Status,
    /// Maximum repetition count for this element.
    pub max_repeat: u8,
}
27
/// Definition of an EDIFACT segment (tag + element structure).
///
/// The number of entries in `elements` is the maximum number of elements a
/// segment with this tag may carry; the position of the last
/// [`Status::Mandatory`] entry determines the minimum.
#[derive(Debug)]
pub struct SegmentDefinition {
    /// Segment tag.
    pub tag: &'static str,
    /// Human-readable segment name.
    pub name: &'static str,
    /// Ordered element definitions.
    pub elements: &'static [ElementRef],
}
38
/// Resolves a segment tag to its definition; `None` means the tag is unknown
/// to the directory.
type SegmentLookupFn = fn(&str) -> Option<&'static SegmentDefinition>;
/// Called as `(data_element_id, value)`; returns whether `value` is an
/// accepted code for that data element.
type IsCodeValidFn = fn(&str, &str) -> bool;
/// Called as `(data_element_id, value)` for a rejected value; may return a
/// replacement suggestion that is attached to the resulting error.
type SuggestCodeFn = fn(&str, &str) -> Option<&'static str>;
/// Called as `(segment_tag, zero_based_element_index)`; returns the component
/// count to enforce for that element, or `None` to skip the check.
type ExpectedComponentsFn = fn(&str, usize) -> Option<u8>;
/// Extra per-segment structure rule, run after the built-in structure checks.
type AdditionalStructureRuleFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
/// Returns the `(element_index, component_index, data_element_id)` tuples to
/// validate against a code list for the given segment tag.
type CodeListRulesFn = fn(tag: &str) -> &'static [(usize, usize, &'static str)];
47
/// Code-list validation rules shared by every UN/EDIFACT directory release.
///
/// Returns the rules registered for `tag` as
/// `(element_index, component_index, data_element_id)` tuples, where both
/// indices are zero-based. Tags without a registered rule yield an empty slice.
///
/// The table covers the qualifier/code elements most frequently validated in
/// ORDERS, INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // Every rule targets the leading qualifier of its segment: element 0,
    // component 0.
    const RULES_BY_TAG: &[(&str, &[(usize, usize, &str)])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    RULES_BY_TAG
        .iter()
        .find(|(candidate, _)| *candidate == tag)
        .map(|(_, rules)| *rules)
        .unwrap_or(&[])
}
68
/// Shared validator implementation that is configured per UN/EDIFACT directory release.
///
/// # Scope and limitations
///
/// `DirectoryValidator` validates individual segment *content* (element counts,
/// component counts, code-list values, and conditional rules) and checks that
/// every *mandatory* segment type is present at least once.  The only ordering
/// check it performs is coarse: the *first* occurrences of the mandatory
/// segment types must appear in their listed relative order.  It does **not**
/// validate the full segment *sequence* or *repetition cardinality* — i.e., it
/// cannot tell you that a `BGM` segment appears more than once, or that a `RFF`
/// group appears in the wrong position.  Full sequence validation requires a
/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
/// the scope of this implementation.
#[derive(Debug, Clone)]
pub struct DirectoryValidator {
    /// Directory release label; used in place of the message type in
    /// unknown-tag errors when no explicit message type has been set.
    directory_id: &'static str,
    /// Resolves a segment tag to its definition (`None` = unknown tag).
    segment_lookup: SegmentLookupFn,
    /// Decides whether a value is an accepted code for a data element.
    is_code_valid: IsCodeValidFn,
    /// Supplies an optional replacement suggestion for a rejected code.
    suggest_code: SuggestCodeFn,
    /// Supplies the component count to enforce per `(tag, element index)`.
    expected_components: ExpectedComponentsFn,
    /// Supplies the code-list rules per segment tag; defaults to
    /// `base_code_list_rules`.
    code_list_rules: CodeListRulesFn,
    /// Optional extra structure rule run after the built-in structure checks.
    additional_structure_rule: Option<AdditionalStructureRuleFn>,
    /// Explicit message type; when `None` it is detected from the `UNH` segment.
    message_type: Option<String>,
    /// Whether an unknown segment tag is an error (only while structure
    /// checks are enabled).
    enforce_known_tags: bool,
    /// Enables element/component structure checks and required-segment checks.
    structure_checks: bool,
    /// Enables code-list checks.
    code_list_checks: bool,
}
95
96impl DirectoryValidator {
    /// Create a validator for a specific directory release with injected lookup/check hooks.
    ///
    /// # Parameters
    ///
    /// - `directory_id`: label of the directory release; reported in place of
    ///   the message type in unknown-tag errors when no message type is set.
    /// - `segment_lookup`: resolves a segment tag to its definition.
    /// - `is_code_valid`: checks a value against the code list of a data element.
    /// - `suggest_code`: optional replacement suggestion for a rejected code.
    /// - `expected_components`: component count to enforce per element, if any.
    /// - `additional_structure_rule`: optional extra per-segment structure rule.
    ///
    /// # Defaults
    ///
    /// The returned validator uses `base_code_list_rules`, has no explicit
    /// message type, rejects unknown tags, and runs both structure and
    /// code-list checks.
    pub fn new(
        directory_id: &'static str,
        segment_lookup: SegmentLookupFn,
        is_code_valid: IsCodeValidFn,
        suggest_code: SuggestCodeFn,
        expected_components: ExpectedComponentsFn,
        additional_structure_rule: Option<AdditionalStructureRuleFn>,
    ) -> Self {
        Self {
            directory_id,
            segment_lookup,
            is_code_valid,
            suggest_code,
            expected_components,
            code_list_rules: base_code_list_rules,
            additional_structure_rule,
            message_type: None,
            enforce_known_tags: true,
            structure_checks: true,
            code_list_checks: true,
        }
    }
120
121    /// Override the code-list rules function.
122    ///
123    /// Directories can supply a directory-specific implementation that extends or
124    /// replaces the base rules from `base_code_list_rules`.
125    pub fn with_code_list_rules(mut self, f: CodeListRulesFn) -> Self {
126        self.code_list_rules = f;
127        self
128    }
129
130    /// Enable only structure checks and disable code-list checks.
131    pub fn structure_only(mut self) -> Self {
132        self.structure_checks = true;
133        self.code_list_checks = false;
134        self
135    }
136
137    /// Enable only code-list checks and disable structure checks.
138    pub fn code_list_only(mut self) -> Self {
139        self.structure_checks = false;
140        self.code_list_checks = true;
141        self
142    }
143
144    /// Configure whether unknown segment tags should be rejected.
145    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
146        self.enforce_known_tags = enforce;
147        self
148    }
149
150    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
151        if let Some(explicit) = self.message_type.as_deref() {
152            return Some(explicit.to_owned());
153        }
154
155        segments
156            .iter()
157            .find(|s| s.tag == "UNH")
158            .and_then(|s| s.get_element(1))
159            .and_then(|e| e.get_component(0))
160            .map(str::to_owned)
161    }
162
163    /// Return the list of segment tags that are mandatory for `message_type`.
164    ///
165    /// **Coverage**: only `UTILMD`, `ORDERS`, and `INVOIC` have message-type-specific
166    /// mandatory segments hard-coded.  All other message types fall back to the
167    /// generic set `["UNH", "UNT"]`.
168    ///
169    /// The returned tags are checked via a presence test only — ordering and
170    /// repetition constraints are *not* validated.  Unknown message types always
171    /// return the generic set, never an empty slice, so envelope segments are
172    /// always required regardless of message type.
173    fn required_segments_for(message_type: &str) -> &'static [&'static str] {
174        match message_type {
175            "UTILMD" | "ORDERS" | "INVOIC" => &["UNH", "BGM", "UNT"],
176            _ => &["UNH", "UNT"],
177        }
178    }
179
180    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
181    ///
182    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
183    /// a sender is not required to transmit trailing empty components; this function
184    /// therefore strips them before checking against the expected count so that
185    /// conformant messages with omitted trailing components are still accepted.
186    ///
187    /// # Examples
188    ///
189    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
190    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
191    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
192        let elem = seg.elements.get(element_idx)?;
193        let mut count = elem.components.len();
194        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
195            count -= 1;
196        }
197        u8::try_from(count).ok()
198    }
199
200    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
201        for idx in 0..seg.elements.len() {
202            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
203                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
204                if actual != expected {
205                    return Err(EdifactError::InvalidComponentCount {
206                        tag: seg.tag.to_owned(),
207                        element_index: idx,
208                        expected,
209                        actual,
210                        offset: seg.span.start,
211                    });
212                }
213            }
214        }
215        Ok(())
216    }
217
218    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
219        let rules = (self.code_list_rules)(seg.tag);
220
221        for (elem_idx, comp_idx, de) in rules {
222            let value = seg
223                .get_element(*elem_idx)
224                .and_then(|e| e.get_component(*comp_idx))
225                .unwrap_or("");
226            if !value.is_empty() && !(self.is_code_valid)(de, value) {
227                let suggestion = (self.suggest_code)(de, value);
228                return Err(EdifactError::InvalidCodeValue {
229                    tag: seg.tag.to_owned(),
230                    element_index: *elem_idx,
231                    value: value.to_owned(),
232                    code_list: (*de).to_owned(),
233                    offset: seg.span.start,
234                    suggestion,
235                });
236            }
237        }
238
239        Ok(())
240    }
241}
242
243impl DirectoryValidator {
244    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
245        if !self.structure_checks && !self.code_list_checks {
246            return Ok(());
247        }
248
249        let Some(def) = (self.segment_lookup)(seg.tag) else {
250            if self.structure_checks && self.enforce_known_tags {
251                return Err(EdifactError::InvalidSegmentForMessage {
252                    tag: seg.tag.to_owned(),
253                    message_type: self
254                        .message_type
255                        .clone()
256                        .unwrap_or_else(|| self.directory_id.to_owned()),
257                    offset: seg.tag_span.start,
258                });
259            }
260            return Ok(());
261        };
262
263        let max_elements = def.elements.len();
264        let min_elements = def
265            .elements
266            .iter()
267            .rposition(|e| e.status == Status::Mandatory)
268            .map(|idx| idx + 1)
269            .unwrap_or(0);
270        let actual = seg.elements.len();
271
272        if self.structure_checks && (actual < min_elements || actual > max_elements) {
273            return Err(EdifactError::InvalidElementCount {
274                tag: seg.tag.to_owned(),
275                min: min_elements,
276                max: max_elements,
277                actual,
278                offset: seg.span.start,
279            });
280        }
281
282        if self.structure_checks {
283            for element in def
284                .elements
285                .iter()
286                .filter(|e| e.status == Status::Mandatory)
287            {
288                let idx = (element.position as usize).saturating_sub(1);
289                let is_present = seg
290                    .elements
291                    .get(idx)
292                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
293                if !is_present {
294                    return Err(EdifactError::MissingRequiredElement {
295                        tag: seg.tag.to_owned(),
296                        element_index: idx,
297                    });
298                }
299            }
300            self.validate_component_counts(seg)?;
301
302            if let Some(rule) = self.additional_structure_rule {
303                rule(seg)?;
304            }
305        }
306
307        if self.code_list_checks {
308            self.validate_code_lists(seg)?;
309        }
310
311        Ok(())
312    }
313}
314
315impl Validator for DirectoryValidator {
316    fn set_message_type(&mut self, message_type: Option<&str>) {
317        self.message_type = message_type.map(str::to_owned);
318    }
319
320    fn validate_batch(&self, segments: &[Segment<'_>], report: &mut ValidationReport) {
321        for seg in segments {
322            if let Err(err) = self.validate_segment(seg) {
323                report_error(report, err);
324            }
325        }
326
327        if self.structure_checks {
328            if let Some(message_type) = self.detect_message_type(segments) {
329                for required_tag in Self::required_segments_for(&message_type) {
330                    if segments.iter().all(|s| s.tag != *required_tag) {
331                        report.add_error(
332                            ValidationIssue::new(
333                                ValidationSeverity::Error,
334                                format!(
335                                    "required segment {} missing for message type {}",
336                                    required_tag, message_type
337                                ),
338                            )
339                            .with_segment(*required_tag)
340                            .with_suggestion("Add the mandatory segment at the correct position"),
341                        );
342                    }
343                }
344
345                let seq = Self::required_segments_for(&message_type);
346                let mut last_idx = None;
347                for tag in seq {
348                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
349                        if let Some(prev) = last_idx {
350                            if idx < prev {
351                                report.add_error(
352                                    ValidationIssue::new(
353                                        ValidationSeverity::Error,
354                                        format!(
355                                            "segment sequence violation for message type {}: '{}' appears out of order",
356                                            message_type, tag
357                                        ),
358                                    )
359                                    .with_segment(*tag)
360                                    .with_suggestion(
361                                        "Ensure required segments follow UN/EDIFACT canonical order",
362                                    ),
363                                );
364                            }
365                        }
366                        last_idx = Some(idx);
367                    }
368                }
369            }
370        }
371    }
372}
373
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal directory used by the tests: a single segment `TST` with one
    // mandatory element.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Knows only the `TST` segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        match tag {
            "TST" => Some(&TEST_SEGMENT),
            _ => None,
        }
    }

    /// Accepts every code.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Never offers a suggestion.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Declares no expected component counts, so that check is skipped.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // Element 0 is `:ABC`: the first component is empty, the second is not,
        // which is enough for the mandatory element to count as present.
        let input = b"TST+:ABC'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` and return its first segment with a `'static` lifetime.
    fn parse_single(input: &[u8]) -> crate::model::Segment<'static> {
        // Intentional leak (no `unsafe` involved) — test inputs are small and
        // bounded per call.  `Segment<'static>` is needed so the returned value
        // is not tied to a local buffer; the allocation is bounded by test
        // count, not message size.
        let leaked: &'static [u8] = Box::leak(input.to_vec().into_boxed_slice());
        crate::from_bytes(leaked)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed")
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // DTM+137:20200101: has three components in element 0; the third is empty.
        // ISO 9735-1 §3.3 says trailing empty components may be omitted,
        // so effective count should be 2.
        let seg = parse_single(b"DTM+137:20200101:'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(2), "trailing empty component should be stripped");
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // NAD+MS++: → element 2 is ":" with two empty components → effective=0
        let seg = parse_single(b"NAD+MS++:'");
        let count = DirectoryValidator::effective_component_count(&seg, 2);
        assert_eq!(count, Some(0), "all-empty composite should have effective count 0");
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // DTM+137:20200101:102 — all three components are non-empty
        let seg = parse_single(b"DTM+137:20200101:102'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(3), "no components should be stripped when all non-empty");
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Override code-list rules to require element 0 of TST to be a specific code.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            match tag {
                "TST" => &[(0, 0, "CUSTOM_DE")],
                _ => &[],
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let input = b"TST+INVALID'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        // NOTE(review): `validate_batch` forwards segment failures through
        // `report_error`, and the first test in this module checks
        // `has_errors()`.  Confirm that `report_error` records a code-list
        // violation at warning severity; if it records an error, this should
        // assert `has_errors()` instead.
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}