Skip to main content

edifact_rs/
directory_validator.rs

1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5
/// Mandatory/Conditional status of a data element within a segment.
///
/// Segment validation in this module treats only [`Status::Mandatory`]
/// elements as required: each must be present with at least one non-empty
/// component, and the index of the last mandatory element sets the minimum
/// number of elements a segment has to carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// Element must be present.
    Mandatory,
    /// Element is optional unless additional rules require it.
    Conditional,
}
14
/// Reference to a data element within a segment definition.
///
/// Instances are stored in the `elements` slice of a [`SegmentDefinition`].
#[derive(Debug, Clone, Copy)]
pub struct ElementRef {
    /// One-based element position in the segment definition.
    ///
    /// The mandatory-element check converts this to a zero-based index
    /// (`position - 1`, saturating at zero) before looking the element up in
    /// the parsed segment.
    pub position: u8,
    /// UN/EDIFACT data element identifier.
    ///
    /// Not read by the validation code in this file.
    pub data_element: &'static str,
    /// Requirement status of the element.
    pub status: Status,
    /// Maximum repetition count for this element.
    ///
    /// Not read by the validation code in this file.
    pub max_repeat: u8,
}
27
/// Definition of an EDIFACT segment (tag + element structure).
///
/// Definitions are handed to the validator through its segment-lookup hook as
/// `&'static` references.
#[derive(Debug)]
pub struct SegmentDefinition {
    /// Segment tag.
    pub tag: &'static str,
    /// Human-readable segment name.
    pub name: &'static str,
    /// Ordered element definitions.
    ///
    /// The slice length is used as the maximum number of elements a parsed
    /// segment may carry; the index of the last mandatory entry (plus one) is
    /// used as the minimum.
    pub elements: &'static [ElementRef],
}
38
/// Resolves a segment tag to its definition; `None` means the tag is unknown
/// to the directory.
type SegmentLookupFn = fn(&str) -> Option<&'static SegmentDefinition>;
/// Called as `(data_element_id, value)`; returns whether `value` is an
/// accepted code for that data element.
type IsCodeValidFn = fn(&str, &str) -> bool;
/// Called as `(data_element_id, value)` after a code was rejected; may return
/// a replacement suggestion that is attached to the resulting error.
type SuggestCodeFn = fn(&str, &str) -> Option<&'static str>;
/// Called as `(segment_tag, zero_based_element_index)`; `Some(n)` requests a
/// component-count check against `n`, `None` skips the check for that element.
type ExpectedComponentsFn = fn(&str, usize) -> Option<u8>;
/// Optional directory-specific structure check that runs on every known
/// segment after the generic element and component checks have passed.
type AdditionalStructureRuleFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
/// Returns the `(element_index, component_index, data_element_id)` tuples to
/// validate against a code list for the given segment tag.
type CodeListRulesFn = fn(tag: &str) -> &'static [(usize, usize, &'static str)];
47
/// Default code-list checks shared by every UN/EDIFACT directory release.
///
/// Returns the `(element_index, component_index, data_element_id)` triples
/// that apply to `tag`. Both indices are zero-based. Tags without a built-in
/// rule yield an empty slice.
///
/// The table covers the qualifier/code elements most frequently validated in
/// ORDERS, INVOIC, UTILMD, and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // One code-list check: (element_index, component_index, data_element_id).
    type Rule = (usize, usize, &'static str);

    // Every built-in rule targets the first component of the first element.
    const TABLE: &[(&str, &[Rule])] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    TABLE
        .iter()
        .find(|(known_tag, _)| *known_tag == tag)
        .map(|(_, rules)| *rules)
        .unwrap_or(&[])
}
68
/// Shared validator implementation that is configured per UN/EDIFACT directory release.
///
/// # Scope and limitations
///
/// `DirectoryValidator` validates individual segment *content* (element counts,
/// component counts, code-list values, and conditional rules) and checks that
/// every *mandatory* segment type is present at least once.  Batch validation
/// additionally performs a coarse order check: the *first occurrences* of the
/// mandatory tags must appear in their listed order.
///
/// It does **not** validate the full segment *sequence* or *repetition
/// cardinality* — i.e., it cannot tell you that a `BGM` segment appears more
/// than once, or that a `RFF` group appears in the wrong position.  Full
/// sequence validation requires a state-machine per message type (UN/EDIFACT
/// Segment Tables) which is outside the scope of this implementation.
#[derive(Debug, Clone)]
pub struct DirectoryValidator {
    /// Identifier of the directory release; used as the message-type label in
    /// unknown-segment errors when no explicit message type is set.
    directory_id: &'static str,
    /// Resolves a segment tag to its definition.
    segment_lookup: SegmentLookupFn,
    /// Decides whether a value is valid for a data element's code list.
    is_code_valid: IsCodeValidFn,
    /// Supplies an optional suggestion for a rejected code value.
    suggest_code: SuggestCodeFn,
    /// Supplies the expected component count per `(tag, element index)`.
    expected_components: ExpectedComponentsFn,
    /// Supplies the code-list rules to apply per segment tag.
    code_list_rules: CodeListRulesFn,
    /// Optional extra per-segment structure check.
    additional_structure_rule: Option<AdditionalStructureRuleFn>,
    /// Explicit message type; when `None` the type is read from the `UNH` segment.
    message_type: Option<String>,
    /// Whether segments with an unknown tag are rejected during structure checks.
    enforce_known_tags: bool,
    /// Whether structural checks (element/component counts, required segments) run.
    structure_checks: bool,
    /// Whether code-list checks run.
    code_list_checks: bool,
}
95
96impl DirectoryValidator {
97    /// Create a validator for a specific directory release with injected lookup/check hooks.
98    pub fn new(
99        directory_id: &'static str,
100        segment_lookup: SegmentLookupFn,
101        is_code_valid: IsCodeValidFn,
102        suggest_code: SuggestCodeFn,
103        expected_components: ExpectedComponentsFn,
104        additional_structure_rule: Option<AdditionalStructureRuleFn>,
105    ) -> Self {
106        Self {
107            directory_id,
108            segment_lookup,
109            is_code_valid,
110            suggest_code,
111            expected_components,
112            code_list_rules: base_code_list_rules,
113            additional_structure_rule,
114            message_type: None,
115            enforce_known_tags: true,
116            structure_checks: true,
117            code_list_checks: true,
118        }
119    }
120
121    /// Override the code-list rules function.
122    ///
123    /// Directories can supply a directory-specific implementation that extends or
124    /// replaces the base rules from `base_code_list_rules`.
125    pub fn with_code_list_rules(mut self, f: CodeListRulesFn) -> Self {
126        self.code_list_rules = f;
127        self
128    }
129
130    /// Enable only structure checks and disable code-list checks.
131    pub fn structure_only(mut self) -> Self {
132        self.structure_checks = true;
133        self.code_list_checks = false;
134        self
135    }
136
137    /// Enable only code-list checks and disable structure checks.
138    pub fn code_list_only(mut self) -> Self {
139        self.structure_checks = false;
140        self.code_list_checks = true;
141        self
142    }
143
144    /// Configure whether unknown segment tags should be rejected.
145    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
146        self.enforce_known_tags = enforce;
147        self
148    }
149
150    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
151        if let Some(explicit) = self.message_type.as_deref() {
152            return Some(explicit.to_owned());
153        }
154
155        segments
156            .iter()
157            .find(|s| s.tag == "UNH")
158            .and_then(|s| s.get_element(1))
159            .and_then(|e| e.get_component(0))
160            .map(str::to_owned)
161    }
162
163    /// Return the list of segment tags that are mandatory for `message_type`.
164    ///
165    /// **Coverage**: only `UTILMD`, `ORDERS`, and `INVOIC` have message-type-specific
166    /// mandatory segments hard-coded.  All other message types fall back to the
167    /// generic set `["UNH", "UNT"]`.
168    ///
169    /// The returned tags are checked via a presence test only — ordering and
170    /// repetition constraints are *not* validated.  Unknown message types always
171    /// return the generic set, never an empty slice, so envelope segments are
172    /// always required regardless of message type.
173    fn required_segments_for(message_type: &str) -> &'static [&'static str] {
174        match message_type {
175            "UTILMD" | "ORDERS" | "INVOIC" => &["UNH", "BGM", "UNT"],
176            _ => &["UNH", "UNT"],
177        }
178    }
179
180    /// Count the non-trailing-empty components in element `element_idx` of `seg`.
181    ///
182    /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
183    /// a sender is not required to transmit trailing empty components; this function
184    /// therefore strips them before checking against the expected count so that
185    /// conformant messages with omitted trailing components are still accepted.
186    ///
187    /// # Examples
188    ///
189    /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
190    /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
191    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
192        let elem = seg.elements.get(element_idx)?;
193        let mut count = elem.components.len();
194        while count > 0 && elem.components[count - 1].as_ref().is_empty() {
195            count -= 1;
196        }
197        debug_assert!(
198            count <= u8::MAX as usize,
199            "effective_component_count: element has >255 components, which is invalid EDIFACT"
200        );
201        Some(count as u8)
202    }
203
204    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
205        for idx in 0..seg.elements.len() {
206            if let Some(expected) = (self.expected_components)(seg.tag, idx) {
207                let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
208                if actual != expected {
209                    return Err(EdifactError::InvalidComponentCount {
210                        tag: seg.tag.to_owned(),
211                        element_index: idx,
212                        expected,
213                        actual,
214                        offset: seg.span.start,
215                    });
216                }
217            }
218        }
219        Ok(())
220    }
221
222    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
223        let rules = (self.code_list_rules)(seg.tag);
224
225        for (elem_idx, comp_idx, de) in rules {
226            let value = seg
227                .get_element(*elem_idx)
228                .and_then(|e| e.get_component(*comp_idx))
229                .unwrap_or("");
230            if !value.is_empty() && !(self.is_code_valid)(de, value) {
231                let suggestion = (self.suggest_code)(de, value);
232                return Err(EdifactError::InvalidCodeValue {
233                    tag: seg.tag.to_owned(),
234                    element_index: *elem_idx,
235                    value: value.to_owned(),
236                    code_list: (*de).to_owned(),
237                    offset: seg.span.start,
238                    suggestion,
239                });
240            }
241        }
242
243        Ok(())
244    }
245}
246
247impl DirectoryValidator {
248    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
249        if !self.structure_checks && !self.code_list_checks {
250            return Ok(());
251        }
252
253        let Some(def) = (self.segment_lookup)(seg.tag) else {
254            if self.structure_checks && self.enforce_known_tags {
255                return Err(EdifactError::InvalidSegmentForMessage {
256                    tag: seg.tag.to_owned(),
257                    message_type: self
258                        .message_type
259                        .clone()
260                        .unwrap_or_else(|| self.directory_id.to_owned()),
261                    offset: seg.tag_span.start,
262                });
263            }
264            return Ok(());
265        };
266
267        let max_elements = def.elements.len();
268        let min_elements = def
269            .elements
270            .iter()
271            .rposition(|e| e.status == Status::Mandatory)
272            .map(|idx| idx + 1)
273            .unwrap_or(0);
274        let actual = seg.elements.len();
275
276        if self.structure_checks && (actual < min_elements || actual > max_elements) {
277            return Err(EdifactError::InvalidElementCount {
278                tag: seg.tag.to_owned(),
279                min: min_elements,
280                max: max_elements,
281                actual,
282                offset: seg.span.start,
283            });
284        }
285
286        if self.structure_checks {
287            for element in def
288                .elements
289                .iter()
290                .filter(|e| e.status == Status::Mandatory)
291            {
292                let idx = (element.position as usize).saturating_sub(1);
293                let is_present = seg
294                    .elements
295                    .get(idx)
296                    .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
297                if !is_present {
298                    return Err(EdifactError::MissingRequiredElement {
299                        tag: seg.tag.to_owned(),
300                        element_index: idx,
301                    });
302                }
303            }
304            self.validate_component_counts(seg)?;
305
306            if let Some(rule) = self.additional_structure_rule {
307                rule(seg)?;
308            }
309        }
310
311        if self.code_list_checks {
312            self.validate_code_lists(seg)?;
313        }
314
315        Ok(())
316    }
317}
318
319impl Validator for DirectoryValidator {
320    fn set_message_type(&mut self, message_type: Option<&str>) {
321        self.message_type = message_type.map(str::to_owned);
322    }
323
324    fn validate_batch(&self, segments: &[Segment<'_>], report: &mut ValidationReport) {
325        for seg in segments {
326            if let Err(err) = self.validate_segment(seg) {
327                report_error(report, err);
328            }
329        }
330
331        if self.structure_checks {
332            if let Some(message_type) = self.detect_message_type(segments) {
333                for required_tag in Self::required_segments_for(&message_type) {
334                    if segments.iter().all(|s| s.tag != *required_tag) {
335                        report.add_error(
336                            ValidationIssue::new(
337                                ValidationSeverity::Error,
338                                format!(
339                                    "required segment {} missing for message type {}",
340                                    required_tag, message_type
341                                ),
342                            )
343                            .with_segment(*required_tag)
344                            .with_suggestion("Add the mandatory segment at the correct position"),
345                        );
346                    }
347                }
348
349                let seq = Self::required_segments_for(&message_type);
350                let mut last_idx = None;
351                for tag in seq {
352                    if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
353                        if let Some(prev) = last_idx {
354                            if idx < prev {
355                                report.add_error(
356                                    ValidationIssue::new(
357                                        ValidationSeverity::Error,
358                                        format!(
359                                            "segment sequence violation for message type {}: '{}' appears out of order",
360                                            message_type, tag
361                                        ),
362                                    )
363                                    .with_segment(*tag)
364                                    .with_suggestion(
365                                        "Ensure required segments follow UN/EDIFACT canonical order",
366                                    ),
367                                );
368                            }
369                        }
370                        last_idx = Some(idx);
371                    }
372                }
373            }
374        }
375    }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    // Definition of the synthetic `TST` segment: one mandatory element.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Lookup hook that knows only the synthetic `TST` segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        match tag {
            "TST" => Some(&TEST_SEGMENT),
            _ => None,
        }
    }

    /// Code-validity hook that accepts every value.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never suggests anything.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that disables the component-count check.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The first component is empty but the second is not, so the mandatory
        // element counts as present.
        let input = b"TST+:ABC'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` and return its first segment with a `'static` lifetime.
    fn parse_single(input: &[u8]) -> crate::model::Segment<'static> {
        // NOTE: intentional leak (no `unsafe` involved) — test inputs are small
        // and bounded per call. `Segment<'static>` is needed so the returned
        // value is not tied to a local buffer; the allocation is bounded by
        // test count, not message size.
        let leaked: &'static [u8] = Box::leak(input.to_vec().into_boxed_slice());
        crate::from_bytes(leaked)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed")
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // DTM+137:20200101: has three components in element 0; the third is empty.
        // ISO 9735-1 §3.3 says trailing empty components may be omitted,
        // so effective count should be 2.
        let seg = parse_single(b"DTM+137:20200101:'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(2), "trailing empty component should be stripped");
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // NAD+MS++: → the element at index 2 is ":" with two empty components → effective=0
        let seg = parse_single(b"NAD+MS++:'");
        let count = DirectoryValidator::effective_component_count(&seg, 2);
        assert_eq!(count, Some(0), "all-empty composite should have effective count 0");
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // DTM+137:20200101:102 — all three components are non-empty
        let seg = parse_single(b"DTM+137:20200101:102'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(3), "no components should be stripped when all non-empty");
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Override code-list rules to require element 0 of TST to be a specific code.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            match tag {
                "TST" => &[(0, 0, "CUSTOM_DE")],
                _ => &[],
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let input = b"TST+INVALID'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        // NOTE(review): this expects a *warning* although the validator returns
        // an `InvalidCodeValue` error — presumably `report_error` records
        // code-list violations as warnings; confirm against its implementation.
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}