edifact_rs/directory_validator.rs
1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Requirement status of a data element inside a segment definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// The element is required and must always be supplied.
    Mandatory,
    /// The element may be left out unless an additional rule demands it.
    Conditional,
}
15
16/// Reference to a data element within a segment definition.
17///
18/// Fields are private to enforce the one-based position invariant through the
19/// [`ElementRef::new`] constructor. Use [`ElementRef::new`] for compile-time
20/// literals (panics at compile time when `position == 0`).
21///
22/// Use [`OwnedElementRef`] for runtime-constructed element refs.
23#[derive(Debug, Clone, Copy)]
24pub struct ElementRef {
25 /// One-based element position in the segment definition.
26 position: u8,
27 /// UN/EDIFACT data element identifier.
28 data_element: &'static str,
29 /// Requirement status of the element.
30 status: Status,
31 /// Maximum repetition count for this element.
32 max_repeat: u8,
33}
34
35impl ElementRef {
36 /// Construct an `ElementRef` with compile-time position validation.
37 ///
38 /// `position` must be ≥ 1 (one-based). When called in a `const` context
39 /// (e.g. inside a `static` array initialiser), a zero `position` causes a
40 /// **compile-time error**. At runtime it panics.
41 ///
42 /// # Panics
43 ///
44 /// Panics if `position == 0`.
45 ///
46 /// # Example
47 ///
48 /// ```rust
49 /// use edifact_rs::{ElementRef, Status};
50 ///
51 /// const BGM_1001: ElementRef = ElementRef::new(1, "1001", Status::Mandatory, 1);
52 /// ```
53 #[must_use]
54 pub const fn new(
55 position: u8,
56 data_element: &'static str,
57 status: Status,
58 max_repeat: u8,
59 ) -> Self {
60 assert!(
61 position != 0,
62 "ElementRef position must be >= 1 (one-based)"
63 );
64 Self {
65 position,
66 data_element,
67 status,
68 max_repeat,
69 }
70 }
71
72 /// One-based element position in the segment definition.
73 #[must_use]
74 #[inline]
75 pub const fn position(&self) -> u8 {
76 self.position
77 }
78
79 /// UN/EDIFACT data element identifier.
80 #[must_use]
81 #[inline]
82 pub const fn data_element(&self) -> &'static str {
83 self.data_element
84 }
85
86 /// Requirement status of the element.
87 #[must_use]
88 #[inline]
89 pub const fn status(&self) -> Status {
90 self.status
91 }
92
93 /// Maximum repetition count for this element.
94 #[must_use]
95 #[inline]
96 pub const fn max_repeat(&self) -> u8 {
97 self.max_repeat
98 }
99}
100
101/// Definition of an EDIFACT segment (tag + element structure).
102#[derive(Debug)]
103pub struct SegmentDefinition {
104 /// Segment tag.
105 pub tag: &'static str,
106 /// Human-readable segment name.
107 pub name: &'static str,
108 /// Ordered element definitions.
109 pub elements: &'static [ElementRef],
110}
111
112/// Owned runtime equivalent of [`ElementRef`].
113///
114/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
115/// to construct validators from data that is not available at compile time (e.g. loaded
116/// from JSON or a database at startup).
117///
118/// Use [`OwnedElementRef::new_unchecked`] for compile-time-known positions (panics on invalid
119/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
120/// comes from an external source and you need a `Result`. Fields are private to prevent
121/// bypassing the position invariant through struct-literal syntax.
122#[derive(Debug, Clone)]
123pub struct OwnedElementRef {
124 /// One-based element position.
125 position: u8,
126 /// UN/EDIFACT data element identifier.
127 data_element: String,
128 /// Requirement status.
129 status: Status,
130 /// Maximum repetition count.
131 max_repeat: u8,
132}
133
134/// Owned runtime equivalent of [`SegmentDefinition`].
135///
136/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
137///
138/// Use [`OwnedSegmentDef::new_unchecked`] for compile-time-known tags (panics on invalid input,
139/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
140/// an external source and you need a `Result`. Fields are private to prevent bypassing
141/// the tag invariant through struct-literal syntax.
142#[derive(Debug, Clone)]
143pub struct OwnedSegmentDef {
144 /// Segment tag (e.g. `"BGM"`).
145 tag: String,
146 /// Human-readable segment name.
147 name: String,
148 /// Ordered element definitions.
149 elements: Vec<OwnedElementRef>,
150}
151
152impl OwnedSegmentDef {
153 /// Construct an owned segment definition.
154 ///
155 /// This is the ergonomic constructor for compile-time-known tags (e.g.
156 /// `"BGM"`, `"UNH"`). It panics immediately on invalid input so that
157 /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
158 /// boilerplate.
159 ///
160 /// Use [`try_new`][Self::try_new] instead when the tag originates from an
161 /// external source (user input, config file, database) and you need a
162 /// `Result` to propagate errors gracefully.
163 ///
164 /// # Panics
165 ///
166 /// Panics if `tag` is not exactly three ASCII uppercase letters.
167 pub fn new_unchecked(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
168 assert!(
169 tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
170 "OwnedSegmentDef::new_unchecked: tag must be exactly three ASCII uppercase letters, got {tag:?}"
171 );
172 Self {
173 tag,
174 name,
175 elements,
176 }
177 }
178
179 /// Construct an owned segment definition, returning an error for invalid tags.
180 ///
181 /// Prefer this over [`new_unchecked`][Self::new_unchecked] when the tag comes from an external
182 /// source (user input, config file, database) and you want to handle the
183 /// error without panicking.
184 ///
185 /// # Errors
186 ///
187 /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
188 /// ASCII uppercase letters.
189 pub fn try_new(
190 tag: String,
191 name: String,
192 elements: Vec<OwnedElementRef>,
193 ) -> Result<Self, EdifactError> {
194 if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
195 return Err(EdifactError::InvalidSegmentTag(tag));
196 }
197 Ok(Self {
198 tag,
199 name,
200 elements,
201 })
202 }
203
204 /// Segment tag (e.g. `"BGM"`).
205 #[inline]
206 pub fn tag(&self) -> &str {
207 &self.tag
208 }
209
210 /// Human-readable segment name.
211 #[inline]
212 pub fn name(&self) -> &str {
213 &self.name
214 }
215
216 /// Element definitions for this segment.
217 #[inline]
218 pub fn elements(&self) -> &[OwnedElementRef] {
219 &self.elements
220 }
221}
222
223impl OwnedElementRef {
224 /// Construct an owned element reference.
225 ///
226 /// This is the ergonomic constructor for compile-time-known positions.
227 /// It panics immediately on invalid input so that call sites with literal
228 /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
229 ///
230 /// Use [`try_new`][Self::try_new] instead when the position originates from
231 /// an external source (user input, config file, database) and you need a
232 /// `Result` to propagate errors gracefully.
233 ///
234 /// # Panics
235 ///
236 /// Panics if `position` is `0` (positions are one-based).
237 pub fn new_unchecked(
238 position: u8,
239 data_element: String,
240 status: Status,
241 max_repeat: u8,
242 ) -> Self {
243 assert!(
244 position != 0,
245 "OwnedElementRef::new_unchecked: position must be >= 1 (one-based), got 0"
246 );
247 Self {
248 position,
249 data_element,
250 status,
251 max_repeat,
252 }
253 }
254
255 /// Construct an owned element reference, returning an error for position `0`.
256 ///
257 /// Prefer this over [`new_unchecked`][Self::new_unchecked] when the position comes from an
258 /// external source (user input, config file, database) and you want to
259 /// handle the error without panicking.
260 ///
261 /// # Errors
262 ///
263 /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
264 pub fn try_new(
265 position: u8,
266 data_element: String,
267 status: Status,
268 max_repeat: u8,
269 ) -> Result<Self, EdifactError> {
270 if position == 0 {
271 return Err(EdifactError::InvalidElementPosition);
272 }
273 Ok(Self {
274 position,
275 data_element,
276 status,
277 max_repeat,
278 })
279 }
280
281 /// One-based element position (always >= 1).
282 #[inline]
283 pub fn position(&self) -> u8 {
284 self.position
285 }
286
287 /// UN/EDIFACT data element identifier.
288 #[inline]
289 pub fn data_element(&self) -> &str {
290 &self.data_element
291 }
292
293 /// Requirement status of this element.
294 #[inline]
295 pub fn status(&self) -> Status {
296 self.status
297 }
298
299 /// Maximum repetition count for this element.
300 #[inline]
301 pub fn max_repeat(&self) -> u8 {
302 self.max_repeat
303 }
304}
305
306type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
307type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
308type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
309type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
310type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
311type AdditionalStructureRuleFn =
312 Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
313/// Returns the `(element_index, component_index, data_element_id)` tuples to
314/// validate against a code list for the given segment tag.
315type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
316/// Returns the mandatory segment tags for a given EDIFACT message type.
317///
318/// The slice should contain every tag that must appear at least once in a
319/// conformant message of the given type. The tags are also used to check
320/// canonical ordering — their relative order in the returned slice is taken
321/// as the expected order in the message.
322type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
323
324/// Internal enum that unifies lookup results from static and owned segment definitions.
325///
326/// Allows `validate_segment` to handle both code-generated (`&'static`) and
327/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
328enum SegmentDefRef<'a> {
329 Static(&'static SegmentDefinition),
330 Owned(&'a OwnedSegmentDef),
331}
332
333impl SegmentDefRef<'_> {
334 /// Returns the highest defined element position (one-based → used directly as
335 /// the maximum zero-based slot count for element-count validation).
336 ///
337 /// For owned definitions the highest `position` value may exceed the number
338 /// of entries in the `elements` vec when positions are non-consecutive.
339 fn max_element_position(&self) -> usize {
340 match self {
341 Self::Static(d) => d
342 .elements
343 .iter()
344 .map(|e| e.position as usize)
345 .max()
346 .unwrap_or(0),
347 Self::Owned(d) => d
348 .elements
349 .iter()
350 .map(|e| e.position as usize)
351 .max()
352 .unwrap_or(0),
353 }
354 }
355
356 /// Returns the highest position number among mandatory elements (one-based).
357 ///
358 /// This equals the minimum number of elements that must be present in a
359 /// segment: if the highest-positioned mandatory element is at position 5,
360 /// the segment must supply at least 5 elements.
361 fn last_mandatory_position(&self) -> usize {
362 match self {
363 Self::Static(d) => d
364 .elements
365 .iter()
366 .filter(|e| e.status == Status::Mandatory)
367 .map(|e| e.position as usize)
368 .max()
369 .unwrap_or(0),
370 Self::Owned(d) => d
371 .elements
372 .iter()
373 .filter(|e| e.status == Status::Mandatory)
374 .map(|e| e.position as usize)
375 .max()
376 .unwrap_or(0),
377 }
378 }
379
380 /// Iterate over mandatory element positions without heap allocation.
381 ///
382 /// Calls `f(zero_based_index, data_element_id)` for each element whose
383 /// status is [`Status::Mandatory`]. Returns `Err` immediately if `f`
384 /// returns `Err`, short-circuiting the remaining elements.
385 fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
386 where
387 F: FnMut(usize, &str) -> Result<(), E>,
388 {
389 match self {
390 Self::Static(d) => {
391 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
392 f((e.position as usize).saturating_sub(1), e.data_element)?;
393 }
394 }
395 Self::Owned(d) => {
396 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
397 f(
398 (e.position as usize).saturating_sub(1),
399 e.data_element.as_str(),
400 )?;
401 }
402 }
403 }
404 Ok(())
405 }
406}
407
/// Fallback required-segments mapping, used when no custom function is supplied.
///
/// The message type is ignored and the universal minimum is returned: every
/// EDIFACT message has to start with `UNH` and finish with `UNT`. Mandatory
/// segments that depend on the message type (such as `BGM` for ORDERS/INVOIC)
/// are deliberately left out so that message types which do not require them
/// produce no false positives; enforce those through a
/// [`ProfileRulePack`][crate::ProfileRulePack] or a custom
/// [`DirectoryValidatorBuilder::with_required_segments`] function instead.
fn default_required_segments(_message_type: &str) -> &'static [&'static str] {
    const UNIVERSAL_MINIMUM: &[&str] = &["UNH", "UNT"];
    UNIVERSAL_MINIMUM
}
419
/// Code-list validation rules shared by all UN/EDIFACT directory releases.
///
/// The returned entries are `(element_index, component_index, data_element_id)`
/// tuples; both indices are zero-based. Tags without a rule yield an empty
/// slice.
///
/// The table covers the qualifier/code elements that are validated most often
/// across ORDERS, INVOIC, UTILMD and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    type Rules = &'static [(usize, usize, &'static str)];

    const NO_RULES: Rules = &[];
    // One row per segment tag; each rule targets the first component of the
    // first element.
    const RULES_BY_TAG: &[(&str, Rules)] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    RULES_BY_TAG
        .iter()
        .find(|(known_tag, _)| *known_tag == tag)
        .map_or(NO_RULES, |&(_, rules)| rules)
}
440
441/// Shared validator implementation that is configured per UN/EDIFACT directory release.
442///
443/// # Scope and limitations
444///
445/// `DirectoryValidator` validates individual segment *content* (element counts,
446/// component counts, code-list values, and conditional rules) and checks that
447/// every *mandatory* segment type is present at least once. It does **not**
448/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
449/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
450/// appears in the wrong position. Full sequence validation requires a
451/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
452/// the scope of this implementation.
453#[derive(Clone)]
454pub struct DirectoryValidator {
455 directory_id: String,
456 segment_lookup: SegmentLookupFn,
457 /// Runtime-owned segment definitions (from builder / JSON / DB).
458 ///
459 /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
460 owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
461 is_code_valid: IsCodeValidFn,
462 suggest_code: SuggestCodeFn,
463 expected_components: ExpectedComponentsFn,
464 code_list_rules: CodeListRulesFn,
465 additional_structure_rule: Option<AdditionalStructureRuleFn>,
466 /// Configurable mapping from message type to required segment tags.
467 required_segments: RequiredSegmentsFn,
468 message_type: Option<String>,
469 enforce_known_tags: bool,
470 structure_checks: bool,
471 code_list_checks: bool,
472}
473
474impl std::fmt::Debug for DirectoryValidator {
475 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
476 f.debug_struct("DirectoryValidator")
477 .field("directory_id", &self.directory_id)
478 .field("message_type", &self.message_type)
479 .field("enforce_known_tags", &self.enforce_known_tags)
480 .field("structure_checks", &self.structure_checks)
481 .field("code_list_checks", &self.code_list_checks)
482 .finish_non_exhaustive()
483 }
484}
485
486impl DirectoryValidator {
487 /// Create a validator for a specific directory release with injected lookup/check hooks.
488 pub fn new(
489 directory_id: &'static str,
490 segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
491 is_code_valid: fn(&str, &str) -> bool,
492 suggest_code: fn(&str, &str) -> Option<&'static str>,
493 expected_components: fn(&str, usize) -> Option<u8>,
494 additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
495 ) -> Self {
496 Self {
497 directory_id: directory_id.to_owned(),
498 segment_lookup: Arc::new(segment_lookup),
499 owned_defs: None,
500 is_code_valid: Arc::new(is_code_valid),
501 suggest_code: Arc::new(suggest_code),
502 expected_components: Arc::new(expected_components),
503 code_list_rules: Arc::new(base_code_list_rules),
504 additional_structure_rule: additional_structure_rule
505 .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
506 required_segments: Arc::new(default_required_segments),
507 message_type: None,
508 enforce_known_tags: true,
509 structure_checks: true,
510 code_list_checks: true,
511 }
512 }
513
514 /// Create a validator from a static slice of [`SegmentDefinition`]s.
515 ///
516 /// This is the preferred constructor when code-generating directory data as
517 /// a `static` array: no manual fn-pointer boilerplate is required.
518 ///
519 /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
520 /// always returns `true`). Call [`with_code_list_rules`][Self::with_code_list_rules]
521 /// to register directory-specific rules that actually validate code values.
522 ///
523 /// # Example
524 ///
525 /// ```rust,ignore
526 /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
527 ///
528 /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
529 /// .with_code_list_rules(my_code_list_rules);
530 /// ```
531 pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
532 let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
533 definitions.iter().map(|d| (d.tag, d)).collect();
534 let lookup_map = Arc::new(lookup_map);
535 Self {
536 directory_id: "custom".to_owned(),
537 segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
538 owned_defs: None,
539 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
540 suggest_code: Arc::new(|_de: &str, _code: &str| None),
541 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
542 code_list_rules: Arc::new(base_code_list_rules),
543 additional_structure_rule: None,
544 required_segments: Arc::new(default_required_segments),
545 message_type: None,
546 enforce_known_tags: true,
547 structure_checks: true,
548 code_list_checks: false,
549 }
550 }
551
552 /// Create a validator from a runtime-owned collection of segment definitions.
553 ///
554 /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
555 /// loaded from an external source at startup (JSON, database, YAML, …) rather
556 /// than being known at compile time.
557 ///
558 /// Code-list checks are **disabled** by default; enable them by chaining
559 /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
560 /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
561 /// the builder.
562 ///
563 /// # Example
564 ///
565 /// ```rust,ignore
566 /// let defs = vec![
567 /// OwnedSegmentDef::new_unchecked(
568 /// "BGM".to_owned(),
569 /// "Beginning of message".to_owned(),
570 /// vec![OwnedElementRef::new_unchecked(1, "C002".to_owned(), Status::Mandatory, 1)],
571 /// ),
572 /// ];
573 /// let validator = DirectoryValidator::from_owned_definitions(defs)
574 /// .with_directory_id("runtime-profile");
575 /// ```
576 pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
577 Self {
578 directory_id: "custom".to_owned(),
579 // The static lookup is never consulted when `owned_defs` is `Some`.
580 segment_lookup: Arc::new(|_| None),
581 owned_defs: Some(Arc::new(definitions)),
582 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
583 suggest_code: Arc::new(|_de: &str, _code: &str| None),
584 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
585 code_list_rules: Arc::new(base_code_list_rules),
586 additional_structure_rule: None,
587 required_segments: Arc::new(default_required_segments),
588 message_type: None,
589 enforce_known_tags: true,
590 structure_checks: true,
591 code_list_checks: false,
592 }
593 }
594
595 /// Set the directory identifier string (used in error messages).
596 pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
597 self.directory_id = id.into();
598 self
599 }
600
601 /// Override the code-list rules function.
602 ///
603 /// Directories can supply a directory-specific implementation that extends or
604 /// replaces the base rules from `base_code_list_rules`.
605 pub fn with_code_list_rules(
606 mut self,
607 f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
608 ) -> Self {
609 self.code_list_rules = Arc::new(f);
610 self
611 }
612
613 /// Enable only structure checks and disable code-list checks.
614 pub fn structure_only(mut self) -> Self {
615 self.structure_checks = true;
616 self.code_list_checks = false;
617 self
618 }
619
620 /// Enable only code-list checks and disable structure checks.
621 pub fn code_list_only(mut self) -> Self {
622 self.structure_checks = false;
623 self.code_list_checks = true;
624 self
625 }
626
627 /// Configure whether unknown segment tags should be rejected.
628 pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
629 self.enforce_known_tags = enforce;
630 self
631 }
632
633 /// Override the required-segments mapping used for structural validation.
634 ///
635 /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
636 /// and must return a `'static` slice of segment tags that are mandatory for that
637 /// type. The tags are checked both for *presence* and for *canonical ordering*
638 /// within the message.
639 ///
640 /// # Example
641 ///
642 /// ```rust,ignore
643 /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
644 /// match msg_type {
645 /// "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
646 /// "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
647 /// _ => &["UNH", "UNT"],
648 /// }
649 /// }
650 ///
651 /// let validator = DirectoryValidator::from_definitions(DEFS)
652 /// .with_required_segments(my_required_segments);
653 /// ```
654 pub fn with_required_segments(
655 mut self,
656 f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
657 ) -> Self {
658 self.required_segments = Arc::new(f);
659 self
660 }
661
662 fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
663 if let Some(explicit) = self.message_type.as_deref() {
664 return Some(explicit.to_owned());
665 }
666
667 segments
668 .iter()
669 .find(|s| s.tag == "UNH")
670 .and_then(|s| s.get_element(1))
671 .and_then(|e| e.get_component(0))
672 .map(str::to_owned)
673 }
674
675 /// Count the non-trailing-empty components in element `element_idx` of `seg`.
676 ///
677 /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
678 /// a sender is not required to transmit trailing empty components; this function
679 /// therefore strips them before checking against the expected count so that
680 /// conformant messages with omitted trailing components are still accepted.
681 ///
682 /// # Examples
683 ///
684 /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
685 /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
686 fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
687 let elem = seg.elements.get(element_idx)?;
688 let mut count = elem.components.len();
689 while count > 0 && elem.components[count - 1].0.as_ref().is_empty() {
690 count -= 1;
691 }
692 u8::try_from(count).ok()
693 }
694
695 fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
696 for idx in 0..seg.elements.len() {
697 if let Some(expected) = (self.expected_components)(seg.tag, idx) {
698 let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
699 if actual != expected {
700 return Err(EdifactError::InvalidComponentCount {
701 tag: seg.tag.to_owned(),
702 element_index: idx,
703 expected,
704 actual,
705 offset: seg.span.start,
706 });
707 }
708 }
709 }
710 Ok(())
711 }
712
713 fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
714 let rules = (self.code_list_rules)(seg.tag);
715
716 for (elem_idx, comp_idx, de) in rules {
717 let value = seg
718 .get_element(*elem_idx)
719 .and_then(|e| e.get_component(*comp_idx))
720 .unwrap_or("");
721 if !value.is_empty() && !(self.is_code_valid)(de, value) {
722 let suggestion = (self.suggest_code)(de, value);
723 return Err(EdifactError::InvalidCodeValue {
724 tag: seg.tag.to_owned(),
725 element_index: *elem_idx,
726 value: value.to_owned(),
727 code_list: (*de).to_owned(),
728 offset: seg.span.start,
729 suggestion,
730 });
731 }
732 }
733
734 Ok(())
735 }
736}
737
738impl DirectoryValidator {
739 fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
740 if let Some(owned) = &self.owned_defs {
741 owned
742 .iter()
743 .find(|d| d.tag == tag)
744 .map(SegmentDefRef::Owned)
745 } else {
746 (self.segment_lookup)(tag).map(SegmentDefRef::Static)
747 }
748 }
749
750 fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
751 if !self.structure_checks && !self.code_list_checks {
752 return Ok(());
753 }
754
755 let Some(def) = self.resolve_def(seg.tag) else {
756 if self.structure_checks && self.enforce_known_tags {
757 return Err(EdifactError::InvalidSegmentForMessage {
758 tag: seg.tag.to_owned(),
759 message_type: self
760 .message_type
761 .clone()
762 .unwrap_or_else(|| self.directory_id.clone()),
763 offset: seg.tag_span.start,
764 });
765 }
766 return Ok(());
767 };
768
769 let max_elements = def.max_element_position();
770 let min_elements = def.last_mandatory_position();
771 let actual = seg.elements.len();
772
773 if self.structure_checks && (actual < min_elements || actual > max_elements) {
774 return Err(EdifactError::InvalidElementCount {
775 tag: seg.tag.to_owned(),
776 min: min_elements,
777 max: max_elements,
778 actual,
779 offset: seg.span.start,
780 });
781 }
782
783 if self.structure_checks {
784 def.for_each_mandatory_position(|idx, _de| {
785 let is_present = seg.elements.get(idx).is_some_and(|elem| {
786 elem.components.iter().any(|(c, _)| !c.as_ref().is_empty())
787 });
788 if !is_present {
789 return Err(EdifactError::MissingRequiredElement {
790 tag: seg.tag.to_owned(),
791 element_index: idx,
792 });
793 }
794 Ok(())
795 })?;
796 self.validate_component_counts(seg)?;
797
798 if let Some(rule) = &self.additional_structure_rule {
799 rule(seg)?;
800 }
801 }
802
803 if self.code_list_checks {
804 self.validate_code_lists(seg)?;
805 }
806
807 Ok(())
808 }
809}
810
811impl Validator for DirectoryValidator {
812 fn set_message_type(&mut self, message_type: Option<&str>) {
813 self.message_type = message_type.map(str::to_owned);
814 }
815
816 fn validate_batch(
817 &self,
818 segments: &[Segment<'_>],
819 report: &mut ValidationReport,
820 _context: &ValidationRuleContext<'_>,
821 ) {
822 for seg in segments {
823 if let Err(err) = self.validate_segment(seg) {
824 report_error(report, err);
825 }
826 }
827
828 if self.structure_checks {
829 if let Some(message_type) = self.detect_message_type(segments) {
830 for required_tag in (self.required_segments)(&message_type) {
831 if segments.iter().all(|s| s.tag != *required_tag) {
832 report.add_error(
833 ValidationIssue::new(
834 ValidationSeverity::Error,
835 format!(
836 "required segment {} missing for message type {}",
837 required_tag, message_type
838 ),
839 )
840 .with_segment(*required_tag)
841 .with_suggestion("Add the mandatory segment at the correct position"),
842 );
843 }
844 }
845
846 let seq = (self.required_segments)(&message_type);
847 let mut last_idx = None;
848 for tag in seq {
849 if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
850 if let Some(prev) = last_idx {
851 if idx < prev {
852 report.add_error(
853 ValidationIssue::new(
854 ValidationSeverity::Error,
855 format!(
856 "segment sequence violation for message type {}: '{}' appears out of order",
857 message_type, tag
858 ),
859 )
860 .with_segment(*tag)
861 .with_suggestion(
862 "Ensure required segments follow UN/EDIFACT canonical order",
863 ),
864 );
865 }
866 }
867 last_idx = Some(idx);
868 }
869 }
870 }
871 }
872 }
873}
874
875// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
876
877/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
878///
879/// Use this when segment definitions are loaded from an external source at
880/// startup (JSON, database, YAML, …) rather than being available as `static`
881/// arrays at compile time.
882///
883/// # Example
884///
885/// ```rust,ignore
886/// let validator = DirectoryValidatorBuilder::new("my-profile")
887/// .add_segment(
888/// OwnedSegmentDef::new_unchecked(
889/// "BGM".to_owned(),
890/// "Beginning of message".to_owned(),
891/// vec![OwnedElementRef::new_unchecked(1, "C002".to_owned(), Status::Mandatory, 1)],
892/// ),
893/// )
894/// .build();
895/// ```
896#[derive(Debug, Default)]
897pub struct DirectoryValidatorBuilder {
898 directory_id: Option<String>,
899 segments: Vec<OwnedSegmentDef>,
900}
901
902impl DirectoryValidatorBuilder {
903 /// Create a new builder with the given directory identifier.
904 ///
905 /// The identifier is used in error messages; set a human-readable value
906 /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
907 pub fn new(directory_id: impl Into<String>) -> Self {
908 Self {
909 directory_id: Some(directory_id.into()),
910 segments: Vec::new(),
911 }
912 }
913
914 /// Add a segment definition to the builder.
915 ///
916 /// Definitions can be added in any order; the resulting validator looks
917 /// them up by tag at validation time.
918 pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
919 self.segments.push(def);
920 self
921 }
922
923 /// Extend the builder with multiple segment definitions at once.
924 pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
925 self.segments.extend(defs);
926 self
927 }
928
929 /// Build the [`DirectoryValidator`].
930 ///
931 /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
932 /// Code-list checks are disabled by default; chain
933 /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
934 /// enable them.
935 pub fn build(self) -> DirectoryValidator {
936 let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
937 if let Some(id) = self.directory_id {
938 validator.directory_id = id;
939 }
940 validator
941 }
942}
943
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal directory: a single segment `TST` with one mandatory composite.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef::new(1, "C507", Status::Mandatory, 1)];

    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Static lookup hook that knows only the `TST` segment.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        (tag == "TST").then_some(&TEST_SEGMENT)
    }

    /// Code-list hook that accepts every value.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never proposes anything.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that disables the component-count check.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Run `validate_batch` with an empty rule context and hand back the report.
    fn run_batch(validator: &DirectoryValidator, segments: &[Segment<'_>]) -> ValidationReport {
        let mut report = ValidationReport::default();
        validator.validate_batch(
            segments,
            &mut report,
            &crate::validator::ValidationRuleContext::empty(),
        );
        report
    }

    /// Parse `input` through the reader API and return its first segment.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        crate::from_reader_collect(std::io::Cursor::new(input))
            .expect("parse should succeed")
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // The first component is empty, the second is not: the composite counts as present.
        let parsed: Vec<_> = crate::from_bytes(b"TST+:ABC'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let report = run_batch(&validator, &parsed);
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the third
        // of which is empty. ISO 9735-1 §3.3 allows trailing empty components
        // to be omitted, so the effective count has to be 2.
        let owned = parse_single(b"DTM+137:20200101:'");
        let borrowed = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components → effective=0.
        let owned = parse_single(b"NAD+MS++:'");
        let borrowed = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // All three components of `DTM+137:20200101:102` carry data.
        let owned = parse_single(b"DTM+137:20200101:102'");
        let borrowed = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&borrowed, 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rule table: the first component of TST's first element is
        // code-list controlled, and only the value "VALID" is accepted.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            if tag == "TST" {
                &[(0, 0, "CUSTOM_DE")]
            } else {
                &[]
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let parsed: Vec<_> = crate::from_bytes(b"TST+INVALID'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let report = run_batch(&validator, &parsed);
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}