edifact_rs/directory_validator.rs
1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
7/// Mandatory/Conditional status of a data element within a segment.
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub enum Status {
10 /// Element must be present.
11 Mandatory,
12 /// Element is optional unless additional rules require it.
13 Conditional,
14}
15
16/// Reference to a data element within a segment definition.
17#[derive(Debug, Clone, Copy)]
18pub struct ElementRef {
19 /// One-based element position in the segment definition.
20 pub position: u8,
21 /// UN/EDIFACT data element identifier.
22 pub data_element: &'static str,
23 /// Requirement status of the element.
24 pub status: Status,
25 /// Maximum repetition count for this element.
26 pub max_repeat: u8,
27}
28
29/// Definition of an EDIFACT segment (tag + element structure).
30#[derive(Debug)]
31pub struct SegmentDefinition {
32 /// Segment tag.
33 pub tag: &'static str,
34 /// Human-readable segment name.
35 pub name: &'static str,
36 /// Ordered element definitions.
37 pub elements: &'static [ElementRef],
38}
39
40/// Owned runtime equivalent of [`ElementRef`].
41///
42/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
43/// to construct validators from data that is not available at compile time (e.g. loaded
44/// from JSON or a database at startup).
45///
46/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
47/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
48/// comes from an external source and you need a `Result`. Fields are private to prevent
49/// bypassing the position invariant through struct-literal syntax.
50#[derive(Debug, Clone)]
51pub struct OwnedElementRef {
52 /// One-based element position.
53 position: u8,
54 /// UN/EDIFACT data element identifier.
55 data_element: String,
56 /// Requirement status.
57 status: Status,
58 /// Maximum repetition count.
59 max_repeat: u8,
60}
61
62/// Owned runtime equivalent of [`SegmentDefinition`].
63///
64/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
65///
66/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
67/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
68/// an external source and you need a `Result`. Fields are private to prevent bypassing
69/// the tag invariant through struct-literal syntax.
70#[derive(Debug, Clone)]
71pub struct OwnedSegmentDef {
72 /// Segment tag (e.g. `"BGM"`).
73 tag: String,
74 /// Human-readable segment name.
75 name: String,
76 /// Ordered element definitions.
77 elements: Vec<OwnedElementRef>,
78}
79
80impl OwnedSegmentDef {
81 /// Construct an owned segment definition.
82 ///
83 /// This is the ergonomic constructor for compile-time-known tags (e.g.
84 /// `"BGM"`, `"UNH"`). It panics immediately on invalid input so that
85 /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86 /// boilerplate.
87 ///
88 /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89 /// external source (user input, config file, database) and you need a
90 /// `Result` to propagate errors gracefully.
91 ///
92 /// # Panics
93 ///
94 /// Panics if `tag` is not exactly three ASCII uppercase letters.
95 pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96 assert!(
97 tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98 "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99 );
100 Self {
101 tag,
102 name,
103 elements,
104 }
105 }
106
107 /// Construct an owned segment definition, returning an error for invalid tags.
108 ///
109 /// Prefer this over [`new`][Self::new] when the tag comes from an external
110 /// source (user input, config file, database) and you want to handle the
111 /// error without panicking.
112 ///
113 /// # Errors
114 ///
115 /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116 /// ASCII uppercase letters.
117 pub fn try_new(
118 tag: String,
119 name: String,
120 elements: Vec<OwnedElementRef>,
121 ) -> Result<Self, EdifactError> {
122 if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123 return Err(EdifactError::InvalidSegmentTag(tag));
124 }
125 Ok(Self {
126 tag,
127 name,
128 elements,
129 })
130 }
131
132 /// Segment tag (e.g. `"BGM"`).
133 #[inline]
134 pub fn tag(&self) -> &str {
135 &self.tag
136 }
137
138 /// Human-readable segment name.
139 #[inline]
140 pub fn name(&self) -> &str {
141 &self.name
142 }
143
144 /// Element definitions for this segment.
145 #[inline]
146 pub fn elements(&self) -> &[OwnedElementRef] {
147 &self.elements
148 }
149}
150
151impl OwnedElementRef {
152 /// Construct an owned element reference.
153 ///
154 /// This is the ergonomic constructor for compile-time-known positions.
155 /// It panics immediately on invalid input so that call sites with literal
156 /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157 ///
158 /// Use [`try_new`][Self::try_new] instead when the position originates from
159 /// an external source (user input, config file, database) and you need a
160 /// `Result` to propagate errors gracefully.
161 ///
162 /// # Panics
163 ///
164 /// Panics if `position` is `0` (positions are one-based).
165 pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166 assert!(
167 position != 0,
168 "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169 );
170 Self {
171 position,
172 data_element,
173 status,
174 max_repeat,
175 }
176 }
177
178 /// Construct an owned element reference, returning an error for position `0`.
179 ///
180 /// Prefer this over [`new`][Self::new] when the position comes from an
181 /// external source (user input, config file, database) and you want to
182 /// handle the error without panicking.
183 ///
184 /// # Errors
185 ///
186 /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187 pub fn try_new(
188 position: u8,
189 data_element: String,
190 status: Status,
191 max_repeat: u8,
192 ) -> Result<Self, EdifactError> {
193 if position == 0 {
194 return Err(EdifactError::InvalidElementPosition);
195 }
196 Ok(Self {
197 position,
198 data_element,
199 status,
200 max_repeat,
201 })
202 }
203
204 /// One-based element position (always >= 1).
205 #[inline]
206 pub fn position(&self) -> u8 {
207 self.position
208 }
209
210 /// UN/EDIFACT data element identifier.
211 #[inline]
212 pub fn data_element(&self) -> &str {
213 &self.data_element
214 }
215
216 /// Requirement status of this element.
217 #[inline]
218 pub fn status(&self) -> Status {
219 self.status
220 }
221
222 /// Maximum repetition count for this element.
223 #[inline]
224 pub fn max_repeat(&self) -> u8 {
225 self.max_repeat
226 }
227}
228
229type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
230type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
231type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
232type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
233type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
234type AdditionalStructureRuleFn =
235 Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
236/// Returns the `(element_index, component_index, data_element_id)` tuples to
237/// validate against a code list for the given segment tag.
238type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
239/// Returns the mandatory segment tags for a given EDIFACT message type.
240///
241/// The slice should contain every tag that must appear at least once in a
242/// conformant message of the given type. The tags are also used to check
243/// canonical ordering — their relative order in the returned slice is taken
244/// as the expected order in the message.
245type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
247/// Internal enum that unifies lookup results from static and owned segment definitions.
248///
249/// Allows `validate_segment` to handle both code-generated (`&'static`) and
250/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
251enum SegmentDefRef<'a> {
252 Static(&'static SegmentDefinition),
253 Owned(&'a OwnedSegmentDef),
254}
255
256impl<'a> SegmentDefRef<'a> {
257 /// Returns the highest defined element position (one-based → used directly as
258 /// the maximum zero-based slot count for element-count validation).
259 ///
260 /// For owned definitions the highest `position` value may exceed the number
261 /// of entries in the `elements` vec when positions are non-consecutive.
262 fn max_element_position(&self) -> usize {
263 match self {
264 Self::Static(d) => d
265 .elements
266 .iter()
267 .map(|e| e.position as usize)
268 .max()
269 .unwrap_or(0),
270 Self::Owned(d) => d
271 .elements
272 .iter()
273 .map(|e| e.position as usize)
274 .max()
275 .unwrap_or(0),
276 }
277 }
278
279 /// Returns the highest position number among mandatory elements (one-based).
280 ///
281 /// This equals the minimum number of elements that must be present in a
282 /// segment: if the highest-positioned mandatory element is at position 5,
283 /// the segment must supply at least 5 elements.
284 fn last_mandatory_position(&self) -> usize {
285 match self {
286 Self::Static(d) => d
287 .elements
288 .iter()
289 .filter(|e| e.status == Status::Mandatory)
290 .map(|e| e.position as usize)
291 .max()
292 .unwrap_or(0),
293 Self::Owned(d) => d
294 .elements
295 .iter()
296 .filter(|e| e.status == Status::Mandatory)
297 .map(|e| e.position as usize)
298 .max()
299 .unwrap_or(0),
300 }
301 }
302
303 /// Iterate over mandatory element positions without heap allocation.
304 ///
305 /// Calls `f(zero_based_index, data_element_id)` for each element whose
306 /// status is [`Status::Mandatory`]. Returns `Err` immediately if `f`
307 /// returns `Err`, short-circuiting the remaining elements.
308 fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
309 where
310 F: FnMut(usize, &str) -> Result<(), E>,
311 {
312 match self {
313 Self::Static(d) => {
314 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
315 f((e.position as usize).saturating_sub(1), e.data_element)?;
316 }
317 }
318 Self::Owned(d) => {
319 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
320 f(
321 (e.position as usize).saturating_sub(1),
322 e.data_element.as_str(),
323 )?;
324 }
325 }
326 }
327 Ok(())
328 }
329}
330
331/// Default required-segments mapping used when no custom function is provided.
332fn default_required_segments(message_type: &str) -> &'static [&'static str] {
333 match message_type {
334 "UTILMD" | "ORDERS" | "INVOIC" => &["UNH", "BGM", "UNT"],
335 _ => &["UNH", "UNT"],
336 }
337}
338
339/// Code-list validation rules common to all UN/EDIFACT directory releases.
340///
341/// Each entry is `(element_index, component_index, data_element_id)`.
342/// `element_index` and `component_index` are zero-based.
343///
344/// Covers the most frequently validated qualifier/code elements across ORDERS,
345/// INVOIC, UTILMD, and similar message types.
346pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
347 match tag {
348 "BGM" => &[(0, 0, "1001")],
349 "DTM" => &[(0, 0, "2005")],
350 "NAD" => &[(0, 0, "3035")],
351 "QTY" => &[(0, 0, "6063")],
352 "RFF" => &[(0, 0, "1153")],
353 "MOA" => &[(0, 0, "5025")],
354 "PRI" => &[(0, 0, "5125")],
355 "LOC" => &[(0, 0, "3227")],
356 _ => &[],
357 }
358}
359
360/// Shared validator implementation that is configured per UN/EDIFACT directory release.
361///
362/// # Scope and limitations
363///
364/// `DirectoryValidator` validates individual segment *content* (element counts,
365/// component counts, code-list values, and conditional rules) and checks that
366/// every *mandatory* segment type is present at least once. It does **not**
367/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
368/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
369/// appears in the wrong position. Full sequence validation requires a
370/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
371/// the scope of this implementation.
372#[derive(Clone)]
373pub struct DirectoryValidator {
374 directory_id: String,
375 segment_lookup: SegmentLookupFn,
376 /// Runtime-owned segment definitions (from builder / JSON / DB).
377 ///
378 /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
379 owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
380 is_code_valid: IsCodeValidFn,
381 suggest_code: SuggestCodeFn,
382 expected_components: ExpectedComponentsFn,
383 code_list_rules: CodeListRulesFn,
384 additional_structure_rule: Option<AdditionalStructureRuleFn>,
385 /// Configurable mapping from message type to required segment tags.
386 required_segments: RequiredSegmentsFn,
387 message_type: Option<String>,
388 enforce_known_tags: bool,
389 structure_checks: bool,
390 code_list_checks: bool,
391}
392
393impl std::fmt::Debug for DirectoryValidator {
394 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
395 f.debug_struct("DirectoryValidator")
396 .field("directory_id", &self.directory_id)
397 .field("message_type", &self.message_type)
398 .field("enforce_known_tags", &self.enforce_known_tags)
399 .field("structure_checks", &self.structure_checks)
400 .field("code_list_checks", &self.code_list_checks)
401 .finish_non_exhaustive()
402 }
403}
404
405impl DirectoryValidator {
406 /// Create a validator for a specific directory release with injected lookup/check hooks.
407 pub fn new(
408 directory_id: &'static str,
409 segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
410 is_code_valid: fn(&str, &str) -> bool,
411 suggest_code: fn(&str, &str) -> Option<&'static str>,
412 expected_components: fn(&str, usize) -> Option<u8>,
413 additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
414 ) -> Self {
415 Self {
416 directory_id: directory_id.to_owned(),
417 segment_lookup: Arc::new(segment_lookup),
418 owned_defs: None,
419 is_code_valid: Arc::new(is_code_valid),
420 suggest_code: Arc::new(suggest_code),
421 expected_components: Arc::new(expected_components),
422 code_list_rules: Arc::new(base_code_list_rules),
423 additional_structure_rule: additional_structure_rule
424 .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
425 required_segments: Arc::new(default_required_segments),
426 message_type: None,
427 enforce_known_tags: true,
428 structure_checks: true,
429 code_list_checks: true,
430 }
431 }
432
433 /// Create a validator from a static slice of [`SegmentDefinition`]s.
434 ///
435 /// This is the preferred constructor when code-generating directory data as
436 /// a `static` array: no manual fn-pointer boilerplate is required.
437 ///
438 /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
439 /// always returns `true`). Call [`with_code_list_rules`][Self::with_code_list_rules]
440 /// to register directory-specific rules that actually validate code values.
441 ///
442 /// # Example
443 ///
444 /// ```rust,ignore
445 /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
446 ///
447 /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
448 /// .with_code_list_rules(my_code_list_rules);
449 /// ```
450 pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
451 let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
452 definitions.iter().map(|d| (d.tag, d)).collect();
453 let lookup_map = Arc::new(lookup_map);
454 Self {
455 directory_id: "custom".to_owned(),
456 segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
457 owned_defs: None,
458 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
459 suggest_code: Arc::new(|_de: &str, _code: &str| None),
460 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
461 code_list_rules: Arc::new(base_code_list_rules),
462 additional_structure_rule: None,
463 required_segments: Arc::new(default_required_segments),
464 message_type: None,
465 enforce_known_tags: true,
466 structure_checks: true,
467 code_list_checks: false,
468 }
469 }
470
471 /// Create a validator from a runtime-owned collection of segment definitions.
472 ///
473 /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
474 /// loaded from an external source at startup (JSON, database, YAML, …) rather
475 /// than being known at compile time.
476 ///
477 /// Code-list checks are **disabled** by default; enable them by chaining
478 /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
479 /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
480 /// the builder.
481 ///
482 /// # Example
483 ///
484 /// ```rust,ignore
485 /// let defs = vec![
486 /// OwnedSegmentDef::new(
487 /// "BGM".to_owned(),
488 /// "Beginning of message".to_owned(),
489 /// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
490 /// ),
491 /// ];
492 /// let validator = DirectoryValidator::from_owned_definitions(defs)
493 /// .with_directory_id("runtime-profile");
494 /// ```
495 pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
496 Self {
497 directory_id: "custom".to_owned(),
498 // The static lookup is never consulted when `owned_defs` is `Some`.
499 segment_lookup: Arc::new(|_| None),
500 owned_defs: Some(Arc::new(definitions)),
501 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
502 suggest_code: Arc::new(|_de: &str, _code: &str| None),
503 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
504 code_list_rules: Arc::new(base_code_list_rules),
505 additional_structure_rule: None,
506 required_segments: Arc::new(default_required_segments),
507 message_type: None,
508 enforce_known_tags: true,
509 structure_checks: true,
510 code_list_checks: false,
511 }
512 }
513
514 /// Set the directory identifier string (used in error messages).
515 pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
516 self.directory_id = id.into();
517 self
518 }
519
520 /// Override the code-list rules function.
521 ///
522 /// Directories can supply a directory-specific implementation that extends or
523 /// replaces the base rules from `base_code_list_rules`.
524 pub fn with_code_list_rules(
525 mut self,
526 f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
527 ) -> Self {
528 self.code_list_rules = Arc::new(f);
529 self
530 }
531
532 /// Enable only structure checks and disable code-list checks.
533 pub fn structure_only(mut self) -> Self {
534 self.structure_checks = true;
535 self.code_list_checks = false;
536 self
537 }
538
539 /// Enable only code-list checks and disable structure checks.
540 pub fn code_list_only(mut self) -> Self {
541 self.structure_checks = false;
542 self.code_list_checks = true;
543 self
544 }
545
546 /// Configure whether unknown segment tags should be rejected.
547 pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
548 self.enforce_known_tags = enforce;
549 self
550 }
551
552 /// Override the required-segments mapping used for structural validation.
553 ///
554 /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
555 /// and must return a `'static` slice of segment tags that are mandatory for that
556 /// type. The tags are checked both for *presence* and for *canonical ordering*
557 /// within the message.
558 ///
559 /// # Example
560 ///
561 /// ```rust,ignore
562 /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
563 /// match msg_type {
564 /// "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
565 /// "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
566 /// _ => &["UNH", "UNT"],
567 /// }
568 /// }
569 ///
570 /// let validator = DirectoryValidator::from_definitions(DEFS)
571 /// .with_required_segments(my_required_segments);
572 /// ```
573 pub fn with_required_segments(
574 mut self,
575 f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
576 ) -> Self {
577 self.required_segments = Arc::new(f);
578 self
579 }
580
581 fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
582 if let Some(explicit) = self.message_type.as_deref() {
583 return Some(explicit.to_owned());
584 }
585
586 segments
587 .iter()
588 .find(|s| s.tag == "UNH")
589 .and_then(|s| s.get_element(1))
590 .and_then(|e| e.get_component(0))
591 .map(str::to_owned)
592 }
593
594 /// Count the non-trailing-empty components in element `element_idx` of `seg`.
595 ///
596 /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
597 /// a sender is not required to transmit trailing empty components; this function
598 /// therefore strips them before checking against the expected count so that
599 /// conformant messages with omitted trailing components are still accepted.
600 ///
601 /// # Examples
602 ///
603 /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
604 /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
605 fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
606 let elem = seg.elements.get(element_idx)?;
607 let mut count = elem.components.len();
608 while count > 0 && elem.components[count - 1].as_ref().is_empty() {
609 count -= 1;
610 }
611 u8::try_from(count).ok()
612 }
613
614 fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
615 for idx in 0..seg.elements.len() {
616 if let Some(expected) = (self.expected_components)(seg.tag, idx) {
617 let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
618 if actual != expected {
619 return Err(EdifactError::InvalidComponentCount {
620 tag: seg.tag.to_owned(),
621 element_index: idx,
622 expected,
623 actual,
624 offset: seg.span.start,
625 });
626 }
627 }
628 }
629 Ok(())
630 }
631
632 fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
633 let rules = (self.code_list_rules)(seg.tag);
634
635 for (elem_idx, comp_idx, de) in rules {
636 let value = seg
637 .get_element(*elem_idx)
638 .and_then(|e| e.get_component(*comp_idx))
639 .unwrap_or("");
640 if !value.is_empty() && !(self.is_code_valid)(de, value) {
641 let suggestion = (self.suggest_code)(de, value);
642 return Err(EdifactError::InvalidCodeValue {
643 tag: seg.tag.to_owned(),
644 element_index: *elem_idx,
645 value: value.to_owned(),
646 code_list: (*de).to_owned(),
647 offset: seg.span.start,
648 suggestion,
649 });
650 }
651 }
652
653 Ok(())
654 }
655}
656
657impl DirectoryValidator {
658 fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
659 if let Some(owned) = &self.owned_defs {
660 owned
661 .iter()
662 .find(|d| d.tag == tag)
663 .map(SegmentDefRef::Owned)
664 } else {
665 (self.segment_lookup)(tag).map(SegmentDefRef::Static)
666 }
667 }
668
669 fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
670 if !self.structure_checks && !self.code_list_checks {
671 return Ok(());
672 }
673
674 let Some(def) = self.resolve_def(seg.tag) else {
675 if self.structure_checks && self.enforce_known_tags {
676 return Err(EdifactError::InvalidSegmentForMessage {
677 tag: seg.tag.to_owned(),
678 message_type: self
679 .message_type
680 .clone()
681 .unwrap_or_else(|| self.directory_id.clone()),
682 offset: seg.tag_span.start,
683 });
684 }
685 return Ok(());
686 };
687
688 let max_elements = def.max_element_position();
689 let min_elements = def.last_mandatory_position();
690 let actual = seg.elements.len();
691
692 if self.structure_checks && (actual < min_elements || actual > max_elements) {
693 return Err(EdifactError::InvalidElementCount {
694 tag: seg.tag.to_owned(),
695 min: min_elements,
696 max: max_elements,
697 actual,
698 offset: seg.span.start,
699 });
700 }
701
702 if self.structure_checks {
703 def.for_each_mandatory_position(|idx, _de| {
704 let is_present = seg
705 .elements
706 .get(idx)
707 .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
708 if !is_present {
709 return Err(EdifactError::MissingRequiredElement {
710 tag: seg.tag.to_owned(),
711 element_index: idx,
712 });
713 }
714 Ok(())
715 })?;
716 self.validate_component_counts(seg)?;
717
718 if let Some(rule) = &self.additional_structure_rule {
719 rule(seg)?;
720 }
721 }
722
723 if self.code_list_checks {
724 self.validate_code_lists(seg)?;
725 }
726
727 Ok(())
728 }
729}
730
731impl Validator for DirectoryValidator {
732 fn set_message_type(&mut self, message_type: Option<&str>) {
733 self.message_type = message_type.map(str::to_owned);
734 }
735
736 fn validate_batch(
737 &self,
738 segments: &[Segment<'_>],
739 report: &mut ValidationReport,
740 _context: &ValidationRuleContext<'_>,
741 ) {
742 for seg in segments {
743 if let Err(err) = self.validate_segment(seg) {
744 report_error(report, err);
745 }
746 }
747
748 if self.structure_checks {
749 if let Some(message_type) = self.detect_message_type(segments) {
750 for required_tag in (self.required_segments)(&message_type) {
751 if segments.iter().all(|s| s.tag != *required_tag) {
752 report.add_error(
753 ValidationIssue::new(
754 ValidationSeverity::Error,
755 format!(
756 "required segment {} missing for message type {}",
757 required_tag, message_type
758 ),
759 )
760 .with_segment(*required_tag)
761 .with_suggestion("Add the mandatory segment at the correct position"),
762 );
763 }
764 }
765
766 let seq = (self.required_segments)(&message_type);
767 let mut last_idx = None;
768 for tag in seq {
769 if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
770 if let Some(prev) = last_idx {
771 if idx < prev {
772 report.add_error(
773 ValidationIssue::new(
774 ValidationSeverity::Error,
775 format!(
776 "segment sequence violation for message type {}: '{}' appears out of order",
777 message_type, tag
778 ),
779 )
780 .with_segment(*tag)
781 .with_suggestion(
782 "Ensure required segments follow UN/EDIFACT canonical order",
783 ),
784 );
785 }
786 }
787 last_idx = Some(idx);
788 }
789 }
790 }
791 }
792 }
793}
794
795// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
796
797/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
798///
799/// Use this when segment definitions are loaded from an external source at
800/// startup (JSON, database, YAML, …) rather than being available as `static`
801/// arrays at compile time.
802///
803/// # Example
804///
805/// ```rust,ignore
806/// let validator = DirectoryValidatorBuilder::new("my-profile")
807/// .add_segment(
808/// OwnedSegmentDef::new(
809/// "BGM".to_owned(),
810/// "Beginning of message".to_owned(),
811/// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
812/// ),
813/// )
814/// .build();
815/// ```
816#[derive(Debug, Default)]
817pub struct DirectoryValidatorBuilder {
818 directory_id: Option<String>,
819 segments: Vec<OwnedSegmentDef>,
820}
821
822impl DirectoryValidatorBuilder {
823 /// Create a new builder with the given directory identifier.
824 ///
825 /// The identifier is used in error messages; set a human-readable value
826 /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
827 pub fn new(directory_id: impl Into<String>) -> Self {
828 Self {
829 directory_id: Some(directory_id.into()),
830 segments: Vec::new(),
831 }
832 }
833
834 /// Add a segment definition to the builder.
835 ///
836 /// Definitions can be added in any order; the resulting validator looks
837 /// them up by tag at validation time.
838 pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
839 self.segments.push(def);
840 self
841 }
842
843 /// Extend the builder with multiple segment definitions at once.
844 pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
845 self.segments.extend(defs);
846 self
847 }
848
849 /// Build the [`DirectoryValidator`].
850 ///
851 /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
852 /// Code-list checks are disabled by default; chain
853 /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
854 /// enable them.
855 pub fn build(self) -> DirectoryValidator {
856 let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
857 if let Some(id) = self.directory_id {
858 validator.directory_id = id;
859 }
860 validator
861 }
862}
863
864#[cfg(test)]
865mod tests {
866 use super::*;
867
868 static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
869 position: 1,
870 data_element: "C507",
871 status: Status::Mandatory,
872 max_repeat: 1,
873 }];
874
875 static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
876 tag: "TST",
877 name: "Test segment",
878 elements: TEST_ELEMENTS,
879 };
880
881 fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
882 match tag {
883 "TST" => Some(&TEST_SEGMENT),
884 _ => None,
885 }
886 }
887
888 fn code_valid(_de: &str, _code: &str) -> bool {
889 true
890 }
891
892 fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
893 None
894 }
895
896 fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
897 None
898 }
899
900 #[test]
901 fn mandatory_composite_present_when_any_component_non_empty() {
902 let input = b"TST+:ABC'";
903 let segments: Vec<_> = crate::from_bytes(input)
904 .collect::<Result<Vec<_>, _>>()
905 .expect("parse should succeed");
906
907 let validator = DirectoryValidator::new(
908 "TEST",
909 segment_lookup,
910 code_valid,
911 suggest_code,
912 expected_components,
913 None,
914 );
915
916 let mut report = ValidationReport::default();
917 validator.validate_batch(
918 &segments,
919 &mut report,
920 &crate::validator::ValidationRuleContext::empty(),
921 );
922 assert!(!report.has_errors());
923 }
924
925 // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──
926
927 fn parse_single(input: &[u8]) -> crate::OwnedSegment {
928 crate::from_reader(std::io::Cursor::new(input))
929 .expect("parse should succeed")
930 .into_iter()
931 .next()
932 .expect("at least one segment")
933 }
934
935 #[test]
936 fn trailing_empty_component_stripped_from_dtm() {
937 // DTM+137:20200101: has three components in element 0; the third is empty.
938 // ISO 9735-1 §3.3 says trailing empty components may be omitted,
939 // so effective count should be 2.
940 let owned = parse_single(b"DTM+137:20200101:'");
941 let seg = owned.as_borrowed();
942 let count = DirectoryValidator::effective_component_count(&seg, 0);
943 assert_eq!(
944 count,
945 Some(2),
946 "trailing empty component should be stripped"
947 );
948 }
949
950 #[test]
951 fn all_empty_components_result_in_zero() {
952 // NAD+MS++: → element 2 is ":" with two empty components → effective=0
953 let owned = parse_single(b"NAD+MS++:'");
954 let seg = owned.as_borrowed();
955 let count = DirectoryValidator::effective_component_count(&seg, 2);
956 assert_eq!(
957 count,
958 Some(0),
959 "all-empty composite should have effective count 0"
960 );
961 }
962
963 #[test]
964 fn non_empty_component_not_stripped() {
965 // DTM+137:20200101:102 — all three components are non-empty
966 let owned = parse_single(b"DTM+137:20200101:102'");
967 let seg = owned.as_borrowed();
968 let count = DirectoryValidator::effective_component_count(&seg, 0);
969 assert_eq!(
970 count,
971 Some(3),
972 "no components should be stripped when all non-empty"
973 );
974 }
975
976 #[test]
977 fn with_code_list_rules_overrides_base() {
978 // Override code-list rules to require element 0 of TST to be a specific code.
979 fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
980 match tag {
981 "TST" => &[(0, 0, "CUSTOM_DE")],
982 _ => &[],
983 }
984 }
985 fn custom_code_valid(_de: &str, code: &str) -> bool {
986 code == "VALID"
987 }
988 fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
989 None
990 }
991
992 let input = b"TST+INVALID'";
993 let segments: Vec<_> = crate::from_bytes(input)
994 .collect::<Result<Vec<_>, _>>()
995 .expect("parse should succeed");
996
997 let validator = DirectoryValidator::new(
998 "TEST",
999 segment_lookup,
1000 custom_code_valid,
1001 no_suggestion,
1002 expected_components,
1003 None,
1004 )
1005 .with_code_list_rules(custom_rules);
1006
1007 let mut report = ValidationReport::default();
1008 validator.validate_batch(
1009 &segments,
1010 &mut report,
1011 &crate::validator::ValidationRuleContext::empty(),
1012 );
1013 assert!(
1014 report.has_warnings(),
1015 "INVALID is not in the custom code list so validation must warn"
1016 );
1017 }
1018}