edifact_rs/directory_validator.rs
1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Requirement level of a data element inside a segment definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
    /// The element has to be supplied.
    Mandatory,
    /// The element may be left out unless an additional rule demands it.
    Conditional,
}
15
16/// Reference to a data element within a segment definition.
17#[derive(Debug, Clone, Copy)]
18pub struct ElementRef {
19 /// One-based element position in the segment definition.
20 pub position: u8,
21 /// UN/EDIFACT data element identifier.
22 pub data_element: &'static str,
23 /// Requirement status of the element.
24 pub status: Status,
25 /// Maximum repetition count for this element.
26 pub max_repeat: u8,
27}
28
29/// Definition of an EDIFACT segment (tag + element structure).
30#[derive(Debug)]
31pub struct SegmentDefinition {
32 /// Segment tag.
33 pub tag: &'static str,
34 /// Human-readable segment name.
35 pub name: &'static str,
36 /// Ordered element definitions.
37 pub elements: &'static [ElementRef],
38}
39
40/// Owned runtime equivalent of [`ElementRef`].
41///
42/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
43/// to construct validators from data that is not available at compile time (e.g. loaded
44/// from JSON or a database at startup).
45///
46/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
47/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
48/// comes from an external source and you need a `Result`. Fields are private to prevent
49/// bypassing the position invariant through struct-literal syntax.
50#[derive(Debug, Clone)]
51pub struct OwnedElementRef {
52 /// One-based element position.
53 position: u8,
54 /// UN/EDIFACT data element identifier.
55 data_element: String,
56 /// Requirement status.
57 status: Status,
58 /// Maximum repetition count.
59 max_repeat: u8,
60}
61
62/// Owned runtime equivalent of [`SegmentDefinition`].
63///
64/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
65///
66/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
67/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
68/// an external source and you need a `Result`. Fields are private to prevent bypassing
69/// the tag invariant through struct-literal syntax.
70#[derive(Debug, Clone)]
71pub struct OwnedSegmentDef {
72 /// Segment tag (e.g. `"BGM"`).
73 tag: String,
74 /// Human-readable segment name.
75 name: String,
76 /// Ordered element definitions.
77 elements: Vec<OwnedElementRef>,
78}
79
80impl OwnedSegmentDef {
81 /// Construct an owned segment definition.
82 ///
83 /// This is the ergonomic constructor for compile-time-known tags (e.g.
84 /// `"BGM"`, `"UNH"`). It panics immediately on invalid input so that
85 /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86 /// boilerplate.
87 ///
88 /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89 /// external source (user input, config file, database) and you need a
90 /// `Result` to propagate errors gracefully.
91 ///
92 /// # Panics
93 ///
94 /// Panics if `tag` is not exactly three ASCII uppercase letters.
95 pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96 assert!(
97 tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98 "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99 );
100 Self {
101 tag,
102 name,
103 elements,
104 }
105 }
106
107 /// Construct an owned segment definition, returning an error for invalid tags.
108 ///
109 /// Prefer this over [`new`][Self::new] when the tag comes from an external
110 /// source (user input, config file, database) and you want to handle the
111 /// error without panicking.
112 ///
113 /// # Errors
114 ///
115 /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116 /// ASCII uppercase letters.
117 pub fn try_new(
118 tag: String,
119 name: String,
120 elements: Vec<OwnedElementRef>,
121 ) -> Result<Self, EdifactError> {
122 if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123 return Err(EdifactError::InvalidSegmentTag(tag));
124 }
125 Ok(Self {
126 tag,
127 name,
128 elements,
129 })
130 }
131
132 /// Segment tag (e.g. `"BGM"`).
133 #[inline]
134 pub fn tag(&self) -> &str {
135 &self.tag
136 }
137
138 /// Human-readable segment name.
139 #[inline]
140 pub fn name(&self) -> &str {
141 &self.name
142 }
143
144 /// Element definitions for this segment.
145 #[inline]
146 pub fn elements(&self) -> &[OwnedElementRef] {
147 &self.elements
148 }
149}
150
151impl OwnedElementRef {
152 /// Construct an owned element reference.
153 ///
154 /// This is the ergonomic constructor for compile-time-known positions.
155 /// It panics immediately on invalid input so that call sites with literal
156 /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157 ///
158 /// Use [`try_new`][Self::try_new] instead when the position originates from
159 /// an external source (user input, config file, database) and you need a
160 /// `Result` to propagate errors gracefully.
161 ///
162 /// # Panics
163 ///
164 /// Panics if `position` is `0` (positions are one-based).
165 pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166 assert!(
167 position != 0,
168 "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169 );
170 Self {
171 position,
172 data_element,
173 status,
174 max_repeat,
175 }
176 }
177
178 /// Construct an owned element reference, returning an error for position `0`.
179 ///
180 /// Prefer this over [`new`][Self::new] when the position comes from an
181 /// external source (user input, config file, database) and you want to
182 /// handle the error without panicking.
183 ///
184 /// # Errors
185 ///
186 /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187 pub fn try_new(
188 position: u8,
189 data_element: String,
190 status: Status,
191 max_repeat: u8,
192 ) -> Result<Self, EdifactError> {
193 if position == 0 {
194 return Err(EdifactError::InvalidElementPosition);
195 }
196 Ok(Self {
197 position,
198 data_element,
199 status,
200 max_repeat,
201 })
202 }
203
204 /// One-based element position (always >= 1).
205 #[inline]
206 pub fn position(&self) -> u8 {
207 self.position
208 }
209
210 /// UN/EDIFACT data element identifier.
211 #[inline]
212 pub fn data_element(&self) -> &str {
213 &self.data_element
214 }
215
216 /// Requirement status of this element.
217 #[inline]
218 pub fn status(&self) -> Status {
219 self.status
220 }
221
222 /// Maximum repetition count for this element.
223 #[inline]
224 pub fn max_repeat(&self) -> u8 {
225 self.max_repeat
226 }
227}
228
229type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
230type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
231type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
232type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
233type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
234type AdditionalStructureRuleFn =
235 Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
236/// Returns the `(element_index, component_index, data_element_id)` tuples to
237/// validate against a code list for the given segment tag.
238type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
239/// Returns the mandatory segment tags for a given EDIFACT message type.
240///
241/// The slice should contain every tag that must appear at least once in a
242/// conformant message of the given type. The tags are also used to check
243/// canonical ordering — their relative order in the returned slice is taken
244/// as the expected order in the message.
245type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
247/// Internal enum that unifies lookup results from static and owned segment definitions.
248///
249/// Allows `validate_segment` to handle both code-generated (`&'static`) and
250/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
251enum SegmentDefRef<'a> {
252 Static(&'static SegmentDefinition),
253 Owned(&'a OwnedSegmentDef),
254}
255
256impl<'a> SegmentDefRef<'a> {
257 /// Returns the highest defined element position (one-based → used directly as
258 /// the maximum zero-based slot count for element-count validation).
259 ///
260 /// For owned definitions the highest `position` value may exceed the number
261 /// of entries in the `elements` vec when positions are non-consecutive.
262 fn max_element_position(&self) -> usize {
263 match self {
264 Self::Static(d) => d
265 .elements
266 .iter()
267 .map(|e| e.position as usize)
268 .max()
269 .unwrap_or(0),
270 Self::Owned(d) => d
271 .elements
272 .iter()
273 .map(|e| e.position as usize)
274 .max()
275 .unwrap_or(0),
276 }
277 }
278
279 /// Returns the highest position number among mandatory elements (one-based).
280 ///
281 /// This equals the minimum number of elements that must be present in a
282 /// segment: if the highest-positioned mandatory element is at position 5,
283 /// the segment must supply at least 5 elements.
284 fn last_mandatory_position(&self) -> usize {
285 match self {
286 Self::Static(d) => d
287 .elements
288 .iter()
289 .filter(|e| e.status == Status::Mandatory)
290 .map(|e| e.position as usize)
291 .max()
292 .unwrap_or(0),
293 Self::Owned(d) => d
294 .elements
295 .iter()
296 .filter(|e| e.status == Status::Mandatory)
297 .map(|e| e.position as usize)
298 .max()
299 .unwrap_or(0),
300 }
301 }
302
303 fn mandatory_positions(&self) -> impl Iterator<Item = (usize, &str)> {
304 enum E<A, B> {
305 A(A),
306 B(B),
307 }
308 impl<A, B, I> Iterator for E<A, B>
309 where
310 A: Iterator<Item = I>,
311 B: Iterator<Item = I>,
312 {
313 type Item = I;
314 fn next(&mut self) -> Option<I> {
315 match self {
316 E::A(a) => a.next(),
317 E::B(b) => b.next(),
318 }
319 }
320 }
321 match self {
322 Self::Static(d) => E::A(
323 d.elements
324 .iter()
325 .filter(|e| e.status == Status::Mandatory)
326 .map(|e| ((e.position as usize).saturating_sub(1), e.data_element)),
327 ),
328 Self::Owned(d) => E::B(
329 d.elements
330 .iter()
331 .filter(|e| e.status == Status::Mandatory)
332 .map(|e| {
333 (
334 (e.position as usize).saturating_sub(1),
335 e.data_element.as_str(),
336 )
337 }),
338 ),
339 }
340 }
341}
342
/// Built-in mapping from message type to mandatory segment tags, used until a
/// custom mapping is installed.
///
/// `UTILMD`, `ORDERS` and `INVOIC` require `UNH`, `BGM` and `UNT`; every other
/// message type only requires the `UNH`/`UNT` envelope.
fn default_required_segments(message_type: &str) -> &'static [&'static str] {
    const WITH_BGM: &[&str] = &["UNH", "BGM", "UNT"];
    const ENVELOPE_ONLY: &[&str] = &["UNH", "UNT"];

    if matches!(message_type, "UTILMD" | "ORDERS" | "INVOIC") {
        WITH_BGM
    } else {
        ENVELOPE_ONLY
    }
}
350
/// Code-list validation rules shared by all UN/EDIFACT directory releases.
///
/// Every rule is an `(element_index, component_index, data_element_id)` tuple
/// with zero-based indices. Tags without an entry yield an empty slice.
///
/// The table covers the qualifier/code elements most commonly validated in
/// ORDERS, INVOIC, UTILMD and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    type Rules = &'static [(usize, usize, &'static str)];

    // One entry per supported tag: the first component of the first element
    // is checked against the named code list.
    const TABLE: &[(&str, Rules)] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    TABLE
        .iter()
        .find(|(known, _)| *known == tag)
        .map(|(_, rules)| *rules)
        .unwrap_or(&[])
}
371
372/// Shared validator implementation that is configured per UN/EDIFACT directory release.
373///
374/// # Scope and limitations
375///
376/// `DirectoryValidator` validates individual segment *content* (element counts,
377/// component counts, code-list values, and conditional rules) and checks that
378/// every *mandatory* segment type is present at least once. It does **not**
379/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
380/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
381/// appears in the wrong position. Full sequence validation requires a
382/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
383/// the scope of this implementation.
384#[derive(Clone)]
385pub struct DirectoryValidator {
386 directory_id: String,
387 segment_lookup: SegmentLookupFn,
388 /// Runtime-owned segment definitions (from builder / JSON / DB).
389 ///
390 /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
391 owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
392 is_code_valid: IsCodeValidFn,
393 suggest_code: SuggestCodeFn,
394 expected_components: ExpectedComponentsFn,
395 code_list_rules: CodeListRulesFn,
396 additional_structure_rule: Option<AdditionalStructureRuleFn>,
397 /// Configurable mapping from message type to required segment tags.
398 required_segments: RequiredSegmentsFn,
399 message_type: Option<String>,
400 enforce_known_tags: bool,
401 structure_checks: bool,
402 code_list_checks: bool,
403}
404
405impl std::fmt::Debug for DirectoryValidator {
406 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407 f.debug_struct("DirectoryValidator")
408 .field("directory_id", &self.directory_id)
409 .field("message_type", &self.message_type)
410 .field("enforce_known_tags", &self.enforce_known_tags)
411 .field("structure_checks", &self.structure_checks)
412 .field("code_list_checks", &self.code_list_checks)
413 .finish_non_exhaustive()
414 }
415}
416
417impl DirectoryValidator {
418 /// Create a validator for a specific directory release with injected lookup/check hooks.
419 pub fn new(
420 directory_id: &'static str,
421 segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
422 is_code_valid: fn(&str, &str) -> bool,
423 suggest_code: fn(&str, &str) -> Option<&'static str>,
424 expected_components: fn(&str, usize) -> Option<u8>,
425 additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
426 ) -> Self {
427 Self {
428 directory_id: directory_id.to_owned(),
429 segment_lookup: Arc::new(segment_lookup),
430 owned_defs: None,
431 is_code_valid: Arc::new(is_code_valid),
432 suggest_code: Arc::new(suggest_code),
433 expected_components: Arc::new(expected_components),
434 code_list_rules: Arc::new(base_code_list_rules),
435 additional_structure_rule: additional_structure_rule
436 .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
437 required_segments: Arc::new(default_required_segments),
438 message_type: None,
439 enforce_known_tags: true,
440 structure_checks: true,
441 code_list_checks: true,
442 }
443 }
444
445 /// Create a validator from a static slice of [`SegmentDefinition`]s.
446 ///
447 /// This is the preferred constructor when code-generating directory data as
448 /// a `static` array: no manual fn-pointer boilerplate is required.
449 ///
450 /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
451 /// always returns `true`). Call [`with_code_list_rules`][Self::with_code_list_rules]
452 /// to register directory-specific rules that actually validate code values.
453 ///
454 /// # Example
455 ///
456 /// ```rust,ignore
457 /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
458 ///
459 /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
460 /// .with_code_list_rules(my_code_list_rules);
461 /// ```
462 pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
463 Self {
464 directory_id: "custom".to_owned(),
465 segment_lookup: Arc::new(move |tag: &str| definitions.iter().find(|d| d.tag == tag)),
466 owned_defs: None,
467 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
468 suggest_code: Arc::new(|_de: &str, _code: &str| None),
469 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
470 code_list_rules: Arc::new(base_code_list_rules),
471 additional_structure_rule: None,
472 required_segments: Arc::new(default_required_segments),
473 message_type: None,
474 enforce_known_tags: true,
475 structure_checks: true,
476 code_list_checks: false,
477 }
478 }
479
480 /// Create a validator from a runtime-owned collection of segment definitions.
481 ///
482 /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
483 /// loaded from an external source at startup (JSON, database, YAML, …) rather
484 /// than being known at compile time.
485 ///
486 /// Code-list checks are **disabled** by default; enable them by chaining
487 /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
488 /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
489 /// the builder.
490 ///
491 /// # Example
492 ///
493 /// ```rust,ignore
494 /// let defs = vec![
495 /// OwnedSegmentDef::new(
496 /// "BGM".to_owned(),
497 /// "Beginning of message".to_owned(),
498 /// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
499 /// ),
500 /// ];
501 /// let validator = DirectoryValidator::from_owned_definitions(defs)
502 /// .with_directory_id("runtime-profile");
503 /// ```
504 pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
505 Self {
506 directory_id: "custom".to_owned(),
507 // The static lookup is never consulted when `owned_defs` is `Some`.
508 segment_lookup: Arc::new(|_| None),
509 owned_defs: Some(Arc::new(definitions)),
510 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
511 suggest_code: Arc::new(|_de: &str, _code: &str| None),
512 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
513 code_list_rules: Arc::new(base_code_list_rules),
514 additional_structure_rule: None,
515 required_segments: Arc::new(default_required_segments),
516 message_type: None,
517 enforce_known_tags: true,
518 structure_checks: true,
519 code_list_checks: false,
520 }
521 }
522
523 /// Set the directory identifier string (used in error messages).
524 pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
525 self.directory_id = id.into();
526 self
527 }
528
529 /// Override the code-list rules function.
530 ///
531 /// Directories can supply a directory-specific implementation that extends or
532 /// replaces the base rules from `base_code_list_rules`.
533 pub fn with_code_list_rules(
534 mut self,
535 f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
536 ) -> Self {
537 self.code_list_rules = Arc::new(f);
538 self
539 }
540
541 /// Enable only structure checks and disable code-list checks.
542 pub fn structure_only(mut self) -> Self {
543 self.structure_checks = true;
544 self.code_list_checks = false;
545 self
546 }
547
548 /// Enable only code-list checks and disable structure checks.
549 pub fn code_list_only(mut self) -> Self {
550 self.structure_checks = false;
551 self.code_list_checks = true;
552 self
553 }
554
555 /// Configure whether unknown segment tags should be rejected.
556 pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
557 self.enforce_known_tags = enforce;
558 self
559 }
560
561 /// Override the required-segments mapping used for structural validation.
562 ///
563 /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
564 /// and must return a `'static` slice of segment tags that are mandatory for that
565 /// type. The tags are checked both for *presence* and for *canonical ordering*
566 /// within the message.
567 ///
568 /// # Example
569 ///
570 /// ```rust,ignore
571 /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
572 /// match msg_type {
573 /// "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
574 /// "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
575 /// _ => &["UNH", "UNT"],
576 /// }
577 /// }
578 ///
579 /// let validator = DirectoryValidator::from_definitions(DEFS)
580 /// .with_required_segments(my_required_segments);
581 /// ```
582 pub fn with_required_segments(
583 mut self,
584 f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
585 ) -> Self {
586 self.required_segments = Arc::new(f);
587 self
588 }
589
590 fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
591 if let Some(explicit) = self.message_type.as_deref() {
592 return Some(explicit.to_owned());
593 }
594
595 segments
596 .iter()
597 .find(|s| s.tag == "UNH")
598 .and_then(|s| s.get_element(1))
599 .and_then(|e| e.get_component(0))
600 .map(str::to_owned)
601 }
602
603 /// Count the non-trailing-empty components in element `element_idx` of `seg`.
604 ///
605 /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
606 /// a sender is not required to transmit trailing empty components; this function
607 /// therefore strips them before checking against the expected count so that
608 /// conformant messages with omitted trailing components are still accepted.
609 ///
610 /// # Examples
611 ///
612 /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
613 /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
614 fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
615 let elem = seg.elements.get(element_idx)?;
616 let mut count = elem.components.len();
617 while count > 0 && elem.components[count - 1].as_ref().is_empty() {
618 count -= 1;
619 }
620 u8::try_from(count).ok()
621 }
622
623 fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
624 for idx in 0..seg.elements.len() {
625 if let Some(expected) = (self.expected_components)(seg.tag, idx) {
626 let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
627 if actual != expected {
628 return Err(EdifactError::InvalidComponentCount {
629 tag: seg.tag.to_owned(),
630 element_index: idx,
631 expected,
632 actual,
633 offset: seg.span.start,
634 });
635 }
636 }
637 }
638 Ok(())
639 }
640
641 fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
642 let rules = (self.code_list_rules)(seg.tag);
643
644 for (elem_idx, comp_idx, de) in rules {
645 let value = seg
646 .get_element(*elem_idx)
647 .and_then(|e| e.get_component(*comp_idx))
648 .unwrap_or("");
649 if !value.is_empty() && !(self.is_code_valid)(de, value) {
650 let suggestion = (self.suggest_code)(de, value);
651 return Err(EdifactError::InvalidCodeValue {
652 tag: seg.tag.to_owned(),
653 element_index: *elem_idx,
654 value: value.to_owned(),
655 code_list: (*de).to_owned(),
656 offset: seg.span.start,
657 suggestion,
658 });
659 }
660 }
661
662 Ok(())
663 }
664}
665
666impl DirectoryValidator {
667 fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
668 if let Some(owned) = &self.owned_defs {
669 owned
670 .iter()
671 .find(|d| d.tag == tag)
672 .map(SegmentDefRef::Owned)
673 } else {
674 (self.segment_lookup)(tag).map(SegmentDefRef::Static)
675 }
676 }
677
678 fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
679 if !self.structure_checks && !self.code_list_checks {
680 return Ok(());
681 }
682
683 let Some(def) = self.resolve_def(seg.tag) else {
684 if self.structure_checks && self.enforce_known_tags {
685 return Err(EdifactError::InvalidSegmentForMessage {
686 tag: seg.tag.to_owned(),
687 message_type: self
688 .message_type
689 .clone()
690 .unwrap_or_else(|| self.directory_id.clone()),
691 offset: seg.tag_span.start,
692 });
693 }
694 return Ok(());
695 };
696
697 let max_elements = def.max_element_position();
698 let min_elements = def.last_mandatory_position();
699 let actual = seg.elements.len();
700
701 if self.structure_checks && (actual < min_elements || actual > max_elements) {
702 return Err(EdifactError::InvalidElementCount {
703 tag: seg.tag.to_owned(),
704 min: min_elements,
705 max: max_elements,
706 actual,
707 offset: seg.span.start,
708 });
709 }
710
711 if self.structure_checks {
712 for (idx, _de) in def.mandatory_positions() {
713 let is_present = seg
714 .elements
715 .get(idx)
716 .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
717 if !is_present {
718 return Err(EdifactError::MissingRequiredElement {
719 tag: seg.tag.to_owned(),
720 element_index: idx,
721 });
722 }
723 }
724 self.validate_component_counts(seg)?;
725
726 if let Some(rule) = &self.additional_structure_rule {
727 rule(seg)?;
728 }
729 }
730
731 if self.code_list_checks {
732 self.validate_code_lists(seg)?;
733 }
734
735 Ok(())
736 }
737}
738
739impl Validator for DirectoryValidator {
740 fn set_message_type(&mut self, message_type: Option<&str>) {
741 self.message_type = message_type.map(str::to_owned);
742 }
743
744 fn validate_batch(
745 &self,
746 segments: &[Segment<'_>],
747 report: &mut ValidationReport,
748 _context: &ValidationRuleContext<'_>,
749 ) {
750 for seg in segments {
751 if let Err(err) = self.validate_segment(seg) {
752 report_error(report, err);
753 }
754 }
755
756 if self.structure_checks {
757 if let Some(message_type) = self.detect_message_type(segments) {
758 for required_tag in (self.required_segments)(&message_type) {
759 if segments.iter().all(|s| s.tag != *required_tag) {
760 report.add_error(
761 ValidationIssue::new(
762 ValidationSeverity::Error,
763 format!(
764 "required segment {} missing for message type {}",
765 required_tag, message_type
766 ),
767 )
768 .with_segment(*required_tag)
769 .with_suggestion("Add the mandatory segment at the correct position"),
770 );
771 }
772 }
773
774 let seq = (self.required_segments)(&message_type);
775 let mut last_idx = None;
776 for tag in seq {
777 if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
778 if let Some(prev) = last_idx {
779 if idx < prev {
780 report.add_error(
781 ValidationIssue::new(
782 ValidationSeverity::Error,
783 format!(
784 "segment sequence violation for message type {}: '{}' appears out of order",
785 message_type, tag
786 ),
787 )
788 .with_segment(*tag)
789 .with_suggestion(
790 "Ensure required segments follow UN/EDIFACT canonical order",
791 ),
792 );
793 }
794 }
795 last_idx = Some(idx);
796 }
797 }
798 }
799 }
800 }
801}
802
803// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
804
805/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
806///
807/// Use this when segment definitions are loaded from an external source at
808/// startup (JSON, database, YAML, …) rather than being available as `static`
809/// arrays at compile time.
810///
811/// # Example
812///
813/// ```rust,ignore
814/// let validator = DirectoryValidatorBuilder::new("my-profile")
815/// .add_segment(
816/// OwnedSegmentDef::new(
817/// "BGM".to_owned(),
818/// "Beginning of message".to_owned(),
819/// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
820/// ),
821/// )
822/// .build();
823/// ```
824#[derive(Debug, Default)]
825pub struct DirectoryValidatorBuilder {
826 directory_id: Option<String>,
827 segments: Vec<OwnedSegmentDef>,
828}
829
830impl DirectoryValidatorBuilder {
831 /// Create a new builder with the given directory identifier.
832 ///
833 /// The identifier is used in error messages; set a human-readable value
834 /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
835 pub fn new(directory_id: impl Into<String>) -> Self {
836 Self {
837 directory_id: Some(directory_id.into()),
838 segments: Vec::new(),
839 }
840 }
841
842 /// Add a segment definition to the builder.
843 ///
844 /// Definitions can be added in any order; the resulting validator looks
845 /// them up by tag at validation time.
846 pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
847 self.segments.push(def);
848 self
849 }
850
851 /// Extend the builder with multiple segment definitions at once.
852 pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
853 self.segments.extend(defs);
854 self
855 }
856
857 /// Build the [`DirectoryValidator`].
858 ///
859 /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
860 /// Code-list checks are disabled by default; chain
861 /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
862 /// enable them.
863 pub fn build(self) -> DirectoryValidator {
864 let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
865 if let Some(id) = self.directory_id {
866 validator.directory_id = id;
867 }
868 validator
869 }
870}
871
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal directory: one segment `TST` with a single mandatory element.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Static lookup hook that only knows `TST`.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code predicate that accepts everything.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never suggests anything.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that leaves every element unchecked.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // First component is empty, second is not: the element still counts
        // as present.
        let segments: Vec<_> = crate::from_bytes(b"TST+:ABC'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let mut report = ValidationReport::default();
        let context = crate::validator::ValidationRuleContext::empty();
        validator.validate_batch(&segments, &mut report, &context);
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    /// Parse `input` and hand back its first segment.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let parsed = crate::from_reader(std::io::Cursor::new(input)).expect("parse should succeed");
        parsed.into_iter().next().expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the last
        // of which is empty. Trailing empty components may be omitted under
        // ISO 9735-1 §3.3, so the effective count is 2.
        let owned = parse_single(b"DTM+137:20200101:'");
        let seg = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // In `NAD+MS++:` element 2 is ":" — two empty components — so nothing
        // remains after trimming.
        let owned = parse_single(b"NAD+MS++:'");
        let seg = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // Every component of `DTM+137:20200101:102` is populated.
        let owned = parse_single(b"DTM+137:20200101:102'");
        let seg = owned.as_borrowed();
        assert_eq!(
            DirectoryValidator::effective_component_count(&seg, 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rules put element 0 of TST under a code list whose only
        // accepted value is "VALID".
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            if tag == "TST" {
                &[(0, 0, "CUSTOM_DE")]
            } else {
                &[]
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let segments: Vec<_> = crate::from_bytes(b"TST+INVALID'")
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let mut report = ValidationReport::default();
        let context = crate::validator::ValidationRuleContext::empty();
        validator.validate_batch(&segments, &mut report, &context);
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}
1026}