edifact_rs/directory_validator.rs
1//! Shared UN/EDIFACT directory validation engine used by D.11A, D.01B and D.96A.
2
3use crate::validator::{ValidationRuleContext, Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5use std::sync::Arc;
6
/// Requirement status of a data element inside a segment definition.
///
/// Every element of a segment definition is either mandatory or conditional.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Status {
    /// The element has to be supplied.
    Mandatory,
    /// The element may be left out unless an additional rule demands it.
    Conditional,
}
15
16/// Reference to a data element within a segment definition.
17#[derive(Debug, Clone, Copy)]
18pub struct ElementRef {
19 /// One-based element position in the segment definition.
20 pub position: u8,
21 /// UN/EDIFACT data element identifier.
22 pub data_element: &'static str,
23 /// Requirement status of the element.
24 pub status: Status,
25 /// Maximum repetition count for this element.
26 pub max_repeat: u8,
27}
28
29/// Definition of an EDIFACT segment (tag + element structure).
30#[derive(Debug)]
31pub struct SegmentDefinition {
32 /// Segment tag.
33 pub tag: &'static str,
34 /// Human-readable segment name.
35 pub name: &'static str,
36 /// Ordered element definitions.
37 pub elements: &'static [ElementRef],
38}
39
40/// Owned runtime equivalent of [`ElementRef`].
41///
42/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`]
43/// to construct validators from data that is not available at compile time (e.g. loaded
44/// from JSON or a database at startup).
45///
46/// Use [`OwnedElementRef::new`] for compile-time-known positions (panics on invalid
47/// input, no error handling noise) or [`OwnedElementRef::try_new`] when the position
48/// comes from an external source and you need a `Result`. Fields are private to prevent
49/// bypassing the position invariant through struct-literal syntax.
50#[derive(Debug, Clone)]
51pub struct OwnedElementRef {
52 /// One-based element position.
53 position: u8,
54 /// UN/EDIFACT data element identifier.
55 data_element: String,
56 /// Requirement status.
57 status: Status,
58 /// Maximum repetition count.
59 max_repeat: u8,
60}
61
62/// Owned runtime equivalent of [`SegmentDefinition`].
63///
64/// Used by [`DirectoryValidatorBuilder`] and [`DirectoryValidator::from_owned_definitions`].
65///
66/// Use [`OwnedSegmentDef::new`] for compile-time-known tags (panics on invalid input,
67/// no error handling noise) or [`OwnedSegmentDef::try_new`] when the tag comes from
68/// an external source and you need a `Result`. Fields are private to prevent bypassing
69/// the tag invariant through struct-literal syntax.
70#[derive(Debug, Clone)]
71pub struct OwnedSegmentDef {
72 /// Segment tag (e.g. `"BGM"`).
73 tag: String,
74 /// Human-readable segment name.
75 name: String,
76 /// Ordered element definitions.
77 elements: Vec<OwnedElementRef>,
78}
79
80impl OwnedSegmentDef {
81 /// Construct an owned segment definition.
82 ///
83 /// This is the ergonomic constructor for compile-time-known tags (e.g.
84 /// `"BGM"`, `"UNH"`). It panics immediately on invalid input so that
85 /// call sites with literal tag strings require no `.unwrap()` / `.expect()`
86 /// boilerplate.
87 ///
88 /// Use [`try_new`][Self::try_new] instead when the tag originates from an
89 /// external source (user input, config file, database) and you need a
90 /// `Result` to propagate errors gracefully.
91 ///
92 /// # Panics
93 ///
94 /// Panics if `tag` is not exactly three ASCII uppercase letters.
95 pub fn new(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
96 assert!(
97 tag.len() == 3 && tag.bytes().all(|b| b.is_ascii_uppercase()),
98 "OwnedSegmentDef::new: tag must be exactly three ASCII uppercase letters, got {tag:?}"
99 );
100 Self {
101 tag,
102 name,
103 elements,
104 }
105 }
106
107 /// Construct an owned segment definition, returning an error for invalid tags.
108 ///
109 /// Prefer this over [`new`][Self::new] when the tag comes from an external
110 /// source (user input, config file, database) and you want to handle the
111 /// error without panicking.
112 ///
113 /// # Errors
114 ///
115 /// Returns [`EdifactError::InvalidSegmentTag`] if `tag` is not exactly three
116 /// ASCII uppercase letters.
117 pub fn try_new(
118 tag: String,
119 name: String,
120 elements: Vec<OwnedElementRef>,
121 ) -> Result<Self, EdifactError> {
122 if tag.len() != 3 || !tag.bytes().all(|b| b.is_ascii_uppercase()) {
123 return Err(EdifactError::InvalidSegmentTag(tag));
124 }
125 Ok(Self {
126 tag,
127 name,
128 elements,
129 })
130 }
131
132 /// Segment tag (e.g. `"BGM"`).
133 #[inline]
134 pub fn tag(&self) -> &str {
135 &self.tag
136 }
137
138 /// Human-readable segment name.
139 #[inline]
140 pub fn name(&self) -> &str {
141 &self.name
142 }
143
144 /// Element definitions for this segment.
145 #[inline]
146 pub fn elements(&self) -> &[OwnedElementRef] {
147 &self.elements
148 }
149}
150
151impl OwnedElementRef {
152 /// Construct an owned element reference.
153 ///
154 /// This is the ergonomic constructor for compile-time-known positions.
155 /// It panics immediately on invalid input so that call sites with literal
156 /// position numbers require no `.unwrap()` / `.expect()` boilerplate.
157 ///
158 /// Use [`try_new`][Self::try_new] instead when the position originates from
159 /// an external source (user input, config file, database) and you need a
160 /// `Result` to propagate errors gracefully.
161 ///
162 /// # Panics
163 ///
164 /// Panics if `position` is `0` (positions are one-based).
165 pub fn new(position: u8, data_element: String, status: Status, max_repeat: u8) -> Self {
166 assert!(
167 position != 0,
168 "OwnedElementRef::new: position must be >= 1 (one-based), got 0"
169 );
170 Self {
171 position,
172 data_element,
173 status,
174 max_repeat,
175 }
176 }
177
178 /// Construct an owned element reference, returning an error for position `0`.
179 ///
180 /// Prefer this over [`new`][Self::new] when the position comes from an
181 /// external source (user input, config file, database) and you want to
182 /// handle the error without panicking.
183 ///
184 /// # Errors
185 ///
186 /// Returns [`EdifactError::InvalidElementPosition`] if `position` is `0`.
187 pub fn try_new(
188 position: u8,
189 data_element: String,
190 status: Status,
191 max_repeat: u8,
192 ) -> Result<Self, EdifactError> {
193 if position == 0 {
194 return Err(EdifactError::InvalidElementPosition);
195 }
196 Ok(Self {
197 position,
198 data_element,
199 status,
200 max_repeat,
201 })
202 }
203
204 /// One-based element position (always >= 1).
205 #[inline]
206 pub fn position(&self) -> u8 {
207 self.position
208 }
209
210 /// UN/EDIFACT data element identifier.
211 #[inline]
212 pub fn data_element(&self) -> &str {
213 &self.data_element
214 }
215
216 /// Requirement status of this element.
217 #[inline]
218 pub fn status(&self) -> Status {
219 self.status
220 }
221
222 /// Maximum repetition count for this element.
223 #[inline]
224 pub fn max_repeat(&self) -> u8 {
225 self.max_repeat
226 }
227}
228
229type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
230type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
231type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
232type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
233type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
234type AdditionalStructureRuleFn =
235 Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
236/// Returns the `(element_index, component_index, data_element_id)` tuples to
237/// validate against a code list for the given segment tag.
238type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
239/// Returns the mandatory segment tags for a given EDIFACT message type.
240///
241/// The slice should contain every tag that must appear at least once in a
242/// conformant message of the given type. The tags are also used to check
243/// canonical ordering — their relative order in the returned slice is taken
244/// as the expected order in the message.
245type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
246
247/// Internal enum that unifies lookup results from static and owned segment definitions.
248///
249/// Allows `validate_segment` to handle both code-generated (`&'static`) and
250/// runtime-constructed ([`OwnedSegmentDef`]) definitions without duplication.
251enum SegmentDefRef<'a> {
252 Static(&'static SegmentDefinition),
253 Owned(&'a OwnedSegmentDef),
254}
255
256impl<'a> SegmentDefRef<'a> {
257 /// Returns the highest defined element position (one-based → used directly as
258 /// the maximum zero-based slot count for element-count validation).
259 ///
260 /// For owned definitions the highest `position` value may exceed the number
261 /// of entries in the `elements` vec when positions are non-consecutive.
262 fn max_element_position(&self) -> usize {
263 match self {
264 Self::Static(d) => d
265 .elements
266 .iter()
267 .map(|e| e.position as usize)
268 .max()
269 .unwrap_or(0),
270 Self::Owned(d) => d
271 .elements
272 .iter()
273 .map(|e| e.position as usize)
274 .max()
275 .unwrap_or(0),
276 }
277 }
278
279 /// Returns the highest position number among mandatory elements (one-based).
280 ///
281 /// This equals the minimum number of elements that must be present in a
282 /// segment: if the highest-positioned mandatory element is at position 5,
283 /// the segment must supply at least 5 elements.
284 fn last_mandatory_position(&self) -> usize {
285 match self {
286 Self::Static(d) => d
287 .elements
288 .iter()
289 .filter(|e| e.status == Status::Mandatory)
290 .map(|e| e.position as usize)
291 .max()
292 .unwrap_or(0),
293 Self::Owned(d) => d
294 .elements
295 .iter()
296 .filter(|e| e.status == Status::Mandatory)
297 .map(|e| e.position as usize)
298 .max()
299 .unwrap_or(0),
300 }
301 }
302
303 /// Iterate over mandatory element positions without heap allocation.
304 ///
305 /// Calls `f(zero_based_index, data_element_id)` for each element whose
306 /// status is [`Status::Mandatory`]. Returns `Err` immediately if `f`
307 /// returns `Err`, short-circuiting the remaining elements.
308 fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
309 where
310 F: FnMut(usize, &str) -> Result<(), E>,
311 {
312 match self {
313 Self::Static(d) => {
314 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
315 f((e.position as usize).saturating_sub(1), e.data_element)?;
316 }
317 }
318 Self::Owned(d) => {
319 for e in d.elements.iter().filter(|e| e.status == Status::Mandatory) {
320 f(
321 (e.position as usize).saturating_sub(1),
322 e.data_element.as_str(),
323 )?;
324 }
325 }
326 }
327 Ok(())
328 }
329}
330
/// Required-segments mapping that is installed when the caller supplies none.
///
/// Ignores the message type and always answers with the universal minimum:
/// `UNH` followed by `UNT`. Mandatory segments that depend on the message type
/// (such as `BGM` for ORDERS/INVOIC) are deliberately left out to avoid false
/// positives for message types that do not require them; enforce those through
/// a [`ProfileRulePack`][crate::ProfileRulePack] or a custom function passed to
/// [`DirectoryValidator::with_required_segments`].
fn default_required_segments(_message_type: &str) -> &'static [&'static str] {
    const UNIVERSAL_MINIMUM: &[&str] = &["UNH", "UNT"];
    UNIVERSAL_MINIMUM
}
342
/// Code-list rules shared by all UN/EDIFACT directory releases.
///
/// For a known segment tag the result lists `(element_index, component_index,
/// data_element_id)` tuples, where both indices are zero-based. Every rule in
/// this table targets the first component of the first element of its segment.
/// Tags without an entry (the match is case-sensitive) yield an empty slice.
///
/// The table covers the qualifier/code elements most frequently validated in
/// ORDERS, INVOIC, UTILMD and similar message types.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    /// One table row: segment tag and the rules that apply to it.
    type Row = (&'static str, &'static [(usize, usize, &'static str)]);

    const TABLE: &[Row] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    for &(known_tag, rules) in TABLE {
        if known_tag == tag {
            return rules;
        }
    }
    &[]
}
363
364/// Shared validator implementation that is configured per UN/EDIFACT directory release.
365///
366/// # Scope and limitations
367///
368/// `DirectoryValidator` validates individual segment *content* (element counts,
369/// component counts, code-list values, and conditional rules) and checks that
370/// every *mandatory* segment type is present at least once. It does **not**
371/// validate segment *sequence* or *repetition cardinality* — i.e., it cannot
372/// tell you that a `BGM` segment appears more than once, or that a `RFF` group
373/// appears in the wrong position. Full sequence validation requires a
374/// state-machine per message type (UN/EDIFACT Segment Tables) which is outside
375/// the scope of this implementation.
376#[derive(Clone)]
377pub struct DirectoryValidator {
378 directory_id: String,
379 segment_lookup: SegmentLookupFn,
380 /// Runtime-owned segment definitions (from builder / JSON / DB).
381 ///
382 /// When `Some`, takes precedence over `segment_lookup` for tag resolution.
383 owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
384 is_code_valid: IsCodeValidFn,
385 suggest_code: SuggestCodeFn,
386 expected_components: ExpectedComponentsFn,
387 code_list_rules: CodeListRulesFn,
388 additional_structure_rule: Option<AdditionalStructureRuleFn>,
389 /// Configurable mapping from message type to required segment tags.
390 required_segments: RequiredSegmentsFn,
391 message_type: Option<String>,
392 enforce_known_tags: bool,
393 structure_checks: bool,
394 code_list_checks: bool,
395}
396
397impl std::fmt::Debug for DirectoryValidator {
398 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
399 f.debug_struct("DirectoryValidator")
400 .field("directory_id", &self.directory_id)
401 .field("message_type", &self.message_type)
402 .field("enforce_known_tags", &self.enforce_known_tags)
403 .field("structure_checks", &self.structure_checks)
404 .field("code_list_checks", &self.code_list_checks)
405 .finish_non_exhaustive()
406 }
407}
408
409impl DirectoryValidator {
410 /// Create a validator for a specific directory release with injected lookup/check hooks.
411 pub fn new(
412 directory_id: &'static str,
413 segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
414 is_code_valid: fn(&str, &str) -> bool,
415 suggest_code: fn(&str, &str) -> Option<&'static str>,
416 expected_components: fn(&str, usize) -> Option<u8>,
417 additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
418 ) -> Self {
419 Self {
420 directory_id: directory_id.to_owned(),
421 segment_lookup: Arc::new(segment_lookup),
422 owned_defs: None,
423 is_code_valid: Arc::new(is_code_valid),
424 suggest_code: Arc::new(suggest_code),
425 expected_components: Arc::new(expected_components),
426 code_list_rules: Arc::new(base_code_list_rules),
427 additional_structure_rule: additional_structure_rule
428 .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
429 required_segments: Arc::new(default_required_segments),
430 message_type: None,
431 enforce_known_tags: true,
432 structure_checks: true,
433 code_list_checks: true,
434 }
435 }
436
437 /// Create a validator from a static slice of [`SegmentDefinition`]s.
438 ///
439 /// This is the preferred constructor when code-generating directory data as
440 /// a `static` array: no manual fn-pointer boilerplate is required.
441 ///
442 /// Code-list checks are **disabled** by default (the built-in `is_code_valid`
443 /// always returns `true`). Call [`with_code_list_rules`][Self::with_code_list_rules]
444 /// to register directory-specific rules that actually validate code values.
445 ///
446 /// # Example
447 ///
448 /// ```rust,ignore
449 /// static MY_SEGMENTS: &[SegmentDefinition] = &[ /* … */ ];
450 ///
451 /// let validator = DirectoryValidator::from_definitions(MY_SEGMENTS)
452 /// .with_code_list_rules(my_code_list_rules);
453 /// ```
454 pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
455 let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
456 definitions.iter().map(|d| (d.tag, d)).collect();
457 let lookup_map = Arc::new(lookup_map);
458 Self {
459 directory_id: "custom".to_owned(),
460 segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
461 owned_defs: None,
462 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
463 suggest_code: Arc::new(|_de: &str, _code: &str| None),
464 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
465 code_list_rules: Arc::new(base_code_list_rules),
466 additional_structure_rule: None,
467 required_segments: Arc::new(default_required_segments),
468 message_type: None,
469 enforce_known_tags: true,
470 structure_checks: true,
471 code_list_checks: false,
472 }
473 }
474
475 /// Create a validator from a runtime-owned collection of segment definitions.
476 ///
477 /// Use this (or [`DirectoryValidatorBuilder`]) when segment definitions are
478 /// loaded from an external source at startup (JSON, database, YAML, …) rather
479 /// than being known at compile time.
480 ///
481 /// Code-list checks are **disabled** by default; enable them by chaining
482 /// [`with_code_list_rules`][Self::with_code_list_rules] and setting
483 /// `is_code_valid` via a custom [`new`][Self::new] call or by subclassing
484 /// the builder.
485 ///
486 /// # Example
487 ///
488 /// ```rust,ignore
489 /// let defs = vec![
490 /// OwnedSegmentDef::new(
491 /// "BGM".to_owned(),
492 /// "Beginning of message".to_owned(),
493 /// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
494 /// ),
495 /// ];
496 /// let validator = DirectoryValidator::from_owned_definitions(defs)
497 /// .with_directory_id("runtime-profile");
498 /// ```
499 pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
500 Self {
501 directory_id: "custom".to_owned(),
502 // The static lookup is never consulted when `owned_defs` is `Some`.
503 segment_lookup: Arc::new(|_| None),
504 owned_defs: Some(Arc::new(definitions)),
505 is_code_valid: Arc::new(|_de: &str, _code: &str| true),
506 suggest_code: Arc::new(|_de: &str, _code: &str| None),
507 expected_components: Arc::new(|_tag: &str, _idx: usize| None),
508 code_list_rules: Arc::new(base_code_list_rules),
509 additional_structure_rule: None,
510 required_segments: Arc::new(default_required_segments),
511 message_type: None,
512 enforce_known_tags: true,
513 structure_checks: true,
514 code_list_checks: false,
515 }
516 }
517
518 /// Set the directory identifier string (used in error messages).
519 pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
520 self.directory_id = id.into();
521 self
522 }
523
524 /// Override the code-list rules function.
525 ///
526 /// Directories can supply a directory-specific implementation that extends or
527 /// replaces the base rules from `base_code_list_rules`.
528 pub fn with_code_list_rules(
529 mut self,
530 f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
531 ) -> Self {
532 self.code_list_rules = Arc::new(f);
533 self
534 }
535
536 /// Enable only structure checks and disable code-list checks.
537 pub fn structure_only(mut self) -> Self {
538 self.structure_checks = true;
539 self.code_list_checks = false;
540 self
541 }
542
543 /// Enable only code-list checks and disable structure checks.
544 pub fn code_list_only(mut self) -> Self {
545 self.structure_checks = false;
546 self.code_list_checks = true;
547 self
548 }
549
550 /// Configure whether unknown segment tags should be rejected.
551 pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
552 self.enforce_known_tags = enforce;
553 self
554 }
555
556 /// Override the required-segments mapping used for structural validation.
557 ///
558 /// The supplied function receives an EDIFACT message type string (e.g. `"ORDERS"`)
559 /// and must return a `'static` slice of segment tags that are mandatory for that
560 /// type. The tags are checked both for *presence* and for *canonical ordering*
561 /// within the message.
562 ///
563 /// # Example
564 ///
565 /// ```rust,ignore
566 /// fn my_required_segments(msg_type: &str) -> &'static [&'static str] {
567 /// match msg_type {
568 /// "DESADV" => &["UNH", "BGM", "SHP", "UNT"],
569 /// "INVOIC" => &["UNH", "BGM", "MOA", "UNT"],
570 /// _ => &["UNH", "UNT"],
571 /// }
572 /// }
573 ///
574 /// let validator = DirectoryValidator::from_definitions(DEFS)
575 /// .with_required_segments(my_required_segments);
576 /// ```
577 pub fn with_required_segments(
578 mut self,
579 f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
580 ) -> Self {
581 self.required_segments = Arc::new(f);
582 self
583 }
584
585 fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
586 if let Some(explicit) = self.message_type.as_deref() {
587 return Some(explicit.to_owned());
588 }
589
590 segments
591 .iter()
592 .find(|s| s.tag == "UNH")
593 .and_then(|s| s.get_element(1))
594 .and_then(|e| e.get_component(0))
595 .map(str::to_owned)
596 }
597
598 /// Count the non-trailing-empty components in element `element_idx` of `seg`.
599 ///
600 /// Per ISO 9735-1 §3.3 ("Trailing empty component data elements may be omitted"),
601 /// a sender is not required to transmit trailing empty components; this function
602 /// therefore strips them before checking against the expected count so that
603 /// conformant messages with omitted trailing components are still accepted.
604 ///
605 /// # Examples
606 ///
607 /// - `DTM+137:20200101:` has three declared components but only 2 non-empty → effective=2
608 /// - `NAD+MS++::293` has a composite with 3 components, last two empty → effective=1
609 fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
610 let elem = seg.elements.get(element_idx)?;
611 let mut count = elem.components.len();
612 while count > 0 && elem.components[count - 1].0.as_ref().is_empty() {
613 count -= 1;
614 }
615 u8::try_from(count).ok()
616 }
617
618 fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
619 for idx in 0..seg.elements.len() {
620 if let Some(expected) = (self.expected_components)(seg.tag, idx) {
621 let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
622 if actual != expected {
623 return Err(EdifactError::InvalidComponentCount {
624 tag: seg.tag.to_owned(),
625 element_index: idx,
626 expected,
627 actual,
628 offset: seg.span.start,
629 });
630 }
631 }
632 }
633 Ok(())
634 }
635
636 fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
637 let rules = (self.code_list_rules)(seg.tag);
638
639 for (elem_idx, comp_idx, de) in rules {
640 let value = seg
641 .get_element(*elem_idx)
642 .and_then(|e| e.get_component(*comp_idx))
643 .unwrap_or("");
644 if !value.is_empty() && !(self.is_code_valid)(de, value) {
645 let suggestion = (self.suggest_code)(de, value);
646 return Err(EdifactError::InvalidCodeValue {
647 tag: seg.tag.to_owned(),
648 element_index: *elem_idx,
649 value: value.to_owned(),
650 code_list: (*de).to_owned(),
651 offset: seg.span.start,
652 suggestion,
653 });
654 }
655 }
656
657 Ok(())
658 }
659}
660
661impl DirectoryValidator {
662 fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
663 if let Some(owned) = &self.owned_defs {
664 owned
665 .iter()
666 .find(|d| d.tag == tag)
667 .map(SegmentDefRef::Owned)
668 } else {
669 (self.segment_lookup)(tag).map(SegmentDefRef::Static)
670 }
671 }
672
673 fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
674 if !self.structure_checks && !self.code_list_checks {
675 return Ok(());
676 }
677
678 let Some(def) = self.resolve_def(seg.tag) else {
679 if self.structure_checks && self.enforce_known_tags {
680 return Err(EdifactError::InvalidSegmentForMessage {
681 tag: seg.tag.to_owned(),
682 message_type: self
683 .message_type
684 .clone()
685 .unwrap_or_else(|| self.directory_id.clone()),
686 offset: seg.tag_span.start,
687 });
688 }
689 return Ok(());
690 };
691
692 let max_elements = def.max_element_position();
693 let min_elements = def.last_mandatory_position();
694 let actual = seg.elements.len();
695
696 if self.structure_checks && (actual < min_elements || actual > max_elements) {
697 return Err(EdifactError::InvalidElementCount {
698 tag: seg.tag.to_owned(),
699 min: min_elements,
700 max: max_elements,
701 actual,
702 offset: seg.span.start,
703 });
704 }
705
706 if self.structure_checks {
707 def.for_each_mandatory_position(|idx, _de| {
708 let is_present = seg.elements.get(idx).is_some_and(|elem| {
709 elem.components.iter().any(|(c, _)| !c.as_ref().is_empty())
710 });
711 if !is_present {
712 return Err(EdifactError::MissingRequiredElement {
713 tag: seg.tag.to_owned(),
714 element_index: idx,
715 });
716 }
717 Ok(())
718 })?;
719 self.validate_component_counts(seg)?;
720
721 if let Some(rule) = &self.additional_structure_rule {
722 rule(seg)?;
723 }
724 }
725
726 if self.code_list_checks {
727 self.validate_code_lists(seg)?;
728 }
729
730 Ok(())
731 }
732}
733
734impl Validator for DirectoryValidator {
735 fn set_message_type(&mut self, message_type: Option<&str>) {
736 self.message_type = message_type.map(str::to_owned);
737 }
738
739 fn validate_batch(
740 &self,
741 segments: &[Segment<'_>],
742 report: &mut ValidationReport,
743 _context: &ValidationRuleContext<'_>,
744 ) {
745 for seg in segments {
746 if let Err(err) = self.validate_segment(seg) {
747 report_error(report, err);
748 }
749 }
750
751 if self.structure_checks {
752 if let Some(message_type) = self.detect_message_type(segments) {
753 for required_tag in (self.required_segments)(&message_type) {
754 if segments.iter().all(|s| s.tag != *required_tag) {
755 report.add_error(
756 ValidationIssue::new(
757 ValidationSeverity::Error,
758 format!(
759 "required segment {} missing for message type {}",
760 required_tag, message_type
761 ),
762 )
763 .with_segment(*required_tag)
764 .with_suggestion("Add the mandatory segment at the correct position"),
765 );
766 }
767 }
768
769 let seq = (self.required_segments)(&message_type);
770 let mut last_idx = None;
771 for tag in seq {
772 if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
773 if let Some(prev) = last_idx {
774 if idx < prev {
775 report.add_error(
776 ValidationIssue::new(
777 ValidationSeverity::Error,
778 format!(
779 "segment sequence violation for message type {}: '{}' appears out of order",
780 message_type, tag
781 ),
782 )
783 .with_segment(*tag)
784 .with_suggestion(
785 "Ensure required segments follow UN/EDIFACT canonical order",
786 ),
787 );
788 }
789 }
790 last_idx = Some(idx);
791 }
792 }
793 }
794 }
795 }
796}
797
798// ── DirectoryValidatorBuilder ─────────────────────────────────────────────────
799
800/// Builder for [`DirectoryValidator`] using runtime-owned segment definitions.
801///
802/// Use this when segment definitions are loaded from an external source at
803/// startup (JSON, database, YAML, …) rather than being available as `static`
804/// arrays at compile time.
805///
806/// # Example
807///
808/// ```rust,ignore
809/// let validator = DirectoryValidatorBuilder::new("my-profile")
810/// .add_segment(
811/// OwnedSegmentDef::new(
812/// "BGM".to_owned(),
813/// "Beginning of message".to_owned(),
814/// vec![OwnedElementRef::new(1, "C002".to_owned(), Status::Mandatory, 1)],
815/// ),
816/// )
817/// .build();
818/// ```
819#[derive(Debug, Default)]
820pub struct DirectoryValidatorBuilder {
821 directory_id: Option<String>,
822 segments: Vec<OwnedSegmentDef>,
823}
824
825impl DirectoryValidatorBuilder {
826 /// Create a new builder with the given directory identifier.
827 ///
828 /// The identifier is used in error messages; set a human-readable value
829 /// such as `"UTILMD-5.5.3a"` or `"custom-profile"`.
830 pub fn new(directory_id: impl Into<String>) -> Self {
831 Self {
832 directory_id: Some(directory_id.into()),
833 segments: Vec::new(),
834 }
835 }
836
837 /// Add a segment definition to the builder.
838 ///
839 /// Definitions can be added in any order; the resulting validator looks
840 /// them up by tag at validation time.
841 pub fn add_segment(mut self, def: OwnedSegmentDef) -> Self {
842 self.segments.push(def);
843 self
844 }
845
846 /// Extend the builder with multiple segment definitions at once.
847 pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
848 self.segments.extend(defs);
849 self
850 }
851
852 /// Build the [`DirectoryValidator`].
853 ///
854 /// Returns a validator backed by the accumulated [`OwnedSegmentDef`]s.
855 /// Code-list checks are disabled by default; chain
856 /// [`DirectoryValidator::with_code_list_rules`] on the returned value to
857 /// enable them.
858 pub fn build(self) -> DirectoryValidator {
859 let mut validator = DirectoryValidator::from_owned_definitions(self.segments);
860 if let Some(id) = self.directory_id {
861 validator.directory_id = id;
862 }
863 validator
864 }
865}
866
#[cfg(test)]
mod tests {
    use super::*;

    /// Single mandatory composite at position 1.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// The only segment known to the test directory.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Lookup hook that knows `TST` and nothing else.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        if tag == "TST" {
            Some(&TEST_SEGMENT)
        } else {
            None
        }
    }

    /// Code hook that accepts everything.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Suggestion hook that never suggests anything.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// Component-count hook that expects nothing.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    /// Parse `input` and run it through `validator`, returning the report.
    fn run_validation(validator: &DirectoryValidator, input: &[u8]) -> ValidationReport {
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let mut report = ValidationReport::default();
        validator.validate_batch(
            &segments,
            &mut report,
            &crate::validator::ValidationRuleContext::empty(),
        );
        report
    }

    /// Parse `input` and hand back its first segment in owned form.
    fn parse_single(input: &[u8]) -> crate::OwnedSegment {
        let parsed =
            crate::from_reader_collect(std::io::Cursor::new(input)).expect("parse should succeed");
        parsed.into_iter().next().expect("at least one segment")
    }

    /// Effective component count of element `element_idx` in the first segment
    /// of `input`.
    fn effective_count(input: &[u8], element_idx: usize) -> Option<u8> {
        let owned = parse_single(input);
        let seg = owned.as_borrowed();
        DirectoryValidator::effective_component_count(&seg, element_idx)
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        // First component empty, second non-empty: still counts as present.
        let report = run_validation(&validator, b"TST+:ABC'");
        assert!(!report.has_errors());
    }

    // ── effective_component_count (ISO 9735-1 §3.3 trailing-empty-component trim) ──

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        // Element 0 of `DTM+137:20200101:` carries three components, the last
        // of which is empty. Trailing empty components may be omitted
        // (ISO 9735-1 §3.3), so the effective count is 2.
        assert_eq!(
            effective_count(b"DTM+137:20200101:'", 0),
            Some(2),
            "trailing empty component should be stripped"
        );
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        // Element 2 of `NAD+MS++:` is ":" — two empty components — so nothing
        // remains after trimming.
        assert_eq!(
            effective_count(b"NAD+MS++:'", 2),
            Some(0),
            "all-empty composite should have effective count 0"
        );
    }

    #[test]
    fn non_empty_component_not_stripped() {
        // All three components of element 0 in `DTM+137:20200101:102` are
        // non-empty, so nothing is trimmed.
        assert_eq!(
            effective_count(b"DTM+137:20200101:102'", 0),
            Some(3),
            "no components should be stripped when all non-empty"
        );
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        // Custom rules: element 0 / component 0 of TST is checked against a
        // code list that only contains "VALID".
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            if tag == "TST" {
                &[(0, 0, "CUSTOM_DE")]
            } else {
                &[]
            }
        }
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let report = run_validation(&validator, b"TST+INVALID'");

        // NOTE(review): this asserts on warnings while the other test asserts
        // on errors — confirm which severity `report_error` assigns to
        // invalid code values.
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}
1021}