use crate::validator::{ValidationRuleContext, Validator, report_error};
use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
use std::sync::Arc;
/// Requirement status of an element within a segment definition.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Status {
/// The element is required; structure validation reports it when absent.
Mandatory,
/// The element is not subject to this module's presence checks.
Conditional,
}
/// Compile-time description of one element slot inside a [`SegmentDefinition`].
#[derive(Debug, Clone, Copy)]
pub struct ElementRef {
// One-based position of the element within its segment; `ElementRef::new`
// rejects zero.
position: u8,
// Identifier of the data element occupying this slot (the tests use "C507").
data_element: &'static str,
// Whether the element is mandatory or conditional.
status: Status,
// Repetition limit carried with the definition; the validation code in this
// file does not consult it.
max_repeat: u8,
}
impl ElementRef {
    /// Builds an element reference usable in `const`/`static` tables.
    ///
    /// # Panics
    ///
    /// Panics when `position` is zero, because positions are one-based.
    #[must_use]
    pub const fn new(
        position: u8,
        data_element: &'static str,
        status: Status,
        max_repeat: u8,
    ) -> Self {
        assert!(
            position >= 1,
            "ElementRef position must be >= 1 (one-based)"
        );
        Self {
            position,
            data_element,
            status,
            max_repeat,
        }
    }

    /// One-based position of the element within its segment.
    #[must_use]
    #[inline]
    pub const fn position(&self) -> u8 {
        self.position
    }

    /// Identifier of the data element stored at this position.
    #[must_use]
    #[inline]
    pub const fn data_element(&self) -> &'static str {
        self.data_element
    }

    /// Whether the element is mandatory or conditional.
    #[must_use]
    #[inline]
    pub const fn status(&self) -> Status {
        self.status
    }

    /// Repetition limit recorded for the element.
    #[must_use]
    #[inline]
    pub const fn max_repeat(&self) -> u8 {
        self.max_repeat
    }
}
/// Static description of a segment: its tag, a display name and its elements.
#[derive(Debug)]
pub struct SegmentDefinition {
/// Segment tag used as the lookup key (e.g. "TST" in the tests).
pub tag: &'static str,
/// Descriptive name of the segment.
pub name: &'static str,
/// Element slots of the segment; their positions drive the element-count
/// and mandatory-element checks.
pub elements: &'static [ElementRef],
}
/// Heap-owned counterpart of [`ElementRef`] for definitions built at runtime.
#[derive(Debug, Clone)]
pub struct OwnedElementRef {
// One-based position of the element; both constructors reject zero.
position: u8,
// Identifier of the data element occupying this slot.
data_element: String,
// Whether the element is mandatory or conditional.
status: Status,
// Repetition limit carried with the definition; the validation code in this
// file does not consult it.
max_repeat: u8,
}
/// Heap-owned counterpart of [`SegmentDefinition`] for definitions built at runtime.
#[derive(Debug, Clone)]
pub struct OwnedSegmentDef {
// Segment tag; the constructors require exactly three ASCII uppercase letters.
tag: String,
// Descriptive name of the segment.
name: String,
// Element slots of the segment.
elements: Vec<OwnedElementRef>,
}
impl OwnedSegmentDef {
    /// Returns `true` when `tag` consists of exactly three ASCII uppercase letters.
    fn is_well_formed_tag(tag: &str) -> bool {
        matches!(
            tag.as_bytes(),
            [b'A'..=b'Z', b'A'..=b'Z', b'A'..=b'Z']
        )
    }

    /// Builds a segment definition, panicking on a malformed tag.
    ///
    /// # Panics
    ///
    /// Panics when `tag` is not exactly three ASCII uppercase letters. Use
    /// [`OwnedSegmentDef::try_new`] to receive an error instead.
    pub fn new_unchecked(tag: String, name: String, elements: Vec<OwnedElementRef>) -> Self {
        assert!(
            Self::is_well_formed_tag(&tag),
            "OwnedSegmentDef::new_unchecked: tag must be exactly three ASCII uppercase letters, got {tag:?}"
        );
        Self {
            tag,
            name,
            elements,
        }
    }

    /// Builds a segment definition after validating the tag.
    ///
    /// # Errors
    ///
    /// Returns [`EdifactError::InvalidSegmentTag`] carrying the rejected tag
    /// when it is not exactly three ASCII uppercase letters.
    pub fn try_new(
        tag: String,
        name: String,
        elements: Vec<OwnedElementRef>,
    ) -> Result<Self, EdifactError> {
        if Self::is_well_formed_tag(&tag) {
            Ok(Self {
                tag,
                name,
                elements,
            })
        } else {
            Err(EdifactError::InvalidSegmentTag(tag))
        }
    }

    /// Segment tag.
    #[inline]
    pub fn tag(&self) -> &str {
        self.tag.as_str()
    }

    /// Descriptive segment name.
    #[inline]
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Element slots of the segment, in the order they were supplied.
    #[inline]
    pub fn elements(&self) -> &[OwnedElementRef] {
        self.elements.as_slice()
    }
}
impl OwnedElementRef {
    /// Builds an element reference, panicking on an invalid position.
    ///
    /// # Panics
    ///
    /// Panics when `position` is zero, because positions are one-based. Use
    /// [`OwnedElementRef::try_new`] to receive an error instead.
    pub fn new_unchecked(
        position: u8,
        data_element: String,
        status: Status,
        max_repeat: u8,
    ) -> Self {
        assert!(
            position >= 1,
            "OwnedElementRef::new_unchecked: position must be >= 1 (one-based), got 0"
        );
        Self {
            position,
            data_element,
            status,
            max_repeat,
        }
    }

    /// Builds an element reference after validating the position.
    ///
    /// # Errors
    ///
    /// Returns [`EdifactError::InvalidElementPosition`] when `position` is zero.
    pub fn try_new(
        position: u8,
        data_element: String,
        status: Status,
        max_repeat: u8,
    ) -> Result<Self, EdifactError> {
        match position {
            0 => Err(EdifactError::InvalidElementPosition),
            _ => Ok(Self {
                position,
                data_element,
                status,
                max_repeat,
            }),
        }
    }

    /// One-based position of the element within its segment.
    #[inline]
    pub fn position(&self) -> u8 {
        self.position
    }

    /// Identifier of the data element stored at this position.
    #[inline]
    pub fn data_element(&self) -> &str {
        self.data_element.as_str()
    }

    /// Whether the element is mandatory or conditional.
    #[inline]
    pub fn status(&self) -> Status {
        self.status
    }

    /// Repetition limit recorded for the element.
    #[inline]
    pub fn max_repeat(&self) -> u8 {
        self.max_repeat
    }
}
// Shared callback types stored inside `DirectoryValidator`. Each is an
// `Arc<dyn Fn ... + Send + Sync>` so the validator itself can derive `Clone`.

// Maps a segment tag to its static definition, or `None` for an unknown tag.
type SegmentLookupFn = Arc<dyn Fn(&str) -> Option<&'static SegmentDefinition> + Send + Sync>;
// Called as `(data_element, value)`; returns whether the value is an accepted code.
type IsCodeValidFn = Arc<dyn Fn(&str, &str) -> bool + Send + Sync>;
// Called as `(data_element, value)`; optionally proposes a replacement code.
type SuggestCodeFn = Arc<dyn Fn(&str, &str) -> Option<&'static str> + Send + Sync>;
// Called as `(segment_tag, zero_based_element_index)`; returns the required
// component count, or `None` when the element is not constrained.
type ExpectedComponentsFn = Arc<dyn Fn(&str, usize) -> Option<u8> + Send + Sync>;
// Plain function-pointer form of the extra structure rule accepted by
// `DirectoryValidator::new`.
type AdditionalStructureRuleRefFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
// Stored (reference-counted) form of the extra structure rule.
type AdditionalStructureRuleFn =
Arc<dyn Fn(&Segment<'_>) -> Result<(), EdifactError> + Send + Sync>;
// Maps a segment tag to `(element_index, component_index, data_element)` triples
// naming the values that must be checked against a code list.
type CodeListRulesFn = Arc<dyn Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync>;
// Maps a message type to the segment tags that must appear in a batch.
type RequiredSegmentsFn = Arc<dyn Fn(&str) -> &'static [&'static str] + Send + Sync>;
// Borrowed view over a segment definition, regardless of whether it comes from
// a static table or from runtime-built owned definitions.
enum SegmentDefRef<'a> {
// Definition taken from a `'static` table.
Static(&'static SegmentDefinition),
// Definition borrowed from the validator's owned definitions.
Owned(&'a OwnedSegmentDef),
}
impl SegmentDefRef<'_> {
    /// Highest one-based element position declared by the definition, or `0`
    /// when it declares no elements.
    fn max_element_position(&self) -> usize {
        match self {
            Self::Static(def) => def
                .elements
                .iter()
                .map(|el| usize::from(el.position))
                .fold(0, usize::max),
            Self::Owned(def) => def
                .elements
                .iter()
                .map(|el| usize::from(el.position))
                .fold(0, usize::max),
        }
    }

    /// Highest one-based position among the mandatory elements, or `0` when
    /// the definition has no mandatory element.
    fn last_mandatory_position(&self) -> usize {
        match self {
            Self::Static(def) => def
                .elements
                .iter()
                .filter_map(|el| {
                    (el.status == Status::Mandatory).then_some(usize::from(el.position))
                })
                .fold(0, usize::max),
            Self::Owned(def) => def
                .elements
                .iter()
                .filter_map(|el| {
                    (el.status == Status::Mandatory).then_some(usize::from(el.position))
                })
                .fold(0, usize::max),
        }
    }

    /// Invokes `f` with `(zero_based_index, data_element)` for every mandatory
    /// element in declaration order, stopping at the first error.
    fn for_each_mandatory_position<E, F>(&self, mut f: F) -> Result<(), E>
    where
        F: FnMut(usize, &str) -> Result<(), E>,
    {
        match self {
            Self::Static(def) => def
                .elements
                .iter()
                .filter(|el| el.status == Status::Mandatory)
                .try_for_each(|el| {
                    // Positions are one-based; callers index elements from zero.
                    f(usize::from(el.position).saturating_sub(1), el.data_element)
                }),
            Self::Owned(def) => def
                .elements
                .iter()
                .filter(|el| el.status == Status::Mandatory)
                .try_for_each(|el| {
                    f(
                        usize::from(el.position).saturating_sub(1),
                        el.data_element.as_str(),
                    )
                }),
        }
    }
}
/// Default required-segment list: `UNH` and `UNT`, whatever the message type.
fn default_required_segments(_message_type: &str) -> &'static [&'static str] {
    const REQUIRED: &[&str] = &["UNH", "UNT"];
    REQUIRED
}
/// Built-in code-list rules for a segment tag.
///
/// Each rule is `(element_index, component_index, data_element)`. Every
/// built-in rule targets component 0 of element 0; unknown tags yield an empty
/// slice. Tag matching is exact and case-sensitive.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    type Rules = &'static [(usize, usize, &'static str)];

    const TABLE: &[(&str, Rules)] = &[
        ("BGM", &[(0, 0, "1001")]),
        ("DTM", &[(0, 0, "2005")]),
        ("NAD", &[(0, 0, "3035")]),
        ("QTY", &[(0, 0, "6063")]),
        ("RFF", &[(0, 0, "1153")]),
        ("MOA", &[(0, 0, "5025")]),
        ("PRI", &[(0, 0, "5125")]),
        ("LOC", &[(0, 0, "3227")]),
    ];

    match TABLE.iter().find(|(known, _)| *known == tag) {
        Some(&(_, rules)) => rules,
        None => &[],
    }
}
/// Validator that checks segments against a directory of segment definitions,
/// optional component-count expectations and code lists.
#[derive(Clone)]
pub struct DirectoryValidator {
// Identifier of the directory; also used as the `message_type` in the
// unknown-segment error when no message type has been set.
directory_id: String,
// Lookup into static definitions; only consulted when `owned_defs` is `None`.
segment_lookup: SegmentLookupFn,
// Runtime-built definitions; when present they are searched instead of
// calling `segment_lookup`.
owned_defs: Option<Arc<Vec<OwnedSegmentDef>>>,
// Decides whether a value is acceptable for a data element's code list.
is_code_valid: IsCodeValidFn,
// Optionally proposes a replacement for a rejected code value.
suggest_code: SuggestCodeFn,
// Expected component count per (tag, element index), if constrained.
expected_components: ExpectedComponentsFn,
// Which (element, component) values of a segment are code-list checked.
code_list_rules: CodeListRulesFn,
// Extra per-segment structure rule run after the built-in structure checks.
additional_structure_rule: Option<AdditionalStructureRuleFn>,
// Segment tags a batch must contain for a given message type.
required_segments: RequiredSegmentsFn,
// Explicit message type; when `None` it is detected from the `UNH` segment.
message_type: Option<String>,
// Whether a tag without a definition is an error (only with structure checks).
enforce_known_tags: bool,
// Enables element-count, mandatory-element, component-count and
// required-segment checks.
structure_checks: bool,
// Enables code-list validation.
code_list_checks: bool,
}
impl std::fmt::Debug for DirectoryValidator {
    /// Formats the plain configuration fields; the callback fields cannot be
    /// printed and are represented by the trailing `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            directory_id,
            message_type,
            enforce_known_tags,
            structure_checks,
            code_list_checks,
            ..
        } = self;

        let mut out = f.debug_struct("DirectoryValidator");
        out.field("directory_id", directory_id);
        out.field("message_type", message_type);
        out.field("enforce_known_tags", enforce_known_tags);
        out.field("structure_checks", structure_checks);
        out.field("code_list_checks", code_list_checks);
        out.finish_non_exhaustive()
    }
}
impl DirectoryValidator {
    /// Creates a validator backed by plain function pointers.
    ///
    /// Known-tag enforcement, structure checks and code-list checks all start
    /// enabled. The built-in code-list rules and the default required segments
    /// (`UNH`, `UNT`) are used until overridden.
    pub fn new(
        directory_id: &'static str,
        segment_lookup: fn(&str) -> Option<&'static SegmentDefinition>,
        is_code_valid: fn(&str, &str) -> bool,
        suggest_code: fn(&str, &str) -> Option<&'static str>,
        expected_components: fn(&str, usize) -> Option<u8>,
        additional_structure_rule: Option<AdditionalStructureRuleRefFn>,
    ) -> Self {
        Self {
            directory_id: directory_id.to_owned(),
            segment_lookup: Arc::new(segment_lookup),
            owned_defs: None,
            is_code_valid: Arc::new(is_code_valid),
            suggest_code: Arc::new(suggest_code),
            expected_components: Arc::new(expected_components),
            code_list_rules: Arc::new(base_code_list_rules),
            additional_structure_rule: additional_structure_rule
                .map(|f| Arc::new(f) as AdditionalStructureRuleFn),
            required_segments: Arc::new(default_required_segments),
            message_type: None,
            enforce_known_tags: true,
            structure_checks: true,
            code_list_checks: true,
        }
    }

    /// Creates a structure-only validator from a static definition table.
    ///
    /// The directory id is `"custom"`. Code-list checks start disabled, every
    /// code is accepted and no component-count expectation is installed.
    pub fn from_definitions(definitions: &'static [SegmentDefinition]) -> Self {
        // Index the table once so each lookup is a hash probe.
        let lookup_map: std::collections::HashMap<&'static str, &'static SegmentDefinition> =
            definitions.iter().map(|d| (d.tag, d)).collect();
        let lookup_map = Arc::new(lookup_map);
        Self {
            directory_id: "custom".to_owned(),
            segment_lookup: Arc::new(move |tag: &str| lookup_map.get(tag).copied()),
            owned_defs: None,
            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
            suggest_code: Arc::new(|_de: &str, _code: &str| None),
            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
            code_list_rules: Arc::new(base_code_list_rules),
            additional_structure_rule: None,
            required_segments: Arc::new(default_required_segments),
            message_type: None,
            enforce_known_tags: true,
            structure_checks: true,
            code_list_checks: false,
        }
    }

    /// Creates a structure-only validator from runtime-built definitions.
    ///
    /// Same defaults as [`DirectoryValidator::from_definitions`]. Segment
    /// definitions are resolved from `definitions` only.
    pub fn from_owned_definitions(definitions: Vec<OwnedSegmentDef>) -> Self {
        Self {
            directory_id: "custom".to_owned(),
            // Never consulted while `owned_defs` is `Some`.
            segment_lookup: Arc::new(|_| None),
            owned_defs: Some(Arc::new(definitions)),
            is_code_valid: Arc::new(|_de: &str, _code: &str| true),
            suggest_code: Arc::new(|_de: &str, _code: &str| None),
            expected_components: Arc::new(|_tag: &str, _idx: usize| None),
            code_list_rules: Arc::new(base_code_list_rules),
            additional_structure_rule: None,
            required_segments: Arc::new(default_required_segments),
            message_type: None,
            enforce_known_tags: true,
            structure_checks: true,
            code_list_checks: false,
        }
    }

    /// Replaces the directory identifier.
    pub fn with_directory_id(mut self, id: impl Into<String>) -> Self {
        self.directory_id = id.into();
        self
    }

    /// Replaces the callback that selects which values are code-list checked.
    ///
    /// `f` maps a segment tag to `(element_index, component_index,
    /// data_element)` triples.
    pub fn with_code_list_rules(
        mut self,
        f: impl Fn(&str) -> &'static [(usize, usize, &'static str)] + Send + Sync + 'static,
    ) -> Self {
        self.code_list_rules = Arc::new(f);
        self
    }

    /// Enables structure checks and disables code-list checks.
    pub fn structure_only(mut self) -> Self {
        self.structure_checks = true;
        self.code_list_checks = false;
        self
    }

    /// Enables code-list checks and disables structure checks.
    pub fn code_list_only(mut self) -> Self {
        self.structure_checks = false;
        self.code_list_checks = true;
        self
    }

    /// Sets whether a segment tag without a definition is reported as an error.
    pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
        self.enforce_known_tags = enforce;
        self
    }

    /// Replaces the callback that lists the required segment tags for a
    /// message type.
    pub fn with_required_segments(
        mut self,
        f: impl Fn(&str) -> &'static [&'static str] + Send + Sync + 'static,
    ) -> Self {
        self.required_segments = Arc::new(f);
        self
    }

    /// Returns the message type to validate against.
    ///
    /// An explicitly configured message type wins. Otherwise it is read from
    /// component 0 of element index 1 of the first `UNH` segment, if any.
    fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
        if let Some(explicit) = self.message_type.as_deref() {
            return Some(explicit.to_owned());
        }
        segments
            .iter()
            .find(|s| s.tag == "UNH")
            .and_then(|s| s.get_element(1))
            .and_then(|e| e.get_component(0))
            .map(str::to_owned)
    }

    /// Counts the components of element `element_idx`, ignoring trailing
    /// empty components.
    ///
    /// Returns `None` when the element does not exist or when the count does
    /// not fit in a `u8`.
    fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
        let elem = seg.elements.get(element_idx)?;
        let mut count = elem.components.len();
        // Drop trailing empties only; interior empty components still count.
        while count > 0 && elem.components[count - 1].0.as_ref().is_empty() {
            count -= 1;
        }
        u8::try_from(count).ok()
    }

    /// Checks every element that has a component-count expectation.
    ///
    /// # Errors
    ///
    /// Returns [`EdifactError::InvalidComponentCount`] for the first element
    /// whose effective component count differs from the expected one.
    fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
        for idx in 0..seg.elements.len() {
            let Some(expected) = (self.expected_components)(seg.tag, idx) else {
                continue;
            };
            // `idx` is in range, so `None` can only mean more than `u8::MAX`
            // components. That never matches a `u8` expectation, so it must
            // not be folded into "zero components".
            let actual = Self::effective_component_count(seg, idx);
            if actual != Some(expected) {
                return Err(EdifactError::InvalidComponentCount {
                    tag: seg.tag.to_owned(),
                    element_index: idx,
                    expected,
                    // The error field is a `u8`; report a saturated count
                    // when the real one does not fit.
                    actual: actual.unwrap_or(u8::MAX),
                    offset: seg.span.start,
                });
            }
        }
        Ok(())
    }

    /// Validates the code-list-controlled values of a segment.
    ///
    /// Empty or absent values are skipped.
    ///
    /// # Errors
    ///
    /// Returns [`EdifactError::InvalidCodeValue`] for the first non-empty
    /// value rejected by the `is_code_valid` callback, with a suggestion from
    /// `suggest_code` if one is available.
    fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
        let rules = (self.code_list_rules)(seg.tag);
        for &(elem_idx, comp_idx, de) in rules {
            let value = seg
                .get_element(elem_idx)
                .and_then(|e| e.get_component(comp_idx))
                .unwrap_or("");
            if !value.is_empty() && !(self.is_code_valid)(de, value) {
                let suggestion = (self.suggest_code)(de, value);
                return Err(EdifactError::InvalidCodeValue {
                    tag: seg.tag.to_owned(),
                    element_index: elem_idx,
                    value: value.to_owned(),
                    code_list: de.to_owned(),
                    offset: seg.span.start,
                    suggestion,
                });
            }
        }
        Ok(())
    }
}
impl DirectoryValidator {
    /// Finds the definition for `tag`.
    ///
    /// Owned definitions, when configured, are the only source consulted;
    /// otherwise the static lookup callback is used.
    fn resolve_def<'a>(&'a self, tag: &str) -> Option<SegmentDefRef<'a>> {
        match &self.owned_defs {
            Some(defs) => defs
                .iter()
                .find(|def| def.tag == tag)
                .map(SegmentDefRef::Owned),
            None => (self.segment_lookup)(tag).map(SegmentDefRef::Static),
        }
    }

    /// Runs the enabled per-segment checks and returns the first failure.
    ///
    /// # Errors
    ///
    /// Depending on the enabled checks: an unknown tag, an element count
    /// outside the definition's bounds, a missing mandatory element, a
    /// component-count mismatch, a failure from the additional structure rule,
    /// or an invalid code value.
    fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
        if !(self.structure_checks || self.code_list_checks) {
            return Ok(());
        }

        let def = match self.resolve_def(seg.tag) {
            Some(def) => def,
            // Unknown tags are only an error under structure checks with
            // known-tag enforcement; otherwise the segment is skipped.
            None if self.structure_checks && self.enforce_known_tags => {
                return Err(EdifactError::InvalidSegmentForMessage {
                    tag: seg.tag.to_owned(),
                    message_type: self
                        .message_type
                        .clone()
                        .unwrap_or_else(|| self.directory_id.clone()),
                    offset: seg.tag_span.start,
                });
            }
            None => return Ok(()),
        };

        if self.structure_checks {
            let upper = def.max_element_position();
            let lower = def.last_mandatory_position();
            let found = seg.elements.len();
            if !(lower..=upper).contains(&found) {
                return Err(EdifactError::InvalidElementCount {
                    tag: seg.tag.to_owned(),
                    min: lower,
                    max: upper,
                    actual: found,
                    offset: seg.span.start,
                });
            }

            def.for_each_mandatory_position(|idx, _de| {
                // A mandatory element counts as present when any of its
                // components is non-empty.
                let is_present = seg.elements.get(idx).is_some_and(|elem| {
                    elem.components.iter().any(|(c, _)| !c.as_ref().is_empty())
                });
                if is_present {
                    Ok(())
                } else {
                    Err(EdifactError::MissingRequiredElement {
                        tag: seg.tag.to_owned(),
                        element_index: idx,
                    })
                }
            })?;

            self.validate_component_counts(seg)?;

            if let Some(rule) = &self.additional_structure_rule {
                rule(seg)?;
            }
        }

        if self.code_list_checks {
            self.validate_code_lists(seg)?;
        }

        Ok(())
    }
}
impl Validator for DirectoryValidator {
    /// Sets or clears the explicit message type used for validation.
    fn set_message_type(&mut self, message_type: Option<&str>) {
        self.message_type = message_type.map(String::from);
    }

    /// Validates every segment, then (with structure checks enabled) verifies
    /// that the required segments are present and appear in the listed order.
    ///
    /// Findings are appended to `report`; nothing is returned.
    fn validate_batch(
        &self,
        segments: &[Segment<'_>],
        report: &mut ValidationReport,
        _context: &ValidationRuleContext<'_>,
    ) {
        // Per-segment checks: at most one finding per segment.
        segments
            .iter()
            .filter_map(|seg| self.validate_segment(seg).err())
            .for_each(|err| report_error(report, err));

        if !self.structure_checks {
            return;
        }
        let Some(message_type) = self.detect_message_type(segments) else {
            return;
        };

        // Presence: each required tag must occur at least once.
        let missing = (self.required_segments)(&message_type)
            .iter()
            .filter(|required_tag| segments.iter().all(|s| s.tag != **required_tag));
        for required_tag in missing {
            report.add_error(
                ValidationIssue::new(
                    ValidationSeverity::Error,
                    format!(
                        "required segment {} missing for message type {}",
                        required_tag, message_type
                    ),
                )
                .with_segment(*required_tag)
                .with_suggestion("Add the mandatory segment at the correct position"),
            );
        }

        // Order: the first occurrence of each required tag must not precede
        // the first occurrence of the previous required tag that was found.
        let mut last_idx: Option<usize> = None;
        for tag in (self.required_segments)(&message_type) {
            let Some(idx) = segments.iter().position(|s| s.tag == *tag) else {
                continue;
            };
            if last_idx.is_some_and(|prev| idx < prev) {
                report.add_error(
                    ValidationIssue::new(
                        ValidationSeverity::Error,
                        format!(
                            "segment sequence violation for message type {}: '{}' appears out of order",
                            message_type, tag
                        ),
                    )
                    .with_segment(*tag)
                    .with_suggestion(
                        "Ensure required segments follow UN/EDIFACT canonical order",
                    ),
                );
            }
            last_idx = Some(idx);
        }
    }
}
/// Builder that assembles a [`DirectoryValidator`] from owned segment definitions.
#[derive(Debug, Default)]
pub struct DirectoryValidatorBuilder {
// Directory id to apply on `build`; when `None` (the `Default` state) the
// validator keeps the id assigned by `from_owned_definitions`.
directory_id: Option<String>,
// Segment definitions collected so far, in insertion order.
segments: Vec<OwnedSegmentDef>,
}
impl DirectoryValidatorBuilder {
    /// Starts a builder for the directory identified by `directory_id`.
    pub fn new(directory_id: impl Into<String>) -> Self {
        let directory_id = Some(directory_id.into());
        Self {
            directory_id,
            segments: Vec::new(),
        }
    }

    /// Appends a single segment definition.
    pub fn add_segment(self, def: OwnedSegmentDef) -> Self {
        self.add_segments(std::iter::once(def))
    }

    /// Appends every segment definition yielded by `defs`, preserving order.
    pub fn add_segments(mut self, defs: impl IntoIterator<Item = OwnedSegmentDef>) -> Self {
        for def in defs {
            self.segments.push(def);
        }
        self
    }

    /// Builds the validator from the collected definitions, applying the
    /// directory id if one was supplied.
    pub fn build(self) -> DirectoryValidator {
        let Self {
            directory_id,
            segments,
        } = self;
        let validator = DirectoryValidator::from_owned_definitions(segments);
        match directory_id {
            Some(id) => validator.with_directory_id(id),
            None => validator,
        }
    }
}
// Unit tests for the directory validator: mandatory-element presence,
// trailing-empty-component stripping and custom code-list rules.
#[cfg(test)]
mod tests {
use super::*;
// Minimal directory: one segment "TST" with a single mandatory element.
static TEST_ELEMENTS: &[ElementRef] = &[ElementRef::new(1, "C507", Status::Mandatory, 1)];
static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
tag: "TST",
name: "Test segment",
elements: TEST_ELEMENTS,
};
// Lookup callback that only knows the "TST" segment.
fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
match tag {
"TST" => Some(&TEST_SEGMENT),
_ => None,
}
}
// Code-list callback that accepts every value.
fn code_valid(_de: &str, _code: &str) -> bool {
true
}
// Suggestion callback that never proposes a replacement.
fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
None
}
// Component-count callback that constrains nothing.
fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
None
}
// A mandatory composite whose first component is empty but whose second is
// filled must still count as present.
#[test]
fn mandatory_composite_present_when_any_component_non_empty() {
let input = b"TST+:ABC'";
let segments: Vec<_> = crate::from_bytes(input)
.collect::<Result<Vec<_>, _>>()
.expect("parse should succeed");
let validator = DirectoryValidator::new(
"TEST",
segment_lookup,
code_valid,
suggest_code,
expected_components,
None,
);
let mut report = ValidationReport::default();
validator.validate_batch(
&segments,
&mut report,
&crate::validator::ValidationRuleContext::empty(),
);
assert!(!report.has_errors());
}
// Parses `input` and returns its first segment as an owned value.
fn parse_single(input: &[u8]) -> crate::OwnedSegment {
crate::from_reader_collect(std::io::Cursor::new(input))
.expect("parse should succeed")
.into_iter()
.next()
.expect("at least one segment")
}
// "137:20200101:" has three components; the trailing empty one is ignored.
#[test]
fn trailing_empty_component_stripped_from_dtm() {
let owned = parse_single(b"DTM+137:20200101:'");
let seg = owned.as_borrowed();
let count = DirectoryValidator::effective_component_count(&seg, 0);
assert_eq!(
count,
Some(2),
"trailing empty component should be stripped"
);
}
// Element index 2 is ":" (two empty components), so nothing remains.
#[test]
fn all_empty_components_result_in_zero() {
let owned = parse_single(b"NAD+MS++:'");
let seg = owned.as_borrowed();
let count = DirectoryValidator::effective_component_count(&seg, 2);
assert_eq!(
count,
Some(0),
"all-empty composite should have effective count 0"
);
}
// With no trailing empties every component is counted.
#[test]
fn non_empty_component_not_stripped() {
let owned = parse_single(b"DTM+137:20200101:102'");
let seg = owned.as_borrowed();
let count = DirectoryValidator::effective_component_count(&seg, 0);
assert_eq!(
count,
Some(3),
"no components should be stripped when all non-empty"
);
}
// Custom rules installed via `with_code_list_rules` replace the built-in
// ones, so "TST" (absent from the base rules) becomes code-list checked.
#[test]
fn with_code_list_rules_overrides_base() {
fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
match tag {
"TST" => &[(0, 0, "CUSTOM_DE")],
_ => &[],
}
}
fn custom_code_valid(_de: &str, code: &str) -> bool {
code == "VALID"
}
fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
None
}
let input = b"TST+INVALID'";
let segments: Vec<_> = crate::from_bytes(input)
.collect::<Result<Vec<_>, _>>()
.expect("parse should succeed");
let validator = DirectoryValidator::new(
"TEST",
segment_lookup,
custom_code_valid,
no_suggestion,
expected_components,
None,
)
.with_code_list_rules(custom_rules);
let mut report = ValidationReport::default();
validator.validate_batch(
&segments,
&mut report,
&crate::validator::ValidationRuleContext::empty(),
);
// NOTE(review): this asserts `has_warnings`, while the presence test above
// checks `has_errors`. Presumably `report_error` records an invalid code
// value as a warning; confirm against `crate::validator::report_error`.
assert!(
report.has_warnings(),
"INVALID is not in the custom code list so validation must warn"
);
}
}