use crate::{OwnedSegment, error::EdifactError, model::Segment};
/// Read-only access to the parts of a segment that envelope validation needs.
///
/// Implemented for both the borrowed and the owned segment type so the
/// validation routines can be written once, generically. The `sealed::Sealed`
/// supertrait restricts implementations to the types listed in `mod sealed`.
pub(crate) trait SegmentReader: sealed::Sealed {
    /// The segment's tag (e.g. `"UNB"`, `"UNH"`).
    fn tag(&self) -> &str;

    /// Start position of the segment's span; used as the `offset` in errors.
    fn span_start(&self) -> usize;

    /// Component `comp_idx` of element `elem_idx`, or `None` when it is absent.
    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str>;

    /// Like [`SegmentReader::component`], but treats an absent *or empty*
    /// component as an error.
    ///
    /// # Errors
    ///
    /// Returns [`EdifactError::MissingRequiredComponent`] carrying this
    /// segment's tag and the requested indices when the component is missing
    /// or is the empty string.
    fn required_component_field(
        &self,
        elem_idx: usize,
        comp_idx: usize,
    ) -> Result<&str, EdifactError> {
        match self.component(elem_idx, comp_idx) {
            Some(value) if !value.is_empty() => Ok(value),
            _ => Err(EdifactError::MissingRequiredComponent {
                tag: self.tag().to_owned(),
                element_index: elem_idx,
                component_index: comp_idx,
            }),
        }
    }
}
// Private helper module for sealing `SegmentReader`: the trait requires
// `Sealed` as a supertrait, so only the types given an impl below can
// implement `SegmentReader`.
mod sealed {
/// Marker supertrait with no methods; exists only to restrict implementors.
pub trait Sealed {}
// The two segment representations that are allowed to implement `SegmentReader`.
impl Sealed for crate::model::Segment<'_> {}
impl Sealed for crate::OwnedSegment {}
}
// `SegmentReader` for the borrowed segment type: each method forwards to the
// segment's own fields or accessors.
impl SegmentReader for Segment<'_> {
// Returns the `tag` field as-is.
#[inline]
fn tag(&self) -> &str {
self.tag
}
// Returns the `start` of the segment's `span`.
#[inline]
fn span_start(&self) -> usize {
self.span.start
}
// Looks up the element first; a missing element short-circuits to `None`
// before the component lookup is attempted.
#[inline]
fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
self.get_element(elem_idx)?.get_component(comp_idx)
}
}
// `SegmentReader` for the owned segment type: each method forwards to the
// segment's own fields or accessors.
impl SegmentReader for OwnedSegment {
// Borrows the `tag` field as a string slice.
#[inline]
fn tag(&self) -> &str {
&self.tag
}
// Returns the `start` of the segment's `span`.
#[inline]
fn span_start(&self) -> usize {
self.span.start
}
// Delegates the element/component lookup to `OwnedSegment::component_str`.
#[inline]
fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
self.component_str(elem_idx, comp_idx)
}
}
/// Interchange-level envelope data read from the UNB (first) and UNZ (last)
/// segments of an interchange.
///
/// Element/component indices below are the ones passed to the segment
/// accessors in `extract_interchange`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
/// UNB element 0, component 0 (e.g. `"UNOA"`).
pub syntax_identifier: String,
/// UNB element 1, component 0.
pub sender_id: String,
/// UNB element 2, component 0.
pub recipient_id: String,
/// UNB element 3: the date component alone, or `date:time` when a
/// non-empty time component is present.
pub datetime: String,
/// UNB element 4, component 0; validation requires UNZ element 1,
/// component 0 to carry the same value.
pub control_ref: String,
/// Message count declared in UNZ element 0, component 0.
pub declared_message_count: u32,
/// Number of UNH..UNT messages actually found. `extract_interchange`
/// leaves this at 0; the validator fills it in afterwards.
pub actual_message_count: u32,
}
/// Message-level envelope data read from one UNH/UNT pair.
///
/// Element/component indices below are the ones passed to the segment
/// accessors in `extract_messages`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
/// UNH element 0, component 0; validation requires UNT element 1,
/// component 0 to carry the same value.
pub message_ref: String,
/// UNH element 1, component 0 (e.g. `"ORDERS"`).
pub message_type: String,
/// UNH element 1, component 1.
pub version: String,
/// UNH element 1, component 2.
pub release: String,
/// UNH element 1, component 3.
pub controlling_agency: String,
/// UNH element 1, component 4; empty string when the component is absent.
pub association_code: String,
/// Segment count declared in UNT element 0, component 0.
pub declared_segment_count: u32,
/// Number of segments counted from the UNH through the UNT, inclusive.
pub actual_segment_count: u32,
}
/// Borrowed view of the message identifier composite of a UNH segment
/// (element 1), as produced by `parse_unh`.
///
/// All fields borrow from the segment that was parsed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MessageIdentifier<'a> {
/// Component 0; the only component `parse_unh` requires to be present.
pub message_type: &'a str,
/// Component 1, or `""` when absent.
pub version: &'a str,
/// Component 2, or `""` when absent.
pub release: &'a str,
/// Component 3, or `""` when absent.
pub controlling_agency: &'a str,
/// Component 4, or `""` when absent.
pub association_assigned: &'a str,
}
/// Extracts the message identifier composite (element 1) from a UNH segment.
///
/// Only the message type (component 0) is mandatory here; version, release,
/// controlling agency and association code fall back to `""` when absent.
/// The returned identifier borrows from `unh`.
///
/// # Errors
///
/// * [`EdifactError::MissingRequiredElement`] when element 1 is absent.
/// * [`EdifactError::MissingRequiredComponent`] when the message type
///   component is absent **or empty**. An empty value is rejected so this
///   function agrees with `SegmentReader::required_component_field`, which
///   envelope validation uses for the same field.
pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
    let elem = unh
        .get_element(1)
        .ok_or_else(|| EdifactError::MissingRequiredElement {
            tag: "UNH".to_owned(),
            element_index: 1,
        })?;

    // A present-but-empty message type (e.g. `UNH+1+:D:96A:UN`) is as unusable
    // as a missing one, so both cases map to the same error.
    let message_type = elem
        .get_component(0)
        .filter(|value| !value.is_empty())
        .ok_or_else(|| EdifactError::MissingRequiredComponent {
            tag: "UNH".to_owned(),
            element_index: 1,
            component_index: 0,
        })?;

    Ok(MessageIdentifier {
        message_type,
        version: elem.get_component(1).unwrap_or(""),
        release: elem.get_component(2).unwrap_or(""),
        controlling_agency: elem.get_component(3).unwrap_or(""),
        association_assigned: elem.get_component(4).unwrap_or(""),
    })
}
/// Strictly validates the envelope of an interchange given as borrowed
/// segments, returning the interchange envelope and one envelope per message.
///
/// Thin wrapper around `validate_envelope_impl`.
///
/// # Errors
///
/// Returns the first problem found: a UNG/UNE segment (functional groups are
/// not supported), a missing UNB/UNZ, a missing required component, a
/// control/message reference mismatch, a misplaced segment, or a declared
/// count that differs from the actual count.
pub fn validate_envelope(
segments: &[Segment<'_>],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
validate_envelope_impl(segments)
}
/// Strictly validates the envelope of an interchange given as owned
/// segments, returning the interchange envelope and one envelope per message.
///
/// Thin wrapper around `validate_envelope_impl`; same checks as
/// `validate_envelope`, without first converting to borrowed segments.
///
/// # Errors
///
/// Returns the first problem found: a UNG/UNE segment (functional groups are
/// not supported), a missing UNB/UNZ, a missing required component, a
/// control/message reference mismatch, a misplaced segment, or a declared
/// count that differs from the actual count.
pub fn validate_envelope_from_owned(
segments: &[OwnedSegment],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
validate_envelope_impl(segments)
}
/// Strict envelope validation shared by the borrowed and owned entry points.
///
/// Checks run in this order and stop at the first failure:
/// 1. no UNG/UNE segment anywhere (functional groups are unsupported),
/// 2. interchange header/trailer extraction (`extract_interchange`),
/// 3. message extraction (`extract_messages`),
/// 4. declared vs. actual message count,
/// 5. declared vs. actual segment count of each message, in order.
///
/// # Errors
///
/// Any error from the two extraction steps, or
/// `FunctionalGroupNotSupported`, `InterchangeTooLarge` (message count does
/// not fit in `u32`), `MessageCountMismatch`, `SegmentCountMismatch`.
fn validate_envelope_impl<S: SegmentReader>(
    segments: &[S],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
    let functional_group = segments
        .iter()
        .find(|seg| matches!(seg.tag(), "UNG" | "UNE"));
    if let Some(seg) = functional_group {
        return Err(EdifactError::FunctionalGroupNotSupported {
            offset: seg.span_start(),
        });
    }

    let mut interchange = extract_interchange(segments)?;
    let messages = extract_messages(segments)?;

    // The envelope stores counts as u32; refuse interchanges that overflow it.
    let found = messages.len();
    interchange.actual_message_count = match u32::try_from(found) {
        Ok(count) => count,
        Err(_) => {
            return Err(EdifactError::InterchangeTooLarge {
                count: found as u64,
            });
        }
    };

    let declared = interchange.declared_message_count;
    let actual = interchange.actual_message_count;
    if declared != actual {
        return Err(EdifactError::MessageCountMismatch {
            expected: declared,
            actual,
        });
    }

    // Report the first message whose UNT count disagrees with what was seen.
    let first_bad = messages
        .iter()
        .find(|msg| msg.declared_segment_count != msg.actual_segment_count);
    if let Some(msg) = first_bad {
        return Err(EdifactError::SegmentCountMismatch {
            expected: msg.declared_segment_count,
            actual: msg.actual_segment_count,
            message_ref: msg.message_ref.clone(),
        });
    }

    Ok((interchange, messages))
}
/// Lenient envelope validation over borrowed segments: returns the envelope
/// errors collected by `validate_envelope_lenient_impl` instead of a
/// `Result`. An empty vector means the envelope passed every check.
pub fn validate_envelope_lenient(segments: &[Segment<'_>]) -> Vec<EdifactError> {
validate_envelope_lenient_impl(segments)
}
/// Lenient envelope validation over owned segments: returns the envelope
/// errors collected by `validate_envelope_lenient_impl` instead of a
/// `Result`. An empty vector means the envelope passed every check.
pub fn validate_envelope_lenient_from_owned(segments: &[OwnedSegment]) -> Vec<EdifactError> {
validate_envelope_lenient_impl(segments)
}
/// Lenient envelope validation shared by the borrowed and owned entry points.
///
/// Unlike the strict validator, count mismatches do not stop the scan: the
/// message-count mismatch and every distinct per-message segment-count
/// mismatch are all reported. Structural failures still end the scan, because
/// without a well-formed UNB/UNZ pair or well-formed UNH/UNT pairs there are
/// no counts to compare.
///
/// The returned vector is empty when the envelope is valid. Otherwise it
/// contains, in order:
/// * only `FunctionalGroupNotSupported` if a UNG/UNE segment is present; or
/// * only the error from `extract_interchange` / `extract_messages` if either
///   fails; or
/// * `InterchangeTooLarge` (if the message count overflows `u32`), then
///   `MessageCountMismatch` (if any), then each distinct
///   `SegmentCountMismatch` in message order.
///
/// Each extraction runs exactly once; the result does not depend on re-running
/// the strict validator.
fn validate_envelope_lenient_impl<S: SegmentReader>(segments: &[S]) -> Vec<EdifactError> {
    // Functional groups are unsupported, so nothing else is meaningful.
    let functional_group = segments
        .iter()
        .find(|seg| matches!(seg.tag(), "UNG" | "UNE"));
    if let Some(seg) = functional_group {
        return vec![EdifactError::FunctionalGroupNotSupported {
            offset: seg.span_start(),
        }];
    }

    // Structural errors are terminal: report the single error and stop.
    let interchange = match extract_interchange(segments) {
        Ok(envelope) => envelope,
        Err(err) => return vec![err],
    };
    let messages = match extract_messages(segments) {
        Ok(found) => found,
        Err(err) => return vec![err],
    };

    let mut errors = Vec::new();

    // On overflow, report it and keep going with a saturated count so the
    // remaining comparisons can still run.
    let actual_message_count = match u32::try_from(messages.len()) {
        Ok(count) => count,
        Err(_) => {
            errors.push(EdifactError::InterchangeTooLarge {
                count: messages.len() as u64,
            });
            u32::MAX
        }
    };

    if interchange.declared_message_count != actual_message_count {
        errors.push(EdifactError::MessageCountMismatch {
            expected: interchange.declared_message_count,
            actual: actual_message_count,
        });
    }

    for msg in &messages {
        if msg.declared_segment_count == msg.actual_segment_count {
            continue;
        }
        let mismatch = EdifactError::SegmentCountMismatch {
            expected: msg.declared_segment_count,
            actual: msg.actual_segment_count,
            message_ref: msg.message_ref.clone(),
        };
        // Identical mismatches (same reference and counts) are reported once.
        if !errors.contains(&mismatch) {
            errors.push(mismatch);
        }
    }

    errors
}
/// Reads the interchange envelope from the first (UNB) and last (UNZ) segment.
///
/// `actual_message_count` is left at 0 for the caller to fill in.
///
/// # Errors
///
/// * `MissingSegment` when the first segment is not UNB or the last is not
///   UNZ (an empty slice fails the UNB check).
/// * `MissingRequiredComponent` when a mandatory UNB/UNZ component is absent
///   or empty.
/// * `QualifierMismatch` when the UNZ control reference differs from the UNB
///   control reference.
/// * `InvalidText` when the UNZ message count is not a valid `u32`.
fn extract_interchange<S: SegmentReader>(
    segments: &[S],
) -> Result<InterchangeEnvelope, EdifactError> {
    let unb = match segments.first() {
        Some(seg) if seg.tag() == "UNB" => seg,
        _ => {
            return Err(EdifactError::MissingSegment {
                tag: "UNB".to_owned(),
                expected_position: "first segment of interchange".to_owned(),
            });
        }
    };
    let unz = match segments.last() {
        Some(seg) if seg.tag() == "UNZ" => seg,
        _ => {
            return Err(EdifactError::MissingSegment {
                tag: "UNZ".to_owned(),
                expected_position: "last segment of interchange".to_owned(),
            });
        }
    };

    // Header fields, read in element order so the first missing one is reported.
    let syntax_identifier = unb.required_component_field(0, 0)?.to_owned();
    let sender_id = unb.required_component_field(1, 0)?.to_owned();
    let recipient_id = unb.required_component_field(2, 0)?.to_owned();
    let date = unb.required_component_field(3, 0)?;
    // The time component is optional; keep the bare date when it is missing or empty.
    let datetime = match unb.component(3, 1) {
        Some(time) if !time.is_empty() => format!("{date}:{time}"),
        _ => date.to_owned(),
    };
    let control_ref = unb.required_component_field(4, 0)?.to_owned();

    // The trailer must echo the header's control reference.
    let trailer_ref = unz.required_component_field(1, 0)?;
    if trailer_ref != control_ref {
        return Err(EdifactError::QualifierMismatch {
            tag: "UNZ".to_owned(),
            actual: trailer_ref.to_owned(),
            expected: control_ref,
            offset: unz.span_start(),
        });
    }

    let declared_message_count = match unz.required_component_field(0, 0)?.parse::<u32>() {
        Ok(count) => count,
        Err(_) => {
            return Err(EdifactError::InvalidText {
                offset: unz.span_start(),
            });
        }
    };

    Ok(InterchangeEnvelope {
        syntax_identifier,
        sender_id,
        recipient_id,
        datetime,
        control_ref,
        declared_message_count,
        actual_message_count: 0,
    })
}
/// Walks the segments between the first and last one (expected to be UNB and
/// UNZ) and returns one `MessageEnvelope` per UNH..UNT pair.
///
/// Slices with fewer than two segments yield an empty list.
///
/// # Errors
///
/// * `InvalidSegmentForMessage` (with `message_type` `"ENVELOPE"`) for a UNH
///   inside an open message, a UNB/UNZ inside an open message, or any segment
///   other than UNH outside a message (including a stray UNT).
/// * `MissingSegment` for a UNH that is never closed by a UNT.
/// * `MissingRequiredComponent`, `InvalidText`, `QualifierMismatch` or
///   `InterchangeTooLarge` from reading a UNH/UNT pair (see `close_message`).
fn extract_messages<S: SegmentReader>(
    segments: &[S],
) -> Result<Vec<MessageEnvelope>, EdifactError> {
    let mut messages: Vec<MessageEnvelope> = Vec::new();
    if segments.len() < 2 {
        return Ok(messages);
    }
    let inner = &segments[1..segments.len() - 1];

    // Index (within `inner`) of the UNH of the message currently open, if any.
    // This single value is the whole parser state: `Some` means "in a message".
    let mut open_unh: Option<usize> = None;

    for (i, seg) in inner.iter().enumerate() {
        match (seg.tag(), open_unh) {
            ("UNH", None) => open_unh = Some(i),
            ("UNT", Some(unh_idx)) => {
                // Segment count is inclusive of both the UNH and the UNT.
                let envelope = close_message(&inner[unh_idx], seg, i - unh_idx + 1)?;
                messages.push(envelope);
                open_unh = None;
            }
            // Nested UNH, envelope segment inside a message, or anything
            // (including UNT) outside a message.
            ("UNH" | "UNB" | "UNZ", Some(_)) | (_, None) => {
                return Err(envelope_violation(seg));
            }
            // Ordinary body segment of the open message.
            (_, Some(_)) => {}
        }
    }

    if open_unh.is_some() {
        return Err(EdifactError::MissingSegment {
            tag: "UNT".to_owned(),
            expected_position: "end of message group".to_owned(),
        });
    }
    Ok(messages)
}

/// Builds the error reported for a segment that violates the envelope structure.
fn envelope_violation<S: SegmentReader>(seg: &S) -> EdifactError {
    EdifactError::InvalidSegmentForMessage {
        tag: seg.tag().to_owned(),
        message_type: "ENVELOPE".to_owned(),
        offset: seg.span_start(),
    }
}

/// Builds the envelope for one message from its UNH/UNT pair.
///
/// `segment_count` is the number of segments from `unh` through `unt`
/// inclusive. Fields are read UNH first, then UNT, so the first problem in
/// segment order is the one reported.
fn close_message<S: SegmentReader>(
    unh: &S,
    unt: &S,
    segment_count: usize,
) -> Result<MessageEnvelope, EdifactError> {
    let message_ref = unh.required_component_field(0, 0)?.to_owned();
    let message_type = unh.required_component_field(1, 0)?.to_owned();
    let version = unh.required_component_field(1, 1)?.to_owned();
    let release = unh.required_component_field(1, 2)?.to_owned();
    let controlling_agency = unh.required_component_field(1, 3)?.to_owned();
    // The association code is optional.
    let association_code = unh.component(1, 4).unwrap_or("").to_owned();

    let declared_segment_count: u32 = unt
        .required_component_field(0, 0)?
        .parse()
        .map_err(|_| EdifactError::InvalidText {
            offset: unt.span_start(),
        })?;

    // The trailer must echo the header's message reference.
    let unt_ref = unt.required_component_field(1, 0)?;
    if unt_ref != message_ref {
        return Err(EdifactError::QualifierMismatch {
            tag: "UNT".to_owned(),
            actual: unt_ref.to_owned(),
            expected: message_ref,
            offset: unt.span_start(),
        });
    }

    let actual_segment_count =
        u32::try_from(segment_count).map_err(|_| EdifactError::InterchangeTooLarge {
            count: u64::try_from(segment_count).unwrap_or(u64::MAX),
        })?;

    Ok(MessageEnvelope {
        message_ref,
        message_type,
        version,
        release,
        controlling_agency,
        association_code,
        declared_segment_count,
        actual_segment_count,
    })
}
// Unit tests for strict envelope validation. Each test feeds a raw EDIFACT
// byte string through the crate's parser and then through the validator.
#[cfg(test)]
mod tests {
use super::*;
// Parses raw bytes into owned segments; panics if parsing itself fails.
fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
crate::from_reader_collect(std::io::Cursor::new(input)).expect("parse failed")
}
// Parses, converts to borrowed segments, and runs the borrowed-path validator.
fn parse_and_validate(
input: &[u8],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
let owned = parse(input);
let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
validate_envelope(&segs)
}
// Parses and runs the owned-path validator directly on the owned segments.
fn parse_and_validate_owned(
input: &[u8],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
validate_envelope_from_owned(&parse(input))
}
// A well-formed interchange: UNA, UNB, one ORDERS message of four segments
// (UNH, BGM, DTM, UNT), and a UNZ declaring one message.
const VALID_INTERCHANGE: &[u8] =
b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";
// Happy path through the borrowed entry point: header fields and both
// declared and actual counts are extracted.
#[test]
fn valid_envelope_parses_ok() {
let (interchange, messages) =
parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");
assert_eq!(interchange.sender_id, "SENDER");
assert_eq!(interchange.recipient_id, "RECEIVER");
assert_eq!(interchange.control_ref, "00001");
assert_eq!(interchange.declared_message_count, 1);
assert_eq!(interchange.actual_message_count, 1);
assert_eq!(messages.len(), 1);
assert_eq!(messages[0].message_type, "ORDERS");
assert_eq!(messages[0].association_code, "EAN010");
assert_eq!(messages[0].declared_segment_count, 4);
assert_eq!(messages[0].actual_segment_count, 4); }
// Same happy path through the owned entry point.
#[test]
fn valid_envelope_parses_ok_owned_path() {
let (interchange, messages) =
parse_and_validate_owned(VALID_INTERCHANGE).expect("envelope should be valid");
assert_eq!(interchange.sender_id, "SENDER");
assert_eq!(interchange.actual_message_count, 1);
assert_eq!(messages[0].declared_segment_count, 4);
}
// UNT declares 99 segments but the message has 4.
#[test]
fn unt_count_mismatch_returns_err() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(
result,
Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
),
"expected SegmentCountMismatch, got {result:?}"
);
}
// UNZ declares 2 messages but only 1 is present.
#[test]
fn unz_count_mismatch_returns_err() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
let result = parse_and_validate(input);
assert!(
matches!(
result,
Err(EdifactError::MessageCountMismatch {
expected: 2,
actual: 1
})
),
"expected MessageCountMismatch(2,1), got {result:?}"
);
}
// Input starts with UNH instead of UNB.
#[test]
fn missing_unb_returns_err() {
let input = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(result.is_err());
}
// With a leading UNA, the syntax identifier and the combined date:time
// value are still read from the UNB.
#[test]
fn extracts_una_interchange_correctly() {
let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
assert_eq!(env.syntax_identifier, "UNOA");
assert_eq!(env.datetime, "230401:0900");
}
// A UNH that is never closed by a UNT is reported as a missing UNT.
#[test]
fn dangling_unh_without_unt_returns_err() {
let input =
b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT")
);
}
// A BGM between a closed message and the UNZ is outside any message.
#[test]
fn stray_segment_outside_message_returns_err() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(matches!(
result,
Err(EdifactError::InvalidSegmentForMessage { .. })
));
}
// An empty sender element counts as a missing required component.
#[test]
fn missing_unb_sender_component_returns_err() {
let input = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
"expected MissingRequiredComponent for empty sender, got: {result:?}"
);
}
// A second UNH before the first message's UNT is rejected at that UNH.
#[test]
fn nested_unh_without_closing_previous_message_returns_err() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
"expected InvalidSegmentForMessage(UNH), got {result:?}"
);
}
// UNT carries reference 999 while UNH carries 1.
#[test]
fn unt_message_reference_must_match_unh() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNT"));
}
// UNZ carries control reference 999 while UNB carries 1.
#[test]
fn unz_control_reference_must_match_unb() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
let result = parse_and_validate(input);
assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNZ"));
}
// UNH message identifier stops after the release, so the controlling
// agency (component 3) is the first required component found missing.
#[test]
fn missing_unh_message_type_components_return_err() {
let input =
b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
"expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
);
}
// A UNZ appearing between UNH and UNT is rejected at that UNZ.
#[test]
fn nested_unz_inside_message_returns_err() {
let input =
b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
matches!(result, Err(EdifactError::InvalidSegmentForMessage { tag, .. }) if tag == "UNZ")
);
}
// Functional groups (UNG/UNE) must produce the dedicated error rather than
// being validated as if they were ordinary segments.
#[test]
fn envelope_with_ung_returns_explicit_error() {
let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
UNH+1+ORDERS:D:96A:UN'\
BGM+220+PO-001+9'\
UNT+3+1'\
UNE+1+1'\
UNZ+1+1'";
let result = parse_and_validate(input);
assert!(
result.is_err(),
"UNG/UNE is documented as unsupported; must return an error, not silently produce wrong counts"
);
assert!(
matches!(
result,
Err(EdifactError::FunctionalGroupNotSupported { .. })
),
"expected FunctionalGroupNotSupported, got {result:?}"
);
}
}