1use crate::{OwnedSegment, error::EdifactError, model::Segment};
7
8pub(crate) trait SegmentReader: sealed::Sealed {
15 fn tag(&self) -> &str;
16 fn span_start(&self) -> usize;
17 fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str>;
19
20 fn required_component_field(
21 &self,
22 elem_idx: usize,
23 comp_idx: usize,
24 ) -> Result<&str, EdifactError> {
25 self.component(elem_idx, comp_idx)
26 .filter(|s| !s.is_empty())
27 .ok_or_else(|| EdifactError::MissingRequiredComponent {
28 tag: self.tag().to_owned(),
29 element_index: elem_idx,
30 component_index: comp_idx,
31 })
32 }
33}
34
/// Private module holding the marker trait that seals [`SegmentReader`].
mod sealed {
    /// Supertrait of `SegmentReader`; only the two segment types below implement it.
    pub trait Sealed {}
    impl Sealed for crate::model::Segment<'_> {}
    impl Sealed for crate::OwnedSegment {}
}
40
/// [`SegmentReader`] for borrowed segments: each accessor forwards to the
/// segment's own fields and lookup methods.
impl SegmentReader for Segment<'_> {
    #[inline]
    fn tag(&self) -> &str {
        self.tag
    }
    #[inline]
    fn span_start(&self) -> usize {
        self.span.start
    }
    #[inline]
    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
        // `?` yields `None` when the element itself is missing.
        self.get_element(elem_idx)?.get_component(comp_idx)
    }
}
55
/// [`SegmentReader`] for owned segments: each accessor forwards to the
/// segment's own fields and lookup methods.
impl SegmentReader for OwnedSegment {
    #[inline]
    fn tag(&self) -> &str {
        &self.tag
    }
    #[inline]
    fn span_start(&self) -> usize {
        self.span.start
    }
    #[inline]
    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
        self.component_str(elem_idx, comp_idx)
    }
}
70
/// Owned summary of an interchange envelope: values read from the opening
/// `UNB` and closing `UNZ` segments plus the message count actually observed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
    /// `UNB` element 0, component 0 (for example `UNOA`).
    pub syntax_identifier: String,
    /// `UNB` element 1, component 0.
    pub sender_id: String,
    /// `UNB` element 2, component 0.
    pub recipient_id: String,
    /// `UNB` element 3: component 0, followed by `:` and component 1 when
    /// component 1 is present and non-empty.
    pub datetime: String,
    /// `UNB` element 4, component 0; validated to equal `UNZ` element 1, component 0.
    pub control_ref: String,
    /// Message count declared in `UNZ` element 0, component 0.
    pub declared_message_count: u32,
    /// Number of complete `UNH`..`UNT` messages found between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
89
/// Owned summary of one message envelope: values read from a `UNH` segment and
/// its closing `UNT` segment plus the segment count actually observed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
    /// `UNH` element 0, component 0; validated to equal `UNT` element 1, component 0.
    pub message_ref: String,
    /// `UNH` element 1, component 0 (for example `ORDERS`).
    pub message_type: String,
    /// `UNH` element 1, component 1.
    pub version: String,
    /// `UNH` element 1, component 2.
    pub release: String,
    /// `UNH` element 1, component 3.
    pub controlling_agency: String,
    /// `UNH` element 1, component 4; empty when the component is absent.
    pub association_code: String,
    /// Segment count declared in `UNT` element 0, component 0.
    pub declared_segment_count: u32,
    /// Number of segments from `UNH` through `UNT`, both inclusive.
    pub actual_segment_count: u32,
}
110
/// Borrowed view of the message identifier composite (`UNH` element 1), as
/// produced by [`parse_unh`]. Optional components are the empty string when
/// absent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MessageIdentifier<'a> {
    /// Component 0 (for example `ORDERS`).
    pub message_type: &'a str,
    /// Component 1; empty when absent.
    pub version: &'a str,
    /// Component 2; empty when absent.
    pub release: &'a str,
    /// Component 3; empty when absent.
    pub controlling_agency: &'a str,
    /// Component 4; empty when absent.
    pub association_assigned: &'a str,
}
128
129pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
140 let elem = unh
141 .get_element(1)
142 .ok_or_else(|| EdifactError::MissingRequiredElement {
143 tag: "UNH".to_owned(),
144 element_index: 1,
145 })?;
146 let message_type =
147 elem.get_component(0)
148 .ok_or_else(|| EdifactError::MissingRequiredComponent {
149 tag: "UNH".to_owned(),
150 element_index: 1,
151 component_index: 0,
152 })?;
153 Ok(MessageIdentifier {
154 message_type,
155 version: elem.get_component(1).unwrap_or(""),
156 release: elem.get_component(2).unwrap_or(""),
157 controlling_agency: elem.get_component(3).unwrap_or(""),
158 association_assigned: elem.get_component(4).unwrap_or(""),
159 })
160}
161
/// Validates the `UNB`/`UNZ` interchange envelope and every `UNH`/`UNT`
/// message envelope in a slice of borrowed segments.
///
/// On success returns the interchange summary together with one
/// [`MessageEnvelope`] per message, in input order.
///
/// # Errors
///
/// Returns the first problem found: a `UNG`/`UNE` segment anywhere in the
/// input ([`EdifactError::FunctionalGroupNotSupported`]), a structural or
/// field error from the envelope segments, a message count mismatch, or a
/// segment count mismatch.
pub fn validate_envelope(
    segments: &[Segment<'_>],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
    validate_envelope_impl(segments)
}
186
/// Same validation as [`validate_envelope`], operating directly on owned
/// segments; both functions share one generic implementation.
///
/// # Errors
///
/// Returns the first envelope problem found, exactly as [`validate_envelope`] does.
pub fn validate_envelope_from_owned(
    segments: &[OwnedSegment],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
    validate_envelope_impl(segments)
}
196
197fn validate_envelope_impl<S: SegmentReader>(
198 segments: &[S],
199) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
200 if let Some(ung_or_une) = segments
204 .iter()
205 .find(|s| s.tag() == "UNG" || s.tag() == "UNE")
206 {
207 return Err(EdifactError::FunctionalGroupNotSupported {
208 offset: ung_or_une.span_start(),
209 });
210 }
211
212 let mut interchange_env = extract_interchange(segments)?;
213 let message_envs = extract_messages(segments)?;
214 interchange_env.actual_message_count =
215 u32::try_from(message_envs.len()).map_err(|_| EdifactError::InterchangeTooLarge {
216 count: message_envs.len() as u64,
217 })?;
218
219 if interchange_env.declared_message_count != interchange_env.actual_message_count {
221 return Err(EdifactError::MessageCountMismatch {
222 expected: interchange_env.declared_message_count,
223 actual: interchange_env.actual_message_count,
224 });
225 }
226
227 for msg in &message_envs {
229 if msg.declared_segment_count != msg.actual_segment_count {
230 return Err(EdifactError::SegmentCountMismatch {
231 expected: msg.declared_segment_count,
232 actual: msg.actual_segment_count,
233 message_ref: msg.message_ref.clone(),
234 });
235 }
236 }
237
238 Ok((interchange_env, message_envs))
239}
240
/// Validates the envelope of a slice of borrowed segments and returns every
/// error that could be collected instead of stopping at the first one.
///
/// An empty vector means the envelope is valid. When a `UNG`/`UNE` segment is
/// present, only that single error is reported.
pub fn validate_envelope_lenient(segments: &[Segment<'_>]) -> Vec<EdifactError> {
    validate_envelope_lenient_impl(segments)
}
264
/// Same collection of envelope errors as [`validate_envelope_lenient`],
/// operating directly on owned segments.
///
/// An empty vector means the envelope is valid.
pub fn validate_envelope_lenient_from_owned(segments: &[OwnedSegment]) -> Vec<EdifactError> {
    validate_envelope_lenient_impl(segments)
}
272
273fn validate_envelope_lenient_impl<S: SegmentReader>(segments: &[S]) -> Vec<EdifactError> {
274 let mut errors: Vec<EdifactError> = Vec::new();
275
276 if let Some(ung_or_une) = segments
278 .iter()
279 .find(|s| s.tag() == "UNG" || s.tag() == "UNE")
280 {
281 errors.push(EdifactError::FunctionalGroupNotSupported {
282 offset: ung_or_une.span_start(),
283 });
284 return errors;
286 }
287
288 match validate_envelope_impl(segments) {
290 Ok(_) => {}
291 Err(first) => {
292 errors.push(first);
293
294 if let Ok(mut ie) = extract_interchange(segments) {
297 match extract_messages(segments) {
299 Ok(msgs) => {
300 ie.actual_message_count = u32::try_from(msgs.len()).unwrap_or(u32::MAX);
301 if ie.declared_message_count != ie.actual_message_count {
302 let dup = EdifactError::MessageCountMismatch {
305 expected: ie.declared_message_count,
306 actual: ie.actual_message_count,
307 };
308 if !errors.iter().any(|e| e == &dup) {
309 errors.push(dup);
310 }
311 }
312 for msg in &msgs {
313 if msg.declared_segment_count != msg.actual_segment_count {
314 let dup = EdifactError::SegmentCountMismatch {
315 expected: msg.declared_segment_count,
316 actual: msg.actual_segment_count,
317 message_ref: msg.message_ref.clone(),
318 };
319 if !errors.iter().any(|e| e == &dup) {
320 errors.push(dup);
321 }
322 }
323 }
324 }
325 Err(e) => {
326 if !errors.iter().any(|err| err == &e) {
327 errors.push(e);
328 }
329 }
330 }
331 }
332 }
333 }
334
335 errors
336}
337
338fn extract_interchange<S: SegmentReader>(
339 segments: &[S],
340) -> Result<InterchangeEnvelope, EdifactError> {
341 if segments.first().map(|s| s.tag()) != Some("UNB") {
342 return Err(EdifactError::MissingSegment {
343 tag: "UNB".to_owned(),
344 expected_position: "first segment of interchange".to_owned(),
345 });
346 }
347 if segments.last().map(|s| s.tag()) != Some("UNZ") {
348 return Err(EdifactError::MissingSegment {
349 tag: "UNZ".to_owned(),
350 expected_position: "last segment of interchange".to_owned(),
351 });
352 }
353
354 let unb = &segments[0];
355 let unz = &segments[segments.len() - 1];
356
357 let syntax_identifier = unb.required_component_field(0, 0)?.to_owned();
358 let sender_id = unb.required_component_field(1, 0)?.to_owned();
359 let recipient_id = unb.required_component_field(2, 0)?.to_owned();
360
361 let date = unb.required_component_field(3, 0)?;
363 let time = unb.component(3, 1).unwrap_or("");
364 let datetime = if time.is_empty() {
365 date.to_owned()
366 } else {
367 format!("{date}:{time}")
368 };
369
370 let control_ref = unb.required_component_field(4, 0)?.to_owned();
371 let unz_control_ref = unz.required_component_field(1, 0)?;
372 if unz_control_ref != control_ref {
373 return Err(EdifactError::QualifierMismatch {
374 tag: "UNZ".to_owned(),
375 actual: unz_control_ref.to_owned(),
376 expected: control_ref,
377 offset: unz.span_start(),
378 });
379 }
380
381 let declared_message_count: u32 =
382 unz.required_component_field(0, 0)?
383 .parse()
384 .map_err(|_| EdifactError::InvalidText {
385 offset: unz.span_start(),
386 })?;
387
388 Ok(InterchangeEnvelope {
389 syntax_identifier,
390 sender_id,
391 recipient_id,
392 datetime,
393 control_ref,
394 declared_message_count,
395 actual_message_count: 0,
396 })
397}
398
399fn extract_messages<S: SegmentReader>(
400 segments: &[S],
401) -> Result<Vec<MessageEnvelope>, EdifactError> {
402 let mut messages: Vec<MessageEnvelope> = Vec::new();
403 let mut in_message = false;
404 let mut msg_start_idx: usize = 0;
405 let mut current_unh_idx: Option<usize> = None;
406
407 let inner = if segments.len() >= 2 {
409 &segments[1..segments.len() - 1]
410 } else {
411 return Ok(messages);
412 };
413
414 for (i, seg) in inner.iter().enumerate() {
415 match seg.tag() {
416 "UNH" => {
417 if in_message {
418 return Err(EdifactError::InvalidSegmentForMessage {
419 tag: "UNH".to_owned(),
420 message_type: "ENVELOPE".to_owned(),
421 offset: seg.span_start(),
422 });
423 }
424 in_message = true;
425 msg_start_idx = i;
426 current_unh_idx = Some(i);
427 }
428 "UNT" if in_message => {
429 let unh_idx = current_unh_idx.take().ok_or_else(|| {
430 EdifactError::InvalidSegmentForMessage {
431 tag: "UNT".to_owned(),
432 message_type: "ENVELOPE".to_owned(),
433 offset: seg.span_start(),
434 }
435 })?;
436 let unh = &inner[unh_idx];
437
438 let message_ref = unh.required_component_field(0, 0)?.to_owned();
439 let message_type = unh.required_component_field(1, 0)?.to_owned();
440 let version = unh.required_component_field(1, 1)?.to_owned();
441 let release = unh.required_component_field(1, 2)?.to_owned();
442 let controlling_agency = unh.required_component_field(1, 3)?.to_owned();
443 let association_code = unh.component(1, 4).unwrap_or("").to_owned();
444
445 let declared_segment_count: u32 = seg
446 .required_component_field(0, 0)?
447 .parse()
448 .map_err(|_| EdifactError::InvalidText {
449 offset: seg.span_start(),
450 })?;
451 let unt_ref = seg.required_component_field(1, 0)?;
452 if unt_ref != message_ref {
453 return Err(EdifactError::QualifierMismatch {
454 tag: "UNT".to_owned(),
455 actual: unt_ref.to_owned(),
456 expected: message_ref.clone(),
457 offset: seg.span_start(),
458 });
459 }
460
461 let actual_segment_count = u32::try_from(i - msg_start_idx + 1).map_err(|_| {
463 EdifactError::InterchangeTooLarge {
464 count: u64::try_from(i - msg_start_idx + 1).unwrap_or(u64::MAX),
465 }
466 })?;
467
468 in_message = false;
469 messages.push(MessageEnvelope {
470 message_ref,
471 message_type,
472 version,
473 release,
474 controlling_agency,
475 association_code,
476 declared_segment_count,
477 actual_segment_count,
478 });
479 }
480 "UNT" => {
481 return Err(EdifactError::InvalidSegmentForMessage {
482 tag: "UNT".to_owned(),
483 message_type: "ENVELOPE".to_owned(),
484 offset: seg.span_start(),
485 });
486 }
487 "UNB" | "UNZ" if in_message => {
488 return Err(EdifactError::InvalidSegmentForMessage {
489 tag: seg.tag().to_owned(),
490 message_type: "ENVELOPE".to_owned(),
491 offset: seg.span_start(),
492 });
493 }
494 _ if !in_message => {
495 return Err(EdifactError::InvalidSegmentForMessage {
496 tag: seg.tag().to_owned(),
497 message_type: "ENVELOPE".to_owned(),
498 offset: seg.span_start(),
499 });
500 }
501 _ => {}
502 }
503 }
504
505 if in_message {
506 return Err(EdifactError::MissingSegment {
507 tag: "UNT".to_owned(),
508 expected_position: "end of message group".to_owned(),
509 });
510 }
511
512 Ok(messages)
513}
514
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses raw EDIFACT bytes into owned segments, panicking on parse failure.
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        crate::from_reader_collect(std::io::Cursor::new(input)).expect("parse failed")
    }

    /// Parses `input` and runs strict validation through the borrowed-segment entry point.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        validate_envelope(&segs)
    }

    /// Parses `input` and runs strict validation through the owned-segment entry point.
    fn parse_and_validate_owned(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        validate_envelope_from_owned(&parse(input))
    }

    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].message_type, "ORDERS");
        assert_eq!(messages[0].association_code, "EAN010");
        assert_eq!(messages[0].declared_segment_count, 4);
        // UNH, BGM, DTM and UNT.
        assert_eq!(messages[0].actual_segment_count, 4);
    }

    #[test]
    fn valid_envelope_parses_ok_owned_path() {
        let (interchange, messages) =
            parse_and_validate_owned(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages[0].declared_segment_count, 4);
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        let input = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // Pin the specific error so an unrelated failure cannot satisfy the test.
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNB"),
            "expected MissingSegment(UNB), got {result:?}"
        );
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT")
        );
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(matches!(
            result,
            Err(EdifactError::InvalidSegmentForMessage { .. })
        ));
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        let input = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
            "expected MissingRequiredComponent for empty sender, got: {result:?}"
        );
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
            "expected InvalidSegmentForMessage(UNH), got {result:?}"
        );
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNT"));
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let result = parse_and_validate(input);
        assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNZ"));
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
            "expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { tag, .. }) if tag == "UNZ")
        );
    }

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
            UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
            UNH+1+ORDERS:D:96A:UN'\
            BGM+220+PO-001+9'\
            UNT+3+1'\
            UNE+1+1'\
            UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            result.is_err(),
            "UNG/UNE is documented as unsupported; must return an error, not silently produce wrong counts"
        );
        assert!(
            matches!(
                result,
                Err(EdifactError::FunctionalGroupNotSupported { .. })
            ),
            "expected FunctionalGroupNotSupported, got {result:?}"
        );
    }

    #[test]
    fn lenient_returns_no_errors_for_valid_interchange() {
        let errors = validate_envelope_lenient_from_owned(&parse(VALID_INTERCHANGE));
        assert!(errors.is_empty(), "expected no errors, got {errors:?}");
    }

    #[test]
    fn lenient_collects_message_and_segment_count_mismatches() {
        // UNZ declares 2 messages (1 present) and UNT declares 99 segments (3 present).
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+99+1'UNZ+2+1'";
        let errors = validate_envelope_lenient_from_owned(&parse(input));
        assert_eq!(
            errors,
            vec![
                EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1,
                },
                EdifactError::SegmentCountMismatch {
                    expected: 99,
                    actual: 3,
                    message_ref: "1".to_owned(),
                },
            ]
        );
    }

    #[test]
    fn lenient_reports_only_functional_group_error_for_ung() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
            UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
            UNH+1+ORDERS:D:96A:UN'\
            BGM+220+PO-001+9'\
            UNT+3+1'\
            UNE+1+1'\
            UNZ+1+1'";
        let errors = validate_envelope_lenient_from_owned(&parse(input));
        assert!(
            matches!(
                errors.as_slice(),
                [EdifactError::FunctionalGroupNotSupported { .. }]
            ),
            "expected exactly one FunctionalGroupNotSupported, got {errors:?}"
        );
    }
}