Skip to main content

edifact_rs/
envelope.rs

1//! EDIFACT envelope validation (Story 2.4).
2//!
3//! Validates UNB / UNH / UNT / UNZ envelope segment structure and count
4//! consistency — independently of business-rule (AHB) validation.
5
6use crate::{OwnedSegment, error::EdifactError, model::Segment};
7
8// ── Sealed segment-access trait ──────────────────────────────────────────────
9
10/// Minimal read-only view over an EDIFACT segment, implemented by both
11/// [`Segment`] and [`OwnedSegment`].
12///
13/// Sealed so external crates cannot implement it; internal only.
14pub(crate) trait SegmentReader: sealed::Sealed {
15    fn tag(&self) -> &str;
16    fn span_start(&self) -> usize;
17    /// Extract `segments[elem_idx].components[comp_idx]`, or `None`.
18    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str>;
19
20    fn required_component_field(
21        &self,
22        elem_idx: usize,
23        comp_idx: usize,
24    ) -> Result<&str, EdifactError> {
25        self.component(elem_idx, comp_idx)
26            .filter(|s| !s.is_empty())
27            .ok_or_else(|| EdifactError::MissingRequiredComponent {
28                tag: self.tag().to_owned(),
29                element_index: elem_idx,
30                component_index: comp_idx,
31            })
32    }
33}
34
// Private module holding the marker supertrait of `SegmentReader`.  The module
// itself is not `pub`, so code outside this file cannot name `Sealed` and
// therefore cannot add further `SegmentReader` implementations.
mod sealed {
    /// Marker trait; implemented only for the two segment types below.
    pub trait Sealed {}
    impl Sealed for crate::model::Segment<'_> {}
    impl Sealed for crate::OwnedSegment {}
}
40
// Borrowed-segment view: each accessor forwards to the fields and lookup
// methods of `Segment`.
impl SegmentReader for Segment<'_> {
    #[inline]
    fn tag(&self) -> &str {
        self.tag
    }
    #[inline]
    fn span_start(&self) -> usize {
        self.span.start
    }
    #[inline]
    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
        // `?` yields `None` as soon as the element itself is missing.
        self.get_element(elem_idx)?.get_component(comp_idx)
    }
}
55
// Owned-segment view: each accessor forwards to the fields and lookup
// methods of `OwnedSegment`.
impl SegmentReader for OwnedSegment {
    #[inline]
    fn tag(&self) -> &str {
        &self.tag
    }
    #[inline]
    fn span_start(&self) -> usize {
        self.span.start
    }
    #[inline]
    fn component(&self, elem_idx: usize, comp_idx: usize) -> Option<&str> {
        // Element and component lookup are delegated to `component_str`.
        self.component_str(elem_idx, comp_idx)
    }
}
70
/// Extracted data from the `UNB` / `UNZ` interchange envelope.
///
/// Indices below are `(element, component)` positions as read by this
/// module's interchange extraction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier from `UNB` (0, 0), e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Interchange sender identification from `UNB` (1, 0).
    pub sender_id: String,
    /// Interchange recipient identification from `UNB` (2, 0).
    pub recipient_id: String,
    /// Interchange date and time from `UNB` element 3, rendered as
    /// `"<date>:<time>"`, or just `"<date>"` when the time component is
    /// absent or empty.
    pub datetime: String,
    /// Interchange control reference from `UNB` (4, 0); validation requires
    /// `UNZ` (1, 0) to carry the same value.
    pub control_ref: String,
    /// Declared message (functional group) count from `UNZ` (0, 0).
    pub declared_message_count: u32,
    /// Actual message count encountered between `UNB` and `UNZ`, i.e. the
    /// number of `UNH`/`UNT` pairs that were extracted.
    pub actual_message_count: u32,
}
89
/// Extracted data from a single `UNH` / `UNT` message envelope.
///
/// Indices below are `(element, component)` positions as read by this
/// module's message extraction.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
    /// Message reference from `UNH` (0, 0); validation requires `UNT` (1, 0)
    /// to carry the same value.
    pub message_ref: String,
    /// EDIFACT message type from `UNH` (1, 0), e.g. `"ORDERS"`.
    pub message_type: String,
    /// Version number from `UNH` (1, 1), e.g. `"D"`.
    pub version: String,
    /// Release number from `UNH` (1, 2), e.g. `"11A"`.
    pub release: String,
    /// Controlling agency code from `UNH` (1, 3), e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code (MIG version) from `UNH` (1, 4), e.g.
    /// `"FV2510"`; the empty string when the component is absent.
    pub association_code: String,
    /// Declared segment count from `UNT` (0, 0).
    pub declared_segment_count: u32,
    /// Actual segment count from this `UNH` through its `UNT`, both
    /// inclusive.
    pub actual_segment_count: u32,
}
110
/// Parsed identifier fields from a `UNH` segment.
///
/// Produced by [`parse_unh`].  All string slices are borrowed through the
/// [`Segment`] handed to the parser (lifetime `'a`), so no field owns its
/// data.
///
/// Apart from `message_type`, every field is optional in the parser and is
/// the empty string when the corresponding component is absent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MessageIdentifier<'a> {
    /// EDIFACT message type (component 0), e.g. `"ORDERS"`.
    pub message_type: &'a str,
    /// Version number (component 1), e.g. `"D"`.
    pub version: &'a str,
    /// Release number (component 2), e.g. `"11A"`.
    pub release: &'a str,
    /// Controlling agency code (component 3), e.g. `"UN"`.
    pub controlling_agency: &'a str,
    /// Association assigned code / MIG version (component 4), e.g. `"FV2510"`.
    pub association_assigned: &'a str,
}
128
129/// Extract identifier fields from a `UNH` segment.
130///
131/// Returns a [`MessageIdentifier`] borrowing directly from the segment's
132/// component slices — zero allocation.
133///
134/// # Errors
135///
136/// Returns [`EdifactError::MissingRequiredElement`] if element 1 of the `UNH`
137/// segment is absent, or [`EdifactError::MissingRequiredComponent`] if
138/// component 0 of that element (the message type) is absent.
139pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
140    let elem = unh
141        .get_element(1)
142        .ok_or_else(|| EdifactError::MissingRequiredElement {
143            tag: "UNH".to_owned(),
144            element_index: 1,
145        })?;
146    let message_type =
147        elem.get_component(0)
148            .ok_or_else(|| EdifactError::MissingRequiredComponent {
149                tag: "UNH".to_owned(),
150                element_index: 1,
151                component_index: 0,
152            })?;
153    Ok(MessageIdentifier {
154        message_type,
155        version: elem.get_component(1).unwrap_or(""),
156        release: elem.get_component(2).unwrap_or(""),
157        controlling_agency: elem.get_component(3).unwrap_or(""),
158        association_assigned: elem.get_component(4).unwrap_or(""),
159    })
160}
161
/// Validates the EDIFACT interchange envelope for the given segments.
///
/// Checks:
/// - the first segment is `UNB` and the last segment is `UNZ`
/// - the `UNZ` control reference equals the `UNB` control reference
/// - every `UNH` is closed by a `UNT` carrying the same message reference,
///   and no other segment sits outside a `UNH`/`UNT` pair
/// - each `UNT` segment count equals the number of segments from its `UNH`
///   through that `UNT`, both inclusive
/// - the `UNZ` message count equals the number of `UNH`/`UNT` pairs found
///
/// Returns `Ok((interchange_env, message_envs))` on success,
/// or the first [`EdifactError`] encountered on any structural violation.
///
/// # Errors
///
/// Returns [`EdifactError::FunctionalGroupNotSupported`] if the input contains
/// `UNG`/`UNE` functional group segments.  Strip them before calling this
/// function if functional groups are not relevant to your use case.
///
/// Returns [`EdifactError::MissingSegment`] if `UNB` is not first, `UNZ` is
/// not last, or a `UNH` is never closed by a `UNT`.
///
/// Returns [`EdifactError::MissingRequiredComponent`] if a required `UNB`,
/// `UNZ`, `UNH` or `UNT` component is absent or empty.
///
/// Returns [`EdifactError::QualifierMismatch`] if the `UNZ` control reference
/// differs from `UNB`, or a `UNT` message reference differs from its `UNH`.
///
/// Returns [`EdifactError::InvalidSegmentForMessage`] for a nested `UNH`, a
/// `UNT` without an open `UNH`, a `UNB`/`UNZ` inside a message, or any
/// segment outside a `UNH`/`UNT` pair.
///
/// Returns [`EdifactError::InvalidText`] if a declared count cannot be parsed
/// as `u32`, and [`EdifactError::InterchangeTooLarge`] if an actual count
/// does not fit into `u32`.
///
/// Returns [`EdifactError::MessageCountMismatch`] or
/// [`EdifactError::SegmentCountMismatch`] on count discrepancies.
pub fn validate_envelope(
    segments: &[Segment<'_>],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
    validate_envelope_impl(segments)
}
186
/// Validate the EDIFACT interchange envelope for an owned-segment slice.
///
/// Identical to [`validate_envelope`] but accepts `&[OwnedSegment]` directly,
/// eliminating the intermediate `Vec<Segment<'_>>` conversion.
///
/// # Errors
///
/// Fails under exactly the same conditions, and with the same
/// [`EdifactError`] variants, as [`validate_envelope`]; both functions share
/// one generic implementation.
pub fn validate_envelope_from_owned(
    segments: &[OwnedSegment],
) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
    validate_envelope_impl(segments)
}
196
197fn validate_envelope_impl<S: SegmentReader>(
198    segments: &[S],
199) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
200    // Functional group segments are not supported.  Detect them early so the
201    // caller gets a clear diagnostic rather than a misleading segment-count
202    // mismatch or `InvalidSegmentForMessage` buried deep in the parse.
203    if let Some(ung_or_une) = segments
204        .iter()
205        .find(|s| s.tag() == "UNG" || s.tag() == "UNE")
206    {
207        return Err(EdifactError::FunctionalGroupNotSupported {
208            offset: ung_or_une.span_start(),
209        });
210    }
211
212    let mut interchange_env = extract_interchange(segments)?;
213    let message_envs = extract_messages(segments)?;
214    interchange_env.actual_message_count =
215        u32::try_from(message_envs.len()).map_err(|_| EdifactError::InterchangeTooLarge {
216            count: message_envs.len() as u64,
217        })?;
218
219    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
220    if interchange_env.declared_message_count != interchange_env.actual_message_count {
221        return Err(EdifactError::MessageCountMismatch {
222            expected: interchange_env.declared_message_count,
223            actual: interchange_env.actual_message_count,
224        });
225    }
226
227    // Cross-check each UNT segment count vs. actual count
228    for msg in &message_envs {
229        if msg.declared_segment_count != msg.actual_segment_count {
230            return Err(EdifactError::SegmentCountMismatch {
231                expected: msg.declared_segment_count,
232                actual: msg.actual_segment_count,
233                message_ref: msg.message_ref.clone(),
234            });
235        }
236    }
237
238    Ok((interchange_env, message_envs))
239}
240
/// Validate the EDIFACT envelope structure and report **all** count
/// mismatches rather than stopping at the first one.
///
/// This is the lenient counterpart of [`validate_envelope`].  An empty `Vec`
/// means the envelope is valid.
///
/// This is particularly useful for diagnostic tooling, linters, or batch
/// processors where surfacing every count problem at once is more helpful
/// than short-circuiting on the first error.
///
/// # Caveats
///
/// Only the count checks are accumulated.  When the interchange header and
/// trailer as well as every `UNH`/`UNT` pair can be extracted, the result
/// holds the message-count mismatch (if any) followed by every distinct
/// segment-count mismatch, in message order.
///
/// Structural failures are prerequisites for those checks: a missing `UNB` or
/// `UNZ`, a control-reference or message-reference mismatch, a missing
/// required component, an unparsable count, or a misplaced segment stops
/// validation, and that error is the only entry returned.  Likewise, if a
/// functional group segment (`UNG`/`UNE`) is detected, the function returns
/// immediately with only that error — the remaining structure is ambiguous
/// and running further checks would produce misleading results.
pub fn validate_envelope_lenient(segments: &[Segment<'_>]) -> Vec<EdifactError> {
    validate_envelope_lenient_impl(segments)
}
264
/// Lenient validation over an owned-segment slice — collects count errors.
///
/// Identical to [`validate_envelope_lenient`] but accepts `&[OwnedSegment]`
/// directly, eliminating the intermediate `Vec<Segment<'_>>` conversion.
/// Both functions share one generic implementation, so the returned errors
/// and their order are the same; an empty `Vec` means the envelope is valid.
pub fn validate_envelope_lenient_from_owned(segments: &[OwnedSegment]) -> Vec<EdifactError> {
    validate_envelope_lenient_impl(segments)
}
272
273fn validate_envelope_lenient_impl<S: SegmentReader>(segments: &[S]) -> Vec<EdifactError> {
274    let mut errors: Vec<EdifactError> = Vec::new();
275
276    // Functional group check is always independent.
277    if let Some(ung_or_une) = segments
278        .iter()
279        .find(|s| s.tag() == "UNG" || s.tag() == "UNE")
280    {
281        errors.push(EdifactError::FunctionalGroupNotSupported {
282            offset: ung_or_une.span_start(),
283        });
284        // UNG/UNE makes the rest of the envelope ambiguous — stop early.
285        return errors;
286    }
287
288    // Run the normal path and, if it succeeds, we're done.
289    match validate_envelope_impl(segments) {
290        Ok(_) => {}
291        Err(first) => {
292            errors.push(first);
293
294            // Now try individual sub-checks that are independent of each other.
295            // Interchange envelope checks.
296            if let Ok(mut ie) = extract_interchange(segments) {
297                // extract_interchange succeeded — try message extraction separately.
298                match extract_messages(segments) {
299                    Ok(msgs) => {
300                        ie.actual_message_count = u32::try_from(msgs.len()).unwrap_or(u32::MAX);
301                        if ie.declared_message_count != ie.actual_message_count {
302                            // Only push if not already in errors (the normal path
303                            // may have returned this as the first error).
304                            let dup = EdifactError::MessageCountMismatch {
305                                expected: ie.declared_message_count,
306                                actual: ie.actual_message_count,
307                            };
308                            if !errors.iter().any(|e| e == &dup) {
309                                errors.push(dup);
310                            }
311                        }
312                        for msg in &msgs {
313                            if msg.declared_segment_count != msg.actual_segment_count {
314                                let dup = EdifactError::SegmentCountMismatch {
315                                    expected: msg.declared_segment_count,
316                                    actual: msg.actual_segment_count,
317                                    message_ref: msg.message_ref.clone(),
318                                };
319                                if !errors.iter().any(|e| e == &dup) {
320                                    errors.push(dup);
321                                }
322                            }
323                        }
324                    }
325                    Err(e) => {
326                        if !errors.iter().any(|err| err == &e) {
327                            errors.push(e);
328                        }
329                    }
330                }
331            }
332        }
333    }
334
335    errors
336}
337
338fn extract_interchange<S: SegmentReader>(
339    segments: &[S],
340) -> Result<InterchangeEnvelope, EdifactError> {
341    if segments.first().map(|s| s.tag()) != Some("UNB") {
342        return Err(EdifactError::MissingSegment {
343            tag: "UNB".to_owned(),
344            expected_position: "first segment of interchange".to_owned(),
345        });
346    }
347    if segments.last().map(|s| s.tag()) != Some("UNZ") {
348        return Err(EdifactError::MissingSegment {
349            tag: "UNZ".to_owned(),
350            expected_position: "last segment of interchange".to_owned(),
351        });
352    }
353
354    let unb = &segments[0];
355    let unz = &segments[segments.len() - 1];
356
357    let syntax_identifier = unb.required_component_field(0, 0)?.to_owned();
358    let sender_id = unb.required_component_field(1, 0)?.to_owned();
359    let recipient_id = unb.required_component_field(2, 0)?.to_owned();
360
361    // Element 3: date/time composite
362    let date = unb.required_component_field(3, 0)?;
363    let time = unb.component(3, 1).unwrap_or("");
364    let datetime = if time.is_empty() {
365        date.to_owned()
366    } else {
367        format!("{date}:{time}")
368    };
369
370    let control_ref = unb.required_component_field(4, 0)?.to_owned();
371    let unz_control_ref = unz.required_component_field(1, 0)?;
372    if unz_control_ref != control_ref {
373        return Err(EdifactError::QualifierMismatch {
374            tag: "UNZ".to_owned(),
375            actual: unz_control_ref.to_owned(),
376            expected: control_ref,
377            offset: unz.span_start(),
378        });
379    }
380
381    let declared_message_count: u32 =
382        unz.required_component_field(0, 0)?
383            .parse()
384            .map_err(|_| EdifactError::InvalidText {
385                offset: unz.span_start(),
386            })?;
387
388    Ok(InterchangeEnvelope {
389        syntax_identifier,
390        sender_id,
391        recipient_id,
392        datetime,
393        control_ref,
394        declared_message_count,
395        actual_message_count: 0,
396    })
397}
398
399fn extract_messages<S: SegmentReader>(
400    segments: &[S],
401) -> Result<Vec<MessageEnvelope>, EdifactError> {
402    let mut messages: Vec<MessageEnvelope> = Vec::new();
403    let mut in_message = false;
404    let mut msg_start_idx: usize = 0;
405    let mut current_unh_idx: Option<usize> = None;
406
407    // Skip UNB (index 0) and UNZ (last index) — iterate only message content
408    let inner = if segments.len() >= 2 {
409        &segments[1..segments.len() - 1]
410    } else {
411        return Ok(messages);
412    };
413
414    for (i, seg) in inner.iter().enumerate() {
415        match seg.tag() {
416            "UNH" => {
417                if in_message {
418                    return Err(EdifactError::InvalidSegmentForMessage {
419                        tag: "UNH".to_owned(),
420                        message_type: "ENVELOPE".to_owned(),
421                        offset: seg.span_start(),
422                    });
423                }
424                in_message = true;
425                msg_start_idx = i;
426                current_unh_idx = Some(i);
427            }
428            "UNT" if in_message => {
429                let unh_idx = current_unh_idx.take().ok_or_else(|| {
430                    EdifactError::InvalidSegmentForMessage {
431                        tag: "UNT".to_owned(),
432                        message_type: "ENVELOPE".to_owned(),
433                        offset: seg.span_start(),
434                    }
435                })?;
436                let unh = &inner[unh_idx];
437
438                let message_ref = unh.required_component_field(0, 0)?.to_owned();
439                let message_type = unh.required_component_field(1, 0)?.to_owned();
440                let version = unh.required_component_field(1, 1)?.to_owned();
441                let release = unh.required_component_field(1, 2)?.to_owned();
442                let controlling_agency = unh.required_component_field(1, 3)?.to_owned();
443                let association_code = unh.component(1, 4).unwrap_or("").to_owned();
444
445                let declared_segment_count: u32 = seg
446                    .required_component_field(0, 0)?
447                    .parse()
448                    .map_err(|_| EdifactError::InvalidText {
449                        offset: seg.span_start(),
450                    })?;
451                let unt_ref = seg.required_component_field(1, 0)?;
452                if unt_ref != message_ref {
453                    return Err(EdifactError::QualifierMismatch {
454                        tag: "UNT".to_owned(),
455                        actual: unt_ref.to_owned(),
456                        expected: message_ref.clone(),
457                        offset: seg.span_start(),
458                    });
459                }
460
461                // actual count = segments from UNH (inclusive) to UNT (inclusive)
462                let actual_segment_count = u32::try_from(i - msg_start_idx + 1).map_err(|_| {
463                    EdifactError::InterchangeTooLarge {
464                        count: u64::try_from(i - msg_start_idx + 1).unwrap_or(u64::MAX),
465                    }
466                })?;
467
468                in_message = false;
469                messages.push(MessageEnvelope {
470                    message_ref,
471                    message_type,
472                    version,
473                    release,
474                    controlling_agency,
475                    association_code,
476                    declared_segment_count,
477                    actual_segment_count,
478                });
479            }
480            "UNT" => {
481                return Err(EdifactError::InvalidSegmentForMessage {
482                    tag: "UNT".to_owned(),
483                    message_type: "ENVELOPE".to_owned(),
484                    offset: seg.span_start(),
485                });
486            }
487            "UNB" | "UNZ" if in_message => {
488                return Err(EdifactError::InvalidSegmentForMessage {
489                    tag: seg.tag().to_owned(),
490                    message_type: "ENVELOPE".to_owned(),
491                    offset: seg.span_start(),
492                });
493            }
494            _ if !in_message => {
495                return Err(EdifactError::InvalidSegmentForMessage {
496                    tag: seg.tag().to_owned(),
497                    message_type: "ENVELOPE".to_owned(),
498                    offset: seg.span_start(),
499                });
500            }
501            _ => {}
502        }
503    }
504
505    if in_message {
506        return Err(EdifactError::MissingSegment {
507            tag: "UNT".to_owned(),
508            expected_position: "end of message group".to_owned(),
509        });
510    }
511
512    Ok(messages)
513}
514
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse test fixtures into an owned-segment vec (no memory leaks).
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        crate::from_reader_collect(std::io::Cursor::new(input)).expect("parse failed")
    }

    /// Parse then validate using the borrowed-segment path.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        validate_envelope(&segs)
    }

    /// Parse then validate using the owned-segment path (exercises the generic path).
    fn parse_and_validate_owned(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        validate_envelope_from_owned(&parse(input))
    }

    /// Parse then run the lenient validator over the owned-segment path.
    fn parse_and_validate_lenient(input: &[u8]) -> Vec<EdifactError> {
        validate_envelope_lenient_from_owned(&parse(input))
    }

    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].message_type, "ORDERS");
        assert_eq!(messages[0].association_code, "EAN010");
        assert_eq!(messages[0].declared_segment_count, 4);
        assert_eq!(messages[0].actual_segment_count, 4); // UNH + BGM + DTM + UNT
    }

    #[test]
    fn valid_envelope_parses_ok_owned_path() {
        // Verify that the owned-segment path produces identical results to the borrowed path.
        let (interchange, messages) =
            parse_and_validate_owned(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages[0].declared_segment_count, 4);
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        // UNT declares 99 segments but only 4 are present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // UNZ declares 2 messages but only 1 UNH/UNT pair is present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        let input = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // The first segment is UNH, so the UNB presence check must fire.
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNB"),
            "expected MissingSegment(UNB), got {result:?}"
        );
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // Test that UNA does not interfere with envelope field extraction
        let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        // UNA is parsed by tokenizer; UNB field extraction must be correct
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT"),
            "expected MissingSegment(UNT), got {result:?}"
        );
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::InvalidSegmentForMessage { .. })
            ),
            "expected InvalidSegmentForMessage, got {result:?}"
        );
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        let input = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // Element 1 (sender) exists but is empty ("+") — component 0 is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
            "expected MissingRequiredComponent for empty sender, got: {result:?}"
        );
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
            "expected InvalidSegmentForMessage(UNH), got {result:?}"
        );
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNT"),
            "expected QualifierMismatch(UNT), got {result:?}"
        );
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNZ"),
            "expected QualifierMismatch(UNZ), got {result:?}"
        );
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // UNH element 1 = "ORDERS:D:11A" — component 3 (controlling agency) is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
            "expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNZ"),
            "expected InvalidSegmentForMessage(UNZ), got {result:?}"
        );
    }

    // ── UNG/UNE functional-group regression guard ────────────────────────────
    //
    // ISO 9735-1 defines optional functional groups (UNG/UNE) that may wrap
    // one or more UNH/UNT pairs.  `validate_envelope` documents, in its
    // `# Errors` section, that UNG/UNE are NOT supported.  This test asserts
    // the *documented* behaviour: UNG/UNE-wrapped interchanges must not
    // silently produce incorrect counts — they must return an explicit error.

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        // A UNG segment appearing between UNB and UNH is not a recognized
        // envelope segment — validate_envelope must reject it explicitly.
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
                      UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
                      UNH+1+ORDERS:D:96A:UN'\
                      BGM+220+PO-001+9'\
                      UNT+3+1'\
                      UNE+1+1'\
                      UNZ+1+1'";
        let result = parse_and_validate(input);
        // The error must be the dedicated FunctionalGroupNotSupported variant,
        // not a success with wrong counts and not some unrelated failure.
        assert!(
            matches!(
                result,
                Err(EdifactError::FunctionalGroupNotSupported { .. })
            ),
            "UNG/UNE is documented as unsupported; expected FunctionalGroupNotSupported, got {result:?}"
        );
    }

    // ── Lenient validation ───────────────────────────────────────────────────

    #[test]
    fn lenient_valid_envelope_returns_no_errors() {
        let errors = parse_and_validate_lenient(VALID_INTERCHANGE);
        assert!(errors.is_empty(), "expected no errors, got {errors:?}");
    }

    #[test]
    fn lenient_collects_message_and_segment_count_mismatches() {
        // UNZ declares 2 messages (1 present) AND UNT declares 99 segments
        // (3 present): the strict path stops at the first, lenient reports both.
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+99+1'UNZ+2+1'";
        let errors = parse_and_validate_lenient(input);
        assert_eq!(errors.len(), 2, "expected two errors, got {errors:?}");
        assert!(
            matches!(
                errors[0],
                EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                }
            ),
            "expected MessageCountMismatch(2,1) first, got {errors:?}"
        );
        assert!(
            matches!(
                errors[1],
                EdifactError::SegmentCountMismatch {
                    expected: 99,
                    actual: 3,
                    ..
                }
            ),
            "expected SegmentCountMismatch(99,3) second, got {errors:?}"
        );
    }

    // ── parse_unh ────────────────────────────────────────────────────────────

    #[test]
    fn parse_unh_extracts_identifier_fields() {
        let owned = parse(VALID_INTERCHANGE);
        let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        let unh = segs
            .iter()
            .find(|seg| SegmentReader::tag(*seg) == "UNH")
            .expect("fixture contains a UNH segment");
        let id = parse_unh(unh).expect("UNH should parse");
        assert_eq!(id.message_type, "ORDERS");
        assert_eq!(id.version, "D");
        assert_eq!(id.release, "11A");
        assert_eq!(id.controlling_agency, "UN");
        assert_eq!(id.association_assigned, "EAN010");
    }
}
723            ),
724            "expected FunctionalGroupNotSupported, got {result:?}"
725        );
726    }
727}