Skip to main content

edifact_rs/
envelope.rs

1//! EDIFACT envelope validation (Story 2.4).
2//!
3//! Validates UNB / UNH / UNT / UNZ envelope segment structure and count
4//! consistency — independently of business-rule (AHB) validation.
5
6use crate::{error::EdifactError, model::Segment};
7
/// Interchange-level envelope data gathered from the `UNB` header and the
/// `UNZ` trailer of one interchange.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier (`UNB` element 0, component 0), e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Sender identification (`UNB` element 1, component 0).
    pub sender_id: String,
    /// Recipient identification (`UNB` element 2, component 0).
    pub recipient_id: String,
    /// Date/time from `UNB` element 3: the date alone when no time component
    /// is present, otherwise `"<date>:<time>"` exactly as found in the source.
    pub datetime: String,
    /// Interchange control reference (`UNB` element 4, component 0).
    pub control_ref: String,
    /// Number of messages the `UNZ` trailer claims the interchange contains.
    pub declared_message_count: u32,
    /// Number of `UNH`/`UNT` pairs actually found between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
26
/// Message-level envelope data gathered from one `UNH` header and the `UNT`
/// trailer that closes it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MessageEnvelope {
    /// Message reference number (`UNH` element 0, component 0).
    pub message_ref: String,
    /// Message type (`UNH` element 1, component 0), e.g. `"ORDERS"`.
    pub message_type: String,
    /// Message version number (`UNH` element 1, component 1), e.g. `"D"`.
    pub version: String,
    /// Message release number (`UNH` element 1, component 2), e.g. `"11A"`.
    pub release: String,
    /// Controlling agency (`UNH` element 1, component 3), e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code / MIG version (`UNH` element 1,
    /// component 4), e.g. `"FV2510"`.  Empty when the component is absent.
    pub association_code: String,
    /// Segment count the `UNT` trailer claims for this message.
    pub declared_segment_count: u32,
    /// Segments actually seen from `UNH` through `UNT`, both inclusive.
    pub actual_segment_count: u32,
}
47
/// Borrowed view of the identifier fields carried by a `UNH` segment.
///
/// Values of this type are produced by [`parse_unh`].  Every field is a
/// string slice with lifetime `'a`; nothing is copied, which is why the type
/// can be `Copy`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MessageIdentifier<'a> {
    /// Message type, e.g. `"ORDERS"`.
    pub message_type: &'a str,
    /// Message version number, e.g. `"D"`.
    pub version: &'a str,
    /// Message release number, e.g. `"11A"`.
    pub release: &'a str,
    /// Controlling agency code, e.g. `"UN"`.
    pub controlling_agency: &'a str,
    /// Association assigned code (MIG version), e.g. `"FV2510"`.
    pub association_assigned: &'a str,
}
65
66/// Extract identifier fields from a `UNH` segment.
67///
68/// Returns a [`MessageIdentifier`] borrowing directly from the segment's
69/// component slices — zero allocation.
70///
71/// # Errors
72///
73/// Returns [`EdifactError::MissingRequiredElement`] if element 1 of the `UNH`
74/// segment is absent, or [`EdifactError::MissingRequiredComponent`] if
75/// component 0 of that element (the message type) is absent.
76pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
77    let elem = unh
78        .get_element(1)
79        .ok_or_else(|| EdifactError::MissingRequiredElement {
80            tag: "UNH".to_owned(),
81            element_index: 1,
82        })?;
83    let message_type =
84        elem.get_component(0)
85            .ok_or_else(|| EdifactError::MissingRequiredComponent {
86                tag: "UNH".to_owned(),
87                element_index: 1,
88                component_index: 0,
89            })?;
90    Ok(MessageIdentifier {
91        message_type,
92        version: elem.get_component(1).unwrap_or(""),
93        release: elem.get_component(2).unwrap_or(""),
94        controlling_agency: elem.get_component(3).unwrap_or(""),
95        association_assigned: elem.get_component(4).unwrap_or(""),
96    })
97}
98
99/// Validates the EDIFACT interchange envelope for the given segments.
100///
101/// Checks:
102/// - `UNB` is present (first meaningful segment)
103/// - `UNZ` is present (last segment) with correct message count
104/// - Each `UNH` is paired with a `UNT` carrying a matching segment count
105/// - `UNZ` message count matches the number of `UNH`/`UNT` pairs found
106///
107/// Returns `Ok((interchange_env, message_envs))` on success,
108/// or an [`EdifactError`] on any structural violation.
109///
110/// # Errors
111///
112/// Returns [`EdifactError::FunctionalGroupNotSupported`] if the input contains
113/// `UNG`/`UNE` functional group segments.  Strip them before calling this
114/// function if functional groups are not relevant to your use case.
115///
116/// Returns [`EdifactError::MessageCountMismatch`] or
117/// [`EdifactError::SegmentCountMismatch`] on count discrepancies.
118pub fn validate_envelope(
119    segments: &[Segment<'_>],
120) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
121    // Functional group segments are not supported.  Detect them early so the
122    // caller gets a clear diagnostic rather than a misleading segment-count
123    // mismatch or `InvalidSegmentForMessage` buried deep in the parse.
124    if let Some(ung_or_une) = segments.iter().find(|s| s.tag == "UNG" || s.tag == "UNE") {
125        return Err(EdifactError::FunctionalGroupNotSupported {
126            offset: ung_or_une.span.start,
127        });
128    }
129
130    let mut interchange_env = extract_interchange(segments)?;
131    let message_envs = extract_messages(segments)?;
132    interchange_env.actual_message_count =
133        u32::try_from(message_envs.len()).map_err(|_| EdifactError::InterchangeTooLarge {
134            count: message_envs.len() as u64,
135        })?;
136
137    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
138    if interchange_env.declared_message_count != interchange_env.actual_message_count {
139        return Err(EdifactError::MessageCountMismatch {
140            expected: interchange_env.declared_message_count,
141            actual: interchange_env.actual_message_count,
142        });
143    }
144
145    // Cross-check each UNT segment count vs. actual count
146    for msg in &message_envs {
147        if msg.declared_segment_count != msg.actual_segment_count {
148            return Err(EdifactError::SegmentCountMismatch {
149                expected: msg.declared_segment_count,
150                actual: msg.actual_segment_count,
151                message_ref: msg.message_ref.clone(),
152            });
153        }
154    }
155
156    Ok((interchange_env, message_envs))
157}
158
159fn extract_interchange(segments: &[Segment<'_>]) -> Result<InterchangeEnvelope, EdifactError> {
160    if segments.first().map(|segment| segment.tag) != Some("UNB") {
161        return Err(EdifactError::MissingSegment {
162            tag: "UNB".to_owned(),
163            expected_position: "first segment of interchange".to_owned(),
164        });
165    }
166
167    if segments.last().map(|segment| segment.tag) != Some("UNZ") {
168        return Err(EdifactError::MissingSegment {
169            tag: "UNZ".to_owned(),
170            expected_position: "last segment of interchange".to_owned(),
171        });
172    }
173
174    let unb = &segments[0];
175    let unz = &segments[segments.len() - 1];
176
177    let syntax_identifier = required_component(unb, 0, 0)?.to_owned();
178
179    let sender_id = required_component(unb, 1, 0)?.to_owned();
180
181    let recipient_id = required_component(unb, 2, 0)?.to_owned();
182
183    // Element 3: date/time composite
184    let date = required_component(unb, 3, 0)?;
185    let time = unb
186        .get_element(3)
187        .and_then(|e| e.get_component(1))
188        .unwrap_or("");
189    let datetime = if time.is_empty() {
190        date.to_owned()
191    } else {
192        format!("{date}:{time}")
193    };
194
195    let control_ref = required_component(unb, 4, 0)?.to_owned();
196    let unz_control_ref = required_component(unz, 1, 0)?;
197    if unz_control_ref != control_ref {
198        return Err(EdifactError::QualifierMismatch {
199            tag: "UNZ".to_owned(),
200            actual: unz_control_ref.to_owned(),
201            expected: control_ref,
202            offset: unz.span.start,
203        });
204    }
205
206    let declared_message_count: u32 =
207        required_component(unz, 0, 0)?
208            .parse()
209            .map_err(|_| EdifactError::InvalidText {
210                offset: unz.span.start,
211            })?;
212
213    Ok(InterchangeEnvelope {
214        syntax_identifier,
215        sender_id,
216        recipient_id,
217        datetime,
218        control_ref,
219        declared_message_count,
220        actual_message_count: 0,
221    })
222}
223
224/// Thin shim that forwards to [`crate::de::required_component`].
225#[inline]
226fn required_component<'a>(
227    segment: &'a Segment<'_>,
228    element_index: usize,
229    component_index: usize,
230) -> Result<&'a str, EdifactError> {
231    crate::de::required_component(segment, element_index, component_index)
232}
233
234fn extract_messages(segments: &[Segment<'_>]) -> Result<Vec<MessageEnvelope>, EdifactError> {
235    let mut messages: Vec<MessageEnvelope> = Vec::new();
236    let mut in_message = false;
237    let mut msg_start_idx: usize = 0;
238    let mut current_unh: Option<&Segment<'_>> = None;
239
240    for (i, seg) in segments[1..segments.len() - 1].iter().enumerate() {
241        match seg.tag {
242            "UNH" => {
243                if in_message {
244                    return Err(EdifactError::InvalidSegmentForMessage {
245                        tag: "UNH".to_owned(),
246                        message_type: "ENVELOPE".to_owned(),
247                        offset: seg.span.start,
248                    });
249                }
250                in_message = true;
251                msg_start_idx = i;
252                current_unh = Some(seg);
253            }
254            "UNT" if in_message => {
255                let unh = current_unh
256                    .take()
257                    .ok_or(EdifactError::InvalidSegmentForMessage {
258                        tag: "UNT".to_owned(),
259                        message_type: "ENVELOPE".to_owned(),
260                        offset: seg.span.start,
261                    })?;
262
263                let message_ref = required_component(unh, 0, 0)?.to_owned();
264
265                let message_type = required_component(unh, 1, 0)?.to_owned();
266                let version = required_component(unh, 1, 1)?.to_owned();
267                let release = required_component(unh, 1, 2)?.to_owned();
268                let controlling_agency = required_component(unh, 1, 3)?.to_owned();
269                let association_code = unh
270                    .get_element(1)
271                    .and_then(|e| e.get_component(4))
272                    .unwrap_or("")
273                    .to_owned();
274
275                let declared_segment_count: u32 =
276                    required_component(seg, 0, 0)?.parse().map_err(|_| {
277                        EdifactError::InvalidText {
278                            offset: seg.span.start,
279                        }
280                    })?;
281                let unt_ref = required_component(seg, 1, 0)?;
282                if unt_ref != message_ref {
283                    return Err(EdifactError::QualifierMismatch {
284                        tag: "UNT".to_owned(),
285                        actual: unt_ref.to_owned(),
286                        expected: message_ref.clone(),
287                        offset: seg.span.start,
288                    });
289                }
290
291                // actual count = segments from UNH (inclusive) to UNT (inclusive)
292                let actual_segment_count = u32::try_from(i - msg_start_idx + 1).map_err(|_| {
293                    EdifactError::InterchangeTooLarge {
294                        // INVARIANT: usize ≤ u64::MAX on all supported targets; unwrap_or is
295                        // unreachable but prevents a panic on hypothetical exotic platforms.
296                        count: u64::try_from(i - msg_start_idx + 1).unwrap_or(u64::MAX),
297                    }
298                })?;
299
300                in_message = false;
301                messages.push(MessageEnvelope {
302                    message_ref,
303                    message_type,
304                    version,
305                    release,
306                    controlling_agency,
307                    association_code,
308                    declared_segment_count,
309                    actual_segment_count,
310                });
311            }
312            "UNT" => {
313                return Err(EdifactError::InvalidSegmentForMessage {
314                    tag: "UNT".to_owned(),
315                    message_type: "ENVELOPE".to_owned(),
316                    offset: seg.span.start,
317                });
318            }
319            "UNB" | "UNZ" if in_message => {
320                return Err(EdifactError::InvalidSegmentForMessage {
321                    tag: seg.tag.to_owned(),
322                    message_type: "ENVELOPE".to_owned(),
323                    offset: seg.span.start,
324                });
325            }
326            _ if !in_message => {
327                return Err(EdifactError::InvalidSegmentForMessage {
328                    tag: seg.tag.to_owned(),
329                    message_type: "ENVELOPE".to_owned(),
330                    offset: seg.span.start,
331                });
332            }
333            _ => {}
334        }
335    }
336
337    if in_message {
338        return Err(EdifactError::MissingSegment {
339            tag: "UNT".to_owned(),
340            expected_position: "end of message group".to_owned(),
341        });
342    }
343
344    Ok(messages)
345}
346
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed single-message interchange, preceded by a `UNA` service
    /// string advice.
    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    /// Tokenizes a fixture into owned segments; panics if the fixture itself
    /// cannot be parsed.
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        let reader = std::io::Cursor::new(input);
        crate::from_reader(reader).expect("parse failed")
    }

    /// Tokenizes a fixture and runs `validate_envelope` over borrowed views
    /// of the resulting segments.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let borrowed: Vec<Segment<'_>> =
            owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        validate_envelope(&borrowed)
    }

    // ── Happy path ───────────────────────────────────────────────────────────

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");

        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);

        assert_eq!(messages.len(), 1);
        let first = &messages[0];
        assert_eq!(first.message_type, "ORDERS");
        assert_eq!(first.association_code, "EAN010");
        assert_eq!(first.declared_segment_count, 4);
        // UNH + BGM + DTM + UNT
        assert_eq!(first.actual_segment_count, 4);
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // The fixture starts with a UNA segment; the UNB fields must still be
        // extracted from the right positions.
        let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    // ── Count mismatches ─────────────────────────────────────────────────────

    #[test]
    fn unt_count_mismatch_returns_err() {
        // UNT claims 99 segments while the message really has 4.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // UNZ claims 2 messages while only a single UNH/UNT pair exists.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    // ── Missing or misplaced segments ────────────────────────────────────────

    #[test]
    fn missing_unb_returns_err() {
        // The interchange starts directly with UNH.
        let raw = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(result.is_err());
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        // The message is opened but never closed before UNZ.
        let raw =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT")
        );
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        // A BGM sits between the closing UNT and UNZ.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(matches!(
            result,
            Err(EdifactError::InvalidSegmentForMessage { .. })
        ));
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        // A second UNH appears before the first message has been closed.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
            "expected InvalidSegmentForMessage(UNH), got {result:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        // A UNZ appears while the message is still open.
        let raw =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { tag, .. }) if tag == "UNZ")
        );
    }

    // ── Missing components ───────────────────────────────────────────────────

    #[test]
    fn missing_unb_sender_component_returns_err() {
        // Element 1 (sender) is present but empty ("++"), so its component 0
        // is absent.
        let raw = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
            "expected MissingRequiredComponent for empty sender, got: {result:?}"
        );
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        // UNH element 1 is "ORDERS:D:11A": component 3 (controlling agency)
        // is absent.
        let raw =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
            "expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
        );
    }

    // ── Reference mismatches ─────────────────────────────────────────────────

    #[test]
    fn unt_message_reference_must_match_unh() {
        // UNH carries reference "1", UNT carries "999".
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNT"));
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        // UNB carries control reference "1", UNZ carries "999".
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let result = parse_and_validate(raw);
        assert!(matches!(result, Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNZ"));
    }

    // ── UNG/UNE functional-group regression guard ────────────────────────────
    //
    // ISO 9735-1 defines optional functional groups (UNG/UNE) that may wrap
    // one or more UNH/UNT pairs.  `validate_envelope` documents that UNG/UNE
    // are NOT supported (see the `# Errors` section of its doc comment).  The
    // test below asserts that documented behaviour: a UNG/UNE-wrapped
    // interchange must not silently produce incorrect counts; it must be
    // rejected with an explicit error.

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        // UNG follows UNB directly and UNE precedes UNZ.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'\
                    UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
                    UNH+1+ORDERS:D:96A:UN'\
                    BGM+220+PO-001+9'\
                    UNT+3+1'\
                    UNE+1+1'\
                    UNZ+1+1'";
        let result = parse_and_validate(raw);
        assert!(
            result.is_err(),
            "UNG/UNE is documented as unsupported; must return an error, not silently produce wrong counts"
        );
        // It has to be the dedicated variant, not some unrelated failure
        // further down the pipeline.
        assert!(
            matches!(
                result,
                Err(EdifactError::FunctionalGroupNotSupported { .. })
            ),
            "expected FunctionalGroupNotSupported, got {result:?}"
        );
    }
}