Skip to main content

edifact_rs/
envelope.rs

//! EDIFACT envelope validation (Story 2.4).
//!
//! Validates the structure of the `UNB` / `UNH` / `UNT` / `UNZ` envelope
//! segments and the consistency of their declared counts, independently of
//! business-rule (AHB) validation.
5
6use crate::{error::EdifactError, model::Segment};
7
/// Interchange-level envelope data collected from the `UNB` header and the
/// `UNZ` trailer.
///
/// The `declared_*` value is what the document claims; the `actual_*` value
/// is what validation counted.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier (`UNB` element 0, component 0), e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Interchange sender identification (`UNB` element 1, component 0).
    pub sender_id: String,
    /// Interchange recipient identification (`UNB` element 2, component 0).
    pub recipient_id: String,
    /// Interchange date-time as found in the source: the date alone, or
    /// `date:time` when `UNB` element 3 carries a non-empty time component.
    pub datetime: String,
    /// Interchange control reference (`UNB` element 4, component 0).
    pub control_ref: String,
    /// Message (functional group) count declared in `UNZ` element 0.
    pub declared_message_count: u32,
    /// Number of messages actually encountered between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
26
/// Message-level envelope data collected from one `UNH` header and its
/// closing `UNT` trailer.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MessageEnvelope {
    /// Message reference (`UNH` element 0).
    pub message_ref: String,
    /// EDIFACT message type (`UNH` element 1, component 0), e.g. `"ORDERS"`.
    pub message_type: String,
    /// Version number (`UNH` element 1, component 1), e.g. `"D"`.
    pub version: String,
    /// Release number (`UNH` element 1, component 2), e.g. `"11A"`.
    pub release: String,
    /// Controlling agency code (`UNH` element 1, component 3), e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code / MIG version (`UNH` element 1,
    /// component 4), e.g. `"FV2510"`.
    pub association_code: String,
    /// Segment count declared in `UNT` element 0.
    pub declared_segment_count: u32,
    /// Segments actually present from this `UNH` through its `UNT`.
    pub actual_segment_count: u32,
}
47
/// Borrowed view of the message identifier composite of a `UNH` segment.
///
/// Values of this type are produced by [`parse_unh`].  Every field is a
/// string slice borrowed from the segment handed to the parser, so the
/// identifier cannot outlive that data.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MessageIdentifier<'a> {
    /// EDIFACT message type (component 0), e.g. `"ORDERS"`.
    pub message_type: &'a str,
    /// Version number (component 1), e.g. `"D"`.
    pub version: &'a str,
    /// Release number (component 2), e.g. `"11A"`.
    pub release: &'a str,
    /// Controlling agency code (component 3), e.g. `"UN"`.
    pub controlling_agency: &'a str,
    /// Association assigned code / MIG version (component 4), e.g. `"FV2510"`.
    pub association_assigned: &'a str,
}
65
66/// Extract identifier fields from a `UNH` segment.
67///
68/// Returns a [`MessageIdentifier`] borrowing directly from the segment's
69/// component slices — zero allocation.
70///
71/// # Errors
72///
73/// Returns [`EdifactError::MissingRequiredElement`] if element 1 of the `UNH`
74/// segment is absent, or [`EdifactError::MissingRequiredComponent`] if
75/// component 0 of that element (the message type) is absent.
76pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
77    let elem = unh.get_element(1).ok_or_else(|| EdifactError::MissingRequiredElement {
78        tag: "UNH".to_owned(),
79        element_index: 1,
80    })?;
81    let message_type = elem.get_component(0).ok_or_else(|| EdifactError::MissingRequiredComponent {
82        tag: "UNH".to_owned(),
83        element_index: 1,
84        component_index: 0,
85    })?;
86    Ok(MessageIdentifier {
87        message_type,
88        version: elem.get_component(1).unwrap_or(""),
89        release: elem.get_component(2).unwrap_or(""),
90        controlling_agency: elem.get_component(3).unwrap_or(""),
91        association_assigned: elem.get_component(4).unwrap_or(""),
92    })
93}
94
95/// Validates the EDIFACT interchange envelope for the given segments.
96///
97/// Checks:
98/// - `UNB` is present (first meaningful segment)
99/// - `UNZ` is present (last segment) with correct message count
100/// - Each `UNH` is paired with a `UNT` carrying a matching segment count
101/// - `UNZ` message count matches the number of `UNH`/`UNT` pairs found
102///
103/// Returns `Ok((interchange_env, message_envs))` on success,
104/// or `Err(EdifactError::MessageCountMismatch)` / `Err(EdifactError::SegmentCountMismatch)` on
105/// count discrepancies.
106///
107/// # Limitations
108///
109/// Functional group segments (`UNG`/`UNE`) are **not supported**.  If the
110/// input contains `UNG` or `UNE` segments they will be treated as regular
111/// message segments and may cause an [`EdifactError::InvalidSegmentForMessage`]
112/// error or incorrect segment counting.  Strip functional-group wrappers
113/// before calling this function.
114pub fn validate_envelope(
115    segments: &[Segment<'_>],
116) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
117    let mut interchange_env = extract_interchange(segments)?;
118    let message_envs = extract_messages(segments)?;
119    interchange_env.actual_message_count = u32::try_from(message_envs.len())
120        .map_err(|_| EdifactError::InterchangeTooLarge {
121            count: message_envs.len() as u64,
122        })?;
123
124    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
125    if interchange_env.declared_message_count != interchange_env.actual_message_count {
126        return Err(EdifactError::MessageCountMismatch {
127            expected: interchange_env.declared_message_count,
128            actual: interchange_env.actual_message_count,
129        });
130    }
131
132    // Cross-check each UNT segment count vs. actual count
133    for msg in &message_envs {
134        if msg.declared_segment_count != msg.actual_segment_count {
135            return Err(EdifactError::SegmentCountMismatch {
136                expected: msg.declared_segment_count,
137                actual: msg.actual_segment_count,
138                message_ref: msg.message_ref.clone(),
139            });
140        }
141    }
142
143    Ok((interchange_env, message_envs))
144}
145
146fn extract_interchange(segments: &[Segment<'_>]) -> Result<InterchangeEnvelope, EdifactError> {
147    if segments.first().map(|segment| segment.tag) != Some("UNB") {
148        return Err(EdifactError::MissingSegment {
149            tag: "UNB".to_owned(),
150            expected_position: "first segment of interchange".to_owned(),
151        });
152    }
153
154    if segments.last().map(|segment| segment.tag) != Some("UNZ") {
155        return Err(EdifactError::MissingSegment {
156            tag: "UNZ".to_owned(),
157            expected_position: "last segment of interchange".to_owned(),
158        });
159    }
160
161    let unb = &segments[0];
162    let unz = &segments[segments.len() - 1];
163
164    let syntax_identifier = required_component(unb, 0, 0)?.to_owned();
165
166    let sender_id = required_component(unb, 1, 0)?.to_owned();
167
168    let recipient_id = required_component(unb, 2, 0)?.to_owned();
169
170    // Element 3: date/time composite
171    let date = required_component(unb, 3, 0)?;
172    let time = unb
173        .get_element(3)
174        .and_then(|e| e.get_component(1))
175        .unwrap_or("");
176    let datetime = if time.is_empty() {
177        date.to_owned()
178    } else {
179        format!("{date}:{time}")
180    };
181
182    let control_ref = required_component(unb, 4, 0)?.to_owned();
183    let unz_control_ref = required_component(unz, 1, 0)?;
184    if unz_control_ref != control_ref {
185        return Err(EdifactError::QualifierMismatch {
186            tag: "UNZ".to_owned(),
187            actual: unz_control_ref.to_owned(),
188            expected: control_ref,
189            offset: unz.span.start,
190        });
191    }
192
193    let declared_message_count: u32 = required_component(unz, 0, 0)?
194        .parse()
195        .map_err(|_| EdifactError::InvalidText {
196            offset: unz.span.start,
197        })?;
198
199    Ok(InterchangeEnvelope {
200        syntax_identifier,
201        sender_id,
202        recipient_id,
203        datetime,
204        control_ref,
205        declared_message_count,
206        actual_message_count: 0,
207    })
208}
209
210/// Thin shim that forwards to [`crate::de::required_component`].
211#[inline]
212fn required_component<'a>(
213    segment: &'a Segment<'_>,
214    element_index: usize,
215    component_index: usize,
216) -> Result<&'a str, EdifactError> {
217    crate::de::required_component(segment, element_index, component_index)
218}
219
220fn extract_messages(segments: &[Segment<'_>]) -> Result<Vec<MessageEnvelope>, EdifactError> {
221    let mut messages: Vec<MessageEnvelope> = Vec::new();
222    let mut in_message = false;
223    let mut msg_start_idx: usize = 0;
224    let mut current_unh: Option<&Segment<'_>> = None;
225
226    for (i, seg) in segments[1..segments.len() - 1].iter().enumerate() {
227        match seg.tag {
228            "UNH" => {
229                if in_message {
230                    return Err(EdifactError::InvalidSegmentForMessage {
231                        tag: "UNH".to_owned(),
232                        message_type: "ENVELOPE".to_owned(),
233                        offset: seg.span.start,
234                    });
235                }
236                in_message = true;
237                msg_start_idx = i;
238                current_unh = Some(seg);
239            }
240            "UNT" if in_message => {
241                let unh = current_unh
242                    .take()
243                    .ok_or(EdifactError::InvalidSegmentForMessage {
244                        tag: "UNT".to_owned(),
245                        message_type: "ENVELOPE".to_owned(),
246                        offset: seg.span.start,
247                    })?;
248
249                let message_ref = required_component(unh, 0, 0)?.to_owned();
250
251                let message_type = required_component(unh, 1, 0)?.to_owned();
252                let version = required_component(unh, 1, 1)?.to_owned();
253                let release = required_component(unh, 1, 2)?.to_owned();
254                let controlling_agency = required_component(unh, 1, 3)?.to_owned();
255                let association_code = unh
256                    .get_element(1)
257                    .and_then(|e| e.get_component(4))
258                    .unwrap_or("")
259                    .to_owned();
260
261                let declared_segment_count: u32 = required_component(seg, 0, 0)?
262                    .parse()
263                    .map_err(|_| EdifactError::InvalidText {
264                        offset: seg.span.start,
265                    })?;
266                let unt_ref = required_component(seg, 1, 0)?;
267                if unt_ref != message_ref {
268                    return Err(EdifactError::QualifierMismatch {
269                        tag: "UNT".to_owned(),
270                        actual: unt_ref.to_owned(),
271                        expected: message_ref.clone(),
272                        offset: seg.span.start,
273                    });
274                }
275
276                // actual count = segments from UNH (inclusive) to UNT (inclusive)
277                let actual_segment_count = u32::try_from(i - msg_start_idx + 1)
278                    .map_err(|_| EdifactError::InterchangeTooLarge {
279                        // SAFETY: usize ≤ u64::MAX on all supported targets
280                        count: u64::try_from(i - msg_start_idx + 1).unwrap_or(u64::MAX),
281                    })?;
282
283                in_message = false;
284                messages.push(MessageEnvelope {
285                    message_ref,
286                    message_type,
287                    version,
288                    release,
289                    controlling_agency,
290                    association_code,
291                    declared_segment_count,
292                    actual_segment_count,
293                });
294            }
295            "UNT" => {
296                return Err(EdifactError::InvalidSegmentForMessage {
297                    tag: "UNT".to_owned(),
298                    message_type: "ENVELOPE".to_owned(),
299                    offset: seg.span.start,
300                });
301            }
302            "UNB" | "UNZ" if in_message => {
303                return Err(EdifactError::InvalidSegmentForMessage {
304                    tag: seg.tag.to_owned(),
305                    message_type: "ENVELOPE".to_owned(),
306                    offset: seg.span.start,
307                });
308            }
309            _ if !in_message => {
310                return Err(EdifactError::InvalidSegmentForMessage {
311                    tag: seg.tag.to_owned(),
312                    message_type: "ENVELOPE".to_owned(),
313                    offset: seg.span.start,
314                });
315            }
316            _ => {}
317        }
318    }
319
320    if in_message {
321        return Err(EdifactError::MissingSegment {
322            tag: "UNT".to_owned(),
323            expected_position: "end of message group".to_owned(),
324        });
325    }
326
327    Ok(messages)
328}
329
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes a fixture into owned segments; panics if the fixture itself
    /// cannot be parsed.
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        let reader = std::io::Cursor::new(input);
        crate::from_reader(reader).expect("parse failed")
    }

    /// Tokenizes a fixture and runs envelope validation on the borrowed view
    /// of its segments.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let mut borrowed: Vec<Segment<'_>> = Vec::with_capacity(owned.len());
        borrowed.extend(owned.iter().map(crate::OwnedSegment::as_borrowed));
        validate_envelope(&borrowed)
    }

    /// One well-formed ORDERS message, preceded by a UNA service string.
    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");

        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);

        assert_eq!(messages.len(), 1);
        let only = &messages[0];
        assert_eq!(only.message_type, "ORDERS");
        assert_eq!(only.association_code, "EAN010");
        assert_eq!(only.declared_segment_count, 4);
        // UNH + BGM + DTM + UNT
        assert_eq!(only.actual_segment_count, 4);
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        // The trailer claims 99 segments although the message holds only 4.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        let is_expected = matches!(
            outcome,
            Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
        );
        assert!(is_expected, "expected SegmentCountMismatch, got {outcome:?}");
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // The interchange trailer claims 2 messages although only 1 exists.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let outcome = parse_and_validate(raw);
        let is_expected = matches!(
            outcome,
            Err(EdifactError::MessageCountMismatch {
                expected: 2,
                actual: 1
            })
        );
        assert!(
            is_expected,
            "expected MessageCountMismatch(2,1), got {outcome:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        // The interchange starts directly with UNH.
        let raw = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        assert!(parse_and_validate(raw).is_err());
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // The UNA service string is handled by the tokenizer; the UNB fields
        // must still be read from the right positions.
        let (interchange, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        assert_eq!(interchange.syntax_identifier, "UNOA");
        assert_eq!(interchange.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let raw =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        assert!(matches!(
            outcome,
            Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT"
        ));
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        // A second BGM follows the UNT, outside of any message.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        assert!(matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { .. })
        ));
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        // The sender element exists but is empty ("++"), so its component 0
        // is absent.
        let raw = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        let is_expected = matches!(
            outcome,
            Err(EdifactError::MissingRequiredComponent {
                ref tag,
                element_index: 1,
                component_index: 0
            }) if tag == "UNB"
        );
        assert!(
            is_expected,
            "expected MissingRequiredComponent for empty sender, got: {outcome:?}"
        );
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        let is_expected = matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"
        );
        assert!(
            is_expected,
            "expected InvalidSegmentForMessage(UNH), got {outcome:?}"
        );
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        // UNH reference is 1, UNT reference is 999.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        assert!(matches!(
            outcome,
            Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNT"
        ));
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        // UNB control reference is 1, UNZ control reference is 999.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let outcome = parse_and_validate(raw);
        assert!(matches!(
            outcome,
            Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNZ"
        ));
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        // UNH element 1 is "ORDERS:D:11A": component 3 (controlling agency)
        // is missing.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        let is_expected = matches!(
            outcome,
            Err(EdifactError::MissingRequiredComponent {
                ref tag,
                element_index: 1,
                component_index: 3
            }) if tag == "UNH"
        );
        assert!(
            is_expected,
            "expected MissingRequiredComponent for truncated UNH message type, got: {outcome:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        // A UNZ appears between UNH and UNT.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        assert!(matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNZ"
        ));
    }

    // ── UNG/UNE functional-group regression guard ────────────────────────────
    //
    // ISO 9735-1 defines optional functional groups (UNG/UNE) that may wrap
    // one or more UNH/UNT pairs.  The `# Limitations` section of the
    // `validate_envelope` documentation states that UNG/UNE are NOT
    // supported.  This test pins the behaviour for a UNG placed between UNB
    // and UNH: the interchange must be rejected with an explicit error
    // instead of silently producing incorrect counts.

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        // UNG sits between UNB and the first UNH, i.e. outside any message,
        // so it is not a segment the envelope validator accepts there.
        let raw = b"UNB+UNOA:3+S+R+200101:0900+1'\
                    UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
                    UNH+1+ORDERS:D:96A:UN'\
                    BGM+220+PO-001+9'\
                    UNT+3+1'\
                    UNE+1+1'\
                    UNZ+1+1'";
        let outcome = parse_and_validate(raw);
        assert!(
            outcome.is_err(),
            "UNG/UNE is documented as unsupported; must return an error, not silently produce wrong counts"
        );
        // The error has to name the offending segment (UNG or UNE) rather
        // than surface as some unrelated failure.
        let names_group_segment = matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNG" || tag == "UNE"
        );
        assert!(
            names_group_segment,
            "expected InvalidSegmentForMessage for UNG or UNE, got {outcome:?}"
        );
    }
}
509            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNG" || tag == "UNE"),
510            "expected InvalidSegmentForMessage for UNG or UNE, got {result:?}"
511        );
512    }
513}