Skip to main content

edifact_rs/
envelope.rs

//! EDIFACT envelope validation (Story 2.4).
//!
//! Validates UNB / UNH / UNT / UNZ envelope segment structure and count
//! consistency — independently of business-rule (AHB) validation.
5
6use crate::{error::EdifactError, model::Segment};
7
/// Summary of one interchange, assembled from its `UNB` header and `UNZ`
/// trailer.
///
/// Textual fields are owned copies of what was found in the source; the two
/// counters let callers compare what `UNZ` declares against what was seen.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier taken from `UNB`, e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Identification of the interchange sender.
    pub sender_id: String,
    /// Identification of the interchange recipient.
    pub recipient_id: String,
    /// Date-time of the interchange, kept as the string found in the source.
    pub datetime: String,
    /// Control reference of the interchange.
    pub control_ref: String,
    /// Message (functional group) count as declared by `UNZ`.
    pub declared_message_count: u32,
    /// Message count actually encountered between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
26
/// Summary of one message, assembled from its `UNH` header and `UNT` trailer.
///
/// Textual fields are owned copies of what was found in the source; the two
/// counters let callers compare what `UNT` declares against what was seen.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
    /// Message reference (`UNH` element 0).
    pub message_ref: String,
    /// Message type of the EDIFACT message, e.g. `"ORDERS"`.
    pub message_type: String,
    /// Message version number, e.g. `"D"`.
    pub version: String,
    /// Message release number, e.g. `"11A"`.
    pub release: String,
    /// Code of the controlling agency, e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code (MIG version), e.g. `"FV2510"`.
    pub association_code: String,
    /// Segment count as declared by `UNT`.
    pub declared_segment_count: u32,
    /// Segment count actually found between this `UNH` and its `UNT`.
    pub actual_segment_count: u32,
}
47
/// Borrowed view of the identifier fields carried by a `UNH` segment.
///
/// Values of this type are produced by [`parse_unh`].  Every field is a
/// string slice with lifetime `'a`, so the struct is cheap to copy and never
/// owns its text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MessageIdentifier<'a> {
    /// Message type of the EDIFACT message, e.g. `"ORDERS"`.
    pub message_type: &'a str,
    /// Message version number, e.g. `"D"`.
    pub version: &'a str,
    /// Message release number, e.g. `"11A"`.
    pub release: &'a str,
    /// Code of the controlling agency, e.g. `"UN"`.
    pub controlling_agency: &'a str,
    /// Association assigned code (MIG version), e.g. `"FV2510"`.
    pub association_assigned: &'a str,
}
65
66/// Extract identifier fields from a `UNH` segment.
67///
68/// Returns a [`MessageIdentifier`] borrowing directly from the segment's
69/// component slices — zero allocation.
70///
71/// # Errors
72///
73/// Returns [`EdifactError::MissingRequiredElement`] if element 1 of the `UNH`
74/// segment is absent, or [`EdifactError::MissingRequiredComponent`] if
75/// component 0 of that element (the message type) is absent.
76pub fn parse_unh<'a>(unh: &'a Segment<'a>) -> Result<MessageIdentifier<'a>, EdifactError> {
77    let elem = unh
78        .get_element(1)
79        .ok_or_else(|| EdifactError::MissingRequiredElement {
80            tag: "UNH".to_owned(),
81            element_index: 1,
82        })?;
83    let message_type =
84        elem.get_component(0)
85            .ok_or_else(|| EdifactError::MissingRequiredComponent {
86                tag: "UNH".to_owned(),
87                element_index: 1,
88                component_index: 0,
89            })?;
90    Ok(MessageIdentifier {
91        message_type,
92        version: elem.get_component(1).unwrap_or(""),
93        release: elem.get_component(2).unwrap_or(""),
94        controlling_agency: elem.get_component(3).unwrap_or(""),
95        association_assigned: elem.get_component(4).unwrap_or(""),
96    })
97}
98
99/// Validates the EDIFACT interchange envelope for the given segments.
100///
101/// Checks:
102/// - `UNB` is present (first meaningful segment)
103/// - `UNZ` is present (last segment) with correct message count
104/// - Each `UNH` is paired with a `UNT` carrying a matching segment count
105/// - `UNZ` message count matches the number of `UNH`/`UNT` pairs found
106///
107/// Returns `Ok((interchange_env, message_envs))` on success,
108/// or `Err(EdifactError::MessageCountMismatch)` / `Err(EdifactError::SegmentCountMismatch)` on
109/// count discrepancies.
110///
111/// # Limitations
112///
113/// Functional group segments (`UNG`/`UNE`) are **not supported**.  If the
114/// input contains `UNG` or `UNE` segments they will be treated as regular
115/// message segments and may cause an [`EdifactError::InvalidSegmentForMessage`]
116/// error or incorrect segment counting.  Strip functional-group wrappers
117/// before calling this function.
118pub fn validate_envelope(
119    segments: &[Segment<'_>],
120) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
121    let mut interchange_env = extract_interchange(segments)?;
122    let message_envs = extract_messages(segments)?;
123    interchange_env.actual_message_count =
124        u32::try_from(message_envs.len()).map_err(|_| EdifactError::InterchangeTooLarge {
125            count: message_envs.len() as u64,
126        })?;
127
128    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
129    if interchange_env.declared_message_count != interchange_env.actual_message_count {
130        return Err(EdifactError::MessageCountMismatch {
131            expected: interchange_env.declared_message_count,
132            actual: interchange_env.actual_message_count,
133        });
134    }
135
136    // Cross-check each UNT segment count vs. actual count
137    for msg in &message_envs {
138        if msg.declared_segment_count != msg.actual_segment_count {
139            return Err(EdifactError::SegmentCountMismatch {
140                expected: msg.declared_segment_count,
141                actual: msg.actual_segment_count,
142                message_ref: msg.message_ref.clone(),
143            });
144        }
145    }
146
147    Ok((interchange_env, message_envs))
148}
149
150fn extract_interchange(segments: &[Segment<'_>]) -> Result<InterchangeEnvelope, EdifactError> {
151    if segments.first().map(|segment| segment.tag) != Some("UNB") {
152        return Err(EdifactError::MissingSegment {
153            tag: "UNB".to_owned(),
154            expected_position: "first segment of interchange".to_owned(),
155        });
156    }
157
158    if segments.last().map(|segment| segment.tag) != Some("UNZ") {
159        return Err(EdifactError::MissingSegment {
160            tag: "UNZ".to_owned(),
161            expected_position: "last segment of interchange".to_owned(),
162        });
163    }
164
165    let unb = &segments[0];
166    let unz = &segments[segments.len() - 1];
167
168    let syntax_identifier = required_component(unb, 0, 0)?.to_owned();
169
170    let sender_id = required_component(unb, 1, 0)?.to_owned();
171
172    let recipient_id = required_component(unb, 2, 0)?.to_owned();
173
174    // Element 3: date/time composite
175    let date = required_component(unb, 3, 0)?;
176    let time = unb
177        .get_element(3)
178        .and_then(|e| e.get_component(1))
179        .unwrap_or("");
180    let datetime = if time.is_empty() {
181        date.to_owned()
182    } else {
183        format!("{date}:{time}")
184    };
185
186    let control_ref = required_component(unb, 4, 0)?.to_owned();
187    let unz_control_ref = required_component(unz, 1, 0)?;
188    if unz_control_ref != control_ref {
189        return Err(EdifactError::QualifierMismatch {
190            tag: "UNZ".to_owned(),
191            actual: unz_control_ref.to_owned(),
192            expected: control_ref,
193            offset: unz.span.start,
194        });
195    }
196
197    let declared_message_count: u32 =
198        required_component(unz, 0, 0)?
199            .parse()
200            .map_err(|_| EdifactError::InvalidText {
201                offset: unz.span.start,
202            })?;
203
204    Ok(InterchangeEnvelope {
205        syntax_identifier,
206        sender_id,
207        recipient_id,
208        datetime,
209        control_ref,
210        declared_message_count,
211        actual_message_count: 0,
212    })
213}
214
215/// Thin shim that forwards to [`crate::de::required_component`].
216#[inline]
217fn required_component<'a>(
218    segment: &'a Segment<'_>,
219    element_index: usize,
220    component_index: usize,
221) -> Result<&'a str, EdifactError> {
222    crate::de::required_component(segment, element_index, component_index)
223}
224
225fn extract_messages(segments: &[Segment<'_>]) -> Result<Vec<MessageEnvelope>, EdifactError> {
226    let mut messages: Vec<MessageEnvelope> = Vec::new();
227    let mut in_message = false;
228    let mut msg_start_idx: usize = 0;
229    let mut current_unh: Option<&Segment<'_>> = None;
230
231    for (i, seg) in segments[1..segments.len() - 1].iter().enumerate() {
232        match seg.tag {
233            "UNH" => {
234                if in_message {
235                    return Err(EdifactError::InvalidSegmentForMessage {
236                        tag: "UNH".to_owned(),
237                        message_type: "ENVELOPE".to_owned(),
238                        offset: seg.span.start,
239                    });
240                }
241                in_message = true;
242                msg_start_idx = i;
243                current_unh = Some(seg);
244            }
245            "UNT" if in_message => {
246                let unh = current_unh
247                    .take()
248                    .ok_or(EdifactError::InvalidSegmentForMessage {
249                        tag: "UNT".to_owned(),
250                        message_type: "ENVELOPE".to_owned(),
251                        offset: seg.span.start,
252                    })?;
253
254                let message_ref = required_component(unh, 0, 0)?.to_owned();
255
256                let message_type = required_component(unh, 1, 0)?.to_owned();
257                let version = required_component(unh, 1, 1)?.to_owned();
258                let release = required_component(unh, 1, 2)?.to_owned();
259                let controlling_agency = required_component(unh, 1, 3)?.to_owned();
260                let association_code = unh
261                    .get_element(1)
262                    .and_then(|e| e.get_component(4))
263                    .unwrap_or("")
264                    .to_owned();
265
266                let declared_segment_count: u32 =
267                    required_component(seg, 0, 0)?.parse().map_err(|_| {
268                        EdifactError::InvalidText {
269                            offset: seg.span.start,
270                        }
271                    })?;
272                let unt_ref = required_component(seg, 1, 0)?;
273                if unt_ref != message_ref {
274                    return Err(EdifactError::QualifierMismatch {
275                        tag: "UNT".to_owned(),
276                        actual: unt_ref.to_owned(),
277                        expected: message_ref.clone(),
278                        offset: seg.span.start,
279                    });
280                }
281
282                // actual count = segments from UNH (inclusive) to UNT (inclusive)
283                let actual_segment_count = u32::try_from(i - msg_start_idx + 1).map_err(|_| {
284                    EdifactError::InterchangeTooLarge {
285                        // SAFETY: usize ≤ u64::MAX on all supported targets
286                        count: u64::try_from(i - msg_start_idx + 1).unwrap_or(u64::MAX),
287                    }
288                })?;
289
290                in_message = false;
291                messages.push(MessageEnvelope {
292                    message_ref,
293                    message_type,
294                    version,
295                    release,
296                    controlling_agency,
297                    association_code,
298                    declared_segment_count,
299                    actual_segment_count,
300                });
301            }
302            "UNT" => {
303                return Err(EdifactError::InvalidSegmentForMessage {
304                    tag: "UNT".to_owned(),
305                    message_type: "ENVELOPE".to_owned(),
306                    offset: seg.span.start,
307                });
308            }
309            "UNB" | "UNZ" if in_message => {
310                return Err(EdifactError::InvalidSegmentForMessage {
311                    tag: seg.tag.to_owned(),
312                    message_type: "ENVELOPE".to_owned(),
313                    offset: seg.span.start,
314                });
315            }
316            _ if !in_message => {
317                return Err(EdifactError::InvalidSegmentForMessage {
318                    tag: seg.tag.to_owned(),
319                    message_type: "ENVELOPE".to_owned(),
320                    offset: seg.span.start,
321                });
322            }
323            _ => {}
324        }
325    }
326
327    if in_message {
328        return Err(EdifactError::MissingSegment {
329            tag: "UNT".to_owned(),
330            expected_position: "end of message group".to_owned(),
331        });
332    }
333
334    Ok(messages)
335}
336
#[cfg(test)]
mod tests {
    //! Unit tests for envelope validation.
    //!
    //! Every test feeds a raw EDIFACT byte string through the crate's parser
    //! and then through [`validate_envelope`], asserting either the extracted
    //! envelope data or the specific error variant.

    use super::*;

    /// Parse test fixtures into an owned-segment vec (no memory leaks).
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        crate::from_reader(std::io::Cursor::new(input)).expect("parse failed")
    }

    /// Parse then validate: convenience wrapper for tests that only need the result.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        validate_envelope(&segs)
    }

    /// One well-formed interchange: `UNA`, `UNB`, a single four-segment
    /// `ORDERS` message and `UNZ`.
    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].message_type, "ORDERS");
        assert_eq!(messages[0].association_code, "EAN010");
        assert_eq!(messages[0].declared_segment_count, 4);
        assert_eq!(messages[0].actual_segment_count, 4); // UNH + BGM + DTM + UNT
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        // UNT declares 99 segments but only 4 are present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // UNZ declares 2 messages but only 1 UNH/UNT pair is present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        // The interchange starts with UNH, so the UNB check must be the one that fails.
        let input = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNB"),
            "expected MissingSegment(UNB), got {result:?}"
        );
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // Test that UNA does not interfere with envelope field extraction
        let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        // UNA is parsed by tokenizer; UNB field extraction must be correct
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT"),
            "expected MissingSegment(UNT), got {result:?}"
        );
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { .. })),
            "expected InvalidSegmentForMessage, got {result:?}"
        );
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        let input = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // Element 1 (sender) exists but is empty ("+") — component 0 is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
            "expected MissingRequiredComponent for empty sender, got: {result:?}"
        );
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
            "expected InvalidSegmentForMessage(UNH), got {result:?}"
        );
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNT"),
            "expected QualifierMismatch(UNT), got {result:?}"
        );
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNZ"),
            "expected QualifierMismatch(UNZ), got {result:?}"
        );
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // UNH element 1 = "ORDERS:D:11A" — component 3 (controlling agency) is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
            "expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNZ"),
            "expected InvalidSegmentForMessage(UNZ), got {result:?}"
        );
    }

    // ── UNG/UNE functional-group regression guard ────────────────────────────
    //
    // ISO 9735-1 defines optional functional groups (UNG/UNE) that may wrap
    // one or more UNH/UNT pairs.  `validate_envelope` currently documents that
    // UNG/UNE are NOT supported (see the `# Limitations` section of its doc
    // comment).  This test asserts the *documented* behaviour: a UNG/UNE-wrapped
    // interchange must not silently produce incorrect counts — it must return
    // an explicit error.

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        // A UNG segment appearing between UNB and UNH is not a recognized
        // envelope segment — validate_envelope must reject it explicitly.
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
                      UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
                      UNH+1+ORDERS:D:96A:UN'\
                      BGM+220+PO-001+9'\
                      UNT+3+1'\
                      UNE+1+1'\
                      UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            result.is_err(),
            "UNG/UNE is documented as unsupported; must return an error, not silently produce wrong counts"
        );
        // The error must identify the offending segment (UNG or UNE), not some
        // unrelated internal failure.
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNG" || tag == "UNE"),
            "expected InvalidSegmentForMessage for UNG or UNE, got {result:?}"
        );
    }
}