Skip to main content

edifact_rs/
envelope.rs

1//! EDIFACT envelope validation (Story 2.4).
2//!
3//! Validates UNB / UNH / UNT / UNZ envelope segment structure and count
4//! consistency — independently of business-rule (AHB) validation.
5
6use crate::{error::EdifactError, model::Segment};
7
/// Interchange-level header/trailer data collected from the `UNB` and `UNZ`
/// service segments.
///
/// All textual fields are owned copies of the source components, so the value
/// can outlive the segment slice it was built from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier (`UNB` element 0, component 0), e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Sender identification (`UNB` element 1, component 0).
    pub sender_id: String,
    /// Recipient identification (`UNB` element 2, component 0).
    pub recipient_id: String,
    /// Date of preparation from `UNB` element 3, followed by `:` and the time
    /// component when a non-empty time is present (e.g. `"230401:0900"`).
    pub datetime: String,
    /// Interchange control reference (`UNB` element 4, component 0).
    pub control_ref: String,
    /// Number of messages the `UNZ` trailer claims the interchange contains.
    pub declared_message_count: u32,
    /// Number of `UNH`/`UNT` pairs actually found between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
26
/// Message-level header/trailer data collected from one `UNH` / `UNT` pair.
///
/// All textual fields are owned copies of the source components.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
    /// Message reference number (`UNH` element 0); the closing `UNT` must
    /// repeat the same value.
    pub message_ref: String,
    /// Message type (`UNH` element 1, component 0), e.g. `"ORDERS"`.
    pub message_type: String,
    /// Message version number (`UNH` element 1, component 1), e.g. `"D"`.
    pub version: String,
    /// Message release number (`UNH` element 1, component 2), e.g. `"11A"`.
    pub release: String,
    /// Controlling agency (`UNH` element 1, component 3), e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code / MIG version (`UNH` element 1, component 4),
    /// e.g. `"FV2510"`. Empty when the component is not supplied.
    pub association_code: String,
    /// Number of segments the `UNT` trailer claims the message contains.
    pub declared_segment_count: u32,
    /// Number of segments actually found from `UNH` through `UNT`, inclusive.
    pub actual_segment_count: u32,
}
47
48/// Validates the EDIFACT interchange envelope for the given segments.
49///
50/// Checks:
51/// - `UNB` is present (first meaningful segment)
52/// - `UNZ` is present (last segment) with correct message count
53/// - Each `UNH` is paired with a `UNT` carrying a matching segment count
54/// - `UNZ` message count matches the number of `UNH`/`UNT` pairs found
55///
56/// Returns `Ok((interchange_env, message_envs))` on success,
57/// or `Err(EdifactError::MessageCountMismatch)` / `Err(EdifactError::SegmentCountMismatch)` on
58/// count discrepancies.
59///
60/// # Limitations
61///
62/// Functional group segments (`UNG`/`UNE`) are **not supported**.  If the
63/// input contains `UNG` or `UNE` segments they will be treated as regular
64/// message segments and may cause an [`EdifactError::InvalidSegmentForMessage`]
65/// error or incorrect segment counting.  Strip functional-group wrappers
66/// before calling this function.
67pub fn validate_envelope(
68    segments: &[Segment<'_>],
69) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
70    let mut interchange_env = extract_interchange(segments)?;
71    let message_envs = extract_messages(segments)?;
72    interchange_env.actual_message_count = u32::try_from(message_envs.len())
73        .unwrap_or(u32::MAX);
74
75    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
76    if interchange_env.declared_message_count != interchange_env.actual_message_count {
77        return Err(EdifactError::MessageCountMismatch {
78            expected: interchange_env.declared_message_count,
79            actual: interchange_env.actual_message_count,
80        });
81    }
82
83    // Cross-check each UNT segment count vs. actual count
84    for msg in &message_envs {
85        if msg.declared_segment_count != msg.actual_segment_count {
86            return Err(EdifactError::SegmentCountMismatch {
87                expected: msg.declared_segment_count,
88                actual: msg.actual_segment_count,
89                message_ref: msg.message_ref.clone(),
90            });
91        }
92    }
93
94    Ok((interchange_env, message_envs))
95}
96
97fn extract_interchange(segments: &[Segment<'_>]) -> Result<InterchangeEnvelope, EdifactError> {
98    if segments.first().map(|segment| segment.tag) != Some("UNB") {
99        return Err(EdifactError::MissingSegment {
100            tag: "UNB".to_owned(),
101            expected_position: "first segment of interchange".to_owned(),
102        });
103    }
104
105    if segments.last().map(|segment| segment.tag) != Some("UNZ") {
106        return Err(EdifactError::MissingSegment {
107            tag: "UNZ".to_owned(),
108            expected_position: "last segment of interchange".to_owned(),
109        });
110    }
111
112    let unb = &segments[0];
113    let unz = &segments[segments.len() - 1];
114
115    let syntax_identifier = required_component(unb, 0, 0)?.to_owned();
116
117    let sender_id = required_component(unb, 1, 0)?.to_owned();
118
119    let recipient_id = required_component(unb, 2, 0)?.to_owned();
120
121    // Element 3: date/time composite
122    let date = required_component(unb, 3, 0)?;
123    let time = unb
124        .get_element(3)
125        .and_then(|e| e.get_component(1))
126        .unwrap_or("");
127    let datetime = if time.is_empty() {
128        date.to_owned()
129    } else {
130        format!("{date}:{time}")
131    };
132
133    let control_ref = required_component(unb, 4, 0)?.to_owned();
134    let unz_control_ref = required_component(unz, 1, 0)?;
135    if unz_control_ref != control_ref {
136        return Err(EdifactError::QualifierMismatch {
137            tag: "UNZ".to_owned(),
138            actual: unz_control_ref.to_owned(),
139            expected: control_ref,
140            offset: unz.span.start,
141        });
142    }
143
144    let declared_message_count: u32 = required_component(unz, 0, 0)?
145        .parse()
146        .map_err(|_| EdifactError::InvalidText {
147            offset: unz.span.start,
148        })?;
149
150    Ok(InterchangeEnvelope {
151        syntax_identifier,
152        sender_id,
153        recipient_id,
154        datetime,
155        control_ref,
156        declared_message_count,
157        actual_message_count: 0,
158    })
159}
160
161/// Thin shim that forwards to [`crate::de::required_component`].
162#[inline]
163fn required_component<'a>(
164    segment: &'a Segment<'_>,
165    element_index: usize,
166    component_index: usize,
167) -> Result<&'a str, EdifactError> {
168    crate::de::required_component(segment, element_index, component_index)
169}
170
171fn extract_messages(segments: &[Segment<'_>]) -> Result<Vec<MessageEnvelope>, EdifactError> {
172    let mut messages: Vec<MessageEnvelope> = Vec::new();
173    let mut in_message = false;
174    let mut msg_start_idx: usize = 0;
175    let mut current_unh: Option<&Segment<'_>> = None;
176
177    for (i, seg) in segments[1..segments.len() - 1].iter().enumerate() {
178        match seg.tag {
179            "UNH" => {
180                if in_message {
181                    return Err(EdifactError::InvalidSegmentForMessage {
182                        tag: "UNH".to_owned(),
183                        message_type: "ENVELOPE".to_owned(),
184                        offset: seg.span.start,
185                    });
186                }
187                in_message = true;
188                msg_start_idx = i;
189                current_unh = Some(seg);
190            }
191            "UNT" if in_message => {
192                let unh = current_unh
193                    .take()
194                    .ok_or(EdifactError::InvalidSegmentForMessage {
195                        tag: "UNT".to_owned(),
196                        message_type: "ENVELOPE".to_owned(),
197                        offset: seg.span.start,
198                    })?;
199
200                let message_ref = required_component(unh, 0, 0)?.to_owned();
201
202                let message_type = required_component(unh, 1, 0)?.to_owned();
203                let version = required_component(unh, 1, 1)?.to_owned();
204                let release = required_component(unh, 1, 2)?.to_owned();
205                let controlling_agency = required_component(unh, 1, 3)?.to_owned();
206                let association_code = unh
207                    .get_element(1)
208                    .and_then(|e| e.get_component(4))
209                    .unwrap_or("")
210                    .to_owned();
211
212                let declared_segment_count: u32 = required_component(seg, 0, 0)?
213                    .parse()
214                    .map_err(|_| EdifactError::InvalidText {
215                        offset: seg.span.start,
216                    })?;
217                let unt_ref = required_component(seg, 1, 0)?;
218                if unt_ref != message_ref {
219                    return Err(EdifactError::QualifierMismatch {
220                        tag: "UNT".to_owned(),
221                        actual: unt_ref.to_owned(),
222                        expected: message_ref.clone(),
223                        offset: seg.span.start,
224                    });
225                }
226
227                // actual count = segments from UNH (inclusive) to UNT (inclusive)
228                let actual_segment_count = u32::try_from(i - msg_start_idx + 1)
229                    .unwrap_or(u32::MAX);
230
231                in_message = false;
232                messages.push(MessageEnvelope {
233                    message_ref,
234                    message_type,
235                    version,
236                    release,
237                    controlling_agency,
238                    association_code,
239                    declared_segment_count,
240                    actual_segment_count,
241                });
242            }
243            "UNT" => {
244                return Err(EdifactError::InvalidSegmentForMessage {
245                    tag: "UNT".to_owned(),
246                    message_type: "ENVELOPE".to_owned(),
247                    offset: seg.span.start,
248                });
249            }
250            "UNB" | "UNZ" if in_message => {
251                return Err(EdifactError::InvalidSegmentForMessage {
252                    tag: seg.tag.to_owned(),
253                    message_type: "ENVELOPE".to_owned(),
254                    offset: seg.span.start,
255                });
256            }
257            _ if !in_message => {
258                return Err(EdifactError::InvalidSegmentForMessage {
259                    tag: seg.tag.to_owned(),
260                    message_type: "ENVELOPE".to_owned(),
261                    offset: seg.span.start,
262                });
263            }
264            _ => {}
265        }
266    }
267
268    if in_message {
269        return Err(EdifactError::MissingSegment {
270            tag: "UNT".to_owned(),
271            expected_position: "end of message group".to_owned(),
272        });
273    }
274
275    Ok(messages)
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse test fixtures into an owned-segment vec (no memory leaks).
    fn parse(input: &[u8]) -> Vec<crate::OwnedSegment> {
        crate::from_reader(std::io::Cursor::new(input)).expect("parse failed")
    }

    /// Parse then validate: convenience wrapper for tests that only need the result.
    fn parse_and_validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        let owned = parse(input);
        let segs: Vec<Segment<'_>> = owned.iter().map(crate::OwnedSegment::as_borrowed).collect();
        validate_envelope(&segs)
    }

    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            parse_and_validate(VALID_INTERCHANGE).expect("envelope should be valid");
        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);
        assert_eq!(messages.len(), 1);
        assert_eq!(messages[0].message_type, "ORDERS");
        assert_eq!(messages[0].association_code, "EAN010");
        assert_eq!(messages[0].declared_segment_count, 4);
        assert_eq!(messages[0].actual_segment_count, 4); // UNH + BGM + DTM + UNT
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        // UNT declares 99 segments but only 4 are present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // UNZ declares 2 messages but only 1 UNH/UNT pair is present
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        let input = b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // Pin the exact failure so an unrelated error cannot make this pass.
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNB"),
            "expected MissingSegment(UNB), got {result:?}"
        );
    }

    #[test]
    fn missing_unz_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNZ"),
            "expected MissingSegment(UNZ), got {result:?}"
        );
    }

    #[test]
    fn empty_segment_slice_returns_missing_unb() {
        // No segments at all: must be a clean error, never a panic.
        let result = validate_envelope(&[]);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNB"),
            "expected MissingSegment(UNB) for empty input, got {result:?}"
        );
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // Test that UNA does not interfere with envelope field extraction
        let (env, _) = parse_and_validate(VALID_INTERCHANGE).unwrap();
        // UNA is parsed by tokenizer; UNB field extraction must be correct
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let input =
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT"),
            "expected MissingSegment(UNT), got {result:?}"
        );
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { .. })),
            "expected InvalidSegmentForMessage, got {result:?}"
        );
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        let input = b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // Element 1 (sender) exists but is empty ("+") — component 0 is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 0 }) if tag == "UNB"),
            "expected MissingRequiredComponent for empty sender, got: {result:?}"
        );
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"),
            "expected InvalidSegmentForMessage(UNH), got {result:?}"
        );
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNT"),
            "expected QualifierMismatch(UNT), got {result:?}"
        );
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::QualifierMismatch { ref tag, .. }) if tag == "UNZ"),
            "expected QualifierMismatch(UNZ), got {result:?}"
        );
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        // UNH element 1 = "ORDERS:D:11A" — component 3 (controlling agency) is absent.
        assert!(
            matches!(result, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 1, component_index: 3 }) if tag == "UNH"),
            "expected MissingRequiredComponent for truncated UNH message type, got: {result:?}"
        );
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'";
        let result = parse_and_validate(input);
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNZ"),
            "expected InvalidSegmentForMessage(UNZ), got {result:?}"
        );
    }

    // ── UNG/UNE functional-group regression guard ────────────────────────────
    //
    // ISO 9735-1 defines optional functional groups (UNG/UNE) that may wrap
    // one or more UNH/UNT pairs.  The `# Limitations` section of
    // `validate_envelope` documents that UNG/UNE are NOT supported.  This test
    // asserts the *documented* behaviour: UNG/UNE-wrapped interchanges must
    // not silently produce incorrect counts — they must return an explicit error.

    #[test]
    fn envelope_with_ung_returns_explicit_error() {
        // A UNG segment appearing between UNB and UNH is not a recognized
        // envelope segment — validate_envelope must reject it explicitly.
        let input = b"UNB+UNOA:3+S+R+200101:0900+1'\
                      UNG+ORDERS+S+R+200101:0900+1+UN+D:96A'\
                      UNH+1+ORDERS:D:96A:UN'\
                      BGM+220+PO-001+9'\
                      UNT+3+1'\
                      UNE+1+1'\
                      UNZ+1+1'";
        let result = parse_and_validate(input);
        // The error must identify the offending segment (UNG or UNE), not some
        // unrelated internal failure and not a silently wrong count.
        assert!(
            matches!(result, Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNG" || tag == "UNE"),
            "UNG/UNE is documented as unsupported; expected InvalidSegmentForMessage for UNG or UNE, got {result:?}"
        );
    }
}