Skip to main content

edifact_rs/
envelope.rs

1//! EDIFACT envelope validation (Story 2.4).
2//!
3//! Validates UNB / UNH / UNT / UNZ envelope segment structure and count
4//! consistency — independently of business-rule (AHB) validation.
5
6use crate::{error::EdifactError, model::Segment};
7
/// Interchange-level envelope data gathered from the `UNB` header and the
/// `UNZ` trailer of one interchange.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InterchangeEnvelope {
    /// Syntax identifier taken from `UNB` (first component of element 0),
    /// e.g. `"UNOA"`.
    pub syntax_identifier: String,
    /// Identification of the interchange sender (`UNB` element 1).
    pub sender_id: String,
    /// Identification of the interchange recipient (`UNB` element 2).
    pub recipient_id: String,
    /// Date/time text of the interchange, kept as written in the source:
    /// the date component, followed by `:` and the time when a time
    /// component is present.
    pub datetime: String,
    /// Interchange control reference (`UNB` element 4).
    pub control_ref: String,
    /// Number of messages the `UNZ` trailer claims the interchange contains.
    pub declared_message_count: u32,
    /// Number of messages that were actually found between `UNB` and `UNZ`.
    pub actual_message_count: u32,
}
26
/// Message-level envelope data gathered from one `UNH` header and the `UNT`
/// trailer that closes it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MessageEnvelope {
    /// Message reference (`UNH` element 0).
    pub message_ref: String,
    /// EDIFACT message type (`UNH` element 1, component 0), e.g. `"ORDERS"`.
    pub message_type: String,
    /// Message version number (`UNH` element 1, component 1), e.g. `"D"`.
    pub version: String,
    /// Message release number (`UNH` element 1, component 2), e.g. `"11A"`.
    pub release: String,
    /// Controlling agency code (`UNH` element 1, component 3), e.g. `"UN"`.
    pub controlling_agency: String,
    /// Association assigned code / MIG version (`UNH` element 1,
    /// component 4), e.g. `"FV2510"`. Empty when the component is absent.
    pub association_code: String,
    /// Segment count the `UNT` trailer claims for this message.
    pub declared_segment_count: u32,
    /// Segments actually counted from this `UNH` through its `UNT`,
    /// both included.
    pub actual_segment_count: u32,
}
47
48/// Validates the EDIFACT interchange envelope for the given segments.
49///
50/// Checks:
51/// - `UNB` is present (first meaningful segment)
52/// - `UNZ` is present (last segment) with correct message count
53/// - Each `UNH` is paired with a `UNT` carrying a matching segment count
54/// - `UNZ` message count matches the number of `UNH`/`UNT` pairs found
55///
56/// Returns `Ok((interchange_env, message_envs))` on success,
57/// or `Err(EdifactError::MessageCountMismatch)` / `Err(EdifactError::SegmentCountMismatch)` on
58/// count discrepancies.
59pub fn validate_envelope(
60    segments: &[Segment<'_>],
61) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
62    let mut interchange_env = extract_interchange(segments)?;
63    let message_envs = extract_messages(segments)?;
64    interchange_env.actual_message_count = message_envs.len() as u32;
65
66    // Cross-check UNZ declared count vs. actual UNH/UNT pair count
67    if interchange_env.declared_message_count != interchange_env.actual_message_count {
68        return Err(EdifactError::MessageCountMismatch {
69            expected: interchange_env.declared_message_count,
70            actual: interchange_env.actual_message_count,
71        });
72    }
73
74    // Cross-check each UNT segment count vs. actual count
75    for msg in &message_envs {
76        if msg.declared_segment_count != msg.actual_segment_count {
77            return Err(EdifactError::SegmentCountMismatch {
78                expected: msg.declared_segment_count,
79                actual: msg.actual_segment_count,
80                message_ref: msg.message_ref.clone(),
81            });
82        }
83    }
84
85    Ok((interchange_env, message_envs))
86}
87
88fn extract_interchange(segments: &[Segment<'_>]) -> Result<InterchangeEnvelope, EdifactError> {
89    if segments.first().map(|segment| segment.tag) != Some("UNB") {
90        return Err(EdifactError::MissingSegment {
91            tag: "UNB".to_owned(),
92            expected_position: "first segment of interchange".to_owned(),
93        });
94    }
95
96    if segments.last().map(|segment| segment.tag) != Some("UNZ") {
97        return Err(EdifactError::MissingSegment {
98            tag: "UNZ".to_owned(),
99            expected_position: "last segment of interchange".to_owned(),
100        });
101    }
102
103    let unb = &segments[0];
104    let unz = &segments[segments.len() - 1];
105
106    let syntax_identifier = required_component(unb, "UNB", 0, 0)?.to_owned();
107
108    let sender_id = required_component(unb, "UNB", 1, 0)?.to_owned();
109
110    let recipient_id = required_component(unb, "UNB", 2, 0)?.to_owned();
111
112    // Element 3: date/time composite
113    let date = required_component(unb, "UNB", 3, 0)?;
114    let time = unb
115        .get_element(3)
116        .and_then(|e| e.get_component(1))
117        .unwrap_or("");
118    let datetime = if time.is_empty() {
119        date.to_owned()
120    } else {
121        format!("{date}:{time}")
122    };
123
124    let control_ref = required_component(unb, "UNB", 4, 0)?.to_owned();
125    let unz_control_ref = required_component(unz, "UNZ", 1, 0)?;
126    if unz_control_ref != control_ref {
127        return Err(EdifactError::QualifierMismatch {
128            tag: "UNZ".to_owned(),
129            actual: unz_control_ref.to_owned(),
130            expected: control_ref,
131            offset: unz.span.start,
132        });
133    }
134
135    let declared_message_count: u32 = required_component(unz, "UNZ", 0, 0)?
136        .parse()
137        .map_err(|_| EdifactError::InvalidText {
138            offset: unz.span.start,
139        })?;
140
141    Ok(InterchangeEnvelope {
142        syntax_identifier,
143        sender_id,
144        recipient_id,
145        datetime,
146        control_ref,
147        declared_message_count,
148        actual_message_count: 0,
149    })
150}
151
152/// Thin shim that forwards to [`crate::de::required_component`].
153///
154/// The `_tag` parameter was previously used in the locally-duplicated error
155/// path; the underlying function now derives the tag from `segment.tag`
156/// directly, so the argument is retained only for call-site readability.
157#[inline]
158fn required_component<'a>(
159    segment: &'a Segment<'_>,
160    _tag: &'static str,
161    element_index: usize,
162    component_index: usize,
163) -> Result<&'a str, EdifactError> {
164    crate::de::required_component(segment, element_index, component_index)
165}
166
167fn extract_messages(segments: &[Segment<'_>]) -> Result<Vec<MessageEnvelope>, EdifactError> {
168    let mut messages: Vec<MessageEnvelope> = Vec::new();
169    let mut in_message = false;
170    let mut msg_start_idx: usize = 0;
171    let mut current_unh: Option<&Segment<'_>> = None;
172
173    for (i, seg) in segments[1..segments.len() - 1].iter().enumerate() {
174        match seg.tag {
175            "UNH" => {
176                if in_message {
177                    return Err(EdifactError::UnexpectedEof {
178                        offset: seg.span.start,
179                    });
180                }
181                in_message = true;
182                msg_start_idx = i;
183                current_unh = Some(seg);
184            }
185            "UNT" if in_message => {
186                let unh = current_unh
187                    .take()
188                    .ok_or(EdifactError::UnexpectedEof {
189                        offset: seg.span.start,
190                    })?;
191
192                let message_ref = required_component(unh, "UNH", 0, 0)?.to_owned();
193
194                let message_type = required_component(unh, "UNH", 1, 0)?.to_owned();
195                let version = required_component(unh, "UNH", 1, 1)?.to_owned();
196                let release = required_component(unh, "UNH", 1, 2)?.to_owned();
197                let controlling_agency = required_component(unh, "UNH", 1, 3)?.to_owned();
198                let association_code = unh
199                    .get_element(1)
200                    .and_then(|e| e.get_component(4))
201                    .unwrap_or("")
202                    .to_owned();
203
204                let declared_segment_count: u32 = required_component(seg, "UNT", 0, 0)?
205                    .parse()
206                    .map_err(|_| EdifactError::InvalidText {
207                        offset: seg.span.start,
208                    })?;
209                let unt_ref = required_component(seg, "UNT", 1, 0)?;
210                if unt_ref != message_ref {
211                    return Err(EdifactError::QualifierMismatch {
212                        tag: "UNT".to_owned(),
213                        actual: unt_ref.to_owned(),
214                        expected: message_ref.clone(),
215                        offset: seg.span.start,
216                    });
217                }
218
219                // actual count = segments from UNH (inclusive) to UNT (inclusive)
220                let actual_segment_count = (i - msg_start_idx + 1) as u32;
221
222                in_message = false;
223                messages.push(MessageEnvelope {
224                    message_ref,
225                    message_type,
226                    version,
227                    release,
228                    controlling_agency,
229                    association_code,
230                    declared_segment_count,
231                    actual_segment_count,
232                });
233            }
234            "UNT" => {
235                return Err(EdifactError::InvalidSegmentForMessage {
236                    tag: "UNT".to_owned(),
237                    message_type: "ENVELOPE".to_owned(),
238                    offset: seg.span.start,
239                });
240            }
241            "UNB" | "UNZ" if in_message => {
242                return Err(EdifactError::InvalidSegmentForMessage {
243                    tag: seg.tag.to_owned(),
244                    message_type: "ENVELOPE".to_owned(),
245                    offset: seg.span.start,
246                });
247            }
248            _ if !in_message => {
249                return Err(EdifactError::InvalidSegmentForMessage {
250                    tag: seg.tag.to_owned(),
251                    message_type: "ENVELOPE".to_owned(),
252                    offset: seg.span.start,
253                });
254            }
255            _ => {}
256        }
257    }
258
259    if in_message {
260        return Err(EdifactError::MissingSegment {
261            tag: "UNT".to_owned(),
262            expected_position: "end of message group".to_owned(),
263        });
264    }
265
266    Ok(messages)
267}
268
#[cfg(test)]
mod tests {
    use super::*;
    use crate::from_bytes;

    /// Parses a test fixture into a vector of segments.
    ///
    /// A copy of the input is leaked with `Box::leak` so the returned
    /// segments can borrow from `'static` bytes. The inputs are small test
    /// fixtures, so the leak is bounded by the test process lifetime.
    fn parse(input: &[u8]) -> Vec<Segment<'static>> {
        let leaked: &'static [u8] = Box::leak(Box::<[u8]>::from(input));
        let parsed: Result<Vec<_>, _> = from_bytes(leaked).collect();
        parsed.expect("parse failed")
    }

    /// Parses `input` and runs the envelope validation on the result.
    fn validate(
        input: &[u8],
    ) -> Result<(InterchangeEnvelope, Vec<MessageEnvelope>), EdifactError> {
        validate_envelope(&parse(input))
    }

    const VALID_INTERCHANGE: &[u8] =
        b"UNA:+.? 'UNB+UNOA:3+SENDER::293+RECEIVER::293+230401:0900+00001'UNH+00001+ORDERS:D:11A:UN:EAN010'BGM+220+PO-4711+9'DTM+137:20230401:102'UNT+4+00001'UNZ+1+00001'";

    #[test]
    fn valid_envelope_parses_ok() {
        let (interchange, messages) =
            validate(VALID_INTERCHANGE).expect("envelope should be valid");

        assert_eq!(interchange.sender_id, "SENDER");
        assert_eq!(interchange.recipient_id, "RECEIVER");
        assert_eq!(interchange.control_ref, "00001");
        assert_eq!(interchange.declared_message_count, 1);
        assert_eq!(interchange.actual_message_count, 1);

        assert_eq!(messages.len(), 1);
        let message = &messages[0];
        assert_eq!(message.message_type, "ORDERS");
        assert_eq!(message.association_code, "EAN010");
        assert_eq!(message.declared_segment_count, 4);
        // UNH + BGM + DTM + UNT
        assert_eq!(message.actual_segment_count, 4);
    }

    #[test]
    fn unt_count_mismatch_returns_err() {
        // UNT declares 99 segments but only 4 are present
        let result = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'DTM+137:20200101:102'UNT+99+1'UNZ+1+1'",
        );
        assert!(
            matches!(
                result,
                Err(EdifactError::SegmentCountMismatch { expected: 99, .. })
            ),
            "expected SegmentCountMismatch, got {result:?}"
        );
    }

    #[test]
    fn unz_count_mismatch_returns_err() {
        // UNZ declares 2 messages but only 1 UNH/UNT pair is present
        let result = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+2+1'",
        );
        assert!(
            matches!(
                result,
                Err(EdifactError::MessageCountMismatch {
                    expected: 2,
                    actual: 1
                })
            ),
            "expected MessageCountMismatch(2,1), got {result:?}"
        );
    }

    #[test]
    fn missing_unb_returns_err() {
        let outcome = validate(b"UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'");
        assert!(outcome.is_err());
    }

    #[test]
    fn extracts_una_interchange_correctly() {
        // A leading UNA service string must not disturb UNB field extraction:
        // the tokenizer consumes it, and the envelope fields stay correct.
        let (env, _) = validate(VALID_INTERCHANGE).unwrap();
        assert_eq!(env.syntax_identifier, "UNOA");
        assert_eq!(env.datetime, "230401:0900");
    }

    #[test]
    fn dangling_unh_without_unt_returns_err() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::MissingSegment { ref tag, .. }) if tag == "UNT"
        ));
    }

    #[test]
    fn stray_segment_outside_message_returns_err() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'BGM+999+PO-2+9'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { .. })
        ));
    }

    #[test]
    fn missing_unb_sender_component_returns_err() {
        let outcome = validate(
            b"UNB+UNOA:3++R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::MissingRequiredElement { tag, .. }) if tag == "UNB"
        ));
    }

    #[test]
    fn nested_unh_without_closing_previous_message_returns_err() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNH+2+ORDERS:D:11A:UN:EAN010'UNT+3+2'UNZ+1+1'",
        );
        assert!(matches!(outcome, Err(EdifactError::UnexpectedEof { .. })));
    }

    #[test]
    fn unt_message_reference_must_match_unh() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+999'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNT"
        ));
    }

    #[test]
    fn unz_control_reference_must_match_unb() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'BGM+220+PO-1+9'UNT+3+1'UNZ+1+999'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::QualifierMismatch { tag, .. }) if tag == "UNZ"
        ));
    }

    #[test]
    fn missing_unh_message_type_components_return_err() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A'BGM+220+PO-1+9'UNT+3+1'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::MissingRequiredElement { tag, .. }) if tag == "UNH"
        ));
    }

    #[test]
    fn nested_unz_inside_message_returns_err() {
        let outcome = validate(
            b"UNB+UNOA:3+S+R+200101:0900+1'UNH+1+ORDERS:D:11A:UN:EAN010'UNZ+1+1'UNT+2+1'UNZ+1+1'",
        );
        assert!(matches!(
            outcome,
            Err(EdifactError::InvalidSegmentForMessage { tag, .. }) if tag == "UNZ"
        ));
    }
}
423}