Skip to main content

edifact_rs/
de.rs

1//! Custom deserialization trait for EDIFACT.
2//!
3//! [`EdifactDeserialize`] maps a slice of parsed [`Segment`]s to a Rust value.
4//! [`EdifactSegmentTag`] is a companion trait that carries the segment tag and
5//! optional qualifier at the type level, enabling the blanket
6//! `impl EdifactDeserialize for Vec<T>`.
7
8use crate::{EdifactError, Segment};
9use std::io::Read;
10use std::str::FromStr;
11
12// ── traits ────────────────────────────────────────────────────────────────────
13
14/// Types that can be deserialized from a slice of EDIFACT segments.
15///
16/// Implement manually or derive with `#[derive(EdifactDeserialize)]` from the
17/// `edifact-rs-derive` crate.
18pub trait EdifactDeserialize: Sized {
19    /// Deserialize `Self` from the provided segment slice.
20    ///
21    /// The slice may contain any number of segments; implementations extract
22    /// only the ones they care about and ignore the rest.
23    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError>;
24
25    /// Deserialize `Self` from a slice of owned EDIFACT segments.
26    ///
27    /// # Default implementation
28    ///
29    /// Converts each [`crate::OwnedSegment`] to its borrowed form via
30    /// [`crate::OwnedSegment::as_borrowed`] and delegates to
31    /// [`edifact_deserialize`][Self::edifact_deserialize].  This incurs one
32    /// `Vec<Segment<'_>>` allocation per call.
33    ///
34    /// # Override when performance matters
35    ///
36    /// Types generated by `#[derive(EdifactDeserialize)]` automatically override
37    /// this method to work directly on the owned data without the intermediate
38    /// allocation.  Manual implementations should also override when used in the
39    /// high-throughput reader-streaming path
40    /// ([`deserialize_first_from_reader`], [`deserialize_all_from_reader`],
41    /// [`deserialize_messages_from_reader`]) to avoid the per-message allocation.
42    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
43        let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
44        Self::edifact_deserialize(&borrowed)
45    }
46}
47
48/// Types that can be deserialized from a composite EDIFACT element.
49///
50/// Implement this for custom composite structs used with
51/// `#[edifact(composite)]` in derive macros.
52pub trait EdifactCompositeDeserialize: Sized {
53    /// Deserialize `Self` from a composite element.
54    fn edifact_deserialize_composite(composite: CompositeElement<'_>)
55    -> Result<Self, EdifactError>;
56}
57
58impl EdifactCompositeDeserialize for Vec<String> {
59    fn edifact_deserialize_composite(
60        composite: CompositeElement<'_>,
61    ) -> Result<Self, EdifactError> {
62        Ok(composite.iter().map(str::to_owned).collect())
63    }
64}
65
66/// Companion trait that declares a type's segment tag (and optional qualifier).
67///
68/// Required for the `Vec<T>` blanket impl and for finding the right segment in
69/// a message-level struct deserialization.
70pub trait EdifactSegmentTag {
71    /// The 3-character EDIFACT segment tag (e.g. `"BGM"`, `"NAD"`).
72    const SEGMENT_TAG: &'static str;
73
74    /// Optional qualifier pattern to further constrain segment matching.
75    ///
76    /// Examples:
77    /// - `Some("MS")` for exact qualifier matching.
78    /// - `Some("M*")` for wildcard prefix matching (matches `"MS"`, `"MR"`, etc.).
79    const QUALIFIER_PATTERN: Option<&'static str> = None;
80
81    /// Return `true` if `seg`'s qualifier matches this type's qualifier pattern.
82    fn matches_qualifier(seg: &Segment<'_>) -> bool {
83        match Self::QUALIFIER_PATTERN {
84            Some(pattern) => seg
85                .element_str(0)
86                .is_some_and(|q| qualifier_matches_pattern(q, pattern)),
87            None => true,
88        }
89    }
90
91    /// Return `true` if `seg` is the segment this type maps to.
92    ///
93    /// Default: `seg.tag == Self::SEGMENT_TAG`.  Override to also match on a
94    /// qualifier (e.g. `NAD+BY` — element 0 = `"BY"`).
95    fn matches_segment(seg: &Segment<'_>) -> bool {
96        seg.tag == Self::SEGMENT_TAG && Self::matches_qualifier(seg)
97    }
98
99    /// Like [`matches_segment`][Self::matches_segment] but works directly on an
100    /// [`crate::OwnedSegment`] without incurring the `Vec` allocation of
101    /// [`crate::OwnedSegment::as_borrowed`].
102    fn matches_owned_segment(seg: &crate::OwnedSegment) -> bool {
103        if seg.tag != Self::SEGMENT_TAG {
104            return false;
105        }
106        match Self::QUALIFIER_PATTERN {
107            None => true,
108            Some(pattern) => {
109                let q = seg
110                    .elements
111                    .first()
112                    .and_then(|e| e.components.first())
113                    .map(|c| c.as_str())
114                    .unwrap_or("");
115                qualifier_matches_pattern(q, pattern)
116            }
117        }
118    }
119}
120
121// ── blanket impl for Vec<T> ───────────────────────────────────────────────────
122
123/// Deserializes each segment matching `T::matches_segment` as an independent
124/// single-segment slice, collecting the results.
125impl<T> EdifactDeserialize for Vec<T>
126where
127    T: EdifactDeserialize + EdifactSegmentTag,
128{
129    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
130        segments
131            .iter()
132            .filter(|s| T::matches_segment(s))
133            .map(|seg| T::edifact_deserialize(std::slice::from_ref(seg)))
134            .collect()
135    }
136
137    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
138        segments
139            .iter()
140            .filter(|s| T::matches_owned_segment(s))
141            .map(|seg| T::edifact_deserialize_owned(std::slice::from_ref(seg)))
142            .collect()
143    }
144}
145
146// ── public API ────────────────────────────────────────────────────────────────
147
148/// Deserialize a value of type `T` from EDIFACT bytes.
149///
150/// Unlike [`crate::from_bytes`], which parses bytes into raw [`Segment`]s, this
151/// function fully deserializes the payload into a typed Rust value via [`EdifactDeserialize`].
152///
153/// This API currently buffers all parsed segments into a `Vec` before invoking
154/// typed deserialization.
155pub fn deserialize<T: EdifactDeserialize>(input: &[u8]) -> Result<T, EdifactError> {
156    let segments: Vec<Segment<'_>> = crate::from_bytes(input).collect::<Result<_, _>>()?;
157    T::edifact_deserialize(&segments)
158}
159
160/// Stream-parse EDIFACT bytes and deserialize the first matching segment as `T`.
161///
162/// This avoids allocating a full `Vec<Segment>` and is intended for low-memory
163/// extraction of segment-scoped types.
164pub fn deserialize_first_streaming<T>(input: &[u8]) -> Result<T, EdifactError>
165where
166    T: EdifactDeserialize + EdifactSegmentTag,
167{
168    for segment in crate::from_bytes(input) {
169        let segment = segment?;
170        if T::matches_segment(&segment) {
171            return T::edifact_deserialize(std::slice::from_ref(&segment));
172        }
173    }
174
175    Err(EdifactError::MissingSegment {
176        tag: T::SEGMENT_TAG.to_owned(),
177        expected_position: "any position in input".to_owned(),
178    })
179}
180
181/// Stream-parse EDIFACT bytes and deserialize all matching segments as `Vec<T>`.
182///
183/// This avoids buffering non-matching segments in memory.
184pub fn deserialize_all_streaming<T>(input: &[u8]) -> Result<Vec<T>, EdifactError>
185where
186    T: EdifactDeserialize + EdifactSegmentTag,
187{
188    let mut out = Vec::new();
189    for segment in crate::from_bytes(input) {
190        let segment = segment?;
191        if T::matches_segment(&segment) {
192            out.push(T::edifact_deserialize(std::slice::from_ref(&segment))?);
193        }
194    }
195    Ok(out)
196}
197
198/// Stream-parse EDIFACT from a reader and deserialize the first matching segment as `T`.
199///
200/// This is the low-memory typed path for large payloads read from I/O streams.
201pub fn deserialize_first_from_reader<T, R>(reader: R) -> Result<T, EdifactError>
202where
203    T: EdifactDeserialize + EdifactSegmentTag,
204    R: Read,
205{
206    for segment in crate::from_reader_iter(reader) {
207        let segment = segment?;
208        // O(1) tag + qualifier check before paying for as_borrowed().
209        if !T::matches_owned_segment(&segment) {
210            continue;
211        }
212        return T::edifact_deserialize_owned(std::slice::from_ref(&segment));
213    }
214
215    Err(EdifactError::MissingSegment {
216        tag: T::SEGMENT_TAG.to_owned(),
217        expected_position: "any position in input".to_owned(),
218    })
219}
220
221/// Stream-parse EDIFACT from a reader and deserialize all matching segments as `Vec<T>`.
222pub fn deserialize_all_from_reader<T, R>(reader: R) -> Result<Vec<T>, EdifactError>
223where
224    T: EdifactDeserialize + EdifactSegmentTag,
225    R: Read,
226{
227    let mut out = Vec::new();
228    for segment in crate::from_reader_iter(reader) {
229        let segment = segment?;
230        // O(1) tag + qualifier check before paying for as_borrowed().
231        if !T::matches_owned_segment(&segment) {
232            continue;
233        }
234        out.push(T::edifact_deserialize_owned(std::slice::from_ref(&segment))?);
235    }
236    Ok(out)
237}
238
239/// Deserialize a value of type `T` from an EDIFACT string.
240pub fn deserialize_str<T: EdifactDeserialize>(input: &str) -> Result<T, EdifactError> {
241    deserialize(input.as_bytes())
242}
243
244// ── helper functions ──────────────────────────────────────────────────────────
245
246/// Find the first segment with the given tag.
247pub fn find_segment<'s, 'd>(segments: &'s [Segment<'d>], tag: &str) -> Option<&'s Segment<'d>> {
248    segments.iter().find(|s| s.tag == tag)
249}
250
251/// Iterate over all segments with the given tag without allocating a `Vec`.
252pub fn find_segments_iter<'s, 'd: 's>(
253    segments: &'s [Segment<'d>],
254    tag: &'s str,
255) -> impl Iterator<Item = &'s Segment<'d>> {
256    segments.iter().filter(move |s| s.tag == tag)
257}
258
259/// Find the first segment matching `tag` whose element 0 equals `qualifier`.
260pub fn find_qualified_segment<'s, 'd>(
261    segments: &'s [Segment<'d>],
262    tag: &str,
263    qualifier: &str,
264) -> Option<&'s Segment<'d>> {
265    segments
266        .iter()
267        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
268}
269
270/// Find the first segment by type-level qualifier pattern.
271pub fn find_segment_typed<'s, 'd, T>(segments: &'s [Segment<'d>]) -> Option<&'s Segment<'d>>
272where
273    T: EdifactSegmentTag,
274{
275    segments.iter().find(|s| T::matches_segment(s))
276}
277
278/// Iterate over all segments by type-level qualifier pattern.
279pub fn find_segments_typed<'s, 'd: 's, T>(
280    segments: &'s [Segment<'d>],
281) -> impl Iterator<Item = &'s Segment<'d>>
282where
283    T: EdifactSegmentTag,
284{
285    segments.iter().filter(|s| T::matches_segment(s))
286}
287
288/// Collect contiguous groups of segments that match `T`.
289pub fn contiguous_groups_by_qualifier<'s, 'd, T>(
290    segments: &'s [Segment<'d>],
291) -> Vec<&'s [Segment<'d>]>
292where
293    T: EdifactSegmentTag,
294{
295    let mut groups = Vec::new();
296    let mut idx = 0;
297
298    while idx < segments.len() {
299        if T::matches_segment(&segments[idx]) {
300            let start = idx;
301            idx += 1;
302            while idx < segments.len() && T::matches_segment(&segments[idx]) {
303                idx += 1;
304            }
305            groups.push(&segments[start..idx]);
306        } else {
307            idx += 1;
308        }
309    }
310
311    groups
312}
313
314/// Return `true` if all segments matching `T` are in one contiguous block.
315pub fn groups_are_contiguous_by_qualifier<T>(segments: &[Segment<'_>]) -> bool
316where
317    T: EdifactSegmentTag,
318{
319    let mut seen_match = false;
320    let mut seen_gap_after_match = false;
321
322    for seg in segments {
323        if T::matches_segment(seg) {
324            if seen_gap_after_match {
325                return false;
326            }
327            seen_match = true;
328        } else if seen_match {
329            seen_gap_after_match = true;
330        }
331    }
332
333    true
334}
335
/// Match a qualifier value against an exact or wildcard pattern.
///
/// Rules:
/// - If `pattern` contains `*`, it is treated as a glob wildcard where each `*`
///   matches any (possibly empty) run of characters, e.g. `"M*"` matches
///   `"MS"` and `"MR"`.
/// - If no wildcard is present, exact match is required.  In particular an
///   empty pattern matches only an empty value.
///
/// Prefix matching without an explicit `*` was deliberately removed: `"M"` matches only `"M"`,
/// not `"MS"` or `"MR"`.  Use `"M*"` for prefix semantics.
///
/// The literal text before the first `*` and after the last `*` must both be
/// present in `value` without overlapping, so `"a**a"` does not match `"a"`.
pub fn qualifier_matches_pattern(value: &str, pattern: &str) -> bool {
    // No wildcard at all: plain equality (covers the empty pattern too).
    let (prefix, rest) = match pattern.split_once('*') {
        Some(split) => split,
        None => return value == pattern,
    };

    // `rest` is everything after the first `*`.  If it holds further wildcards,
    // the text after the last one is the anchored suffix and the text in
    // between is a sequence of floating literals.
    let (middle, suffix) = match rest.rsplit_once('*') {
        Some((middle, suffix)) => (Some(middle), suffix),
        None => (None, rest),
    };

    // The anchored prefix and suffix must both fit without sharing characters.
    if value.len() < prefix.len() + suffix.len()
        || !value.starts_with(prefix)
        || !value.ends_with(suffix)
    {
        return false;
    }

    // Both cut points are char boundaries because `prefix` and `suffix` are
    // themselves a prefix and a suffix of `value`.
    let mut remaining = &value[prefix.len()..value.len() - suffix.len()];

    // Each floating literal must appear, in order, in what is left; taking the
    // leftmost occurrence leaves the most room for the literals that follow.
    let floating = middle
        .into_iter()
        .flat_map(|m| m.split('*'))
        .filter(|part| !part.is_empty());
    for part in floating {
        match remaining.find(part) {
            Some(idx) => remaining = &remaining[idx + part.len()..],
            None => return false,
        }
    }

    true
}
399
400/// Extract the string value of element `idx` from `seg`, or `""` if absent.
401#[inline]
402pub fn element_str<'s>(seg: &'s Segment<'_>, idx: usize) -> &'s str {
403    seg.element_str(idx).unwrap_or("")
404}
405
406// ── segment accessor helpers ───────────────────────────────────────────────────
407
408/// Extract a required text element from a segment.
409///
410/// Returns the element's first component, or an error if absent or empty.
411pub fn required_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Result<&'a str, EdifactError> {
412    seg.element_str(idx)
413        .filter(|s| !s.is_empty())
414        .ok_or_else(|| EdifactError::MissingRequiredElement {
415            tag: seg.tag.to_owned(),
416            element_index: idx,
417        })
418}
419
420/// Extract an optional text element from a segment.
421///
422/// Returns the element's first component, or None if absent or empty.
423pub fn optional_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<&'a str> {
424    seg.element_str(idx)
425        .filter(|s| !s.is_empty())
426}
427
428/// Extract a required component from a segment element.
429///
430/// Returns the component value, or an error if the element or component is absent.
431pub fn required_component<'a>(
432    seg: &'a Segment<'_>,
433    elem_idx: usize,
434    comp_idx: usize,
435) -> Result<&'a str, EdifactError> {
436    seg.elements
437        .get(elem_idx)
438        .and_then(|elem| elem.get_component(comp_idx))
439        .filter(|s| !s.is_empty())
440        .ok_or_else(|| EdifactError::MissingRequiredElement {
441            tag: seg.tag.to_owned(),
442            element_index: elem_idx,
443        })
444}
445
446/// Extract an optional component from a segment element.
447///
448/// Returns the component value, or None if absent or empty.
449pub fn optional_component<'a>(seg: &'a Segment<'_>, elem_idx: usize, comp_idx: usize) -> Option<&'a str> {
450    seg.elements
451        .get(elem_idx)
452        .and_then(|elem| elem.get_component(comp_idx))
453        .filter(|s| !s.is_empty())
454}
455
456/// Iterate over all components of an element without allocating a `Vec`.
457///
458/// Yields an empty iterator if the element is absent.
459pub fn get_components_iter<'a>(
460    seg: &'a Segment<'_>,
461    idx: usize,
462) -> impl Iterator<Item = &'a str> {
463    seg.elements
464        .get(idx)
465        .into_iter()
466        .flat_map(|elem| elem.components.iter().map(|c| c.as_ref()))
467}
468
/// A composite data element wrapper for clearer ergonomics.
///
/// This is a borrowed view over the component list of one element.  It holds
/// only a slice reference, so it is `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy)]
pub struct CompositeElement<'a> {
    components: &'a [std::borrow::Cow<'a, str>],
}

impl<'a> CompositeElement<'a> {
    /// Get the component at index `i`, or None if absent.
    ///
    /// A component that is present but empty is returned as `Some("")`.
    pub fn get(&self, i: usize) -> Option<&'a str> {
        self.components.get(i).map(|c| c.as_ref())
    }

    /// Get the component at index `i`, or empty string if absent.
    pub fn get_or_empty(&self, i: usize) -> &'a str {
        self.get(i).unwrap_or("")
    }

    /// Get the number of components.
    pub fn len(&self) -> usize {
        self.components.len()
    }

    /// Check if the composite is empty.
    pub fn is_empty(&self) -> bool {
        self.components.is_empty()
    }

    /// Iterate over all components.
    ///
    /// The yielded strings borrow from the underlying data, not from this
    /// wrapper.
    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
        self.components.iter().map(|c| c.as_ref())
    }

    /// Create a `CompositeElement` from a pre-existing component slice.
    ///
    /// Used internally by [`edifact_deserialize_owned`][EdifactDeserialize::edifact_deserialize_owned]
    /// generated code to pass component data without converting the whole segment.
    pub fn from_slice(components: &'a [std::borrow::Cow<'a, str>]) -> Self {
        Self { components }
    }
}
508
509/// Get a composite element from a segment with clearer ergonomics.
510pub fn composite_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<CompositeElement<'a>> {
511    seg.elements.get(idx).map(|elem| CompositeElement {
512        components: &elem.components,
513    })
514}
515
516/// Find the first [`OwnedSegment`] with the given tag.
517///
518/// Zero-allocation counterpart of [`find_segment`] for use in
519/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
520///
521/// [`OwnedSegment`]: crate::OwnedSegment
522pub fn find_segment_owned<'s>(
523    segments: &'s [crate::OwnedSegment],
524    tag: &str,
525) -> Option<&'s crate::OwnedSegment> {
526    segments.iter().find(|s| s.tag == tag)
527}
528
529/// Find the first [`OwnedSegment`] with the given tag **and** qualifier.
530///
531/// The qualifier is compared against the first component of element 0.
532/// Zero-allocation counterpart of [`find_qualified_segment`] for use in
533/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
534///
535/// [`OwnedSegment`]: crate::OwnedSegment
536pub fn find_qualified_segment_owned<'s>(
537    segments: &'s [crate::OwnedSegment],
538    tag: &str,
539    qualifier: &str,
540) -> Option<&'s crate::OwnedSegment> {
541    segments.iter().find(|s| {
542        s.tag == tag && s.element_str(0).unwrap_or("") == qualifier
543    })
544}
545
546/// Segment accessor trait for ergonomic typed extraction.
547pub trait SegmentAccessor<'a> {
548    /// Get non-empty element text at index `idx`.
549    fn get_element(&'a self, idx: usize) -> Option<&'a str>;
550    /// Get non-empty component text at element/component indexes.
551    fn get_component(&'a self, elem: usize, comp: usize) -> Option<&'a str>;
552    /// Get a composite wrapper for element `idx`.
553    fn get_composite(&'a self, idx: usize) -> Option<CompositeElement<'a>>;
554
555    /// Get required non-empty element text.
556    fn text_element(&'a self, idx: usize) -> Result<&'a str, EdifactError>;
557    /// Get optional non-empty element text.
558    fn optional_element(&'a self, idx: usize) -> Option<&'a str>;
559    /// Parse a typed code value from a required element.
560    fn code_element<T: FromStr>(&'a self, idx: usize) -> Result<T, EdifactError>;
561    /// Get required non-empty composite component.
562    fn required_composite(&'a self, elem: usize, comp: usize) -> Result<&'a str, EdifactError>;
563    /// Get `count` required components starting at `start_idx` from element `elem`.
564    fn repeating_components(
565        &'a self,
566        elem: usize,
567        start_idx: usize,
568        count: usize,
569    ) -> Result<Vec<&'a str>, EdifactError>;
570
571    /// Iterate over `count` required components starting at `start_idx` from element `elem`.
572    ///
573    /// Allocation-free alternative to [`repeating_components`][Self::repeating_components];
574    /// the caller supplies the iteration budget and consumes results on the fly.
575    fn repeating_components_iter(
576        &'a self,
577        elem: usize,
578        start_idx: usize,
579        count: usize,
580    ) -> impl Iterator<Item = Result<&'a str, EdifactError>> + 'a;
581}
582
583impl<'s, 'd> SegmentAccessor<'s> for Segment<'d>
584where
585    'd: 's,
586{
587    fn get_element(&'s self, idx: usize) -> Option<&'s str> {
588        self.element_str(idx).filter(|s| !s.is_empty())
589    }
590
591    fn get_component(&'s self, elem: usize, comp: usize) -> Option<&'s str> {
592        self.elements
593            .get(elem)
594            .and_then(|e| e.get_component(comp))
595            .filter(|s| !s.is_empty())
596    }
597
598    fn get_composite(&'s self, idx: usize) -> Option<CompositeElement<'s>> {
599        composite_element(self, idx)
600    }
601
602    fn text_element(&'s self, idx: usize) -> Result<&'s str, EdifactError> {
603        <Self as SegmentAccessor>::get_element(self, idx).ok_or_else(|| {
604            EdifactError::MissingRequiredElement {
605                tag: self.tag.to_owned(),
606                element_index: idx,
607            }
608        })
609    }
610
611    fn optional_element(&'s self, idx: usize) -> Option<&'s str> {
612        <Self as SegmentAccessor>::get_element(self, idx)
613    }
614
615    fn code_element<T: FromStr>(&'s self, idx: usize) -> Result<T, EdifactError> {
616        let raw = self.text_element(idx)?;
617        raw.parse::<T>().map_err(|_| EdifactError::InvalidText {
618            offset: self.element_span(idx).map(|s| s.start).unwrap_or(self.span.start),
619        })
620    }
621
622    fn required_composite(&'s self, elem: usize, comp: usize) -> Result<&'s str, EdifactError> {
623        <Self as SegmentAccessor>::get_component(self, elem, comp).ok_or_else(|| {
624            EdifactError::MissingRequiredElement {
625                tag: self.tag.to_owned(),
626                element_index: elem,
627            }
628        })
629    }
630
631    fn repeating_components(
632        &'s self,
633        elem: usize,
634        start_idx: usize,
635        count: usize,
636    ) -> Result<Vec<&'s str>, EdifactError> {
637        let comp =
638            self.get_composite(elem)
639                .ok_or_else(|| EdifactError::MissingRequiredElement {
640                    tag: self.tag.to_owned(),
641                    element_index: elem,
642                })?;
643
644        (start_idx..start_idx + count)
645            .map(|idx| {
646                comp.get(idx).filter(|s| !s.is_empty()).ok_or_else(|| {
647                    EdifactError::MissingRequiredElement {
648                        tag: self.tag.to_owned(),
649                        element_index: elem,
650                    }
651                })
652            })
653            .collect()
654    }
655
656    fn repeating_components_iter(
657        &'s self,
658        elem: usize,
659        start_idx: usize,
660        count: usize,
661    ) -> impl Iterator<Item = Result<&'s str, EdifactError>> + 's {
662        let tag = self.tag;
663        let components = self
664            .elements
665            .get(elem)
666            .map(|e| e.components.as_slice())
667            .unwrap_or(&[]);
668        (start_idx..start_idx + count).map(move |idx| {
669            components
670                .get(idx)
671                .map(|c| c.as_ref())
672                .filter(|s| !s.is_empty())
673                .ok_or_else(|| EdifactError::MissingRequiredElement {
674                    tag: tag.to_owned(),
675                    element_index: elem,
676                })
677        })
678    }
679}
680
681// ── message-window streaming ──────────────────────────────────────────────────
682
683/// An iterator that groups borrowed EDIFACT segments into per-message windows.
684///
685/// Zero-copy counterpart to [`MessageWindowsIter`] for in-memory byte slices.
686/// Each yielded `Vec<Segment<'_>>` borrows from the original input; no heap
687/// allocations occur per segment.  Envelope segments outside a `UNH..UNT` pair
688/// are silently skipped.
689///
690/// Obtain this via [`message_windows_bytes`].
691pub struct MessageWindowsSliceIter<'a> {
692    inner: crate::FromBytesIter<'a>,
693    buf: Vec<crate::Segment<'a>>,
694    in_message: bool,
695    done: bool,
696}
697
698impl<'a> MessageWindowsSliceIter<'a> {
699    fn new(inner: crate::FromBytesIter<'a>) -> Self {
700        Self {
701            inner,
702            buf: Vec::new(),
703            in_message: false,
704            done: false,
705        }
706    }
707}
708
709impl<'a> Iterator for MessageWindowsSliceIter<'a> {
710    type Item = Result<Vec<crate::Segment<'a>>, EdifactError>;
711
712    fn next(&mut self) -> Option<Self::Item> {
713        if self.done {
714            return None;
715        }
716        loop {
717            let segment = match self.inner.next() {
718                Some(Ok(s)) => s,
719                Some(Err(e)) => {
720                    self.done = true;
721                    return Some(Err(e));
722                }
723                None => {
724                    self.done = true;
725                    if self.in_message && !self.buf.is_empty() {
726                        self.in_message = false;
727                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
728                        return Some(Err(EdifactError::UnexpectedEof { offset }));
729                    }
730                    return None;
731                }
732            };
733
734            match segment.tag {
735                "UNH" => {
736                    if self.in_message {
737                        self.buf.clear();
738                        self.in_message = false;
739                        self.done = true;
740                        return Some(Err(EdifactError::ValidationFailed {
741                            error_count: 1,
742                            first_message:
743                                "UNH seen while a message window is already open (missing UNT)"
744                                    .to_owned(),
745                        }));
746                    }
747                    self.buf.clear();
748                    self.in_message = true;
749                    self.buf.push(segment);
750                }
751                "UNT" if self.in_message => {
752                    self.buf.push(segment);
753                    self.in_message = false;
754                    return Some(Ok(std::mem::take(&mut self.buf)));
755                }
756                _ if self.in_message => {
757                    self.buf.push(segment);
758                }
759                _ => {
760                    // Envelope segment outside a window — skip.
761                }
762            }
763        }
764    }
765}
766
767/// An iterator that groups owned EDIFACT segments into per-message windows.
768///
769/// Each yielded item is a `Vec<OwnedSegment>` containing the segments for one
770/// complete `UNH..UNT` message, inclusive of both service segments.
771/// Envelope-level segments (`UNB`, `UNG`, `UNZ`, `UNE`) that sit outside any
772/// `UNH..UNT` pair are silently skipped.
773///
774/// # Errors
775///
776/// - An inner-iterator error is forwarded immediately and iteration stops.
777/// - A `UNH` seen while a prior window is still open (missing `UNT`) is an error.
778/// - Input that ends while a `UNH` window is open (stream truncation) yields
779///   `Err(EdifactError::UnexpectedEof { … })` before returning `None`.
780///
781/// # Construction
782///
783/// Use [`message_windows_from_reader`] or [`message_windows_bytes`] to
784/// obtain a `MessageWindowsIter` directly.  For fully custom sources, call
785/// [`MessageWindowsIter::new`] with any `Iterator<Item = Result<OwnedSegment,
786/// EdifactError>>`.
787pub struct MessageWindowsIter<I> {
788    inner: I,
789    buf: Vec<crate::OwnedSegment>,
790    in_message: bool,
791    /// Set to `true` after any terminal condition (error or clean EOF) so that
792    /// subsequent `next()` calls immediately return `None`.
793    done: bool,
794}
795
796impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> MessageWindowsIter<I> {
797    /// Wrap any owned-segment iterator as a message-window iterator.
798    pub fn new(inner: I) -> Self {
799        Self {
800            inner,
801            buf: Vec::new(),
802            in_message: false,
803            done: false,
804        }
805    }
806}
807
808impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> Iterator
809    for MessageWindowsIter<I>
810{
811    type Item = Result<Vec<crate::OwnedSegment>, EdifactError>;
812
813    fn next(&mut self) -> Option<Self::Item> {
814        if self.done {
815            return None;
816        }
817        loop {
818            let segment = match self.inner.next() {
819                Some(Ok(s)) => s,
820                Some(Err(e)) => {
821                    self.done = true;
822                    return Some(Err(e));
823                }
824                None => {
825                    self.done = true;
826                    // A window that opened (UNH seen) but never closed (no UNT)
827                    // means the stream was truncated — surface as an error.
828                    if self.in_message && !self.buf.is_empty() {
829                        self.in_message = false;
830                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
831                        return Some(Err(EdifactError::UnexpectedEof { offset }));
832                    }
833                    return None;
834                }
835            };
836
837            match segment.tag.as_str() {
838                "UNH" => {
839                    if self.in_message {
840                        // Malformed: new UNH without a prior UNT.
841                        self.buf.clear();
842                        self.in_message = false;
843                        self.done = true;
844                        return Some(Err(EdifactError::ValidationFailed {
845                            error_count: 1,
846                            first_message:
847                                "UNH seen while a message window is already open (missing UNT)"
848                                    .to_owned(),
849                        }));
850                    }
851                    self.buf.clear();
852                    self.in_message = true;
853                    self.buf.push(segment);
854                }
855                "UNT" if self.in_message => {
856                    self.buf.push(segment);
857                    self.in_message = false;
858                    return Some(Ok(std::mem::take(&mut self.buf)));
859                }
860                _ if self.in_message => {
861                    self.buf.push(segment);
862                }
863                _ => {
864                    // Envelope segment outside a window — skip.
865                }
866            }
867        }
868    }
869}
870
871/// Stream-parse EDIFACT bytes into an iterator of per-message windows.
872///
873/// Each window is a `Vec<Segment<'_>>` spanning one `UNH..UNT` pair, with
874/// segments borrowing from `input` — **zero heap allocations per segment**.
875/// Envelope segments (`UNB`, `UNZ`, …) are skipped automatically.
876///
877/// # Example
878/// ```
879/// use edifact_rs::message_windows_bytes;
880/// let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'\
881///               UNH+1+ORDERS:D:96A:UN'\
882///               BGM+220+PO-001+9'\
883///               UNT+3+1'\
884///               UNZ+1+1'";
885///
886/// let windows: Vec<_> = message_windows_bytes(input)
887///     .collect::<Result<_, _>>()
888///     .unwrap();
889/// assert_eq!(windows.len(), 1);
890/// assert_eq!(windows[0][0].tag, "UNH");
891/// assert_eq!(windows[0].last().unwrap().tag, "UNT");
892/// ```
893pub fn message_windows_bytes(input: &[u8]) -> MessageWindowsSliceIter<'_> {
894    MessageWindowsSliceIter::new(crate::from_bytes(input))
895}
896
897/// Stream-parse EDIFACT from a reader into an iterator of per-message windows.
898///
899/// Each window is a `Vec<OwnedSegment>` spanning one `UNH..UNT` pair.
900/// This variant reads lazily — only enough input to complete one window is
901/// consumed per [`Iterator::next`] call.
902pub fn message_windows_from_reader<R: Read>(
903    reader: R,
904) -> MessageWindowsIter<crate::FromReaderIter<R>> {
905    MessageWindowsIter::new(crate::from_reader_iter(reader))
906}
907
908/// Stream typed messages from a reader by deserializing each `UNH..UNT` window.
909///
910/// This is the highest-level streaming API: it returns one `T` per message,
911/// reading only as much data as needed to complete each window.
912///
913/// Each message window is deserialized via
914/// [`EdifactDeserialize::edifact_deserialize_owned`], which avoids the
915/// intermediate `Vec<Segment<'_>>` allocation incurred by the slice-based path.
916/// Types derived with `#[derive(EdifactDeserialize)]` provide an efficient
917/// override; manual implementations fall back to [`crate::OwnedSegment::as_borrowed`].
918///
919/// # Example
920/// ```ignore
921/// // Assuming `OrdersMessage` implements `EdifactDeserialize`:
922/// let messages: Vec<OrdersMessage> =
923///     deserialize_messages_from_reader::<OrdersMessage, _>(reader)
924///         .collect::<Result<_, _>>()?;
925/// ```
926pub fn deserialize_messages_from_reader<T, R>(
927    reader: R,
928) -> impl Iterator<Item = Result<T, EdifactError>>
929where
930    T: EdifactDeserialize,
931    R: Read,
932{
933    message_windows_from_reader(reader).map(|window| {
934        let window = window?;
935        T::edifact_deserialize_owned(&window)
936    })
937}
938
939/// Stream typed messages from a byte slice by deserializing each `UNH..UNT` window.
940pub fn deserialize_messages_bytes<T>(
941    input: &[u8],
942) -> impl Iterator<Item = Result<T, EdifactError>> + '_
943where
944    T: EdifactDeserialize,
945{
946    message_windows_bytes(input).map(|window| {
947        let window = window?;
948        T::edifact_deserialize(&window)
949    })
950}
951
#[cfg(test)]
mod tests {
    use super::*;

    // ── manual test impl ──────────────────────────────────────────────────────

    /// Hand-written fixture type built from a single `BGM` segment.
    #[derive(Debug, PartialEq)]
    struct BgmSegment {
        /// BGM element 0.
        doc_name_code: String,
        /// BGM element 1.
        pruef_id: String,
        /// BGM element 2; `None` when the element is absent or empty.
        msg_function: Option<String>,
    }

    impl EdifactSegmentTag for BgmSegment {
        const SEGMENT_TAG: &'static str = "BGM";
    }

    /// Marker type selecting `NAD` segments whose qualifier matches `M*`.
    struct NadM;

    impl EdifactSegmentTag for NadM {
        const SEGMENT_TAG: &'static str = "NAD";
        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
    }

    /// Marker type selecting `NAD` segments whose qualifier matches `M*`.
    ///
    /// NOTE(review): this is currently identical to [`NadM`] (same tag, same
    /// pattern) — confirm whether one of the two was meant to use a different
    /// qualifier pattern.
    struct NadWildcard;

    impl EdifactSegmentTag for NadWildcard {
        const SEGMENT_TAG: &'static str = "NAD";
        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
    }

    impl EdifactDeserialize for BgmSegment {
        /// Build a `BgmSegment` from the first `BGM` segment in `segments`;
        /// fails with `MissingRequiredElement` when there is none.
        fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
            let seg = find_segment(segments, "BGM").ok_or_else(|| {
                EdifactError::MissingRequiredElement {
                    tag: "BGM".to_owned(),
                    element_index: 0,
                }
            })?;
            Ok(Self {
                doc_name_code: element_str(seg, 0).to_owned(),
                pruef_id: element_str(seg, 1).to_owned(),
                msg_function: seg
                    .element_str(2)
                    .filter(|s| !s.is_empty())
                    .map(str::to_owned),
            })
        }
    }

    // ── deserialize entry points ──────────────────────────────────────────────

    #[test]
    fn deserialize_single_segment() {
        let input = b"BGM+E03+11042+9'";
        let bgm: BgmSegment = deserialize(input).unwrap();
        assert_eq!(bgm.doc_name_code, "E03");
        assert_eq!(bgm.pruef_id, "11042");
        assert_eq!(bgm.msg_function, Some("9".to_owned()));
    }

    #[test]
    fn streaming_deserialize_first_from_bytes() {
        let input = b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'";
        let bgm: BgmSegment = deserialize_first_streaming(input).unwrap();
        assert_eq!(bgm.pruef_id, "11042");
    }

    #[test]
    fn streaming_deserialize_all_from_bytes() {
        // The RFF segment between the two BGMs must not end up in the result.
        let input = b"BGM+E03+11042+9'RFF+AA:1'BGM+E01+11043+9'";
        let bgms: Vec<BgmSegment> = deserialize_all_streaming(input).unwrap();
        assert_eq!(bgms.len(), 2);
        assert_eq!(bgms[0].pruef_id, "11042");
        assert_eq!(bgms[1].pruef_id, "11043");
    }

    #[test]
    fn streaming_deserialize_first_from_reader() {
        let input =
            std::io::Cursor::new(b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'".to_vec());
        let bgm: BgmSegment = deserialize_first_from_reader(input).unwrap();
        assert_eq!(bgm.pruef_id, "11042");
    }

    #[test]
    fn streaming_deserialize_all_from_reader() {
        let input = std::io::Cursor::new(b"BGM+E03+11042+9'BGM+E01+11043+9'".to_vec());
        let bgms: Vec<BgmSegment> = deserialize_all_from_reader(input).unwrap();
        assert_eq!(bgms.len(), 2);
        assert_eq!(bgms[0].pruef_id, "11042");
        assert_eq!(bgms[1].pruef_id, "11043");
    }

    #[test]
    fn missing_segment_returns_error() {
        // No BGM segment in the input at all.
        let input = b"DTM+137:20230401:102'";
        let result: Result<BgmSegment, _> = deserialize(input);
        assert!(result.is_err());
    }

    #[test]
    fn vec_collects_all_matching_segments() {
        let input = b"DTM+137:20230401:102'BGM+E03+11042+9'BGM+E01+11043+9'";
        let bgms: Vec<BgmSegment> = deserialize(input).unwrap();
        assert_eq!(bgms.len(), 2);
        assert_eq!(bgms[0].pruef_id, "11042");
        assert_eq!(bgms[1].pruef_id, "11043");
    }

    #[test]
    fn find_qualified_segment_matches_qualifier() {
        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let nad_ms = find_qualified_segment(&segments, "NAD", "MS");
        let nad_mr = find_qualified_segment(&segments, "NAD", "MR");
        assert!(nad_ms.is_some());
        assert!(nad_mr.is_some());
        assert_eq!(element_str(nad_ms.unwrap(), 0), "MS");
        assert_eq!(element_str(nad_mr.unwrap(), 0), "MR");
    }

    #[test]
    fn round_trip_str_api() {
        let input = "BGM+E03+11042+9'";
        let bgm: BgmSegment = deserialize_str(input).unwrap();
        assert_eq!(bgm.pruef_id, "11042");
    }

    // ── element / component helpers ───────────────────────────────────────────

    #[test]
    fn required_element_extraction() {
        let input = b"BGM+E03+11042+9'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let seg = &segments[0];

        assert_eq!(required_element(seg, 0).unwrap(), "E03");
        assert_eq!(required_element(seg, 1).unwrap(), "11042");
        // Element 5 doesn't exist
        assert!(required_element(seg, 5).is_err());
    }

    #[test]
    fn optional_element_extraction() {
        let input = b"BGM+E03+11042+9'BGM+E01++absent'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();

        // First segment
        assert_eq!(optional_element(&segments[0], 0), Some("E03"));
        assert_eq!(optional_element(&segments[0], 1), Some("11042"));
        assert_eq!(optional_element(&segments[0], 5), None);

        // Second segment with empty element
        assert_eq!(optional_element(&segments[1], 1), None);
    }

    #[test]
    fn component_extraction() {
        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let seg = &segments[0];

        assert_eq!(required_component(seg, 0, 0).unwrap(), "UNOA");
        assert_eq!(required_component(seg, 0, 1).unwrap(), "1");
        // Non-existent component
        assert!(required_component(seg, 0, 5).is_err());
    }

    #[test]
    fn composite_element_helper() {
        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let seg = &segments[0];

        let comp = composite_element(seg, 0).unwrap();
        assert_eq!(comp.len(), 2);
        assert_eq!(comp.get(0), Some("UNOA"));
        assert_eq!(comp.get(1), Some("1"));
        assert_eq!(comp.get(5), None);
        assert_eq!(comp.get_or_empty(5), "");
    }

    #[test]
    fn get_all_components() {
        // UNB has composite element: UNOA:1
        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let seg = &segments[0];

        let comps: Vec<&str> = get_components_iter(seg, 0).collect(); // First element is UNOA:1
        assert!(!comps.is_empty(), "Expected components but got empty");
        assert_eq!(comps.len(), 2);
        assert_eq!(comps[0], "UNOA");
        assert_eq!(comps[1], "1");
    }

    // ── qualifier pattern matching ────────────────────────────────────────────

    #[test]
    fn qualifier_pattern_matching_supports_exact_and_wildcard() {
        // Exact match (no wildcard)
        assert!(qualifier_matches_pattern("MS", "MS"));
        // A pattern without `*` must match the whole value; "M" is not treated
        // as a prefix (behaviour introduced by R-003).
        assert!(!qualifier_matches_pattern("MS", "M"));
        // Wildcard patterns
        assert!(qualifier_matches_pattern("MS", "M*"));
        assert!(qualifier_matches_pattern("MRY", "M*Y"));
        assert!(!qualifier_matches_pattern("AB", "M*"));
    }

    /// Comprehensive table-driven tests for `qualifier_matches_pattern`.
    #[test]
    fn qualifier_matches_pattern_table() {
        // (value, pattern, expected)
        let cases: &[(&str, &str, bool)] = &[
            // ── empty inputs ────────────────────────────────────────────────
            ("", "", true),        // empty matches empty
            ("", "*", true),       // wildcard matches empty string
            ("A", "", false),      // non-empty does not match empty pattern
            ("", "A", false),      // empty does not match non-empty literal
            // ── literal (no wildcard) ────────────────────────────────────────
            ("MS", "MS", true),
            ("BY", "BY", true),
            ("ms", "MS", false),   // case-sensitive
            ("MSX", "MS", false),  // prefix is NOT a match without wildcard
            ("M", "MS", false),    // too short
            // ── single wildcard at the end (prefix match) ────────────────────
            ("MS", "M*", true),
            ("MULTI", "MUL*", true),
            ("AB", "M*", false),
            ("", "M*", false),     // empty does not start with 'M'
            // ── single wildcard at the start (suffix match) ──────────────────
            ("MSG", "*G", true),
            ("G", "*G", true),
            ("MSG", "*X", false),
            ("", "*G", false),
            // ── wildcard in the middle ───────────────────────────────────────
            ("MRY", "M*Y", true),
            ("MAY", "M*Y", true),
            ("MY", "M*Y", true),   // zero-width wildcard: "M" + "" + "Y"
            ("MYY", "M*Y", true),  // last 'Y' matches, wildcard = 'Y'
            ("MAYZ", "M*Y", false),// does not end with 'Y'
            ("AB", "M*Y", false),
            // ── bare wildcard (match-all) ────────────────────────────────────
            // (the empty-value case is already covered under "empty inputs")
            ("*", "*", true),      // literal '*' value vs wildcard pattern
            ("anything", "*", true),
            // ── multiple wildcards ────────────────────────────────────────────
            ("ABCDE", "A*C*E", true),
            ("ACE", "A*C*E", true),  // zero-width wildcards
            ("AXCYE", "A*C*E", true),
            ("ABCDF", "A*C*E", false),
            // ── consecutive wildcards ─────────────────────────────────────────
            ("AB", "A**B", true),   // "**" is expected to behave like a single "*"
            // ── pattern longer than value ─────────────────────────────────────
            ("AB", "A*B*C", false),
            // ── value contains pattern as substring but must anchor start ─────
            ("XMS", "MS", false),
        ];

        for (value, pattern, expected) in cases {
            let got = qualifier_matches_pattern(value, pattern);
            assert_eq!(
                got, *expected,
                "qualifier_matches_pattern({value:?}, {pattern:?}) expected {expected} but got {got}"
            );
        }
    }

    #[test]
    fn typed_qualifier_helpers_work() {
        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();

        // First NAD whose qualifier matches `M*`.
        let first = find_segment_typed::<NadM>(&segments).unwrap();
        assert_eq!(first.element_str(0), Some("MS"));

        // Both NAD segments ("MS" and "MR") match `M*`.
        let all: Vec<_> = find_segments_typed::<NadWildcard>(&segments).collect();
        assert_eq!(all.len(), 2);
    }

    #[test]
    fn segment_accessor_trait_methods_work() {
        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let seg = &segments[0];

        assert_eq!(SegmentAccessor::get_element(seg, 1), Some("SENDER"));
        assert_eq!(SegmentAccessor::required_composite(seg, 0, 1).unwrap(), "1");
        let parsed: i32 = SegmentAccessor::code_element(seg, 4).unwrap();
        assert_eq!(parsed, 1);
        let reps = SegmentAccessor::repeating_components(seg, 3, 0, 2).unwrap();
        assert_eq!(reps, vec!["200101", "0900"]);
    }

    // ── group helpers ─────────────────────────────────────────────────────────

    #[test]
    fn group_helpers_detect_contiguity() {
        struct NadAny;
        impl EdifactSegmentTag for NadAny {
            const SEGMENT_TAG: &'static str = "NAD";
        }

        // Both NAD segments are adjacent.
        let contiguous_input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'";
        let contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(contiguous_input)
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(groups_are_contiguous_by_qualifier::<NadAny>(
            &contiguous_segments
        ));

        // An RFF segment sits between the two NAD segments.
        let non_contiguous_input = b"NAD+MS+1'RFF+AA:1'NAD+MR+2'";
        let non_contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(non_contiguous_input)
            .collect::<Result<_, _>>()
            .unwrap();
        assert!(!groups_are_contiguous_by_qualifier::<NadAny>(
            &non_contiguous_segments
        ));
    }

    #[test]
    fn group_helpers_collect_contiguous_groups() {
        struct NadAny;
        impl EdifactSegmentTag for NadAny {
            const SEGMENT_TAG: &'static str = "NAD";
        }

        // Two adjacent NADs, an RFF, then a lone NAD: two groups of sizes 2 and 1.
        let input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'NAD+BY+3'";
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
        let groups = contiguous_groups_by_qualifier::<NadAny>(&segments);

        assert_eq!(groups.len(), 2);
        assert_eq!(groups[0].len(), 2);
        assert_eq!(groups[1].len(), 1);
    }

    // ── message window tests (byte-slice path) ────────────────────────────────

    #[test]
    fn message_windows_bytes_yields_complete_windows() {
        let input = b"UNB+UNOA:1+S+R+200101:0900+1'\
                      UNH+1+ORDERS:D:96A:UN'\
                      BGM+220+PO-001+9'\
                      UNT+3+1'\
                      UNZ+1+1'";
        let windows: Vec<_> = message_windows_bytes(input)
            .collect::<Result<_, _>>()
            .unwrap();
        assert_eq!(windows.len(), 1);
        assert_eq!(windows[0][0].tag, "UNH");
        assert_eq!(windows[0].last().unwrap().tag, "UNT");
    }

    #[test]
    fn message_windows_truncated_stream_returns_error() {
        // Stream ends after UNH and BGM but without UNT — truncation must be an error
        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
        let results: Vec<_> = message_windows_bytes(input).collect();
        assert_eq!(results.len(), 1);
        assert!(
            matches!(results[0], Err(EdifactError::UnexpectedEof { .. })),
            "expected UnexpectedEof for truncated window, got: {:?}",
            results[0]
        );
    }

    #[test]
    fn message_windows_subsequent_calls_return_none_after_truncation() {
        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
        let mut iter = message_windows_bytes(input);
        assert!(matches!(
            iter.next(),
            Some(Err(EdifactError::UnexpectedEof { .. }))
        ));
        // After the error, the iterator must be fused (done = true)
        assert!(iter.next().is_none());
    }

    #[test]
    fn message_windows_unh_without_unt_before_next_unh_returns_error() {
        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'\
                      UNH+2+ORDERS:D:96A:UN'BGM+220+PO-002+9'UNT+3+2'";
        let results: Vec<_> = message_windows_bytes(input).collect();
        // First item must be an error (UNH before UNT). Use `first()` so an
        // empty result fails with a readable message instead of an index panic.
        let first = results
            .first()
            .expect("iterator yielded no items for malformed input");
        assert!(
            matches!(first, Err(EdifactError::ValidationFailed { .. })),
            "expected ValidationFailed, got: {:?}",
            first
        );
    }

    // ── MessageWindowsIter tests (reader path) ────────────────────────────────

    #[test]
    fn message_windows_from_reader_yields_complete_windows() {
        let reader = std::io::Cursor::new(
            b"UNB+UNOA:1+S+R+200101:0900+1'\
              UNH+1+ORDERS:D:96A:UN'\
              BGM+220+PO-001+9'\
              UNT+3+1'\
              UNZ+1+1'"
                .to_vec(),
        );
        let windows: Vec<_> = message_windows_from_reader(reader)
            .collect::<Result<_, _>>()
            .unwrap();
        // UNB and UNZ lie outside the window and must have been skipped.
        assert_eq!(windows.len(), 1);
        assert_eq!(windows[0].len(), 3);
        assert_eq!(windows[0][0].tag.as_str(), "UNH");
        assert_eq!(windows[0][2].tag.as_str(), "UNT");
    }

    #[test]
    fn message_windows_from_reader_reports_truncation_then_stops() {
        // UNH opens a window, but the input ends before any UNT.
        let reader = std::io::Cursor::new(b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'".to_vec());
        let mut iter = message_windows_from_reader(reader);
        assert!(matches!(
            iter.next(),
            Some(Err(EdifactError::UnexpectedEof { .. }))
        ));
        assert!(iter.next().is_none());
    }

    #[test]
    fn message_windows_from_reader_rejects_nested_unh_then_stops() {
        // Second UNH arrives while the first window is still open.
        let reader = std::io::Cursor::new(
            b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'\
              UNH+2+ORDERS:D:96A:UN'BGM+220+PO-002+9'UNT+3+2'"
                .to_vec(),
        );
        let mut iter = message_windows_from_reader(reader);
        assert!(matches!(
            iter.next(),
            Some(Err(EdifactError::ValidationFailed { .. }))
        ));
        // The iterator is finished after reporting the malformed window.
        assert!(iter.next().is_none());
    }

    // ── typed per-message streaming ───────────────────────────────────────────

    #[test]
    fn deserialize_messages_from_reader_yields_one_value_per_message() {
        let reader = std::io::Cursor::new(
            b"UNH+1+ORDERS:D:96A:UN'BGM+E03+11042+9'UNT+3+1'\
              UNH+2+ORDERS:D:96A:UN'BGM+E01+11043+9'UNT+3+2'"
                .to_vec(),
        );
        let bgms: Vec<BgmSegment> = deserialize_messages_from_reader::<BgmSegment, _>(reader)
            .collect::<Result<_, _>>()
            .unwrap();
        assert_eq!(bgms.len(), 2);
        assert_eq!(bgms[0].pruef_id, "11042");
        assert_eq!(bgms[1].pruef_id, "11043");
    }

    #[test]
    fn deserialize_messages_bytes_yields_one_value_per_message() {
        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+E03+11042+9'UNT+3+1'\
                      UNH+2+ORDERS:D:96A:UN'BGM+E01+11043+9'UNT+3+2'";
        let bgms: Vec<BgmSegment> = deserialize_messages_bytes::<BgmSegment>(input)
            .collect::<Result<_, _>>()
            .unwrap();
        assert_eq!(bgms.len(), 2);
        assert_eq!(bgms[0].pruef_id, "11042");
        assert_eq!(bgms[1].pruef_id, "11043");
    }
}