Skip to main content

edifact_rs/
de.rs

//! Custom deserialization trait for EDIFACT.
//!
//! [`EdifactDeserialize`] maps a slice of parsed [`Segment`]s to a Rust value.
//! [`EdifactSegmentTag`] is a companion trait that carries the segment tag and
//! optional qualifier at the type level, enabling the blanket
//! `impl EdifactDeserialize for Vec<T>`.
7
8use crate::{EdifactError, Segment};
9use std::borrow::Cow;
10use std::io::Read;
11use std::str::FromStr;
12
13// ── traits ────────────────────────────────────────────────────────────────────
14
15/// Types that can be deserialized from a slice of EDIFACT segments.
16///
17/// Implement manually or derive with `#[derive(EdifactDeserialize)]` from the
18/// `edifact-rs-derive` crate.
19pub trait EdifactDeserialize: Sized {
20    /// Deserialize `Self` from the provided segment slice.
21    ///
22    /// The slice may contain any number of segments; implementations extract
23    /// only the ones they care about and ignore the rest.
24    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError>;
25
26    /// Deserialize `Self` from a slice of owned EDIFACT segments.
27    ///
28    /// # Default implementation
29    ///
30    /// Converts each [`crate::OwnedSegment`] to its borrowed form via
31    /// [`crate::OwnedSegment::as_borrowed`] and delegates to
32    /// [`edifact_deserialize`][Self::edifact_deserialize].  This incurs one
33    /// `Vec<Segment<'_>>` allocation per call.
34    ///
35    /// # Warning: allocation overhead
36    ///
37    /// This default implementation incurs one [`Vec<Segment<'_>>`](Vec)
38    /// allocation per call.  Types generated by `#[derive(EdifactDeserialize)]`
39    /// automatically override this method to work directly on the owned data
40    /// without the intermediate allocation.  Manual implementations should also
41    /// override when used in the high-throughput reader-streaming path
42    /// ([`deserialize_first_from_reader`], [`deserialize_all_from_reader`],
43    /// [`deserialize_messages_from_reader`]) to avoid the per-message allocation.
44    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
45        let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
46        Self::edifact_deserialize(&borrowed)
47    }
48}
49
50/// Types that can be deserialized from a composite EDIFACT element.
51///
52/// Implement this for custom composite structs used with
53/// `#[edifact(composite)]` in derive macros.
54pub trait EdifactCompositeDeserialize: Sized {
55    /// Deserialize `Self` from a composite element.
56    fn edifact_deserialize_composite(composite: CompositeElement<'_>)
57    -> Result<Self, EdifactError>;
58}
59
60impl EdifactCompositeDeserialize for Vec<String> {
61    fn edifact_deserialize_composite(
62        composite: CompositeElement<'_>,
63    ) -> Result<Self, EdifactError> {
64        Ok(composite.iter().map(str::to_owned).collect())
65    }
66}
67
68/// Companion trait that declares a type's segment tag (and optional qualifier).
69///
70/// Required for the `Vec<T>` blanket impl and for finding the right segment in
71/// a message-level struct deserialization.
72pub trait EdifactSegmentTag {
73    /// The 3-character EDIFACT segment tag (e.g. `"BGM"`, `"NAD"`).
74    const SEGMENT_TAG: &'static str;
75
76    /// Optional qualifier pattern to further constrain segment matching.
77    ///
78    /// Examples:
79    /// - `Some("MS")` for exact qualifier matching.
80    /// - `Some("M*")` for wildcard prefix matching (matches `"MS"`, `"MR"`, etc.).
81    const QUALIFIER_PATTERN: Option<&'static str> = None;
82
83    /// Return `true` if `seg`'s qualifier matches this type's qualifier pattern.
84    fn matches_qualifier(seg: &Segment<'_>) -> bool {
85        match Self::QUALIFIER_PATTERN {
86            Some(pattern) => seg
87                .element_str(0)
88                .is_some_and(|q| qualifier_matches_pattern(q, pattern)),
89            None => true,
90        }
91    }
92
93    /// Return `true` if `seg` is the segment this type maps to.
94    ///
95    /// Default: `seg.tag == Self::SEGMENT_TAG`.  Override to also match on a
96    /// qualifier (e.g. `NAD+BY` — element 0 = `"BY"`).
97    fn matches_segment(seg: &Segment<'_>) -> bool {
98        seg.tag == Self::SEGMENT_TAG && Self::matches_qualifier(seg)
99    }
100
101    /// Like [`matches_segment`][Self::matches_segment] but works directly on an
102    /// [`crate::OwnedSegment`] without incurring the `Vec` allocation of
103    /// [`crate::OwnedSegment::as_borrowed`].
104    fn matches_owned_segment(seg: &crate::OwnedSegment) -> bool {
105        if seg.tag != Self::SEGMENT_TAG {
106            return false;
107        }
108        match Self::QUALIFIER_PATTERN {
109            None => true,
110            Some(pattern) => {
111                let q = seg
112                    .elements
113                    .first()
114                    .and_then(|e| e.components.first())
115                    .map(|c| c.as_str())
116                    .unwrap_or("");
117                qualifier_matches_pattern(q, pattern)
118            }
119        }
120    }
121}
122
123// ── blanket impl for Vec<T> ───────────────────────────────────────────────────
124
125/// Deserializes each segment matching `T::matches_segment` as an independent
126/// single-segment slice, collecting the results.
127impl<T> EdifactDeserialize for Vec<T>
128where
129    T: EdifactDeserialize + EdifactSegmentTag,
130{
131    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
132        segments
133            .iter()
134            .filter(|s| T::matches_segment(s))
135            .map(|seg| T::edifact_deserialize(std::slice::from_ref(seg)))
136            .collect()
137    }
138
139    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
140        segments
141            .iter()
142            .filter(|s| T::matches_owned_segment(s))
143            .map(|seg| T::edifact_deserialize_owned(std::slice::from_ref(seg)))
144            .collect()
145    }
146}
147
148// ── public API ────────────────────────────────────────────────────────────────
149
150/// Deserialize a value of type `T` from EDIFACT bytes.
151///
152/// Unlike [`crate::from_bytes`], which parses bytes into raw [`Segment`]s, this
153/// function fully deserializes the payload into a typed Rust value via [`EdifactDeserialize`].
154///
155/// This API currently buffers all parsed segments into a `Vec` before invoking
156/// typed deserialization.
157pub fn deserialize<T: EdifactDeserialize>(input: &[u8]) -> Result<T, EdifactError> {
158    let segments: Vec<Segment<'_>> = crate::from_bytes(input).collect::<Result<_, _>>()?;
159    T::edifact_deserialize(&segments)
160}
161
162/// Stream-parse EDIFACT bytes and deserialize the first matching segment as `T`.
163///
164/// This avoids allocating a full `Vec<Segment>` and is intended for low-memory
165/// extraction of segment-scoped types.
166pub fn deserialize_first_streaming<T>(input: &[u8]) -> Result<T, EdifactError>
167where
168    T: EdifactDeserialize + EdifactSegmentTag,
169{
170    for segment in crate::from_bytes(input) {
171        let segment = segment?;
172        if T::matches_segment(&segment) {
173            return T::edifact_deserialize(std::slice::from_ref(&segment));
174        }
175    }
176
177    Err(EdifactError::MissingSegment {
178        tag: T::SEGMENT_TAG.to_owned(),
179        expected_position: "any position in input".to_owned(),
180    })
181}
182
183/// Stream-parse EDIFACT bytes and deserialize all matching segments as `Vec<T>`.
184///
185/// This avoids buffering non-matching segments in memory.
186pub fn deserialize_all_streaming<T>(input: &[u8]) -> Result<Vec<T>, EdifactError>
187where
188    T: EdifactDeserialize + EdifactSegmentTag,
189{
190    let mut out = Vec::new();
191    for segment in crate::from_bytes(input) {
192        let segment = segment?;
193        if T::matches_segment(&segment) {
194            out.push(T::edifact_deserialize(std::slice::from_ref(&segment))?);
195        }
196    }
197    Ok(out)
198}
199
200/// Stream-parse EDIFACT from a reader and deserialize the first matching segment as `T`.
201///
202/// This is the low-memory typed path for large payloads read from I/O streams.
203pub fn deserialize_first_from_reader<T, R>(reader: R) -> Result<T, EdifactError>
204where
205    T: EdifactDeserialize + EdifactSegmentTag,
206    R: Read,
207{
208    for segment in crate::from_reader_iter(reader) {
209        let segment = segment?;
210        // O(1) tag + qualifier check before paying for as_borrowed().
211        if !T::matches_owned_segment(&segment) {
212            continue;
213        }
214        return T::edifact_deserialize_owned(std::slice::from_ref(&segment));
215    }
216
217    Err(EdifactError::MissingSegment {
218        tag: T::SEGMENT_TAG.to_owned(),
219        expected_position: "any position in input".to_owned(),
220    })
221}
222
223/// Stream-parse EDIFACT from a reader and deserialize all matching segments as `Vec<T>`.
224pub fn deserialize_all_from_reader<T, R>(reader: R) -> Result<Vec<T>, EdifactError>
225where
226    T: EdifactDeserialize + EdifactSegmentTag,
227    R: Read,
228{
229    let mut out = Vec::new();
230    for segment in crate::from_reader_iter(reader) {
231        let segment = segment?;
232        // O(1) tag + qualifier check before paying for as_borrowed().
233        if !T::matches_owned_segment(&segment) {
234            continue;
235        }
236        out.push(T::edifact_deserialize_owned(std::slice::from_ref(
237            &segment,
238        ))?);
239    }
240    Ok(out)
241}
242
243/// Deserialize a value of type `T` from an EDIFACT string.
244pub fn deserialize_str<T: EdifactDeserialize>(input: &str) -> Result<T, EdifactError> {
245    deserialize(input.as_bytes())
246}
247
248// ── helper functions ──────────────────────────────────────────────────────────
249
250/// Find the first segment with the given tag.
251pub fn find_segment<'s, 'd>(segments: &'s [Segment<'d>], tag: &str) -> Option<&'s Segment<'d>> {
252    segments.iter().find(|s| s.tag == tag)
253}
254
255/// Iterate over all segments with the given tag without allocating a `Vec`.
256pub fn find_segments_iter<'s, 'd: 's>(
257    segments: &'s [Segment<'d>],
258    tag: &'s str,
259) -> impl Iterator<Item = &'s Segment<'d>> {
260    segments.iter().filter(move |s| s.tag == tag)
261}
262
263/// Find the first segment matching `tag` whose element 0 equals `qualifier`.
264pub fn find_qualified_segment<'s, 'd>(
265    segments: &'s [Segment<'d>],
266    tag: &str,
267    qualifier: &str,
268) -> Option<&'s Segment<'d>> {
269    segments
270        .iter()
271        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
272}
273
274/// Find the first segment by type-level qualifier pattern.
275pub fn find_segment_typed<'s, 'd, T>(segments: &'s [Segment<'d>]) -> Option<&'s Segment<'d>>
276where
277    T: EdifactSegmentTag,
278{
279    segments.iter().find(|s| T::matches_segment(s))
280}
281
282/// Iterate over all segments by type-level qualifier pattern.
283pub fn find_segments_typed<'s, 'd: 's, T>(
284    segments: &'s [Segment<'d>],
285) -> impl Iterator<Item = &'s Segment<'d>>
286where
287    T: EdifactSegmentTag,
288{
289    segments.iter().filter(|s| T::matches_segment(s))
290}
291
292/// Collect contiguous groups of segments that match `T`.
293///
294/// Each group is a borrowed slice of the original `segments` array.
295/// Use [`contiguous_groups_iter`] to avoid the outer `Vec` allocation.
296pub fn contiguous_groups_by_qualifier<'s, 'd, T>(
297    segments: &'s [Segment<'d>],
298) -> Vec<&'s [Segment<'d>]>
299where
300    T: EdifactSegmentTag,
301{
302    let mut groups = Vec::new();
303    let mut idx = 0;
304    while idx < segments.len() {
305        if T::matches_segment(&segments[idx]) {
306            let start = idx;
307            idx += 1;
308            while idx < segments.len() && T::matches_segment(&segments[idx]) {
309                idx += 1;
310            }
311            groups.push(&segments[start..idx]);
312        } else {
313            idx += 1;
314        }
315    }
316    groups
317}
318
319/// Iterate lazily over contiguous groups of segments that match `T`.
320///
321/// Each yielded item is a borrowed slice `&[Segment<'_>]` that forms one
322/// contiguous run of `T`-matching segments.  No outer `Vec` is allocated —
323/// the caller can break early or collect only as many groups as needed.
324///
325/// This function uses separate lifetimes for the slice reference (`'s`) and
326/// the segment data (`'d`), matching the signature of
327/// [`contiguous_groups_by_qualifier`].
328///
329/// # Example
330/// ```rust,ignore
331/// for group in contiguous_groups_iter::<UnaSegment>(&segments) {
332///     process_group(group);
333/// }
334/// ```
335pub fn contiguous_groups_iter<'s, 'd, T>(
336    segments: &'s [Segment<'d>],
337) -> impl Iterator<Item = &'s [Segment<'d>]> + 's
338where
339    T: EdifactSegmentTag,
340{
341    let mut idx = 0;
342    let len = segments.len();
343    std::iter::from_fn(move || {
344        // Skip non-matching segments
345        while idx < len && !T::matches_segment(&segments[idx]) {
346            idx += 1;
347        }
348        if idx >= len {
349            return None;
350        }
351        let start = idx;
352        idx += 1;
353        while idx < len && T::matches_segment(&segments[idx]) {
354            idx += 1;
355        }
356        Some(&segments[start..idx])
357    })
358}
359
360/// Return `true` if all segments matching `T` are in one contiguous block.
361pub fn groups_are_contiguous_by_qualifier<T>(segments: &[Segment<'_>]) -> bool
362where
363    T: EdifactSegmentTag,
364{
365    let mut seen_match = false;
366    let mut seen_gap_after_match = false;
367
368    for seg in segments {
369        if T::matches_segment(seg) {
370            if seen_gap_after_match {
371                return false;
372            }
373            seen_match = true;
374        } else if seen_match {
375            seen_gap_after_match = true;
376        }
377    }
378
379    true
380}
381
/// Match a qualifier value against an exact or wildcard pattern.
///
/// Rules:
/// - A pattern without `*` must equal `value` exactly; in particular the
///   empty pattern matches only the empty value.
/// - Every `*` matches any run of characters, including none
///   (e.g. `"M*"` matches `"M"`, `"MS"`, `"MR"`).
/// - The literal text before the first `*` and after the last `*` must both
///   fit into `value` without overlapping: `"a*a"` and `"a**a"` each need at
///   least `"aa"`.
///
/// Prefix matching without an explicit `*` was deliberately removed: `"M"` matches only `"M"`,
/// not `"MS"` or `"MR"`.  Use `"M*"` for prefix semantics.
pub fn qualifier_matches_pattern(value: &str, pattern: &str) -> bool {
    // No wildcard (this includes the empty pattern): exact comparison.
    if !pattern.contains('*') {
        return value == pattern;
    }

    // At least one `*` is present, so `split` yields two or more pieces: the
    // first and last are anchored literals, the rest float in between.
    let mut literals = pattern.split('*');
    let prefix = literals.next().unwrap_or("");
    let suffix = literals.next_back().unwrap_or("");

    // Removing the prefix first and the suffix from what is left guarantees
    // the two anchors never share characters of `value`.
    let Some(mut remaining) = value
        .strip_prefix(prefix)
        .and_then(|rest| rest.strip_suffix(suffix))
    else {
        return false;
    };

    // Leftmost placement of each floating literal leaves the most room for
    // the ones that follow.  Empty literals (from `**`) match at offset 0.
    for literal in literals {
        match remaining.find(literal) {
            Some(pos) => remaining = &remaining[pos + literal.len()..],
            None => return false,
        }
    }

    true
}
445
446/// Extract the string value of element `idx` from `seg`, or `""` if absent.
447#[inline]
448pub fn element_str<'s>(seg: &'s Segment<'_>, idx: usize) -> &'s str {
449    seg.element_str(idx).unwrap_or("")
450}
451
452// ── segment accessor helpers ───────────────────────────────────────────────────
453
454/// Extract a required text element from a segment.
455///
456/// Returns the element's first component, or an error if absent or empty.
457///
458/// # Empty-string semantics
459///
460/// EDIFACT allows elements to be syntactically present but carry an empty
461/// string value (e.g., `SEG++'`). This function treats an empty string as
462/// *absent* — it returns [`EdifactError::MissingRequiredElement`] in that
463/// case, matching the EDIFACT rule that mandatory data elements must carry
464/// a non-empty value.
465///
466/// Delegates to [`SegmentAccessor::text_element`].
467pub fn required_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Result<&'a str, EdifactError> {
468    seg.text_element(idx)
469}
470
471/// Extract an optional text element from a segment.
472///
473/// Returns the element's first component, or None if absent or empty.
474///
475/// Delegates to [`SegmentAccessor::optional_element`].
476pub fn optional_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<&'a str> {
477    SegmentAccessor::optional_element(seg, idx)
478}
479
480/// Extract a required component from a segment element.
481///
482/// Returns the component value, or an error if the element or component is absent.
483///
484/// # Empty-string semantics
485///
486/// Like [`required_element`], an empty string component value is treated as
487/// *absent*.  A component that is syntactically present as `''` (two
488/// consecutive component separators) will cause this function to return
489/// [`EdifactError::MissingRequiredComponent`].
490///
491/// # Failure modes
492///
493/// - [`EdifactError::MissingRequiredElement`] — element `elem_idx` is absent.
494/// - [`EdifactError::MissingRequiredComponent`] — element is present but component `comp_idx` is absent or empty.
495///
496/// Delegates to [`SegmentAccessor::required_composite`].
497pub fn required_component<'a>(
498    seg: &'a Segment<'_>,
499    elem_idx: usize,
500    comp_idx: usize,
501) -> Result<&'a str, EdifactError> {
502    seg.required_composite(elem_idx, comp_idx)
503}
504
505/// Extract an optional component from a segment element.
506///
507/// Returns the component value, or None if absent or empty.
508///
509/// Delegates to [`SegmentAccessor::get_component`].
510pub fn optional_component<'a>(
511    seg: &'a Segment<'_>,
512    elem_idx: usize,
513    comp_idx: usize,
514) -> Option<&'a str> {
515    SegmentAccessor::get_component(seg, elem_idx, comp_idx)
516}
517
518/// Iterate over all components of an element without allocating a `Vec`.
519///
520/// Yields an empty iterator if the element is absent.
521pub fn get_components_iter<'a>(seg: &'a Segment<'_>, idx: usize) -> impl Iterator<Item = &'a str> {
522    seg.elements
523        .get(idx)
524        .into_iter()
525        .flat_map(|elem| elem.components.iter().map(|c| c.as_ref()))
526}
527
/// Borrowed view over the components of one composite data element.
pub struct CompositeElement<'a> {
    /// The element's components, in wire order.
    components: &'a [std::borrow::Cow<'a, str>],
}

impl<'a> CompositeElement<'a> {
    /// Return the component at index `i`, or `None` if `i` is out of range.
    ///
    /// An empty component that is present is returned as `Some("")`.
    pub fn get(&self, i: usize) -> Option<&'a str> {
        let component = self.components.get(i)?;
        Some(&**component)
    }

    /// Return the component at index `i`, or `""` if `i` is out of range.
    pub fn get_or_empty(&self, i: usize) -> &'a str {
        match self.get(i) {
            Some(text) => text,
            None => "",
        }
    }

    /// Number of components, counting empty ones.
    pub fn len(&self) -> usize {
        let components = self.components;
        components.len()
    }

    /// `true` when the composite holds no components at all.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterate over all components in order.
    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
        let components: &'a [std::borrow::Cow<'a, str>] = self.components;
        components.iter().map(|component| &**component)
    }

    /// Wrap an existing component slice in a `CompositeElement`.
    ///
    /// Used internally by [`edifact_deserialize_owned`][EdifactDeserialize::edifact_deserialize_owned]
    /// generated code to hand over component data without converting the
    /// whole segment.
    pub fn from_slice(components: &'a [std::borrow::Cow<'a, str>]) -> Self {
        CompositeElement { components }
    }
}
567
568/// Get a composite element from a segment with clearer ergonomics.
569pub fn composite_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<CompositeElement<'a>> {
570    seg.elements.get(idx).map(|elem| CompositeElement {
571        components: &elem.components,
572    })
573}
574
575/// Find the first [`OwnedSegment`] with the given tag.
576///
577/// Zero-allocation counterpart of [`find_segment`] for use in
578/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
579///
580/// [`OwnedSegment`]: crate::OwnedSegment
581pub fn find_segment_owned<'s>(
582    segments: &'s [crate::OwnedSegment],
583    tag: &str,
584) -> Option<&'s crate::OwnedSegment> {
585    segments.iter().find(|s| s.tag == tag)
586}
587
588/// Find the first [`OwnedSegment`] with the given tag **and** qualifier.
589///
590/// The qualifier is compared against the first component of element 0.
591/// Zero-allocation counterpart of [`find_qualified_segment`] for use in
592/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
593///
594/// [`OwnedSegment`]: crate::OwnedSegment
595pub fn find_qualified_segment_owned<'s>(
596    segments: &'s [crate::OwnedSegment],
597    tag: &str,
598    qualifier: &str,
599) -> Option<&'s crate::OwnedSegment> {
600    segments
601        .iter()
602        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
603}
604
605/// Segment accessor trait for ergonomic typed extraction.
606pub trait SegmentAccessor<'a> {
607    /// Get non-empty element text at index `idx`.
608    fn get_element(&'a self, idx: usize) -> Option<&'a str>;
609    /// Get non-empty component text at element/component indexes.
610    fn get_component(&'a self, elem: usize, comp: usize) -> Option<&'a str>;
611    /// Get a composite wrapper for element `idx`.
612    fn get_composite(&'a self, idx: usize) -> Option<CompositeElement<'a>>;
613
614    /// Get required non-empty element text.
615    fn text_element(&'a self, idx: usize) -> Result<&'a str, EdifactError>;
616    /// Get optional non-empty element text.
617    fn optional_element(&'a self, idx: usize) -> Option<&'a str>;
618    /// Parse a typed code value from a required element.
619    fn code_element<T: FromStr>(&'a self, idx: usize) -> Result<T, EdifactError>;
620    /// Get required non-empty composite component.
621    fn required_composite(&'a self, elem: usize, comp: usize) -> Result<&'a str, EdifactError>;
622    /// Get `count` required components starting at `start_idx` from element `elem`.
623    ///
624    /// Allocates a `Vec`.  For a zero-alloc alternative, use
625    /// [`repeating_components_iter`][Self::repeating_components_iter] and
626    /// consume the iterator directly without collecting.
627    fn repeating_components(
628        &'a self,
629        elem: usize,
630        start_idx: usize,
631        count: usize,
632    ) -> Result<Vec<&'a str>, EdifactError> {
633        // Default implementation delegates to the zero-alloc iterator and
634        // collects.  Implementors that can do better should override this.
635        self.repeating_components_iter(elem, start_idx, count)
636            .collect()
637    }
638
639    /// Iterate over `count` required components starting at `start_idx` from element `elem`.
640    ///
641    /// Allocation-free alternative to [`repeating_components`][Self::repeating_components];
642    /// the caller supplies the iteration budget and consumes results on the fly.
643    fn repeating_components_iter(
644        &'a self,
645        elem: usize,
646        start_idx: usize,
647        count: usize,
648    ) -> impl Iterator<Item = Result<&'a str, EdifactError>> + 'a;
649}
650
651impl<'s, 'd> SegmentAccessor<'s> for Segment<'d>
652where
653    'd: 's,
654{
655    fn get_element(&'s self, idx: usize) -> Option<&'s str> {
656        self.element_str(idx).filter(|s| !s.is_empty())
657    }
658
659    fn get_component(&'s self, elem: usize, comp: usize) -> Option<&'s str> {
660        self.elements
661            .get(elem)
662            .and_then(|e| e.get_component(comp))
663            .filter(|s| !s.is_empty())
664    }
665
666    fn get_composite(&'s self, idx: usize) -> Option<CompositeElement<'s>> {
667        composite_element(self, idx)
668    }
669
670    fn text_element(&'s self, idx: usize) -> Result<&'s str, EdifactError> {
671        <Self as SegmentAccessor>::get_element(self, idx).ok_or_else(|| {
672            EdifactError::MissingRequiredElement {
673                tag: self.tag.to_owned(),
674                element_index: idx,
675            }
676        })
677    }
678
679    fn optional_element(&'s self, idx: usize) -> Option<&'s str> {
680        <Self as SegmentAccessor>::get_element(self, idx)
681    }
682
683    fn code_element<T: FromStr>(&'s self, idx: usize) -> Result<T, EdifactError> {
684        let raw = self.text_element(idx)?;
685        raw.parse::<T>().map_err(|_| EdifactError::InvalidText {
686            offset: self
687                .element_span(idx)
688                .map(|s| s.start)
689                .unwrap_or(self.span.start),
690        })
691    }
692
693    fn required_composite(&'s self, elem: usize, comp: usize) -> Result<&'s str, EdifactError> {
694        match self.elements.get(elem) {
695            None => Err(EdifactError::MissingRequiredElement {
696                tag: self.tag.to_owned(),
697                element_index: elem,
698            }),
699            Some(e) => e
700                .get_component(comp)
701                .filter(|s| !s.is_empty())
702                .ok_or_else(|| EdifactError::MissingRequiredComponent {
703                    tag: self.tag.to_owned(),
704                    element_index: elem,
705                    component_index: comp,
706                }),
707        }
708    }
709
710    fn repeating_components_iter(
711        &'s self,
712        elem: usize,
713        start_idx: usize,
714        count: usize,
715    ) -> impl Iterator<Item = Result<&'s str, EdifactError>> + 's {
716        let tag = self.tag;
717        let element_exists = self.elements.get(elem).is_some();
718        let components = self
719            .elements
720            .get(elem)
721            .map(|e| e.components.as_slice())
722            .unwrap_or(&[]);
723        (start_idx..start_idx + count).map(move |idx| {
724            components
725                .get(idx)
726                .map(|c| c.as_ref())
727                .filter(|s| !s.is_empty())
728                .ok_or_else(|| {
729                    if element_exists {
730                        EdifactError::MissingRequiredComponent {
731                            tag: tag.to_owned(),
732                            element_index: elem,
733                            component_index: idx,
734                        }
735                    } else {
736                        EdifactError::MissingRequiredElement {
737                            tag: tag.to_owned(),
738                            element_index: elem,
739                        }
740                    }
741                })
742        })
743    }
744}
745
746// ── message-window streaming ──────────────────────────────────────────────────
747
748/// A complete `UNH..UNT` message window that borrows from the original input.
749///
750/// Produced by [`MessageWindowsSliceIter`] / [`message_windows_bytes`].
751/// The `message_type` and `association_code` fields are extracted from the
752/// `UNH` segment at construction time, so callers do not need to traverse the
753/// segment list themselves.
754///
755/// `segments` contains the full window including the `UNH` and `UNT` service
756/// segments so that envelope-aware consumers have access to them.
757///
758/// # Accessing segments
759///
760/// ```rust,ignore
761/// for window in message_windows_bytes(input) {
762///     let window = window?;
763///     println!("type={:?} code={:?}", window.message_type, window.association_code);
764///     let bgm = window.segments.iter().find(|s| s.tag == "BGM");
765/// }
766/// ```
767#[derive(Debug)]
768pub struct MessageWindow<'a> {
769    /// EDIFACT message type extracted from `UNH` element 1, component 0.
770    ///
771    /// Borrowed when the component can be referenced directly, owned when
772    /// release-character unescaping requires allocation.
773    pub message_type: Option<Cow<'a, str>>,
774    /// Association-assigned code (DE 0057) from `UNH` element 1, component 4.
775    ///
776    /// Borrowed when the component can be referenced directly, owned when
777    /// release-character unescaping requires allocation.
778    pub association_code: Option<Cow<'a, str>>,
779    /// All segments in this window, from `UNH` through `UNT` (inclusive).
780    pub segments: Vec<crate::Segment<'a>>,
781}
782
783impl<'a> MessageWindow<'a> {
784    /// Build a `MessageWindow` from a completed segment buffer.
785    ///
786    /// Extracts `message_type` and `association_code` from the leading `UNH`
787    /// segment.  Metadata extraction is allocation-free for borrowed components;
788    /// release-character unescaping may allocate owned strings when necessary.
789    fn from_segments(segments: Vec<crate::Segment<'a>>) -> Self {
790        let message_type = segments
791            .first()
792            .filter(|s| s.tag == "UNH")
793            .and_then(|unh| unh_component(unh, 0));
794        let association_code = segments
795            .first()
796            .filter(|s| s.tag == "UNH")
797            .and_then(|unh| unh_component(unh, 4));
798        Self {
799            message_type,
800            association_code,
801            segments,
802        }
803    }
804}
805
806/// Extract a non-empty string component from UNH element 1, preserving the
807/// component's borrowed/owned state.
808///
809/// By using two distinct lifetime parameters (`'b` for the borrow of `seg`,
810/// `'a` for the segment data), we tell the borrow checker that the returned
811/// `&'a str` lives independently of how long we hold `&seg`, which lets callers
812/// move `seg` into a containing struct after this call returns.
813fn unh_component<'a, 'b>(seg: &'b crate::Segment<'a>, comp_idx: usize) -> Option<Cow<'a, str>>
814where
815    'a: 'b,
816{
817    seg.elements
818        .get(1)
819        .and_then(|e| e.components.get(comp_idx))
820        .and_then(|c| if c.is_empty() { None } else { Some(c.clone()) })
821}
822
823/// An owned, heap-allocated `UNH..UNT` message window.
824///
825/// Produced by [`MessageWindowsIter`] / [`message_windows_from_reader`].
826/// Equivalent to [`MessageWindow`] but with all data owned, so it outlives
827/// the original reader.
828///
829/// `segments` contains the full window including the `UNH` and `UNT` service
830/// segments.
831#[derive(Debug, Clone)]
832pub struct OwnedMessageWindow {
833    /// EDIFACT message type extracted from `UNH` element 1, component 0.
834    pub message_type: Option<String>,
835    /// Association-assigned code (DE 0057) from `UNH` element 1, component 4.
836    pub association_code: Option<String>,
837    /// All segments in this window, from `UNH` through `UNT` (inclusive).
838    pub segments: Vec<crate::OwnedSegment>,
839}
840
841impl OwnedMessageWindow {
842    fn from_segments(segments: Vec<crate::OwnedSegment>) -> Self {
843        let unh = segments.first().filter(|s| s.tag == "UNH");
844        let message_type = unh
845            .and_then(|s| s.elements.get(1))
846            .and_then(|e| e.components.first())
847            .map(|c| c.as_ref())
848            .filter(|s: &&str| !s.is_empty())
849            .map(str::to_owned);
850        let association_code = unh
851            .and_then(|s| s.elements.get(1))
852            .and_then(|e| e.components.get(4))
853            .map(|c| c.as_ref())
854            .filter(|s: &&str| !s.is_empty())
855            .map(str::to_owned);
856        Self {
857            message_type,
858            association_code,
859            segments,
860        }
861    }
862}
863
864/// An iterator that groups borrowed EDIFACT segments into per-message windows.
865///
866/// Zero-copy counterpart to [`MessageWindowsIter`] for in-memory byte slices.
867/// Text content borrows from the original input; segment structure allocates
868/// element vectors during parsing. Release-character unescaping may further
869/// allocate owned strings when escape sequences are present. Envelope segments
870/// outside a `UNH..UNT` pair are silently skipped.
871///
872/// Obtain this via [`message_windows_bytes`].
873pub struct MessageWindowsSliceIter<'a> {
874    inner: crate::FromBytesIter<'a>,
875    buf: Vec<crate::Segment<'a>>,
876    in_message: bool,
877    done: bool,
878}
879
880impl<'a> MessageWindowsSliceIter<'a> {
881    fn new(inner: crate::FromBytesIter<'a>) -> Self {
882        Self {
883            inner,
884            buf: Vec::new(),
885            in_message: false,
886            done: false,
887        }
888    }
889}
890
891impl<'a> Iterator for MessageWindowsSliceIter<'a> {
892    type Item = Result<MessageWindow<'a>, EdifactError>;
893
894    fn next(&mut self) -> Option<Self::Item> {
895        if self.done {
896            return None;
897        }
898        loop {
899            let segment = match self.inner.next() {
900                Some(Ok(s)) => s,
901                Some(Err(e)) => {
902                    self.done = true;
903                    return Some(Err(e));
904                }
905                None => {
906                    self.done = true;
907                    if self.in_message && !self.buf.is_empty() {
908                        self.in_message = false;
909                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
910                        return Some(Err(EdifactError::UnexpectedEof { offset }));
911                    }
912                    return None;
913                }
914            };
915
916            match segment.tag {
917                "UNH" => {
918                    if self.in_message {
919                        self.buf.clear();
920                        self.in_message = false;
921                        self.done = true;
922                        let offset = segment.span.start;
923                        return Some(Err(EdifactError::InvalidSegmentForMessage {
924                            tag: "UNH".to_owned(),
925                            message_type: "ENVELOPE".to_owned(),
926                            offset,
927                        }));
928                    }
929                    self.buf.clear();
930                    self.in_message = true;
931                    self.buf.push(segment);
932                }
933                "UNT" if self.in_message => {
934                    self.buf.push(segment);
935                    self.in_message = false;
936                    let segments = std::mem::take(&mut self.buf);
937                    return Some(Ok(MessageWindow::from_segments(segments)));
938                }
939                _ if self.in_message => {
940                    self.buf.push(segment);
941                }
942                _ => {
943                    // Envelope segment outside a window — skip.
944                }
945            }
946        }
947    }
948}
949
950/// An iterator that groups owned EDIFACT segments into per-message windows.
951///
952/// Each yielded item is an [`OwnedMessageWindow`] containing the segments for one
953/// complete `UNH..UNT` message, inclusive of both service segments.
954/// Envelope-level segments (`UNB`, `UNG`, `UNZ`, `UNE`) that sit outside any
955/// `UNH..UNT` pair are silently skipped.
956///
957/// # Errors
958///
959/// - An inner-iterator error is forwarded immediately and iteration stops.
960/// - A `UNH` seen while a prior window is still open (missing `UNT`) is an error.
961/// - Input that ends while a `UNH` window is open (stream truncation) yields
962///   `Err(EdifactError::UnexpectedEof { … })` before returning `None`.
963///
964/// # Construction
965///
966/// Use [`message_windows_from_reader`] or [`message_windows_bytes`] to
967/// obtain a `MessageWindowsIter` directly.  For fully custom sources, call
968/// [`MessageWindowsIter::new`] with any `Iterator<Item = Result<OwnedSegment,
969/// EdifactError>>`.
970pub struct MessageWindowsIter<I> {
971    inner: I,
972    buf: Vec<crate::OwnedSegment>,
973    in_message: bool,
974    /// Set to `true` after any terminal condition (error or clean EOF) so that
975    /// subsequent `next()` calls immediately return `None`.
976    done: bool,
977}
978
979impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> MessageWindowsIter<I> {
980    /// Wrap any owned-segment iterator as a message-window iterator.
981    pub fn new(inner: I) -> Self {
982        Self {
983            inner,
984            buf: Vec::new(),
985            in_message: false,
986            done: false,
987        }
988    }
989}
990
991impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> Iterator
992    for MessageWindowsIter<I>
993{
994    type Item = Result<OwnedMessageWindow, EdifactError>;
995
996    fn next(&mut self) -> Option<Self::Item> {
997        if self.done {
998            return None;
999        }
1000        loop {
1001            let segment = match self.inner.next() {
1002                Some(Ok(s)) => s,
1003                Some(Err(e)) => {
1004                    self.done = true;
1005                    return Some(Err(e));
1006                }
1007                None => {
1008                    self.done = true;
1009                    // A window that opened (UNH seen) but never closed (no UNT)
1010                    // means the stream was truncated — surface as an error.
1011                    if self.in_message && !self.buf.is_empty() {
1012                        self.in_message = false;
1013                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
1014                        return Some(Err(EdifactError::UnexpectedEof { offset }));
1015                    }
1016                    return None;
1017                }
1018            };
1019
1020            match segment.tag.as_str() {
1021                "UNH" => {
1022                    if self.in_message {
1023                        // Malformed: new UNH without closing the prior UNT.
1024                        self.buf.clear();
1025                        self.in_message = false;
1026                        self.done = true;
1027                        let offset = segment.span.start;
1028                        return Some(Err(EdifactError::InvalidSegmentForMessage {
1029                            tag: "UNH".to_owned(),
1030                            message_type: "ENVELOPE".to_owned(),
1031                            offset,
1032                        }));
1033                    }
1034                    self.buf.clear();
1035                    self.in_message = true;
1036                    self.buf.push(segment);
1037                }
1038                "UNT" if self.in_message => {
1039                    self.buf.push(segment);
1040                    self.in_message = false;
1041                    let segments = std::mem::take(&mut self.buf);
1042                    return Some(Ok(OwnedMessageWindow::from_segments(segments)));
1043                }
1044                _ if self.in_message => {
1045                    self.buf.push(segment);
1046                }
1047                _ => {
1048                    // Envelope segment outside a window — skip.
1049                }
1050            }
1051        }
1052    }
1053}
1054
1055/// Stream-parse EDIFACT bytes into an iterator of per-message windows.
1056///
1057/// Each yielded [`MessageWindow`] spans one `UNH..UNT` pair, with segments
1058/// borrowing from `input` for their text content. Segment assembly is
1059/// zero-copy for borrowed input bytes; release-character unescaping may
1060/// allocate owned component strings when necessary.
1061/// Envelope segments (`UNB`, `UNZ`, …) are skipped automatically.
1062///
1063/// The `message_type` and `association_code` fields are populated directly from
1064/// the `UNH` segment so that routing logic does not need to traverse `segments`.
1065///
1066/// # Example
1067/// ```
1068/// use edifact_rs::message_windows_bytes;
1069/// let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'\
1070///               UNH+1+ORDERS:D:96A:UN'\
1071///               BGM+220+PO-001+9'\
1072///               UNT+3+1'\
1073///               UNZ+1+1'";
1074///
1075/// let windows: Vec<_> = message_windows_bytes(input)
1076///     .collect::<Result<_, _>>()
1077///     .unwrap();
1078/// assert_eq!(windows.len(), 1);
1079/// assert_eq!(windows[0].message_type.as_deref(), Some("ORDERS"));
1080/// assert_eq!(windows[0].segments[0].tag, "UNH");
1081/// assert_eq!(windows[0].segments.last().unwrap().tag, "UNT");
1082/// ```
1083pub fn message_windows_bytes(input: &[u8]) -> MessageWindowsSliceIter<'_> {
1084    MessageWindowsSliceIter::new(crate::from_bytes(input))
1085}
1086
1087/// Stream-parse EDIFACT from a reader into an iterator of per-message windows.
1088///
1089/// Each yielded [`OwnedMessageWindow`] spans one `UNH..UNT` pair.
1090/// This variant reads lazily — only enough input to complete one window is
1091/// consumed per [`Iterator::next`] call.
1092pub fn message_windows_from_reader<R: Read>(
1093    reader: R,
1094) -> MessageWindowsIter<crate::FromReaderIter<R>> {
1095    MessageWindowsIter::new(crate::from_reader_iter(reader))
1096}
1097
1098/// Stream typed messages from a reader by deserializing each `UNH..UNT` window.
1099///
1100/// This is the highest-level streaming API: it returns one `T` per message,
1101/// reading only as much data as needed to complete each window.
1102///
1103/// Each message window is deserialized via
1104/// [`EdifactDeserialize::edifact_deserialize_owned`], which avoids the
1105/// intermediate `Vec<Segment<'_>>` allocation incurred by the slice-based path.
1106/// Types derived with `#[derive(EdifactDeserialize)]` provide an efficient
1107/// override; manual implementations fall back to [`crate::OwnedSegment::as_borrowed`].
1108///
1109/// # Example
1110/// ```ignore
1111/// // Assuming `OrdersMessage` implements `EdifactDeserialize`:
1112/// let messages: Vec<OrdersMessage> =
1113///     deserialize_messages_from_reader::<OrdersMessage, _>(reader)
1114///         .collect::<Result<_, _>>()?;
1115/// ```
1116pub fn deserialize_messages_from_reader<T, R>(
1117    reader: R,
1118) -> impl Iterator<Item = Result<T, EdifactError>>
1119where
1120    T: EdifactDeserialize,
1121    R: Read,
1122{
1123    message_windows_from_reader(reader).map(|window| {
1124        let window = window?;
1125        T::edifact_deserialize_owned(&window.segments)
1126    })
1127}
1128
1129/// Stream typed messages from a byte slice by deserializing each `UNH..UNT` window.
1130pub fn deserialize_messages_bytes<T>(
1131    input: &[u8],
1132) -> impl Iterator<Item = Result<T, EdifactError>> + '_
1133where
1134    T: EdifactDeserialize,
1135{
1136    message_windows_bytes(input).map(|window| {
1137        let window = window?;
1138        T::edifact_deserialize(&window.segments)
1139    })
1140}
1141
1142// ── MessageDispatch ───────────────────────────────────────────────────────────
1143
/// The result of a [`MessageDispatch`] call: a message type plus a
/// type-erased value produced by the matching handler.
pub struct DispatchedMessage {
    /// Message type string taken from the `UNH` segment.
    pub message_type: String,
    /// Handler output, boxed behind `dyn Any`; read it back via `downcast`.
    value: Box<dyn std::any::Any + Send + Sync>,
}

impl DispatchedMessage {
    /// Borrow the stored value as a `T`.
    ///
    /// Returns `None` when the stored value is not of type `T`.
    pub fn downcast<T: std::any::Any + Send + Sync + 'static>(&self) -> Option<&T> {
        let erased: &(dyn std::any::Any + Send + Sync) = &*self.value;
        erased.downcast_ref::<T>()
    }
}

impl std::fmt::Debug for DispatchedMessage {
    /// Print only `message_type`; the type-erased value is shown as `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("DispatchedMessage");
        builder.field("message_type", &self.message_type);
        builder.finish_non_exhaustive()
    }
}
1167
1168type DispatchHandlerFn = Box<
1169    dyn for<'a> Fn(&[Segment<'a>]) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError>
1170        + Send
1171        + Sync,
1172>;
1173
1174type FallbackHandlerFn = Box<
1175    dyn for<'a> Fn(
1176            &[Segment<'a>],
1177            &str,
1178        ) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError>
1179        + Send
1180        + Sync,
1181>;
1182
1183/// Type-based dispatcher for mixed-message EDIFACT streams.
1184///
1185/// Register one handler per message type with [`on`][Self::on], then call
1186/// [`dispatch`][Self::dispatch] on each message window.  If no handler matches
1187/// and a [`fallback`][Self::fallback] was registered it is invoked instead;
1188/// otherwise an [`EdifactError::UnexpectedMessageType`] is returned.
1189///
1190/// # Example
1191///
1192/// ```rust,ignore
1193/// let dispatch = MessageDispatch::new()
1194///     .on("ORDERS",  |segs| Orders::edifact_deserialize(segs))
1195///     .on("INVOIC",  |segs| Invoice::edifact_deserialize(segs));
1196///
1197/// for window in message_windows_bytes(input) {
1198///     let window = window?;
1199///     let msg = dispatch.dispatch(&window)?;
1200///     match msg.message_type.as_str() {
1201///         "ORDERS"  => { let o = msg.downcast::<Orders>().unwrap(); /* … */ }
1202///         "INVOIC"  => { let i = msg.downcast::<Invoice>().unwrap(); /* … */ }
1203///         _         => unreachable!(),
1204///     }
1205/// }
1206/// ```
1207pub struct MessageDispatch {
1208    handlers: Vec<(String, DispatchHandlerFn)>,
1209    fallback: Option<FallbackHandlerFn>,
1210}
1211
1212impl Default for MessageDispatch {
1213    fn default() -> Self {
1214        Self::new()
1215    }
1216}
1217
1218impl MessageDispatch {
1219    /// Create an empty dispatcher.
1220    pub fn new() -> Self {
1221        Self {
1222            handlers: Vec::new(),
1223            fallback: None,
1224        }
1225    }
1226
1227    /// Register a handler for `message_type`.
1228    ///
1229    /// The closure receives the full message window and returns a typed value
1230    /// that is boxed and stored inside [`DispatchedMessage`].
1231    pub fn on<T, F>(mut self, message_type: &str, handler: F) -> Self
1232    where
1233        T: std::any::Any + Send + Sync + 'static,
1234        F: for<'a> Fn(&[Segment<'a>]) -> Result<T, EdifactError> + Send + Sync + 'static,
1235    {
1236        let erased: DispatchHandlerFn = Box::new(move |segs| {
1237            let val = handler(segs)?;
1238            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1239        });
1240        self.handlers.push((message_type.to_owned(), erased));
1241        self
1242    }
1243
1244    /// Register a fallback handler for unrecognised message types.
1245    ///
1246    /// The closure receives the segment window **and** the unknown message-type
1247    /// string.
1248    pub fn fallback<T, F>(mut self, handler: F) -> Self
1249    where
1250        T: std::any::Any + Send + Sync + 'static,
1251        F: for<'a> Fn(&[Segment<'a>], &str) -> Result<T, EdifactError> + Send + Sync + 'static,
1252    {
1253        let erased: FallbackHandlerFn = Box::new(move |segs, mt| {
1254            let val = handler(segs, mt)?;
1255            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1256        });
1257        self.fallback = Some(erased);
1258        self
1259    }
1260
1261    /// Dispatch a single message window to the appropriate handler.
1262    ///
1263    /// The message type is extracted from the `UNH` segment.  If no `UNH` is
1264    /// present, [`EdifactError::MissingSegment`] is returned.
1265    pub fn dispatch(&self, window: &[Segment<'_>]) -> Result<DispatchedMessage, EdifactError> {
1266        let message_type = window
1267            .iter()
1268            .find(|s| s.tag == "UNH")
1269            .and_then(|unh| unh.get_element(1))
1270            .and_then(|e| e.get_component(0))
1271            .map(|s| s.to_owned())
1272            .ok_or_else(|| EdifactError::MissingSegment {
1273                tag: "UNH".to_owned(),
1274                expected_position: "first segment of message window".to_owned(),
1275            })?;
1276
1277        for (mt, handler) in &self.handlers {
1278            if *mt == message_type {
1279                let value = handler(window)?;
1280                return Ok(DispatchedMessage {
1281                    message_type,
1282                    value,
1283                });
1284            }
1285        }
1286
1287        if let Some(fallback) = &self.fallback {
1288            let value = fallback(window, &message_type)?;
1289            return Ok(DispatchedMessage {
1290                message_type,
1291                value,
1292            });
1293        }
1294
1295        Err(EdifactError::UnexpectedMessageType { message_type })
1296    }
1297
1298    /// Dispatch all messages from a byte reader.
1299    ///
1300    /// Each message window is extracted and dispatched in order.  The returned
1301    /// iterator is lazy — errors are yielded as `Err` items.
1302    pub fn dispatch_all_from_bytes<'a>(
1303        &'a self,
1304        input: &'a [u8],
1305    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + 'a {
1306        message_windows_bytes(input).map(move |window| {
1307            let window = window?;
1308            self.dispatch(&window.segments)
1309        })
1310    }
1311
1312    /// Dispatch all messages from a reader.
1313    ///
1314    /// Parses the stream into message windows and dispatches each.  The
1315    /// returned iterator yields owned [`DispatchedMessage`] values lazily:
1316    /// each window is fully buffered in memory (as `Vec<OwnedSegment>`) before
1317    /// dispatch, but windows are processed one at a time rather than all at once.
1318    pub fn dispatch_all_from_reader<R: Read + 'static>(
1319        &self,
1320        reader: R,
1321    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + '_ {
1322        message_windows_from_reader(reader).map(|window| {
1323            let window = window?;
1324            let borrowed: Vec<Segment<'_>> =
1325                window.segments.iter().map(|s| s.as_borrowed()).collect();
1326            self.dispatch(&borrowed)
1327        })
1328    }
1329}
1330
1331#[cfg(test)]
1332mod tests {
1333    use super::*;
1334
1335    // ── manual test impl ──────────────────────────────────────────────────────
1336    #[derive(Debug, PartialEq)]
1337    struct BgmSegment {
1338        doc_name_code: String,
1339        pruef_id: String,
1340        msg_function: Option<String>,
1341    }
1342
1343    impl EdifactSegmentTag for BgmSegment {
1344        const SEGMENT_TAG: &'static str = "BGM";
1345    }
1346
1347    struct NadM;
1348
1349    impl EdifactSegmentTag for NadM {
1350        const SEGMENT_TAG: &'static str = "NAD";
1351        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
1352    }
1353
1354    struct NadWildcard;
1355
1356    impl EdifactSegmentTag for NadWildcard {
1357        const SEGMENT_TAG: &'static str = "NAD";
1358        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
1359    }
1360
1361    impl EdifactDeserialize for BgmSegment {
1362        fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
1363            let seg = find_segment(segments, "BGM").ok_or_else(|| {
1364                EdifactError::MissingRequiredElement {
1365                    tag: "BGM".to_owned(),
1366                    element_index: 0,
1367                }
1368            })?;
1369            Ok(Self {
1370                doc_name_code: element_str(seg, 0).to_owned(),
1371                pruef_id: element_str(seg, 1).to_owned(),
1372                msg_function: seg
1373                    .element_str(2)
1374                    .filter(|s| !s.is_empty())
1375                    .map(str::to_owned),
1376            })
1377        }
1378    }
1379
1380    #[test]
1381    fn deserialize_single_segment() {
1382        let input = b"BGM+E03+11042+9'";
1383        let bgm: BgmSegment = deserialize(input).unwrap();
1384        assert_eq!(bgm.doc_name_code, "E03");
1385        assert_eq!(bgm.pruef_id, "11042");
1386        assert_eq!(bgm.msg_function, Some("9".to_owned()));
1387    }
1388
1389    #[test]
1390    fn streaming_deserialize_first_from_bytes() {
1391        let input = b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'";
1392        let bgm: BgmSegment = deserialize_first_streaming(input).unwrap();
1393        assert_eq!(bgm.pruef_id, "11042");
1394    }
1395
1396    #[test]
1397    fn streaming_deserialize_all_from_bytes() {
1398        let input = b"BGM+E03+11042+9'RFF+AA:1'BGM+E01+11043+9'";
1399        let bgms: Vec<BgmSegment> = deserialize_all_streaming(input).unwrap();
1400        assert_eq!(bgms.len(), 2);
1401        assert_eq!(bgms[0].pruef_id, "11042");
1402        assert_eq!(bgms[1].pruef_id, "11043");
1403    }
1404
1405    #[test]
1406    fn streaming_deserialize_first_from_reader() {
1407        let input =
1408            std::io::Cursor::new(b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'".to_vec());
1409        let bgm: BgmSegment = deserialize_first_from_reader(input).unwrap();
1410        assert_eq!(bgm.pruef_id, "11042");
1411    }
1412
1413    #[test]
1414    fn streaming_deserialize_all_from_reader() {
1415        let input = std::io::Cursor::new(b"BGM+E03+11042+9'BGM+E01+11043+9'".to_vec());
1416        let bgms: Vec<BgmSegment> = deserialize_all_from_reader(input).unwrap();
1417        assert_eq!(bgms.len(), 2);
1418        assert_eq!(bgms[0].pruef_id, "11042");
1419        assert_eq!(bgms[1].pruef_id, "11043");
1420    }
1421
1422    #[test]
1423    fn missing_segment_returns_error() {
1424        let input = b"DTM+137:20230401:102'";
1425        let result: Result<BgmSegment, _> = deserialize(input);
1426        assert!(result.is_err());
1427    }
1428
1429    #[test]
1430    fn vec_collects_all_matching_segments() {
1431        let input = b"DTM+137:20230401:102'BGM+E03+11042+9'BGM+E01+11043+9'";
1432        let bgms: Vec<BgmSegment> = deserialize(input).unwrap();
1433        assert_eq!(bgms.len(), 2);
1434        assert_eq!(bgms[0].pruef_id, "11042");
1435        assert_eq!(bgms[1].pruef_id, "11043");
1436    }
1437
1438    #[test]
1439    fn find_qualified_segment_matches_qualifier() {
1440        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1441        let segments: Vec<Segment<'_>> =
1442            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1443        let nad_ms = find_qualified_segment(&segments, "NAD", "MS");
1444        let nad_mr = find_qualified_segment(&segments, "NAD", "MR");
1445        assert!(nad_ms.is_some());
1446        assert!(nad_mr.is_some());
1447        assert_eq!(element_str(nad_ms.unwrap(), 0), "MS");
1448        assert_eq!(element_str(nad_mr.unwrap(), 0), "MR");
1449    }
1450
1451    #[test]
1452    fn round_trip_str_api() {
1453        let input = "BGM+E03+11042+9'";
1454        let bgm: BgmSegment = deserialize_str(input).unwrap();
1455        assert_eq!(bgm.pruef_id, "11042");
1456    }
1457
1458    #[test]
1459    fn required_element_extraction() {
1460        let input = b"BGM+E03+11042+9'";
1461        let segments: Vec<Segment<'_>> =
1462            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1463        let seg = &segments[0];
1464
1465        assert_eq!(required_element(seg, 0).unwrap(), "E03");
1466        assert_eq!(required_element(seg, 1).unwrap(), "11042");
1467        // Element 5 doesn't exist
1468        assert!(required_element(seg, 5).is_err());
1469    }
1470
1471    #[test]
1472    fn optional_element_extraction() {
1473        let input = b"BGM+E03+11042+9'BGM+E01++absent'";
1474        let segments: Vec<Segment<'_>> =
1475            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1476
1477        // First segment
1478        assert_eq!(optional_element(&segments[0], 0), Some("E03"));
1479        assert_eq!(optional_element(&segments[0], 1), Some("11042"));
1480        assert_eq!(optional_element(&segments[0], 5), None);
1481
1482        // Second segment with empty element
1483        assert_eq!(optional_element(&segments[1], 1), None);
1484    }
1485
1486    #[test]
1487    fn component_extraction() {
1488        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1489        let segments: Vec<Segment<'_>> =
1490            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1491        let seg = &segments[0];
1492
1493        assert_eq!(required_component(seg, 0, 0).unwrap(), "UNOA");
1494        assert_eq!(required_component(seg, 0, 1).unwrap(), "1");
1495        // Non-existent component
1496        assert!(required_component(seg, 0, 5).is_err());
1497    }
1498
1499    #[test]
1500    fn composite_element_helper() {
1501        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1502        let segments: Vec<Segment<'_>> =
1503            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1504        let seg = &segments[0];
1505
1506        let comp = composite_element(seg, 0).unwrap();
1507        assert_eq!(comp.len(), 2);
1508        assert_eq!(comp.get(0), Some("UNOA"));
1509        assert_eq!(comp.get(1), Some("1"));
1510        assert_eq!(comp.get(5), None);
1511        assert_eq!(comp.get_or_empty(5), "");
1512    }
1513
1514    #[test]
1515    fn get_all_components() {
1516        // UNB has composite element: UNOA:1
1517        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1518        let segments: Vec<Segment<'_>> =
1519            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1520        let seg = &segments[0];
1521
1522        let comps: Vec<&str> = get_components_iter(seg, 0).collect(); // First element is UNOA:1
1523        assert!(!comps.is_empty(), "Expected components but got empty");
1524        assert_eq!(comps.len(), 2);
1525        assert_eq!(comps[0], "UNOA");
1526        assert_eq!(comps[1], "1");
1527    }
1528
1529    #[test]
1530    fn qualifier_pattern_matching_supports_exact_and_wildcard() {
1531        // Exact match (no wildcard)
1532        assert!(qualifier_matches_pattern("MS", "MS"));
1533        assert!(!qualifier_matches_pattern("MS", "M")); // Not a prefix match after R-003
1534        // Wildcard patterns
1535        assert!(qualifier_matches_pattern("MS", "M*"));
1536        assert!(qualifier_matches_pattern("MRY", "M*Y"));
1537        assert!(!qualifier_matches_pattern("AB", "M*"));
1538    }
1539
1540    /// Comprehensive table-driven tests for `qualifier_matches_pattern`.
1541    #[test]
1542    fn qualifier_matches_pattern_table() {
1543        // (value, pattern, expected)
1544        let cases: &[(&str, &str, bool)] = &[
1545            // ── empty inputs ────────────────────────────────────────────────
1546            ("", "", true),   // empty matches empty
1547            ("", "*", true),  // wildcard matches empty string
1548            ("A", "", false), // non-empty does not match empty pattern
1549            ("", "A", false), // empty does not match non-empty literal
1550            // ── literal (no wildcard) ────────────────────────────────────────
1551            ("MS", "MS", true),
1552            ("BY", "BY", true),
1553            ("ms", "MS", false),  // case-sensitive
1554            ("MSX", "MS", false), // prefix is NOT a match without wildcard
1555            ("M", "MS", false),   // too short
1556            // ── single wildcard at the end (prefix match) ────────────────────
1557            ("MS", "M*", true),
1558            ("MULTI", "MUL*", true),
1559            ("AB", "M*", false),
1560            ("", "M*", false), // empty does not start with 'M'
1561            // ── single wildcard at the start (suffix match) ──────────────────
1562            ("MSG", "*G", true),
1563            ("G", "*G", true),
1564            ("MSG", "*X", false),
1565            ("", "*G", false),
1566            // ── wildcard in the middle ───────────────────────────────────────
1567            ("MRY", "M*Y", true),
1568            ("MAY", "M*Y", true),
1569            ("MY", "M*Y", true),    // zero-width wildcard: "M" + "" + "Y"
1570            ("MYY", "M*Y", true),   // last 'Y' matches, wildcard = 'Y'
1571            ("MAYZ", "M*Y", false), // does not end with 'Y'
1572            ("AB", "M*Y", false),
1573            // ── bare wildcard (match-all) ────────────────────────────────────
1574            ("*", "*", true), // literal '*' value vs wildcard pattern
1575            ("anything", "*", true),
1576            ("", "*", true),
1577            // ── multiple wildcards ────────────────────────────────────────────
1578            ("ABCDE", "A*C*E", true),
1579            ("ACE", "A*C*E", true), // zero-width wildcards
1580            ("AXCYE", "A*C*E", true),
1581            ("ABCDF", "A*C*E", false),
1582            // ── wildcard with empty segment between stars ─────────────────────
1583            ("AB", "A**B", true), // "A**B" → parts ["A", "", "B"] → ends_with_wildcard?
1584            // ── pattern longer than value ─────────────────────────────────────
1585            ("AB", "A*B*C", false),
1586            // ── value contains pattern as substring but must anchor start ─────
1587            ("XMS", "MS", false),
1588        ];
1589
1590        for (value, pattern, expected) in cases {
1591            let got = qualifier_matches_pattern(value, pattern);
1592            assert_eq!(
1593                got, *expected,
1594                "qualifier_matches_pattern({value:?}, {pattern:?}) expected {expected} but got {got}"
1595            );
1596        }
1597    }
1598
1599    #[test]
1600    fn typed_qualifier_helpers_work() {
1601        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1602        let segments: Vec<Segment<'_>> =
1603            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1604
1605        let first = find_segment_typed::<NadM>(&segments).unwrap();
1606        assert_eq!(first.element_str(0), Some("MS"));
1607
1608        let all: Vec<_> = find_segments_typed::<NadWildcard>(&segments).collect();
1609        assert_eq!(all.len(), 2);
1610    }
1611
/// Calls the `SegmentAccessor` methods through the trait path on a parsed
/// `UNB` segment and checks each returned value.
#[test]
fn segment_accessor_trait_methods_work() {
    let parsed: Vec<Segment<'_>> =
        crate::from_bytes(b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'")
            .collect::<Result<_, _>>()
            .unwrap();
    let unb = &parsed[0];

    // Plain element lookup.
    let sender = SegmentAccessor::get_element(unb, 1);
    assert_eq!(sender, Some("SENDER"));

    // Second component of the first element (`UNOA:1`).
    let second_component = SegmentAccessor::required_composite(unb, 0, 1).unwrap();
    assert_eq!(second_component, "1");

    // Element 4 (`1`) parsed into an integer.
    let as_number: i32 = SegmentAccessor::code_element(unb, 4).unwrap();
    assert_eq!(as_number, 1);

    // Two components of element 3 (`200101:0900`), starting at component 0.
    let components = SegmentAccessor::repeating_components(unb, 3, 0, 2).unwrap();
    assert_eq!(components, vec!["200101", "0900"]);
}
1626
/// `groups_are_contiguous_by_qualifier` must accept back-to-back `NAD`
/// segments and reject `NAD` segments separated by another segment.
#[test]
fn group_helpers_detect_contiguity() {
    struct NadAny;
    impl EdifactSegmentTag for NadAny {
        const SEGMENT_TAG: &'static str = "NAD";
    }

    // Parses `raw` and runs the contiguity check for `NadAny` on the result.
    let is_contiguous = |raw: &[u8]| {
        let segments: Vec<Segment<'_>> =
            crate::from_bytes(raw).collect::<Result<_, _>>().unwrap();
        groups_are_contiguous_by_qualifier::<NadAny>(&segments)
    };

    // NAD, NAD, RFF: the two NAD segments are adjacent.
    assert!(is_contiguous(b"NAD+MS+1'NAD+MR+2'RFF+AA:1'"));

    // NAD, RFF, NAD: an RFF sits between the two NAD segments.
    assert!(!is_contiguous(b"NAD+MS+1'RFF+AA:1'NAD+MR+2'"));
}
1650
/// `contiguous_groups_by_qualifier` must split NAD, NAD, RFF, NAD into one
/// group of two segments followed by one group of a single segment.
#[test]
fn group_helpers_collect_contiguous_groups() {
    struct NadAny;
    impl EdifactSegmentTag for NadAny {
        const SEGMENT_TAG: &'static str = "NAD";
    }

    let segments = crate::from_bytes(b"NAD+MS+1'NAD+MR+2'RFF+AA:1'NAD+BY+3'")
        .collect::<Result<Vec<Segment<'_>>, _>>()
        .unwrap();

    let groups = contiguous_groups_by_qualifier::<NadAny>(&segments);

    // Check the group count first so the indexing below cannot go out of bounds.
    assert_eq!(groups.len(), 2);
    let group_sizes = (groups[0].len(), groups[1].len());
    assert_eq!(group_sizes, (2, 1));
}
1667
1668    // ── MessageWindowsIter tests ──────────────────────────────────────────────
1669
/// A single UNH…UNT message wrapped in UNB/UNZ must produce exactly one
/// window that starts at `UNH`, ends at `UNT`, and reports the message type.
#[test]
fn message_windows_bytes_yields_complete_windows() {
    let interchange = b"UNB+UNOA:1+S+R+200101:0900+1'\
                        UNH+1+ORDERS:D:96A:UN'\
                        BGM+220+PO-001+9'\
                        UNT+3+1'\
                        UNZ+1+1'";

    let windows: Vec<_> = message_windows_bytes(interchange)
        .collect::<Result<_, _>>()
        .unwrap();
    assert_eq!(windows.len(), 1);

    let window = &windows[0];

    // The window is bounded by the message header and trailer.
    assert_eq!(window.segments[0].tag, "UNH");
    assert_eq!(window.segments.last().unwrap().tag, "UNT");

    // Metadata taken from the UNH segment; this UNH has no fifth component.
    assert_eq!(window.message_type.as_deref(), Some("ORDERS"));
    assert_eq!(window.association_code.as_deref(), None);
}
1686
/// The `?`-escaped sequences in the UNH segment must come back unescaped in
/// the window's `message_type` and `association_code`.
#[test]
fn message_windows_bytes_preserves_owned_unh_metadata() {
    let interchange = b"UNB+UNOA:1+S+R+200101:0900+1'\
                        UNH+1+ORD?ERS:D:96A:UN:5??5??3a'\
                        BGM+220+PO-001+9'\
                        UNT+3+1'\
                        UNZ+1+1'";

    let windows: Vec<_> = message_windows_bytes(interchange)
        .collect::<Result<_, _>>()
        .unwrap();
    assert_eq!(windows.len(), 1);

    let window = &windows[0];
    // `ORD?ERS` is expected to read back as `ORDERS`.
    assert_eq!(window.message_type.as_deref(), Some("ORDERS"));
    // `5??5??3a` is expected to read back as `5?5?3a`.
    assert_eq!(window.association_code.as_deref(), Some("5?5?3a"));
}
1702
/// A stream that ends after UNH and BGM, with no UNT, must yield exactly one
/// item and that item must be an `UnexpectedEof` error.
#[test]
fn message_windows_truncated_stream_returns_error() {
    let truncated = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";

    let results: Vec<_> = message_windows_bytes(truncated).collect();
    assert_eq!(results.len(), 1);

    let only = &results[0];
    let is_eof = matches!(only, Err(EdifactError::UnexpectedEof { .. }));
    assert!(
        is_eof,
        "expected UnexpectedEof for truncated window, got: {:?}",
        only
    );
}
1715
/// After reporting `UnexpectedEof` for a truncated message, the iterator must
/// return `None` on the next call.
#[test]
fn message_windows_subsequent_calls_return_none_after_truncation() {
    let mut windows = message_windows_bytes(b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'");

    // First pull: the truncation error.
    let first = windows.next();
    assert!(matches!(
        first,
        Some(Err(EdifactError::UnexpectedEof { .. }))
    ));

    // Second pull: nothing further is produced after the error.
    let second = windows.next();
    assert!(second.is_none());
}
1727
/// A second `UNH` arriving before the first message's `UNT` must make the
/// first yielded item an `InvalidSegmentForMessage` error tagged `UNH`.
#[test]
fn message_windows_unh_without_unt_before_next_unh_returns_error() {
    let unclosed_then_complete = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'\
                                   UNH+2+ORDERS:D:96A:UN'BGM+220+PO-002+9'UNT+3+2'";

    let results: Vec<_> = message_windows_bytes(unclosed_then_complete).collect();

    // Only the first item is inspected: the first message has no closer.
    let first = &results[0];
    let rejected_unh = matches!(
        first,
        Err(EdifactError::InvalidSegmentForMessage { tag, .. }) if tag == "UNH"
    );
    assert!(
        rejected_unh,
        "expected InvalidSegmentForMessage(UNH), got: {:?}",
        first
    );
}
1743
1744    // ── SegmentAccessor unit tests ─────────────────────────────────────────────
1745
1746    fn parse_one(input: &str) -> crate::OwnedSegment {
1747        crate::from_reader(std::io::Cursor::new(input.as_bytes()))
1748            .expect("parse failed")
1749            .into_iter()
1750            .next()
1751            .expect("at least one segment")
1752    }
1753
/// `get_element` must return each populated element of `BGM+220+PO-001+9'`
/// and `None` for an index past the end.
#[test]
fn segment_accessor_get_element_returns_value() {
    let owned = parse_one("BGM+220+PO-001+9'");
    let seg = owned.as_borrowed();

    // (element index, expected value)
    for (index, want) in [(0, "220"), (1, "PO-001"), (2, "9")] {
        assert_eq!(SegmentAccessor::get_element(&seg, index), Some(want));
    }

    let past_the_end = SegmentAccessor::get_element(&seg, 9);
    assert_eq!(past_the_end, None, "out-of-bounds must return None");
}
1767
/// `get_element` must report empty elements as `None`: in `TST+++VALUE'`
/// elements 0 and 1 are empty and element 2 is `VALUE`.
#[test]
fn segment_accessor_get_element_filters_empty() {
    let owned = parse_one("TST+++VALUE'");
    let seg = owned.as_borrowed();

    // Both leading elements are empty.
    for empty_index in [0, 1] {
        assert_eq!(
            SegmentAccessor::get_element(&seg, empty_index),
            None,
            "empty element must return None"
        );
    }

    // The first populated element is still reachable at its own index.
    let populated = SegmentAccessor::get_element(&seg, 2);
    assert_eq!(populated, Some("VALUE"));
}
1785
/// `get_component` must return every component of element 1 of
/// `UNH+1+ORDERS:D:96A:UN'` and `None` for a component index past the end.
#[test]
fn segment_accessor_get_component_returns_value() {
    let owned = parse_one("UNH+1+ORDERS:D:96A:UN'");
    let seg = owned.as_borrowed();

    // (component index within element 1, expected value)
    for (component, want) in [(0, "ORDERS"), (1, "D"), (2, "96A"), (3, "UN")] {
        assert_eq!(seg.get_component(1, component), Some(want));
    }

    let past_the_end = seg.get_component(1, 9);
    assert_eq!(past_the_end, None, "out-of-bounds must return None");
}
1800
/// `text_element` on the empty element 0 of `BGM+'` must fail with
/// `MissingRequiredElement` carrying the tag `BGM` and element index 0.
#[test]
fn segment_accessor_text_element_errors_on_missing() {
    let owned = parse_one("BGM+'");
    let seg = owned.as_borrowed();

    // Element 0 exists but is empty, so the lookup has to fail.
    let err = seg.text_element(0);
    let is_missing_element = matches!(
        &err,
        Err(EdifactError::MissingRequiredElement { tag, element_index: 0 }) if tag == "BGM"
    );
    assert!(
        is_missing_element,
        "expected MissingRequiredElement, got: {err:?}"
    );
}
1812
/// `required_composite` on the absent component 1 of element 0 in `DTM+137'`
/// must fail with `MissingRequiredComponent` carrying the tag `DTM`, element
/// index 0 and component index 1.
#[test]
fn segment_accessor_required_composite_errors_on_missing() {
    let owned = parse_one("DTM+137'");
    let seg = owned.as_borrowed();

    // Element 0 has only one component (`137`); component 1 is not there.
    let err = seg.required_composite(0, 1);
    let is_missing_component = matches!(
        &err,
        Err(EdifactError::MissingRequiredComponent {
            tag,
            element_index: 0,
            component_index: 1,
        }) if tag == "DTM"
    );
    assert!(
        is_missing_component,
        "expected MissingRequiredComponent, got: {err:?}"
    );
}
1824
/// `code_element` on element 0 of `QTY+21:100'` must parse to the `u32`
/// value 21.
#[test]
fn segment_accessor_code_element_parses_integer() {
    let owned = parse_one("QTY+21:100'");
    let seg = owned.as_borrowed();

    // The target type is chosen by the annotation on the binding.
    let qualifier: u32 = seg
        .code_element(0)
        .expect("should parse qualifier as u32");

    assert_eq!(qualifier, 21);
}
1832
/// `optional_element` must return `None` for an index past the last element
/// of `BGM+220'`.
#[test]
fn segment_accessor_optional_element_absent_returns_none() {
    let owned = parse_one("BGM+220'");
    let seg = owned.as_borrowed();

    // Index 5 is well beyond the single element present.
    let absent = seg.optional_element(5);
    assert_eq!(absent, None);
}
1839}