Skip to main content

edifact_rs/
de.rs

1//! Custom deserialization trait for EDIFACT.
2//!
3//! [`EdifactDeserialize`] maps a slice of parsed [`Segment`]s to a Rust value.
4//! [`EdifactSegmentTag`] is a companion trait that carries the segment tag and
5//! optional qualifier at the type level, enabling the blanket
6//! `impl EdifactDeserialize for Vec<T>`.
7
8use crate::{EdifactError, Segment};
9use std::borrow::Cow;
10use std::io::Read;
11use std::str::FromStr;
12
13// ── traits ────────────────────────────────────────────────────────────────────
14
15/// Types that can be deserialized from a slice of EDIFACT segments.
16///
17/// Implement manually or derive with `#[derive(EdifactDeserialize)]` from the
18/// `edifact-rs-derive` crate.
19pub trait EdifactDeserialize: Sized {
20    /// Deserialize `Self` from the provided segment slice.
21    ///
22    /// The slice may contain any number of segments; implementations extract
23    /// only the ones they care about and ignore the rest.
24    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError>;
25
26    /// Deserialize `Self` from a slice of owned EDIFACT segments.
27    ///
28    /// # Default implementation
29    ///
30    /// Converts each [`crate::OwnedSegment`] to its borrowed form via
31    /// [`crate::OwnedSegment::as_borrowed`] and delegates to
32    /// [`edifact_deserialize`][Self::edifact_deserialize].  This incurs one
33    /// `Vec<Segment<'_>>` allocation per call.
34    ///
35    /// # Override when performance matters
36    ///
37    /// Types generated by `#[derive(EdifactDeserialize)]` automatically override
38    /// this method to work directly on the owned data without the intermediate
39    /// allocation.  Manual implementations should also override when used in the
40    /// high-throughput reader-streaming path
41    /// ([`deserialize_first_from_reader`], [`deserialize_all_from_reader`],
42    /// [`deserialize_messages_from_reader`]) to avoid the per-message allocation.
43    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
44        let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
45        Self::edifact_deserialize(&borrowed)
46    }
47}
48
49/// Types that can be deserialized from a composite EDIFACT element.
50///
51/// Implement this for custom composite structs used with
52/// `#[edifact(composite)]` in derive macros.
53pub trait EdifactCompositeDeserialize: Sized {
54    /// Deserialize `Self` from a composite element.
55    fn edifact_deserialize_composite(composite: CompositeElement<'_>)
56    -> Result<Self, EdifactError>;
57}
58
59impl EdifactCompositeDeserialize for Vec<String> {
60    fn edifact_deserialize_composite(
61        composite: CompositeElement<'_>,
62    ) -> Result<Self, EdifactError> {
63        Ok(composite.iter().map(str::to_owned).collect())
64    }
65}
66
67/// Companion trait that declares a type's segment tag (and optional qualifier).
68///
69/// Required for the `Vec<T>` blanket impl and for finding the right segment in
70/// a message-level struct deserialization.
71pub trait EdifactSegmentTag {
72    /// The 3-character EDIFACT segment tag (e.g. `"BGM"`, `"NAD"`).
73    const SEGMENT_TAG: &'static str;
74
75    /// Optional qualifier pattern to further constrain segment matching.
76    ///
77    /// Examples:
78    /// - `Some("MS")` for exact qualifier matching.
79    /// - `Some("M*")` for wildcard prefix matching (matches `"MS"`, `"MR"`, etc.).
80    const QUALIFIER_PATTERN: Option<&'static str> = None;
81
82    /// Return `true` if `seg`'s qualifier matches this type's qualifier pattern.
83    fn matches_qualifier(seg: &Segment<'_>) -> bool {
84        match Self::QUALIFIER_PATTERN {
85            Some(pattern) => seg
86                .element_str(0)
87                .is_some_and(|q| qualifier_matches_pattern(q, pattern)),
88            None => true,
89        }
90    }
91
92    /// Return `true` if `seg` is the segment this type maps to.
93    ///
94    /// Default: `seg.tag == Self::SEGMENT_TAG`.  Override to also match on a
95    /// qualifier (e.g. `NAD+BY` — element 0 = `"BY"`).
96    fn matches_segment(seg: &Segment<'_>) -> bool {
97        seg.tag == Self::SEGMENT_TAG && Self::matches_qualifier(seg)
98    }
99
100    /// Like [`matches_segment`][Self::matches_segment] but works directly on an
101    /// [`crate::OwnedSegment`] without incurring the `Vec` allocation of
102    /// [`crate::OwnedSegment::as_borrowed`].
103    fn matches_owned_segment(seg: &crate::OwnedSegment) -> bool {
104        if seg.tag != Self::SEGMENT_TAG {
105            return false;
106        }
107        match Self::QUALIFIER_PATTERN {
108            None => true,
109            Some(pattern) => {
110                let q = seg
111                    .elements
112                    .first()
113                    .and_then(|e| e.components.first())
114                    .map(|c| c.as_str())
115                    .unwrap_or("");
116                qualifier_matches_pattern(q, pattern)
117            }
118        }
119    }
120}
121
122// ── blanket impl for Vec<T> ───────────────────────────────────────────────────
123
124/// Deserializes each segment matching `T::matches_segment` as an independent
125/// single-segment slice, collecting the results.
126impl<T> EdifactDeserialize for Vec<T>
127where
128    T: EdifactDeserialize + EdifactSegmentTag,
129{
130    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
131        segments
132            .iter()
133            .filter(|s| T::matches_segment(s))
134            .map(|seg| T::edifact_deserialize(std::slice::from_ref(seg)))
135            .collect()
136    }
137
138    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
139        segments
140            .iter()
141            .filter(|s| T::matches_owned_segment(s))
142            .map(|seg| T::edifact_deserialize_owned(std::slice::from_ref(seg)))
143            .collect()
144    }
145}
146
147// ── public API ────────────────────────────────────────────────────────────────
148
149/// Deserialize a value of type `T` from EDIFACT bytes.
150///
151/// Unlike [`crate::from_bytes`], which parses bytes into raw [`Segment`]s, this
152/// function fully deserializes the payload into a typed Rust value via [`EdifactDeserialize`].
153///
154/// This API currently buffers all parsed segments into a `Vec` before invoking
155/// typed deserialization.
156pub fn deserialize<T: EdifactDeserialize>(input: &[u8]) -> Result<T, EdifactError> {
157    let segments: Vec<Segment<'_>> = crate::from_bytes(input).collect::<Result<_, _>>()?;
158    T::edifact_deserialize(&segments)
159}
160
161/// Stream-parse EDIFACT bytes and deserialize the first matching segment as `T`.
162///
163/// This avoids allocating a full `Vec<Segment>` and is intended for low-memory
164/// extraction of segment-scoped types.
165pub fn deserialize_first_streaming<T>(input: &[u8]) -> Result<T, EdifactError>
166where
167    T: EdifactDeserialize + EdifactSegmentTag,
168{
169    for segment in crate::from_bytes(input) {
170        let segment = segment?;
171        if T::matches_segment(&segment) {
172            return T::edifact_deserialize(std::slice::from_ref(&segment));
173        }
174    }
175
176    Err(EdifactError::MissingSegment {
177        tag: T::SEGMENT_TAG.to_owned(),
178        expected_position: "any position in input".to_owned(),
179    })
180}
181
182/// Stream-parse EDIFACT bytes and deserialize all matching segments as `Vec<T>`.
183///
184/// This avoids buffering non-matching segments in memory.
185pub fn deserialize_all_streaming<T>(input: &[u8]) -> Result<Vec<T>, EdifactError>
186where
187    T: EdifactDeserialize + EdifactSegmentTag,
188{
189    let mut out = Vec::new();
190    for segment in crate::from_bytes(input) {
191        let segment = segment?;
192        if T::matches_segment(&segment) {
193            out.push(T::edifact_deserialize(std::slice::from_ref(&segment))?);
194        }
195    }
196    Ok(out)
197}
198
199/// Stream-parse EDIFACT from a reader and deserialize the first matching segment as `T`.
200///
201/// This is the low-memory typed path for large payloads read from I/O streams.
202pub fn deserialize_first_from_reader<T, R>(reader: R) -> Result<T, EdifactError>
203where
204    T: EdifactDeserialize + EdifactSegmentTag,
205    R: Read,
206{
207    for segment in crate::from_reader_iter(reader) {
208        let segment = segment?;
209        // O(1) tag + qualifier check before paying for as_borrowed().
210        if !T::matches_owned_segment(&segment) {
211            continue;
212        }
213        return T::edifact_deserialize_owned(std::slice::from_ref(&segment));
214    }
215
216    Err(EdifactError::MissingSegment {
217        tag: T::SEGMENT_TAG.to_owned(),
218        expected_position: "any position in input".to_owned(),
219    })
220}
221
222/// Stream-parse EDIFACT from a reader and deserialize all matching segments as `Vec<T>`.
223pub fn deserialize_all_from_reader<T, R>(reader: R) -> Result<Vec<T>, EdifactError>
224where
225    T: EdifactDeserialize + EdifactSegmentTag,
226    R: Read,
227{
228    let mut out = Vec::new();
229    for segment in crate::from_reader_iter(reader) {
230        let segment = segment?;
231        // O(1) tag + qualifier check before paying for as_borrowed().
232        if !T::matches_owned_segment(&segment) {
233            continue;
234        }
235        out.push(T::edifact_deserialize_owned(std::slice::from_ref(
236            &segment,
237        ))?);
238    }
239    Ok(out)
240}
241
242/// Deserialize a value of type `T` from an EDIFACT string.
243pub fn deserialize_str<T: EdifactDeserialize>(input: &str) -> Result<T, EdifactError> {
244    deserialize(input.as_bytes())
245}
246
247// ── helper functions ──────────────────────────────────────────────────────────
248
249/// Find the first segment with the given tag.
250pub fn find_segment<'s, 'd>(segments: &'s [Segment<'d>], tag: &str) -> Option<&'s Segment<'d>> {
251    segments.iter().find(|s| s.tag == tag)
252}
253
254/// Iterate over all segments with the given tag without allocating a `Vec`.
255pub fn find_segments_iter<'s, 'd: 's>(
256    segments: &'s [Segment<'d>],
257    tag: &'s str,
258) -> impl Iterator<Item = &'s Segment<'d>> {
259    segments.iter().filter(move |s| s.tag == tag)
260}
261
262/// Find the first segment matching `tag` whose element 0 equals `qualifier`.
263pub fn find_qualified_segment<'s, 'd>(
264    segments: &'s [Segment<'d>],
265    tag: &str,
266    qualifier: &str,
267) -> Option<&'s Segment<'d>> {
268    segments
269        .iter()
270        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
271}
272
273/// Find the first segment by type-level qualifier pattern.
274pub fn find_segment_typed<'s, 'd, T>(segments: &'s [Segment<'d>]) -> Option<&'s Segment<'d>>
275where
276    T: EdifactSegmentTag,
277{
278    segments.iter().find(|s| T::matches_segment(s))
279}
280
281/// Iterate over all segments by type-level qualifier pattern.
282pub fn find_segments_typed<'s, 'd: 's, T>(
283    segments: &'s [Segment<'d>],
284) -> impl Iterator<Item = &'s Segment<'d>>
285where
286    T: EdifactSegmentTag,
287{
288    segments.iter().filter(|s| T::matches_segment(s))
289}
290
291/// Collect contiguous groups of segments that match `T`.
292///
293/// Each group is a borrowed slice of the original `segments` array.
294/// Use [`contiguous_groups_iter`] to avoid the outer `Vec` allocation.
295pub fn contiguous_groups_by_qualifier<'s, 'd, T>(
296    segments: &'s [Segment<'d>],
297) -> Vec<&'s [Segment<'d>]>
298where
299    T: EdifactSegmentTag,
300{
301    let mut groups = Vec::new();
302    let mut idx = 0;
303    while idx < segments.len() {
304        if T::matches_segment(&segments[idx]) {
305            let start = idx;
306            idx += 1;
307            while idx < segments.len() && T::matches_segment(&segments[idx]) {
308                idx += 1;
309            }
310            groups.push(&segments[start..idx]);
311        } else {
312            idx += 1;
313        }
314    }
315    groups
316}
317
318/// Iterate lazily over contiguous groups of segments that match `T`.
319///
320/// Each yielded item is a borrowed slice `&[Segment<'_>]` that forms one
321/// contiguous run of `T`-matching segments.  No outer `Vec` is allocated —
322/// the caller can break early or collect only as many groups as needed.
323///
324/// This function uses separate lifetimes for the slice reference (`'s`) and
325/// the segment data (`'d`), matching the signature of
326/// [`contiguous_groups_by_qualifier`].
327///
328/// # Example
329/// ```rust,ignore
330/// for group in contiguous_groups_iter::<UnaSegment>(&segments) {
331///     process_group(group);
332/// }
333/// ```
334pub fn contiguous_groups_iter<'s, 'd, T>(
335    segments: &'s [Segment<'d>],
336) -> impl Iterator<Item = &'s [Segment<'d>]> + 's
337where
338    T: EdifactSegmentTag,
339{
340    let mut idx = 0;
341    let len = segments.len();
342    std::iter::from_fn(move || {
343        // Skip non-matching segments
344        while idx < len && !T::matches_segment(&segments[idx]) {
345            idx += 1;
346        }
347        if idx >= len {
348            return None;
349        }
350        let start = idx;
351        idx += 1;
352        while idx < len && T::matches_segment(&segments[idx]) {
353            idx += 1;
354        }
355        Some(&segments[start..idx])
356    })
357}
358
359/// Return `true` if all segments matching `T` are in one contiguous block.
360pub fn groups_are_contiguous_by_qualifier<T>(segments: &[Segment<'_>]) -> bool
361where
362    T: EdifactSegmentTag,
363{
364    let mut seen_match = false;
365    let mut seen_gap_after_match = false;
366
367    for seg in segments {
368        if T::matches_segment(seg) {
369            if seen_gap_after_match {
370                return false;
371            }
372            seen_match = true;
373        } else if seen_match {
374            seen_gap_after_match = true;
375        }
376    }
377
378    true
379}
380
/// Match a qualifier value against an exact or wildcard pattern.
///
/// Rules:
/// - If `pattern` contains `*`, it is treated as a glob: each `*` stands for
///   any (possibly empty) run of characters, e.g. `"M*"` matches `"MS"` and
///   `"MR"`, and `"*:MS"` matches `"ZZ:MS"`.
/// - If no wildcard is present, exact match is required.  In particular the
///   empty pattern matches only the empty value.
///
/// Prefix matching without an explicit `*` was deliberately removed: `"M"`
/// matches only `"M"`, not `"MS"` or `"MR"`.  Use `"M*"` for prefix semantics.
///
/// The literal text before the first `*` and after the last `*` must occupy
/// *disjoint* parts of `value`.  This holds for any number of wildcards, so
/// `"a**a"` matches `"aa"` but not `"a"`, exactly like `"a*a"`.
///
/// The function does not allocate.
pub fn qualifier_matches_pattern(value: &str, pattern: &str) -> bool {
    // No wildcard at all: plain equality (covers the empty pattern as well).
    let Some((prefix, rest)) = pattern.split_once('*') else {
        return value == pattern;
    };

    // Everything after the last `*` is the mandatory suffix; whatever lies
    // between the first and the last `*` is a list of literals that must
    // occur in order.  With a single wildcard that list is empty.
    let (middle, suffix) = rest.rsplit_once('*').unwrap_or(("", rest));

    // Strip the prefix first and the suffix from what remains, which
    // guarantees that the two cannot overlap inside `value`.
    let Some(after_prefix) = value.strip_prefix(prefix) else {
        return false;
    };
    let Some(mut remaining) = after_prefix.strip_suffix(suffix) else {
        return false;
    };

    // Greedy left-to-right search for each middle literal; empty pieces come
    // from adjacent wildcards and impose no constraint.
    for literal in middle.split('*').filter(|piece| !piece.is_empty()) {
        match remaining.find(literal) {
            Some(at) => remaining = &remaining[at + literal.len()..],
            None => return false,
        }
    }

    true
}
444
445/// Extract the string value of element `idx` from `seg`, or `""` if absent.
446#[inline]
447pub fn element_str<'s>(seg: &'s Segment<'_>, idx: usize) -> &'s str {
448    seg.element_str(idx).unwrap_or("")
449}
450
451// ── segment accessor helpers ───────────────────────────────────────────────────
452
453/// Extract a required text element from a segment.
454///
455/// Returns the element's first component, or an error if absent or empty.
456///
457/// # Empty-string semantics
458///
459/// EDIFACT allows elements to be syntactically present but carry an empty
460/// string value (e.g., `SEG++'`). This function treats an empty string as
461/// *absent* — it returns [`EdifactError::MissingRequiredElement`] in that
462/// case, matching the EDIFACT rule that mandatory data elements must carry
463/// a non-empty value.
464///
465/// Delegates to [`SegmentAccessor::text_element`].
466pub fn required_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Result<&'a str, EdifactError> {
467    seg.text_element(idx)
468}
469
470/// Extract an optional text element from a segment.
471///
472/// Returns the element's first component, or None if absent or empty.
473///
474/// Delegates to [`SegmentAccessor::optional_element`].
475pub fn optional_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<&'a str> {
476    SegmentAccessor::optional_element(seg, idx)
477}
478
479/// Extract a required component from a segment element.
480///
481/// Returns the component value, or an error if the element or component is absent.
482///
483/// # Empty-string semantics
484///
485/// Like [`required_element`], an empty string component value is treated as
486/// *absent*.  A component that is syntactically present as `''` (two
487/// consecutive component separators) will cause this function to return
488/// [`EdifactError::MissingRequiredComponent`].
489///
490/// # Failure modes
491///
492/// - [`EdifactError::MissingRequiredElement`] — element `elem_idx` is absent.
493/// - [`EdifactError::MissingRequiredComponent`] — element is present but component `comp_idx` is absent or empty.
494///
495/// Delegates to [`SegmentAccessor::required_composite`].
496pub fn required_component<'a>(
497    seg: &'a Segment<'_>,
498    elem_idx: usize,
499    comp_idx: usize,
500) -> Result<&'a str, EdifactError> {
501    seg.required_composite(elem_idx, comp_idx)
502}
503
504/// Extract an optional component from a segment element.
505///
506/// Returns the component value, or None if absent or empty.
507///
508/// Delegates to [`SegmentAccessor::get_component`].
509pub fn optional_component<'a>(
510    seg: &'a Segment<'_>,
511    elem_idx: usize,
512    comp_idx: usize,
513) -> Option<&'a str> {
514    SegmentAccessor::get_component(seg, elem_idx, comp_idx)
515}
516
517/// Iterate over all components of an element without allocating a `Vec`.
518///
519/// Yields an empty iterator if the element is absent.
520pub fn get_components_iter<'a>(seg: &'a Segment<'_>, idx: usize) -> impl Iterator<Item = &'a str> {
521    seg.elements
522        .get(idx)
523        .into_iter()
524        .flat_map(|elem| elem.components.iter().map(|c| c.as_ref()))
525}
526
/// Thin, borrowed view over the components of one composite data element.
pub struct CompositeElement<'a> {
    components: &'a [std::borrow::Cow<'a, str>],
}

impl<'a> CompositeElement<'a> {
    /// Component at index `i`, or `None` when the index is out of range.
    ///
    /// An empty component is returned as `Some("")`.
    pub fn get(&self, i: usize) -> Option<&'a str> {
        let component = self.components.get(i)?;
        Some(component.as_ref())
    }

    /// Component at index `i`, or the empty string when the index is out of
    /// range.
    pub fn get_or_empty(&self, i: usize) -> &'a str {
        match self.get(i) {
            Some(text) => text,
            None => "",
        }
    }

    /// Number of components in the composite.
    pub fn len(&self) -> usize {
        let parts: &[std::borrow::Cow<'a, str>] = self.components;
        parts.len()
    }

    /// `true` when the composite holds no component at all.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Iterate over all components in order.
    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
        // Copy the slice reference out first so the iterator borrows the
        // underlying data (`'a`) rather than `self`.
        let parts: &'a [std::borrow::Cow<'a, str>] = self.components;
        parts.iter().map(|component| &**component)
    }

    /// Wrap an existing component slice in a `CompositeElement`.
    ///
    /// Used internally by code generated for
    /// [`edifact_deserialize_owned`][EdifactDeserialize::edifact_deserialize_owned]
    /// to hand over component data without converting the whole segment.
    pub fn from_slice(components: &'a [std::borrow::Cow<'a, str>]) -> Self {
        CompositeElement { components }
    }
}
566
567/// Get a composite element from a segment with clearer ergonomics.
568pub fn composite_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<CompositeElement<'a>> {
569    seg.elements.get(idx).map(|elem| CompositeElement {
570        components: &elem.components,
571    })
572}
573
574/// Find the first [`OwnedSegment`] with the given tag.
575///
576/// Zero-allocation counterpart of [`find_segment`] for use in
577/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
578///
579/// [`OwnedSegment`]: crate::OwnedSegment
580pub fn find_segment_owned<'s>(
581    segments: &'s [crate::OwnedSegment],
582    tag: &str,
583) -> Option<&'s crate::OwnedSegment> {
584    segments.iter().find(|s| s.tag == tag)
585}
586
587/// Find the first [`OwnedSegment`] with the given tag **and** qualifier.
588///
589/// The qualifier is compared against the first component of element 0.
590/// Zero-allocation counterpart of [`find_qualified_segment`] for use in
591/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
592///
593/// [`OwnedSegment`]: crate::OwnedSegment
594pub fn find_qualified_segment_owned<'s>(
595    segments: &'s [crate::OwnedSegment],
596    tag: &str,
597    qualifier: &str,
598) -> Option<&'s crate::OwnedSegment> {
599    segments
600        .iter()
601        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
602}
603
604/// Segment accessor trait for ergonomic typed extraction.
605pub trait SegmentAccessor<'a> {
606    /// Get non-empty element text at index `idx`.
607    fn get_element(&'a self, idx: usize) -> Option<&'a str>;
608    /// Get non-empty component text at element/component indexes.
609    fn get_component(&'a self, elem: usize, comp: usize) -> Option<&'a str>;
610    /// Get a composite wrapper for element `idx`.
611    fn get_composite(&'a self, idx: usize) -> Option<CompositeElement<'a>>;
612
613    /// Get required non-empty element text.
614    fn text_element(&'a self, idx: usize) -> Result<&'a str, EdifactError>;
615    /// Get optional non-empty element text.
616    fn optional_element(&'a self, idx: usize) -> Option<&'a str>;
617    /// Parse a typed code value from a required element.
618    fn code_element<T: FromStr>(&'a self, idx: usize) -> Result<T, EdifactError>;
619    /// Get required non-empty composite component.
620    fn required_composite(&'a self, elem: usize, comp: usize) -> Result<&'a str, EdifactError>;
621    /// Get `count` required components starting at `start_idx` from element `elem`.
622    ///
623    /// Allocates a `Vec`.  For a zero-alloc alternative, use
624    /// [`repeating_components_iter`][Self::repeating_components_iter] and
625    /// consume the iterator directly without collecting.
626    fn repeating_components(
627        &'a self,
628        elem: usize,
629        start_idx: usize,
630        count: usize,
631    ) -> Result<Vec<&'a str>, EdifactError> {
632        // Default implementation delegates to the zero-alloc iterator and
633        // collects.  Implementors that can do better should override this.
634        self.repeating_components_iter(elem, start_idx, count)
635            .collect()
636    }
637
638    /// Iterate over `count` required components starting at `start_idx` from element `elem`.
639    ///
640    /// Allocation-free alternative to [`repeating_components`][Self::repeating_components];
641    /// the caller supplies the iteration budget and consumes results on the fly.
642    fn repeating_components_iter(
643        &'a self,
644        elem: usize,
645        start_idx: usize,
646        count: usize,
647    ) -> impl Iterator<Item = Result<&'a str, EdifactError>> + 'a;
648}
649
650impl<'s, 'd> SegmentAccessor<'s> for Segment<'d>
651where
652    'd: 's,
653{
654    fn get_element(&'s self, idx: usize) -> Option<&'s str> {
655        self.element_str(idx).filter(|s| !s.is_empty())
656    }
657
658    fn get_component(&'s self, elem: usize, comp: usize) -> Option<&'s str> {
659        self.elements
660            .get(elem)
661            .and_then(|e| e.get_component(comp))
662            .filter(|s| !s.is_empty())
663    }
664
665    fn get_composite(&'s self, idx: usize) -> Option<CompositeElement<'s>> {
666        composite_element(self, idx)
667    }
668
669    fn text_element(&'s self, idx: usize) -> Result<&'s str, EdifactError> {
670        <Self as SegmentAccessor>::get_element(self, idx).ok_or_else(|| {
671            EdifactError::MissingRequiredElement {
672                tag: self.tag.to_owned(),
673                element_index: idx,
674            }
675        })
676    }
677
678    fn optional_element(&'s self, idx: usize) -> Option<&'s str> {
679        <Self as SegmentAccessor>::get_element(self, idx)
680    }
681
682    fn code_element<T: FromStr>(&'s self, idx: usize) -> Result<T, EdifactError> {
683        let raw = self.text_element(idx)?;
684        raw.parse::<T>().map_err(|_| EdifactError::InvalidText {
685            offset: self
686                .element_span(idx)
687                .map(|s| s.start)
688                .unwrap_or(self.span.start),
689        })
690    }
691
692    fn required_composite(&'s self, elem: usize, comp: usize) -> Result<&'s str, EdifactError> {
693        match self.elements.get(elem) {
694            None => Err(EdifactError::MissingRequiredElement {
695                tag: self.tag.to_owned(),
696                element_index: elem,
697            }),
698            Some(e) => e
699                .get_component(comp)
700                .filter(|s| !s.is_empty())
701                .ok_or_else(|| EdifactError::MissingRequiredComponent {
702                    tag: self.tag.to_owned(),
703                    element_index: elem,
704                    component_index: comp,
705                }),
706        }
707    }
708
709    fn repeating_components_iter(
710        &'s self,
711        elem: usize,
712        start_idx: usize,
713        count: usize,
714    ) -> impl Iterator<Item = Result<&'s str, EdifactError>> + 's {
715        let tag = self.tag;
716        let element_exists = self.elements.get(elem).is_some();
717        let components = self
718            .elements
719            .get(elem)
720            .map(|e| e.components.as_slice())
721            .unwrap_or(&[]);
722        (start_idx..start_idx + count).map(move |idx| {
723            components
724                .get(idx)
725                .map(|c| c.as_ref())
726                .filter(|s| !s.is_empty())
727                .ok_or_else(|| {
728                    if element_exists {
729                        EdifactError::MissingRequiredComponent {
730                            tag: tag.to_owned(),
731                            element_index: elem,
732                            component_index: idx,
733                        }
734                    } else {
735                        EdifactError::MissingRequiredElement {
736                            tag: tag.to_owned(),
737                            element_index: elem,
738                        }
739                    }
740                })
741        })
742    }
743}
744
745// ── message-window streaming ──────────────────────────────────────────────────
746
747/// A complete `UNH..UNT` message window that borrows from the original input.
748///
749/// Produced by [`MessageWindowsSliceIter`] / [`message_windows_bytes`].
750/// The `message_type` and `association_code` fields are extracted from the
751/// `UNH` segment at construction time, so callers do not need to traverse the
752/// segment list themselves.
753///
754/// `segments` contains the full window including the `UNH` and `UNT` service
755/// segments so that envelope-aware consumers have access to them.
756///
757/// # Accessing segments
758///
759/// ```rust,ignore
760/// for window in message_windows_bytes(input) {
761///     let window = window?;
762///     println!("type={:?} code={:?}", window.message_type, window.association_code);
763///     let bgm = window.segments.iter().find(|s| s.tag == "BGM");
764/// }
765/// ```
766#[derive(Debug)]
767pub struct MessageWindow<'a> {
768    /// EDIFACT message type extracted from `UNH` element 1, component 0.
769    ///
770    /// Borrowed when the component can be referenced directly, owned when
771    /// release-character unescaping requires allocation.
772    pub message_type: Option<Cow<'a, str>>,
773    /// Association-assigned code (DE 0057) from `UNH` element 1, component 4.
774    ///
775    /// Borrowed when the component can be referenced directly, owned when
776    /// release-character unescaping requires allocation.
777    pub association_code: Option<Cow<'a, str>>,
778    /// All segments in this window, from `UNH` through `UNT` (inclusive).
779    pub segments: Vec<crate::Segment<'a>>,
780}
781
782impl<'a> MessageWindow<'a> {
783    /// Build a `MessageWindow` from a completed segment buffer.
784    ///
785    /// Extracts `message_type` and `association_code` from the leading `UNH`
786    /// segment.  Metadata extraction is allocation-free for borrowed components;
787    /// release-character unescaping may allocate owned strings when necessary.
788    fn from_segments(segments: Vec<crate::Segment<'a>>) -> Self {
789        let message_type = segments
790            .first()
791            .filter(|s| s.tag == "UNH")
792            .and_then(|unh| unh_component(unh, 0));
793        let association_code = segments
794            .first()
795            .filter(|s| s.tag == "UNH")
796            .and_then(|unh| unh_component(unh, 4));
797        Self {
798            message_type,
799            association_code,
800            segments,
801        }
802    }
803}
804
805/// Extract a non-empty string component from UNH element 1, preserving the
806/// component's borrowed/owned state.
807///
808/// By using two distinct lifetime parameters (`'b` for the borrow of `seg`,
809/// `'a` for the segment data), we tell the borrow checker that the returned
810/// `&'a str` lives independently of how long we hold `&seg`, which lets callers
811/// move `seg` into a containing struct after this call returns.
812fn unh_component<'a, 'b>(seg: &'b crate::Segment<'a>, comp_idx: usize) -> Option<Cow<'a, str>>
813where
814    'a: 'b,
815{
816    seg.elements
817        .get(1)
818        .and_then(|e| e.components.get(comp_idx))
819        .and_then(|c| if c.is_empty() { None } else { Some(c.clone()) })
820}
821
822/// An owned, heap-allocated `UNH..UNT` message window.
823///
824/// Produced by [`MessageWindowsIter`] / [`message_windows_from_reader`].
825/// Equivalent to [`MessageWindow`] but with all data owned, so it outlives
826/// the original reader.
827///
828/// `segments` contains the full window including the `UNH` and `UNT` service
829/// segments.
830#[derive(Debug, Clone)]
831pub struct OwnedMessageWindow {
832    /// EDIFACT message type extracted from `UNH` element 1, component 0.
833    pub message_type: Option<String>,
834    /// Association-assigned code (DE 0057) from `UNH` element 1, component 4.
835    pub association_code: Option<String>,
836    /// All segments in this window, from `UNH` through `UNT` (inclusive).
837    pub segments: Vec<crate::OwnedSegment>,
838}
839
840impl OwnedMessageWindow {
841    fn from_segments(segments: Vec<crate::OwnedSegment>) -> Self {
842        let unh = segments.first().filter(|s| s.tag == "UNH");
843        let message_type = unh
844            .and_then(|s| s.elements.get(1))
845            .and_then(|e| e.components.first())
846            .map(|c| c.as_ref())
847            .filter(|s: &&str| !s.is_empty())
848            .map(str::to_owned);
849        let association_code = unh
850            .and_then(|s| s.elements.get(1))
851            .and_then(|e| e.components.get(4))
852            .map(|c| c.as_ref())
853            .filter(|s: &&str| !s.is_empty())
854            .map(str::to_owned);
855        Self {
856            message_type,
857            association_code,
858            segments,
859        }
860    }
861}
862
863/// An iterator that groups borrowed EDIFACT segments into per-message windows.
864///
865/// Zero-copy counterpart to [`MessageWindowsIter`] for in-memory byte slices.
866/// Text content borrows from the original input; segment structure allocates
867/// element vectors during parsing. Release-character unescaping may further
868/// allocate owned strings when escape sequences are present. Envelope segments
869/// outside a `UNH..UNT` pair are silently skipped.
870///
871/// Obtain this via [`message_windows_bytes`].
872pub struct MessageWindowsSliceIter<'a> {
873    inner: crate::FromBytesIter<'a>,
874    buf: Vec<crate::Segment<'a>>,
875    in_message: bool,
876    done: bool,
877}
878
879impl<'a> MessageWindowsSliceIter<'a> {
880    fn new(inner: crate::FromBytesIter<'a>) -> Self {
881        Self {
882            inner,
883            buf: Vec::new(),
884            in_message: false,
885            done: false,
886        }
887    }
888}
889
890impl<'a> Iterator for MessageWindowsSliceIter<'a> {
891    type Item = Result<MessageWindow<'a>, EdifactError>;
892
893    fn next(&mut self) -> Option<Self::Item> {
894        if self.done {
895            return None;
896        }
897        loop {
898            let segment = match self.inner.next() {
899                Some(Ok(s)) => s,
900                Some(Err(e)) => {
901                    self.done = true;
902                    return Some(Err(e));
903                }
904                None => {
905                    self.done = true;
906                    if self.in_message && !self.buf.is_empty() {
907                        self.in_message = false;
908                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
909                        return Some(Err(EdifactError::UnexpectedEof { offset }));
910                    }
911                    return None;
912                }
913            };
914
915            match segment.tag {
916                "UNH" => {
917                    if self.in_message {
918                        self.buf.clear();
919                        self.in_message = false;
920                        self.done = true;
921                        let offset = segment.span.start;
922                        return Some(Err(EdifactError::InvalidSegmentForMessage {
923                            tag: "UNH".to_owned(),
924                            message_type: "ENVELOPE".to_owned(),
925                            offset,
926                        }));
927                    }
928                    self.buf.clear();
929                    self.in_message = true;
930                    self.buf.push(segment);
931                }
932                "UNT" if self.in_message => {
933                    self.buf.push(segment);
934                    self.in_message = false;
935                    let segments = std::mem::take(&mut self.buf);
936                    return Some(Ok(MessageWindow::from_segments(segments)));
937                }
938                _ if self.in_message => {
939                    self.buf.push(segment);
940                }
941                _ => {
942                    // Envelope segment outside a window — skip.
943                }
944            }
945        }
946    }
947}
948
949/// An iterator that groups owned EDIFACT segments into per-message windows.
950///
951/// Each yielded item is an [`OwnedMessageWindow`] containing the segments for one
952/// complete `UNH..UNT` message, inclusive of both service segments.
953/// Envelope-level segments (`UNB`, `UNG`, `UNZ`, `UNE`) that sit outside any
954/// `UNH..UNT` pair are silently skipped.
955///
956/// # Errors
957///
958/// - An inner-iterator error is forwarded immediately and iteration stops.
959/// - A `UNH` seen while a prior window is still open (missing `UNT`) is an error.
960/// - Input that ends while a `UNH` window is open (stream truncation) yields
961///   `Err(EdifactError::UnexpectedEof { … })` before returning `None`.
962///
963/// # Construction
964///
965/// Use [`message_windows_from_reader`] or [`message_windows_bytes`] to
966/// obtain a `MessageWindowsIter` directly.  For fully custom sources, call
967/// [`MessageWindowsIter::new`] with any `Iterator<Item = Result<OwnedSegment,
968/// EdifactError>>`.
969pub struct MessageWindowsIter<I> {
970    inner: I,
971    buf: Vec<crate::OwnedSegment>,
972    in_message: bool,
973    /// Set to `true` after any terminal condition (error or clean EOF) so that
974    /// subsequent `next()` calls immediately return `None`.
975    done: bool,
976}
977
978impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> MessageWindowsIter<I> {
979    /// Wrap any owned-segment iterator as a message-window iterator.
980    pub fn new(inner: I) -> Self {
981        Self {
982            inner,
983            buf: Vec::new(),
984            in_message: false,
985            done: false,
986        }
987    }
988}
989
990impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> Iterator
991    for MessageWindowsIter<I>
992{
993    type Item = Result<OwnedMessageWindow, EdifactError>;
994
995    fn next(&mut self) -> Option<Self::Item> {
996        if self.done {
997            return None;
998        }
999        loop {
1000            let segment = match self.inner.next() {
1001                Some(Ok(s)) => s,
1002                Some(Err(e)) => {
1003                    self.done = true;
1004                    return Some(Err(e));
1005                }
1006                None => {
1007                    self.done = true;
1008                    // A window that opened (UNH seen) but never closed (no UNT)
1009                    // means the stream was truncated — surface as an error.
1010                    if self.in_message && !self.buf.is_empty() {
1011                        self.in_message = false;
1012                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
1013                        return Some(Err(EdifactError::UnexpectedEof { offset }));
1014                    }
1015                    return None;
1016                }
1017            };
1018
1019            match segment.tag.as_str() {
1020                "UNH" => {
1021                    if self.in_message {
1022                        // Malformed: new UNH without closing the prior UNT.
1023                        self.buf.clear();
1024                        self.in_message = false;
1025                        self.done = true;
1026                        let offset = segment.span.start;
1027                        return Some(Err(EdifactError::InvalidSegmentForMessage {
1028                            tag: "UNH".to_owned(),
1029                            message_type: "ENVELOPE".to_owned(),
1030                            offset,
1031                        }));
1032                    }
1033                    self.buf.clear();
1034                    self.in_message = true;
1035                    self.buf.push(segment);
1036                }
1037                "UNT" if self.in_message => {
1038                    self.buf.push(segment);
1039                    self.in_message = false;
1040                    let segments = std::mem::take(&mut self.buf);
1041                    return Some(Ok(OwnedMessageWindow::from_segments(segments)));
1042                }
1043                _ if self.in_message => {
1044                    self.buf.push(segment);
1045                }
1046                _ => {
1047                    // Envelope segment outside a window — skip.
1048                }
1049            }
1050        }
1051    }
1052}
1053
1054/// Stream-parse EDIFACT bytes into an iterator of per-message windows.
1055///
1056/// Each yielded [`MessageWindow`] spans one `UNH..UNT` pair, with segments
1057/// borrowing from `input` for their text content. Segment assembly is
1058/// zero-copy for borrowed input bytes; release-character unescaping may
1059/// allocate owned component strings when necessary.
1060/// Envelope segments (`UNB`, `UNZ`, …) are skipped automatically.
1061///
1062/// The `message_type` and `association_code` fields are populated directly from
1063/// the `UNH` segment so that routing logic does not need to traverse `segments`.
1064///
1065/// # Example
1066/// ```
1067/// use edifact_rs::message_windows_bytes;
1068/// let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'\
1069///               UNH+1+ORDERS:D:96A:UN'\
1070///               BGM+220+PO-001+9'\
1071///               UNT+3+1'\
1072///               UNZ+1+1'";
1073///
1074/// let windows: Vec<_> = message_windows_bytes(input)
1075///     .collect::<Result<_, _>>()
1076///     .unwrap();
1077/// assert_eq!(windows.len(), 1);
1078/// assert_eq!(windows[0].message_type.as_deref(), Some("ORDERS"));
1079/// assert_eq!(windows[0].segments[0].tag, "UNH");
1080/// assert_eq!(windows[0].segments.last().unwrap().tag, "UNT");
1081/// ```
1082pub fn message_windows_bytes(input: &[u8]) -> MessageWindowsSliceIter<'_> {
1083    MessageWindowsSliceIter::new(crate::from_bytes(input))
1084}
1085
1086/// Stream-parse EDIFACT from a reader into an iterator of per-message windows.
1087///
1088/// Each yielded [`OwnedMessageWindow`] spans one `UNH..UNT` pair.
1089/// This variant reads lazily — only enough input to complete one window is
1090/// consumed per [`Iterator::next`] call.
1091pub fn message_windows_from_reader<R: Read>(
1092    reader: R,
1093) -> MessageWindowsIter<crate::FromReaderIter<R>> {
1094    MessageWindowsIter::new(crate::from_reader_iter(reader))
1095}
1096
1097/// Stream typed messages from a reader by deserializing each `UNH..UNT` window.
1098///
1099/// This is the highest-level streaming API: it returns one `T` per message,
1100/// reading only as much data as needed to complete each window.
1101///
1102/// Each message window is deserialized via
1103/// [`EdifactDeserialize::edifact_deserialize_owned`], which avoids the
1104/// intermediate `Vec<Segment<'_>>` allocation incurred by the slice-based path.
1105/// Types derived with `#[derive(EdifactDeserialize)]` provide an efficient
1106/// override; manual implementations fall back to [`crate::OwnedSegment::as_borrowed`].
1107///
1108/// # Example
1109/// ```ignore
1110/// // Assuming `OrdersMessage` implements `EdifactDeserialize`:
1111/// let messages: Vec<OrdersMessage> =
1112///     deserialize_messages_from_reader::<OrdersMessage, _>(reader)
1113///         .collect::<Result<_, _>>()?;
1114/// ```
1115pub fn deserialize_messages_from_reader<T, R>(
1116    reader: R,
1117) -> impl Iterator<Item = Result<T, EdifactError>>
1118where
1119    T: EdifactDeserialize,
1120    R: Read,
1121{
1122    message_windows_from_reader(reader).map(|window| {
1123        let window = window?;
1124        T::edifact_deserialize_owned(&window.segments)
1125    })
1126}
1127
1128/// Stream typed messages from a byte slice by deserializing each `UNH..UNT` window.
1129pub fn deserialize_messages_bytes<T>(
1130    input: &[u8],
1131) -> impl Iterator<Item = Result<T, EdifactError>> + '_
1132where
1133    T: EdifactDeserialize,
1134{
1135    message_windows_bytes(input).map(|window| {
1136        let window = window?;
1137        T::edifact_deserialize(&window.segments)
1138    })
1139}
1140
1141// ── MessageDispatch ───────────────────────────────────────────────────────────
1142
/// A type-erased deserialized message produced by [`MessageDispatch`].
///
/// The payload is stored as a boxed `Any`; use [`downcast`][Self::downcast]
/// to get a typed reference back.
pub struct DispatchedMessage {
    /// The EDIFACT message type string extracted from the `UNH` segment.
    pub message_type: String,
    // The handler's return value with its concrete type erased.
    value: Box<dyn std::any::Any + Send + Sync>,
}

impl DispatchedMessage {
    /// Borrow the stored value as a `T`.
    ///
    /// Returns `None` if the stored type does not match `T`.
    pub fn downcast<T: std::any::Any + Send + Sync + 'static>(&self) -> Option<&T> {
        let erased: &(dyn std::any::Any + Send + Sync) = &*self.value;
        erased.downcast_ref::<T>()
    }
}

impl std::fmt::Debug for DispatchedMessage {
    /// Prints only `message_type`; the type-erased payload is elided and the
    /// output is marked non-exhaustive.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("DispatchedMessage");
        repr.field("message_type", &self.message_type);
        repr.finish_non_exhaustive()
    }
}
1166
// Type-erased per-message-type handler stored by `MessageDispatch::on`.
// It takes the segments of one message window and returns the handler's
// value boxed as `Any`, so handlers with different result types can share
// one collection.
type DispatchHandlerFn = Box<
    dyn for<'a> Fn(&[Segment<'a>]) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError>
        + Send
        + Sync,
>;
1172
// Type-erased catch-all handler stored by `MessageDispatch::fallback`.
// Besides the window's segments it also receives the message-type string
// that no registered handler matched.
type FallbackHandlerFn = Box<
    dyn for<'a> Fn(
            &[Segment<'a>],
            &str,
        ) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError>
        + Send
        + Sync,
>;
1181
/// Type-based dispatcher for mixed-message EDIFACT streams.
///
/// Register one handler per message type with [`on`][Self::on], then call
/// [`dispatch`][Self::dispatch] on the segments of each message window.  If no
/// handler matches and a [`fallback`][Self::fallback] was registered it is
/// invoked instead; otherwise an [`EdifactError::UnexpectedMessageType`] is
/// returned.
///
/// # Example
///
/// ```rust,ignore
/// let dispatch = MessageDispatch::new()
///     .on("ORDERS",  |segs| Orders::edifact_deserialize(segs))
///     .on("INVOIC",  |segs| Invoice::edifact_deserialize(segs));
///
/// for window in message_windows_bytes(input) {
///     let window = window?;
///     // `dispatch` takes the segment slice, not the window struct itself.
///     let msg = dispatch.dispatch(&window.segments)?;
///     match msg.message_type.as_str() {
///         "ORDERS"  => { let o = msg.downcast::<Orders>().unwrap(); /* … */ }
///         "INVOIC"  => { let i = msg.downcast::<Invoice>().unwrap(); /* … */ }
///         _         => unreachable!(),
///     }
/// }
/// ```
pub struct MessageDispatch {
    // `(message type, handler)` pairs in registration order; `dispatch` runs
    // the first entry whose type string equals the message type.
    handlers: Vec<(String, DispatchHandlerFn)>,
    // Optional catch-all invoked when no entry in `handlers` matches.
    fallback: Option<FallbackHandlerFn>,
}
1210
1211impl Default for MessageDispatch {
1212    fn default() -> Self {
1213        Self::new()
1214    }
1215}
1216
1217impl MessageDispatch {
1218    /// Create an empty dispatcher.
1219    pub fn new() -> Self {
1220        Self {
1221            handlers: Vec::new(),
1222            fallback: None,
1223        }
1224    }
1225
1226    /// Register a handler for `message_type`.
1227    ///
1228    /// The closure receives the full message window and returns a typed value
1229    /// that is boxed and stored inside [`DispatchedMessage`].
1230    pub fn on<T, F>(mut self, message_type: &str, handler: F) -> Self
1231    where
1232        T: std::any::Any + Send + Sync + 'static,
1233        F: for<'a> Fn(&[Segment<'a>]) -> Result<T, EdifactError> + Send + Sync + 'static,
1234    {
1235        let erased: DispatchHandlerFn = Box::new(move |segs| {
1236            let val = handler(segs)?;
1237            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1238        });
1239        self.handlers.push((message_type.to_owned(), erased));
1240        self
1241    }
1242
1243    /// Register a fallback handler for unrecognised message types.
1244    ///
1245    /// The closure receives the segment window **and** the unknown message-type
1246    /// string.
1247    pub fn fallback<T, F>(mut self, handler: F) -> Self
1248    where
1249        T: std::any::Any + Send + Sync + 'static,
1250        F: for<'a> Fn(&[Segment<'a>], &str) -> Result<T, EdifactError> + Send + Sync + 'static,
1251    {
1252        let erased: FallbackHandlerFn = Box::new(move |segs, mt| {
1253            let val = handler(segs, mt)?;
1254            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1255        });
1256        self.fallback = Some(erased);
1257        self
1258    }
1259
1260    /// Dispatch a single message window to the appropriate handler.
1261    ///
1262    /// The message type is extracted from the `UNH` segment.  If no `UNH` is
1263    /// present, [`EdifactError::MissingSegment`] is returned.
1264    pub fn dispatch(&self, window: &[Segment<'_>]) -> Result<DispatchedMessage, EdifactError> {
1265        let message_type = window
1266            .iter()
1267            .find(|s| s.tag == "UNH")
1268            .and_then(|unh| unh.get_element(1))
1269            .and_then(|e| e.get_component(0))
1270            .map(|s| s.to_owned())
1271            .ok_or_else(|| EdifactError::MissingSegment {
1272                tag: "UNH".to_owned(),
1273                expected_position: "first segment of message window".to_owned(),
1274            })?;
1275
1276        for (mt, handler) in &self.handlers {
1277            if *mt == message_type {
1278                let value = handler(window)?;
1279                return Ok(DispatchedMessage {
1280                    message_type,
1281                    value,
1282                });
1283            }
1284        }
1285
1286        if let Some(fallback) = &self.fallback {
1287            let value = fallback(window, &message_type)?;
1288            return Ok(DispatchedMessage {
1289                message_type,
1290                value,
1291            });
1292        }
1293
1294        Err(EdifactError::UnexpectedMessageType { message_type })
1295    }
1296
1297    /// Dispatch all messages from a byte reader.
1298    ///
1299    /// Each message window is extracted and dispatched in order.  The returned
1300    /// iterator is lazy — errors are yielded as `Err` items.
1301    pub fn dispatch_all_from_bytes<'a>(
1302        &'a self,
1303        input: &'a [u8],
1304    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + 'a {
1305        message_windows_bytes(input).map(move |window| {
1306            let window = window?;
1307            self.dispatch(&window.segments)
1308        })
1309    }
1310
1311    /// Dispatch all messages from a reader.
1312    ///
1313    /// Parses the stream into message windows and dispatches each.  The
1314    /// returned iterator yields owned [`DispatchedMessage`] values lazily:
1315    /// each window is fully buffered in memory (as `Vec<OwnedSegment>`) before
1316    /// dispatch, but windows are processed one at a time rather than all at once.
1317    pub fn dispatch_all_from_reader<R: Read + 'static>(
1318        &self,
1319        reader: R,
1320    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + '_ {
1321        message_windows_from_reader(reader).map(|window| {
1322            let window = window?;
1323            let borrowed: Vec<Segment<'_>> =
1324                window.segments.iter().map(|s| s.as_borrowed()).collect();
1325            self.dispatch(&borrowed)
1326        })
1327    }
1328}
1329
1330#[cfg(test)]
1331mod tests {
1332    use super::*;
1333
1334    // ── manual test impl ──────────────────────────────────────────────────────
    // Hand-written fixture type: the first three elements of a `BGM` segment.
    #[derive(Debug, PartialEq)]
    struct BgmSegment {
        // BGM element 0.
        doc_name_code: String,
        // BGM element 1.
        pruef_id: String,
        // BGM element 2; `None` when absent or empty.
        msg_function: Option<String>,
    }

    // Ties the fixture to the `BGM` tag, with no qualifier pattern set here.
    impl EdifactSegmentTag for BgmSegment {
        const SEGMENT_TAG: &'static str = "BGM";
    }
1345
    // Marker type selecting `NAD` segments via the qualifier pattern `M*`.
    struct NadM;

    impl EdifactSegmentTag for NadM {
        const SEGMENT_TAG: &'static str = "NAD";
        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
    }
1352
    // Marker type selecting `NAD` segments via the qualifier pattern `M*`.
    // NOTE(review): this pattern is identical to `NadM`'s; if a bare `*`
    // wildcard was intended for this fixture, confirm and adjust.
    struct NadWildcard;

    impl EdifactSegmentTag for NadWildcard {
        const SEGMENT_TAG: &'static str = "NAD";
        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
    }
1359
1360    impl EdifactDeserialize for BgmSegment {
1361        fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
1362            let seg = find_segment(segments, "BGM").ok_or_else(|| {
1363                EdifactError::MissingRequiredElement {
1364                    tag: "BGM".to_owned(),
1365                    element_index: 0,
1366                }
1367            })?;
1368            Ok(Self {
1369                doc_name_code: element_str(seg, 0).to_owned(),
1370                pruef_id: element_str(seg, 1).to_owned(),
1371                msg_function: seg
1372                    .element_str(2)
1373                    .filter(|s| !s.is_empty())
1374                    .map(str::to_owned),
1375            })
1376        }
1377    }
1378
1379    #[test]
1380    fn deserialize_single_segment() {
1381        let input = b"BGM+E03+11042+9'";
1382        let bgm: BgmSegment = deserialize(input).unwrap();
1383        assert_eq!(bgm.doc_name_code, "E03");
1384        assert_eq!(bgm.pruef_id, "11042");
1385        assert_eq!(bgm.msg_function, Some("9".to_owned()));
1386    }
1387
1388    #[test]
1389    fn streaming_deserialize_first_from_bytes() {
1390        let input = b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'";
1391        let bgm: BgmSegment = deserialize_first_streaming(input).unwrap();
1392        assert_eq!(bgm.pruef_id, "11042");
1393    }
1394
1395    #[test]
1396    fn streaming_deserialize_all_from_bytes() {
1397        let input = b"BGM+E03+11042+9'RFF+AA:1'BGM+E01+11043+9'";
1398        let bgms: Vec<BgmSegment> = deserialize_all_streaming(input).unwrap();
1399        assert_eq!(bgms.len(), 2);
1400        assert_eq!(bgms[0].pruef_id, "11042");
1401        assert_eq!(bgms[1].pruef_id, "11043");
1402    }
1403
1404    #[test]
1405    fn streaming_deserialize_first_from_reader() {
1406        let input =
1407            std::io::Cursor::new(b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'".to_vec());
1408        let bgm: BgmSegment = deserialize_first_from_reader(input).unwrap();
1409        assert_eq!(bgm.pruef_id, "11042");
1410    }
1411
1412    #[test]
1413    fn streaming_deserialize_all_from_reader() {
1414        let input = std::io::Cursor::new(b"BGM+E03+11042+9'BGM+E01+11043+9'".to_vec());
1415        let bgms: Vec<BgmSegment> = deserialize_all_from_reader(input).unwrap();
1416        assert_eq!(bgms.len(), 2);
1417        assert_eq!(bgms[0].pruef_id, "11042");
1418        assert_eq!(bgms[1].pruef_id, "11043");
1419    }
1420
1421    #[test]
1422    fn missing_segment_returns_error() {
1423        let input = b"DTM+137:20230401:102'";
1424        let result: Result<BgmSegment, _> = deserialize(input);
1425        assert!(result.is_err());
1426    }
1427
1428    #[test]
1429    fn vec_collects_all_matching_segments() {
1430        let input = b"DTM+137:20230401:102'BGM+E03+11042+9'BGM+E01+11043+9'";
1431        let bgms: Vec<BgmSegment> = deserialize(input).unwrap();
1432        assert_eq!(bgms.len(), 2);
1433        assert_eq!(bgms[0].pruef_id, "11042");
1434        assert_eq!(bgms[1].pruef_id, "11043");
1435    }
1436
1437    #[test]
1438    fn find_qualified_segment_matches_qualifier() {
1439        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1440        let segments: Vec<Segment<'_>> =
1441            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1442        let nad_ms = find_qualified_segment(&segments, "NAD", "MS");
1443        let nad_mr = find_qualified_segment(&segments, "NAD", "MR");
1444        assert!(nad_ms.is_some());
1445        assert!(nad_mr.is_some());
1446        assert_eq!(element_str(nad_ms.unwrap(), 0), "MS");
1447        assert_eq!(element_str(nad_mr.unwrap(), 0), "MR");
1448    }
1449
1450    #[test]
1451    fn round_trip_str_api() {
1452        let input = "BGM+E03+11042+9'";
1453        let bgm: BgmSegment = deserialize_str(input).unwrap();
1454        assert_eq!(bgm.pruef_id, "11042");
1455    }
1456
1457    #[test]
1458    fn required_element_extraction() {
1459        let input = b"BGM+E03+11042+9'";
1460        let segments: Vec<Segment<'_>> =
1461            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1462        let seg = &segments[0];
1463
1464        assert_eq!(required_element(seg, 0).unwrap(), "E03");
1465        assert_eq!(required_element(seg, 1).unwrap(), "11042");
1466        // Element 5 doesn't exist
1467        assert!(required_element(seg, 5).is_err());
1468    }
1469
1470    #[test]
1471    fn optional_element_extraction() {
1472        let input = b"BGM+E03+11042+9'BGM+E01++absent'";
1473        let segments: Vec<Segment<'_>> =
1474            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1475
1476        // First segment
1477        assert_eq!(optional_element(&segments[0], 0), Some("E03"));
1478        assert_eq!(optional_element(&segments[0], 1), Some("11042"));
1479        assert_eq!(optional_element(&segments[0], 5), None);
1480
1481        // Second segment with empty element
1482        assert_eq!(optional_element(&segments[1], 1), None);
1483    }
1484
1485    #[test]
1486    fn component_extraction() {
1487        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1488        let segments: Vec<Segment<'_>> =
1489            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1490        let seg = &segments[0];
1491
1492        assert_eq!(required_component(seg, 0, 0).unwrap(), "UNOA");
1493        assert_eq!(required_component(seg, 0, 1).unwrap(), "1");
1494        // Non-existent component
1495        assert!(required_component(seg, 0, 5).is_err());
1496    }
1497
1498    #[test]
1499    fn composite_element_helper() {
1500        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1501        let segments: Vec<Segment<'_>> =
1502            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1503        let seg = &segments[0];
1504
1505        let comp = composite_element(seg, 0).unwrap();
1506        assert_eq!(comp.len(), 2);
1507        assert_eq!(comp.get(0), Some("UNOA"));
1508        assert_eq!(comp.get(1), Some("1"));
1509        assert_eq!(comp.get(5), None);
1510        assert_eq!(comp.get_or_empty(5), "");
1511    }
1512
1513    #[test]
1514    fn get_all_components() {
1515        // UNB has composite element: UNOA:1
1516        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1517        let segments: Vec<Segment<'_>> =
1518            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1519        let seg = &segments[0];
1520
1521        let comps: Vec<&str> = get_components_iter(seg, 0).collect(); // First element is UNOA:1
1522        assert!(!comps.is_empty(), "Expected components but got empty");
1523        assert_eq!(comps.len(), 2);
1524        assert_eq!(comps[0], "UNOA");
1525        assert_eq!(comps[1], "1");
1526    }
1527
1528    #[test]
1529    fn qualifier_pattern_matching_supports_exact_and_wildcard() {
1530        // Exact match (no wildcard)
1531        assert!(qualifier_matches_pattern("MS", "MS"));
1532        assert!(!qualifier_matches_pattern("MS", "M")); // Not a prefix match after R-003
1533        // Wildcard patterns
1534        assert!(qualifier_matches_pattern("MS", "M*"));
1535        assert!(qualifier_matches_pattern("MRY", "M*Y"));
1536        assert!(!qualifier_matches_pattern("AB", "M*"));
1537    }
1538
1539    /// Comprehensive table-driven tests for `qualifier_matches_pattern`.
1540    #[test]
1541    fn qualifier_matches_pattern_table() {
1542        // (value, pattern, expected)
1543        let cases: &[(&str, &str, bool)] = &[
1544            // ── empty inputs ────────────────────────────────────────────────
1545            ("", "", true),   // empty matches empty
1546            ("", "*", true),  // wildcard matches empty string
1547            ("A", "", false), // non-empty does not match empty pattern
1548            ("", "A", false), // empty does not match non-empty literal
1549            // ── literal (no wildcard) ────────────────────────────────────────
1550            ("MS", "MS", true),
1551            ("BY", "BY", true),
1552            ("ms", "MS", false),  // case-sensitive
1553            ("MSX", "MS", false), // prefix is NOT a match without wildcard
1554            ("M", "MS", false),   // too short
1555            // ── single wildcard at the end (prefix match) ────────────────────
1556            ("MS", "M*", true),
1557            ("MULTI", "MUL*", true),
1558            ("AB", "M*", false),
1559            ("", "M*", false), // empty does not start with 'M'
1560            // ── single wildcard at the start (suffix match) ──────────────────
1561            ("MSG", "*G", true),
1562            ("G", "*G", true),
1563            ("MSG", "*X", false),
1564            ("", "*G", false),
1565            // ── wildcard in the middle ───────────────────────────────────────
1566            ("MRY", "M*Y", true),
1567            ("MAY", "M*Y", true),
1568            ("MY", "M*Y", true),    // zero-width wildcard: "M" + "" + "Y"
1569            ("MYY", "M*Y", true),   // last 'Y' matches, wildcard = 'Y'
1570            ("MAYZ", "M*Y", false), // does not end with 'Y'
1571            ("AB", "M*Y", false),
1572            // ── bare wildcard (match-all) ────────────────────────────────────
1573            ("*", "*", true), // literal '*' value vs wildcard pattern
1574            ("anything", "*", true),
1575            ("", "*", true),
1576            // ── multiple wildcards ────────────────────────────────────────────
1577            ("ABCDE", "A*C*E", true),
1578            ("ACE", "A*C*E", true), // zero-width wildcards
1579            ("AXCYE", "A*C*E", true),
1580            ("ABCDF", "A*C*E", false),
1581            // ── wildcard with empty segment between stars ─────────────────────
1582            ("AB", "A**B", true), // "A**B" → parts ["A", "", "B"] → ends_with_wildcard?
1583            // ── pattern longer than value ─────────────────────────────────────
1584            ("AB", "A*B*C", false),
1585            // ── value contains pattern as substring but must anchor start ─────
1586            ("XMS", "MS", false),
1587        ];
1588
1589        for (value, pattern, expected) in cases {
1590            let got = qualifier_matches_pattern(value, pattern);
1591            assert_eq!(
1592                got, *expected,
1593                "qualifier_matches_pattern({value:?}, {pattern:?}) expected {expected} but got {got}"
1594            );
1595        }
1596    }
1597
1598    #[test]
1599    fn typed_qualifier_helpers_work() {
1600        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1601        let segments: Vec<Segment<'_>> =
1602            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1603
1604        let first = find_segment_typed::<NadM>(&segments).unwrap();
1605        assert_eq!(first.element_str(0), Some("MS"));
1606
1607        let all: Vec<_> = find_segments_typed::<NadWildcard>(&segments).collect();
1608        assert_eq!(all.len(), 2);
1609    }
1610
1611    #[test]
1612    fn segment_accessor_trait_methods_work() {
1613        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1614        let segments: Vec<Segment<'_>> =
1615            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1616        let seg = &segments[0];
1617
1618        assert_eq!(SegmentAccessor::get_element(seg, 1), Some("SENDER"));
1619        assert_eq!(SegmentAccessor::required_composite(seg, 0, 1).unwrap(), "1");
1620        let parsed: i32 = SegmentAccessor::code_element(seg, 4).unwrap();
1621        assert_eq!(parsed, 1);
1622        let reps = SegmentAccessor::repeating_components(seg, 3, 0, 2).unwrap();
1623        assert_eq!(reps, vec!["200101", "0900"]);
1624    }
1625
1626    #[test]
1627    fn group_helpers_detect_contiguity() {
1628        struct NadAny;
1629        impl EdifactSegmentTag for NadAny {
1630            const SEGMENT_TAG: &'static str = "NAD";
1631        }
1632
1633        let contiguous_input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'";
1634        let contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(contiguous_input)
1635            .collect::<Result<_, _>>()
1636            .unwrap();
1637        assert!(groups_are_contiguous_by_qualifier::<NadAny>(
1638            &contiguous_segments
1639        ));
1640
1641        let non_contiguous_input = b"NAD+MS+1'RFF+AA:1'NAD+MR+2'";
1642        let non_contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(non_contiguous_input)
1643            .collect::<Result<_, _>>()
1644            .unwrap();
1645        assert!(!groups_are_contiguous_by_qualifier::<NadAny>(
1646            &non_contiguous_segments
1647        ));
1648    }
1649
1650    #[test]
1651    fn group_helpers_collect_contiguous_groups() {
1652        struct NadAny;
1653        impl EdifactSegmentTag for NadAny {
1654            const SEGMENT_TAG: &'static str = "NAD";
1655        }
1656
1657        let input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'NAD+BY+3'";
1658        let segments: Vec<Segment<'_>> =
1659            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1660        let groups = contiguous_groups_by_qualifier::<NadAny>(&segments);
1661
1662        assert_eq!(groups.len(), 2);
1663        assert_eq!(groups[0].len(), 2);
1664        assert_eq!(groups[1].len(), 1);
1665    }
1666
1667    // ── MessageWindowsIter tests ──────────────────────────────────────────────
1668
1669    #[test]
1670    fn message_windows_bytes_yields_complete_windows() {
1671        let input = b"UNB+UNOA:1+S+R+200101:0900+1'\
1672                      UNH+1+ORDERS:D:96A:UN'\
1673                      BGM+220+PO-001+9'\
1674                      UNT+3+1'\
1675                      UNZ+1+1'";
1676        let windows: Vec<_> = message_windows_bytes(input)
1677            .collect::<Result<_, _>>()
1678            .unwrap();
1679        assert_eq!(windows.len(), 1);
1680        assert_eq!(windows[0].segments[0].tag, "UNH");
1681        assert_eq!(windows[0].segments.last().unwrap().tag, "UNT");
1682        assert_eq!(windows[0].message_type.as_deref(), Some("ORDERS"));
1683        assert_eq!(windows[0].association_code.as_deref(), None);
1684    }
1685
1686    #[test]
1687    fn message_windows_bytes_preserves_owned_unh_metadata() {
1688        let input = b"UNB+UNOA:1+S+R+200101:0900+1'\
1689                      UNH+1+ORD?ERS:D:96A:UN:5??5??3a'\
1690                      BGM+220+PO-001+9'\
1691                      UNT+3+1'\
1692                      UNZ+1+1'";
1693        let windows: Vec<_> = message_windows_bytes(input)
1694            .collect::<Result<_, _>>()
1695            .unwrap();
1696
1697        assert_eq!(windows.len(), 1);
1698        assert_eq!(windows[0].message_type.as_deref(), Some("ORDERS"));
1699        assert_eq!(windows[0].association_code.as_deref(), Some("5?5?3a"));
1700    }
1701
1702    #[test]
1703    fn message_windows_truncated_stream_returns_error() {
1704        // Stream ends after UNH and BGM but without UNT — truncation must be an error
1705        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
1706        let results: Vec<_> = message_windows_bytes(input).collect();
1707        assert_eq!(results.len(), 1);
1708        assert!(
1709            matches!(results[0], Err(EdifactError::UnexpectedEof { .. })),
1710            "expected UnexpectedEof for truncated window, got: {:?}",
1711            results[0]
1712        );
1713    }
1714
1715    #[test]
1716    fn message_windows_subsequent_calls_return_none_after_truncation() {
1717        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
1718        let mut iter = message_windows_bytes(input);
1719        assert!(matches!(
1720            iter.next(),
1721            Some(Err(EdifactError::UnexpectedEof { .. }))
1722        ));
1723        // After the error, the iterator must be fused (done = true)
1724        assert!(iter.next().is_none());
1725    }
1726
1727    #[test]
1728    fn message_windows_unh_without_unt_before_next_unh_returns_error() {
1729        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'\
1730                      UNH+2+ORDERS:D:96A:UN'BGM+220+PO-002+9'UNT+3+2'";
1731        let results: Vec<_> = message_windows_bytes(input).collect();
1732        // First item must be an error (UNH before UNT — missing closer)
1733        assert!(
1734            matches!(
1735                results[0],
1736                Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"
1737            ),
1738            "expected InvalidSegmentForMessage(UNH), got: {:?}",
1739            results[0]
1740        );
1741    }
1742
1743    // ── SegmentAccessor unit tests ─────────────────────────────────────────────
1744
1745    fn parse_one(input: &str) -> crate::OwnedSegment {
1746        crate::from_reader(std::io::Cursor::new(input.as_bytes()))
1747            .expect("parse failed")
1748            .into_iter()
1749            .next()
1750            .expect("at least one segment")
1751    }
1752
1753    #[test]
1754    fn segment_accessor_get_element_returns_value() {
1755        let owned = parse_one("BGM+220+PO-001+9'");
1756        let seg = owned.as_borrowed();
1757        assert_eq!(SegmentAccessor::get_element(&seg, 0), Some("220"));
1758        assert_eq!(SegmentAccessor::get_element(&seg, 1), Some("PO-001"));
1759        assert_eq!(SegmentAccessor::get_element(&seg, 2), Some("9"));
1760        assert_eq!(
1761            SegmentAccessor::get_element(&seg, 9),
1762            None,
1763            "out-of-bounds must return None"
1764        );
1765    }
1766
1767    #[test]
1768    fn segment_accessor_get_element_filters_empty() {
1769        let owned = parse_one("TST+++VALUE'");
1770        let seg = owned.as_borrowed();
1771        // elements 0 and 1 are empty; element 2 is "VALUE"
1772        assert_eq!(
1773            SegmentAccessor::get_element(&seg, 0),
1774            None,
1775            "empty element must return None"
1776        );
1777        assert_eq!(
1778            SegmentAccessor::get_element(&seg, 1),
1779            None,
1780            "empty element must return None"
1781        );
1782        assert_eq!(SegmentAccessor::get_element(&seg, 2), Some("VALUE"));
1783    }
1784
1785    #[test]
1786    fn segment_accessor_get_component_returns_value() {
1787        let owned = parse_one("UNH+1+ORDERS:D:96A:UN'");
1788        let seg = owned.as_borrowed();
1789        assert_eq!(seg.get_component(1, 0), Some("ORDERS"));
1790        assert_eq!(seg.get_component(1, 1), Some("D"));
1791        assert_eq!(seg.get_component(1, 2), Some("96A"));
1792        assert_eq!(seg.get_component(1, 3), Some("UN"));
1793        assert_eq!(
1794            seg.get_component(1, 9),
1795            None,
1796            "out-of-bounds must return None"
1797        );
1798    }
1799
1800    #[test]
1801    fn segment_accessor_text_element_errors_on_missing() {
1802        let owned = parse_one("BGM+'");
1803        let seg = owned.as_borrowed();
1804        // element 0 is empty — text_element must return an error
1805        let err = seg.text_element(0);
1806        assert!(
1807            matches!(err, Err(EdifactError::MissingRequiredElement { ref tag, element_index: 0 }) if tag == "BGM"),
1808            "expected MissingRequiredElement, got: {err:?}"
1809        );
1810    }
1811
1812    #[test]
1813    fn segment_accessor_required_composite_errors_on_missing() {
1814        let owned = parse_one("DTM+137'");
1815        let seg = owned.as_borrowed();
1816        // component 1 of element 0 is absent
1817        let err = seg.required_composite(0, 1);
1818        assert!(
1819            matches!(err, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 0, component_index: 1 }) if tag == "DTM"),
1820            "expected MissingRequiredComponent, got: {err:?}"
1821        );
1822    }
1823
1824    #[test]
1825    fn segment_accessor_code_element_parses_integer() {
1826        let owned = parse_one("QTY+21:100'");
1827        let seg = owned.as_borrowed();
1828        let qty: u32 = seg.code_element(0).expect("should parse qualifier as u32");
1829        assert_eq!(qty, 21);
1830    }
1831
1832    #[test]
1833    fn segment_accessor_optional_element_absent_returns_none() {
1834        let owned = parse_one("BGM+220'");
1835        let seg = owned.as_borrowed();
1836        assert_eq!(seg.optional_element(5), None);
1837    }
1838}