Skip to main content

edifact_rs/
de.rs

1//! Custom deserialization trait for EDIFACT.
2//!
3//! [`EdifactDeserialize`] maps a slice of parsed [`Segment`]s to a Rust value.
4//! [`EdifactSegmentTag`] is a companion trait that carries the segment tag and
5//! optional qualifier at the type level, enabling the blanket
6//! `impl EdifactDeserialize for Vec<T>`.
7
8use crate::{EdifactError, Segment};
9use std::io::Read;
10use std::str::FromStr;
11
12// ── traits ────────────────────────────────────────────────────────────────────
13
14/// Types that can be deserialized from a slice of EDIFACT segments.
15///
16/// Implement manually or derive with `#[derive(EdifactDeserialize)]` from the
17/// `edifact-rs-derive` crate.
18pub trait EdifactDeserialize: Sized {
19    /// Deserialize `Self` from the provided segment slice.
20    ///
21    /// The slice may contain any number of segments; implementations extract
22    /// only the ones they care about and ignore the rest.
23    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError>;
24
25    /// Deserialize `Self` from a slice of owned EDIFACT segments.
26    ///
27    /// # Default implementation
28    ///
29    /// Converts each [`crate::OwnedSegment`] to its borrowed form via
30    /// [`crate::OwnedSegment::as_borrowed`] and delegates to
31    /// [`edifact_deserialize`][Self::edifact_deserialize].  This incurs one
32    /// `Vec<Segment<'_>>` allocation per call.
33    ///
34    /// # Override when performance matters
35    ///
36    /// Types generated by `#[derive(EdifactDeserialize)]` automatically override
37    /// this method to work directly on the owned data without the intermediate
38    /// allocation.  Manual implementations should also override when used in the
39    /// high-throughput reader-streaming path
40    /// ([`deserialize_first_from_reader`], [`deserialize_all_from_reader`],
41    /// [`deserialize_messages_from_reader`]) to avoid the per-message allocation.
42    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
43        let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
44        Self::edifact_deserialize(&borrowed)
45    }
46}
47
48/// Types that can be deserialized from a composite EDIFACT element.
49///
50/// Implement this for custom composite structs used with
51/// `#[edifact(composite)]` in derive macros.
52pub trait EdifactCompositeDeserialize: Sized {
53    /// Deserialize `Self` from a composite element.
54    fn edifact_deserialize_composite(composite: CompositeElement<'_>)
55    -> Result<Self, EdifactError>;
56}
57
58impl EdifactCompositeDeserialize for Vec<String> {
59    fn edifact_deserialize_composite(
60        composite: CompositeElement<'_>,
61    ) -> Result<Self, EdifactError> {
62        Ok(composite.iter().map(str::to_owned).collect())
63    }
64}
65
66/// Companion trait that declares a type's segment tag (and optional qualifier).
67///
68/// Required for the `Vec<T>` blanket impl and for finding the right segment in
69/// a message-level struct deserialization.
70pub trait EdifactSegmentTag {
71    /// The 3-character EDIFACT segment tag (e.g. `"BGM"`, `"NAD"`).
72    const SEGMENT_TAG: &'static str;
73
74    /// Optional qualifier pattern to further constrain segment matching.
75    ///
76    /// Examples:
77    /// - `Some("MS")` for exact qualifier matching.
78    /// - `Some("M*")` for wildcard prefix matching (matches `"MS"`, `"MR"`, etc.).
79    const QUALIFIER_PATTERN: Option<&'static str> = None;
80
81    /// Return `true` if `seg`'s qualifier matches this type's qualifier pattern.
82    fn matches_qualifier(seg: &Segment<'_>) -> bool {
83        match Self::QUALIFIER_PATTERN {
84            Some(pattern) => seg
85                .element_str(0)
86                .is_some_and(|q| qualifier_matches_pattern(q, pattern)),
87            None => true,
88        }
89    }
90
91    /// Return `true` if `seg` is the segment this type maps to.
92    ///
93    /// Default: `seg.tag == Self::SEGMENT_TAG`.  Override to also match on a
94    /// qualifier (e.g. `NAD+BY` — element 0 = `"BY"`).
95    fn matches_segment(seg: &Segment<'_>) -> bool {
96        seg.tag == Self::SEGMENT_TAG && Self::matches_qualifier(seg)
97    }
98
99    /// Like [`matches_segment`][Self::matches_segment] but works directly on an
100    /// [`crate::OwnedSegment`] without incurring the `Vec` allocation of
101    /// [`crate::OwnedSegment::as_borrowed`].
102    fn matches_owned_segment(seg: &crate::OwnedSegment) -> bool {
103        if seg.tag != Self::SEGMENT_TAG {
104            return false;
105        }
106        match Self::QUALIFIER_PATTERN {
107            None => true,
108            Some(pattern) => {
109                let q = seg
110                    .elements
111                    .first()
112                    .and_then(|e| e.components.first())
113                    .map(|c| c.as_str())
114                    .unwrap_or("");
115                qualifier_matches_pattern(q, pattern)
116            }
117        }
118    }
119}
120
121// ── blanket impl for Vec<T> ───────────────────────────────────────────────────
122
123/// Deserializes each segment matching `T::matches_segment` as an independent
124/// single-segment slice, collecting the results.
125impl<T> EdifactDeserialize for Vec<T>
126where
127    T: EdifactDeserialize + EdifactSegmentTag,
128{
129    fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
130        segments
131            .iter()
132            .filter(|s| T::matches_segment(s))
133            .map(|seg| T::edifact_deserialize(std::slice::from_ref(seg)))
134            .collect()
135    }
136
137    fn edifact_deserialize_owned(segments: &[crate::OwnedSegment]) -> Result<Self, EdifactError> {
138        segments
139            .iter()
140            .filter(|s| T::matches_owned_segment(s))
141            .map(|seg| T::edifact_deserialize_owned(std::slice::from_ref(seg)))
142            .collect()
143    }
144}
145
146// ── public API ────────────────────────────────────────────────────────────────
147
148/// Deserialize a value of type `T` from EDIFACT bytes.
149///
150/// Unlike [`crate::from_bytes`], which parses bytes into raw [`Segment`]s, this
151/// function fully deserializes the payload into a typed Rust value via [`EdifactDeserialize`].
152///
153/// This API currently buffers all parsed segments into a `Vec` before invoking
154/// typed deserialization.
155pub fn deserialize<T: EdifactDeserialize>(input: &[u8]) -> Result<T, EdifactError> {
156    let segments: Vec<Segment<'_>> = crate::from_bytes(input).collect::<Result<_, _>>()?;
157    T::edifact_deserialize(&segments)
158}
159
160/// Stream-parse EDIFACT bytes and deserialize the first matching segment as `T`.
161///
162/// This avoids allocating a full `Vec<Segment>` and is intended for low-memory
163/// extraction of segment-scoped types.
164pub fn deserialize_first_streaming<T>(input: &[u8]) -> Result<T, EdifactError>
165where
166    T: EdifactDeserialize + EdifactSegmentTag,
167{
168    for segment in crate::from_bytes(input) {
169        let segment = segment?;
170        if T::matches_segment(&segment) {
171            return T::edifact_deserialize(std::slice::from_ref(&segment));
172        }
173    }
174
175    Err(EdifactError::MissingSegment {
176        tag: T::SEGMENT_TAG.to_owned(),
177        expected_position: "any position in input".to_owned(),
178    })
179}
180
181/// Stream-parse EDIFACT bytes and deserialize all matching segments as `Vec<T>`.
182///
183/// This avoids buffering non-matching segments in memory.
184pub fn deserialize_all_streaming<T>(input: &[u8]) -> Result<Vec<T>, EdifactError>
185where
186    T: EdifactDeserialize + EdifactSegmentTag,
187{
188    let mut out = Vec::new();
189    for segment in crate::from_bytes(input) {
190        let segment = segment?;
191        if T::matches_segment(&segment) {
192            out.push(T::edifact_deserialize(std::slice::from_ref(&segment))?);
193        }
194    }
195    Ok(out)
196}
197
198/// Stream-parse EDIFACT from a reader and deserialize the first matching segment as `T`.
199///
200/// This is the low-memory typed path for large payloads read from I/O streams.
201pub fn deserialize_first_from_reader<T, R>(reader: R) -> Result<T, EdifactError>
202where
203    T: EdifactDeserialize + EdifactSegmentTag,
204    R: Read,
205{
206    for segment in crate::from_reader_iter(reader) {
207        let segment = segment?;
208        // O(1) tag + qualifier check before paying for as_borrowed().
209        if !T::matches_owned_segment(&segment) {
210            continue;
211        }
212        return T::edifact_deserialize_owned(std::slice::from_ref(&segment));
213    }
214
215    Err(EdifactError::MissingSegment {
216        tag: T::SEGMENT_TAG.to_owned(),
217        expected_position: "any position in input".to_owned(),
218    })
219}
220
221/// Stream-parse EDIFACT from a reader and deserialize all matching segments as `Vec<T>`.
222pub fn deserialize_all_from_reader<T, R>(reader: R) -> Result<Vec<T>, EdifactError>
223where
224    T: EdifactDeserialize + EdifactSegmentTag,
225    R: Read,
226{
227    let mut out = Vec::new();
228    for segment in crate::from_reader_iter(reader) {
229        let segment = segment?;
230        // O(1) tag + qualifier check before paying for as_borrowed().
231        if !T::matches_owned_segment(&segment) {
232            continue;
233        }
234        out.push(T::edifact_deserialize_owned(std::slice::from_ref(&segment))?);
235    }
236    Ok(out)
237}
238
239/// Deserialize a value of type `T` from an EDIFACT string.
240pub fn deserialize_str<T: EdifactDeserialize>(input: &str) -> Result<T, EdifactError> {
241    deserialize(input.as_bytes())
242}
243
244// ── helper functions ──────────────────────────────────────────────────────────
245
246/// Find the first segment with the given tag.
247pub fn find_segment<'s, 'd>(segments: &'s [Segment<'d>], tag: &str) -> Option<&'s Segment<'d>> {
248    segments.iter().find(|s| s.tag == tag)
249}
250
251/// Iterate over all segments with the given tag without allocating a `Vec`.
252pub fn find_segments_iter<'s, 'd: 's>(
253    segments: &'s [Segment<'d>],
254    tag: &'s str,
255) -> impl Iterator<Item = &'s Segment<'d>> {
256    segments.iter().filter(move |s| s.tag == tag)
257}
258
259/// Find the first segment matching `tag` whose element 0 equals `qualifier`.
260pub fn find_qualified_segment<'s, 'd>(
261    segments: &'s [Segment<'d>],
262    tag: &str,
263    qualifier: &str,
264) -> Option<&'s Segment<'d>> {
265    segments
266        .iter()
267        .find(|s| s.tag == tag && s.element_str(0).unwrap_or("") == qualifier)
268}
269
270/// Find the first segment by type-level qualifier pattern.
271pub fn find_segment_typed<'s, 'd, T>(segments: &'s [Segment<'d>]) -> Option<&'s Segment<'d>>
272where
273    T: EdifactSegmentTag,
274{
275    segments.iter().find(|s| T::matches_segment(s))
276}
277
278/// Iterate over all segments by type-level qualifier pattern.
279pub fn find_segments_typed<'s, 'd: 's, T>(
280    segments: &'s [Segment<'d>],
281) -> impl Iterator<Item = &'s Segment<'d>>
282where
283    T: EdifactSegmentTag,
284{
285    segments.iter().filter(|s| T::matches_segment(s))
286}
287
288/// Collect contiguous groups of segments that match `T`.
289///
290/// Each group is a borrowed slice of the original `segments` array.
291/// Use [`contiguous_groups_iter`] to avoid the outer `Vec` allocation.
292pub fn contiguous_groups_by_qualifier<'s, 'd, T>(
293    segments: &'s [Segment<'d>],
294) -> Vec<&'s [Segment<'d>]>
295where
296    T: EdifactSegmentTag,
297{
298    let mut groups = Vec::new();
299    let mut idx = 0;
300    while idx < segments.len() {
301        if T::matches_segment(&segments[idx]) {
302            let start = idx;
303            idx += 1;
304            while idx < segments.len() && T::matches_segment(&segments[idx]) {
305                idx += 1;
306            }
307            groups.push(&segments[start..idx]);
308        } else {
309            idx += 1;
310        }
311    }
312    groups
313}
314
315/// Iterate lazily over contiguous groups of segments that match `T`.
316///
317/// Each yielded item is a borrowed slice `&[Segment<'_>]` that forms one
318/// contiguous run of `T`-matching segments.  No outer `Vec` is allocated —
319/// the caller can break early or collect only as many groups as needed.
320///
321/// This function uses separate lifetimes for the slice reference (`'s`) and
322/// the segment data (`'d`), matching the signature of
323/// [`contiguous_groups_by_qualifier`].
324///
325/// # Example
326/// ```rust,ignore
327/// for group in contiguous_groups_iter::<UnaSegment>(&segments) {
328///     process_group(group);
329/// }
330/// ```
331pub fn contiguous_groups_iter<'s, 'd, T>(
332    segments: &'s [Segment<'d>],
333) -> impl Iterator<Item = &'s [Segment<'d>]> + 's
334where
335    T: EdifactSegmentTag,
336{
337    let mut idx = 0;
338    let len = segments.len();
339    std::iter::from_fn(move || {
340        // Skip non-matching segments
341        while idx < len && !T::matches_segment(&segments[idx]) {
342            idx += 1;
343        }
344        if idx >= len {
345            return None;
346        }
347        let start = idx;
348        idx += 1;
349        while idx < len && T::matches_segment(&segments[idx]) {
350            idx += 1;
351        }
352        Some(&segments[start..idx])
353    })
354}
355
356/// Return `true` if all segments matching `T` are in one contiguous block.
357pub fn groups_are_contiguous_by_qualifier<T>(segments: &[Segment<'_>]) -> bool
358where
359    T: EdifactSegmentTag,
360{
361    let mut seen_match = false;
362    let mut seen_gap_after_match = false;
363
364    for seg in segments {
365        if T::matches_segment(seg) {
366            if seen_gap_after_match {
367                return false;
368            }
369            seen_match = true;
370        } else if seen_match {
371            seen_gap_after_match = true;
372        }
373    }
374
375    true
376}
377
/// Match a qualifier value against an exact or wildcard pattern.
///
/// Rules:
/// - An empty `pattern` matches only an empty `value`.
/// - If `pattern` contains `*`, it is treated as a glob wildcard where each `*`
///   stands for any (possibly empty) run of characters
///   (e.g. `"M*"` matches `"MS"`, `"MR"`; `"*:MS"` matches `"A:MS"`).
/// - If no wildcard is present, exact match is required.
///
/// Prefix matching without an explicit `*` was deliberately removed: `"M"` matches only `"M"`,
/// not `"MS"` or `"MR"`.  Use `"M*"` for prefix semantics.
///
/// The literal text before the first `*` and after the last `*` must both be
/// present in `value` **without overlapping**: `"aba"` matches neither
/// `"ab*ba"` nor `"ab**ba"`, while `"abba"` matches both.
///
/// No heap allocation is performed.
pub fn qualifier_matches_pattern(value: &str, pattern: &str) -> bool {
    // No wildcard at all (this also covers the empty pattern): exact match.
    let Some((prefix, after_first_star)) = pattern.split_once('*') else {
        return value == pattern;
    };

    // Everything after the last `*` is the anchored suffix; whatever sits
    // between the first and the last `*` are floating literals.
    let (middle, suffix) = match after_first_star.rsplit_once('*') {
        Some((middle, suffix)) => (middle, suffix),
        None => ("", after_first_star),
    };

    // Anchor the prefix, then the suffix on what is left.  Stripping in
    // sequence guarantees the two anchors never share characters of `value`.
    let Some(tail) = value.strip_prefix(prefix) else {
        return false;
    };
    let Some(mut remaining) = tail.strip_suffix(suffix) else {
        return false;
    };

    // Each floating literal must occur, in order, in the unanchored middle.
    // Taking the leftmost occurrence leaves the most room for later literals.
    // Empty literals come from consecutive `*` and match trivially.
    for literal in middle.split('*').filter(|literal| !literal.is_empty()) {
        match remaining.find(literal) {
            Some(idx) => remaining = &remaining[idx + literal.len()..],
            None => return false,
        }
    }

    true
}
441
442/// Extract the string value of element `idx` from `seg`, or `""` if absent.
443#[inline]
444pub fn element_str<'s>(seg: &'s Segment<'_>, idx: usize) -> &'s str {
445    seg.element_str(idx).unwrap_or("")
446}
447
448// ── segment accessor helpers ───────────────────────────────────────────────────
449
450/// Extract a required text element from a segment.
451///
452/// Returns the element's first component, or an error if absent or empty.
453///
454/// # Empty-string semantics
455///
456/// EDIFACT allows elements to be syntactically present but carry an empty
457/// string value (e.g., `SEG++'`). This function treats an empty string as
458/// *absent* — it returns [`EdifactError::MissingRequiredElement`] in that
459/// case, matching the EDIFACT rule that mandatory data elements must carry
460/// a non-empty value.
461///
462/// Delegates to [`SegmentAccessor::text_element`].
463pub fn required_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Result<&'a str, EdifactError> {
464    seg.text_element(idx)
465}
466
467/// Extract an optional text element from a segment.
468///
469/// Returns the element's first component, or None if absent or empty.
470///
471/// Delegates to [`SegmentAccessor::optional_element`].
472pub fn optional_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<&'a str> {
473    SegmentAccessor::optional_element(seg, idx)
474}
475
476/// Extract a required component from a segment element.
477///
478/// Returns the component value, or an error if the element or component is absent.
479///
480/// # Empty-string semantics
481///
482/// Like [`required_element`], an empty string component value is treated as
483/// *absent*.  A component that is syntactically present as `''` (two
484/// consecutive component separators) will cause this function to return
485/// [`EdifactError::MissingRequiredComponent`].
486///
487/// # Failure modes
488///
489/// - [`EdifactError::MissingRequiredElement`] — element `elem_idx` is absent.
490/// - [`EdifactError::MissingRequiredComponent`] — element is present but component `comp_idx` is absent or empty.
491///
492/// Delegates to [`SegmentAccessor::required_composite`].
493pub fn required_component<'a>(
494    seg: &'a Segment<'_>,
495    elem_idx: usize,
496    comp_idx: usize,
497) -> Result<&'a str, EdifactError> {
498    seg.required_composite(elem_idx, comp_idx)
499}
500
501/// Extract an optional component from a segment element.
502///
503/// Returns the component value, or None if absent or empty.
504///
505/// Delegates to [`SegmentAccessor::get_component`].
506pub fn optional_component<'a>(seg: &'a Segment<'_>, elem_idx: usize, comp_idx: usize) -> Option<&'a str> {
507    SegmentAccessor::get_component(seg, elem_idx, comp_idx)
508}
509
510/// Iterate over all components of an element without allocating a `Vec`.
511///
512/// Yields an empty iterator if the element is absent.
513pub fn get_components_iter<'a>(
514    seg: &'a Segment<'_>,
515    idx: usize,
516) -> impl Iterator<Item = &'a str> {
517    seg.elements
518        .get(idx)
519        .into_iter()
520        .flat_map(|elem| elem.components.iter().map(|c| c.as_ref()))
521}
522
/// Thin borrowed view over the components of one composite data element.
pub struct CompositeElement<'a> {
    /// The element's components, in wire order.
    components: &'a [std::borrow::Cow<'a, str>],
}

impl<'a> CompositeElement<'a> {
    /// Component at position `i`, or `None` when the composite is shorter.
    pub fn get(&self, i: usize) -> Option<&'a str> {
        let component = self.components.get(i)?;
        Some(component.as_ref())
    }

    /// Component at position `i`, or `""` when the composite is shorter.
    pub fn get_or_empty(&self, i: usize) -> &'a str {
        match self.get(i) {
            Some(text) => text,
            None => "",
        }
    }

    /// Number of components in the composite.
    pub fn len(&self) -> usize {
        let parts = self.components;
        parts.len()
    }

    /// `true` when the composite holds no components at all.
    pub fn is_empty(&self) -> bool {
        let parts = self.components;
        parts.is_empty()
    }

    /// Iterate over every component, in order.
    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
        let parts = self.components;
        parts.iter().map(|part| part.as_ref())
    }

    /// Wrap an existing component slice in a `CompositeElement`.
    ///
    /// Used internally by [`edifact_deserialize_owned`][EdifactDeserialize::edifact_deserialize_owned]
    /// generated code to hand over component data without converting the
    /// whole segment.
    pub fn from_slice(components: &'a [std::borrow::Cow<'a, str>]) -> Self {
        CompositeElement { components }
    }
}
562
563/// Get a composite element from a segment with clearer ergonomics.
564pub fn composite_element<'a>(seg: &'a Segment<'_>, idx: usize) -> Option<CompositeElement<'a>> {
565    seg.elements.get(idx).map(|elem| CompositeElement {
566        components: &elem.components,
567    })
568}
569
570/// Find the first [`OwnedSegment`] with the given tag.
571///
572/// Zero-allocation counterpart of [`find_segment`] for use in
573/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
574///
575/// [`OwnedSegment`]: crate::OwnedSegment
576pub fn find_segment_owned<'s>(
577    segments: &'s [crate::OwnedSegment],
578    tag: &str,
579) -> Option<&'s crate::OwnedSegment> {
580    segments.iter().find(|s| s.tag == tag)
581}
582
583/// Find the first [`OwnedSegment`] with the given tag **and** qualifier.
584///
585/// The qualifier is compared against the first component of element 0.
586/// Zero-allocation counterpart of [`find_qualified_segment`] for use in
587/// [`EdifactDeserialize::edifact_deserialize_owned`] implementations.
588///
589/// [`OwnedSegment`]: crate::OwnedSegment
590pub fn find_qualified_segment_owned<'s>(
591    segments: &'s [crate::OwnedSegment],
592    tag: &str,
593    qualifier: &str,
594) -> Option<&'s crate::OwnedSegment> {
595    segments.iter().find(|s| {
596        s.tag == tag && s.element_str(0).unwrap_or("") == qualifier
597    })
598}
599
600/// Segment accessor trait for ergonomic typed extraction.
601pub trait SegmentAccessor<'a> {
602    /// Get non-empty element text at index `idx`.
603    fn get_element(&'a self, idx: usize) -> Option<&'a str>;
604    /// Get non-empty component text at element/component indexes.
605    fn get_component(&'a self, elem: usize, comp: usize) -> Option<&'a str>;
606    /// Get a composite wrapper for element `idx`.
607    fn get_composite(&'a self, idx: usize) -> Option<CompositeElement<'a>>;
608
609    /// Get required non-empty element text.
610    fn text_element(&'a self, idx: usize) -> Result<&'a str, EdifactError>;
611    /// Get optional non-empty element text.
612    fn optional_element(&'a self, idx: usize) -> Option<&'a str>;
613    /// Parse a typed code value from a required element.
614    fn code_element<T: FromStr>(&'a self, idx: usize) -> Result<T, EdifactError>;
615    /// Get required non-empty composite component.
616    fn required_composite(&'a self, elem: usize, comp: usize) -> Result<&'a str, EdifactError>;
617    /// Get `count` required components starting at `start_idx` from element `elem`.
618    ///
619    /// Allocates a `Vec`.  For a zero-alloc alternative, use
620    /// [`repeating_components_iter`][Self::repeating_components_iter] and
621    /// consume the iterator directly without collecting.
622    fn repeating_components(
623        &'a self,
624        elem: usize,
625        start_idx: usize,
626        count: usize,
627    ) -> Result<Vec<&'a str>, EdifactError> {
628        // Default implementation delegates to the zero-alloc iterator and
629        // collects.  Implementors that can do better should override this.
630        self.repeating_components_iter(elem, start_idx, count).collect()
631    }
632
633    /// Iterate over `count` required components starting at `start_idx` from element `elem`.
634    ///
635    /// Allocation-free alternative to [`repeating_components`][Self::repeating_components];
636    /// the caller supplies the iteration budget and consumes results on the fly.
637    fn repeating_components_iter(
638        &'a self,
639        elem: usize,
640        start_idx: usize,
641        count: usize,
642    ) -> impl Iterator<Item = Result<&'a str, EdifactError>> + 'a;
643}
644
645impl<'s, 'd> SegmentAccessor<'s> for Segment<'d>
646where
647    'd: 's,
648{
649    fn get_element(&'s self, idx: usize) -> Option<&'s str> {
650        self.element_str(idx).filter(|s| !s.is_empty())
651    }
652
653    fn get_component(&'s self, elem: usize, comp: usize) -> Option<&'s str> {
654        self.elements
655            .get(elem)
656            .and_then(|e| e.get_component(comp))
657            .filter(|s| !s.is_empty())
658    }
659
660    fn get_composite(&'s self, idx: usize) -> Option<CompositeElement<'s>> {
661        composite_element(self, idx)
662    }
663
664    fn text_element(&'s self, idx: usize) -> Result<&'s str, EdifactError> {
665        <Self as SegmentAccessor>::get_element(self, idx).ok_or_else(|| {
666            EdifactError::MissingRequiredElement {
667                tag: self.tag.to_owned(),
668                element_index: idx,
669            }
670        })
671    }
672
673    fn optional_element(&'s self, idx: usize) -> Option<&'s str> {
674        <Self as SegmentAccessor>::get_element(self, idx)
675    }
676
677    fn code_element<T: FromStr>(&'s self, idx: usize) -> Result<T, EdifactError> {
678        let raw = self.text_element(idx)?;
679        raw.parse::<T>().map_err(|_| EdifactError::InvalidText {
680            offset: self.element_span(idx).map(|s| s.start).unwrap_or(self.span.start),
681        })
682    }
683
684    fn required_composite(&'s self, elem: usize, comp: usize) -> Result<&'s str, EdifactError> {
685        match self.elements.get(elem) {
686            None => Err(EdifactError::MissingRequiredElement {
687                tag: self.tag.to_owned(),
688                element_index: elem,
689            }),
690            Some(e) => e
691                .get_component(comp)
692                .filter(|s| !s.is_empty())
693                .ok_or_else(|| EdifactError::MissingRequiredComponent {
694                    tag: self.tag.to_owned(),
695                    element_index: elem,
696                    component_index: comp,
697                }),
698        }
699    }
700
701    fn repeating_components_iter(
702        &'s self,
703        elem: usize,
704        start_idx: usize,
705        count: usize,
706    ) -> impl Iterator<Item = Result<&'s str, EdifactError>> + 's {
707        let tag = self.tag;
708        let element_exists = self.elements.get(elem).is_some();
709        let components = self
710            .elements
711            .get(elem)
712            .map(|e| e.components.as_slice())
713            .unwrap_or(&[]);
714        (start_idx..start_idx + count).map(move |idx| {
715            components
716                .get(idx)
717                .map(|c| c.as_ref())
718                .filter(|s| !s.is_empty())
719                .ok_or_else(|| {
720                    if element_exists {
721                        EdifactError::MissingRequiredComponent {
722                            tag: tag.to_owned(),
723                            element_index: elem,
724                            component_index: idx,
725                        }
726                    } else {
727                        EdifactError::MissingRequiredElement {
728                            tag: tag.to_owned(),
729                            element_index: elem,
730                        }
731                    }
732                })
733        })
734    }
735}
736
737// ── message-window streaming ──────────────────────────────────────────────────
738
739/// An iterator that groups borrowed EDIFACT segments into per-message windows.
740///
741/// Zero-copy counterpart to [`MessageWindowsIter`] for in-memory byte slices.
742/// Each yielded `Vec<Segment<'_>>` borrows from the original input; no heap
743/// allocations occur per segment.  Envelope segments outside a `UNH..UNT` pair
744/// are silently skipped.
745///
746/// Obtain this via [`message_windows_bytes`].
747pub struct MessageWindowsSliceIter<'a> {
748    inner: crate::FromBytesIter<'a>,
749    buf: Vec<crate::Segment<'a>>,
750    in_message: bool,
751    done: bool,
752}
753
754impl<'a> MessageWindowsSliceIter<'a> {
755    fn new(inner: crate::FromBytesIter<'a>) -> Self {
756        Self {
757            inner,
758            buf: Vec::new(),
759            in_message: false,
760            done: false,
761        }
762    }
763}
764
765impl<'a> Iterator for MessageWindowsSliceIter<'a> {
766    type Item = Result<Vec<crate::Segment<'a>>, EdifactError>;
767
768    fn next(&mut self) -> Option<Self::Item> {
769        if self.done {
770            return None;
771        }
772        loop {
773            let segment = match self.inner.next() {
774                Some(Ok(s)) => s,
775                Some(Err(e)) => {
776                    self.done = true;
777                    return Some(Err(e));
778                }
779                None => {
780                    self.done = true;
781                    if self.in_message && !self.buf.is_empty() {
782                        self.in_message = false;
783                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
784                        return Some(Err(EdifactError::UnexpectedEof { offset }));
785                    }
786                    return None;
787                }
788            };
789
790            match segment.tag {
791                "UNH" => {
792                    if self.in_message {
793                        self.buf.clear();
794                        self.in_message = false;
795                        self.done = true;
796                        let offset = segment.span.start;
797                        return Some(Err(EdifactError::InvalidSegmentForMessage {
798                            tag: "UNH".to_owned(),
799                            message_type: "ENVELOPE".to_owned(),
800                            offset,
801                        }));
802                    }
803                    self.buf.clear();
804                    self.in_message = true;
805                    self.buf.push(segment);
806                }
807                "UNT" if self.in_message => {
808                    self.buf.push(segment);
809                    self.in_message = false;
810                    return Some(Ok(std::mem::take(&mut self.buf)));
811                }
812                _ if self.in_message => {
813                    self.buf.push(segment);
814                }
815                _ => {
816                    // Envelope segment outside a window — skip.
817                }
818            }
819        }
820    }
821}
822
/// An iterator that groups owned EDIFACT segments into per-message windows.
///
/// Each yielded item is a `Vec<OwnedSegment>` containing the segments for one
/// complete `UNH..UNT` message, inclusive of both service segments.
/// Envelope-level segments (`UNB`, `UNG`, `UNZ`, `UNE`) that sit outside any
/// `UNH..UNT` pair are silently skipped.  The same applies to every other
/// segment seen while no window is open, including a stray `UNT`.
///
/// # Errors
///
/// - An inner-iterator error is forwarded immediately and iteration stops.
/// - A `UNH` seen while a prior window is still open (missing `UNT`) is an error.
/// - Input that ends while a `UNH` window is open (stream truncation) yields
///   `Err(EdifactError::UnexpectedEof { … })` before returning `None`.
///
/// After any of these errors, and after a clean end of input, every later
/// call to `next()` returns `None`.
///
/// # Construction
///
/// Use [`message_windows_from_reader`] or [`message_windows_bytes`] to
/// obtain a `MessageWindowsIter` directly.  For fully custom sources, call
/// [`MessageWindowsIter::new`] with any `Iterator<Item = Result<OwnedSegment,
/// EdifactError>>`.
pub struct MessageWindowsIter<I> {
    /// Underlying source of owned segments (or errors) being grouped.
    inner: I,
    /// Segments collected so far for the window currently being assembled.
    buf: Vec<crate::OwnedSegment>,
    /// `true` from the moment a `UNH` is accepted until its `UNT` is seen.
    in_message: bool,
    /// Set to `true` after any terminal condition (error or clean EOF) so that
    /// subsequent `next()` calls immediately return `None`.
    done: bool,
}
851
852impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> MessageWindowsIter<I> {
853    /// Wrap any owned-segment iterator as a message-window iterator.
854    pub fn new(inner: I) -> Self {
855        Self {
856            inner,
857            buf: Vec::new(),
858            in_message: false,
859            done: false,
860        }
861    }
862}
863
864impl<I: Iterator<Item = Result<crate::OwnedSegment, EdifactError>>> Iterator
865    for MessageWindowsIter<I>
866{
867    type Item = Result<Vec<crate::OwnedSegment>, EdifactError>;
868
869    fn next(&mut self) -> Option<Self::Item> {
870        if self.done {
871            return None;
872        }
873        loop {
874            let segment = match self.inner.next() {
875                Some(Ok(s)) => s,
876                Some(Err(e)) => {
877                    self.done = true;
878                    return Some(Err(e));
879                }
880                None => {
881                    self.done = true;
882                    // A window that opened (UNH seen) but never closed (no UNT)
883                    // means the stream was truncated — surface as an error.
884                    if self.in_message && !self.buf.is_empty() {
885                        self.in_message = false;
886                        let offset = self.buf.last().map(|s| s.span.end).unwrap_or(0);
887                        return Some(Err(EdifactError::UnexpectedEof { offset }));
888                    }
889                    return None;
890                }
891            };
892
893            match segment.tag.as_str() {
894                "UNH" => {
895                    if self.in_message {
896                        // Malformed: new UNH without closing the prior UNT.
897                        self.buf.clear();
898                        self.in_message = false;
899                        self.done = true;
900                        let offset = segment.span.start;
901                        return Some(Err(EdifactError::InvalidSegmentForMessage {
902                            tag: "UNH".to_owned(),
903                            message_type: "ENVELOPE".to_owned(),
904                            offset,
905                        }));
906                    }
907                    self.buf.clear();
908                    self.in_message = true;
909                    self.buf.push(segment);
910                }
911                "UNT" if self.in_message => {
912                    self.buf.push(segment);
913                    self.in_message = false;
914                    return Some(Ok(std::mem::take(&mut self.buf)));
915                }
916                _ if self.in_message => {
917                    self.buf.push(segment);
918                }
919                _ => {
920                    // Envelope segment outside a window — skip.
921                }
922            }
923        }
924    }
925}
926
927/// Stream-parse EDIFACT bytes into an iterator of per-message windows.
928///
929/// Each window is a `Vec<Segment<'_>>` spanning one `UNH..UNT` pair, with
930/// segments borrowing from `input` — **zero heap allocations per segment**.
931/// Envelope segments (`UNB`, `UNZ`, …) are skipped automatically.
932///
933/// # Example
934/// ```
935/// use edifact_rs::message_windows_bytes;
936/// let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'\
937///               UNH+1+ORDERS:D:96A:UN'\
938///               BGM+220+PO-001+9'\
939///               UNT+3+1'\
940///               UNZ+1+1'";
941///
942/// let windows: Vec<_> = message_windows_bytes(input)
943///     .collect::<Result<_, _>>()
944///     .unwrap();
945/// assert_eq!(windows.len(), 1);
946/// assert_eq!(windows[0][0].tag, "UNH");
947/// assert_eq!(windows[0].last().unwrap().tag, "UNT");
948/// ```
949pub fn message_windows_bytes(input: &[u8]) -> MessageWindowsSliceIter<'_> {
950    MessageWindowsSliceIter::new(crate::from_bytes(input))
951}
952
953/// Stream-parse EDIFACT from a reader into an iterator of per-message windows.
954///
955/// Each window is a `Vec<OwnedSegment>` spanning one `UNH..UNT` pair.
956/// This variant reads lazily — only enough input to complete one window is
957/// consumed per [`Iterator::next`] call.
958pub fn message_windows_from_reader<R: Read>(
959    reader: R,
960) -> MessageWindowsIter<crate::FromReaderIter<R>> {
961    MessageWindowsIter::new(crate::from_reader_iter(reader))
962}
963
964/// Stream typed messages from a reader by deserializing each `UNH..UNT` window.
965///
966/// This is the highest-level streaming API: it returns one `T` per message,
967/// reading only as much data as needed to complete each window.
968///
969/// Each message window is deserialized via
970/// [`EdifactDeserialize::edifact_deserialize_owned`], which avoids the
971/// intermediate `Vec<Segment<'_>>` allocation incurred by the slice-based path.
972/// Types derived with `#[derive(EdifactDeserialize)]` provide an efficient
973/// override; manual implementations fall back to [`crate::OwnedSegment::as_borrowed`].
974///
975/// # Example
976/// ```ignore
977/// // Assuming `OrdersMessage` implements `EdifactDeserialize`:
978/// let messages: Vec<OrdersMessage> =
979///     deserialize_messages_from_reader::<OrdersMessage, _>(reader)
980///         .collect::<Result<_, _>>()?;
981/// ```
982pub fn deserialize_messages_from_reader<T, R>(
983    reader: R,
984) -> impl Iterator<Item = Result<T, EdifactError>>
985where
986    T: EdifactDeserialize,
987    R: Read,
988{
989    message_windows_from_reader(reader).map(|window| {
990        let window = window?;
991        T::edifact_deserialize_owned(&window)
992    })
993}
994
995/// Stream typed messages from a byte slice by deserializing each `UNH..UNT` window.
996pub fn deserialize_messages_bytes<T>(
997    input: &[u8],
998) -> impl Iterator<Item = Result<T, EdifactError>> + '_
999where
1000    T: EdifactDeserialize,
1001{
1002    message_windows_bytes(input).map(|window| {
1003        let window = window?;
1004        T::edifact_deserialize(&window)
1005    })
1006}
1007
1008// ── message_type_from_window ──────────────────────────────────────────────────
1009
1010/// Extract the EDIFACT message type from a message window.
1011///
1012/// Scans the segment slice for a `UNH` segment and returns a borrow of the
1013/// message-type component (element 1, component 0).  Returns `None` if no
1014/// `UNH` segment is present or if the message-type component is absent.
1015///
1016/// # Example
1017///
1018/// ```rust,ignore
1019/// for window in message_windows_bytes(input) {
1020///     let window = window?;
1021///     match message_type_from_window(&window) {
1022///         Some("ORDERS") => { /* … */ }
1023///         Some("INVOIC") => { /* … */ }
1024///         other => eprintln!("unhandled message type: {other:?}"),
1025///     }
1026/// }
1027/// ```
1028pub fn message_type_from_window<'a>(window: &'a [Segment<'a>]) -> Option<&'a str> {
1029    window
1030        .iter()
1031        .find(|s| s.tag == "UNH")
1032        .and_then(|unh| unh.get_element(1))
1033        .and_then(|e| e.get_component(0))
1034}
1035
1036// ── MessageDispatch ───────────────────────────────────────────────────────────
1037
/// The result of a successful [`MessageDispatch`] call: a deserialized
/// message whose concrete type has been erased.
pub struct DispatchedMessage {
    /// The EDIFACT message type string extracted from the `UNH` segment.
    pub message_type: String,
    /// The handler's return value, boxed behind `Any`.
    value: Box<dyn std::any::Any + Send + Sync>,
}

impl DispatchedMessage {
    /// Borrow the stored value as a `T`.
    ///
    /// Yields `None` when the value held by this message is not a `T`.
    pub fn downcast<T: std::any::Any + Send + Sync + 'static>(&self) -> Option<&T> {
        let erased: &(dyn std::any::Any + Send + Sync) = &*self.value;
        erased.downcast_ref::<T>()
    }
}

impl std::fmt::Debug for DispatchedMessage {
    /// Prints only `message_type`; the erased value is shown as `..`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("DispatchedMessage");
        out.field("message_type", &self.message_type);
        out.finish_non_exhaustive()
    }
}
1061
/// Boxed, type-erased handler registered for one message type: takes the
/// message window and returns the deserialized value boxed as `Any`.
type DispatchHandlerFn =
    Box<dyn for<'a> Fn(&[Segment<'a>]) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError> + Send + Sync>;

/// Boxed, type-erased fallback handler: like [`DispatchHandlerFn`], but also
/// receives the message-type string that had no registered handler.
type FallbackHandlerFn =
    Box<dyn for<'a> Fn(&[Segment<'a>], &str) -> Result<Box<dyn std::any::Any + Send + Sync>, EdifactError> + Send + Sync>;
1067
/// Type-based dispatcher for mixed-message EDIFACT streams.
///
/// Register one handler per message type with [`on`][Self::on], then call
/// [`dispatch`][Self::dispatch] on each message window.  If no handler matches
/// and a [`fallback`][Self::fallback] was registered it is invoked instead;
/// otherwise an [`EdifactError::UnexpectedMessageType`] is returned.
///
/// Handlers are tried in registration order and the first one whose message
/// type matches is used, so registering the same message type twice leaves
/// the later handler unreachable.
///
/// # Example
///
/// ```rust,ignore
/// let dispatch = MessageDispatch::new()
///     .on("ORDERS",  |segs| Orders::edifact_deserialize(segs))
///     .on("INVOIC",  |segs| Invoice::edifact_deserialize(segs));
///
/// for window in message_windows_bytes(input) {
///     let window = window?;
///     let msg = dispatch.dispatch(&window)?;
///     match msg.message_type.as_str() {
///         "ORDERS"  => { let o = msg.downcast::<Orders>().unwrap(); /* … */ }
///         "INVOIC"  => { let i = msg.downcast::<Invoice>().unwrap(); /* … */ }
///         _         => unreachable!(),
///     }
/// }
/// ```
pub struct MessageDispatch {
    /// `(message type, handler)` pairs, in registration order.
    handlers: Vec<(String, DispatchHandlerFn)>,
    /// Handler invoked when no entry in `handlers` matches, if one was set.
    fallback: Option<FallbackHandlerFn>,
}
1096
1097impl Default for MessageDispatch {
1098    fn default() -> Self {
1099        Self::new()
1100    }
1101}
1102
1103impl MessageDispatch {
1104    /// Create an empty dispatcher.
1105    pub fn new() -> Self {
1106        Self {
1107            handlers: Vec::new(),
1108            fallback: None,
1109        }
1110    }
1111
1112    /// Register a handler for `message_type`.
1113    ///
1114    /// The closure receives the full message window and returns a typed value
1115    /// that is boxed and stored inside [`DispatchedMessage`].
1116    pub fn on<T, F>(mut self, message_type: &str, handler: F) -> Self
1117    where
1118        T: std::any::Any + Send + Sync + 'static,
1119        F: for<'a> Fn(&[Segment<'a>]) -> Result<T, EdifactError> + Send + Sync + 'static,
1120    {
1121        let erased: DispatchHandlerFn = Box::new(move |segs| {
1122            let val = handler(segs)?;
1123            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1124        });
1125        self.handlers.push((message_type.to_owned(), erased));
1126        self
1127    }
1128
1129    /// Register a fallback handler for unrecognised message types.
1130    ///
1131    /// The closure receives the segment window **and** the unknown message-type
1132    /// string.
1133    pub fn fallback<T, F>(mut self, handler: F) -> Self
1134    where
1135        T: std::any::Any + Send + Sync + 'static,
1136        F: for<'a> Fn(&[Segment<'a>], &str) -> Result<T, EdifactError> + Send + Sync + 'static,
1137    {
1138        let erased: FallbackHandlerFn = Box::new(move |segs, mt| {
1139            let val = handler(segs, mt)?;
1140            Ok(Box::new(val) as Box<dyn std::any::Any + Send + Sync>)
1141        });
1142        self.fallback = Some(erased);
1143        self
1144    }
1145
1146    /// Dispatch a single message window to the appropriate handler.
1147    ///
1148    /// The message type is extracted from the `UNH` segment.  If no `UNH` is
1149    /// present, [`EdifactError::MissingSegment`] is returned.
1150    pub fn dispatch(&self, window: &[Segment<'_>]) -> Result<DispatchedMessage, EdifactError> {
1151        let message_type = window
1152            .iter()
1153            .find(|s| s.tag == "UNH")
1154            .and_then(|unh| unh.get_element(1))
1155            .and_then(|e| e.get_component(0))
1156            .map(|s| s.to_owned())
1157            .ok_or_else(|| EdifactError::MissingSegment {
1158                tag: "UNH".to_owned(),
1159                expected_position: "first segment of message window".to_owned(),
1160            })?;
1161
1162        for (mt, handler) in &self.handlers {
1163            if *mt == message_type {
1164                let value = handler(window)?;
1165                return Ok(DispatchedMessage { message_type, value });
1166            }
1167        }
1168
1169        if let Some(fallback) = &self.fallback {
1170            let value = fallback(window, &message_type)?;
1171            return Ok(DispatchedMessage { message_type, value });
1172        }
1173
1174        Err(EdifactError::UnexpectedMessageType {
1175            message_type,
1176        })
1177    }
1178
1179    /// Dispatch all messages from a byte reader.
1180    ///
1181    /// Each message window is extracted and dispatched in order.  The returned
1182    /// iterator is lazy — errors are yielded as `Err` items.
1183    pub fn dispatch_all_from_bytes<'a>(
1184        &'a self,
1185        input: &'a [u8],
1186    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + 'a {
1187        message_windows_bytes(input).map(move |window| {
1188            let window = window?;
1189            self.dispatch(&window)
1190        })
1191    }
1192
1193    /// Dispatch all messages from a reader.
1194    ///
1195    /// Parses the stream into message windows and dispatches each.  The
1196    /// returned iterator yields owned [`DispatchedMessage`] values lazily:
1197    /// each window is fully buffered in memory (as `Vec<OwnedSegment>`) before
1198    /// dispatch, but windows are processed one at a time rather than all at once.
1199    pub fn dispatch_all_from_reader<R: Read + 'static>(
1200        &self,
1201        reader: R,
1202    ) -> impl Iterator<Item = Result<DispatchedMessage, EdifactError>> + '_ {
1203        message_windows_from_reader(reader).map(|window| {
1204            let window = window?;
1205            let borrowed: Vec<Segment<'_>> = window.iter().map(|s| s.as_borrowed()).collect();
1206            self.dispatch(&borrowed)
1207        })
1208    }
1209}
1210
1211#[cfg(test)]
1212mod tests {
1213    use super::*;
1214
1215    // ── manual test impl ──────────────────────────────────────────────────────
1216    #[derive(Debug, PartialEq)]
1217    struct BgmSegment {
1218        doc_name_code: String,
1219        pruef_id: String,
1220        msg_function: Option<String>,
1221    }
1222
1223    impl EdifactSegmentTag for BgmSegment {
1224        const SEGMENT_TAG: &'static str = "BGM";
1225    }
1226
1227    struct NadM;
1228
1229    impl EdifactSegmentTag for NadM {
1230        const SEGMENT_TAG: &'static str = "NAD";
1231        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
1232    }
1233
1234    struct NadWildcard;
1235
1236    impl EdifactSegmentTag for NadWildcard {
1237        const SEGMENT_TAG: &'static str = "NAD";
1238        const QUALIFIER_PATTERN: Option<&'static str> = Some("M*");
1239    }
1240
1241    impl EdifactDeserialize for BgmSegment {
1242        fn edifact_deserialize(segments: &[Segment<'_>]) -> Result<Self, EdifactError> {
1243            let seg = find_segment(segments, "BGM").ok_or_else(|| {
1244                EdifactError::MissingRequiredElement {
1245                    tag: "BGM".to_owned(),
1246                    element_index: 0,
1247                }
1248            })?;
1249            Ok(Self {
1250                doc_name_code: element_str(seg, 0).to_owned(),
1251                pruef_id: element_str(seg, 1).to_owned(),
1252                msg_function: seg
1253                    .element_str(2)
1254                    .filter(|s| !s.is_empty())
1255                    .map(str::to_owned),
1256            })
1257        }
1258    }
1259
1260    #[test]
1261    fn deserialize_single_segment() {
1262        let input = b"BGM+E03+11042+9'";
1263        let bgm: BgmSegment = deserialize(input).unwrap();
1264        assert_eq!(bgm.doc_name_code, "E03");
1265        assert_eq!(bgm.pruef_id, "11042");
1266        assert_eq!(bgm.msg_function, Some("9".to_owned()));
1267    }
1268
1269    #[test]
1270    fn streaming_deserialize_first_from_bytes() {
1271        let input = b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'";
1272        let bgm: BgmSegment = deserialize_first_streaming(input).unwrap();
1273        assert_eq!(bgm.pruef_id, "11042");
1274    }
1275
1276    #[test]
1277    fn streaming_deserialize_all_from_bytes() {
1278        let input = b"BGM+E03+11042+9'RFF+AA:1'BGM+E01+11043+9'";
1279        let bgms: Vec<BgmSegment> = deserialize_all_streaming(input).unwrap();
1280        assert_eq!(bgms.len(), 2);
1281        assert_eq!(bgms[0].pruef_id, "11042");
1282        assert_eq!(bgms[1].pruef_id, "11043");
1283    }
1284
1285    #[test]
1286    fn streaming_deserialize_first_from_reader() {
1287        let input = std::io::Cursor::new(b"UNH+1+ORDERS:D:11A:UN'BGM+E03+11042+9'UNT+3+1'".to_vec());
1288        let bgm: BgmSegment = deserialize_first_from_reader(input).unwrap();
1289        assert_eq!(bgm.pruef_id, "11042");
1290    }
1291
1292    #[test]
1293    fn streaming_deserialize_all_from_reader() {
1294        let input = std::io::Cursor::new(b"BGM+E03+11042+9'BGM+E01+11043+9'".to_vec());
1295        let bgms: Vec<BgmSegment> = deserialize_all_from_reader(input).unwrap();
1296        assert_eq!(bgms.len(), 2);
1297        assert_eq!(bgms[0].pruef_id, "11042");
1298        assert_eq!(bgms[1].pruef_id, "11043");
1299    }
1300
1301    #[test]
1302    fn missing_segment_returns_error() {
1303        let input = b"DTM+137:20230401:102'";
1304        let result: Result<BgmSegment, _> = deserialize(input);
1305        assert!(result.is_err());
1306    }
1307
1308    #[test]
1309    fn vec_collects_all_matching_segments() {
1310        let input = b"DTM+137:20230401:102'BGM+E03+11042+9'BGM+E01+11043+9'";
1311        let bgms: Vec<BgmSegment> = deserialize(input).unwrap();
1312        assert_eq!(bgms.len(), 2);
1313        assert_eq!(bgms[0].pruef_id, "11042");
1314        assert_eq!(bgms[1].pruef_id, "11043");
1315    }
1316
1317    #[test]
1318    fn find_qualified_segment_matches_qualifier() {
1319        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1320        let segments: Vec<Segment<'_>> =
1321            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1322        let nad_ms = find_qualified_segment(&segments, "NAD", "MS");
1323        let nad_mr = find_qualified_segment(&segments, "NAD", "MR");
1324        assert!(nad_ms.is_some());
1325        assert!(nad_mr.is_some());
1326        assert_eq!(element_str(nad_ms.unwrap(), 0), "MS");
1327        assert_eq!(element_str(nad_mr.unwrap(), 0), "MR");
1328    }
1329
1330    #[test]
1331    fn round_trip_str_api() {
1332        let input = "BGM+E03+11042+9'";
1333        let bgm: BgmSegment = deserialize_str(input).unwrap();
1334        assert_eq!(bgm.pruef_id, "11042");
1335    }
1336
1337    #[test]
1338    fn required_element_extraction() {
1339        let input = b"BGM+E03+11042+9'";
1340        let segments: Vec<Segment<'_>> =
1341            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1342        let seg = &segments[0];
1343
1344        assert_eq!(required_element(seg, 0).unwrap(), "E03");
1345        assert_eq!(required_element(seg, 1).unwrap(), "11042");
1346        // Element 5 doesn't exist
1347        assert!(required_element(seg, 5).is_err());
1348    }
1349
1350    #[test]
1351    fn optional_element_extraction() {
1352        let input = b"BGM+E03+11042+9'BGM+E01++absent'";
1353        let segments: Vec<Segment<'_>> =
1354            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1355
1356        // First segment
1357        assert_eq!(optional_element(&segments[0], 0), Some("E03"));
1358        assert_eq!(optional_element(&segments[0], 1), Some("11042"));
1359        assert_eq!(optional_element(&segments[0], 5), None);
1360
1361        // Second segment with empty element
1362        assert_eq!(optional_element(&segments[1], 1), None);
1363    }
1364
1365    #[test]
1366    fn component_extraction() {
1367        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1368        let segments: Vec<Segment<'_>> =
1369            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1370        let seg = &segments[0];
1371
1372        assert_eq!(required_component(seg, 0, 0).unwrap(), "UNOA");
1373        assert_eq!(required_component(seg, 0, 1).unwrap(), "1");
1374        // Non-existent component
1375        assert!(required_component(seg, 0, 5).is_err());
1376    }
1377
1378    #[test]
1379    fn composite_element_helper() {
1380        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1381        let segments: Vec<Segment<'_>> =
1382            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1383        let seg = &segments[0];
1384
1385        let comp = composite_element(seg, 0).unwrap();
1386        assert_eq!(comp.len(), 2);
1387        assert_eq!(comp.get(0), Some("UNOA"));
1388        assert_eq!(comp.get(1), Some("1"));
1389        assert_eq!(comp.get(5), None);
1390        assert_eq!(comp.get_or_empty(5), "");
1391    }
1392
1393    #[test]
1394    fn get_all_components() {
1395        // UNB has composite element: UNOA:1
1396        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1397        let segments: Vec<Segment<'_>> =
1398            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1399        let seg = &segments[0];
1400
1401        let comps: Vec<&str> = get_components_iter(seg, 0).collect(); // First element is UNOA:1
1402        assert!(!comps.is_empty(), "Expected components but got empty");
1403        assert_eq!(comps.len(), 2);
1404        assert_eq!(comps[0], "UNOA");
1405        assert_eq!(comps[1], "1");
1406    }
1407
1408    #[test]
1409    fn qualifier_pattern_matching_supports_exact_and_wildcard() {
1410        // Exact match (no wildcard)
1411        assert!(qualifier_matches_pattern("MS", "MS"));
1412        assert!(!qualifier_matches_pattern("MS", "M")); // Not a prefix match after R-003
1413        // Wildcard patterns
1414        assert!(qualifier_matches_pattern("MS", "M*"));
1415        assert!(qualifier_matches_pattern("MRY", "M*Y"));
1416        assert!(!qualifier_matches_pattern("AB", "M*"));
1417    }
1418
1419    /// Comprehensive table-driven tests for `qualifier_matches_pattern`.
1420    #[test]
1421    fn qualifier_matches_pattern_table() {
1422        // (value, pattern, expected)
1423        let cases: &[(&str, &str, bool)] = &[
1424            // ── empty inputs ────────────────────────────────────────────────
1425            ("", "", true),        // empty matches empty
1426            ("", "*", true),       // wildcard matches empty string
1427            ("A", "", false),      // non-empty does not match empty pattern
1428            ("", "A", false),      // empty does not match non-empty literal
1429            // ── literal (no wildcard) ────────────────────────────────────────
1430            ("MS", "MS", true),
1431            ("BY", "BY", true),
1432            ("ms", "MS", false),   // case-sensitive
1433            ("MSX", "MS", false),  // prefix is NOT a match without wildcard
1434            ("M", "MS", false),    // too short
1435            // ── single wildcard at the end (prefix match) ────────────────────
1436            ("MS", "M*", true),
1437            ("MULTI", "MUL*", true),
1438            ("AB", "M*", false),
1439            ("", "M*", false),     // empty does not start with 'M'
1440            // ── single wildcard at the start (suffix match) ──────────────────
1441            ("MSG", "*G", true),
1442            ("G", "*G", true),
1443            ("MSG", "*X", false),
1444            ("", "*G", false),
1445            // ── wildcard in the middle ───────────────────────────────────────
1446            ("MRY", "M*Y", true),
1447            ("MAY", "M*Y", true),
1448            ("MY", "M*Y", true),   // zero-width wildcard: "M" + "" + "Y"
1449            ("MYY", "M*Y", true),  // last 'Y' matches, wildcard = 'Y'
1450            ("MAYZ", "M*Y", false),// does not end with 'Y'
1451            ("AB", "M*Y", false),
1452            // ── bare wildcard (match-all) ────────────────────────────────────
1453            ("*", "*", true),      // literal '*' value vs wildcard pattern
1454            ("anything", "*", true),
1455            ("", "*", true),
1456            // ── multiple wildcards ────────────────────────────────────────────
1457            ("ABCDE", "A*C*E", true),
1458            ("ACE", "A*C*E", true),  // zero-width wildcards
1459            ("AXCYE", "A*C*E", true),
1460            ("ABCDF", "A*C*E", false),
1461            // ── wildcard with empty segment between stars ─────────────────────
1462            ("AB", "A**B", true),   // "A**B" → parts ["A", "", "B"] → ends_with_wildcard?
1463            // ── pattern longer than value ─────────────────────────────────────
1464            ("AB", "A*B*C", false),
1465            // ── value contains pattern as substring but must anchor start ─────
1466            ("XMS", "MS", false),
1467        ];
1468
1469        for (value, pattern, expected) in cases {
1470            let got = qualifier_matches_pattern(value, pattern);
1471            assert_eq!(
1472                got, *expected,
1473                "qualifier_matches_pattern({value:?}, {pattern:?}) expected {expected} but got {got}"
1474            );
1475        }
1476    }
1477
1478    #[test]
1479    fn typed_qualifier_helpers_work() {
1480        let input = b"NAD+MS+9900001+293'NAD+MR+9900002+293'";
1481        let segments: Vec<Segment<'_>> =
1482            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1483
1484        let first = find_segment_typed::<NadM>(&segments).unwrap();
1485        assert_eq!(first.element_str(0), Some("MS"));
1486
1487        let all: Vec<_> = find_segments_typed::<NadWildcard>(&segments).collect();
1488        assert_eq!(all.len(), 2);
1489    }
1490
1491    #[test]
1492    fn segment_accessor_trait_methods_work() {
1493        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'";
1494        let segments: Vec<Segment<'_>> =
1495            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1496        let seg = &segments[0];
1497
1498        assert_eq!(SegmentAccessor::get_element(seg, 1), Some("SENDER"));
1499        assert_eq!(SegmentAccessor::required_composite(seg, 0, 1).unwrap(), "1");
1500        let parsed: i32 = SegmentAccessor::code_element(seg, 4).unwrap();
1501        assert_eq!(parsed, 1);
1502        let reps = SegmentAccessor::repeating_components(seg, 3, 0, 2).unwrap();
1503        assert_eq!(reps, vec!["200101", "0900"]);
1504    }
1505
1506    #[test]
1507    fn group_helpers_detect_contiguity() {
1508        struct NadAny;
1509        impl EdifactSegmentTag for NadAny {
1510            const SEGMENT_TAG: &'static str = "NAD";
1511        }
1512
1513        let contiguous_input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'";
1514        let contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(contiguous_input)
1515            .collect::<Result<_, _>>()
1516            .unwrap();
1517        assert!(groups_are_contiguous_by_qualifier::<NadAny>(
1518            &contiguous_segments
1519        ));
1520
1521        let non_contiguous_input = b"NAD+MS+1'RFF+AA:1'NAD+MR+2'";
1522        let non_contiguous_segments: Vec<Segment<'_>> = crate::from_bytes(non_contiguous_input)
1523            .collect::<Result<_, _>>()
1524            .unwrap();
1525        assert!(!groups_are_contiguous_by_qualifier::<NadAny>(
1526            &non_contiguous_segments
1527        ));
1528    }
1529
1530    #[test]
1531    fn group_helpers_collect_contiguous_groups() {
1532        struct NadAny;
1533        impl EdifactSegmentTag for NadAny {
1534            const SEGMENT_TAG: &'static str = "NAD";
1535        }
1536
1537        let input = b"NAD+MS+1'NAD+MR+2'RFF+AA:1'NAD+BY+3'";
1538        let segments: Vec<Segment<'_>> =
1539            crate::from_bytes(input).collect::<Result<_, _>>().unwrap();
1540        let groups = contiguous_groups_by_qualifier::<NadAny>(&segments);
1541
1542        assert_eq!(groups.len(), 2);
1543        assert_eq!(groups[0].len(), 2);
1544        assert_eq!(groups[1].len(), 1);
1545    }
1546
1547    // ── MessageWindowsIter tests ──────────────────────────────────────────────
1548
1549    #[test]
1550    fn message_windows_bytes_yields_complete_windows() {
1551        let input = b"UNB+UNOA:1+S+R+200101:0900+1'\
1552                      UNH+1+ORDERS:D:96A:UN'\
1553                      BGM+220+PO-001+9'\
1554                      UNT+3+1'\
1555                      UNZ+1+1'";
1556        let windows: Vec<_> = message_windows_bytes(input)
1557            .collect::<Result<_, _>>()
1558            .unwrap();
1559        assert_eq!(windows.len(), 1);
1560        assert_eq!(windows[0][0].tag, "UNH");
1561        assert_eq!(windows[0].last().unwrap().tag, "UNT");
1562    }
1563
1564    #[test]
1565    fn message_windows_truncated_stream_returns_error() {
1566        // Stream ends after UNH and BGM but without UNT — truncation must be an error
1567        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
1568        let results: Vec<_> = message_windows_bytes(input).collect();
1569        assert_eq!(results.len(), 1);
1570        assert!(
1571            matches!(results[0], Err(EdifactError::UnexpectedEof { .. })),
1572            "expected UnexpectedEof for truncated window, got: {:?}",
1573            results[0]
1574        );
1575    }
1576
1577    #[test]
1578    fn message_windows_subsequent_calls_return_none_after_truncation() {
1579        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'";
1580        let mut iter = message_windows_bytes(input);
1581        assert!(matches!(
1582            iter.next(),
1583            Some(Err(EdifactError::UnexpectedEof { .. }))
1584        ));
1585        // After the error, the iterator must be fused (done = true)
1586        assert!(iter.next().is_none());
1587    }
1588
1589    #[test]
1590    fn message_windows_unh_without_unt_before_next_unh_returns_error() {
1591        let input = b"UNH+1+ORDERS:D:96A:UN'BGM+220+PO-001+9'\
1592                      UNH+2+ORDERS:D:96A:UN'BGM+220+PO-002+9'UNT+3+2'";
1593        let results: Vec<_> = message_windows_bytes(input).collect();
1594        // First item must be an error (UNH before UNT — missing closer)
1595        assert!(
1596            matches!(
1597                results[0],
1598                Err(EdifactError::InvalidSegmentForMessage { ref tag, .. }) if tag == "UNH"
1599            ),
1600            "expected InvalidSegmentForMessage(UNH), got: {:?}",
1601            results[0]
1602        );
1603    }
1604
1605    // ── SegmentAccessor unit tests ─────────────────────────────────────────────
1606
1607    fn parse_one(input: &str) -> crate::OwnedSegment {
1608        crate::from_reader(std::io::Cursor::new(input.as_bytes()))
1609            .expect("parse failed")
1610            .into_iter()
1611            .next()
1612            .expect("at least one segment")
1613    }
1614
1615    #[test]
1616    fn segment_accessor_get_element_returns_value() {
1617        let owned = parse_one("BGM+220+PO-001+9'");
1618        let seg = owned.as_borrowed();
1619        assert_eq!(SegmentAccessor::get_element(&seg, 0), Some("220"));
1620        assert_eq!(SegmentAccessor::get_element(&seg, 1), Some("PO-001"));
1621        assert_eq!(SegmentAccessor::get_element(&seg, 2), Some("9"));
1622        assert_eq!(SegmentAccessor::get_element(&seg, 9), None, "out-of-bounds must return None");
1623    }
1624
1625    #[test]
1626    fn segment_accessor_get_element_filters_empty() {
1627        let owned = parse_one("TST+++VALUE'");
1628        let seg = owned.as_borrowed();
1629        // elements 0 and 1 are empty; element 2 is "VALUE"
1630        assert_eq!(SegmentAccessor::get_element(&seg, 0), None, "empty element must return None");
1631        assert_eq!(SegmentAccessor::get_element(&seg, 1), None, "empty element must return None");
1632        assert_eq!(SegmentAccessor::get_element(&seg, 2), Some("VALUE"));
1633    }
1634
1635    #[test]
1636    fn segment_accessor_get_component_returns_value() {
1637        let owned = parse_one("UNH+1+ORDERS:D:96A:UN'");
1638        let seg = owned.as_borrowed();
1639        assert_eq!(seg.get_component(1, 0), Some("ORDERS"));
1640        assert_eq!(seg.get_component(1, 1), Some("D"));
1641        assert_eq!(seg.get_component(1, 2), Some("96A"));
1642        assert_eq!(seg.get_component(1, 3), Some("UN"));
1643        assert_eq!(seg.get_component(1, 9), None, "out-of-bounds must return None");
1644    }
1645
1646    #[test]
1647    fn segment_accessor_text_element_errors_on_missing() {
1648        let owned = parse_one("BGM+'");
1649        let seg = owned.as_borrowed();
1650        // element 0 is empty — text_element must return an error
1651        let err = seg.text_element(0);
1652        assert!(
1653            matches!(err, Err(EdifactError::MissingRequiredElement { ref tag, element_index: 0 }) if tag == "BGM"),
1654            "expected MissingRequiredElement, got: {err:?}"
1655        );
1656    }
1657
1658    #[test]
1659    fn segment_accessor_required_composite_errors_on_missing() {
1660        let owned = parse_one("DTM+137'");
1661        let seg = owned.as_borrowed();
1662        // component 1 of element 0 is absent
1663        let err = seg.required_composite(0, 1);
1664        assert!(
1665            matches!(err, Err(EdifactError::MissingRequiredComponent { ref tag, element_index: 0, component_index: 1 }) if tag == "DTM"),
1666            "expected MissingRequiredComponent, got: {err:?}"
1667        );
1668    }
1669
1670    #[test]
1671    fn segment_accessor_code_element_parses_integer() {
1672        let owned = parse_one("QTY+21:100'");
1673        let seg = owned.as_borrowed();
1674        let qty: u32 = seg.code_element(0).expect("should parse qualifier as u32");
1675        assert_eq!(qty, 21);
1676    }
1677
1678    #[test]
1679    fn segment_accessor_optional_element_absent_returns_none() {
1680        let owned = parse_one("BGM+220'");
1681        let seg = owned.as_borrowed();
1682        assert_eq!(seg.optional_element(5), None);
1683    }
1684}