Skip to main content

edifact_rs/
parser.rs

//! Streaming EDIFACT parser — wraps a [`Tokenizer`] and assembles [`Segment`]s.
2
3use crate::{
4    error::EdifactError,
5    model::{Element, OwnedSegment, Segment, Span},
6    tokenizer::{Token, Tokenizer},
7};
8use memchr::memchr2;
9use smallvec::SmallVec;
10use std::borrow::Cow;
11use std::io::{BufRead, BufReader, Read};
12
13fn finish_element<'a>(
14    elements: &mut Vec<Element<'a>>,
15    current_components: &mut SmallVec<[Cow<'a, str>; 4]>,
16    current_component_spans: &mut SmallVec<[Span; 4]>,
17    current_element_start: &mut Option<usize>,
18) {
19    if let (Some(start), Some(last_span)) = (
20        current_element_start.take(),
21        current_component_spans.last().copied(),
22    ) {
23        elements.push(Element {
24            span: Span::new(start, last_span.end),
25            components: std::mem::take(current_components),
26            component_spans: std::mem::take(current_component_spans),
27        });
28    }
29}
30
31fn resolve_release(
32    val: &str,
33    release_char: char,
34    start_offset: usize,
35) -> Result<Cow<'_, str>, EdifactError> {
36    if !val.contains(release_char) {
37        return Ok(Cow::Borrowed(val));
38    }
39    resolve_release_owned(val, release_char, start_offset).map(Cow::Owned)
40}
41
42fn resolve_release_owned(
43    val: &str,
44    release_char: char,
45    start_offset: usize,
46) -> Result<String, EdifactError> {
47    let mut out = String::with_capacity(val.len());
48    let mut chars = val.chars();
49    while let Some(c) = chars.next() {
50        if c == release_char {
51            if let Some(escaped) = chars.next() {
52                out.push(escaped);
53            } else {
54                return Err(EdifactError::InvalidReleaseSequence {
55                    offset: start_offset + val.len().saturating_sub(1),
56                });
57            }
58        } else {
59            out.push(c);
60        }
61    }
62    Ok(out)
63}
64
65/// Streaming parser over a [`Tokenizer`].
66///
67/// Implements `Iterator<Item = Result<Segment<'a>, EdifactError>>`.
68/// Each `next()` call produces one fully-assembled segment.
69pub struct Parser<'a> {
70    tokenizer: Tokenizer<'a>,
71    /// Buffered token from a previous `next()` call (tag peeked ahead).
72    peeked: Option<Token<'a>>,
73    /// Release character from the active service string advice.
74    release_char: char,
75}
76
77impl<'a> Parser<'a> {
78    /// Construct a parser from a tokenizer.
79    pub fn new(tokenizer: Tokenizer<'a>) -> Self {
80        let release_char = tokenizer.service_string_advice().release_char as char;
81        Self {
82            tokenizer,
83            peeked: None,
84            release_char,
85        }
86    }
87}
88
89impl<'a> Iterator for Parser<'a> {
90    type Item = Result<Segment<'a>, EdifactError>;
91
92    fn next(&mut self) -> Option<Self::Item> {
93        // Obtain the segment tag (may have been peeked from a previous iteration)
94        let (tag, tag_span) = loop {
95            let tok = match self.peeked.take() {
96                Some(t) => Ok(t),
97                None => self.tokenizer.next()?,
98            };
99            match tok {
100                Ok(Token::SegmentTag { value, span }) => break (value, span),
101                Ok(Token::SegmentTerminator { .. }) => continue, // stray terminator — skip
102                Ok(_) => continue,                               // stray value — skip
103                Err(e) => return Some(Err(e)),
104            }
105        };
106
107        let mut elements: Vec<Element<'a>> = Vec::with_capacity(8);
108        let mut current_components: SmallVec<[Cow<'a, str>; 4]> = SmallVec::new();
109        let mut current_component_spans: SmallVec<[Span; 4]> = SmallVec::new();
110        let mut current_element_start: Option<usize> = None;
111        let mut in_element = false;
112        let mut segment_end = tag_span.end;
113
114        loop {
115            let tok = match self.tokenizer.next() {
116                Some(Ok(t)) => t,
117                Some(Err(e)) => return Some(Err(e)),
118                None => {
119                    // EOF — flush whatever we have
120                    if in_element {
121                        finish_element(
122                            &mut elements,
123                            &mut current_components,
124                            &mut current_component_spans,
125                            &mut current_element_start,
126                        );
127                        if let Some(last) = elements.last() {
128                            segment_end = last.span.end;
129                        }
130                    }
131                    break;
132                }
133            };
134
135            match tok {
136                Token::SegmentTag {
137                    value: next_tag,
138                    span,
139                } => {
140                    // We consumed the first token of the *next* segment; save it.
141                    self.peeked = Some(Token::SegmentTag {
142                        value: next_tag,
143                        span,
144                    });
145                    if in_element {
146                        finish_element(
147                            &mut elements,
148                            &mut current_components,
149                            &mut current_component_spans,
150                            &mut current_element_start,
151                        );
152                        if let Some(last) = elements.last() {
153                            segment_end = last.span.end;
154                        }
155                    }
156                    break;
157                }
158                Token::SegmentTerminator { span } => {
159                    if in_element {
160                        finish_element(
161                            &mut elements,
162                            &mut current_components,
163                            &mut current_component_spans,
164                            &mut current_element_start,
165                        );
166                    }
167                    segment_end = span.end;
168                    break;
169                }
170                Token::DataElement { value, span } => {
171                    if in_element {
172                        finish_element(
173                            &mut elements,
174                            &mut current_components,
175                            &mut current_component_spans,
176                            &mut current_element_start,
177                        );
178                    }
179                    let resolved = match resolve_release(value, self.release_char, span.start) {
180                        Ok(v) => v,
181                        Err(error) => return Some(Err(error)),
182                    };
183                    current_components.push(resolved);
184                    current_component_spans.push(span);
185                    current_element_start = Some(span.start);
186                    in_element = true;
187                }
188                Token::ComponentElement { value, span } => {
189                    if !in_element {
190                        // component before any element — treat as first element
191                        in_element = true;
192                        current_element_start = Some(span.start);
193                    }
194                    let resolved = match resolve_release(value, self.release_char, span.start) {
195                        Ok(v) => v,
196                        Err(error) => return Some(Err(error)),
197                    };
198                    current_components.push(resolved);
199                    current_component_spans.push(span);
200                }
201            }
202        }
203
204        Some(Ok(Segment {
205            tag,
206            span: Span::new(tag_span.start, segment_end),
207            tag_span,
208            elements,
209        }))
210    }
211}
212
213/// Parse EDIFACT from an arbitrary reader.
214///
215/// This path is optimized for bounded-memory ingest and returns owned segments,
216/// allowing the parser to advance across chunk boundaries without requiring a
217/// fully-buffered input slice.
218pub fn from_reader<R: Read>(reader: R) -> Result<Vec<OwnedSegment>, EdifactError> {
219    from_reader_stream(reader).collect()
220}
221
222/// Parse EDIFACT from a buffered reader.
223pub fn from_bufread<R: BufRead>(reader: R) -> Result<Vec<OwnedSegment>, EdifactError> {
224    from_bufread_stream(reader).collect()
225}
226
/// Limits applied by the reader-based EDIFACT parsers.
///
/// Hand an instance to [`from_reader_with_config`] or
/// [`from_bufread_stream_with_config`] to replace the defaults.
///
/// # Example
/// ```
/// use edifact_rs::{ReaderConfig, from_reader_with_config};
///
/// let cfg = ReaderConfig::default().max_segment_bytes(4_096);
/// let segments: Vec<_> = from_reader_with_config(b"BGM+220+1+9'".as_ref(), cfg)
///     .collect::<Result<_, _>>()
///     .unwrap();
/// assert_eq!(segments[0].tag, "BGM");
/// ```
#[derive(Clone, Copy, Debug)]
pub struct ReaderConfig {
    /// Upper bound on the byte length of one segment, not counting its
    /// terminator.
    ///
    /// A segment that grows beyond this bound without reaching a terminator
    /// makes the parser return [`EdifactError::SegmentTooLong`], which keeps
    /// malformed or hostile input from driving unbounded allocation.
    ///
    /// Default: 65 536 bytes (64 KiB). Real-world EDIFACT segments are almost
    /// always below 4 KiB, so a tighter limit is reasonable for untrusted input.
    pub max_segment_bytes: usize,
}

impl Default for ReaderConfig {
    /// Returns the default limits: segments of up to 64 KiB.
    fn default() -> Self {
        let max_segment_bytes = 65_536;
        Self { max_segment_bytes }
    }
}

impl ReaderConfig {
    /// Builder-style setter: returns a copy of `self` whose segment size
    /// limit is `limit`.
    #[must_use]
    pub fn max_segment_bytes(self, limit: usize) -> Self {
        let mut updated = self;
        updated.max_segment_bytes = limit;
        updated
    }
}
272
/// Lifecycle of an [`OwnedSegmentStream`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum StreamState {
    /// Nothing has been read yet, so a leading UNA header may still be present.
    Init,
    /// The header question is settled (UNA consumed or absent); segments are
    /// being streamed.
    Running,
    /// An error was reported; the stream yields nothing further.
    Done,
}
283
284/// Streaming iterator over owned segments from a buffered reader.
285///
286/// # Performance
287///
288/// A **fast path** uses [`BufRead::fill_buf`] + `memchr` to locate the segment
289/// terminator within the OS-level read buffer (typically 8 KB) without any
290/// intermediate heap allocation.  The segment bytes are parsed directly from
291/// the buffer slice and converted to an [`OwnedSegment`] in a single pass.
292///
293/// For segments that span read-buffer boundaries the implementation falls back
294/// to the byte-accumulation slow path, which allocates a temporary `Vec<u8>`
295/// and re-tokenizes — the same behaviour as in older versions of the library.
296/// In practice this fallback is rare because the default `BufReader` buffer
297/// (8 KB) is far larger than a typical EDIFACT segment (<300 bytes).
298///
299/// Configure limits via [`ReaderConfig`] and [`from_reader_with_config`] /
300/// [`from_bufread_stream_with_config`].
301pub struct OwnedSegmentStream<R: BufRead> {
302    reader: R,
303    ssa: crate::tokenizer::ServiceStringAdvice,
304    state: StreamState,
305    stream_offset: usize,
306    config: ReaderConfig,
307}
308
309impl<R: BufRead> OwnedSegmentStream<R> {
310    fn new(reader: R) -> Self {
311        Self::with_config(reader, ReaderConfig::default())
312    }
313
314    fn with_config(reader: R, config: ReaderConfig) -> Self {
315        Self {
316            reader,
317            ssa: crate::tokenizer::ServiceStringAdvice::default(),
318            state: StreamState::Init,
319            stream_offset: 0,
320            config,
321        }
322    }
323}
324
// ── fast-path helpers ─────────────────────────────────────────────────────────
326
327/// Outcome of a single-buffer segment extraction attempt.
328enum FastSegment {
329    /// Segment parsed; second field = bytes to consume (content + terminator).
330    Parsed(OwnedSegment, usize),
331    /// Only whitespace or an isolated terminator; bytes to skip and continue.
332    Skip(usize),
333    /// Terminator not present in the current buffer; caller must use slow path.
334    NeedMore,
335    /// Buffer is empty — no more input.
336    Eof,
337    /// Parse error.
338    Err(EdifactError),
339}
340
341/// Return the byte offset of the first **unescaped** occurrence of `term` in `buf`.
342///
343/// A byte is *escaped* when it is immediately preceded by `release` (e.g.
344/// `?'` escapes `'`).  Two consecutive release chars cancel each other, so
345/// `??'` contains an *unescaped* `'`.
346///
347/// # Complexity
348///
349/// O(n) in the length of `buf`.  `memchr2` is used to fast-scan past bytes
350/// that are neither `release` nor `term`, so SIMD acceleration applies on
351/// platforms where `memchr` provides it.
352fn find_unescaped_term(buf: &[u8], term: u8, release: u8) -> Option<usize> {
353    let mut i = 0;
354    while i < buf.len() {
355        // Fast-skip to the next byte that might be a release char or terminator.
356        let rel = memchr2(release, term, &buf[i..])?;
357        let pos = i + rel;
358        if buf[pos] == release {
359            // Release char: the next byte is escaped — skip both.
360            i = pos + 2;
361        } else {
362            // Unescaped terminator found.
363            return Some(pos);
364        }
365    }
366    None
367}
368
369/// Try to parse one segment directly from the `BufRead` buffer.
370///
371/// This function borrows `reader` only for the duration of the call.  After it
372/// returns the caller is free to call `reader.consume(n)`.
373fn try_fast_segment<R: BufRead>(
374    reader: &mut R,
375    ssa: crate::tokenizer::ServiceStringAdvice,
376    seg_start: usize,
377    max_segment_bytes: usize,
378) -> FastSegment {
379    let buf = match reader.fill_buf() {
380        Ok(b) => b,
381        Err(e) => return FastSegment::Err(e.into()),
382    };
383
384    if buf.is_empty() {
385        return FastSegment::Eof;
386    }
387
388    let Some(pos) = find_unescaped_term(buf, ssa.segment_term, ssa.release_char) else {
389        return FastSegment::NeedMore;
390    };
391
392    // Enforce the segment-size guard *before* any allocation.
393    // `pos` is the index of the terminator byte, so the segment body is `buf[..pos]`.
394    if pos > max_segment_bytes {
395        return FastSegment::Err(EdifactError::SegmentTooLong {
396            offset: seg_start,
397            limit: max_segment_bytes,
398        });
399    }
400
401    // `buf[..pos]` is the segment content without the terminator.
402    let seg_bytes = &buf[..pos];
403
404    // Skip isolated terminators / pure-whitespace slots between segments.
405    if seg_bytes
406        .iter()
407        .all(|&b| matches!(b, b' ' | b'\t' | b'\r' | b'\n'))
408    {
409        return FastSegment::Skip(pos + 1);
410    }
411
412    // Parse directly from the buffer slice — zero intermediate allocation.
413    // Include the terminator byte so the parser sees a `SegmentTerminator`
414    // token and records a span that is consistent with the `from_bytes` path.
415    let tok = Tokenizer::new(&buf[..pos + 1], ssa);
416    match Parser::new(tok).collect::<Result<Vec<Segment<'_>>, _>>() {
417        Err(e) => FastSegment::Err(e),
418        Ok(segs) => match segs.into_iter().next() {
419            None => FastSegment::Skip(pos + 1),
420            Some(s) => FastSegment::Parsed(OwnedSegment::from(s).offset(seg_start), pos + 1),
421        },
422    }
423    // `buf` borrow released here — `reader.consume()` is safe to call in the caller.
424}
425
// ── Iterator impl ─────────────────────────────────────────────────────────────
427
428impl<R: BufRead> Iterator for OwnedSegmentStream<R> {
429    type Item = Result<OwnedSegment, EdifactError>;
430
431    fn next(&mut self) -> Option<Self::Item> {
432        if self.state == StreamState::Done {
433            return None;
434        }
435
436        loop {
437            // ── Fast path (after UNA has been consumed) ───────────────────
438            if self.state == StreamState::Running {
439                let seg_start = self.stream_offset;
440                match try_fast_segment(
441                    &mut self.reader,
442                    self.ssa,
443                    seg_start,
444                    self.config.max_segment_bytes,
445                ) {
446                    FastSegment::Parsed(seg, n) => {
447                        self.reader.consume(n);
448                        self.stream_offset += n;
449                        return Some(Ok(seg));
450                    }
451                    FastSegment::Skip(n) => {
452                        self.reader.consume(n);
453                        self.stream_offset += n;
454                        continue;
455                    }
456                    FastSegment::Eof => return None,
457                    FastSegment::Err(e) => {
458                        self.state = StreamState::Done;
459                        return Some(Err(e));
460                    }
461                    FastSegment::NeedMore => {
462                        // Segment spans buffer boundary — fall through to slow path.
463                    }
464                }
465            }
466
467            // ── Slow path: byte accumulation (also handles UNA header) ────
468            let mut scanned = self.state != StreamState::Init;
469            let mut raw = match read_next_raw_segment(
470                &mut self.reader,
471                &mut self.ssa,
472                &mut scanned,
473                &mut self.stream_offset,
474                self.config.max_segment_bytes,
475            ) {
476                Ok(Some(r)) => r,
477                Ok(None) => return None,
478                Err(e) => {
479                    self.state = StreamState::Done;
480                    return Some(Err(e));
481                }
482            };
483            if scanned {
484                self.state = StreamState::Running;
485            }
486
487            raw.bytes.push(self.ssa.segment_term);
488            let tok = Tokenizer::new(raw.bytes.as_slice(), self.ssa);
489            match Parser::new(tok).collect::<Result<Vec<Segment<'_>>, _>>() {
490                Ok(segs) => {
491                    if let Some(s) = segs.into_iter().next() {
492                        return Some(Ok(OwnedSegment::from(s).offset(raw.start_offset)));
493                    }
494                    // Empty segment — loop back.
495                }
496                Err(e) => {
497                    self.state = StreamState::Done;
498                    return Some(Err(e));
499                }
500            }
501        }
502    }
503}
504
505/// Parse EDIFACT from a buffered reader as a streaming iterator.
506pub fn from_bufread_stream<R: BufRead>(reader: R) -> OwnedSegmentStream<R> {
507    OwnedSegmentStream::new(reader)
508}
509
510/// Parse EDIFACT from a buffered reader as a streaming iterator with custom config.
511pub fn from_bufread_stream_with_config<R: BufRead>(
512    reader: R,
513    config: ReaderConfig,
514) -> OwnedSegmentStream<R> {
515    OwnedSegmentStream::with_config(reader, config)
516}
517
518/// Parse EDIFACT from an arbitrary reader as a streaming iterator.
519pub fn from_reader_stream<R: Read>(reader: R) -> OwnedSegmentStream<BufReader<R>> {
520    from_bufread_stream(BufReader::new(reader))
521}
522
523/// Parse EDIFACT from an arbitrary reader as a streaming iterator with custom config.
524///
525/// # Example
526/// ```
527/// use edifact_rs::{ReaderConfig, from_reader_with_config};
528///
529/// let cfg = ReaderConfig::default().max_segment_bytes(4_096);
530/// let segs: Vec<_> = from_reader_with_config(b"BGM+220+1+9'".as_ref(), cfg)
531///     .collect::<Result<_, _>>()
532///     .unwrap();
533/// assert_eq!(segs[0].tag, "BGM");
534/// ```
535pub fn from_reader_with_config<R: Read>(
536    reader: R,
537    config: ReaderConfig,
538) -> OwnedSegmentStream<BufReader<R>> {
539    from_bufread_stream_with_config(BufReader::new(reader), config)
540}
541
542fn read_next_raw_segment<R: BufRead>(
543    reader: &mut R,
544    ssa: &mut crate::tokenizer::ServiceStringAdvice,
545    scanned_header: &mut bool,
546    stream_offset: &mut usize,
547    max_segment_bytes: usize,
548) -> Result<Option<crate::tokenizer::RawSegment>, EdifactError> {
549    loop {
550        let Some((first_offset, first)) = read_next_non_ws_byte(reader, stream_offset)? else {
551            return Ok(None);
552        };
553
554        if !*scanned_header && first == b'U' {
555            let second = read_required_byte(reader, stream_offset)?;
556            let third = read_required_byte(reader, stream_offset)?;
557            if second == b'N' && third == b'A' {
558                let mut una = [0u8; 9];
559                una[0] = b'U';
560                una[1] = b'N';
561                una[2] = b'A';
562                for slot in una.iter_mut().skip(3) {
563                    *slot = read_required_byte(reader, stream_offset)?;
564                }
565                *ssa = crate::tokenizer::ServiceStringAdvice {
566                    component_sep: una[3],
567                    element_sep: una[4],
568                    decimal_mark: una[5],
569                    release_char: una[6],
570                    segment_term: una[8],
571                };
572                if !ssa.is_valid() {
573                    return Err(EdifactError::InvalidUna);
574                }
575                *scanned_header = true;
576                continue;
577            }
578
579            *scanned_header = true;
580            return read_remainder_of_segment(
581                reader,
582                ssa,
583                crate::tokenizer::RawSegment {
584                    bytes: vec![first, second, third],
585                    start_offset: first_offset,
586                },
587                stream_offset,
588                max_segment_bytes,
589            );
590        }
591
592        *scanned_header = true;
593        return read_remainder_of_segment(
594            reader,
595            ssa,
596            crate::tokenizer::RawSegment {
597                bytes: vec![first],
598                start_offset: first_offset,
599            },
600            stream_offset,
601            max_segment_bytes,
602        );
603    }
604}
605
606fn read_remainder_of_segment<R: BufRead>(
607    reader: &mut R,
608    ssa: &crate::tokenizer::ServiceStringAdvice,
609    mut out: crate::tokenizer::RawSegment,
610    stream_offset: &mut usize,
611    max_segment_bytes: usize,
612) -> Result<Option<crate::tokenizer::RawSegment>, EdifactError> {
613    let mut escaped = false;
614    loop {
615        if out.bytes.len() >= max_segment_bytes {
616            return Err(EdifactError::SegmentTooLong {
617                offset: out.start_offset,
618                limit: max_segment_bytes,
619            });
620        }
621        let Some(byte) = read_next_byte(reader, stream_offset)? else {
622            return if out.bytes.is_empty() {
623                Ok(None)
624            } else if escaped {
625                Err(EdifactError::InvalidReleaseSequence {
626                    offset: out.start_offset + out.bytes.len().saturating_sub(1),
627                })
628            } else {
629                Err(EdifactError::UnexpectedEof {
630                    offset: out.start_offset + out.bytes.len(),
631                })
632            };
633        };
634
635        if !escaped && byte == ssa.segment_term {
636            return Ok(Some(out));
637        }
638
639        if !escaped && byte == ssa.release_char {
640            escaped = true;
641            out.bytes.push(byte);
642            continue;
643        }
644
645        escaped = false;
646        out.bytes.push(byte);
647    }
648}
649
650fn read_next_byte<R: BufRead>(
651    reader: &mut R,
652    stream_offset: &mut usize,
653) -> Result<Option<u8>, EdifactError> {
654    let buf = reader.fill_buf()?;
655    if buf.is_empty() {
656        return Ok(None);
657    }
658
659    let byte = buf[0];
660    reader.consume(1);
661    *stream_offset += 1;
662    Ok(Some(byte))
663}
664
665fn read_required_byte<R: BufRead>(
666    reader: &mut R,
667    stream_offset: &mut usize,
668) -> Result<u8, EdifactError> {
669    read_next_byte(reader, stream_offset)?.ok_or(EdifactError::UnexpectedEof {
670        offset: *stream_offset,
671    })
672}
673
674fn read_next_non_ws_byte<R: BufRead>(
675    reader: &mut R,
676    stream_offset: &mut usize,
677) -> Result<Option<(usize, u8)>, EdifactError> {
678    loop {
679        let current_offset = *stream_offset;
680        let Some(byte) = read_next_byte(reader, stream_offset)? else {
681            return Ok(None);
682        };
683        if !matches!(byte, b' ' | b'\t' | b'\r' | b'\n') {
684            return Ok(Some((current_offset, byte)));
685        }
686    }
687}
688
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tokenizer::ServiceStringAdvice;
    use std::io::{BufReader, Cursor};

    /// Tokenizes and parses `input` in one go, panicking on any parse error.
    fn parse_all(input: &[u8]) -> Vec<Segment<'_>> {
        let advice = ServiceStringAdvice::from_bytes(input);
        let parsed: Result<Vec<_>, _> = Parser::new(Tokenizer::new(input, advice)).collect();
        parsed.expect("parse failed")
    }

    #[test]
    fn parses_unb_unz() {
        let input = b"UNB+UNOA:1+SENDER+RECEIVER+200101:0900+1'UNZ+0+1'";
        let segments = parse_all(input);
        assert_eq!(segments.len(), 2);

        let (unb, unz) = (&segments[0], &segments[1]);
        assert_eq!(unb.tag, "UNB");
        assert_eq!(unz.tag, "UNZ");
        assert_eq!(unb.tag_span, Span::new(0, 3));
        // The segment span includes the terminator.
        assert_eq!(unb.span, Span::new(0, 41));
    }

    #[test]
    fn element_access() {
        let input = b"BGM+220+ORDER123+9'";
        let segments = parse_all(input);
        let bgm = &segments[0];
        assert_eq!(bgm.element_str(0), Some("220"));
        assert_eq!(bgm.element_str(1), Some("ORDER123"));
    }

    #[test]
    fn component_access() {
        let input = b"DTM+137:20200101:102'";
        let segments = parse_all(input);
        let composite = segments[0].get_element(0).unwrap();
        assert_eq!(composite.get_component(0), Some("137"));
        assert_eq!(composite.get_component(1), Some("20200101"));
        assert_eq!(composite.get_component(2), Some("102"));
    }

    #[test]
    fn release_char_resolved() {
        let input = b"FTX+AAA++test?+value'";
        let segments = parse_all(input);
        let ftx = &segments[0];
        assert_eq!(ftx.element_str(2), Some("test+value"));
        // The span still covers the raw, escaped bytes.
        assert_eq!(
            ftx.get_element(2).unwrap().component_span(0),
            Some(Span::new(9, 20))
        );
    }

    #[test]
    fn reader_path_preserves_custom_una_delimiters() {
        let input = b"UNA:;.? 'BGM;220;test?;value'";
        let reader = BufReader::new(Cursor::new(input));
        let segments = super::from_bufread(reader).expect("reader parse should succeed");
        let bgm = segments
            .iter()
            .find(|segment| segment.tag == "BGM")
            .expect("BGM segment should be present");
        assert_eq!(bgm.elements[0].components[0], "220");
        assert_eq!(bgm.elements[1].components[0], "test;value");
    }

    #[test]
    fn arbitrary_bytes_no_panic() {
        // Stable no-panic property: arbitrary input may fail but must not panic.
        let garbage: &[u8] = b"\xff\x00\x01\x02ABC+++'''???";
        let _ = crate::from_bytes(garbage).collect::<Vec<_>>();
    }

    #[test]
    fn from_reader_handles_chunk_boundaries() {
        let input = b"UNA:+.? 'BGM+220+test?+value'UNT+2+1'";
        // A 5-byte buffer forces segments to straddle buffer boundaries.
        let tiny_buffer = BufReader::with_capacity(5, Cursor::new(input));
        let parsed = from_bufread(tiny_buffer).expect("reader parsing should succeed");
        assert_eq!(parsed.len(), 2);

        let (bgm, unt) = (&parsed[0], &parsed[1]);
        assert_eq!(bgm.tag, "BGM");
        assert_eq!(bgm.elements[1].components[0], "test+value");
        assert_eq!(unt.tag, "UNT");
    }

    #[test]
    fn from_reader_without_una_uses_default_delimiters() {
        let input = b"BGM+220+X'UNT+2+1'";
        let parsed = from_reader(Cursor::new(input)).expect("reader parsing should succeed");
        assert_eq!(parsed.len(), 2);

        let (bgm, unt) = (&parsed[0], &parsed[1]);
        assert_eq!(bgm.tag, "BGM");
        assert_eq!(bgm.elements[0].components[0], "220");
        assert_eq!(unt.span, Span::new(10, 18));
    }

    #[test]
    fn dangling_release_sequence_is_error() {
        let input = b"FTX+AAA++dangling?";
        let outcome = crate::from_bytes(input).collect::<Result<Vec<_>, _>>();
        let err = outcome.expect_err("expected dangling release to fail");

        assert!(matches!(err, EdifactError::InvalidReleaseSequence { .. }));
    }

    #[test]
    fn from_reader_reports_dangling_release_sequence() {
        let input = b"FTX+AAA++dangling?";
        let outcome = from_reader(Cursor::new(input));
        let err = outcome.expect_err("expected dangling release from reader path");
        assert!(matches!(err, EdifactError::InvalidReleaseSequence { .. }));
    }

    #[test]
    fn from_reader_rejects_invalid_una() {
        let input = b"UNA::.? 'BGM:220'";
        let outcome = from_reader(Cursor::new(input));
        let err = outcome.expect_err("invalid UNA should fail reader parsing");
        assert!(matches!(err, EdifactError::InvalidUna));
    }
}