Skip to main content

edifact_rs/
model.rs

1use smallvec::SmallVec;
2use std::borrow::Cow;
3
/// A half-open byte range `[start, end)` within an EDIFACT payload.
///
/// The type itself does not enforce `start <= end`. An inverted span is
/// considered a bug in the producer and is handled defensively by
/// [`Span::len`] and [`Span::is_empty`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span from an inclusive `start` and an exclusive `end` offset.
    ///
    /// No validation is performed; the offsets are stored as given.
    #[inline]
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Return a copy of this span shifted forward by `delta` bytes.
    ///
    /// Both bounds use saturating addition, so a shift that would overflow
    /// `usize` (e.g. on malformed input) clamps at `usize::MAX` instead of
    /// wrapping or panicking.
    #[inline]
    #[must_use]
    pub const fn offset(self, delta: usize) -> Self {
        Self {
            start: self.start.saturating_add(delta),
            end: self.end.saturating_add(delta),
        }
    }

    /// Length of the span in bytes.
    ///
    /// In debug builds, asserts `end >= start` (inverted spans are a bug).
    /// In release builds, returns 0 for inverted spans rather than panicking,
    /// so a single corrupt span does not abort an entire validation run.
    #[inline]
    pub fn len(self) -> usize {
        debug_assert!(
            self.end >= self.start,
            "Span::len: end ({}) < start ({})",
            self.end,
            self.start
        );
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` if the span covers zero bytes.
    ///
    /// An inverted span (`start > end`) is reported as empty as well. This
    /// keeps `is_empty()` in agreement with the release-build behaviour of
    /// [`Span::len`], which yields 0 for such spans, and matches the
    /// semantics of `std::ops::Range::is_empty`.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.start >= self.end
    }
}
53
54impl std::fmt::Display for Span {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        write!(f, "{}..{}", self.start, self.end)
57    }
58}
59
60/// A single EDIFACT segment, borrowing its data from the source input.
61#[derive(Debug, Clone, PartialEq, Eq)]
62pub struct Segment<'a> {
63    /// Segment tag, usually three uppercase letters.
64    pub tag: &'a str,
65    /// Span covering the whole segment payload.
66    pub span: Span,
67    /// Span covering only the segment tag.
68    pub tag_span: Span,
69    /// Segment elements in positional order.
70    pub elements: Vec<Element<'a>>,
71}
72
73impl<'a> Segment<'a> {
74    #[inline]
75    /// Construct a segment with default spans.
76    pub fn new(tag: &'a str, elements: Vec<Element<'a>>) -> Self {
77        Self {
78            tag,
79            span: Span::default(),
80            tag_span: Span::default(),
81            elements,
82        }
83    }
84
85    /// Return the element at position `n` (0-indexed), if it exists.
86    #[inline]
87    pub fn get_element(&self, n: usize) -> Option<&Element<'a>> {
88        self.elements.get(n)
89    }
90
91    /// Shorthand: get component 0 of element `n` — the most common access pattern.
92    #[inline]
93    pub fn element_str(&self, n: usize) -> Option<&str> {
94        self.elements.get(n)?.get_component(0)
95    }
96
97    /// Get component `comp` of element `elem` (both 0-based), or `None` if absent.
98    ///
99    /// Mirrors [`OwnedSegment::component_str`], eliminating the need to chain
100    /// `get_element(elem)?.get_component(comp)` in rule closures.
101    #[inline]
102    pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
103        self.elements.get(elem)?.get_component(comp)
104    }
105
106    /// Return the byte span of the element at position `n`, if it exists.
107    #[inline]
108    pub fn element_span(&self, n: usize) -> Option<Span> {
109        Some(self.elements.get(n)?.span)
110    }
111}
112
113/// A data element, which may have one or more component values.
114///
115/// Uses [`SmallVec`] with an inline capacity of 4 to avoid heap allocation
116/// for the common case (≤ 4 components).  Component values borrow from the
117/// original input; if the value contained a release-character sequence the
118/// resolved string is stored as an owned [`Cow::Owned`] variant instead of
119/// using `Box::leak`.
120///
121/// Each entry is a `(value, span)` pair, guaranteeing that the component
122/// string and its byte span are always in sync.
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct Element<'a> {
125    /// Span covering the whole element.
126    pub span: Span,
127    /// Element components in positional order, each paired with its byte span.
128    pub components: SmallVec<[(Cow<'a, str>, Span); 4]>,
129}
130
131impl<'a> Element<'a> {
132    /// Return the component at position `n` (0-indexed), if it exists.
133    #[inline]
134    pub fn get_component(&self, n: usize) -> Option<&str> {
135        self.components.get(n).map(|(c, _)| c.as_ref())
136    }
137
138    /// Return the component at position `n`, or `""` if absent.
139    #[inline]
140    pub fn component_or_empty(&self, n: usize) -> &str {
141        self.components
142            .get(n)
143            .map(|(c, _)| c.as_ref())
144            .unwrap_or("")
145    }
146
147    /// Return the byte span of the component at position `n`, if it exists.
148    #[inline]
149    pub fn component_span(&self, n: usize) -> Option<Span> {
150        self.components.get(n).map(|(_, s)| *s)
151    }
152
153    /// Convenience constructor: wraps string literals as borrowed components.
154    ///
155    /// Useful in tests and when constructing segments for writing.
156    pub fn of(components: &[&'a str]) -> Self {
157        Self {
158            span: Span::default(),
159            components: components
160                .iter()
161                .copied()
162                .map(|c| (Cow::Borrowed(c), Span::default()))
163                .collect(),
164        }
165    }
166}
167
168/// Owned data element used by reader-based parsing APIs.
169///
170/// Each entry in `components` is a `(value, span)` pair, keeping the string
171/// and its byte span structurally in sync.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct OwnedElement {
174    /// Span covering the whole element.
175    pub span: Span,
176    /// Owned element components in positional order, each paired with its byte span.
177    pub components: SmallVec<[(String, Span); 4]>,
178}
179
180impl OwnedElement {
181    #[inline]
182    /// Shift all stored spans by `delta` bytes.
183    pub fn offset(mut self, delta: usize) -> Self {
184        self.span = self.span.offset(delta);
185        for (_, span) in &mut self.components {
186            *span = span.offset(delta);
187        }
188        self
189    }
190}
191
192impl<'a> From<Element<'a>> for OwnedElement {
193    fn from(value: Element<'a>) -> Self {
194        Self {
195            span: value.span,
196            components: value
197                .components
198                .into_iter()
199                .map(|(c, s)| (c.into_owned(), s))
200                .collect(),
201        }
202    }
203}
204
205/// Owned segment used by reader-based parsing APIs.
206#[derive(Debug, Clone, PartialEq, Eq)]
207pub struct OwnedSegment {
208    /// Segment tag, usually three uppercase letters.
209    pub tag: String,
210    /// Span covering the whole segment payload.
211    pub span: Span,
212    /// Span covering only the segment tag.
213    pub tag_span: Span,
214    /// Owned segment elements in positional order.
215    pub elements: Vec<OwnedElement>,
216}
217
218/// Zero-allocation view of an [`OwnedElement`].
219///
220/// Implements the same accessor methods as [`Element`] without constructing
221/// any intermediate `SmallVec` or `Cow` values.  Use this when you hold an
222/// `&OwnedSegment` reference and want to inspect element data without the
223/// `Vec<Element>` allocation that [`OwnedSegment::as_borrowed`] incurs.
224///
225/// Construct via `BorrowedElement::from(&owned_element)` or through
226/// [`BorrowedSegment::get_element`].
227#[derive(Debug, Clone, Copy)]
228pub struct BorrowedElement<'a>(pub(crate) &'a OwnedElement);
229
230impl<'a> From<&'a OwnedElement> for BorrowedElement<'a> {
231    #[inline]
232    fn from(elem: &'a OwnedElement) -> Self {
233        BorrowedElement(elem)
234    }
235}
236
237impl<'a> BorrowedElement<'a> {
238    /// Return the component at position `n` (0-indexed), if it exists.
239    #[inline]
240    pub fn get_component(&self, n: usize) -> Option<&'a str> {
241        self.0.components.get(n).map(|(s, _)| s.as_str())
242    }
243
244    /// Return the component at position `n`, or `""` if absent.
245    #[inline]
246    pub fn component_or_empty(&self, n: usize) -> &'a str {
247        self.0
248            .components
249            .get(n)
250            .map(|(s, _)| s.as_str())
251            .unwrap_or("")
252    }
253
254    /// Return the byte span of the component at position `n`, if it exists.
255    #[inline]
256    pub fn component_span(&self, n: usize) -> Option<Span> {
257        self.0.components.get(n).map(|(_, s)| *s)
258    }
259
260    /// The byte span covering the whole element.
261    #[inline]
262    pub fn span(&self) -> Span {
263        self.0.span
264    }
265
266    /// Number of components in this element.
267    #[inline]
268    pub fn len(&self) -> usize {
269        self.0.components.len()
270    }
271
272    /// Returns `true` if this element has no components.
273    #[inline]
274    pub fn is_empty(&self) -> bool {
275        self.0.components.is_empty()
276    }
277
278    /// Iterate over all component strings.
279    #[inline]
280    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
281        self.0.components.iter().map(|(c, _)| c.as_str())
282    }
283}
284
285/// Zero-allocation view of an [`OwnedSegment`].
286///
287/// Implements the same accessor methods as [`Segment`] without constructing
288/// a `Vec<Element>`.  Use this when you hold an `&OwnedSegment` reference and
289/// want to read data without the allocations incurred by
290/// [`OwnedSegment::as_borrowed`].
291///
292/// # Construction
293///
294/// The idiomatic way to obtain a `BorrowedSegment` is via [`OwnedSegment::borrow`]
295/// or the [`From`] impl:
296///
297/// ```rust
298/// use edifact_rs::{BorrowedSegment, OwnedSegment, Span};
299///
300/// let seg = OwnedSegment {
301///     tag: "BGM".into(),
302///     span: Span::new(0, 3),
303///     tag_span: Span::new(0, 3),
304///     elements: vec![],
305/// };
306/// let borrowed = BorrowedSegment::from(&seg);
307/// assert_eq!(borrowed.tag(), "BGM");
308/// ```
309///
310/// The `'a` lifetime is tied to the referent — you cannot outlive the
311/// `OwnedSegment` you borrowed from.
312#[derive(Debug, Clone, Copy)]
313pub struct BorrowedSegment<'a>(pub(crate) &'a OwnedSegment);
314
315impl<'a> From<&'a OwnedSegment> for BorrowedSegment<'a> {
316    #[inline]
317    fn from(seg: &'a OwnedSegment) -> Self {
318        BorrowedSegment(seg)
319    }
320}
321
322impl<'a> BorrowedSegment<'a> {
323    /// The segment tag (e.g. `"BGM"`).
324    #[inline]
325    pub fn tag(&self) -> &'a str {
326        &self.0.tag
327    }
328
329    /// Byte span covering the whole segment.
330    #[inline]
331    pub fn span(&self) -> Span {
332        self.0.span
333    }
334
335    /// Byte span covering only the segment tag.
336    #[inline]
337    pub fn tag_span(&self) -> Span {
338        self.0.tag_span
339    }
340
341    /// Return the element at position `n` (0-indexed), if it exists.
342    #[inline]
343    pub fn get_element(&self, n: usize) -> Option<BorrowedElement<'a>> {
344        self.0.elements.get(n).map(BorrowedElement)
345    }
346
347    /// Shorthand: first component of element `n` — the most common access pattern.
348    #[inline]
349    pub fn element_str(&self, n: usize) -> Option<&'a str> {
350        self.0
351            .elements
352            .get(n)?
353            .components
354            .first()
355            .map(|(c, _)| c.as_str())
356    }
357
358    /// Get component `comp` of element `elem` (both 0-based), or `None` if absent.
359    ///
360    /// Mirrors [`OwnedSegment::component_str`].
361    #[inline]
362    pub fn component_str(&self, elem: usize, comp: usize) -> Option<&'a str> {
363        self.0
364            .elements
365            .get(elem)?
366            .components
367            .get(comp)
368            .map(|(c, _)| c.as_str())
369    }
370
371    /// Return the byte span of the element at position `n`, if it exists.
372    #[inline]
373    pub fn element_span(&self, n: usize) -> Option<Span> {
374        Some(self.0.elements.get(n)?.span)
375    }
376
377    /// Iterate over all elements as zero-allocation views.
378    #[inline]
379    pub fn elements(&self) -> impl Iterator<Item = BorrowedElement<'a>> {
380        self.0.elements.iter().map(BorrowedElement)
381    }
382}
383
384impl OwnedSegment {
385    /// Get the first component of element `n`, or `None` if absent.
386    ///
387    /// This is the zero-allocation equivalent of `as_borrowed().element_str(n)`.
388    /// Used internally by [`crate::find_segment_owned`] and the derived
389    /// [`crate::EdifactDeserialize::edifact_deserialize_owned`] implementations.
390    #[inline]
391    pub fn element_str(&self, n: usize) -> Option<&str> {
392        self.elements
393            .get(n)?
394            .components
395            .first()
396            .map(|(s, _)| s.as_str())
397    }
398
399    /// Get component `comp` of element `elem`, or `None` if absent.
400    ///
401    /// Zero-allocation equivalent of `as_borrowed().get_element(elem)?.get_component(comp)`.
402    #[inline]
403    pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
404        self.elements
405            .get(elem)?
406            .components
407            .get(comp)
408            .map(|(s, _)| s.as_str())
409    }
410
411    #[inline]
412    /// Shift all stored spans by `delta` bytes.
413    pub fn offset(mut self, delta: usize) -> Self {
414        self.span = self.span.offset(delta);
415        self.tag_span = self.tag_span.offset(delta);
416        for element in &mut self.elements {
417            element.span = element.span.offset(delta);
418            for (_, span) in &mut element.components {
419                *span = span.offset(delta);
420            }
421        }
422        self
423    }
424
425    #[inline]
426    /// View this owned segment as a borrowed [`Segment`].
427    ///
428    /// **Performance note**: allocates a `Vec<Element<'_>>` on every call.
429    /// When only individual field access is needed, prefer
430    /// [`OwnedSegment::borrow`] → [`BorrowedSegment`] which is O(1).
431    /// `as_borrowed` remains necessary when the callee requires `&[Segment<'_>]`.
432    pub fn as_borrowed(&self) -> Segment<'_> {
433        Segment {
434            tag: self.tag.as_str(),
435            span: self.span,
436            tag_span: self.tag_span,
437            elements: self
438                .elements
439                .iter()
440                .map(|elem| Element {
441                    span: elem.span,
442                    components: elem
443                        .components
444                        .iter()
445                        .map(|(c, s)| (Cow::Borrowed(c.as_str()), *s))
446                        .collect(),
447                })
448                .collect(),
449        }
450    }
451
452    /// Return a zero-allocation view of this segment.
453    ///
454    /// Unlike [`as_borrowed`][OwnedSegment::as_borrowed], this is `O(1)` and
455    /// performs no heap allocation.  The view cannot be passed to APIs that
456    /// require `&[Segment<'_>]`; use [`as_borrowed`][OwnedSegment::as_borrowed]
457    /// for those call sites.
458    #[inline]
459    pub fn borrow(&self) -> BorrowedSegment<'_> {
460        BorrowedSegment(self)
461    }
462}
463
464impl<'a> From<Segment<'a>> for OwnedSegment {
465    fn from(value: Segment<'a>) -> Self {
466        Self {
467            tag: value.tag.to_string(),
468            span: value.span,
469            tag_span: value.tag_span,
470            elements: value.elements.into_iter().map(OwnedElement::from).collect(),
471        }
472    }
473}