Skip to main content

edifact_rs/
model.rs

1use smallvec::SmallVec;
2use std::borrow::Cow;
3
/// A half-open byte span (`start..end`) within an EDIFACT payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span from inclusive start and exclusive end offsets.
    ///
    /// No validation is performed: `start > end` is stored as given.
    #[inline]
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Shift the span by `delta` bytes.
    ///
    /// Uses saturating addition to avoid integer overflow on malformed input.
    #[inline]
    pub const fn offset(self, delta: usize) -> Self {
        Self {
            start: self.start.saturating_add(delta),
            end: self.end.saturating_add(delta),
        }
    }

    /// Length of the span in bytes.
    ///
    /// An inverted span (`start > end`) trips the `debug_assert!` when debug
    /// assertions are enabled.  When they are disabled the subtraction
    /// saturates, so the result is `0` rather than a wrapped-around value.
    ///
    /// # Note on constness
    ///
    /// This method is intentionally **not** `const fn` (changed in 0.7.0) so that
    /// the `debug_assert!` overflow guard is included in debug builds.  If you need
    /// span arithmetic in a `const` context use `span.end - span.start` directly
    /// (both fields are `pub`).
    #[inline]
    pub fn len(self) -> usize {
        debug_assert!(
            self.start <= self.end,
            "Span::len: start ({}) > end ({})",
            self.start,
            self.end
        );
        // Saturate instead of `end - start`: a plain subtraction silently wraps
        // on an inverted span once overflow checks are compiled out.
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` if the span covers zero bytes.
    ///
    /// An inverted span (`start > end`) contains no bytes either, so it is
    /// reported as empty — the same rule `std::ops::Range::is_empty` applies.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.start >= self.end
    }
}

/// Formats the span as `start..end`, e.g. `3..7`.
impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}..{}", self.start, self.end)
    }
}
62
63/// A single EDIFACT segment, borrowing its data from the source input.
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct Segment<'a> {
66    /// Segment tag, usually three uppercase letters.
67    pub tag: &'a str,
68    /// Span covering the whole segment payload.
69    pub span: Span,
70    /// Span covering only the segment tag.
71    pub tag_span: Span,
72    /// Segment elements in positional order.
73    pub elements: Vec<Element<'a>>,
74}
75
76impl<'a> Segment<'a> {
77    #[inline]
78    /// Construct a segment with default spans.
79    pub fn new(tag: &'a str, elements: Vec<Element<'a>>) -> Self {
80        Self {
81            tag,
82            span: Span::default(),
83            tag_span: Span::default(),
84            elements,
85        }
86    }
87
88    /// Return the element at position `n` (0-indexed), if it exists.
89    #[inline]
90    pub fn get_element(&self, n: usize) -> Option<&Element<'a>> {
91        self.elements.get(n)
92    }
93
94    /// Shorthand: get component 0 of element `n` — the most common access pattern.
95    #[inline]
96    pub fn element_str(&self, n: usize) -> Option<&str> {
97        self.elements.get(n)?.get_component(0)
98    }
99
100    /// Return the byte span of the element at position `n`, if it exists.
101    #[inline]
102    pub fn element_span(&self, n: usize) -> Option<Span> {
103        Some(self.elements.get(n)?.span)
104    }
105}
106
107/// A data element, which may have one or more component values.
108///
109/// Uses [`SmallVec`] with an inline capacity of 4 to avoid heap allocation
110/// for the common case (≤ 4 components).  Component values borrow from the
111/// original input; if the value contained a release-character sequence the
112/// resolved string is stored as an owned [`Cow::Owned`] variant instead of
113/// using `Box::leak`.
114///
115/// Each entry is a `(value, span)` pair, guaranteeing that the component
116/// string and its byte span are always in sync.
117#[derive(Debug, Clone, PartialEq, Eq)]
118pub struct Element<'a> {
119    /// Span covering the whole element.
120    pub span: Span,
121    /// Element components in positional order, each paired with its byte span.
122    pub components: SmallVec<[(Cow<'a, str>, Span); 4]>,
123}
124
125impl<'a> Element<'a> {
126    /// Return the component at position `n` (0-indexed), if it exists.
127    #[inline]
128    pub fn get_component(&self, n: usize) -> Option<&str> {
129        self.components.get(n).map(|(c, _)| c.as_ref())
130    }
131
132    /// Return the component at position `n`, or `""` if absent.
133    #[inline]
134    pub fn component_or_empty(&self, n: usize) -> &str {
135        self.components
136            .get(n)
137            .map(|(c, _)| c.as_ref())
138            .unwrap_or("")
139    }
140
141    /// Return the byte span of the component at position `n`, if it exists.
142    #[inline]
143    pub fn component_span(&self, n: usize) -> Option<Span> {
144        self.components.get(n).map(|(_, s)| *s)
145    }
146
147    /// Convenience constructor: wraps string literals as borrowed components.
148    ///
149    /// Useful in tests and when constructing segments for writing.
150    pub fn of(components: &[&'a str]) -> Self {
151        Self {
152            span: Span::default(),
153            components: components
154                .iter()
155                .copied()
156                .map(|c| (Cow::Borrowed(c), Span::default()))
157                .collect(),
158        }
159    }
160}
161
162/// Owned data element used by reader-based parsing APIs.
163///
164/// Each entry in `components` is a `(value, span)` pair, keeping the string
165/// and its byte span structurally in sync.
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub struct OwnedElement {
168    /// Span covering the whole element.
169    pub span: Span,
170    /// Owned element components in positional order, each paired with its byte span.
171    pub components: SmallVec<[(String, Span); 4]>,
172}
173
174impl OwnedElement {
175    #[inline]
176    /// Shift all stored spans by `delta` bytes.
177    pub fn offset(mut self, delta: usize) -> Self {
178        self.span = self.span.offset(delta);
179        for (_, span) in &mut self.components {
180            *span = span.offset(delta);
181        }
182        self
183    }
184}
185
186impl<'a> From<Element<'a>> for OwnedElement {
187    fn from(value: Element<'a>) -> Self {
188        Self {
189            span: value.span,
190            components: value
191                .components
192                .into_iter()
193                .map(|(c, s)| (c.into_owned(), s))
194                .collect(),
195        }
196    }
197}
198
/// Owned segment used by reader-based parsing APIs.
///
/// Mirrors [`Segment`] field for field, but stores the tag as a `String` and
/// the elements as [`OwnedElement`]s, so the value holds no borrow of the
/// input it was parsed from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedSegment {
    /// Segment tag, usually three uppercase letters.
    pub tag: String,
    /// Byte span covering the whole segment payload.
    pub span: Span,
    /// Byte span covering only the segment tag.
    pub tag_span: Span,
    /// Owned segment elements in positional order.
    pub elements: Vec<OwnedElement>,
}
211
212/// Zero-allocation view of an [`OwnedElement`].
213///
214/// Implements the same accessor methods as [`Element`] without constructing
215/// any intermediate `SmallVec` or `Cow` values.  Use this when you hold an
216/// `&OwnedSegment` reference and want to inspect element data without the
217/// `Vec<Element>` allocation that [`OwnedSegment::as_borrowed`] incurs.
218///
219/// Construct via `BorrowedElement::from(&owned_element)` or through
220/// [`BorrowedSegment::get_element`].
221#[derive(Debug, Clone, Copy)]
222pub struct BorrowedElement<'a>(pub(crate) &'a OwnedElement);
223
224impl<'a> From<&'a OwnedElement> for BorrowedElement<'a> {
225    #[inline]
226    fn from(elem: &'a OwnedElement) -> Self {
227        BorrowedElement(elem)
228    }
229}
230
231impl<'a> BorrowedElement<'a> {
232    /// Return the component at position `n` (0-indexed), if it exists.
233    #[inline]
234    pub fn get_component(&self, n: usize) -> Option<&'a str> {
235        self.0.components.get(n).map(|(s, _)| s.as_str())
236    }
237
238    /// Return the component at position `n`, or `""` if absent.
239    #[inline]
240    pub fn component_or_empty(&self, n: usize) -> &'a str {
241        self.0
242            .components
243            .get(n)
244            .map(|(s, _)| s.as_str())
245            .unwrap_or("")
246    }
247
248    /// Return the byte span of the component at position `n`, if it exists.
249    #[inline]
250    pub fn component_span(&self, n: usize) -> Option<Span> {
251        self.0.components.get(n).map(|(_, s)| *s)
252    }
253
254    /// The byte span covering the whole element.
255    #[inline]
256    pub fn span(&self) -> Span {
257        self.0.span
258    }
259
260    /// Number of components in this element.
261    #[inline]
262    pub fn len(&self) -> usize {
263        self.0.components.len()
264    }
265
266    /// Returns `true` if this element has no components.
267    #[inline]
268    pub fn is_empty(&self) -> bool {
269        self.0.components.is_empty()
270    }
271
272    /// Iterate over all component strings.
273    #[inline]
274    pub fn iter(&self) -> impl Iterator<Item = &'a str> {
275        self.0.components.iter().map(|(c, _)| c.as_str())
276    }
277}
278
279/// Zero-allocation view of an [`OwnedSegment`].
280///
281/// Implements the same accessor methods as [`Segment`] without constructing
282/// a `Vec<Element>`.  Use this when you hold an `&OwnedSegment` reference and
283/// want to read data without the allocations incurred by
284/// [`OwnedSegment::as_borrowed`].
285///
286/// # Construction
287///
288/// The idiomatic way to obtain a `BorrowedSegment` is via [`OwnedSegment::borrow`]
289/// or the [`From`] impl:
290///
291/// ```rust
292/// use edifact_rs::{BorrowedSegment, OwnedSegment, Span};
293///
294/// let seg = OwnedSegment {
295///     tag: "BGM".into(),
296///     span: Span::new(0, 3),
297///     tag_span: Span::new(0, 3),
298///     elements: vec![],
299/// };
300/// let borrowed = BorrowedSegment::from(&seg);
301/// assert_eq!(borrowed.tag(), "BGM");
302/// ```
303///
304/// The `'a` lifetime is tied to the referent — you cannot outlive the
305/// `OwnedSegment` you borrowed from.
306#[derive(Debug, Clone, Copy)]
307pub struct BorrowedSegment<'a>(pub(crate) &'a OwnedSegment);
308
309impl<'a> From<&'a OwnedSegment> for BorrowedSegment<'a> {
310    #[inline]
311    fn from(seg: &'a OwnedSegment) -> Self {
312        BorrowedSegment(seg)
313    }
314}
315
316impl<'a> BorrowedSegment<'a> {
317    /// The segment tag (e.g. `"BGM"`).
318    #[inline]
319    pub fn tag(&self) -> &'a str {
320        &self.0.tag
321    }
322
323    /// Byte span covering the whole segment.
324    #[inline]
325    pub fn span(&self) -> Span {
326        self.0.span
327    }
328
329    /// Byte span covering only the segment tag.
330    #[inline]
331    pub fn tag_span(&self) -> Span {
332        self.0.tag_span
333    }
334
335    /// Return the element at position `n` (0-indexed), if it exists.
336    #[inline]
337    pub fn get_element(&self, n: usize) -> Option<BorrowedElement<'a>> {
338        self.0.elements.get(n).map(BorrowedElement)
339    }
340
341    /// Shorthand: first component of element `n` — the most common access pattern.
342    #[inline]
343    pub fn element_str(&self, n: usize) -> Option<&'a str> {
344        self.0
345            .elements
346            .get(n)?
347            .components
348            .first()
349            .map(|(c, _)| c.as_str())
350    }
351
352    /// Return the byte span of the element at position `n`, if it exists.
353    #[inline]
354    pub fn element_span(&self, n: usize) -> Option<Span> {
355        Some(self.0.elements.get(n)?.span)
356    }
357
358    /// Iterate over all elements as zero-allocation views.
359    #[inline]
360    pub fn elements(&self) -> impl Iterator<Item = BorrowedElement<'a>> {
361        self.0.elements.iter().map(BorrowedElement)
362    }
363}
364
365impl OwnedSegment {
366    /// Get the first component of element `n`, or `None` if absent.
367    ///
368    /// This is the zero-allocation equivalent of `as_borrowed().element_str(n)`.
369    /// Used internally by [`crate::helpers::find_segment_owned`] and the derived
370    /// [`crate::EdifactDeserialize::edifact_deserialize_owned`] implementations.
371    #[inline]
372    pub fn element_str(&self, n: usize) -> Option<&str> {
373        self.elements
374            .get(n)?
375            .components
376            .first()
377            .map(|(s, _)| s.as_str())
378    }
379
380    /// Get component `comp` of element `elem`, or `None` if absent.
381    ///
382    /// Zero-allocation equivalent of `as_borrowed().get_element(elem)?.get_component(comp)`.
383    #[inline]
384    pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
385        self.elements
386            .get(elem)?
387            .components
388            .get(comp)
389            .map(|(s, _)| s.as_str())
390    }
391
392    #[inline]
393    /// Shift all stored spans by `delta` bytes.
394    pub fn offset(mut self, delta: usize) -> Self {
395        self.span = self.span.offset(delta);
396        self.tag_span = self.tag_span.offset(delta);
397        for element in &mut self.elements {
398            element.span = element.span.offset(delta);
399            for (_, span) in &mut element.components {
400                *span = span.offset(delta);
401            }
402        }
403        self
404    }
405
406    #[inline]
407    /// View this owned segment as a borrowed [`Segment`].
408    ///
409    /// **Performance note**: allocates a `Vec<Element<'_>>` on every call.
410    /// When only individual field access is needed, prefer
411    /// [`OwnedSegment::borrow`] → [`BorrowedSegment`] which is O(1).
412    /// `as_borrowed` remains necessary when the callee requires `&[Segment<'_>]`.
413    pub fn as_borrowed(&self) -> Segment<'_> {
414        Segment {
415            tag: self.tag.as_str(),
416            span: self.span,
417            tag_span: self.tag_span,
418            elements: self
419                .elements
420                .iter()
421                .map(|elem| Element {
422                    span: elem.span,
423                    components: elem
424                        .components
425                        .iter()
426                        .map(|(c, s)| (Cow::Borrowed(c.as_str()), *s))
427                        .collect(),
428                })
429                .collect(),
430        }
431    }
432
433    /// Return a zero-allocation view of this segment.
434    ///
435    /// Unlike [`as_borrowed`][OwnedSegment::as_borrowed], this is `O(1)` and
436    /// performs no heap allocation.  The view cannot be passed to APIs that
437    /// require `&[Segment<'_>]`; use [`as_borrowed`][OwnedSegment::as_borrowed]
438    /// for those call sites.
439    #[inline]
440    pub fn borrow(&self) -> BorrowedSegment<'_> {
441        BorrowedSegment(self)
442    }
443}
444
445impl<'a> From<Segment<'a>> for OwnedSegment {
446    fn from(value: Segment<'a>) -> Self {
447        Self {
448            tag: value.tag.to_string(),
449            span: value.span,
450            tag_span: value.tag_span,
451            elements: value.elements.into_iter().map(OwnedElement::from).collect(),
452        }
453    }
454}