edifact_rs/model.rs
1use smallvec::SmallVec;
2use std::borrow::Cow;
3
/// A half-open byte span (`start..end`) within an EDIFACT payload.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span from an inclusive `start` and an exclusive `end` offset.
    ///
    /// No validation is performed: an inverted span (`end < start`) can be
    /// constructed, and [`Span::len`] will panic when called on it.
    #[inline]
    #[must_use]
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Return a copy of this span shifted forward by `delta` bytes.
    ///
    /// `Span` is `Copy` and this method takes `self` by value, so the receiver
    /// is never modified; the shifted span exists only as the return value.
    ///
    /// Both bounds use saturating addition, so they clamp at `usize::MAX`
    /// instead of overflowing on malformed input.
    #[inline]
    #[must_use = "`offset` returns the shifted span and does not modify the receiver"]
    pub const fn offset(self, delta: usize) -> Self {
        Self {
            start: self.start.saturating_add(delta),
            end: self.end.saturating_add(delta),
        }
    }

    /// Length of the span in bytes (`end - start`).
    ///
    /// # Panics
    ///
    /// Panics in debug **and** release builds when `end < start` (inverted span).
    /// Use `span.end.saturating_sub(span.start)` directly if you need a
    /// non-panicking variant.
    #[inline]
    #[must_use]
    pub fn len(self) -> usize {
        // `checked_sub` keeps the failure explicit in every build profile
        // instead of wrapping silently in release mode.
        match self.end.checked_sub(self.start) {
            Some(length) => length,
            None => panic!("Span::len: end ({}) < start ({})", self.end, self.start),
        }
    }

    /// Returns `true` if the span covers zero bytes, i.e. `start == end`.
    ///
    /// An inverted span (`end < start`) is *not* reported as empty; calling
    /// [`Span::len`] on such a span panics.
    #[inline]
    #[must_use]
    pub const fn is_empty(self) -> bool {
        self.start == self.end
    }
}

impl std::fmt::Display for Span {
    /// Render the span as `start..end`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}..{}", self.start, self.end)
    }
}
56
57/// A single EDIFACT segment, borrowing its data from the source input.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct Segment<'a> {
60 /// Segment tag, usually three uppercase letters.
61 pub tag: &'a str,
62 /// Span covering the whole segment payload.
63 pub span: Span,
64 /// Span covering only the segment tag.
65 pub tag_span: Span,
66 /// Segment elements in positional order.
67 pub elements: Vec<Element<'a>>,
68}
69
70impl<'a> Segment<'a> {
71 #[inline]
72 /// Construct a segment with default spans.
73 pub fn new(tag: &'a str, elements: Vec<Element<'a>>) -> Self {
74 Self {
75 tag,
76 span: Span::default(),
77 tag_span: Span::default(),
78 elements,
79 }
80 }
81
82 /// Return the element at position `n` (0-indexed), if it exists.
83 #[inline]
84 pub fn get_element(&self, n: usize) -> Option<&Element<'a>> {
85 self.elements.get(n)
86 }
87
88 /// Shorthand: get component 0 of element `n` — the most common access pattern.
89 #[inline]
90 pub fn element_str(&self, n: usize) -> Option<&str> {
91 self.elements.get(n)?.get_component(0)
92 }
93
94 /// Return the byte span of the element at position `n`, if it exists.
95 #[inline]
96 pub fn element_span(&self, n: usize) -> Option<Span> {
97 Some(self.elements.get(n)?.span)
98 }
99}
100
101/// A data element, which may have one or more component values.
102///
103/// Uses [`SmallVec`] with an inline capacity of 4 to avoid heap allocation
104/// for the common case (≤ 4 components). Component values borrow from the
105/// original input; if the value contained a release-character sequence the
106/// resolved string is stored as an owned [`Cow::Owned`] variant instead of
107/// using `Box::leak`.
108///
109/// Each entry is a `(value, span)` pair, guaranteeing that the component
110/// string and its byte span are always in sync.
111#[derive(Debug, Clone, PartialEq, Eq)]
112pub struct Element<'a> {
113 /// Span covering the whole element.
114 pub span: Span,
115 /// Element components in positional order, each paired with its byte span.
116 pub components: SmallVec<[(Cow<'a, str>, Span); 4]>,
117}
118
119impl<'a> Element<'a> {
120 /// Return the component at position `n` (0-indexed), if it exists.
121 #[inline]
122 pub fn get_component(&self, n: usize) -> Option<&str> {
123 self.components.get(n).map(|(c, _)| c.as_ref())
124 }
125
126 /// Return the component at position `n`, or `""` if absent.
127 #[inline]
128 pub fn component_or_empty(&self, n: usize) -> &str {
129 self.components
130 .get(n)
131 .map(|(c, _)| c.as_ref())
132 .unwrap_or("")
133 }
134
135 /// Return the byte span of the component at position `n`, if it exists.
136 #[inline]
137 pub fn component_span(&self, n: usize) -> Option<Span> {
138 self.components.get(n).map(|(_, s)| *s)
139 }
140
141 /// Convenience constructor: wraps string literals as borrowed components.
142 ///
143 /// Useful in tests and when constructing segments for writing.
144 pub fn of(components: &[&'a str]) -> Self {
145 Self {
146 span: Span::default(),
147 components: components
148 .iter()
149 .copied()
150 .map(|c| (Cow::Borrowed(c), Span::default()))
151 .collect(),
152 }
153 }
154}
155
156/// Owned data element used by reader-based parsing APIs.
157///
158/// Each entry in `components` is a `(value, span)` pair, keeping the string
159/// and its byte span structurally in sync.
160#[derive(Debug, Clone, PartialEq, Eq)]
161pub struct OwnedElement {
162 /// Span covering the whole element.
163 pub span: Span,
164 /// Owned element components in positional order, each paired with its byte span.
165 pub components: SmallVec<[(String, Span); 4]>,
166}
167
168impl OwnedElement {
169 #[inline]
170 /// Shift all stored spans by `delta` bytes.
171 pub fn offset(mut self, delta: usize) -> Self {
172 self.span = self.span.offset(delta);
173 for (_, span) in &mut self.components {
174 *span = span.offset(delta);
175 }
176 self
177 }
178}
179
180impl<'a> From<Element<'a>> for OwnedElement {
181 fn from(value: Element<'a>) -> Self {
182 Self {
183 span: value.span,
184 components: value
185 .components
186 .into_iter()
187 .map(|(c, s)| (c.into_owned(), s))
188 .collect(),
189 }
190 }
191}
192
193/// Owned segment used by reader-based parsing APIs.
194#[derive(Debug, Clone, PartialEq, Eq)]
195pub struct OwnedSegment {
196 /// Segment tag, usually three uppercase letters.
197 pub tag: String,
198 /// Span covering the whole segment payload.
199 pub span: Span,
200 /// Span covering only the segment tag.
201 pub tag_span: Span,
202 /// Owned segment elements in positional order.
203 pub elements: Vec<OwnedElement>,
204}
205
206/// Zero-allocation view of an [`OwnedElement`].
207///
208/// Implements the same accessor methods as [`Element`] without constructing
209/// any intermediate `SmallVec` or `Cow` values. Use this when you hold an
210/// `&OwnedSegment` reference and want to inspect element data without the
211/// `Vec<Element>` allocation that [`OwnedSegment::as_borrowed`] incurs.
212///
213/// Construct via `BorrowedElement::from(&owned_element)` or through
214/// [`BorrowedSegment::get_element`].
215#[derive(Debug, Clone, Copy)]
216pub struct BorrowedElement<'a>(pub(crate) &'a OwnedElement);
217
218impl<'a> From<&'a OwnedElement> for BorrowedElement<'a> {
219 #[inline]
220 fn from(elem: &'a OwnedElement) -> Self {
221 BorrowedElement(elem)
222 }
223}
224
225impl<'a> BorrowedElement<'a> {
226 /// Return the component at position `n` (0-indexed), if it exists.
227 #[inline]
228 pub fn get_component(&self, n: usize) -> Option<&'a str> {
229 self.0.components.get(n).map(|(s, _)| s.as_str())
230 }
231
232 /// Return the component at position `n`, or `""` if absent.
233 #[inline]
234 pub fn component_or_empty(&self, n: usize) -> &'a str {
235 self.0
236 .components
237 .get(n)
238 .map(|(s, _)| s.as_str())
239 .unwrap_or("")
240 }
241
242 /// Return the byte span of the component at position `n`, if it exists.
243 #[inline]
244 pub fn component_span(&self, n: usize) -> Option<Span> {
245 self.0.components.get(n).map(|(_, s)| *s)
246 }
247
248 /// The byte span covering the whole element.
249 #[inline]
250 pub fn span(&self) -> Span {
251 self.0.span
252 }
253
254 /// Number of components in this element.
255 #[inline]
256 pub fn len(&self) -> usize {
257 self.0.components.len()
258 }
259
260 /// Returns `true` if this element has no components.
261 #[inline]
262 pub fn is_empty(&self) -> bool {
263 self.0.components.is_empty()
264 }
265
266 /// Iterate over all component strings.
267 #[inline]
268 pub fn iter(&self) -> impl Iterator<Item = &'a str> {
269 self.0.components.iter().map(|(c, _)| c.as_str())
270 }
271}
272
273/// Zero-allocation view of an [`OwnedSegment`].
274///
275/// Implements the same accessor methods as [`Segment`] without constructing
276/// a `Vec<Element>`. Use this when you hold an `&OwnedSegment` reference and
277/// want to read data without the allocations incurred by
278/// [`OwnedSegment::as_borrowed`].
279///
280/// # Construction
281///
282/// The idiomatic way to obtain a `BorrowedSegment` is via [`OwnedSegment::borrow`]
283/// or the [`From`] impl:
284///
285/// ```rust
286/// use edifact_rs::{BorrowedSegment, OwnedSegment, Span};
287///
288/// let seg = OwnedSegment {
289/// tag: "BGM".into(),
290/// span: Span::new(0, 3),
291/// tag_span: Span::new(0, 3),
292/// elements: vec![],
293/// };
294/// let borrowed = BorrowedSegment::from(&seg);
295/// assert_eq!(borrowed.tag(), "BGM");
296/// ```
297///
298/// The `'a` lifetime is tied to the referent — you cannot outlive the
299/// `OwnedSegment` you borrowed from.
300#[derive(Debug, Clone, Copy)]
301pub struct BorrowedSegment<'a>(pub(crate) &'a OwnedSegment);
302
303impl<'a> From<&'a OwnedSegment> for BorrowedSegment<'a> {
304 #[inline]
305 fn from(seg: &'a OwnedSegment) -> Self {
306 BorrowedSegment(seg)
307 }
308}
309
310impl<'a> BorrowedSegment<'a> {
311 /// The segment tag (e.g. `"BGM"`).
312 #[inline]
313 pub fn tag(&self) -> &'a str {
314 &self.0.tag
315 }
316
317 /// Byte span covering the whole segment.
318 #[inline]
319 pub fn span(&self) -> Span {
320 self.0.span
321 }
322
323 /// Byte span covering only the segment tag.
324 #[inline]
325 pub fn tag_span(&self) -> Span {
326 self.0.tag_span
327 }
328
329 /// Return the element at position `n` (0-indexed), if it exists.
330 #[inline]
331 pub fn get_element(&self, n: usize) -> Option<BorrowedElement<'a>> {
332 self.0.elements.get(n).map(BorrowedElement)
333 }
334
335 /// Shorthand: first component of element `n` — the most common access pattern.
336 #[inline]
337 pub fn element_str(&self, n: usize) -> Option<&'a str> {
338 self.0
339 .elements
340 .get(n)?
341 .components
342 .first()
343 .map(|(c, _)| c.as_str())
344 }
345
346 /// Return the byte span of the element at position `n`, if it exists.
347 #[inline]
348 pub fn element_span(&self, n: usize) -> Option<Span> {
349 Some(self.0.elements.get(n)?.span)
350 }
351
352 /// Iterate over all elements as zero-allocation views.
353 #[inline]
354 pub fn elements(&self) -> impl Iterator<Item = BorrowedElement<'a>> {
355 self.0.elements.iter().map(BorrowedElement)
356 }
357}
358
359impl OwnedSegment {
360 /// Get the first component of element `n`, or `None` if absent.
361 ///
362 /// This is the zero-allocation equivalent of `as_borrowed().element_str(n)`.
363 /// Used internally by [`crate::helpers::find_segment_owned`] and the derived
364 /// [`crate::EdifactDeserialize::edifact_deserialize_owned`] implementations.
365 #[inline]
366 pub fn element_str(&self, n: usize) -> Option<&str> {
367 self.elements
368 .get(n)?
369 .components
370 .first()
371 .map(|(s, _)| s.as_str())
372 }
373
374 /// Get component `comp` of element `elem`, or `None` if absent.
375 ///
376 /// Zero-allocation equivalent of `as_borrowed().get_element(elem)?.get_component(comp)`.
377 #[inline]
378 pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
379 self.elements
380 .get(elem)?
381 .components
382 .get(comp)
383 .map(|(s, _)| s.as_str())
384 }
385
386 #[inline]
387 /// Shift all stored spans by `delta` bytes.
388 pub fn offset(mut self, delta: usize) -> Self {
389 self.span = self.span.offset(delta);
390 self.tag_span = self.tag_span.offset(delta);
391 for element in &mut self.elements {
392 element.span = element.span.offset(delta);
393 for (_, span) in &mut element.components {
394 *span = span.offset(delta);
395 }
396 }
397 self
398 }
399
400 #[inline]
401 /// View this owned segment as a borrowed [`Segment`].
402 ///
403 /// **Performance note**: allocates a `Vec<Element<'_>>` on every call.
404 /// When only individual field access is needed, prefer
405 /// [`OwnedSegment::borrow`] → [`BorrowedSegment`] which is O(1).
406 /// `as_borrowed` remains necessary when the callee requires `&[Segment<'_>]`.
407 pub fn as_borrowed(&self) -> Segment<'_> {
408 Segment {
409 tag: self.tag.as_str(),
410 span: self.span,
411 tag_span: self.tag_span,
412 elements: self
413 .elements
414 .iter()
415 .map(|elem| Element {
416 span: elem.span,
417 components: elem
418 .components
419 .iter()
420 .map(|(c, s)| (Cow::Borrowed(c.as_str()), *s))
421 .collect(),
422 })
423 .collect(),
424 }
425 }
426
427 /// Return a zero-allocation view of this segment.
428 ///
429 /// Unlike [`as_borrowed`][OwnedSegment::as_borrowed], this is `O(1)` and
430 /// performs no heap allocation. The view cannot be passed to APIs that
431 /// require `&[Segment<'_>]`; use [`as_borrowed`][OwnedSegment::as_borrowed]
432 /// for those call sites.
433 #[inline]
434 pub fn borrow(&self) -> BorrowedSegment<'_> {
435 BorrowedSegment(self)
436 }
437}
438
439impl<'a> From<Segment<'a>> for OwnedSegment {
440 fn from(value: Segment<'a>) -> Self {
441 Self {
442 tag: value.tag.to_string(),
443 span: value.span,
444 tag_span: value.tag_span,
445 elements: value.elements.into_iter().map(OwnedElement::from).collect(),
446 }
447 }
448}