edifact_rs/model.rs
1use smallvec::SmallVec;
2use std::borrow::Cow;
3
/// A half-open byte span within an EDIFACT payload.
///
/// `start` is inclusive and `end` is exclusive, so a well-formed span
/// satisfies `start <= end`. The type itself does not enforce this; see
/// [`Span::len`] and [`Span::is_empty`] for how inverted spans are treated.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span from inclusive start and exclusive end offsets.
    ///
    /// No validation is performed on the two offsets.
    #[inline]
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Return a copy of this span shifted forward by `delta` bytes.
    ///
    /// Uses saturating addition to avoid integer overflow on malformed input:
    /// offsets clamp at `usize::MAX` instead of wrapping around.
    #[inline]
    #[must_use = "`offset` returns the shifted span; the original is left unchanged"]
    pub const fn offset(self, delta: usize) -> Self {
        Self {
            start: self.start.saturating_add(delta),
            end: self.end.saturating_add(delta),
        }
    }

    /// Length of the span in bytes.
    ///
    /// In debug builds, asserts `end >= start` (inverted spans are a bug).
    /// In release builds, returns 0 for inverted spans rather than panicking,
    /// so a single corrupt span does not abort an entire validation run.
    #[inline]
    pub fn len(self) -> usize {
        debug_assert!(
            self.end >= self.start,
            "Span::len: end ({}) < start ({})",
            self.end,
            self.start
        );
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` if the span covers zero bytes.
    ///
    /// This agrees with [`Span::len`]: an inverted span (`end < start`) has a
    /// release-mode length of 0, so it is reported as empty here as well
    /// instead of claiming to contain data.
    #[inline]
    pub const fn is_empty(self) -> bool {
        // `<=` rather than `==` so that `is_empty()` is true exactly when the
        // saturating length computed by `len()` is zero.
        self.end <= self.start
    }
}

/// Formats the span as `start..end`, mirroring Rust's range syntax.
impl std::fmt::Display for Span {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}..{}", self.start, self.end)
    }
}
59
60/// A single EDIFACT segment, borrowing its data from the source input.
61#[derive(Debug, Clone, PartialEq, Eq)]
62pub struct Segment<'a> {
63 /// Segment tag, usually three uppercase letters.
64 pub tag: &'a str,
65 /// Span covering the whole segment payload.
66 pub span: Span,
67 /// Span covering only the segment tag.
68 pub tag_span: Span,
69 /// Segment elements in positional order.
70 pub elements: Vec<Element<'a>>,
71}
72
73impl<'a> Segment<'a> {
74 #[inline]
75 /// Construct a segment with default spans.
76 pub fn new(tag: &'a str, elements: Vec<Element<'a>>) -> Self {
77 Self {
78 tag,
79 span: Span::default(),
80 tag_span: Span::default(),
81 elements,
82 }
83 }
84
85 /// Return the element at position `n` (0-indexed), if it exists.
86 #[inline]
87 pub fn get_element(&self, n: usize) -> Option<&Element<'a>> {
88 self.elements.get(n)
89 }
90
91 /// Shorthand: get component 0 of element `n` — the most common access pattern.
92 #[inline]
93 pub fn element_str(&self, n: usize) -> Option<&str> {
94 self.elements.get(n)?.get_component(0)
95 }
96
97 /// Get component `comp` of element `elem` (both 0-based), or `None` if absent.
98 ///
99 /// Mirrors [`OwnedSegment::component_str`], eliminating the need to chain
100 /// `get_element(elem)?.get_component(comp)` in rule closures.
101 #[inline]
102 pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
103 self.elements.get(elem)?.get_component(comp)
104 }
105
106 /// Return the byte span of the element at position `n`, if it exists.
107 #[inline]
108 pub fn element_span(&self, n: usize) -> Option<Span> {
109 Some(self.elements.get(n)?.span)
110 }
111}
112
113/// A data element, which may have one or more component values.
114///
115/// Uses [`SmallVec`] with an inline capacity of 4 to avoid heap allocation
116/// for the common case (≤ 4 components). Component values borrow from the
117/// original input; if the value contained a release-character sequence the
118/// resolved string is stored as an owned [`Cow::Owned`] variant instead of
119/// using `Box::leak`.
120///
121/// Each entry is a `(value, span)` pair, guaranteeing that the component
122/// string and its byte span are always in sync.
123#[derive(Debug, Clone, PartialEq, Eq)]
124pub struct Element<'a> {
125 /// Span covering the whole element.
126 pub span: Span,
127 /// Element components in positional order, each paired with its byte span.
128 pub components: SmallVec<[(Cow<'a, str>, Span); 4]>,
129}
130
131impl<'a> Element<'a> {
132 /// Return the component at position `n` (0-indexed), if it exists.
133 #[inline]
134 pub fn get_component(&self, n: usize) -> Option<&str> {
135 self.components.get(n).map(|(c, _)| c.as_ref())
136 }
137
138 /// Return the component at position `n`, or `""` if absent.
139 #[inline]
140 pub fn component_or_empty(&self, n: usize) -> &str {
141 self.components
142 .get(n)
143 .map(|(c, _)| c.as_ref())
144 .unwrap_or("")
145 }
146
147 /// Return the byte span of the component at position `n`, if it exists.
148 #[inline]
149 pub fn component_span(&self, n: usize) -> Option<Span> {
150 self.components.get(n).map(|(_, s)| *s)
151 }
152
153 /// Convenience constructor: wraps string literals as borrowed components.
154 ///
155 /// Useful in tests and when constructing segments for writing.
156 pub fn of(components: &[&'a str]) -> Self {
157 Self {
158 span: Span::default(),
159 components: components
160 .iter()
161 .copied()
162 .map(|c| (Cow::Borrowed(c), Span::default()))
163 .collect(),
164 }
165 }
166}
167
168/// Owned data element used by reader-based parsing APIs.
169///
170/// Each entry in `components` is a `(value, span)` pair, keeping the string
171/// and its byte span structurally in sync.
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct OwnedElement {
174 /// Span covering the whole element.
175 pub span: Span,
176 /// Owned element components in positional order, each paired with its byte span.
177 pub components: SmallVec<[(String, Span); 4]>,
178}
179
180impl OwnedElement {
181 #[inline]
182 /// Shift all stored spans by `delta` bytes.
183 pub fn offset(mut self, delta: usize) -> Self {
184 self.span = self.span.offset(delta);
185 for (_, span) in &mut self.components {
186 *span = span.offset(delta);
187 }
188 self
189 }
190}
191
192impl<'a> From<Element<'a>> for OwnedElement {
193 fn from(value: Element<'a>) -> Self {
194 Self {
195 span: value.span,
196 components: value
197 .components
198 .into_iter()
199 .map(|(c, s)| (c.into_owned(), s))
200 .collect(),
201 }
202 }
203}
204
205/// Owned segment used by reader-based parsing APIs.
206#[derive(Debug, Clone, PartialEq, Eq)]
207pub struct OwnedSegment {
208 /// Segment tag, usually three uppercase letters.
209 pub tag: String,
210 /// Span covering the whole segment payload.
211 pub span: Span,
212 /// Span covering only the segment tag.
213 pub tag_span: Span,
214 /// Owned segment elements in positional order.
215 pub elements: Vec<OwnedElement>,
216}
217
218/// Zero-allocation view of an [`OwnedElement`].
219///
220/// Implements the same accessor methods as [`Element`] without constructing
221/// any intermediate `SmallVec` or `Cow` values. Use this when you hold an
222/// `&OwnedSegment` reference and want to inspect element data without the
223/// `Vec<Element>` allocation that [`OwnedSegment::as_borrowed`] incurs.
224///
225/// Construct via `BorrowedElement::from(&owned_element)` or through
226/// [`BorrowedSegment::get_element`].
227#[derive(Debug, Clone, Copy)]
228pub struct BorrowedElement<'a>(pub(crate) &'a OwnedElement);
229
230impl<'a> From<&'a OwnedElement> for BorrowedElement<'a> {
231 #[inline]
232 fn from(elem: &'a OwnedElement) -> Self {
233 BorrowedElement(elem)
234 }
235}
236
237impl<'a> BorrowedElement<'a> {
238 /// Return the component at position `n` (0-indexed), if it exists.
239 #[inline]
240 pub fn get_component(&self, n: usize) -> Option<&'a str> {
241 self.0.components.get(n).map(|(s, _)| s.as_str())
242 }
243
244 /// Return the component at position `n`, or `""` if absent.
245 #[inline]
246 pub fn component_or_empty(&self, n: usize) -> &'a str {
247 self.0
248 .components
249 .get(n)
250 .map(|(s, _)| s.as_str())
251 .unwrap_or("")
252 }
253
254 /// Return the byte span of the component at position `n`, if it exists.
255 #[inline]
256 pub fn component_span(&self, n: usize) -> Option<Span> {
257 self.0.components.get(n).map(|(_, s)| *s)
258 }
259
260 /// The byte span covering the whole element.
261 #[inline]
262 pub fn span(&self) -> Span {
263 self.0.span
264 }
265
266 /// Number of components in this element.
267 #[inline]
268 pub fn len(&self) -> usize {
269 self.0.components.len()
270 }
271
272 /// Returns `true` if this element has no components.
273 #[inline]
274 pub fn is_empty(&self) -> bool {
275 self.0.components.is_empty()
276 }
277
278 /// Iterate over all component strings.
279 #[inline]
280 pub fn iter(&self) -> impl Iterator<Item = &'a str> {
281 self.0.components.iter().map(|(c, _)| c.as_str())
282 }
283}
284
285/// Zero-allocation view of an [`OwnedSegment`].
286///
287/// Implements the same accessor methods as [`Segment`] without constructing
288/// a `Vec<Element>`. Use this when you hold an `&OwnedSegment` reference and
289/// want to read data without the allocations incurred by
290/// [`OwnedSegment::as_borrowed`].
291///
292/// # Construction
293///
294/// The idiomatic way to obtain a `BorrowedSegment` is via [`OwnedSegment::borrow`]
295/// or the [`From`] impl:
296///
297/// ```rust
298/// use edifact_rs::{BorrowedSegment, OwnedSegment, Span};
299///
300/// let seg = OwnedSegment {
301/// tag: "BGM".into(),
302/// span: Span::new(0, 3),
303/// tag_span: Span::new(0, 3),
304/// elements: vec![],
305/// };
306/// let borrowed = BorrowedSegment::from(&seg);
307/// assert_eq!(borrowed.tag(), "BGM");
308/// ```
309///
310/// The `'a` lifetime is tied to the referent — you cannot outlive the
311/// `OwnedSegment` you borrowed from.
312#[derive(Debug, Clone, Copy)]
313pub struct BorrowedSegment<'a>(pub(crate) &'a OwnedSegment);
314
315impl<'a> From<&'a OwnedSegment> for BorrowedSegment<'a> {
316 #[inline]
317 fn from(seg: &'a OwnedSegment) -> Self {
318 BorrowedSegment(seg)
319 }
320}
321
322impl<'a> BorrowedSegment<'a> {
323 /// The segment tag (e.g. `"BGM"`).
324 #[inline]
325 pub fn tag(&self) -> &'a str {
326 &self.0.tag
327 }
328
329 /// Byte span covering the whole segment.
330 #[inline]
331 pub fn span(&self) -> Span {
332 self.0.span
333 }
334
335 /// Byte span covering only the segment tag.
336 #[inline]
337 pub fn tag_span(&self) -> Span {
338 self.0.tag_span
339 }
340
341 /// Return the element at position `n` (0-indexed), if it exists.
342 #[inline]
343 pub fn get_element(&self, n: usize) -> Option<BorrowedElement<'a>> {
344 self.0.elements.get(n).map(BorrowedElement)
345 }
346
347 /// Shorthand: first component of element `n` — the most common access pattern.
348 #[inline]
349 pub fn element_str(&self, n: usize) -> Option<&'a str> {
350 self.0
351 .elements
352 .get(n)?
353 .components
354 .first()
355 .map(|(c, _)| c.as_str())
356 }
357
358 /// Get component `comp` of element `elem` (both 0-based), or `None` if absent.
359 ///
360 /// Mirrors [`OwnedSegment::component_str`].
361 #[inline]
362 pub fn component_str(&self, elem: usize, comp: usize) -> Option<&'a str> {
363 self.0
364 .elements
365 .get(elem)?
366 .components
367 .get(comp)
368 .map(|(c, _)| c.as_str())
369 }
370
371 /// Return the byte span of the element at position `n`, if it exists.
372 #[inline]
373 pub fn element_span(&self, n: usize) -> Option<Span> {
374 Some(self.0.elements.get(n)?.span)
375 }
376
377 /// Iterate over all elements as zero-allocation views.
378 #[inline]
379 pub fn elements(&self) -> impl Iterator<Item = BorrowedElement<'a>> {
380 self.0.elements.iter().map(BorrowedElement)
381 }
382}
383
384impl OwnedSegment {
385 /// Get the first component of element `n`, or `None` if absent.
386 ///
387 /// This is the zero-allocation equivalent of `as_borrowed().element_str(n)`.
388 /// Used internally by [`crate::find_segment_owned`] and the derived
389 /// [`crate::EdifactDeserialize::edifact_deserialize_owned`] implementations.
390 #[inline]
391 pub fn element_str(&self, n: usize) -> Option<&str> {
392 self.elements
393 .get(n)?
394 .components
395 .first()
396 .map(|(s, _)| s.as_str())
397 }
398
399 /// Get component `comp` of element `elem`, or `None` if absent.
400 ///
401 /// Zero-allocation equivalent of `as_borrowed().get_element(elem)?.get_component(comp)`.
402 #[inline]
403 pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
404 self.elements
405 .get(elem)?
406 .components
407 .get(comp)
408 .map(|(s, _)| s.as_str())
409 }
410
411 #[inline]
412 /// Shift all stored spans by `delta` bytes.
413 pub fn offset(mut self, delta: usize) -> Self {
414 self.span = self.span.offset(delta);
415 self.tag_span = self.tag_span.offset(delta);
416 for element in &mut self.elements {
417 element.span = element.span.offset(delta);
418 for (_, span) in &mut element.components {
419 *span = span.offset(delta);
420 }
421 }
422 self
423 }
424
425 #[inline]
426 /// View this owned segment as a borrowed [`Segment`].
427 ///
428 /// **Performance note**: allocates a `Vec<Element<'_>>` on every call.
429 /// When only individual field access is needed, prefer
430 /// [`OwnedSegment::borrow`] → [`BorrowedSegment`] which is O(1).
431 /// `as_borrowed` remains necessary when the callee requires `&[Segment<'_>]`.
432 pub fn as_borrowed(&self) -> Segment<'_> {
433 Segment {
434 tag: self.tag.as_str(),
435 span: self.span,
436 tag_span: self.tag_span,
437 elements: self
438 .elements
439 .iter()
440 .map(|elem| Element {
441 span: elem.span,
442 components: elem
443 .components
444 .iter()
445 .map(|(c, s)| (Cow::Borrowed(c.as_str()), *s))
446 .collect(),
447 })
448 .collect(),
449 }
450 }
451
452 /// Return a zero-allocation view of this segment.
453 ///
454 /// Unlike [`as_borrowed`][OwnedSegment::as_borrowed], this is `O(1)` and
455 /// performs no heap allocation. The view cannot be passed to APIs that
456 /// require `&[Segment<'_>]`; use [`as_borrowed`][OwnedSegment::as_borrowed]
457 /// for those call sites.
458 #[inline]
459 pub fn borrow(&self) -> BorrowedSegment<'_> {
460 BorrowedSegment(self)
461 }
462}
463
464impl<'a> From<Segment<'a>> for OwnedSegment {
465 fn from(value: Segment<'a>) -> Self {
466 Self {
467 tag: value.tag.to_string(),
468 span: value.span,
469 tag_span: value.tag_span,
470 elements: value.elements.into_iter().map(OwnedElement::from).collect(),
471 }
472 }
473}