edifact_rs/model.rs
1use smallvec::SmallVec;
2use std::borrow::Cow;
3
/// A half-open byte span (`start..end`) within an EDIFACT payload.
///
/// A well-formed span satisfies `start <= end`. Both fields are public and
/// [`Span::new`] performs no validation, so an inverted span (`start > end`)
/// can be constructed; [`Span::len`] and [`Span::is_empty`] treat such a span
/// as covering zero bytes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Span {
    /// Start byte offset (inclusive).
    pub start: usize,
    /// End byte offset (exclusive).
    pub end: usize,
}

impl Span {
    /// Construct a span from inclusive start and exclusive end offsets.
    ///
    /// The offsets are stored as given; no ordering check is performed.
    #[inline]
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Shift the span by `delta` bytes and return the shifted copy.
    ///
    /// Uses saturating addition to avoid integer overflow on malformed input.
    #[inline]
    #[must_use = "`offset` returns the shifted span; the receiver is taken by value and left untouched"]
    pub const fn offset(self, delta: usize) -> Self {
        Self {
            start: self.start.saturating_add(delta),
            end: self.end.saturating_add(delta),
        }
    }

    /// Length of the span in bytes.
    ///
    /// # Panics
    ///
    /// When debug assertions are enabled, panics if `start > end`. When they
    /// are disabled, an inverted span yields `0` instead of a wrapped-around
    /// value.
    ///
    /// # Note on constness
    ///
    /// This method is intentionally **not** `const fn` (changed in 0.7.0) so that
    /// the `debug_assert!` overflow guard is included in debug builds. If you need
    /// span arithmetic in a `const` context use `span.end - span.start` directly
    /// (both fields are `pub`).
    #[inline]
    pub fn len(self) -> usize {
        debug_assert!(
            self.start <= self.end,
            "Span::len: start ({}) > end ({})",
            self.start,
            self.end
        );
        // Saturate rather than subtract directly: without debug assertions an
        // inverted span would otherwise wrap to a huge length (or abort when
        // overflow checks are on), which is inconsistent with the saturating
        // policy `offset` applies to malformed input.
        self.end.saturating_sub(self.start)
    }

    /// Returns `true` if the span covers zero bytes.
    ///
    /// An inverted span (`start > end`) covers no bytes either and is
    /// therefore reported as empty, mirroring `std::ops::Range::is_empty`.
    #[inline]
    pub const fn is_empty(self) -> bool {
        self.start >= self.end
    }
}
56
57impl std::fmt::Display for Span {
58 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59 write!(f, "{}..{}", self.start, self.end)
60 }
61}
62
63/// A single EDIFACT segment, borrowing its data from the source input.
64#[derive(Debug, Clone, PartialEq, Eq)]
65pub struct Segment<'a> {
66 /// Segment tag, usually three uppercase letters.
67 pub tag: &'a str,
68 /// Span covering the whole segment payload.
69 pub span: Span,
70 /// Span covering only the segment tag.
71 pub tag_span: Span,
72 /// Segment elements in positional order.
73 pub elements: Vec<Element<'a>>,
74}
75
76impl<'a> Segment<'a> {
77 #[inline]
78 /// Construct a segment with default spans.
79 pub fn new(tag: &'a str, elements: Vec<Element<'a>>) -> Self {
80 Self {
81 tag,
82 span: Span::default(),
83 tag_span: Span::default(),
84 elements,
85 }
86 }
87
88 /// Return the element at position `n` (0-indexed), if it exists.
89 #[inline]
90 pub fn get_element(&self, n: usize) -> Option<&Element<'a>> {
91 self.elements.get(n)
92 }
93
94 /// Shorthand: get component 0 of element `n` — the most common access pattern.
95 #[inline]
96 pub fn element_str(&self, n: usize) -> Option<&str> {
97 self.elements.get(n)?.get_component(0)
98 }
99
100 /// Return the byte span of the element at position `n`, if it exists.
101 #[inline]
102 pub fn element_span(&self, n: usize) -> Option<Span> {
103 Some(self.elements.get(n)?.span)
104 }
105}
106
107/// A data element, which may have one or more component values.
108///
109/// Uses [`SmallVec`] with an inline capacity of 4 to avoid heap allocation
110/// for the common case (≤ 4 components). Component values borrow from the
111/// original input; if the value contained a release-character sequence the
112/// resolved string is stored as an owned [`Cow::Owned`] variant instead of
113/// using `Box::leak`.
114///
115/// Each entry is a `(value, span)` pair, guaranteeing that the component
116/// string and its byte span are always in sync.
117#[derive(Debug, Clone, PartialEq, Eq)]
118pub struct Element<'a> {
119 /// Span covering the whole element.
120 pub span: Span,
121 /// Element components in positional order, each paired with its byte span.
122 pub components: SmallVec<[(Cow<'a, str>, Span); 4]>,
123}
124
125impl<'a> Element<'a> {
126 /// Return the component at position `n` (0-indexed), if it exists.
127 #[inline]
128 pub fn get_component(&self, n: usize) -> Option<&str> {
129 self.components.get(n).map(|(c, _)| c.as_ref())
130 }
131
132 /// Return the component at position `n`, or `""` if absent.
133 #[inline]
134 pub fn component_or_empty(&self, n: usize) -> &str {
135 self.components
136 .get(n)
137 .map(|(c, _)| c.as_ref())
138 .unwrap_or("")
139 }
140
141 /// Return the byte span of the component at position `n`, if it exists.
142 #[inline]
143 pub fn component_span(&self, n: usize) -> Option<Span> {
144 self.components.get(n).map(|(_, s)| *s)
145 }
146
147 /// Convenience constructor: wraps string literals as borrowed components.
148 ///
149 /// Useful in tests and when constructing segments for writing.
150 pub fn of(components: &[&'a str]) -> Self {
151 Self {
152 span: Span::default(),
153 components: components
154 .iter()
155 .copied()
156 .map(|c| (Cow::Borrowed(c), Span::default()))
157 .collect(),
158 }
159 }
160}
161
162/// Owned data element used by reader-based parsing APIs.
163///
164/// Each entry in `components` is a `(value, span)` pair, keeping the string
165/// and its byte span structurally in sync.
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub struct OwnedElement {
168 /// Span covering the whole element.
169 pub span: Span,
170 /// Owned element components in positional order, each paired with its byte span.
171 pub components: SmallVec<[(String, Span); 4]>,
172}
173
174impl OwnedElement {
175 #[inline]
176 /// Shift all stored spans by `delta` bytes.
177 pub fn offset(mut self, delta: usize) -> Self {
178 self.span = self.span.offset(delta);
179 for (_, span) in &mut self.components {
180 *span = span.offset(delta);
181 }
182 self
183 }
184}
185
186impl<'a> From<Element<'a>> for OwnedElement {
187 fn from(value: Element<'a>) -> Self {
188 Self {
189 span: value.span,
190 components: value
191 .components
192 .into_iter()
193 .map(|(c, s)| (c.into_owned(), s))
194 .collect(),
195 }
196 }
197}
198
199/// Owned segment used by reader-based parsing APIs.
200#[derive(Debug, Clone, PartialEq, Eq)]
201pub struct OwnedSegment {
202 /// Segment tag, usually three uppercase letters.
203 pub tag: String,
204 /// Span covering the whole segment payload.
205 pub span: Span,
206 /// Span covering only the segment tag.
207 pub tag_span: Span,
208 /// Owned segment elements in positional order.
209 pub elements: Vec<OwnedElement>,
210}
211
212/// Zero-allocation view of an [`OwnedElement`].
213///
214/// Implements the same accessor methods as [`Element`] without constructing
215/// any intermediate `SmallVec` or `Cow` values. Use this when you hold an
216/// `&OwnedSegment` reference and want to inspect element data without the
217/// `Vec<Element>` allocation that [`OwnedSegment::as_borrowed`] incurs.
218///
219/// Construct via `BorrowedElement::from(&owned_element)` or through
220/// [`BorrowedSegment::get_element`].
221#[derive(Debug, Clone, Copy)]
222pub struct BorrowedElement<'a>(pub(crate) &'a OwnedElement);
223
224impl<'a> From<&'a OwnedElement> for BorrowedElement<'a> {
225 #[inline]
226 fn from(elem: &'a OwnedElement) -> Self {
227 BorrowedElement(elem)
228 }
229}
230
231impl<'a> BorrowedElement<'a> {
232 /// Return the component at position `n` (0-indexed), if it exists.
233 #[inline]
234 pub fn get_component(&self, n: usize) -> Option<&'a str> {
235 self.0.components.get(n).map(|(s, _)| s.as_str())
236 }
237
238 /// Return the component at position `n`, or `""` if absent.
239 #[inline]
240 pub fn component_or_empty(&self, n: usize) -> &'a str {
241 self.0
242 .components
243 .get(n)
244 .map(|(s, _)| s.as_str())
245 .unwrap_or("")
246 }
247
248 /// Return the byte span of the component at position `n`, if it exists.
249 #[inline]
250 pub fn component_span(&self, n: usize) -> Option<Span> {
251 self.0.components.get(n).map(|(_, s)| *s)
252 }
253
254 /// The byte span covering the whole element.
255 #[inline]
256 pub fn span(&self) -> Span {
257 self.0.span
258 }
259
260 /// Number of components in this element.
261 #[inline]
262 pub fn len(&self) -> usize {
263 self.0.components.len()
264 }
265
266 /// Returns `true` if this element has no components.
267 #[inline]
268 pub fn is_empty(&self) -> bool {
269 self.0.components.is_empty()
270 }
271
272 /// Iterate over all component strings.
273 #[inline]
274 pub fn iter(&self) -> impl Iterator<Item = &'a str> {
275 self.0.components.iter().map(|(c, _)| c.as_str())
276 }
277}
278
279/// Zero-allocation view of an [`OwnedSegment`].
280///
281/// Implements the same accessor methods as [`Segment`] without constructing
282/// a `Vec<Element>`. Use this when you hold an `&OwnedSegment` reference and
283/// want to read data without the allocations incurred by
284/// [`OwnedSegment::as_borrowed`].
285///
286/// # Construction
287///
288/// The idiomatic way to obtain a `BorrowedSegment` is via [`OwnedSegment::borrow`]
289/// or the [`From`] impl:
290///
291/// ```rust
292/// use edifact_rs::{BorrowedSegment, OwnedSegment, Span};
293///
294/// let seg = OwnedSegment {
295/// tag: "BGM".into(),
296/// span: Span::new(0, 3),
297/// tag_span: Span::new(0, 3),
298/// elements: vec![],
299/// };
300/// let borrowed = BorrowedSegment::from(&seg);
301/// assert_eq!(borrowed.tag(), "BGM");
302/// ```
303///
304/// The `'a` lifetime is tied to the referent — you cannot outlive the
305/// `OwnedSegment` you borrowed from.
306#[derive(Debug, Clone, Copy)]
307pub struct BorrowedSegment<'a>(pub(crate) &'a OwnedSegment);
308
309impl<'a> From<&'a OwnedSegment> for BorrowedSegment<'a> {
310 #[inline]
311 fn from(seg: &'a OwnedSegment) -> Self {
312 BorrowedSegment(seg)
313 }
314}
315
316impl<'a> BorrowedSegment<'a> {
317 /// The segment tag (e.g. `"BGM"`).
318 #[inline]
319 pub fn tag(&self) -> &'a str {
320 &self.0.tag
321 }
322
323 /// Byte span covering the whole segment.
324 #[inline]
325 pub fn span(&self) -> Span {
326 self.0.span
327 }
328
329 /// Byte span covering only the segment tag.
330 #[inline]
331 pub fn tag_span(&self) -> Span {
332 self.0.tag_span
333 }
334
335 /// Return the element at position `n` (0-indexed), if it exists.
336 #[inline]
337 pub fn get_element(&self, n: usize) -> Option<BorrowedElement<'a>> {
338 self.0.elements.get(n).map(BorrowedElement)
339 }
340
341 /// Shorthand: first component of element `n` — the most common access pattern.
342 #[inline]
343 pub fn element_str(&self, n: usize) -> Option<&'a str> {
344 self.0
345 .elements
346 .get(n)?
347 .components
348 .first()
349 .map(|(c, _)| c.as_str())
350 }
351
352 /// Return the byte span of the element at position `n`, if it exists.
353 #[inline]
354 pub fn element_span(&self, n: usize) -> Option<Span> {
355 Some(self.0.elements.get(n)?.span)
356 }
357
358 /// Iterate over all elements as zero-allocation views.
359 #[inline]
360 pub fn elements(&self) -> impl Iterator<Item = BorrowedElement<'a>> {
361 self.0.elements.iter().map(BorrowedElement)
362 }
363}
364
365impl OwnedSegment {
366 /// Get the first component of element `n`, or `None` if absent.
367 ///
368 /// This is the zero-allocation equivalent of `as_borrowed().element_str(n)`.
369 /// Used internally by [`crate::helpers::find_segment_owned`] and the derived
370 /// [`crate::EdifactDeserialize::edifact_deserialize_owned`] implementations.
371 #[inline]
372 pub fn element_str(&self, n: usize) -> Option<&str> {
373 self.elements
374 .get(n)?
375 .components
376 .first()
377 .map(|(s, _)| s.as_str())
378 }
379
380 /// Get component `comp` of element `elem`, or `None` if absent.
381 ///
382 /// Zero-allocation equivalent of `as_borrowed().get_element(elem)?.get_component(comp)`.
383 #[inline]
384 pub fn component_str(&self, elem: usize, comp: usize) -> Option<&str> {
385 self.elements
386 .get(elem)?
387 .components
388 .get(comp)
389 .map(|(s, _)| s.as_str())
390 }
391
392 #[inline]
393 /// Shift all stored spans by `delta` bytes.
394 pub fn offset(mut self, delta: usize) -> Self {
395 self.span = self.span.offset(delta);
396 self.tag_span = self.tag_span.offset(delta);
397 for element in &mut self.elements {
398 element.span = element.span.offset(delta);
399 for (_, span) in &mut element.components {
400 *span = span.offset(delta);
401 }
402 }
403 self
404 }
405
406 #[inline]
407 /// View this owned segment as a borrowed [`Segment`].
408 ///
409 /// **Performance note**: allocates a `Vec<Element<'_>>` on every call.
410 /// When only individual field access is needed, prefer
411 /// [`OwnedSegment::borrow`] → [`BorrowedSegment`] which is O(1).
412 /// `as_borrowed` remains necessary when the callee requires `&[Segment<'_>]`.
413 pub fn as_borrowed(&self) -> Segment<'_> {
414 Segment {
415 tag: self.tag.as_str(),
416 span: self.span,
417 tag_span: self.tag_span,
418 elements: self
419 .elements
420 .iter()
421 .map(|elem| Element {
422 span: elem.span,
423 components: elem
424 .components
425 .iter()
426 .map(|(c, s)| (Cow::Borrowed(c.as_str()), *s))
427 .collect(),
428 })
429 .collect(),
430 }
431 }
432
433 /// Return a zero-allocation view of this segment.
434 ///
435 /// Unlike [`as_borrowed`][OwnedSegment::as_borrowed], this is `O(1)` and
436 /// performs no heap allocation. The view cannot be passed to APIs that
437 /// require `&[Segment<'_>]`; use [`as_borrowed`][OwnedSegment::as_borrowed]
438 /// for those call sites.
439 #[inline]
440 pub fn borrow(&self) -> BorrowedSegment<'_> {
441 BorrowedSegment(self)
442 }
443}
444
445impl<'a> From<Segment<'a>> for OwnedSegment {
446 fn from(value: Segment<'a>) -> Self {
447 Self {
448 tag: value.tag.to_string(),
449 span: value.span,
450 tag_span: value.tag_span,
451 elements: value.elements.into_iter().map(OwnedElement::from).collect(),
452 }
453 }
454}