edifact_rs/group.rs
1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag. For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//! GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//! GroupDef {
26//! name: "SG7",
27//! trigger: "LIN",
28//! children: &[
29//! GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//! ],
31//! },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//! println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42use std::ops::Range;
43
44// ── GroupDef ──────────────────────────────────────────────────────────────────
45
46/// Static schema describing one segment group within an EDIFACT message.
47///
48/// `GroupDef` is designed to be declared as a `static` or `const` value, so
49/// both the struct itself and all nested `children` references are
50/// `'static`-lifetime slices with no heap allocation.
51#[derive(Debug, Clone, Copy)]
52pub struct GroupDef {
53 /// Human-readable group name, e.g. `"SG2"`.
54 pub name: &'static str,
55 /// The segment tag whose appearance starts a new instance of this group.
56 pub trigger: &'static str,
57 /// Nested child groups within this group.
58 ///
59 /// The first trigger encountered among `children` ends the current child
60 /// and starts a new one; a trigger that matches a sibling or ancestor group
61 /// ends this group entirely.
62 pub children: &'static [GroupDef],
63}
64
65// ── SegmentGroup ──────────────────────────────────────────────────────────────
66
67/// A populated segment group produced by [`group_segments`].
68///
69/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
70/// elements is heap-allocated per segment, but the string data inside each
71/// element still borrows from the original input buffer via the `'a` lifetime.
72///
73/// # Memory note
74///
75/// `group_segments` clones each [`Segment`] into the tree. For large messages
76/// or deeply nested schemas where minimal allocation is critical, consider
77/// working with the original flat segment slice and deriving group boundaries
78/// yourself based on the schema's trigger tags.
79#[derive(Debug)]
80pub struct SegmentGroup<'a> {
81 /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
82 pub definition: &'static str,
83 /// Segments that belong directly to this group instance.
84 ///
85 /// Segment values are cloned from the input slice, but the string data
86 /// inside each segment borrows from the original input for `'a`.
87 pub segments: Vec<Segment<'a>>,
88 /// Child group instances, in the order they appear in the message.
89 pub children: Vec<SegmentGroup<'a>>,
90}
91
92impl<'a> SegmentGroup<'a> {
93 fn new(definition: &'static str) -> Self {
94 Self {
95 definition,
96 segments: Vec::new(),
97 children: Vec::new(),
98 }
99 }
100
101 /// Iterate over all segments in this group and all descendant groups,
102 /// depth-first.
103 pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
104 AllSegmentsIter::new(self)
105 }
106
107 /// Find the first segment with the given `tag` in this group (not children).
108 ///
109 /// # Shallow search
110 ///
111 /// This method searches only the segments directly owned by **this** group
112 /// instance — it does **not** recurse into child groups. To search the
113 /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
114 ///
115 /// ```ignore
116 /// group.all_segments().find(|s| s.tag == "LIN")
117 /// ```
118 pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
119 self.segments.iter().find(|s| s.tag == tag)
120 }
121}
122
123// ── AllSegmentsIter ───────────────────────────────────────────────────────────
124
125struct AllSegmentsIter<'g, 'a> {
126 // Stack of (current_group, current_seg_idx, current_child_idx)
127 stack: SmallVec<[(&'g SegmentGroup<'a>, usize, usize); 8]>,
128}
129
130impl<'g, 'a> AllSegmentsIter<'g, 'a> {
131 fn new(root: &'g SegmentGroup<'a>) -> Self {
132 Self {
133 stack: smallvec::smallvec![(root, 0, 0)],
134 }
135 }
136}
137
138impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
139 type Item = &'g Segment<'a>;
140
141 fn next(&mut self) -> Option<Self::Item> {
142 loop {
143 let (group, seg_idx, child_idx) = self.stack.last_mut()?;
144 // Yield segments first.
145 if *seg_idx < group.segments.len() {
146 let seg = &group.segments[*seg_idx];
147 *seg_idx += 1;
148 return Some(seg);
149 }
150 // Then recurse into children
151 if *child_idx < group.children.len() {
152 let child = &group.children[*child_idx];
153 *child_idx += 1;
154 self.stack.push((child, 0, 0));
155 continue;
156 }
157 // Done with this group
158 self.stack.pop();
159 }
160 }
161
162 fn size_hint(&self) -> (usize, Option<usize>) {
163 // Conservative lower bound: count remaining direct segments in all
164 // frames on the stack. Children not yet pushed are not counted, so
165 // the true total may be higher, but this is still a valid lower bound.
166 let lower: usize = self
167 .stack
168 .iter()
169 .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
170 .sum();
171 (lower, None)
172 }
173}
174
175// ── group_segments ────────────────────────────────────────────────────────────
176
177/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
178///
179/// # Algorithm
180///
181/// The algorithm is a single-pass linear scan:
182///
183/// 1. Segments that do not match any group trigger in `schema` are added to
184/// the current group's `segments`.
185/// 2. When a trigger matching a group in `schema` is encountered:
186/// - If an open child with the same trigger already exists it is closed and
187/// a new instance is started (repetition).
188/// - If the trigger belongs to a *sibling* or *ancestor* group the current
189/// group is closed first (the caller handles restart).
190/// - Nested schemas recurse: child group triggers follow the same rules
191/// within their parent.
192///
193/// # Root group
194///
195/// The returned root group has `definition` set to `root_name` (typically
196/// `"ROOT"` or the message type string). Segments before the first matching
197/// trigger land in the root's own `segments` vec.
198///
199/// # Example
200///
201/// ```rust,ignore
202/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
203/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
204/// println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
205/// }
206/// ```
207pub fn group_segments<'a>(
208 segments: &[Segment<'a>],
209 schema: &'static [GroupDef],
210 root_name: &'static str,
211) -> SegmentGroup<'a> {
212 let mut root = SegmentGroup::new(root_name);
213 group_recursive(segments, &mut root, schema);
214 root
215}
216
217/// Internal recursive grouping. Returns the number of segments consumed.
218fn group_recursive<'a>(
219 segments: &[Segment<'a>],
220 parent: &mut SegmentGroup<'a>,
221 schema: &'static [GroupDef],
222) -> usize {
223 group_recursive_inner(segments, parent, schema, &[])
224}
225
226fn group_recursive_inner<'a>(
227 segments: &[Segment<'a>],
228 parent: &mut SegmentGroup<'a>,
229 schema: &'static [GroupDef],
230 stop_triggers: &[&'static str],
231) -> usize {
232 // Pre-compute the combined stop set for all children of this schema level.
233 // This is computed once per schema level, not once per segment, so the
234 // SmallVec is allocated at most O(depth) times rather than O(depth × n).
235 let combined_stop: SmallVec<[&'static str; 16]> = {
236 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
237 for d in schema {
238 if !v.contains(&d.trigger) {
239 v.push(d.trigger);
240 }
241 }
242 v
243 };
244
245 let mut i = 0;
246 while i < segments.len() {
247 let tag = segments[i].tag;
248
249 // A trigger matching a stop tag means we must return control to the parent group.
250 if stop_triggers.iter().copied().any(|t| t == tag) {
251 break;
252 }
253
254 // Does this tag trigger a group in the schema?
255 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
256 // Start a new child group instance
257 let mut child = SegmentGroup::new(def.name);
258 // The trigger segment belongs to the new child
259 child.segments.push(segments[i].clone());
260 i += 1;
261
262 // Recurse into children of this group — pass the pre-computed stop set.
263 let consumed =
264 group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
265 i += consumed;
266
267 parent.children.push(child);
268 } else {
269 // Segment doesn't match any group trigger in this schema — it
270 // belongs to the parent group's own segments. This also covers
271 // leaf groups (empty schema): all non-stop-trigger segments after
272 // the trigger are accumulated into the current group.
273 parent.segments.push(segments[i].clone());
274 i += 1;
275 }
276 }
277 i
278}
279
280// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
281
282/// A zero-clone counterpart to [`SegmentGroup`] that stores index ranges into
283/// the original flat segment slice rather than cloning each segment.
284///
285/// Produced by [`group_segments_indexed`]. To access the actual segments use
286/// the original `&[Segment<'a>]` together with [`total_span`]:
287///
288/// ```rust,ignore
289/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
290/// for child in &indexed.children {
291/// let child_segs = &segments[child.total_span.clone()];
292/// }
293/// ```
294///
295/// [`total_span`]: SegmentGroupIndexed::total_span
296#[derive(Debug)]
297pub struct SegmentGroupIndexed {
298 /// Group name from the schema, e.g. `"SG2"`, or the root name.
299 pub definition: &'static str,
300 /// Contiguous span `[start, end)` of absolute indices into the original flat
301 /// segment slice covering **all** segments in this group instance — trigger
302 /// segment, direct segments, and all descendant groups combined.
303 ///
304 /// Use this to slice the original `&[Segment<'_>]` to get every segment
305 /// belonging to this group:
306 ///
307 /// ```rust,ignore
308 /// let all_sg2_segs = &segments[sg2.total_span.clone()];
309 /// ```
310 ///
311 /// To iterate over only the segments that belong *directly* to this group
312 /// (excluding descendants), use [`direct_segment_indices`].
313 ///
314 /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
315 pub total_span: Range<usize>,
316 /// Child group instances, in message order.
317 pub children: Vec<SegmentGroupIndexed>,
318}
319
320impl SegmentGroupIndexed {
321 /// Iterate over the absolute indices of segments that belong *directly* to
322 /// this group — i.e. those within [`total_span`] that are **not** covered
323 /// by any child group's [`total_span`].
324 ///
325 /// Complexity: `O(total_span.len() × children.len())`. For typical EDIFACT
326 /// message structures (≤ 8 children per group) this is negligible.
327 ///
328 /// [`total_span`]: SegmentGroupIndexed::total_span
329 pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
330 self.total_span.clone().filter(|i| {
331 !self
332 .children
333 .iter()
334 .any(|child| child.total_span.contains(i))
335 })
336 }
337}
338
339/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
340///
341/// This is the zero-allocation counterpart to [`group_segments`]: instead of
342/// copying each [`Segment`] into the tree, it records `Range<usize>` indices
343/// into the original flat slice. Use the original slice together with
344/// [`SegmentGroupIndexed::total_span`] to access segments:
345///
346/// ```rust,ignore
347/// let tree = group_segments_indexed(&segments, MY_SCHEMA, "ORDERS");
348/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
349/// let segs = &segments[sg2.total_span.clone()];
350/// println!("first segment: {:?}", segs.first().map(|s| s.tag));
351/// // Direct segments only (excluding nested SG3, SG4 …):
352/// for idx in sg2.direct_segment_indices() {
353/// println!(" direct: {:?}", segments[idx].tag);
354/// }
355/// }
356/// ```
357///
358/// # Complexity
359///
360/// `O(n × schema_depth)` time, `O(tree_nodes)` space. No `Segment` clones.
361pub fn group_segments_indexed<'a>(
362 segments: &[Segment<'a>],
363 schema: &'static [GroupDef],
364 root_name: &'static str,
365) -> SegmentGroupIndexed {
366 let mut root = SegmentGroupIndexed {
367 definition: root_name,
368 total_span: 0..0,
369 children: Vec::new(),
370 };
371 group_recursive_indexed(segments, &mut root, schema, &[], 0);
372 root
373}
374
375/// Internal recursive indexed grouping. Returns the number of segments consumed.
376fn group_recursive_indexed<'a>(
377 segments: &[Segment<'a>],
378 parent: &mut SegmentGroupIndexed,
379 schema: &'static [GroupDef],
380 stop_triggers: &[&'static str],
381 offset: usize,
382) -> usize {
383 let combined_stop: SmallVec<[&'static str; 16]> = {
384 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
385 for d in schema {
386 if !v.contains(&d.trigger) {
387 v.push(d.trigger);
388 }
389 }
390 v
391 };
392
393 // `span_start` is the absolute index of the first segment in this group.
394 // For child groups the caller pre-seeds `parent.total_span.start` with the
395 // trigger segment position; for the root (or any group with no pre-seeded
396 // trigger) we start at `offset`.
397 let span_start = if !parent.total_span.is_empty() {
398 parent.total_span.start // pre-seeded trigger position
399 } else {
400 offset
401 };
402
403 let mut i = 0;
404 while i < segments.len() {
405 let tag = segments[i].tag;
406
407 if stop_triggers.iter().copied().any(|t| t == tag) {
408 break;
409 }
410
411 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
412 let child_offset = offset + i;
413 let mut child = SegmentGroupIndexed {
414 definition: def.name,
415 // Pre-seed the trigger segment; the recursive call extends
416 // total_span to cover the full child subtree.
417 total_span: child_offset..child_offset + 1,
418 children: Vec::new(),
419 };
420 i += 1;
421
422 let consumed = group_recursive_indexed(
423 &segments[i..],
424 &mut child,
425 def.children,
426 &combined_stop,
427 offset + i,
428 );
429 i += consumed;
430
431 parent.children.push(child);
432 } else {
433 i += 1;
434 }
435 }
436
437 // Total span covers everything from the first segment (trigger or first
438 // direct segment) to the last segment consumed in this call.
439 parent.total_span = span_start..(offset + i);
440
441 i
442}
443
444#[cfg(test)]
445mod tests {
446 use super::*;
447 use crate::Span;
448 use crate::model::Element;
449
450 fn seg(tag: &'static str) -> Segment<'static> {
451 Segment {
452 tag,
453 span: Span::new(0, 0),
454 tag_span: Span::new(0, 0),
455 elements: vec![Element::of(&["x"])],
456 }
457 }
458
459 static SCHEMA: &[GroupDef] = &[
460 GroupDef {
461 name: "SG1",
462 trigger: "NAD",
463 children: &[GroupDef {
464 name: "SG2",
465 trigger: "CTA",
466 children: &[],
467 }],
468 },
469 GroupDef {
470 name: "SG3",
471 trigger: "LIN",
472 children: &[],
473 },
474 ];
475
476 #[test]
477 fn root_segments_before_first_trigger() {
478 let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
479 let tree = group_segments(&segs, SCHEMA, "ROOT");
480 assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
481 assert_eq!(tree.children.len(), 1);
482 assert_eq!(tree.children[0].definition, "SG1");
483 }
484
485 #[test]
486 fn repeated_trigger_creates_multiple_children() {
487 let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
488 let tree = group_segments(&segs, SCHEMA, "ROOT");
489 // Two NAD triggers → two SG1 children
490 assert_eq!(
491 tree.children
492 .iter()
493 .filter(|c| c.definition == "SG1")
494 .count(),
495 2
496 );
497 }
498
499 #[test]
500 fn nested_child_groups() {
501 let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
502 let tree = group_segments(&segs, SCHEMA, "ROOT");
503 let sg1 = &tree.children[0];
504 assert_eq!(sg1.definition, "SG1");
505 // Two CTA triggers → two SG2 children inside SG1
506 assert_eq!(sg1.children.len(), 2);
507 assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
508 }
509
510 #[test]
511 fn all_segments_iterator_depth_first() {
512 let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
513 let tree = group_segments(&segs, SCHEMA, "ROOT");
514 let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
515 assert!(tags.contains(&"UNH"));
516 assert!(tags.contains(&"NAD"));
517 assert!(tags.contains(&"CTA"));
518 }
519}