edifact_rs/group.rs
1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag. For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//! GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//! GroupDef {
26//! name: "SG7",
27//! trigger: "LIN",
28//! children: &[
29//! GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//! ],
31//! },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//! println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42
43// ── GroupDef ──────────────────────────────────────────────────────────────────
44
45/// Static schema describing one segment group within an EDIFACT message.
46///
47/// `GroupDef` is designed to be declared as a `static` or `const` value, so
48/// both the struct itself and all nested `children` references are
49/// `'static`-lifetime slices with no heap allocation.
50#[derive(Debug, Clone, Copy)]
51pub struct GroupDef {
52 /// Human-readable group name, e.g. `"SG2"`.
53 pub name: &'static str,
54 /// The segment tag whose appearance starts a new instance of this group.
55 pub trigger: &'static str,
56 /// Nested child groups within this group.
57 ///
58 /// The first trigger encountered among `children` ends the current child
59 /// and starts a new one; a trigger that matches a sibling or ancestor group
60 /// ends this group entirely.
61 pub children: &'static [GroupDef],
62}
63
64// ── SegmentGroup ──────────────────────────────────────────────────────────────
65
66/// A populated segment group produced by [`group_segments`].
67///
68/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
69/// elements is heap-allocated per segment, but the string data inside each
70/// element still borrows from the original input buffer via the `'a` lifetime.
71///
72/// # Memory note
73///
74/// `group_segments` clones each [`Segment`] into the tree. For large messages
75/// or deeply nested schemas where minimal allocation is critical, consider
76/// working with the original flat segment slice and deriving group boundaries
77/// yourself based on the schema's trigger tags.
78#[derive(Debug)]
79pub struct SegmentGroup<'a> {
80 /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
81 pub definition: &'static str,
82 /// Segments that belong directly to this group instance.
83 ///
84 /// Segment values are cloned from the input slice, but the string data
85 /// inside each segment borrows from the original input for `'a`.
86 pub segments: Vec<Segment<'a>>,
87 /// Child group instances, in the order they appear in the message.
88 pub children: Vec<SegmentGroup<'a>>,
89}
90
91impl<'a> SegmentGroup<'a> {
92 fn new(definition: &'static str) -> Self {
93 Self {
94 definition,
95 segments: Vec::new(),
96 children: Vec::new(),
97 }
98 }
99
100 /// Iterate over all segments in this group and all descendant groups,
101 /// depth-first.
102 pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
103 AllSegmentsIter::new(self)
104 }
105
106 /// Find the first segment with the given `tag` in this group (not children).
107 ///
108 /// # Shallow search
109 ///
110 /// This method searches only the segments directly owned by **this** group
111 /// instance — it does **not** recurse into child groups. To search the
112 /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
113 ///
114 /// ```ignore
115 /// group.all_segments().find(|s| s.tag == "LIN")
116 /// ```
117 pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
118 self.segments.iter().find(|s| s.tag == tag)
119 }
120}
121
122// ── AllSegmentsIter ───────────────────────────────────────────────────────────
123
124struct AllSegmentsIter<'g, 'a> {
125 // Stack of (current_group, current_seg_idx, current_child_idx)
126 stack: Vec<(&'g SegmentGroup<'a>, usize, usize)>,
127}
128
129impl<'g, 'a> AllSegmentsIter<'g, 'a> {
130 fn new(root: &'g SegmentGroup<'a>) -> Self {
131 Self {
132 stack: vec![(root, 0, 0)],
133 }
134 }
135}
136
137impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
138 type Item = &'g Segment<'a>;
139
140 fn next(&mut self) -> Option<Self::Item> {
141 loop {
142 let (group, seg_idx, child_idx) = self.stack.last_mut()?;
143 // Yield segments first.
144 if *seg_idx < group.segments.len() {
145 let seg = &group.segments[*seg_idx];
146 *seg_idx += 1;
147 return Some(seg);
148 }
149 // Then recurse into children
150 if *child_idx < group.children.len() {
151 let child = &group.children[*child_idx];
152 *child_idx += 1;
153 self.stack.push((child, 0, 0));
154 continue;
155 }
156 // Done with this group
157 self.stack.pop();
158 }
159 }
160
161 fn size_hint(&self) -> (usize, Option<usize>) {
162 // Conservative lower bound: count remaining direct segments in all
163 // frames on the stack. Children not yet pushed are not counted, so
164 // the true total may be higher, but this is still a valid lower bound.
165 let lower: usize = self
166 .stack
167 .iter()
168 .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
169 .sum();
170 (lower, None)
171 }
172}
173
174// ── group_segments ────────────────────────────────────────────────────────────
175
176/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
177///
178/// # Algorithm
179///
180/// The algorithm is a single-pass linear scan:
181///
182/// 1. Segments that do not match any group trigger in `schema` are added to
183/// the current group's `segments`.
184/// 2. When a trigger matching a group in `schema` is encountered:
185/// - If an open child with the same trigger already exists it is closed and
186/// a new instance is started (repetition).
187/// - If the trigger belongs to a *sibling* or *ancestor* group the current
188/// group is closed first (the caller handles restart).
189/// - Nested schemas recurse: child group triggers follow the same rules
190/// within their parent.
191///
192/// # Root group
193///
194/// The returned root group has `definition` set to `root_name` (typically
195/// `"ROOT"` or the message type string). Segments before the first matching
196/// trigger land in the root's own `segments` vec.
197///
198/// # Example
199///
200/// ```rust,ignore
201/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
202/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
203/// println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
204/// }
205/// ```
206pub fn group_segments<'a>(
207 segments: &[Segment<'a>],
208 schema: &'static [GroupDef],
209 root_name: &'static str,
210) -> SegmentGroup<'a> {
211 let mut root = SegmentGroup::new(root_name);
212 group_recursive(segments, &mut root, schema);
213 root
214}
215
216/// Internal recursive grouping. Returns the number of segments consumed.
217fn group_recursive<'a>(
218 segments: &[Segment<'a>],
219 parent: &mut SegmentGroup<'a>,
220 schema: &'static [GroupDef],
221) -> usize {
222 group_recursive_inner(segments, parent, schema, &[])
223}
224
225fn group_recursive_inner<'a>(
226 segments: &[Segment<'a>],
227 parent: &mut SegmentGroup<'a>,
228 schema: &'static [GroupDef],
229 stop_triggers: &[&'static str],
230) -> usize {
231 // Pre-compute the combined stop set for all children of this schema level.
232 // This is computed once per schema level, not once per segment, so the
233 // SmallVec is allocated at most O(depth) times rather than O(depth × n).
234 let combined_stop: SmallVec<[&'static str; 16]> = {
235 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
236 for d in schema {
237 if !v.contains(&d.trigger) {
238 v.push(d.trigger);
239 }
240 }
241 v
242 };
243
244 let mut i = 0;
245 while i < segments.len() {
246 let tag = segments[i].tag;
247
248 // Compare by string value rather than using contains() because
249 // `tag` is borrowed from parsed input while `stop_triggers` holds
250 // `&'static str` values.
251 #[allow(clippy::manual_contains)]
252 if stop_triggers.iter().any(|t| *t == tag) {
253 break;
254 }
255
256 // Does this tag trigger a group in the schema?
257 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
258 // Start a new child group instance
259 let mut child = SegmentGroup::new(def.name);
260 // The trigger segment belongs to the new child
261 child.segments.push(segments[i].clone());
262 i += 1;
263
264 // Recurse into children of this group — pass the pre-computed stop set.
265 let consumed =
266 group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
267 i += consumed;
268
269 parent.children.push(child);
270 } else {
271 // Segment doesn't match any group trigger in this schema — it
272 // belongs to the parent group's own segments. This also covers
273 // leaf groups (empty schema): all non-stop-trigger segments after
274 // the trigger are accumulated into the current group.
275 parent.segments.push(segments[i].clone());
276 i += 1;
277 }
278 }
279 i
280}
281
282#[cfg(test)]
283mod tests {
284 use super::*;
285 use crate::Span;
286 use crate::model::Element;
287
288 fn seg(tag: &'static str) -> Segment<'static> {
289 Segment {
290 tag,
291 span: Span::new(0, 0),
292 tag_span: Span::new(0, 0),
293 elements: vec![Element::of(&["x"])],
294 }
295 }
296
297 static SCHEMA: &[GroupDef] = &[
298 GroupDef {
299 name: "SG1",
300 trigger: "NAD",
301 children: &[GroupDef {
302 name: "SG2",
303 trigger: "CTA",
304 children: &[],
305 }],
306 },
307 GroupDef {
308 name: "SG3",
309 trigger: "LIN",
310 children: &[],
311 },
312 ];
313
314 #[test]
315 fn root_segments_before_first_trigger() {
316 let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
317 let tree = group_segments(&segs, SCHEMA, "ROOT");
318 assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
319 assert_eq!(tree.children.len(), 1);
320 assert_eq!(tree.children[0].definition, "SG1");
321 }
322
323 #[test]
324 fn repeated_trigger_creates_multiple_children() {
325 let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
326 let tree = group_segments(&segs, SCHEMA, "ROOT");
327 // Two NAD triggers → two SG1 children
328 assert_eq!(
329 tree.children
330 .iter()
331 .filter(|c| c.definition == "SG1")
332 .count(),
333 2
334 );
335 }
336
337 #[test]
338 fn nested_child_groups() {
339 let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
340 let tree = group_segments(&segs, SCHEMA, "ROOT");
341 let sg1 = &tree.children[0];
342 assert_eq!(sg1.definition, "SG1");
343 // Two CTA triggers → two SG2 children inside SG1
344 assert_eq!(sg1.children.len(), 2);
345 assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
346 }
347
348 #[test]
349 fn all_segments_iterator_depth_first() {
350 let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
351 let tree = group_segments(&segs, SCHEMA, "ROOT");
352 let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
353 assert!(tags.contains(&"UNH"));
354 assert!(tags.contains(&"NAD"));
355 assert!(tags.contains(&"CTA"));
356 }
357}