edifact_rs/group.rs
1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag. For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//! GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//! GroupDef {
26//! name: "SG7",
27//! trigger: "LIN",
28//! children: &[
29//! GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//! ],
31//! },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//! println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42
43// ── GroupDef ──────────────────────────────────────────────────────────────────
44
/// Compile-time description of a single segment group in a message schema.
///
/// Every field is a `'static` reference, so a complete schema (including all
/// nested `children`) can be written as a `static` or `const` table without
/// any heap allocation.
#[derive(Clone, Copy, Debug)]
pub struct GroupDef {
    /// Label carried by every instance of this group, e.g. `"SG2"`.
    pub name: &'static str,
    /// Tag of the segment that opens a new instance of this group.
    pub trigger: &'static str,
    /// Groups that may occur nested inside an instance of this group.
    ///
    /// While an instance of this group is open, a segment whose tag is the
    /// trigger of one of these `children` opens a new child instance (closing
    /// any previously open child), whereas a segment whose tag is the trigger
    /// of a sibling or enclosing group closes this instance.
    pub children: &'static [GroupDef],
}
63
64// ── SegmentGroup ──────────────────────────────────────────────────────────────
65
66/// A populated segment group produced by [`group_segments`].
67///
68/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
69/// elements is heap-allocated per segment, but the string data inside each
70/// element still borrows from the original input buffer via the `'a` lifetime.
71/// For read-heavy workloads consider keeping the original segment slice and
72/// using group indices rather than cloned values.
73#[derive(Debug)]
74pub struct SegmentGroup<'a> {
75 /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
76 pub definition: &'static str,
77 /// Segments that belong directly to this group instance.
78 ///
79 /// Segment values are cloned from the input slice, but the string data
80 /// inside each segment borrows from the original input for `'a`.
81 pub segments: Vec<Segment<'a>>,
82 /// Child group instances, in the order they appear in the message.
83 pub children: Vec<SegmentGroup<'a>>,
84}
85
86impl<'a> SegmentGroup<'a> {
87 fn new(definition: &'static str) -> Self {
88 Self {
89 definition,
90 segments: Vec::new(),
91 children: Vec::new(),
92 }
93 }
94
95 /// Iterate over all segments in this group and all descendant groups,
96 /// depth-first.
97 pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
98 AllSegmentsIter::new(self)
99 }
100
101 /// Find the first segment with the given `tag` in this group (not children).
102 ///
103 /// # Shallow search
104 ///
105 /// This method searches only the segments directly owned by **this** group
106 /// instance — it does **not** recurse into child groups. To search the
107 /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
108 ///
109 /// ```ignore
110 /// group.all_segments().find(|s| s.tag == "LIN")
111 /// ```
112 pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
113 self.segments.iter().find(|s| s.tag == tag)
114 }
115}
116
117// ── AllSegmentsIter ───────────────────────────────────────────────────────────
118
119struct AllSegmentsIter<'g, 'a> {
120 // Stack of (current_group, current_seg_idx, current_child_idx)
121 stack: Vec<(&'g SegmentGroup<'a>, usize, usize)>,
122}
123
124impl<'g, 'a> AllSegmentsIter<'g, 'a> {
125 fn new(root: &'g SegmentGroup<'a>) -> Self {
126 Self {
127 stack: vec![(root, 0, 0)],
128 }
129 }
130}
131
132impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
133 type Item = &'g Segment<'a>;
134
135 fn next(&mut self) -> Option<Self::Item> {
136 loop {
137 let (group, seg_idx, child_idx) = self.stack.last_mut()?;
138 // Yield segments first.
139 if *seg_idx < group.segments.len() {
140 let seg = &group.segments[*seg_idx];
141 *seg_idx += 1;
142 return Some(seg);
143 }
144 // Then recurse into children
145 if *child_idx < group.children.len() {
146 let child = &group.children[*child_idx];
147 *child_idx += 1;
148 self.stack.push((child, 0, 0));
149 continue;
150 }
151 // Done with this group
152 self.stack.pop();
153 }
154 }
155
156 fn size_hint(&self) -> (usize, Option<usize>) {
157 // Conservative lower bound: count remaining direct segments in all
158 // frames on the stack. Children not yet pushed are not counted, so
159 // the true total may be higher, but this is still a valid lower bound.
160 let lower: usize = self
161 .stack
162 .iter()
163 .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
164 .sum();
165 (lower, None)
166 }
167}
168
169// ── group_segments ────────────────────────────────────────────────────────────
170
171/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
172///
173/// # Algorithm
174///
175/// The algorithm is a single-pass linear scan:
176///
177/// 1. Segments that do not match any group trigger in `schema` are added to
178/// the current group's `segments`.
179/// 2. When a trigger matching a group in `schema` is encountered:
180/// - If an open child with the same trigger already exists it is closed and
181/// a new instance is started (repetition).
182/// - If the trigger belongs to a *sibling* or *ancestor* group the current
183/// group is closed first (the caller handles restart).
184/// - Nested schemas recurse: child group triggers follow the same rules
185/// within their parent.
186///
187/// # Root group
188///
189/// The returned root group has `definition` set to `root_name` (typically
190/// `"ROOT"` or the message type string). Segments before the first matching
191/// trigger land in the root's own `segments` vec.
192///
193/// # Example
194///
195/// ```rust,ignore
196/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
197/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
198/// println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
199/// }
200/// ```
201pub fn group_segments<'a>(
202 segments: &[Segment<'a>],
203 schema: &'static [GroupDef],
204 root_name: &'static str,
205) -> SegmentGroup<'a> {
206 let mut root = SegmentGroup::new(root_name);
207 group_recursive(segments, &mut root, schema);
208 root
209}
210
211/// Internal recursive grouping. Returns the number of segments consumed.
212fn group_recursive<'a>(
213 segments: &[Segment<'a>],
214 parent: &mut SegmentGroup<'a>,
215 schema: &'static [GroupDef],
216) -> usize {
217 group_recursive_inner(segments, parent, schema, &[])
218}
219
220fn group_recursive_inner<'a>(
221 segments: &[Segment<'a>],
222 parent: &mut SegmentGroup<'a>,
223 schema: &'static [GroupDef],
224 stop_triggers: &[&'static str],
225) -> usize {
226 let mut i = 0;
227 while i < segments.len() {
228 let tag = segments[i].tag;
229
230 // Compare by string value rather than using contains() because
231 // `tag` is borrowed from parsed input while `stop_triggers` holds
232 // `&'static str` values.
233 #[allow(clippy::manual_contains)]
234 if stop_triggers.iter().any(|t| *t == tag) {
235 break;
236 }
237
238 // Does this tag trigger a group in the schema?
239 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
240 // Start a new child group instance
241 let mut child = SegmentGroup::new(def.name);
242 // The trigger segment belongs to the new child
243 child.segments.push(segments[i].clone());
244 i += 1;
245
246 // Build the combined stop triggers: parent stops + current schema triggers.
247 // Use SmallVec to avoid heap allocation for typical schema sizes.
248 let mut combined_stop: SmallVec<[&'static str; 16]> =
249 SmallVec::from_slice(stop_triggers);
250 for d in schema {
251 if !combined_stop.contains(&d.trigger) {
252 combined_stop.push(d.trigger);
253 }
254 }
255
256 // Recurse into children of this group — pass rest of segments
257 let consumed =
258 group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
259 i += consumed;
260
261 parent.children.push(child);
262 } else {
263 // Segment doesn't match any group trigger in this schema — it
264 // belongs to the parent group's own segments. This also covers
265 // leaf groups (empty schema): all non-stop-trigger segments after
266 // the trigger are accumulated into the current group.
267 parent.segments.push(segments[i].clone());
268 i += 1;
269 }
270 }
271 i
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Span;
    use crate::model::Element;

    /// Build a minimal segment with the given tag, zero-width spans and a
    /// single placeholder element.
    fn seg(tag: &'static str) -> Segment<'static> {
        Segment {
            tag,
            span: Span::new(0, 0),
            tag_span: Span::new(0, 0),
            elements: vec![Element::of(&["x"])],
        }
    }

    /// Test schema: `SG1` (trigger `NAD`) nests `SG2` (trigger `CTA`);
    /// `SG3` (trigger `LIN`) is a top-level leaf group.
    static SCHEMA: &[GroupDef] = &[
        GroupDef {
            name: "SG1",
            trigger: "NAD",
            children: &[GroupDef {
                name: "SG2",
                trigger: "CTA",
                children: &[],
            }],
        },
        GroupDef {
            name: "SG3",
            trigger: "LIN",
            children: &[],
        },
    ];

    #[test]
    fn root_segments_before_first_trigger() {
        let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
        assert_eq!(tree.children.len(), 1);
        assert_eq!(tree.children[0].definition, "SG1");
    }

    #[test]
    fn repeated_trigger_creates_multiple_children() {
        let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        // Two NAD triggers → two SG1 children
        assert_eq!(
            tree.children
                .iter()
                .filter(|c| c.definition == "SG1")
                .count(),
            2
        );
    }

    #[test]
    fn nested_child_groups() {
        let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        let sg1 = &tree.children[0];
        assert_eq!(sg1.definition, "SG1");
        // Two CTA triggers → two SG2 children inside SG1
        assert_eq!(sg1.children.len(), 2);
        assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
    }

    #[test]
    fn all_segments_iterator_depth_first() {
        let segs = vec![seg("UNH"), seg("NAD"), seg("CTA"), seg("CTA"), seg("LIN")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        // Shape: ROOT[UNH] → SG1[NAD] → { SG2[CTA], SG2[CTA] }, then SG3[LIN].
        assert_eq!(tree.children.len(), 2);
        assert_eq!(tree.children[0].children.len(), 2);

        // Assert the exact sequence, not just membership: own segments come
        // before children, and children are visited in order.
        let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
        assert_eq!(tags, ["UNH", "NAD", "CTA", "CTA", "LIN"]);
    }
}
349}