Skip to main content

edifact_rs/
group.rs

1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag.  For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//!     GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//!     GroupDef {
26//!         name: "SG7",
27//!         trigger: "LIN",
28//!         children: &[
29//!             GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//!         ],
31//!     },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//!     println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42
43// ── GroupDef ──────────────────────────────────────────────────────────────────
44
/// Static schema describing one segment group within an EDIFACT message.
///
/// `GroupDef` is designed to be declared as a `static` or `const` value, so
/// both the struct itself and all nested `children` references are
/// `'static`-lifetime slices with no heap allocation.
///
/// Equality and hashing are structural: two definitions compare equal when
/// their `name`, `trigger`, and entire `children` tree are equal.  This makes
/// schema entries usable in assertions and as keys of hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct GroupDef {
    /// Human-readable group name, e.g. `"SG2"`.
    pub name: &'static str,
    /// The segment tag whose appearance starts a new instance of this group.
    pub trigger: &'static str,
    /// Nested child groups within this group.
    ///
    /// The first trigger encountered among `children` ends the current child
    /// and starts a new one; a trigger that matches a sibling or ancestor group
    /// ends this group entirely.
    pub children: &'static [GroupDef],
}
63
64// ── SegmentGroup ──────────────────────────────────────────────────────────────
65
66/// A populated segment group produced by [`group_segments`].
67///
68/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
69/// elements is heap-allocated per segment, but the string data inside each
70/// element still borrows from the original input buffer via the `'a` lifetime.
71/// For read-heavy workloads consider keeping the original segment slice and
72/// using group indices rather than cloned values.
73#[derive(Debug)]
74pub struct SegmentGroup<'a> {
75    /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
76    pub definition: &'static str,
77    /// Segments that belong directly to this group instance.
78    ///
79    /// Segment values are cloned from the input slice, but the string data
80    /// inside each segment borrows from the original input for `'a`.
81    pub segments: Vec<Segment<'a>>,
82    /// Child group instances, in the order they appear in the message.
83    pub children: Vec<SegmentGroup<'a>>,
84}
85
86impl<'a> SegmentGroup<'a> {
87    fn new(definition: &'static str) -> Self {
88        Self {
89            definition,
90            segments: Vec::new(),
91            children: Vec::new(),
92        }
93    }
94
95    /// Iterate over all segments in this group and all descendant groups,
96    /// depth-first.
97    pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
98        AllSegmentsIter::new(self)
99    }
100
101    /// Find the first segment with the given `tag` in this group (not children).
102    ///
103    /// # Shallow search
104    ///
105    /// This method searches only the segments directly owned by **this** group
106    /// instance — it does **not** recurse into child groups.  To search the
107    /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
108    ///
109    /// ```ignore
110    /// group.all_segments().find(|s| s.tag == "LIN")
111    /// ```
112    pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
113        self.segments.iter().find(|s| s.tag == tag)
114    }
115}
116
117// ── AllSegmentsIter ───────────────────────────────────────────────────────────
118
119struct AllSegmentsIter<'g, 'a> {
120    // Stack of (current_group, current_seg_idx, current_child_idx)
121    stack: Vec<(&'g SegmentGroup<'a>, usize, usize)>,
122}
123
124impl<'g, 'a> AllSegmentsIter<'g, 'a> {
125    fn new(root: &'g SegmentGroup<'a>) -> Self {
126        Self {
127            stack: vec![(root, 0, 0)],
128        }
129    }
130}
131
132impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
133    type Item = &'g Segment<'a>;
134
135    fn next(&mut self) -> Option<Self::Item> {
136        loop {
137            let (group, seg_idx, child_idx) = self.stack.last_mut()?;
138            // Yield segments first.
139            if *seg_idx < group.segments.len() {
140                let seg = &group.segments[*seg_idx];
141                *seg_idx += 1;
142                return Some(seg);
143            }
144            // Then recurse into children
145            if *child_idx < group.children.len() {
146                let child = &group.children[*child_idx];
147                *child_idx += 1;
148                self.stack.push((child, 0, 0));
149                continue;
150            }
151            // Done with this group
152            self.stack.pop();
153        }
154    }
155
156    fn size_hint(&self) -> (usize, Option<usize>) {
157        // Conservative lower bound: count remaining direct segments in all
158        // frames on the stack.  Children not yet pushed are not counted, so
159        // the true total may be higher, but this is still a valid lower bound.
160        let lower: usize = self
161            .stack
162            .iter()
163            .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
164            .sum();
165        (lower, None)
166    }
167}
168
169// ── group_segments ────────────────────────────────────────────────────────────
170
171/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
172///
173/// # Algorithm
174///
175/// The algorithm is a single-pass linear scan:
176///
177/// 1. Segments that do not match any group trigger in `schema` are added to
178///    the current group's `segments`.
179/// 2. When a trigger matching a group in `schema` is encountered:
180///    - If an open child with the same trigger already exists it is closed and
181///      a new instance is started (repetition).
182///    - If the trigger belongs to a *sibling* or *ancestor* group the current
183///      group is closed first (the caller handles restart).
184///    - Nested schemas recurse: child group triggers follow the same rules
185///      within their parent.
186///
187/// # Root group
188///
189/// The returned root group has `definition` set to `root_name` (typically
190/// `"ROOT"` or the message type string).  Segments before the first matching
191/// trigger land in the root's own `segments` vec.
192///
193/// # Example
194///
195/// ```rust,ignore
196/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
197/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
198///     println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
199/// }
200/// ```
201pub fn group_segments<'a>(
202    segments: &[Segment<'a>],
203    schema: &'static [GroupDef],
204    root_name: &'static str,
205) -> SegmentGroup<'a> {
206    let mut root = SegmentGroup::new(root_name);
207    group_recursive(segments, &mut root, schema);
208    root
209}
210
211/// Internal recursive grouping.  Returns the number of segments consumed.
212fn group_recursive<'a>(
213    segments: &[Segment<'a>],
214    parent: &mut SegmentGroup<'a>,
215    schema: &'static [GroupDef],
216) -> usize {
217    group_recursive_inner(segments, parent, schema, &[])
218}
219
220fn group_recursive_inner<'a>(
221    segments: &[Segment<'a>],
222    parent: &mut SegmentGroup<'a>,
223    schema: &'static [GroupDef],
224    stop_triggers: &[&'static str],
225) -> usize {
226    let mut i = 0;
227    while i < segments.len() {
228        let tag = segments[i].tag;
229
230        // Compare by string value rather than using contains() because
231        // `tag` is borrowed from parsed input while `stop_triggers` holds
232        // `&'static str` values.
233        #[allow(clippy::manual_contains)]
234        if stop_triggers.iter().any(|t| *t == tag) {
235            break;
236        }
237
238        // Does this tag trigger a group in the schema?
239        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
240            // Start a new child group instance
241            let mut child = SegmentGroup::new(def.name);
242            // The trigger segment belongs to the new child
243            child.segments.push(segments[i].clone());
244            i += 1;
245
246            // Build the combined stop triggers: parent stops + current schema triggers.
247            // Use SmallVec to avoid heap allocation for typical schema sizes.
248            let mut combined_stop: SmallVec<[&'static str; 16]> =
249                SmallVec::from_slice(stop_triggers);
250            for d in schema {
251                if !combined_stop.contains(&d.trigger) {
252                    combined_stop.push(d.trigger);
253                }
254            }
255
256            // Recurse into children of this group — pass rest of segments
257            let consumed =
258                group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
259            i += consumed;
260
261            parent.children.push(child);
262        } else {
263            // Segment doesn't match any group trigger in this schema — it
264            // belongs to the parent group's own segments.  This also covers
265            // leaf groups (empty schema): all non-stop-trigger segments after
266            // the trigger are accumulated into the current group.
267            parent.segments.push(segments[i].clone());
268            i += 1;
269        }
270    }
271    i
272}
273
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Span;
    use crate::model::Element;

    /// Build a minimal segment with the given tag and a single dummy element.
    fn seg(tag: &'static str) -> Segment<'static> {
        Segment {
            tag,
            span: Span::new(0, 0),
            tag_span: Span::new(0, 0),
            elements: vec![Element::of(&["x"])],
        }
    }

    // SG1 (NAD) with nested SG2 (CTA), followed by sibling SG3 (LIN).
    static SCHEMA: &[GroupDef] = &[
        GroupDef {
            name: "SG1",
            trigger: "NAD",
            children: &[GroupDef {
                name: "SG2",
                trigger: "CTA",
                children: &[],
            }],
        },
        GroupDef {
            name: "SG3",
            trigger: "LIN",
            children: &[],
        },
    ];

    #[test]
    fn root_segments_before_first_trigger() {
        let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
        assert_eq!(tree.children.len(), 1);
        assert_eq!(tree.children[0].definition, "SG1");
    }

    #[test]
    fn repeated_trigger_creates_multiple_children() {
        let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        // Two NAD triggers → two SG1 children
        assert_eq!(
            tree.children
                .iter()
                .filter(|c| c.definition == "SG1")
                .count(),
            2
        );
    }

    #[test]
    fn nested_child_groups() {
        let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        let sg1 = &tree.children[0];
        assert_eq!(sg1.definition, "SG1");
        // Two CTA triggers → two SG2 children inside SG1
        assert_eq!(sg1.children.len(), 2);
        assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
    }

    #[test]
    fn sibling_trigger_closes_nested_group() {
        let segs = vec![seg("NAD"), seg("CTA"), seg("LIN")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        // LIN is a sibling of SG1, so it must end both SG2 and SG1 and open
        // SG3 at root level instead of being absorbed into SG2.
        let names: Vec<_> = tree.children.iter().map(|c| c.definition).collect();
        assert_eq!(names, vec!["SG1", "SG3"]);

        let sg1 = &tree.children[0];
        assert_eq!(sg1.children.len(), 1);
        let sg2_tags: Vec<_> = sg1.children[0].segments.iter().map(|s| s.tag).collect();
        assert_eq!(sg2_tags, vec!["CTA"]);

        let sg3_tags: Vec<_> = tree.children[1].segments.iter().map(|s| s.tag).collect();
        assert_eq!(sg3_tags, vec!["LIN"]);
    }

    #[test]
    fn find_segment_is_shallow() {
        let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        assert!(tree.find_segment("UNH").is_some());
        // CTA lives two levels down, so the shallow search must miss it...
        assert!(tree.find_segment("CTA").is_none());
        // ...while the full traversal still reaches it.
        assert!(tree.all_segments().any(|s| s.tag == "CTA"));
    }

    #[test]
    fn all_segments_iterator_depth_first() {
        let segs = vec![
            seg("UNH"),
            seg("BGM"),
            seg("NAD"),
            seg("CTA"),
            seg("COM"),
            seg("CTA"),
            seg("LIN"),
            seg("QTY"),
        ];
        let tree = group_segments(&segs, SCHEMA, "ROOT");
        let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
        // Own segments first, then children in order: for a grouped message
        // this reproduces the original segment sequence exactly.
        assert_eq!(
            tags,
            vec!["UNH", "BGM", "NAD", "CTA", "COM", "CTA", "LIN", "QTY"]
        );
    }
}