Skip to main content

edifact_rs/
group.rs

1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag.  For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//!     GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//!     GroupDef {
26//!         name: "SG7",
27//!         trigger: "LIN",
28//!         children: &[
29//!             GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//!         ],
31//!     },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//!     println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42
43// ── GroupDef ──────────────────────────────────────────────────────────────────
44
45/// Static schema describing one segment group within an EDIFACT message.
46///
47/// `GroupDef` is designed to be declared as a `static` or `const` value, so
48/// both the struct itself and all nested `children` references are
49/// `'static`-lifetime slices with no heap allocation.
50#[derive(Debug, Clone, Copy)]
51pub struct GroupDef {
52    /// Human-readable group name, e.g. `"SG2"`.
53    pub name: &'static str,
54    /// The segment tag whose appearance starts a new instance of this group.
55    pub trigger: &'static str,
56    /// Nested child groups within this group.
57    ///
58    /// The first trigger encountered among `children` ends the current child
59    /// and starts a new one; a trigger that matches a sibling or ancestor group
60    /// ends this group entirely.
61    pub children: &'static [GroupDef],
62}
63
64// ── SegmentGroup ──────────────────────────────────────────────────────────────
65
66/// A populated segment group produced by [`group_segments`].
67///
68/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
69/// elements is heap-allocated per segment, but the string data inside each
70/// element still borrows from the original input buffer via the `'a` lifetime.
71///
72/// # Memory note
73///
74/// `group_segments` clones each [`Segment`] into the tree.  For large messages
75/// or deeply nested schemas where minimal allocation is critical, consider
76/// working with the original flat segment slice and deriving group boundaries
77/// yourself based on the schema's trigger tags.
78#[derive(Debug)]
79pub struct SegmentGroup<'a> {
80    /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
81    pub definition: &'static str,
82    /// Segments that belong directly to this group instance.
83    ///
84    /// Segment values are cloned from the input slice, but the string data
85    /// inside each segment borrows from the original input for `'a`.
86    pub segments: Vec<Segment<'a>>,
87    /// Child group instances, in the order they appear in the message.
88    pub children: Vec<SegmentGroup<'a>>,
89}
90
91impl<'a> SegmentGroup<'a> {
92    fn new(definition: &'static str) -> Self {
93        Self {
94            definition,
95            segments: Vec::new(),
96            children: Vec::new(),
97        }
98    }
99
100    /// Iterate over all segments in this group and all descendant groups,
101    /// depth-first.
102    pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
103        AllSegmentsIter::new(self)
104    }
105
106    /// Find the first segment with the given `tag` in this group (not children).
107    ///
108    /// # Shallow search
109    ///
110    /// This method searches only the segments directly owned by **this** group
111    /// instance — it does **not** recurse into child groups.  To search the
112    /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
113    ///
114    /// ```ignore
115    /// group.all_segments().find(|s| s.tag == "LIN")
116    /// ```
117    pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
118        self.segments.iter().find(|s| s.tag == tag)
119    }
120}
121
122// ── AllSegmentsIter ───────────────────────────────────────────────────────────
123
124struct AllSegmentsIter<'g, 'a> {
125    // Stack of (current_group, current_seg_idx, current_child_idx)
126    stack: Vec<(&'g SegmentGroup<'a>, usize, usize)>,
127}
128
129impl<'g, 'a> AllSegmentsIter<'g, 'a> {
130    fn new(root: &'g SegmentGroup<'a>) -> Self {
131        Self {
132            stack: vec![(root, 0, 0)],
133        }
134    }
135}
136
137impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
138    type Item = &'g Segment<'a>;
139
140    fn next(&mut self) -> Option<Self::Item> {
141        loop {
142            let (group, seg_idx, child_idx) = self.stack.last_mut()?;
143            // Yield segments first.
144            if *seg_idx < group.segments.len() {
145                let seg = &group.segments[*seg_idx];
146                *seg_idx += 1;
147                return Some(seg);
148            }
149            // Then recurse into children
150            if *child_idx < group.children.len() {
151                let child = &group.children[*child_idx];
152                *child_idx += 1;
153                self.stack.push((child, 0, 0));
154                continue;
155            }
156            // Done with this group
157            self.stack.pop();
158        }
159    }
160
161    fn size_hint(&self) -> (usize, Option<usize>) {
162        // Conservative lower bound: count remaining direct segments in all
163        // frames on the stack.  Children not yet pushed are not counted, so
164        // the true total may be higher, but this is still a valid lower bound.
165        let lower: usize = self
166            .stack
167            .iter()
168            .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
169            .sum();
170        (lower, None)
171    }
172}
173
174// ── group_segments ────────────────────────────────────────────────────────────
175
176/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
177///
178/// # Algorithm
179///
180/// The algorithm is a single-pass linear scan:
181///
182/// 1. Segments that do not match any group trigger in `schema` are added to
183///    the current group's `segments`.
184/// 2. When a trigger matching a group in `schema` is encountered:
185///    - If an open child with the same trigger already exists it is closed and
186///      a new instance is started (repetition).
187///    - If the trigger belongs to a *sibling* or *ancestor* group the current
188///      group is closed first (the caller handles restart).
189///    - Nested schemas recurse: child group triggers follow the same rules
190///      within their parent.
191///
192/// # Root group
193///
194/// The returned root group has `definition` set to `root_name` (typically
195/// `"ROOT"` or the message type string).  Segments before the first matching
196/// trigger land in the root's own `segments` vec.
197///
198/// # Example
199///
200/// ```rust,ignore
201/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
202/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
203///     println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
204/// }
205/// ```
206pub fn group_segments<'a>(
207    segments: &[Segment<'a>],
208    schema: &'static [GroupDef],
209    root_name: &'static str,
210) -> SegmentGroup<'a> {
211    let mut root = SegmentGroup::new(root_name);
212    group_recursive(segments, &mut root, schema);
213    root
214}
215
216/// Internal recursive grouping.  Returns the number of segments consumed.
217fn group_recursive<'a>(
218    segments: &[Segment<'a>],
219    parent: &mut SegmentGroup<'a>,
220    schema: &'static [GroupDef],
221) -> usize {
222    group_recursive_inner(segments, parent, schema, &[])
223}
224
225fn group_recursive_inner<'a>(
226    segments: &[Segment<'a>],
227    parent: &mut SegmentGroup<'a>,
228    schema: &'static [GroupDef],
229    stop_triggers: &[&'static str],
230) -> usize {
231    // Pre-compute the combined stop set for all children of this schema level.
232    // This is computed once per schema level, not once per segment, so the
233    // SmallVec is allocated at most O(depth) times rather than O(depth × n).
234    let combined_stop: SmallVec<[&'static str; 16]> = {
235        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
236        for d in schema {
237            if !v.contains(&d.trigger) {
238                v.push(d.trigger);
239            }
240        }
241        v
242    };
243
244    let mut i = 0;
245    while i < segments.len() {
246        let tag = segments[i].tag;
247
248        // Compare by string value rather than using contains() because
249        // `tag` is borrowed from parsed input while `stop_triggers` holds
250        // `&'static str` values.
251        #[allow(clippy::manual_contains)]
252        if stop_triggers.iter().any(|t| *t == tag) {
253            break;
254        }
255
256        // Does this tag trigger a group in the schema?
257        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
258            // Start a new child group instance
259            let mut child = SegmentGroup::new(def.name);
260            // The trigger segment belongs to the new child
261            child.segments.push(segments[i].clone());
262            i += 1;
263
264            // Recurse into children of this group — pass the pre-computed stop set.
265            let consumed =
266                group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
267            i += consumed;
268
269            parent.children.push(child);
270        } else {
271            // Segment doesn't match any group trigger in this schema — it
272            // belongs to the parent group's own segments.  This also covers
273            // leaf groups (empty schema): all non-stop-trigger segments after
274            // the trigger are accumulated into the current group.
275            parent.segments.push(segments[i].clone());
276            i += 1;
277        }
278    }
279    i
280}
281
282#[cfg(test)]
283mod tests {
284    use super::*;
285    use crate::Span;
286    use crate::model::Element;
287
288    fn seg(tag: &'static str) -> Segment<'static> {
289        Segment {
290            tag,
291            span: Span::new(0, 0),
292            tag_span: Span::new(0, 0),
293            elements: vec![Element::of(&["x"])],
294        }
295    }
296
297    static SCHEMA: &[GroupDef] = &[
298        GroupDef {
299            name: "SG1",
300            trigger: "NAD",
301            children: &[GroupDef {
302                name: "SG2",
303                trigger: "CTA",
304                children: &[],
305            }],
306        },
307        GroupDef {
308            name: "SG3",
309            trigger: "LIN",
310            children: &[],
311        },
312    ];
313
314    #[test]
315    fn root_segments_before_first_trigger() {
316        let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
317        let tree = group_segments(&segs, SCHEMA, "ROOT");
318        assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
319        assert_eq!(tree.children.len(), 1);
320        assert_eq!(tree.children[0].definition, "SG1");
321    }
322
323    #[test]
324    fn repeated_trigger_creates_multiple_children() {
325        let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
326        let tree = group_segments(&segs, SCHEMA, "ROOT");
327        // Two NAD triggers → two SG1 children
328        assert_eq!(
329            tree.children
330                .iter()
331                .filter(|c| c.definition == "SG1")
332                .count(),
333            2
334        );
335    }
336
337    #[test]
338    fn nested_child_groups() {
339        let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
340        let tree = group_segments(&segs, SCHEMA, "ROOT");
341        let sg1 = &tree.children[0];
342        assert_eq!(sg1.definition, "SG1");
343        // Two CTA triggers → two SG2 children inside SG1
344        assert_eq!(sg1.children.len(), 2);
345        assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
346    }
347
348    #[test]
349    fn all_segments_iterator_depth_first() {
350        let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
351        let tree = group_segments(&segs, SCHEMA, "ROOT");
352        let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
353        assert!(tags.contains(&"UNH"));
354        assert!(tags.contains(&"NAD"));
355        assert!(tags.contains(&"CTA"));
356    }
357}