Skip to main content

edifact_rs/
group.rs

1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag.  For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//!     GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//!     GroupDef {
26//!         name: "SG7",
27//!         trigger: "LIN",
28//!         children: &[
29//!             GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//!         ],
31//!     },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//!     println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::Segment;
41use smallvec::SmallVec;
42use std::ops::Range;
43
44// ── GroupDef ──────────────────────────────────────────────────────────────────
45
46/// Static schema describing one segment group within an EDIFACT message.
47///
48/// `GroupDef` is designed to be declared as a `static` or `const` value, so
49/// both the struct itself and all nested `children` references are
50/// `'static`-lifetime slices with no heap allocation.
51#[derive(Debug, Clone, Copy)]
52pub struct GroupDef {
53    /// Human-readable group name, e.g. `"SG2"`.
54    pub name: &'static str,
55    /// The segment tag whose appearance starts a new instance of this group.
56    pub trigger: &'static str,
57    /// Nested child groups within this group.
58    ///
59    /// The first trigger encountered among `children` ends the current child
60    /// and starts a new one; a trigger that matches a sibling or ancestor group
61    /// ends this group entirely.
62    pub children: &'static [GroupDef],
63}
64
65// ── SegmentGroup ──────────────────────────────────────────────────────────────
66
67/// A populated segment group produced by [`group_segments`].
68///
69/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
70/// elements is heap-allocated per segment, but the string data inside each
71/// element still borrows from the original input buffer via the `'a` lifetime.
72///
73/// # Memory note
74///
75/// `group_segments` clones each [`Segment`] into the tree.  For large messages
76/// or deeply nested schemas where minimal allocation is critical, consider
77/// working with the original flat segment slice and deriving group boundaries
78/// yourself based on the schema's trigger tags.
79#[derive(Debug)]
80pub struct SegmentGroup<'a> {
81    /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
82    pub definition: &'static str,
83    /// Segments that belong directly to this group instance.
84    ///
85    /// Segment values are cloned from the input slice, but the string data
86    /// inside each segment borrows from the original input for `'a`.
87    pub segments: Vec<Segment<'a>>,
88    /// Child group instances, in the order they appear in the message.
89    pub children: Vec<SegmentGroup<'a>>,
90}
91
92impl<'a> SegmentGroup<'a> {
93    fn new(definition: &'static str) -> Self {
94        Self {
95            definition,
96            segments: Vec::new(),
97            children: Vec::new(),
98        }
99    }
100
101    /// Iterate over all segments in this group and all descendant groups,
102    /// depth-first.
103    pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
104        AllSegmentsIter::new(self)
105    }
106
107    /// Find the first segment with the given `tag` in this group (not children).
108    ///
109    /// # Shallow search
110    ///
111    /// This method searches only the segments directly owned by **this** group
112    /// instance — it does **not** recurse into child groups.  To search the
113    /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
114    ///
115    /// ```ignore
116    /// group.all_segments().find(|s| s.tag == "LIN")
117    /// ```
118    pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
119        self.segments.iter().find(|s| s.tag == tag)
120    }
121}
122
123// ── AllSegmentsIter ───────────────────────────────────────────────────────────
124
125struct AllSegmentsIter<'g, 'a> {
126    // Stack of (current_group, current_seg_idx, current_child_idx)
127    stack: SmallVec<[(&'g SegmentGroup<'a>, usize, usize); 8]>,
128}
129
130impl<'g, 'a> AllSegmentsIter<'g, 'a> {
131    fn new(root: &'g SegmentGroup<'a>) -> Self {
132        Self {
133            stack: smallvec::smallvec![(root, 0, 0)],
134        }
135    }
136}
137
138impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
139    type Item = &'g Segment<'a>;
140
141    fn next(&mut self) -> Option<Self::Item> {
142        loop {
143            let (group, seg_idx, child_idx) = self.stack.last_mut()?;
144            // Yield segments first.
145            if *seg_idx < group.segments.len() {
146                let seg = &group.segments[*seg_idx];
147                *seg_idx += 1;
148                return Some(seg);
149            }
150            // Then recurse into children
151            if *child_idx < group.children.len() {
152                let child = &group.children[*child_idx];
153                *child_idx += 1;
154                self.stack.push((child, 0, 0));
155                continue;
156            }
157            // Done with this group
158            self.stack.pop();
159        }
160    }
161
162    fn size_hint(&self) -> (usize, Option<usize>) {
163        // Conservative lower bound: count remaining direct segments in all
164        // frames on the stack.  Children not yet pushed are not counted, so
165        // the true total may be higher, but this is still a valid lower bound.
166        let lower: usize = self
167            .stack
168            .iter()
169            .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
170            .sum();
171        (lower, None)
172    }
173}
174
175// ── group_segments ────────────────────────────────────────────────────────────
176
177/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
178///
179/// # Algorithm
180///
181/// The algorithm is a single-pass linear scan:
182///
183/// 1. Segments that do not match any group trigger in `schema` are added to
184///    the current group's `segments`.
185/// 2. When a trigger matching a group in `schema` is encountered:
186///    - If an open child with the same trigger already exists it is closed and
187///      a new instance is started (repetition).
188///    - If the trigger belongs to a *sibling* or *ancestor* group the current
189///      group is closed first (the caller handles restart).
190///    - Nested schemas recurse: child group triggers follow the same rules
191///      within their parent.
192///
193/// # Root group
194///
195/// The returned root group has `definition` set to `root_name` (typically
196/// `"ROOT"` or the message type string).  Segments before the first matching
197/// trigger land in the root's own `segments` vec.
198///
199/// # Example
200///
201/// ```rust,ignore
202/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
203/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
204///     println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
205/// }
206/// ```
207pub fn group_segments<'a>(
208    segments: &[Segment<'a>],
209    schema: &'static [GroupDef],
210    root_name: &'static str,
211) -> SegmentGroup<'a> {
212    let mut root = SegmentGroup::new(root_name);
213    group_recursive(segments, &mut root, schema);
214    root
215}
216
217/// Internal recursive grouping.  Returns the number of segments consumed.
218fn group_recursive<'a>(
219    segments: &[Segment<'a>],
220    parent: &mut SegmentGroup<'a>,
221    schema: &'static [GroupDef],
222) -> usize {
223    group_recursive_inner(segments, parent, schema, &[])
224}
225
226fn group_recursive_inner<'a>(
227    segments: &[Segment<'a>],
228    parent: &mut SegmentGroup<'a>,
229    schema: &'static [GroupDef],
230    stop_triggers: &[&'static str],
231) -> usize {
232    // Pre-compute the combined stop set for all children of this schema level.
233    // This is computed once per schema level, not once per segment, so the
234    // SmallVec is allocated at most O(depth) times rather than O(depth × n).
235    let combined_stop: SmallVec<[&'static str; 16]> = {
236        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
237        for d in schema {
238            if !v.contains(&d.trigger) {
239                v.push(d.trigger);
240            }
241        }
242        v
243    };
244
245    let mut i = 0;
246    while i < segments.len() {
247        let tag = segments[i].tag;
248
249        // A trigger matching a stop tag means we must return control to the parent group.
250        if stop_triggers.iter().copied().any(|t| t == tag) {
251            break;
252        }
253
254        // Does this tag trigger a group in the schema?
255        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
256            // Start a new child group instance
257            let mut child = SegmentGroup::new(def.name);
258            // The trigger segment belongs to the new child
259            child.segments.push(segments[i].clone());
260            i += 1;
261
262            // Recurse into children of this group — pass the pre-computed stop set.
263            let consumed =
264                group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
265            i += consumed;
266
267            parent.children.push(child);
268        } else {
269            // Segment doesn't match any group trigger in this schema — it
270            // belongs to the parent group's own segments.  This also covers
271            // leaf groups (empty schema): all non-stop-trigger segments after
272            // the trigger are accumulated into the current group.
273            parent.segments.push(segments[i].clone());
274            i += 1;
275        }
276    }
277    i
278}
279
280// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
281
282/// A zero-clone counterpart to [`SegmentGroup`] that stores index ranges into
283/// the original flat segment slice rather than cloning each segment.
284///
285/// Produced by [`group_segments_indexed`].  To access the actual segments use
286/// the original `&[Segment<'a>]` together with [`total_span`]:
287///
288/// ```rust,ignore
289/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
290/// for child in &indexed.children {
291///     let child_segs = &segments[child.total_span.clone()];
292/// }
293/// ```
294///
295/// [`total_span`]: SegmentGroupIndexed::total_span
296#[derive(Debug)]
297pub struct SegmentGroupIndexed {
298    /// Group name from the schema, e.g. `"SG2"`, or the root name.
299    pub definition: &'static str,
300    /// Contiguous span `[start, end)` of absolute indices into the original flat
301    /// segment slice covering **all** segments in this group instance — trigger
302    /// segment, direct segments, and all descendant groups combined.
303    ///
304    /// Use this to slice the original `&[Segment<'_>]` to get every segment
305    /// belonging to this group:
306    ///
307    /// ```rust,ignore
308    /// let all_sg2_segs = &segments[sg2.total_span.clone()];
309    /// ```
310    ///
311    /// To iterate over only the segments that belong *directly* to this group
312    /// (excluding descendants), use [`direct_segment_indices`].
313    ///
314    /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
315    pub total_span: Range<usize>,
316    /// Child group instances, in message order.
317    pub children: Vec<SegmentGroupIndexed>,
318}
319
320impl SegmentGroupIndexed {
321    /// Iterate over the absolute indices of segments that belong *directly* to
322    /// this group — i.e. those within [`total_span`] that are **not** covered
323    /// by any child group's [`total_span`].
324    ///
325    /// Complexity: `O(total_span.len() × children.len())`.  For typical EDIFACT
326    /// message structures (≤ 8 children per group) this is negligible.
327    ///
328    /// [`total_span`]: SegmentGroupIndexed::total_span
329    pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
330        self.total_span.clone().filter(|i| {
331            !self
332                .children
333                .iter()
334                .any(|child| child.total_span.contains(i))
335        })
336    }
337}
338
339/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
340///
341/// This is the zero-allocation counterpart to [`group_segments`]: instead of
342/// copying each [`Segment`] into the tree, it records `Range<usize>` indices
343/// into the original flat slice.  Use the original slice together with
344/// [`SegmentGroupIndexed::total_span`] to access segments:
345///
346/// ```rust,ignore
347/// let tree = group_segments_indexed(&segments, MY_SCHEMA, "ORDERS");
348/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
349///     let segs = &segments[sg2.total_span.clone()];
350///     println!("first segment: {:?}", segs.first().map(|s| s.tag));
351///     // Direct segments only (excluding nested SG3, SG4 …):
352///     for idx in sg2.direct_segment_indices() {
353///         println!("  direct: {:?}", segments[idx].tag);
354///     }
355/// }
356/// ```
357///
358/// # Complexity
359///
360/// `O(n × schema_depth)` time, `O(tree_nodes)` space.  No `Segment` clones.
361pub fn group_segments_indexed<'a>(
362    segments: &[Segment<'a>],
363    schema: &'static [GroupDef],
364    root_name: &'static str,
365) -> SegmentGroupIndexed {
366    let mut root = SegmentGroupIndexed {
367        definition: root_name,
368        total_span: 0..0,
369        children: Vec::new(),
370    };
371    group_recursive_indexed(segments, &mut root, schema, &[], 0);
372    root
373}
374
375/// Internal recursive indexed grouping.  Returns the number of segments consumed.
376fn group_recursive_indexed<'a>(
377    segments: &[Segment<'a>],
378    parent: &mut SegmentGroupIndexed,
379    schema: &'static [GroupDef],
380    stop_triggers: &[&'static str],
381    offset: usize,
382) -> usize {
383    let combined_stop: SmallVec<[&'static str; 16]> = {
384        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
385        for d in schema {
386            if !v.contains(&d.trigger) {
387                v.push(d.trigger);
388            }
389        }
390        v
391    };
392
393    // `span_start` is the absolute index of the first segment in this group.
394    // For child groups the caller pre-seeds `parent.total_span.start` with the
395    // trigger segment position; for the root (or any group with no pre-seeded
396    // trigger) we start at `offset`.
397    let span_start = if !parent.total_span.is_empty() {
398        parent.total_span.start // pre-seeded trigger position
399    } else {
400        offset
401    };
402
403    let mut i = 0;
404    while i < segments.len() {
405        let tag = segments[i].tag;
406
407        if stop_triggers.iter().copied().any(|t| t == tag) {
408            break;
409        }
410
411        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
412            let child_offset = offset + i;
413            let mut child = SegmentGroupIndexed {
414                definition: def.name,
415                // Pre-seed the trigger segment; the recursive call extends
416                // total_span to cover the full child subtree.
417                total_span: child_offset..child_offset + 1,
418                children: Vec::new(),
419            };
420            i += 1;
421
422            let consumed = group_recursive_indexed(
423                &segments[i..],
424                &mut child,
425                def.children,
426                &combined_stop,
427                offset + i,
428            );
429            i += consumed;
430
431            parent.children.push(child);
432        } else {
433            i += 1;
434        }
435    }
436
437    // Total span covers everything from the first segment (trigger or first
438    // direct segment) to the last segment consumed in this call.
439    parent.total_span = span_start..(offset + i);
440
441    i
442}
443
444#[cfg(test)]
445mod tests {
446    use super::*;
447    use crate::Span;
448    use crate::model::Element;
449
450    fn seg(tag: &'static str) -> Segment<'static> {
451        Segment {
452            tag,
453            span: Span::new(0, 0),
454            tag_span: Span::new(0, 0),
455            elements: vec![Element::of(&["x"])],
456        }
457    }
458
459    static SCHEMA: &[GroupDef] = &[
460        GroupDef {
461            name: "SG1",
462            trigger: "NAD",
463            children: &[GroupDef {
464                name: "SG2",
465                trigger: "CTA",
466                children: &[],
467            }],
468        },
469        GroupDef {
470            name: "SG3",
471            trigger: "LIN",
472            children: &[],
473        },
474    ];
475
476    #[test]
477    fn root_segments_before_first_trigger() {
478        let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
479        let tree = group_segments(&segs, SCHEMA, "ROOT");
480        assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
481        assert_eq!(tree.children.len(), 1);
482        assert_eq!(tree.children[0].definition, "SG1");
483    }
484
485    #[test]
486    fn repeated_trigger_creates_multiple_children() {
487        let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
488        let tree = group_segments(&segs, SCHEMA, "ROOT");
489        // Two NAD triggers → two SG1 children
490        assert_eq!(
491            tree.children
492                .iter()
493                .filter(|c| c.definition == "SG1")
494                .count(),
495            2
496        );
497    }
498
499    #[test]
500    fn nested_child_groups() {
501        let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
502        let tree = group_segments(&segs, SCHEMA, "ROOT");
503        let sg1 = &tree.children[0];
504        assert_eq!(sg1.definition, "SG1");
505        // Two CTA triggers → two SG2 children inside SG1
506        assert_eq!(sg1.children.len(), 2);
507        assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
508    }
509
510    #[test]
511    fn all_segments_iterator_depth_first() {
512        let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
513        let tree = group_segments(&segs, SCHEMA, "ROOT");
514        let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
515        assert!(tags.contains(&"UNH"));
516        assert!(tags.contains(&"NAD"));
517        assert!(tags.contains(&"CTA"));
518    }
519}