Skip to main content

edifact_rs/
group.rs

1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag.  For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//!     GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//!     GroupDef {
26//!         name: "SG7",
27//!         trigger: "LIN",
28//!         children: &[
29//!             GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//!         ],
31//!     },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//!     println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::{OwnedSegment, Segment};
41use smallvec::SmallVec;
42use std::ops::Range;
43
44// ── GroupDef ──────────────────────────────────────────────────────────────────
45
46/// Static schema describing one segment group within an EDIFACT message.
47///
48/// `GroupDef` is designed to be declared as a `static` or `const` value, so
49/// both the struct itself and all nested `children` references are
50/// `'static`-lifetime slices with no heap allocation.
51#[derive(Debug, Clone, Copy)]
52pub struct GroupDef {
53    /// Human-readable group name, e.g. `"SG2"`.
54    pub name: &'static str,
55    /// The segment tag whose appearance starts a new instance of this group.
56    pub trigger: &'static str,
57    /// Nested child groups within this group.
58    ///
59    /// The first trigger encountered among `children` ends the current child
60    /// and starts a new one; a trigger that matches a sibling or ancestor group
61    /// ends this group entirely.
62    pub children: &'static [GroupDef],
63}
64
65// ── SegmentGroup ──────────────────────────────────────────────────────────────
66
67/// A populated segment group produced by [`group_segments`].
68///
69/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
70/// elements is heap-allocated per segment, but the string data inside each
71/// element still borrows from the original input buffer via the `'a` lifetime.
72///
73/// # Memory note
74///
75/// `group_segments` clones each [`Segment`] into the tree.  For large messages
76/// or deeply nested schemas where minimal allocation is critical, consider
77/// working with the original flat segment slice and deriving group boundaries
78/// yourself based on the schema's trigger tags.
79#[derive(Debug)]
80pub struct SegmentGroup<'a> {
81    /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
82    pub definition: &'static str,
83    /// Segments that belong directly to this group instance.
84    ///
85    /// Segment values are cloned from the input slice, but the string data
86    /// inside each segment borrows from the original input for `'a`.
87    pub segments: Vec<Segment<'a>>,
88    /// Child group instances, in the order they appear in the message.
89    pub children: Vec<SegmentGroup<'a>>,
90}
91
92impl<'a> SegmentGroup<'a> {
93    fn new(definition: &'static str) -> Self {
94        Self {
95            definition,
96            segments: Vec::new(),
97            children: Vec::new(),
98        }
99    }
100
101    /// Iterate over all segments in this group and all descendant groups,
102    /// depth-first.
103    pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
104        AllSegmentsIter::new(self)
105    }
106
107    /// Find the first segment with the given `tag` in this group (not children).
108    ///
109    /// # Shallow search
110    ///
111    /// This method searches only the segments directly owned by **this** group
112    /// instance — it does **not** recurse into child groups.  To search the
113    /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
114    ///
115    /// ```ignore
116    /// group.all_segments().find(|s| s.tag == "LIN")
117    /// ```
118    pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
119        self.segments.iter().find(|s| s.tag == tag)
120    }
121}
122
123// ── AllSegmentsIter ───────────────────────────────────────────────────────────
124
125struct AllSegmentsIter<'g, 'a> {
126    // Stack of (current_group, current_seg_idx, current_child_idx)
127    stack: SmallVec<[(&'g SegmentGroup<'a>, usize, usize); 8]>,
128}
129
130impl<'g, 'a> AllSegmentsIter<'g, 'a> {
131    fn new(root: &'g SegmentGroup<'a>) -> Self {
132        Self {
133            stack: smallvec::smallvec![(root, 0, 0)],
134        }
135    }
136}
137
138impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
139    type Item = &'g Segment<'a>;
140
141    fn next(&mut self) -> Option<Self::Item> {
142        loop {
143            let (group, seg_idx, child_idx) = self.stack.last_mut()?;
144            // Yield segments first.
145            if *seg_idx < group.segments.len() {
146                let seg = &group.segments[*seg_idx];
147                *seg_idx += 1;
148                return Some(seg);
149            }
150            // Then recurse into children
151            if *child_idx < group.children.len() {
152                let child = &group.children[*child_idx];
153                *child_idx += 1;
154                self.stack.push((child, 0, 0));
155                continue;
156            }
157            // Done with this group
158            self.stack.pop();
159        }
160    }
161
162    fn size_hint(&self) -> (usize, Option<usize>) {
163        // Conservative lower bound: count remaining direct segments in all
164        // frames on the stack.  Children not yet pushed are not counted, so
165        // the true total may be higher, but this is still a valid lower bound.
166        let lower: usize = self
167            .stack
168            .iter()
169            .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
170            .sum();
171        (lower, None)
172    }
173}
174
175// ── group_segments ────────────────────────────────────────────────────────────
176
177/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
178///
179/// # Algorithm
180///
181/// The algorithm is a single-pass linear scan:
182///
183/// 1. Segments that do not match any group trigger in `schema` are added to
184///    the current group's `segments`.
185/// 2. When a trigger matching a group in `schema` is encountered:
186///    - If an open child with the same trigger already exists it is closed and
187///      a new instance is started (repetition).
188///    - If the trigger belongs to a *sibling* or *ancestor* group the current
189///      group is closed first (the caller handles restart).
190///    - Nested schemas recurse: child group triggers follow the same rules
191///      within their parent.
192///
193/// # Root group
194///
195/// The returned root group has `definition` set to `root_name` (typically
196/// `"ROOT"` or the message type string).  Segments before the first matching
197/// trigger land in the root's own `segments` vec.
198///
199/// # Example
200///
201/// ```rust,ignore
202/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
203/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
204///     println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
205/// }
206/// ```
207pub fn group_segments<'a>(
208    segments: &[Segment<'a>],
209    schema: &'static [GroupDef],
210    root_name: &'static str,
211) -> SegmentGroup<'a> {
212    let mut root = SegmentGroup::new(root_name);
213    group_recursive(segments, &mut root, schema);
214    root
215}
216
217/// Internal recursive grouping.  Returns the number of segments consumed.
218fn group_recursive<'a>(
219    segments: &[Segment<'a>],
220    parent: &mut SegmentGroup<'a>,
221    schema: &'static [GroupDef],
222) -> usize {
223    group_recursive_inner(segments, parent, schema, &[])
224}
225
226fn group_recursive_inner<'a>(
227    segments: &[Segment<'a>],
228    parent: &mut SegmentGroup<'a>,
229    schema: &'static [GroupDef],
230    stop_triggers: &[&'static str],
231) -> usize {
232    // Pre-compute the combined stop set for all children of this schema level.
233    // This is computed once per schema level, not once per segment, so the
234    // SmallVec is allocated at most O(depth) times rather than O(depth × n).
235    let combined_stop: SmallVec<[&'static str; 16]> = {
236        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
237        for d in schema {
238            if !v.contains(&d.trigger) {
239                v.push(d.trigger);
240            }
241        }
242        v
243    };
244
245    let mut i = 0;
246    while i < segments.len() {
247        let tag = segments[i].tag;
248
249        // A trigger matching a stop tag means we must return control to the parent group.
250        if stop_triggers.iter().copied().any(|t| t == tag) {
251            break;
252        }
253
254        // Does this tag trigger a group in the schema?
255        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
256            // Start a new child group instance
257            let mut child = SegmentGroup::new(def.name);
258            // The trigger segment belongs to the new child
259            child.segments.push(segments[i].clone());
260            i += 1;
261
262            // Recurse into children of this group — pass the pre-computed stop set.
263            let consumed =
264                group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
265            i += consumed;
266
267            parent.children.push(child);
268        } else {
269            // Segment doesn't match any group trigger in this schema — it
270            // belongs to the parent group's own segments.  This also covers
271            // leaf groups (empty schema): all non-stop-trigger segments after
272            // the trigger are accumulated into the current group.
273            parent.segments.push(segments[i].clone());
274            i += 1;
275        }
276    }
277    i
278}
279
280// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
281
282/// A zero-clone counterpart to [`SegmentGroup`] that stores index ranges into
283/// the original flat segment slice rather than cloning each segment.
284///
285/// Produced by [`group_segments_indexed`].  To access the actual segments use
286/// the original `&[Segment<'a>]` together with [`total_span`]:
287///
288/// ```rust,ignore
289/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
290/// for child in &indexed.children {
291///     let child_segs = &segments[child.total_span.clone()];
292/// }
293/// ```
294///
295/// [`total_span`]: SegmentGroupIndexed::total_span
296#[derive(Debug)]
297pub struct SegmentGroupIndexed {
298    /// Group name from the schema, e.g. `"SG2"`, or the root name.
299    pub definition: &'static str,
300    /// Contiguous span `[start, end)` of absolute indices into the original flat
301    /// segment slice covering **all** segments in this group instance — trigger
302    /// segment, direct segments, and all descendant groups combined.
303    ///
304    /// Use this to slice the original `&[Segment<'_>]` to get every segment
305    /// belonging to this group:
306    ///
307    /// ```rust,ignore
308    /// let all_sg2_segs = &segments[sg2.total_span.clone()];
309    /// ```
310    ///
311    /// To iterate over only the segments that belong *directly* to this group
312    /// (excluding descendants), use [`direct_segment_indices`].
313    ///
314    /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
315    pub total_span: Range<usize>,
316    /// Child group instances, in message order.
317    pub children: Vec<SegmentGroupIndexed>,
318}
319
320impl SegmentGroupIndexed {
321    /// Iterate over the absolute indices of segments that belong *directly* to
322    /// this group — i.e. those within [`total_span`] that are **not** covered
323    /// by any child group's [`total_span`].
324    ///
325    /// Complexity: `O(total_span.len() × children.len())`.  For typical EDIFACT
326    /// message structures (≤ 8 children per group) this is negligible.
327    ///
328    /// [`total_span`]: SegmentGroupIndexed::total_span
329    pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
330        self.total_span.clone().filter(|i| {
331            !self
332                .children
333                .iter()
334                .any(|child| child.total_span.contains(i))
335        })
336    }
337}
338
339/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
340///
341/// This is the zero-allocation counterpart to [`group_segments`]: instead of
342/// copying each [`Segment`] into the tree, it records `Range<usize>` indices
343/// into the original flat slice.  Use the original slice together with
344/// [`SegmentGroupIndexed::total_span`] to access segments.
345///
346/// # Worked Example
347///
348/// Consider a simplified 3-level MSCONS-like schema:
349///
350/// ```rust
351/// use edifact_rs::group::{GroupDef, group_segments_indexed};
352/// use edifact_rs::from_bytes;
353///
354/// // Schema: ROOT → SG1 (trigger: RFF) → SG5 (trigger: LOC) → SG6 (trigger: QTY)
355/// static SCHEMA: &[GroupDef] = &[
356///     GroupDef { name: "SG1", trigger: "RFF", children: &[] },
357///     GroupDef {
358///         name: "SG5",
359///         trigger: "LOC",
360///         children: &[
361///             GroupDef { name: "SG6", trigger: "QTY", children: &[] },
362///         ],
363///     },
364/// ];
365///
366/// // A small MSCONS-like message fragment (no envelope for clarity).
367/// let input = b"RFF+Z13:REF1'LOC+172+DE123'DTM+163:20230101:102'QTY+220:100:KWH'";
368/// let segments: Vec<_> = from_bytes(input)
369///     .collect::<Result<_, _>>()
370///     .unwrap();
371///
372/// let tree = group_segments_indexed(&segments, SCHEMA, "ROOT");
373///
374/// // The root contains no direct segments (all consumed by SG1 / SG5).
375/// assert!(tree.direct_segment_indices().next().is_none());
376///
377/// // One SG1 group and one SG5 group at root level.
378/// let sg1 = tree.children.iter().find(|g| g.definition == "SG1").unwrap();
379/// let sg5 = tree.children.iter().find(|g| g.definition == "SG5").unwrap();
380///
381/// // SG1 spans the RFF segment only.
382/// assert_eq!(&segments[sg1.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
383///            &["RFF"]);
384///
385/// // SG5 spans LOC + DTM + QTY (all three segments, including the SG6 child).
386/// let sg5_tags: Vec<_> = segments[sg5.total_span.clone()].iter().map(|s| s.tag).collect();
387/// assert_eq!(sg5_tags, &["LOC", "DTM", "QTY"]);
388///
389/// // SG5's direct segments (LOC + DTM) exclude the SG6 child (QTY).
390/// let sg5_direct: Vec<_> = sg5.direct_segment_indices()
391///     .map(|i| segments[i].tag)
392///     .collect();
393/// assert_eq!(sg5_direct, &["LOC", "DTM"]);
394///
395/// // SG6 contains only QTY.
396/// let sg6 = sg5.children.iter().find(|g| g.definition == "SG6").unwrap();
397/// assert_eq!(segments[sg6.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
398///            &["QTY"]);
399/// ```
400///
401/// # Group validation
402///
403/// `group_segments_indexed` pairs naturally with
404/// [`crate::validator::ValidationContext::validate_lenient_grouped`] to enforce group-presence rules:
405///
406/// ```rust,ignore
407/// use edifact_rs::{ProfileRulePack, ValidationContext};
408///
409/// let pack = ProfileRulePack::new("MY-AHB")
410///     .require_segment_in_group("SG5", "DTM", "SG5-DTM-M")
411///     .forbid_segment_in_group("SG1", "LOC", "SG1-LOC-F");
412/// let ctx = ValidationContext::builder().with_profile_pack(pack).build();
413///
414/// let tree = group_segments_indexed(&segments, SCHEMA, "MSCONS");
415/// let report = ctx.validate_lenient_grouped(&tree, &segments);
416/// ```
417///
418/// # Complexity
419///
420/// `O(n × schema_depth)` time, `O(tree_nodes)` space.  No `Segment` clones.
421pub fn group_segments_indexed<'a>(
422    segments: &[Segment<'a>],
423    schema: &'static [GroupDef],
424    root_name: &'static str,
425) -> SegmentGroupIndexed {
426    let mut root = SegmentGroupIndexed {
427        definition: root_name,
428        total_span: 0..0,
429        children: Vec::new(),
430    };
431    group_recursive_indexed(segments, &mut root, schema, &[], 0);
432    root
433}
434
435/// Partition an owned-segment slice into a [`SegmentGroup`] tree according to `schema`.
436///
437/// Equivalent to [`group_segments`] but accepts `&[OwnedSegment]` for use with
438/// the reader-based API ([`crate::from_reader`] → [`crate::FromReaderIter`]).
439///
440/// Internally borrows each `OwnedSegment` as a `Segment<'_>` and delegates to
441/// [`group_segments`], so all grouping logic is shared.
442pub fn group_owned_segments<'a>(
443    segments: &'a [OwnedSegment],
444    schema: &'static [GroupDef],
445    root_name: &'static str,
446) -> SegmentGroup<'a> {
447    let borrowed: Vec<Segment<'a>> = segments.iter().map(|s| s.as_borrowed()).collect();
448    group_segments(&borrowed, schema, root_name)
449}
450
451/// Partition an owned-segment slice into a [`SegmentGroupIndexed`] tree according to `schema`.
452///
453/// Equivalent to [`group_segments_indexed`] but accepts `&[OwnedSegment]`.
454pub fn group_owned_segments_indexed(
455    segments: &[OwnedSegment],
456    schema: &'static [GroupDef],
457    root_name: &'static str,
458) -> SegmentGroupIndexed {
459    let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
460    group_segments_indexed(&borrowed, schema, root_name)
461}
462
463/// Internal recursive indexed grouping.  Returns the number of segments consumed.
464fn group_recursive_indexed<'a>(
465    segments: &[Segment<'a>],
466    parent: &mut SegmentGroupIndexed,
467    schema: &'static [GroupDef],
468    stop_triggers: &[&'static str],
469    offset: usize,
470) -> usize {
471    let combined_stop: SmallVec<[&'static str; 16]> = {
472        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
473        for d in schema {
474            if !v.contains(&d.trigger) {
475                v.push(d.trigger);
476            }
477        }
478        v
479    };
480
481    // `span_start` is the absolute index of the first segment in this group.
482    // For child groups the caller pre-seeds `parent.total_span.start` with the
483    // trigger segment position; for the root (or any group with no pre-seeded
484    // trigger) we start at `offset`.
485    let span_start = if !parent.total_span.is_empty() {
486        parent.total_span.start // pre-seeded trigger position
487    } else {
488        offset
489    };
490
491    let mut i = 0;
492    while i < segments.len() {
493        let tag = segments[i].tag;
494
495        if stop_triggers.iter().copied().any(|t| t == tag) {
496            break;
497        }
498
499        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
500            let child_offset = offset + i;
501            let mut child = SegmentGroupIndexed {
502                definition: def.name,
503                // Pre-seed the trigger segment; the recursive call extends
504                // total_span to cover the full child subtree.
505                total_span: child_offset..child_offset + 1,
506                children: Vec::new(),
507            };
508            i += 1;
509
510            let consumed = group_recursive_indexed(
511                &segments[i..],
512                &mut child,
513                def.children,
514                &combined_stop,
515                offset + i,
516            );
517            i += consumed;
518
519            parent.children.push(child);
520        } else {
521            i += 1;
522        }
523    }
524
525    // Total span covers everything from the first segment (trigger or first
526    // direct segment) to the last segment consumed in this call.
527    parent.total_span = span_start..(offset + i);
528
529    i
530}
531
532#[cfg(test)]
533mod tests {
534    use super::*;
535    use crate::Span;
536    use crate::model::Element;
537
538    fn seg(tag: &'static str) -> Segment<'static> {
539        Segment {
540            tag,
541            span: Span::new(0, 0),
542            tag_span: Span::new(0, 0),
543            elements: vec![Element::of(&["x"])],
544        }
545    }
546
547    static SCHEMA: &[GroupDef] = &[
548        GroupDef {
549            name: "SG1",
550            trigger: "NAD",
551            children: &[GroupDef {
552                name: "SG2",
553                trigger: "CTA",
554                children: &[],
555            }],
556        },
557        GroupDef {
558            name: "SG3",
559            trigger: "LIN",
560            children: &[],
561        },
562    ];
563
564    #[test]
565    fn root_segments_before_first_trigger() {
566        let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
567        let tree = group_segments(&segs, SCHEMA, "ROOT");
568        assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
569        assert_eq!(tree.children.len(), 1);
570        assert_eq!(tree.children[0].definition, "SG1");
571    }
572
573    #[test]
574    fn repeated_trigger_creates_multiple_children() {
575        let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
576        let tree = group_segments(&segs, SCHEMA, "ROOT");
577        // Two NAD triggers → two SG1 children
578        assert_eq!(
579            tree.children
580                .iter()
581                .filter(|c| c.definition == "SG1")
582                .count(),
583            2
584        );
585    }
586
587    #[test]
588    fn nested_child_groups() {
589        let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
590        let tree = group_segments(&segs, SCHEMA, "ROOT");
591        let sg1 = &tree.children[0];
592        assert_eq!(sg1.definition, "SG1");
593        // Two CTA triggers → two SG2 children inside SG1
594        assert_eq!(sg1.children.len(), 2);
595        assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
596    }
597
598    #[test]
599    fn all_segments_iterator_depth_first() {
600        let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
601        let tree = group_segments(&segs, SCHEMA, "ROOT");
602        let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
603        assert!(tags.contains(&"UNH"));
604        assert!(tags.contains(&"NAD"));
605        assert!(tags.contains(&"CTA"));
606    }
607}