Skip to main content

edifact_rs/
group.rs

//! Segment group tree model for structured EDIFACT message navigation.
//!
//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
//! function ([`group_segments_indexed`]) that partitions a flat segment slice into a
//! [`SegmentGroupIndexed`] tree according to the schema.
//!
//! # Model overview
//!
//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
//! optionally-repeating sets of segments delimited by a specific *trigger*
//! segment tag.  For example, ORDERS D.11A has an `SG1` group starting with
//! `RFF`, an `SG2` group starting with `NAD`, and so on.
//!
//! This module provides lightweight, allocation-efficient types for defining
//! and working with these groups without requiring message-type-specific
//! generated code.
//!
//! # Example
//!
//! ```rust,ignore
//! use edifact_rs::group::{GroupDef, group_segments_indexed};
//!
//! static ORDERS_GROUPS: &[GroupDef] = &[
//!     GroupDef { name: "SG2", trigger: "NAD", children: &[] },
//!     GroupDef {
//!         name: "SG7",
//!         trigger: "LIN",
//!         children: &[
//!             GroupDef { name: "SG32", trigger: "PRI", children: &[] },
//!         ],
//!     },
//! ];
//!
//! let root = group_segments_indexed(&segments, ORDERS_GROUPS, "ROOT");
//! for child in &root.children {
//!     let child_segs = &segments[child.total_span.clone()];
//!     println!("{} #{}: {} segments", child.definition, child.occurrence_index, child_segs.len());
//! }
//! ```
40
41use crate::{OwnedSegment, Segment};
42use smallvec::SmallVec;
43use std::ops::Range;
44
// ── GroupDef ──────────────────────────────────────────────────────────────────
46
/// Static schema describing one segment group within an EDIFACT message.
///
/// `GroupDef` is designed to be declared as a `static` or `const` value, so
/// both the struct itself and all nested `children` references are
/// `'static`-lifetime slices with no heap allocation.
///
/// Equality and hashing are structural: two definitions are equal when their
/// `name`, `trigger`, and (recursively) all `children` are equal.  This makes
/// schemas directly comparable in tests and usable as set / map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct GroupDef {
    /// Human-readable group name, e.g. `"SG2"`.
    pub name: &'static str,
    /// The segment tag whose appearance starts a new instance of this group.
    pub trigger: &'static str,
    /// Nested child groups within this group.
    ///
    /// The first trigger encountered among `children` ends the current child
    /// and starts a new one; a trigger that matches a sibling or ancestor group
    /// ends this group entirely.
    pub children: &'static [GroupDef],
}
65
// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
67
/// Zero-copy segment group tree.  Stores index ranges into the original flat
/// segment slice rather than cloning each segment.
///
/// Produced by [`group_segments_indexed`].  To access the actual segments use
/// the original `&[Segment<'a>]` together with [`total_span`]:
///
/// ```rust,ignore
/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
/// for child in &indexed.children {
///     let child_segs = &segments[child.total_span.clone()];
/// }
/// ```
///
/// `Clone` copies the node together with all of its descendants, and
/// `PartialEq` / `Eq` compare whole subtrees field by field, which makes
/// expected-tree assertions in tests straightforward.
///
/// [`total_span`]: SegmentGroupIndexed::total_span
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SegmentGroupIndexed {
    /// Group name from the schema, e.g. `"SG2"`, or the root name.
    pub definition: &'static str,
    /// Contiguous span `[start, end)` of absolute indices into the original flat
    /// segment slice covering **all** segments in this group instance — trigger
    /// segment, direct segments, and all descendant groups combined.
    ///
    /// Use this to slice the original `&[Segment<'_>]` to get every segment
    /// belonging to this group:
    ///
    /// ```rust,ignore
    /// let all_sg2_segs = &segments[sg2.total_span.clone()];
    /// ```
    ///
    /// To iterate over only the segments that belong *directly* to this group
    /// (excluding descendants), use [`direct_segment_indices`].
    ///
    /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
    pub total_span: Range<usize>,
    /// Child group instances, in message order.
    pub children: Vec<SegmentGroupIndexed>,
    /// Zero-based occurrence index of this group instance among all siblings
    /// with the same `definition` at this level.
    ///
    /// For example, the first `SG5` child at a given level has `occurrence_index = 0`,
    /// the second `SG5` has `occurrence_index = 1`, etc.  Siblings with a
    /// *different* definition have independent counters.
    ///
    /// This field is essential for producing unambiguous rule-violation IDs
    /// (e.g. `"SG5[2]/DTM"`) when the same group type repeats.
    pub occurrence_index: usize,
}

impl SegmentGroupIndexed {
    /// Iterate over the absolute indices of segments that belong *directly* to
    /// this group — i.e. those within [`total_span`] that are **not** covered
    /// by any child group's [`total_span`].
    ///
    /// Indices are yielded in ascending order.
    ///
    /// # Complexity
    ///
    /// `children` holds one entry per group *instance*, so a node can have
    /// thousands of children (e.g. a root with many repeated line-item
    /// groups).  When the children are ordered by span start — which is how
    /// the grouping functions in this module push them — the iterator makes a
    /// single forward sweep: `O(total_span.len() + children.len())` overall.
    ///
    /// Because the fields are public, a tree may also be assembled by hand
    /// with children in arbitrary order.  In that case every index is checked
    /// against every child (`O(total_span.len() × children.len())`); the
    /// yielded indices are the same either way.
    ///
    /// [`total_span`]: SegmentGroupIndexed::total_span
    pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
        let children = self.children.as_slice();

        // One cheap pass up front decides which strategy the filter may use.
        let ordered_by_start = children
            .windows(2)
            .all(|pair| pair[0].total_span.start <= pair[1].total_span.start);

        // Sweep state (only meaningful when `ordered_by_start` holds):
        // `next_child` is the first child whose span start has not been passed
        // yet; `covered_until` is the largest span end among the children that
        // have been passed.  An index is covered exactly when it lies below
        // `covered_until`, which also handles overlapping or empty child spans.
        let mut next_child = 0usize;
        let mut covered_until = 0usize;

        self.total_span.clone().filter(move |&index| {
            if !ordered_by_start {
                // Order-independent fallback for hand-built trees.
                return !children
                    .iter()
                    .any(|child| child.total_span.contains(&index));
            }

            while let Some(child) = children.get(next_child) {
                if child.total_span.start > index {
                    break;
                }
                covered_until = covered_until.max(child.total_span.end);
                next_child += 1;
            }
            index >= covered_until
        })
    }
}
134
135/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
136///
137/// Stores `Range<usize>` indices into the original flat slice rather than
138/// copying each [`Segment`] into the tree.  Use the original slice together
139/// with [`SegmentGroupIndexed::total_span`] to access segments.
140///
141/// # Worked Example
142///
143/// Consider a simplified 3-level MSCONS-like schema:
144///
145/// ```rust
146/// use edifact_rs::group::{GroupDef, group_segments_indexed};
147/// use edifact_rs::from_bytes;
148///
149/// // Schema: ROOT → SG1 (trigger: RFF) → SG5 (trigger: LOC) → SG6 (trigger: QTY)
150/// static SCHEMA: &[GroupDef] = &[
151///     GroupDef { name: "SG1", trigger: "RFF", children: &[] },
152///     GroupDef {
153///         name: "SG5",
154///         trigger: "LOC",
155///         children: &[
156///             GroupDef { name: "SG6", trigger: "QTY", children: &[] },
157///         ],
158///     },
159/// ];
160///
161/// // A small MSCONS-like message fragment (no envelope for clarity).
162/// let input = b"RFF+Z13:REF1'LOC+172+DE123'DTM+163:20230101:102'QTY+220:100:KWH'";
163/// let segments: Vec<_> = from_bytes(input)
164///     .collect::<Result<_, _>>()
165///     .unwrap();
166///
167/// let tree = group_segments_indexed(&segments, SCHEMA, "ROOT");
168///
169/// // The root contains no direct segments (all consumed by SG1 / SG5).
170/// assert!(tree.direct_segment_indices().next().is_none());
171///
172/// // One SG1 group and one SG5 group at root level.
173/// let sg1 = tree.children.iter().find(|g| g.definition == "SG1").unwrap();
174/// let sg5 = tree.children.iter().find(|g| g.definition == "SG5").unwrap();
175///
176/// // SG1 spans the RFF segment only.
177/// assert_eq!(&segments[sg1.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
178///            &["RFF"]);
179///
180/// // SG5 spans LOC + DTM + QTY (all three segments, including the SG6 child).
181/// let sg5_tags: Vec<_> = segments[sg5.total_span.clone()].iter().map(|s| s.tag).collect();
182/// assert_eq!(sg5_tags, &["LOC", "DTM", "QTY"]);
183///
184/// // SG5's direct segments (LOC + DTM) exclude the SG6 child (QTY).
185/// let sg5_direct: Vec<_> = sg5.direct_segment_indices()
186///     .map(|i| segments[i].tag)
187///     .collect();
188/// assert_eq!(sg5_direct, &["LOC", "DTM"]);
189///
190/// // SG6 contains only QTY.
191/// let sg6 = sg5.children.iter().find(|g| g.definition == "SG6").unwrap();
192/// assert_eq!(segments[sg6.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
193///            &["QTY"]);
194/// ```
195///
196/// # Group validation
197///
198/// `group_segments_indexed` pairs naturally with
199/// [`crate::validator::ValidationContext::validate_lenient_grouped`] to enforce group-presence rules:
200///
201/// ```rust,ignore
202/// use edifact_rs::{ProfileRulePack, ValidationContext};
203///
204/// let pack = ProfileRulePack::new("MY-AHB")
205///     .require_segment_in_group("SG5", "DTM", "SG5-DTM-M")
206///     .forbid_segment_in_group("SG1", "LOC", "SG1-LOC-F");
207/// let ctx = ValidationContext::builder().with_profile_pack(pack).build();
208///
209/// let tree = group_segments_indexed(&segments, SCHEMA, "MSCONS");
210/// let report = ctx.validate_lenient_grouped(&tree, &segments);
211/// ```
212///
213/// # Complexity
214///
215/// `O(n × schema_depth)` time, `O(tree_nodes)` space.  No `Segment` clones.
216pub fn group_segments_indexed(
217    segments: &[Segment<'_>],
218    schema: &'static [GroupDef],
219    root_name: &'static str,
220) -> SegmentGroupIndexed {
221    let mut root = SegmentGroupIndexed {
222        definition: root_name,
223        total_span: 0..0,
224        children: Vec::new(),
225        occurrence_index: 0,
226    };
227    group_recursive_indexed(segments, &mut root, schema, &[], 0);
228    root
229}
230
231/// Partition an owned-segment slice into a [`SegmentGroupIndexed`] tree according to `schema`.
232///
233/// Equivalent to [`group_segments_indexed`] but accepts `&[OwnedSegment]`.
234pub fn group_owned_segments_indexed(
235    segments: &[OwnedSegment],
236    schema: &'static [GroupDef],
237    root_name: &'static str,
238) -> SegmentGroupIndexed {
239    let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
240    group_segments_indexed(&borrowed, schema, root_name)
241}
242
243/// Internal recursive indexed grouping.  Returns the number of segments consumed.
244fn group_recursive_indexed(
245    segments: &[Segment<'_>],
246    parent: &mut SegmentGroupIndexed,
247    schema: &'static [GroupDef],
248    stop_triggers: &[&'static str],
249    offset: usize,
250) -> usize {
251    let combined_stop: SmallVec<[&'static str; 16]> = {
252        let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
253        for d in schema {
254            if !v.contains(&d.trigger) {
255                v.push(d.trigger);
256            }
257        }
258        v
259    };
260
261    // `span_start` is the absolute index of the first segment in this group.
262    // For child groups the caller pre-seeds `parent.total_span.start` with the
263    // trigger segment position; for the root (or any group with no pre-seeded
264    // trigger) we start at `offset`.
265    let span_start = if !parent.total_span.is_empty() {
266        parent.total_span.start // pre-seeded trigger position
267    } else {
268        offset
269    };
270
271    let mut i = 0;
272    // Track how many children of each definition have been pushed at this level,
273    // so we can stamp `occurrence_index` on each new child.
274    let mut occ_counts: std::collections::HashMap<&'static str, usize> =
275        std::collections::HashMap::new();
276    while i < segments.len() {
277        let tag = segments[i].tag;
278
279        if stop_triggers.iter().copied().any(|t| t == tag) {
280            break;
281        }
282
283        if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
284            let child_offset = offset + i;
285            let occ_idx = {
286                let c = occ_counts.entry(def.name).or_insert(0);
287                let idx = *c;
288                *c += 1;
289                idx
290            };
291            let mut child = SegmentGroupIndexed {
292                definition: def.name,
293                // Pre-seed the trigger segment; the recursive call extends
294                // total_span to cover the full child subtree.
295                total_span: child_offset..child_offset + 1,
296                children: Vec::new(),
297                occurrence_index: occ_idx,
298            };
299            i += 1;
300
301            let consumed = group_recursive_indexed(
302                &segments[i..],
303                &mut child,
304                def.children,
305                &combined_stop,
306                offset + i,
307            );
308            i += consumed;
309
310            parent.children.push(child);
311        } else {
312            i += 1;
313        }
314    }
315
316    // Total span covers everything from the first segment (trigger or first
317    // direct segment) to the last segment consumed in this call.
318    parent.total_span = span_start..(offset + i);
319
320    i
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Span;
    use crate::model::Element;

    /// Minimal segment fixture: only the tag matters to the grouping logic.
    fn seg(tag: &'static str) -> Segment<'static> {
        Segment {
            tag,
            span: Span::new(0, 0),
            tag_span: Span::new(0, 0),
            elements: vec![Element::of(&["x"])],
        }
    }

    /// Build a flat message from a list of tags, in order.
    fn message(tags: &[&'static str]) -> Vec<Segment<'static>> {
        tags.iter().map(|&tag| seg(tag)).collect()
    }

    // Two root-level groups: SG1 (NAD) with a nested SG2 (CTA), and SG3 (LIN).
    static SCHEMA: &[GroupDef] = &[
        GroupDef {
            name: "SG1",
            trigger: "NAD",
            children: &[GroupDef {
                name: "SG2",
                trigger: "CTA",
                children: &[],
            }],
        },
        GroupDef {
            name: "SG3",
            trigger: "LIN",
            children: &[],
        },
    ];

    #[test]
    fn root_segments_before_first_trigger() {
        let segs = message(&["UNH", "BGM", "NAD"]);
        let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");

        // Indices 0 (UNH) and 1 (BGM) stay on the root; index 2 (NAD) opens SG1.
        let direct = tree.direct_segment_indices().collect::<Vec<usize>>();
        assert_eq!(direct, vec![0, 1], "UNH + BGM should be direct in root");

        assert_eq!(tree.children.len(), 1);
        let only_child = &tree.children[0];
        assert_eq!(only_child.definition, "SG1");
    }

    #[test]
    fn repeated_trigger_creates_multiple_children() {
        let segs = message(&["UNH", "NAD", "NAD", "UNT"]);
        let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");

        // Each NAD opens its own SG1 instance.
        let mut sg1_instances = 0;
        for child in &tree.children {
            if child.definition == "SG1" {
                sg1_instances += 1;
            }
        }
        assert_eq!(sg1_instances, 2);
    }

    #[test]
    fn repeated_trigger_occurrence_index_is_stamped() {
        let segs = message(&["NAD", "NAD", "NAD"]);
        let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");

        let mut stamped = Vec::new();
        for child in &tree.children {
            stamped.push(child.occurrence_index);
        }
        assert_eq!(stamped, vec![0, 1, 2]);
    }

    #[test]
    fn nested_child_groups() {
        let segs = message(&["NAD", "CTA", "CTA"]);
        let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");

        let sg1 = &tree.children[0];
        assert_eq!(sg1.definition, "SG1");

        // Each CTA opens its own SG2 instance inside SG1.
        assert_eq!(sg1.children.len(), 2);
        for nested in &sg1.children {
            assert!(nested.definition == "SG2");
        }
    }

    #[test]
    fn total_span_covers_all_segments() {
        let segs = message(&["UNH", "NAD", "CTA"]);
        let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");

        // Slicing by the root span must yield every tag of the message.
        let covered = &segs[tree.total_span.clone()];
        for expected in ["UNH", "NAD", "CTA"].iter() {
            assert!(covered.iter().any(|s| s.tag == *expected));
        }
    }
}