edifact_rs/group.rs
1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments`]) that partitions a flat segment slice into a
5//! [`SegmentGroup`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag. For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//! GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//! GroupDef {
26//! name: "SG7",
27//! trigger: "LIN",
28//! children: &[
29//! GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//! ],
31//! },
32//! ];
33//!
34//! let root = group_segments(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//! println!("{}: {} segments", child.definition, child.segments.len());
37//! }
38//! ```
39
40use crate::{OwnedSegment, Segment};
41use smallvec::SmallVec;
42use std::ops::Range;
43
44// ── GroupDef ──────────────────────────────────────────────────────────────────
45
46/// Static schema describing one segment group within an EDIFACT message.
47///
48/// `GroupDef` is designed to be declared as a `static` or `const` value, so
49/// both the struct itself and all nested `children` references are
50/// `'static`-lifetime slices with no heap allocation.
51#[derive(Debug, Clone, Copy)]
52pub struct GroupDef {
53 /// Human-readable group name, e.g. `"SG2"`.
54 pub name: &'static str,
55 /// The segment tag whose appearance starts a new instance of this group.
56 pub trigger: &'static str,
57 /// Nested child groups within this group.
58 ///
59 /// The first trigger encountered among `children` ends the current child
60 /// and starts a new one; a trigger that matches a sibling or ancestor group
61 /// ends this group entirely.
62 pub children: &'static [GroupDef],
63}
64
65// ── SegmentGroup ──────────────────────────────────────────────────────────────
66
67/// A populated segment group produced by [`group_segments`].
68///
69/// Each segment is cloned (shallow copy) from the input slice: the `Vec` of
70/// elements is heap-allocated per segment, but the string data inside each
71/// element still borrows from the original input buffer via the `'a` lifetime.
72///
73/// # Memory note
74///
75/// `group_segments` clones each [`Segment`] into the tree. For large messages
76/// or deeply nested schemas where minimal allocation is critical, consider
77/// working with the original flat segment slice and deriving group boundaries
78/// yourself based on the schema's trigger tags.
79#[derive(Debug)]
80pub struct SegmentGroup<'a> {
81 /// Group name from the schema, e.g. `"SG2"`, or `"ROOT"` for the envelope.
82 pub definition: &'static str,
83 /// Segments that belong directly to this group instance.
84 ///
85 /// Segment values are cloned from the input slice, but the string data
86 /// inside each segment borrows from the original input for `'a`.
87 pub segments: Vec<Segment<'a>>,
88 /// Child group instances, in the order they appear in the message.
89 pub children: Vec<SegmentGroup<'a>>,
90}
91
92impl<'a> SegmentGroup<'a> {
93 fn new(definition: &'static str) -> Self {
94 Self {
95 definition,
96 segments: Vec::new(),
97 children: Vec::new(),
98 }
99 }
100
101 /// Iterate over all segments in this group and all descendant groups,
102 /// depth-first.
103 pub fn all_segments(&self) -> impl Iterator<Item = &Segment<'a>> + '_ {
104 AllSegmentsIter::new(self)
105 }
106
107 /// Find the first segment with the given `tag` in this group (not children).
108 ///
109 /// # Shallow search
110 ///
111 /// This method searches only the segments directly owned by **this** group
112 /// instance — it does **not** recurse into child groups. To search the
113 /// entire subtree use [`SegmentGroup::all_segments`] with [`Iterator::find`]:
114 ///
115 /// ```ignore
116 /// group.all_segments().find(|s| s.tag == "LIN")
117 /// ```
118 pub fn find_segment(&self, tag: &str) -> Option<&Segment<'a>> {
119 self.segments.iter().find(|s| s.tag == tag)
120 }
121}
122
123// ── AllSegmentsIter ───────────────────────────────────────────────────────────
124
125struct AllSegmentsIter<'g, 'a> {
126 // Stack of (current_group, current_seg_idx, current_child_idx)
127 stack: SmallVec<[(&'g SegmentGroup<'a>, usize, usize); 8]>,
128}
129
130impl<'g, 'a> AllSegmentsIter<'g, 'a> {
131 fn new(root: &'g SegmentGroup<'a>) -> Self {
132 Self {
133 stack: smallvec::smallvec![(root, 0, 0)],
134 }
135 }
136}
137
138impl<'g, 'a> Iterator for AllSegmentsIter<'g, 'a> {
139 type Item = &'g Segment<'a>;
140
141 fn next(&mut self) -> Option<Self::Item> {
142 loop {
143 let (group, seg_idx, child_idx) = self.stack.last_mut()?;
144 // Yield segments first.
145 if *seg_idx < group.segments.len() {
146 let seg = &group.segments[*seg_idx];
147 *seg_idx += 1;
148 return Some(seg);
149 }
150 // Then recurse into children
151 if *child_idx < group.children.len() {
152 let child = &group.children[*child_idx];
153 *child_idx += 1;
154 self.stack.push((child, 0, 0));
155 continue;
156 }
157 // Done with this group
158 self.stack.pop();
159 }
160 }
161
162 fn size_hint(&self) -> (usize, Option<usize>) {
163 // Conservative lower bound: count remaining direct segments in all
164 // frames on the stack. Children not yet pushed are not counted, so
165 // the true total may be higher, but this is still a valid lower bound.
166 let lower: usize = self
167 .stack
168 .iter()
169 .map(|(g, seg_idx, _)| g.segments.len().saturating_sub(*seg_idx))
170 .sum();
171 (lower, None)
172 }
173}
174
175// ── group_segments ────────────────────────────────────────────────────────────
176
177/// Partition `segments` into a [`SegmentGroup`] tree according to `schema`.
178///
179/// # Algorithm
180///
181/// The algorithm is a single-pass linear scan:
182///
183/// 1. Segments that do not match any group trigger in `schema` are added to
184/// the current group's `segments`.
185/// 2. When a trigger matching a group in `schema` is encountered:
186/// - If an open child with the same trigger already exists it is closed and
187/// a new instance is started (repetition).
188/// - If the trigger belongs to a *sibling* or *ancestor* group the current
189/// group is closed first (the caller handles restart).
190/// - Nested schemas recurse: child group triggers follow the same rules
191/// within their parent.
192///
193/// # Root group
194///
195/// The returned root group has `definition` set to `root_name` (typically
196/// `"ROOT"` or the message type string). Segments before the first matching
197/// trigger land in the root's own `segments` vec.
198///
199/// # Example
200///
201/// ```rust,ignore
202/// let tree = group_segments(&segments, MY_SCHEMA, "ORDERS");
203/// for sg2 in tree.children.iter().filter(|g| g.definition == "SG2") {
204/// println!("NAD group: {:?}", sg2.segments.iter().map(|s| s.tag).collect::<Vec<_>>());
205/// }
206/// ```
207pub fn group_segments<'a>(
208 segments: &[Segment<'a>],
209 schema: &'static [GroupDef],
210 root_name: &'static str,
211) -> SegmentGroup<'a> {
212 let mut root = SegmentGroup::new(root_name);
213 group_recursive(segments, &mut root, schema);
214 root
215}
216
217/// Internal recursive grouping. Returns the number of segments consumed.
218fn group_recursive<'a>(
219 segments: &[Segment<'a>],
220 parent: &mut SegmentGroup<'a>,
221 schema: &'static [GroupDef],
222) -> usize {
223 group_recursive_inner(segments, parent, schema, &[])
224}
225
226fn group_recursive_inner<'a>(
227 segments: &[Segment<'a>],
228 parent: &mut SegmentGroup<'a>,
229 schema: &'static [GroupDef],
230 stop_triggers: &[&'static str],
231) -> usize {
232 // Pre-compute the combined stop set for all children of this schema level.
233 // This is computed once per schema level, not once per segment, so the
234 // SmallVec is allocated at most O(depth) times rather than O(depth × n).
235 let combined_stop: SmallVec<[&'static str; 16]> = {
236 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
237 for d in schema {
238 if !v.contains(&d.trigger) {
239 v.push(d.trigger);
240 }
241 }
242 v
243 };
244
245 let mut i = 0;
246 while i < segments.len() {
247 let tag = segments[i].tag;
248
249 // A trigger matching a stop tag means we must return control to the parent group.
250 if stop_triggers.iter().copied().any(|t| t == tag) {
251 break;
252 }
253
254 // Does this tag trigger a group in the schema?
255 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
256 // Start a new child group instance
257 let mut child = SegmentGroup::new(def.name);
258 // The trigger segment belongs to the new child
259 child.segments.push(segments[i].clone());
260 i += 1;
261
262 // Recurse into children of this group — pass the pre-computed stop set.
263 let consumed =
264 group_recursive_inner(&segments[i..], &mut child, def.children, &combined_stop);
265 i += consumed;
266
267 parent.children.push(child);
268 } else {
269 // Segment doesn't match any group trigger in this schema — it
270 // belongs to the parent group's own segments. This also covers
271 // leaf groups (empty schema): all non-stop-trigger segments after
272 // the trigger are accumulated into the current group.
273 parent.segments.push(segments[i].clone());
274 i += 1;
275 }
276 }
277 i
278}
279
280// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
281
282/// A zero-clone counterpart to [`SegmentGroup`] that stores index ranges into
283/// the original flat segment slice rather than cloning each segment.
284///
285/// Produced by [`group_segments_indexed`]. To access the actual segments use
286/// the original `&[Segment<'a>]` together with [`total_span`]:
287///
288/// ```rust,ignore
289/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
290/// for child in &indexed.children {
291/// let child_segs = &segments[child.total_span.clone()];
292/// }
293/// ```
294///
295/// [`total_span`]: SegmentGroupIndexed::total_span
296#[derive(Debug)]
297pub struct SegmentGroupIndexed {
298 /// Group name from the schema, e.g. `"SG2"`, or the root name.
299 pub definition: &'static str,
300 /// Contiguous span `[start, end)` of absolute indices into the original flat
301 /// segment slice covering **all** segments in this group instance — trigger
302 /// segment, direct segments, and all descendant groups combined.
303 ///
304 /// Use this to slice the original `&[Segment<'_>]` to get every segment
305 /// belonging to this group:
306 ///
307 /// ```rust,ignore
308 /// let all_sg2_segs = &segments[sg2.total_span.clone()];
309 /// ```
310 ///
311 /// To iterate over only the segments that belong *directly* to this group
312 /// (excluding descendants), use [`direct_segment_indices`].
313 ///
314 /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
315 pub total_span: Range<usize>,
316 /// Child group instances, in message order.
317 pub children: Vec<SegmentGroupIndexed>,
318}
319
320impl SegmentGroupIndexed {
321 /// Iterate over the absolute indices of segments that belong *directly* to
322 /// this group — i.e. those within [`total_span`] that are **not** covered
323 /// by any child group's [`total_span`].
324 ///
325 /// Complexity: `O(total_span.len() × children.len())`. For typical EDIFACT
326 /// message structures (≤ 8 children per group) this is negligible.
327 ///
328 /// [`total_span`]: SegmentGroupIndexed::total_span
329 pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
330 self.total_span.clone().filter(|i| {
331 !self
332 .children
333 .iter()
334 .any(|child| child.total_span.contains(i))
335 })
336 }
337}
338
339/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
340///
341/// This is the zero-allocation counterpart to [`group_segments`]: instead of
342/// copying each [`Segment`] into the tree, it records `Range<usize>` indices
343/// into the original flat slice. Use the original slice together with
344/// [`SegmentGroupIndexed::total_span`] to access segments.
345///
346/// # Worked Example
347///
348/// Consider a simplified 3-level MSCONS-like schema:
349///
350/// ```rust
351/// use edifact_rs::group::{GroupDef, group_segments_indexed};
352/// use edifact_rs::from_bytes;
353///
354/// // Schema: ROOT → SG1 (trigger: RFF) → SG5 (trigger: LOC) → SG6 (trigger: QTY)
355/// static SCHEMA: &[GroupDef] = &[
356/// GroupDef { name: "SG1", trigger: "RFF", children: &[] },
357/// GroupDef {
358/// name: "SG5",
359/// trigger: "LOC",
360/// children: &[
361/// GroupDef { name: "SG6", trigger: "QTY", children: &[] },
362/// ],
363/// },
364/// ];
365///
366/// // A small MSCONS-like message fragment (no envelope for clarity).
367/// let input = b"RFF+Z13:REF1'LOC+172+DE123'DTM+163:20230101:102'QTY+220:100:KWH'";
368/// let segments: Vec<_> = from_bytes(input)
369/// .collect::<Result<_, _>>()
370/// .unwrap();
371///
372/// let tree = group_segments_indexed(&segments, SCHEMA, "ROOT");
373///
374/// // The root contains no direct segments (all consumed by SG1 / SG5).
375/// assert!(tree.direct_segment_indices().next().is_none());
376///
377/// // One SG1 group and one SG5 group at root level.
378/// let sg1 = tree.children.iter().find(|g| g.definition == "SG1").unwrap();
379/// let sg5 = tree.children.iter().find(|g| g.definition == "SG5").unwrap();
380///
381/// // SG1 spans the RFF segment only.
382/// assert_eq!(&segments[sg1.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
383/// &["RFF"]);
384///
385/// // SG5 spans LOC + DTM + QTY (all three segments, including the SG6 child).
386/// let sg5_tags: Vec<_> = segments[sg5.total_span.clone()].iter().map(|s| s.tag).collect();
387/// assert_eq!(sg5_tags, &["LOC", "DTM", "QTY"]);
388///
389/// // SG5's direct segments (LOC + DTM) exclude the SG6 child (QTY).
390/// let sg5_direct: Vec<_> = sg5.direct_segment_indices()
391/// .map(|i| segments[i].tag)
392/// .collect();
393/// assert_eq!(sg5_direct, &["LOC", "DTM"]);
394///
395/// // SG6 contains only QTY.
396/// let sg6 = sg5.children.iter().find(|g| g.definition == "SG6").unwrap();
397/// assert_eq!(segments[sg6.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
398/// &["QTY"]);
399/// ```
400///
401/// # Group validation
402///
403/// `group_segments_indexed` pairs naturally with
404/// [`crate::validator::ValidationContext::validate_lenient_grouped`] to enforce group-presence rules:
405///
406/// ```rust,ignore
407/// use edifact_rs::{ProfileRulePack, ValidationContext};
408///
409/// let pack = ProfileRulePack::new("MY-AHB")
410/// .require_segment_in_group("SG5", "DTM", "SG5-DTM-M")
411/// .forbid_segment_in_group("SG1", "LOC", "SG1-LOC-F");
412/// let ctx = ValidationContext::builder().with_profile_pack(pack).build();
413///
414/// let tree = group_segments_indexed(&segments, SCHEMA, "MSCONS");
415/// let report = ctx.validate_lenient_grouped(&tree, &segments);
416/// ```
417///
418/// # Complexity
419///
420/// `O(n × schema_depth)` time, `O(tree_nodes)` space. No `Segment` clones.
421pub fn group_segments_indexed<'a>(
422 segments: &[Segment<'a>],
423 schema: &'static [GroupDef],
424 root_name: &'static str,
425) -> SegmentGroupIndexed {
426 let mut root = SegmentGroupIndexed {
427 definition: root_name,
428 total_span: 0..0,
429 children: Vec::new(),
430 };
431 group_recursive_indexed(segments, &mut root, schema, &[], 0);
432 root
433}
434
435/// Partition an owned-segment slice into a [`SegmentGroup`] tree according to `schema`.
436///
437/// Equivalent to [`group_segments`] but accepts `&[OwnedSegment]` for use with
438/// the reader-based API ([`crate::from_reader`] → [`crate::FromReaderIter`]).
439///
440/// Internally borrows each `OwnedSegment` as a `Segment<'_>` and delegates to
441/// [`group_segments`], so all grouping logic is shared.
442pub fn group_owned_segments<'a>(
443 segments: &'a [OwnedSegment],
444 schema: &'static [GroupDef],
445 root_name: &'static str,
446) -> SegmentGroup<'a> {
447 let borrowed: Vec<Segment<'a>> = segments.iter().map(|s| s.as_borrowed()).collect();
448 group_segments(&borrowed, schema, root_name)
449}
450
451/// Partition an owned-segment slice into a [`SegmentGroupIndexed`] tree according to `schema`.
452///
453/// Equivalent to [`group_segments_indexed`] but accepts `&[OwnedSegment]`.
454pub fn group_owned_segments_indexed(
455 segments: &[OwnedSegment],
456 schema: &'static [GroupDef],
457 root_name: &'static str,
458) -> SegmentGroupIndexed {
459 let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
460 group_segments_indexed(&borrowed, schema, root_name)
461}
462
463/// Internal recursive indexed grouping. Returns the number of segments consumed.
464fn group_recursive_indexed<'a>(
465 segments: &[Segment<'a>],
466 parent: &mut SegmentGroupIndexed,
467 schema: &'static [GroupDef],
468 stop_triggers: &[&'static str],
469 offset: usize,
470) -> usize {
471 let combined_stop: SmallVec<[&'static str; 16]> = {
472 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
473 for d in schema {
474 if !v.contains(&d.trigger) {
475 v.push(d.trigger);
476 }
477 }
478 v
479 };
480
481 // `span_start` is the absolute index of the first segment in this group.
482 // For child groups the caller pre-seeds `parent.total_span.start` with the
483 // trigger segment position; for the root (or any group with no pre-seeded
484 // trigger) we start at `offset`.
485 let span_start = if !parent.total_span.is_empty() {
486 parent.total_span.start // pre-seeded trigger position
487 } else {
488 offset
489 };
490
491 let mut i = 0;
492 while i < segments.len() {
493 let tag = segments[i].tag;
494
495 if stop_triggers.iter().copied().any(|t| t == tag) {
496 break;
497 }
498
499 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
500 let child_offset = offset + i;
501 let mut child = SegmentGroupIndexed {
502 definition: def.name,
503 // Pre-seed the trigger segment; the recursive call extends
504 // total_span to cover the full child subtree.
505 total_span: child_offset..child_offset + 1,
506 children: Vec::new(),
507 };
508 i += 1;
509
510 let consumed = group_recursive_indexed(
511 &segments[i..],
512 &mut child,
513 def.children,
514 &combined_stop,
515 offset + i,
516 );
517 i += consumed;
518
519 parent.children.push(child);
520 } else {
521 i += 1;
522 }
523 }
524
525 // Total span covers everything from the first segment (trigger or first
526 // direct segment) to the last segment consumed in this call.
527 parent.total_span = span_start..(offset + i);
528
529 i
530}
531
532#[cfg(test)]
533mod tests {
534 use super::*;
535 use crate::Span;
536 use crate::model::Element;
537
538 fn seg(tag: &'static str) -> Segment<'static> {
539 Segment {
540 tag,
541 span: Span::new(0, 0),
542 tag_span: Span::new(0, 0),
543 elements: vec![Element::of(&["x"])],
544 }
545 }
546
547 static SCHEMA: &[GroupDef] = &[
548 GroupDef {
549 name: "SG1",
550 trigger: "NAD",
551 children: &[GroupDef {
552 name: "SG2",
553 trigger: "CTA",
554 children: &[],
555 }],
556 },
557 GroupDef {
558 name: "SG3",
559 trigger: "LIN",
560 children: &[],
561 },
562 ];
563
564 #[test]
565 fn root_segments_before_first_trigger() {
566 let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
567 let tree = group_segments(&segs, SCHEMA, "ROOT");
568 assert_eq!(tree.segments.len(), 2, "UNH + BGM should be in root");
569 assert_eq!(tree.children.len(), 1);
570 assert_eq!(tree.children[0].definition, "SG1");
571 }
572
573 #[test]
574 fn repeated_trigger_creates_multiple_children() {
575 let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
576 let tree = group_segments(&segs, SCHEMA, "ROOT");
577 // Two NAD triggers → two SG1 children
578 assert_eq!(
579 tree.children
580 .iter()
581 .filter(|c| c.definition == "SG1")
582 .count(),
583 2
584 );
585 }
586
587 #[test]
588 fn nested_child_groups() {
589 let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
590 let tree = group_segments(&segs, SCHEMA, "ROOT");
591 let sg1 = &tree.children[0];
592 assert_eq!(sg1.definition, "SG1");
593 // Two CTA triggers → two SG2 children inside SG1
594 assert_eq!(sg1.children.len(), 2);
595 assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
596 }
597
598 #[test]
599 fn all_segments_iterator_depth_first() {
600 let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
601 let tree = group_segments(&segs, SCHEMA, "ROOT");
602 let tags: Vec<_> = tree.all_segments().map(|s| s.tag).collect();
603 assert!(tags.contains(&"UNH"));
604 assert!(tags.contains(&"NAD"));
605 assert!(tags.contains(&"CTA"));
606 }
607}