edifact_rs/group.rs
1//! Segment group tree model for structured EDIFACT message navigation.
2//!
3//! Provides a recursive group schema ([`GroupDef`]) and a segment-slice-to-tree
4//! function ([`group_segments_indexed`]) that partitions a flat segment slice into a
5//! [`SegmentGroupIndexed`] tree according to the schema.
6//!
7//! # Model overview
8//!
9//! Every UN/EDIFACT message type has a fixed set of **segment groups**: named,
10//! optionally-repeating sets of segments delimited by a specific *trigger*
11//! segment tag. For example, ORDERS D.11A has an `SG1` group starting with
12//! `RFF`, an `SG2` group starting with `NAD`, and so on.
13//!
14//! This module provides lightweight, allocation-efficient types for defining
15//! and working with these groups without requiring message-type-specific
16//! generated code.
17//!
18//! # Example
19//!
20//! ```rust,ignore
21//! use edifact_rs::group::{GroupDef, group_segments_indexed};
22//!
23//! static ORDERS_GROUPS: &[GroupDef] = &[
24//! GroupDef { name: "SG2", trigger: "NAD", children: &[] },
25//! GroupDef {
26//! name: "SG7",
27//! trigger: "LIN",
28//! children: &[
29//! GroupDef { name: "SG32", trigger: "PRI", children: &[] },
30//! ],
31//! },
32//! ];
33//!
34//! let root = group_segments_indexed(&segments, ORDERS_GROUPS, "ROOT");
35//! for child in &root.children {
36//! let child_segs = &segments[child.total_span.clone()];
37//! println!("{} #{}: {} segments", child.definition, child.occurrence_index, child_segs.len());
38//! }
39//! ```
40
41use crate::{OwnedSegment, Segment};
42use smallvec::SmallVec;
43use std::ops::Range;
44
45// ── GroupDef ──────────────────────────────────────────────────────────────────
46
/// Compile-time description of a single segment group in an EDIFACT message.
///
/// Every field is a `'static` reference, so a whole schema — including all
/// nested `children` — can be written as `static` or `const` data and needs no
/// heap allocation.
#[derive(Debug, Clone, Copy)]
pub struct GroupDef {
    /// Display name of the group, for instance `"SG2"`.
    pub name: &'static str,
    /// Tag of the segment that opens a new instance of this group.
    pub trigger: &'static str,
    /// Groups nested directly inside this one.
    ///
    /// Encountering the trigger of one of these children closes the child
    /// that is currently open (if any) and opens a new one; encountering the
    /// trigger of a sibling or ancestor group closes this group as a whole.
    pub children: &'static [GroupDef],
}
65
66// ── SegmentGroupIndexed ───────────────────────────────────────────────────────
67
/// Zero-copy segment group tree node. Stores index ranges into the original
/// flat segment slice rather than cloning each segment.
///
/// Produced by [`group_segments_indexed`]. To access the actual segments use
/// the original `&[Segment<'a>]` together with [`total_span`]:
///
/// ```rust,ignore
/// let indexed = group_segments_indexed(&segments, MY_SCHEMA, "ROOT");
/// for child in &indexed.children {
///     let child_segs = &segments[child.total_span.clone()];
/// }
/// ```
///
/// The type is `Clone` and comparable with `==`, which makes it easy to
/// snapshot a tree or assert on an expected shape in tests.
///
/// [`total_span`]: SegmentGroupIndexed::total_span
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SegmentGroupIndexed {
    /// Group name from the schema, e.g. `"SG2"`, or the root name.
    pub definition: &'static str,
    /// Contiguous span `[start, end)` of absolute indices into the original flat
    /// segment slice covering **all** segments in this group instance — trigger
    /// segment, direct segments, and all descendant groups combined.
    ///
    /// Use this to slice the original `&[Segment<'_>]` to get every segment
    /// belonging to this group:
    ///
    /// ```rust,ignore
    /// let all_sg2_segs = &segments[sg2.total_span.clone()];
    /// ```
    ///
    /// To iterate over only the segments that belong *directly* to this group
    /// (excluding descendants), use [`direct_segment_indices`].
    ///
    /// [`direct_segment_indices`]: SegmentGroupIndexed::direct_segment_indices
    pub total_span: Range<usize>,
    /// Child group instances, in message order.
    pub children: Vec<SegmentGroupIndexed>,
    /// Zero-based occurrence index of this group instance among all siblings
    /// with the same `definition` at this level.
    ///
    /// For example, the first `SG5` child at a given level has `occurrence_index = 0`,
    /// the second `SG5` has `occurrence_index = 1`, etc. Siblings with a
    /// *different* definition have independent counters.
    ///
    /// This field is essential for producing unambiguous rule-violation IDs
    /// (e.g. `"SG5[2]/DTM"`) when the same group type repeats.
    pub occurrence_index: usize,
}

impl SegmentGroupIndexed {
    /// Iterate, in ascending order, over the absolute indices of segments that
    /// belong *directly* to this group — i.e. those within [`total_span`] that
    /// are **not** covered by any child group's [`total_span`].
    ///
    /// The result is correct for any ordering of `children`. Because indices
    /// are visited in ascending order, a child whose span ends at or before
    /// the current index can never cover a later one and is skipped for good.
    /// When `children` is in message order (as in trees built by
    /// [`group_segments_indexed`]) a covered index is therefore dismissed by
    /// the first remaining child; only a direct index needs a pass over the
    /// children that have not ended yet.
    ///
    /// [`total_span`]: SegmentGroupIndexed::total_span
    pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
        let children = self.children.as_slice();
        // Index of the first child that may still cover the current or a
        // later segment index; everything before it has already ended.
        let mut first_live = 0usize;
        self.total_span.clone().filter(move |idx| {
            while first_live < children.len() && children[first_live].total_span.end <= *idx {
                first_live += 1;
            }
            !children[first_live..]
                .iter()
                .any(|child| child.total_span.contains(idx))
        })
    }
}
134
135/// Partition `segments` into a [`SegmentGroupIndexed`] tree without cloning.
136///
137/// Stores `Range<usize>` indices into the original flat slice rather than
138/// copying each [`Segment`] into the tree. Use the original slice together
139/// with [`SegmentGroupIndexed::total_span`] to access segments.
140///
141/// # Worked Example
142///
143/// Consider a simplified 3-level MSCONS-like schema:
144///
145/// ```rust
146/// use edifact_rs::group::{GroupDef, group_segments_indexed};
147/// use edifact_rs::from_bytes;
148///
149/// // Schema: ROOT → SG1 (trigger: RFF) → SG5 (trigger: LOC) → SG6 (trigger: QTY)
150/// static SCHEMA: &[GroupDef] = &[
151/// GroupDef { name: "SG1", trigger: "RFF", children: &[] },
152/// GroupDef {
153/// name: "SG5",
154/// trigger: "LOC",
155/// children: &[
156/// GroupDef { name: "SG6", trigger: "QTY", children: &[] },
157/// ],
158/// },
159/// ];
160///
161/// // A small MSCONS-like message fragment (no envelope for clarity).
162/// let input = b"RFF+Z13:REF1'LOC+172+DE123'DTM+163:20230101:102'QTY+220:100:KWH'";
163/// let segments: Vec<_> = from_bytes(input)
164/// .collect::<Result<_, _>>()
165/// .unwrap();
166///
167/// let tree = group_segments_indexed(&segments, SCHEMA, "ROOT");
168///
169/// // The root contains no direct segments (all consumed by SG1 / SG5).
170/// assert!(tree.direct_segment_indices().next().is_none());
171///
172/// // One SG1 group and one SG5 group at root level.
173/// let sg1 = tree.children.iter().find(|g| g.definition == "SG1").unwrap();
174/// let sg5 = tree.children.iter().find(|g| g.definition == "SG5").unwrap();
175///
176/// // SG1 spans the RFF segment only.
177/// assert_eq!(&segments[sg1.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
178/// &["RFF"]);
179///
180/// // SG5 spans LOC + DTM + QTY (all three segments, including the SG6 child).
181/// let sg5_tags: Vec<_> = segments[sg5.total_span.clone()].iter().map(|s| s.tag).collect();
182/// assert_eq!(sg5_tags, &["LOC", "DTM", "QTY"]);
183///
184/// // SG5's direct segments (LOC + DTM) exclude the SG6 child (QTY).
185/// let sg5_direct: Vec<_> = sg5.direct_segment_indices()
186/// .map(|i| segments[i].tag)
187/// .collect();
188/// assert_eq!(sg5_direct, &["LOC", "DTM"]);
189///
190/// // SG6 contains only QTY.
191/// let sg6 = sg5.children.iter().find(|g| g.definition == "SG6").unwrap();
192/// assert_eq!(segments[sg6.total_span.clone()].iter().map(|s| s.tag).collect::<Vec<_>>(),
193/// &["QTY"]);
194/// ```
195///
196/// # Group validation
197///
198/// `group_segments_indexed` pairs naturally with
199/// [`crate::validator::ValidationContext::validate_lenient_grouped`] to enforce group-presence rules:
200///
201/// ```rust,ignore
202/// use edifact_rs::{ProfileRulePack, ValidationContext};
203///
204/// let pack = ProfileRulePack::new("MY-AHB")
205/// .require_segment_in_group("SG5", "DTM", "SG5-DTM-M")
206/// .forbid_segment_in_group("SG1", "LOC", "SG1-LOC-F");
207/// let ctx = ValidationContext::builder().with_profile_pack(pack).build();
208///
209/// let tree = group_segments_indexed(&segments, SCHEMA, "MSCONS");
210/// let report = ctx.validate_lenient_grouped(&tree, &segments);
211/// ```
212///
213/// # Complexity
214///
215/// `O(n × schema_depth)` time, `O(tree_nodes)` space. No `Segment` clones.
216pub fn group_segments_indexed(
217 segments: &[Segment<'_>],
218 schema: &'static [GroupDef],
219 root_name: &'static str,
220) -> SegmentGroupIndexed {
221 let mut root = SegmentGroupIndexed {
222 definition: root_name,
223 total_span: 0..0,
224 children: Vec::new(),
225 occurrence_index: 0,
226 };
227 group_recursive_indexed(segments, &mut root, schema, &[], 0);
228 root
229}
230
231/// Partition an owned-segment slice into a [`SegmentGroupIndexed`] tree according to `schema`.
232///
233/// Equivalent to [`group_segments_indexed`] but accepts `&[OwnedSegment]`.
234pub fn group_owned_segments_indexed(
235 segments: &[OwnedSegment],
236 schema: &'static [GroupDef],
237 root_name: &'static str,
238) -> SegmentGroupIndexed {
239 let borrowed: Vec<Segment<'_>> = segments.iter().map(|s| s.as_borrowed()).collect();
240 group_segments_indexed(&borrowed, schema, root_name)
241}
242
243/// Internal recursive indexed grouping. Returns the number of segments consumed.
244fn group_recursive_indexed(
245 segments: &[Segment<'_>],
246 parent: &mut SegmentGroupIndexed,
247 schema: &'static [GroupDef],
248 stop_triggers: &[&'static str],
249 offset: usize,
250) -> usize {
251 let combined_stop: SmallVec<[&'static str; 16]> = {
252 let mut v: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
253 for d in schema {
254 if !v.contains(&d.trigger) {
255 v.push(d.trigger);
256 }
257 }
258 v
259 };
260
261 // `span_start` is the absolute index of the first segment in this group.
262 // For child groups the caller pre-seeds `parent.total_span.start` with the
263 // trigger segment position; for the root (or any group with no pre-seeded
264 // trigger) we start at `offset`.
265 let span_start = if !parent.total_span.is_empty() {
266 parent.total_span.start // pre-seeded trigger position
267 } else {
268 offset
269 };
270
271 let mut i = 0;
272 // Track how many children of each definition have been pushed at this level,
273 // so we can stamp `occurrence_index` on each new child.
274 let mut occ_counts: std::collections::HashMap<&'static str, usize> =
275 std::collections::HashMap::new();
276 while i < segments.len() {
277 let tag = segments[i].tag;
278
279 if stop_triggers.iter().copied().any(|t| t == tag) {
280 break;
281 }
282
283 if let Some(def) = schema.iter().find(|d| d.trigger == tag) {
284 let child_offset = offset + i;
285 let occ_idx = {
286 let c = occ_counts.entry(def.name).or_insert(0);
287 let idx = *c;
288 *c += 1;
289 idx
290 };
291 let mut child = SegmentGroupIndexed {
292 definition: def.name,
293 // Pre-seed the trigger segment; the recursive call extends
294 // total_span to cover the full child subtree.
295 total_span: child_offset..child_offset + 1,
296 children: Vec::new(),
297 occurrence_index: occ_idx,
298 };
299 i += 1;
300
301 let consumed = group_recursive_indexed(
302 &segments[i..],
303 &mut child,
304 def.children,
305 &combined_stop,
306 offset + i,
307 );
308 i += consumed;
309
310 parent.children.push(child);
311 } else {
312 i += 1;
313 }
314 }
315
316 // Total span covers everything from the first segment (trigger or first
317 // direct segment) to the last segment consumed in this call.
318 parent.total_span = span_start..(offset + i);
319
320 i
321}
322
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Span;
    use crate::model::Element;

    /// Minimal segment fixture: only the tag matters for grouping.
    fn seg(tag: &'static str) -> Segment<'static> {
        Segment {
            tag,
            span: Span::new(0, 0),
            tag_span: Span::new(0, 0),
            elements: vec![Element::of(&["x"])],
        }
    }

    /// ROOT → SG1 (NAD) → SG2 (CTA), and ROOT → SG3 (LIN).
    static SCHEMA: &[GroupDef] = &[
        GroupDef {
            name: "SG1",
            trigger: "NAD",
            children: &[GroupDef {
                name: "SG2",
                trigger: "CTA",
                children: &[],
            }],
        },
        GroupDef {
            name: "SG3",
            trigger: "LIN",
            children: &[],
        },
    ];

    #[test]
    fn root_segments_before_first_trigger() {
        let input = vec![seg("UNH"), seg("BGM"), seg("NAD")];
        let root = group_segments_indexed(&input, SCHEMA, "ROOT");

        // Indices 0 (UNH) and 1 (BGM) stay with the root; index 2 (NAD) opens SG1.
        assert_eq!(
            root.direct_segment_indices().collect::<Vec<_>>(),
            vec![0, 1],
            "UNH + BGM should be direct in root"
        );
        assert_eq!(root.children.len(), 1);
        assert_eq!(root.children[0].definition, "SG1");
    }

    #[test]
    fn repeated_trigger_creates_multiple_children() {
        let input = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
        let root = group_segments_indexed(&input, SCHEMA, "ROOT");

        // Each NAD trigger opens its own SG1 instance.
        let sg1_instances = root
            .children
            .iter()
            .filter(|group| group.definition == "SG1")
            .count();
        assert_eq!(sg1_instances, 2);
    }

    #[test]
    fn repeated_trigger_occurrence_index_is_stamped() {
        let input = vec![seg("NAD"), seg("NAD"), seg("NAD")];
        let root = group_segments_indexed(&input, SCHEMA, "ROOT");

        let stamped: Vec<_> = root
            .children
            .iter()
            .map(|group| group.occurrence_index)
            .collect();
        assert_eq!(stamped, vec![0, 1, 2]);
    }

    #[test]
    fn nested_child_groups() {
        let input = vec![seg("NAD"), seg("CTA"), seg("CTA")];
        let root = group_segments_indexed(&input, SCHEMA, "ROOT");

        let first = &root.children[0];
        assert_eq!(first.definition, "SG1");
        // Both CTA triggers become SG2 instances nested in the SG1 instance.
        assert_eq!(first.children.len(), 2);
        assert!(first.children.iter().all(|nested| nested.definition == "SG2"));
    }

    #[test]
    fn total_span_covers_all_segments() {
        let input = vec![seg("UNH"), seg("NAD"), seg("CTA")];
        let root = group_segments_indexed(&input, SCHEMA, "ROOT");

        // Slicing by the root span must expose every one of the three tags.
        let covered: Vec<_> = input[root.total_span.clone()]
            .iter()
            .map(|segment| segment.tag)
            .collect();
        for expected in &["UNH", "NAD", "CTA"] {
            assert!(covered.contains(expected));
        }
    }
}
412}