use crate::{OwnedSegment, Segment};
use smallvec::SmallVec;
use std::ops::Range;
/// Static description of one segment group in a grouping schema.
///
/// Every field is `'static`, so a whole schema can be declared as a `static` table
/// and shared by reference.
#[derive(Debug, Clone, Copy)]
pub struct GroupDef {
/// Group name; stamped onto every tree node created from this definition and
/// used as the key when numbering repeated occurrences.
pub name: &'static str,
/// Segment tag that opens a new occurrence of this group.
pub trigger: &'static str,
/// Definitions of the groups that may be nested inside this one
/// (empty slice for a group without sub-groups).
pub children: &'static [GroupDef],
}
/// One node of the grouping tree, expressed purely as index ranges into the
/// segment slice that was grouped (no segment data is stored here).
#[derive(Debug)]
pub struct SegmentGroupIndexed {
    /// Name of the [`GroupDef`] this node was created from; the root node carries
    /// the name supplied by the caller instead.
    pub definition: &'static str,
    /// Half-open range of segment indices covered by this group, nested groups included.
    pub total_span: Range<usize>,
    /// Groups nested directly inside this one, in the order they were encountered.
    pub children: Vec<SegmentGroupIndexed>,
    /// Zero-based counter among groups with the same definition name under the same parent.
    pub occurrence_index: usize,
}

impl SegmentGroupIndexed {
    /// Yields, in ascending order, every index of `total_span` that does not fall
    /// inside the `total_span` of any direct child, i.e. the segments that belong
    /// to this group itself rather than to a nested group.
    pub fn direct_segment_indices(&self) -> impl Iterator<Item = usize> + '_ {
        let nested = &self.children;
        self.total_span
            .clone()
            .filter(move |index| nested.iter().all(|group| !group.total_span.contains(index)))
    }
}
/// Groups a flat run of segments into a tree of index ranges according to `schema`.
///
/// The returned root node is named `root_name`, has occurrence index 0, and its
/// span covers `0..segments.len()`. Each segment whose tag matches a trigger in
/// `schema` opens a child group; nesting follows the `children` of the matched
/// definition.
pub fn group_segments_indexed(
    segments: &[Segment<'_>],
    schema: &'static [GroupDef],
    root_name: &'static str,
) -> SegmentGroupIndexed {
    let mut tree = SegmentGroupIndexed {
        definition: root_name,
        total_span: 0..0,
        children: Vec::new(),
        occurrence_index: 0,
    };

    // The root has no enclosing group, so nothing can terminate it early and
    // its indices start at zero.
    let no_stop_triggers: &[&'static str] = &[];
    let start_offset = 0;

    // The consumed-segment count only matters to recursive callers.
    let _consumed =
        group_recursive_indexed(segments, &mut tree, schema, no_stop_triggers, start_offset);

    tree
}
/// Owned-segment front end for [`group_segments_indexed`].
///
/// Builds a temporary vector of borrowed views (one per owned segment, same
/// order) and groups those, so the indices in the result refer to positions in
/// `segments`.
pub fn group_owned_segments_indexed(
    segments: &[OwnedSegment],
    schema: &'static [GroupDef],
    root_name: &'static str,
) -> SegmentGroupIndexed {
    let mut views: Vec<Segment<'_>> = Vec::with_capacity(segments.len());
    for owned in segments {
        views.push(owned.as_borrowed());
    }
    group_segments_indexed(views.as_slice(), schema, root_name)
}
/// Recursive worker behind [`group_segments_indexed`].
///
/// Walks `segments` (a tail of the full input that begins at absolute index
/// `offset`) and attaches to `parent` one child per segment whose tag matches a
/// trigger in `schema`. Scanning ends at the end of the slice or at the first
/// segment whose tag is listed in `stop_triggers`; that segment is left for the
/// caller.
///
/// On return `parent.total_span` ends just past the last consumed segment. Its
/// start is kept if the span was already non-empty (the trigger segment recorded
/// by the caller) and is `offset` otherwise.
///
/// Returns the number of segments consumed from `segments`.
fn group_recursive_indexed(
    segments: &[Segment<'_>],
    parent: &mut SegmentGroupIndexed,
    schema: &'static [GroupDef],
    stop_triggers: &[&'static str],
    offset: usize,
) -> usize {
    // A nested group ends at anything that ends this level, and also at any
    // trigger of this level (a sibling group, or a repeat of the same group).
    let mut child_stops: SmallVec<[&'static str; 16]> = SmallVec::from_slice(stop_triggers);
    for def in schema {
        let already_listed = child_stops.iter().any(|known| *known == def.trigger);
        if !already_listed {
            child_stops.push(def.trigger);
        }
    }

    let first_index = if parent.total_span.is_empty() {
        offset
    } else {
        parent.total_span.start
    };

    // Occurrence counters, keyed by group name, local to this parent.
    let mut seen_per_group: std::collections::HashMap<&'static str, usize> =
        std::collections::HashMap::new();

    let mut cursor = 0;
    while let Some(segment) = segments.get(cursor) {
        let tag = segment.tag;

        if stop_triggers.iter().any(|stop| *stop == tag) {
            break;
        }

        let def = match schema.iter().find(|candidate| candidate.trigger == tag) {
            Some(def) => def,
            None => {
                // Not a trigger at this level: the segment belongs to `parent` directly.
                cursor += 1;
                continue;
            }
        };

        let trigger_index = offset + cursor;

        let counter = seen_per_group.entry(def.name).or_insert(0);
        let occurrence_index = *counter;
        *counter += 1;

        // Seed the span with the trigger segment so the recursion keeps it as the start.
        let mut group = SegmentGroupIndexed {
            definition: def.name,
            total_span: trigger_index..trigger_index + 1,
            children: Vec::new(),
            occurrence_index,
        };

        cursor += 1;
        let consumed = group_recursive_indexed(
            &segments[cursor..],
            &mut group,
            def.children,
            &child_stops,
            offset + cursor,
        );
        cursor += consumed;

        parent.children.push(group);
    }

    parent.total_span = first_index..(offset + cursor);
    cursor
}
// Unit tests for the indexed grouping. Segments are synthesized from bare tags,
// so only tag-driven behavior is exercised.
#[cfg(test)]
mod tests {
use super::*;
use crate::Span;
use crate::model::Element;
// Builds a segment carrying `tag`, with zeroed spans and one element built from `["x"]`.
fn seg(tag: &'static str) -> Segment<'static> {
Segment {
tag,
span: Span::new(0, 0),
tag_span: Span::new(0, 0),
elements: vec![Element::of(&["x"])],
}
}
// Schema under test: SG1 (opened by NAD) nests SG2 (opened by CTA);
// SG3 (opened by LIN) sits next to SG1 at the top level.
static SCHEMA: &[GroupDef] = &[
GroupDef {
name: "SG1",
trigger: "NAD",
children: &[GroupDef {
name: "SG2",
trigger: "CTA",
children: &[],
}],
},
GroupDef {
name: "SG3",
trigger: "LIN",
children: &[],
},
];
// Segments ahead of the first trigger stay directly on the root; the NAD opens one SG1 child.
#[test]
fn root_segments_before_first_trigger() {
let segs = vec![seg("UNH"), seg("BGM"), seg("NAD")];
let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");
let direct: Vec<_> = tree.direct_segment_indices().collect();
assert_eq!(direct, vec![0, 1], "UNH + BGM should be direct in root");
assert_eq!(tree.children.len(), 1);
assert_eq!(tree.children[0].definition, "SG1");
}
// Each NAD opens its own SG1 occurrence rather than extending the previous one.
#[test]
fn repeated_trigger_creates_multiple_children() {
let segs = vec![seg("UNH"), seg("NAD"), seg("NAD"), seg("UNT")];
let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");
assert_eq!(
tree.children
.iter()
.filter(|c| c.definition == "SG1")
.count(),
2
);
}
// Same-named sibling groups are numbered 0, 1, 2 in encounter order.
#[test]
fn repeated_trigger_occurrence_index_is_stamped() {
let segs = vec![seg("NAD"), seg("NAD"), seg("NAD")];
let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");
let indices: Vec<_> = tree.children.iter().map(|c| c.occurrence_index).collect();
assert_eq!(indices, vec![0, 1, 2]);
}
// CTA segments following a NAD become SG2 children of that SG1.
#[test]
fn nested_child_groups() {
let segs = vec![seg("NAD"), seg("CTA"), seg("CTA")];
let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");
let sg1 = &tree.children[0];
assert_eq!(sg1.definition, "SG1");
assert_eq!(sg1.children.len(), 2);
assert!(sg1.children.iter().all(|c| c.definition == "SG2"));
}
// The root span, used as a slice range, reaches every input segment.
#[test]
fn total_span_covers_all_segments() {
let segs = vec![seg("UNH"), seg("NAD"), seg("CTA")];
let tree = group_segments_indexed(&segs, SCHEMA, "ROOT");
let all_tags: Vec<_> = segs[tree.total_span.clone()]
.iter()
.map(|s| s.tag)
.collect();
assert!(all_tags.contains(&"UNH"));
assert!(all_tags.contains(&"NAD"));
assert!(all_tags.contains(&"CTA"));
}
}