use mig_types::schema::mig::{MigSchema, MigSegment, MigSegmentGroup};
use std::collections::HashSet;
/// Produces a reduced copy of `mig` that keeps only what `ahb_numbers` selects.
///
/// The header fields (message type, variant, version, ...) are cloned unchanged.
/// Top-level segments and segment groups are filtered by segment number. Surviving
/// groups that share an ID are then either kept apart and tagged with their entry
/// qualifier, or merged into a single group:
///
/// * top level: tagged for `UTILMD`, `PARTIN` and `APERAK`, merged otherwise;
/// * nested levels: tagged for `UTILMD` only, merged otherwise.
pub fn filter_mig_for_pid(mig: &MigSchema, ahb_numbers: &HashSet<String>) -> MigSchema {
    let message_type = mig.message_type.as_str();
    let tag_top_level = ["UTILMD", "PARTIN", "APERAK"].contains(&message_type);
    let tag_nested = message_type == "UTILMD";

    let segments = filter_segments(&mig.segments, ahb_numbers);
    let segment_groups =
        filter_groups_with_mode(&mig.segment_groups, ahb_numbers, tag_top_level, tag_nested);

    MigSchema {
        message_type: mig.message_type.clone(),
        variant: mig.variant.clone(),
        version: mig.version.clone(),
        publication_date: mig.publication_date.clone(),
        author: mig.author.clone(),
        format_version: mig.format_version.clone(),
        source_file: mig.source_file.clone(),
        segments,
        segment_groups,
    }
}
/// Segment IDs that `filter_segments` always keeps, whatever their segment number.
const TRANSPORT_SEGMENTS: &[&str] = &["UNA", "UNB", "UNZ"];
/// Returns clones of the segments that survive filtering, in their original order.
///
/// A segment is kept when any of the following holds:
/// * its ID is listed in `TRANSPORT_SEGMENTS`;
/// * it carries no number at all;
/// * its number is contained in `ahb_numbers`.
fn filter_segments(segments: &[MigSegment], ahb_numbers: &HashSet<String>) -> Vec<MigSegment> {
    let mut kept = Vec::new();
    for segment in segments {
        let is_transport = TRANSPORT_SEGMENTS.contains(&segment.id.as_str());
        let is_selected = segment
            .number
            .as_ref()
            .map_or(true, |number| ahb_numbers.contains(number));
        if is_transport || is_selected {
            kept.push(segment.clone());
        }
    }
    kept
}
/// Filters one level of segment groups and then resolves groups sharing an ID.
///
/// Groups rejected by `group_matches_ahb` are dropped. The remaining ones have their
/// contents filtered recursively, with `variant_aware_nested` deciding how *their*
/// nested levels treat same-ID groups. For this level, `variant_aware` chooses between
/// tagging same-ID groups (`true`) and merging them unconditionally (`false`).
fn filter_groups_with_mode(
    groups: &[MigSegmentGroup],
    ahb_numbers: &HashSet<String>,
    variant_aware: bool,
    variant_aware_nested: bool,
) -> Vec<MigSegmentGroup> {
    let mut surviving: Vec<MigSegmentGroup> = Vec::new();
    for candidate in groups {
        if !group_matches_ahb(candidate, ahb_numbers) {
            continue;
        }
        surviving.push(filter_group_contents_with_mode(
            candidate,
            ahb_numbers,
            variant_aware_nested,
        ));
    }

    if !variant_aware {
        return merge_same_id_groups(surviving);
    }
    tag_or_merge_same_id_groups(surviving)
}
/// Entry-qualifier data for one group variant: the first code, every code, and an
/// optional position pair that ends up in `variant_qualifier_position`.
type QualifierInfo = (String, Vec<String>, Option<(usize, usize)>);
/// Resolves groups that share an ID by tagging them when possible, merging otherwise.
///
/// Groups are bucketed by ID in order of first appearance. A bucket holding a single
/// group is passed through untouched. For a bucket with several variants:
///
/// * if every variant yields an entry qualifier, each variant is kept and annotated
///   with `variant_code`, `variant_codes`, `variant_qualifier_position` and
///   `merged_variant_count` (the bucket size);
/// * otherwise the variants are collapsed with `merge_group_variants`, and the merged
///   group records the bucket size in `merged_variant_count` as well, exactly like
///   `merge_same_id_groups` does. Previously this fallback left the count unset.
fn tag_or_merge_same_id_groups(groups: Vec<MigSegmentGroup>) -> Vec<MigSegmentGroup> {
    // Bucket by ID while preserving the order in which IDs first appear.
    let mut by_id: Vec<Vec<MigSegmentGroup>> = Vec::new();
    for g in groups {
        let slot = by_id
            .iter()
            .position(|bucket| bucket.first().map_or(false, |head| head.id == g.id));
        match slot {
            Some(pos) => by_id[pos].push(g),
            None => by_id.push(vec![g]),
        }
    }

    let mut result = Vec::new();
    for variants in by_id {
        if variants.len() == 1 {
            result.extend(variants);
            continue;
        }

        let count = variants.len() as u32;
        // `None` as soon as a single variant lacks a distinguishing entry qualifier.
        let quals: Option<Vec<QualifierInfo>> = variants
            .iter()
            .map(extract_entry_qualifier_with_position)
            .collect();

        match quals {
            Some(quals) => {
                for (mut g, (code, all_codes, pos)) in variants.into_iter().zip(quals) {
                    g.variant_code = Some(code);
                    g.variant_codes = all_codes;
                    g.variant_qualifier_position = pos;
                    g.merged_variant_count = Some(count);
                    result.push(g);
                }
            }
            None => {
                // Variants cannot be told apart: fall back to a structural merge, but
                // still record how many variants went into it.
                let mut merged = merge_group_variants(variants);
                merged.merged_variant_count = Some(count);
                result.push(merged);
            }
        }
    }
    result
}
/// Finds the entry qualifier of `group`, i.e. the first coded element of its first segment.
///
/// Simple data elements of the entry segment are inspected first, then the data
/// elements inside its composites. The first element that defines at least one code
/// wins. The result is `(first code, all codes, position)`:
///
/// * for a simple data element the position is `None` when the element sits at
///   position 0, otherwise `Some((element position, 0))`. The element's own `position`
///   field is used rather than its index in `data_elements`, because that index does
///   not count composites that precede it;
/// * for a composite member it is `Some((composite position, member position))`.
///
/// Returns `None` when the group has no segments or the entry segment has no coded
/// element.
fn extract_entry_qualifier_with_position(
    group: &MigSegmentGroup,
) -> Option<(String, Vec<String>, Option<(usize, usize)>)> {
    let entry = group.segments.first()?;

    for de in &entry.data_elements {
        if let Some(first) = de.codes.first() {
            let all_codes: Vec<String> = de.codes.iter().map(|c| c.value.clone()).collect();
            let pos = if de.position == 0 {
                None
            } else {
                Some((de.position, 0))
            };
            return Some((first.value.clone(), all_codes, pos));
        }
    }

    for comp in &entry.composites {
        for sub in &comp.data_elements {
            if let Some(first) = sub.codes.first() {
                let all_codes: Vec<String> = sub.codes.iter().map(|c| c.value.clone()).collect();
                let pos = Some((comp.position, sub.position));
                return Some((first.value.clone(), all_codes, pos));
            }
        }
    }

    None
}
/// Decides whether `group` should survive filtering against `ahb_numbers`.
///
/// The group matches when one of its own segments has a number contained in
/// `ahb_numbers`, or when any nested group matches (checked recursively). Failing
/// both, a group still matches if it has at least one segment and none of its
/// segments carries a number.
fn group_matches_ahb(group: &MigSegmentGroup, ahb_numbers: &HashSet<String>) -> bool {
    let has_selected_segment = group
        .segments
        .iter()
        .filter_map(|segment| segment.number.as_ref())
        .any(|number| ahb_numbers.contains(number));
    if has_selected_segment {
        return true;
    }

    let has_matching_child = group
        .nested_groups
        .iter()
        .any(|child| group_matches_ahb(child, ahb_numbers));
    if has_matching_child {
        return true;
    }

    let all_unnumbered = group.segments.iter().all(|segment| segment.number.is_none());
    !group.segments.is_empty() && all_unnumbered
}
/// Builds a filtered copy of a single group.
///
/// The group's own segments go through `filter_segments`; its nested groups go
/// through `filter_groups_with_mode`, with `variant_aware` applied both to the
/// directly nested level and to every level below it. All descriptive fields are
/// cloned from `group`.
///
/// NOTE(review): `variant_qualifier_position` is reset to `None` even though
/// `variant_code` and `variant_codes` are carried over - confirm this is intended.
fn filter_group_contents_with_mode(
    group: &MigSegmentGroup,
    ahb_numbers: &HashSet<String>,
    variant_aware: bool,
) -> MigSegmentGroup {
    let kept_segments = filter_segments(&group.segments, ahb_numbers);
    let kept_children = filter_groups_with_mode(
        &group.nested_groups,
        ahb_numbers,
        variant_aware,
        variant_aware,
    );

    MigSegmentGroup {
        id: group.id.clone(),
        name: group.name.clone(),
        description: group.description.clone(),
        counter: group.counter.clone(),
        level: group.level,
        max_rep_std: group.max_rep_std,
        max_rep_spec: group.max_rep_spec,
        status_std: group.status_std.clone(),
        status_spec: group.status_spec.clone(),
        segments: kept_segments,
        nested_groups: kept_children,
        variant_code: group.variant_code.clone(),
        variant_qualifier_position: None,
        variant_codes: group.variant_codes.clone(),
        merged_variant_count: group.merged_variant_count,
    }
}
/// Collapses groups that share an ID into one group per ID.
///
/// Output order follows the first appearance of each ID. A group whose ID is unique
/// is returned unchanged. Several groups with the same ID are combined through
/// `merge_group_variants`, and the result records how many groups were combined in
/// `merged_variant_count`.
fn merge_same_id_groups(groups: Vec<MigSegmentGroup>) -> Vec<MigSegmentGroup> {
    let mut buckets: Vec<(String, Vec<MigSegmentGroup>)> = Vec::new();
    for incoming in groups {
        let slot = buckets.iter().position(|(id, _)| *id == incoming.id);
        match slot {
            Some(idx) => buckets[idx].1.push(incoming),
            None => buckets.push((incoming.id.clone(), vec![incoming])),
        }
    }

    let mut merged_groups = Vec::with_capacity(buckets.len());
    for (_, members) in buckets {
        let member_count = members.len();
        if member_count == 1 {
            merged_groups.extend(members);
        } else {
            let mut combined = merge_group_variants(members);
            combined.merged_variant_count = Some(member_count as u32);
            merged_groups.push(combined);
        }
    }
    merged_groups
}
/// Merges several same-ID group variants into one superset group.
///
/// * Segments: starts from the variant with the most segments (the last one on a tie,
///   as `max_by_key` returns the last maximum). For every other variant, any segment ID
///   that occurs more often there than in the merged list contributes its *surplus*
///   occurrences - the variant's own occurrences beyond those already present - which
///   are appended at the end. Earlier code appended repeated clones of a single
///   segment instead, duplicating one definition and losing the others.
/// * Nested groups: all variants' nested groups are pooled, merged by ID via
///   `merge_same_id_groups` and sorted by the number in their `SG<n>` ID.
/// * Repetition limits: the maximum over all variants.
/// * Everything else is taken from the first variant; `variant_qualifier_position`
///   and `merged_variant_count` are left unset for the caller to fill in.
///
/// # Panics
///
/// Panics if `variants` is empty.
fn merge_group_variants(variants: Vec<MigSegmentGroup>) -> MigSegmentGroup {
    let first = &variants[0];
    let longest_idx = variants
        .iter()
        .enumerate()
        .max_by_key(|(_, v)| v.segments.len())
        .map(|(i, _)| i)
        .unwrap_or(0);

    let mut merged_segments: Vec<MigSegment> = variants[longest_idx].segments.clone();
    for (i, variant) in variants.iter().enumerate() {
        if i == longest_idx {
            continue;
        }
        for seg in &variant.segments {
            let count_in_merged = merged_segments.iter().filter(|s| s.id == seg.id).count();
            // Append only the occurrences the merged list is still missing. Once this
            // ID has been topped up, later segments with the same ID add nothing.
            merged_segments.extend(
                variant
                    .segments
                    .iter()
                    .filter(|s| s.id == seg.id)
                    .skip(count_in_merged)
                    .cloned(),
            );
        }
    }

    let all_nested: Vec<MigSegmentGroup> = variants
        .iter()
        .flat_map(|variant| variant.nested_groups.iter().cloned())
        .collect();
    let mut merged_nested = merge_same_id_groups(all_nested);
    // Stable sort: groups whose ID has no parsable number keep their relative order.
    merged_nested.sort_by_key(|g| extract_group_number(&g.id));

    MigSegmentGroup {
        id: first.id.clone(),
        name: first.name.clone(),
        description: first.description.clone(),
        counter: first.counter.clone(),
        level: first.level,
        max_rep_std: variants.iter().map(|v| v.max_rep_std).max().unwrap_or(1),
        max_rep_spec: variants.iter().map(|v| v.max_rep_spec).max().unwrap_or(1),
        status_std: first.status_std.clone(),
        status_spec: first.status_spec.clone(),
        segments: merged_segments,
        nested_groups: merged_nested,
        variant_code: first.variant_code.clone(),
        variant_qualifier_position: None,
        variant_codes: first.variant_codes.clone(),
        merged_variant_count: None,
    }
}
/// Parses the number out of a group ID of the form `SG<n>`.
///
/// Returns `u32::MAX` when the ID does not start with `SG` or when the remainder is
/// not a valid `u32`.
fn extract_group_number(id: &str) -> u32 {
    match id.strip_prefix("SG") {
        Some(digits) => digits.parse::<u32>().unwrap_or(u32::MAX),
        None => u32::MAX,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use mig_types::schema::mig::MigSegment;
/// Test helper: a bare segment whose name equals its ID, with an optional number and
/// no data elements or composites.
fn seg(id: &str, number: Option<&str>) -> MigSegment {
    let tag = id.to_owned();
    MigSegment {
        name: tag.clone(),
        id: tag,
        description: None,
        counter: None,
        level: 0,
        number: number.map(str::to_owned),
        max_rep_std: 1,
        max_rep_spec: 1,
        status_std: None,
        status_spec: None,
        example: None,
        data_elements: Vec::new(),
        composites: Vec::new(),
    }
}
/// Test helper: a level-1 group whose name equals its ID, holding the given segments
/// and nested groups, with no variant metadata set.
fn group(id: &str, segments: Vec<MigSegment>, nested: Vec<MigSegmentGroup>) -> MigSegmentGroup {
    let tag = id.to_owned();
    MigSegmentGroup {
        name: tag.clone(),
        id: tag,
        description: None,
        counter: None,
        level: 1,
        max_rep_std: 99,
        max_rep_spec: 99,
        status_std: None,
        status_spec: None,
        segments,
        nested_groups: nested,
        variant_code: None,
        variant_qualifier_position: None,
        variant_codes: Vec::new(),
        merged_variant_count: None,
    }
}
/// Two SG4 definitions share an ID, but only the second one's segment numbers are in
/// the selection, so exactly that one must survive with its contents intact.
#[test]
fn test_filter_selects_correct_sg4_variant() {
    let unselected_sg4 = group("SG4", vec![seg("IDE", Some("00012"))], vec![]);

    let selected_children = vec![
        group("SG5", vec![seg("LOC", Some("00049"))], vec![]),
        group("SG6", vec![seg("RFF", Some("00056"))], vec![]),
    ];
    let selected_sg4 = group(
        "SG4",
        vec![
            seg("IDE", Some("00020")),
            seg("DTM", Some("00023")),
            seg("STS", Some("00035")),
        ],
        selected_children,
    );

    let mig = MigSchema {
        message_type: "UTILMD".to_string(),
        variant: None,
        version: "S2.1".to_string(),
        publication_date: String::new(),
        author: String::new(),
        format_version: "FV2504".to_string(),
        source_file: String::new(),
        segments: vec![seg("UNH", Some("00003")), seg("BGM", Some("00004"))],
        segment_groups: vec![unselected_sg4, selected_sg4],
    };

    let selected = [
        "00003", "00004", "00020", "00023", "00035", "00049", "00056",
    ];
    let ahb_numbers: HashSet<String> = selected.iter().map(|n| (*n).to_owned()).collect();

    let filtered = filter_mig_for_pid(&mig, &ahb_numbers);

    assert_eq!(filtered.segments.len(), 2);
    assert_eq!(filtered.segment_groups.len(), 1);
    let sg4 = &filtered.segment_groups[0];
    assert_eq!(sg4.id, "SG4");
    assert_eq!(sg4.segments.len(), 3);
    assert_eq!(sg4.nested_groups.len(), 2);
}
/// UNA/UNB/UNZ are kept regardless of the selection; of the remaining segments only
/// the one whose number is selected (UNH) survives, so DTM is dropped.
#[test]
fn test_filter_keeps_transport_and_matching_segments() {
    let top_level = vec![
        seg("UNA", None),
        seg("UNB", Some("00001")),
        seg("UNH", Some("00003")),
        seg("DTM", Some("00099")),
        seg("UNZ", Some("00527")),
    ];
    let mig = MigSchema {
        message_type: "UTILMD".to_string(),
        variant: None,
        version: "S2.1".to_string(),
        publication_date: String::new(),
        author: String::new(),
        format_version: "FV2504".to_string(),
        source_file: String::new(),
        segments: top_level,
        segment_groups: vec![],
    };
    let mut ahb_numbers: HashSet<String> = HashSet::new();
    ahb_numbers.insert("00003".to_string());

    let filtered = filter_mig_for_pid(&mig, &ahb_numbers);

    assert_eq!(filtered.segments.len(), 4);
    let expected_ids = ["UNA", "UNB", "UNH", "UNZ"];
    for (index, expected) in expected_ids.iter().enumerate() {
        assert_eq!(filtered.segments[index].id, *expected);
    }
}
/// Three SG8 definitions under one SG4 have no coded entry element, so they cannot be
/// told apart and must collapse into a single SG8 whose segments and nested groups
/// form the superset, with nested groups ordered by group number.
#[test]
fn test_filter_merges_same_id_groups() {
    let sg10 = |segments: Vec<MigSegment>| group("SG10", segments, vec![]);

    let first_sg8 = group(
        "SG8",
        vec![seg("SEQ", Some("00089")), seg("RFF", Some("00090"))],
        vec![sg10(vec![
            seg("CCI", Some("00092")),
            seg("CAV", Some("00093")),
        ])],
    );
    let second_sg8 = group(
        "SG8",
        vec![seg("SEQ", Some("00114"))],
        vec![
            group("SG9", vec![seg("QTY", Some("00116"))], vec![]),
            sg10(vec![seg("CCI", Some("00122")), seg("CAV", Some("00125"))]),
        ],
    );
    let third_sg8 = group(
        "SG8",
        vec![seg("SEQ", Some("00291")), seg("RFF", Some("00292"))],
        vec![sg10(vec![
            seg("CCI", Some("00295")),
            seg("CAV", Some("00296")),
            seg("CAV", Some("00297")),
        ])],
    );
    let parent = group(
        "SG4",
        vec![seg("IDE", Some("00020"))],
        vec![first_sg8, second_sg8, third_sg8],
    );

    let mig = MigSchema {
        message_type: "UTILMD".to_string(),
        variant: None,
        version: "S2.1".to_string(),
        publication_date: String::new(),
        author: String::new(),
        format_version: "FV2504".to_string(),
        source_file: String::new(),
        segments: vec![],
        segment_groups: vec![parent],
    };

    let selected = [
        "00020", "00089", "00090", "00092", "00093", "00114", "00116", "00122", "00125",
        "00291", "00292", "00295", "00296", "00297",
    ];
    let ahb_numbers: HashSet<String> = selected.iter().map(|n| (*n).to_owned()).collect();

    let filtered = filter_mig_for_pid(&mig, &ahb_numbers);
    let sg4 = &filtered.segment_groups[0];
    assert_eq!(sg4.nested_groups.len(), 1, "SG8 variants should be merged");

    let sg8 = &sg4.nested_groups[0];
    assert_eq!(sg8.id, "SG8");
    let seg_tags: Vec<&str> = sg8.segments.iter().map(|s| s.id.as_str()).collect();
    assert_eq!(seg_tags, vec!["SEQ", "RFF"]);

    assert_eq!(sg8.nested_groups.len(), 2, "should have SG9 and SG10");
    assert_eq!(sg8.nested_groups[0].id, "SG9");
    assert_eq!(sg8.nested_groups[1].id, "SG10");

    let merged_sg10 = &sg8.nested_groups[1];
    let sg10_tags: Vec<&str> = merged_sg10.segments.iter().map(|s| s.id.as_str()).collect();
    assert_eq!(sg10_tags, vec!["CCI", "CAV", "CAV"]);
}
/// Of two nested SG8 definitions only the one whose numbers are selected may remain;
/// the other must be dropped before any merging happens.
#[test]
fn test_filter_removes_nested_groups_not_in_ahb() {
    let selected_sg8 = group(
        "SG8",
        vec![seg("SEQ", Some("00081"))],
        vec![group("SG10", vec![seg("CCI", Some("00083"))], vec![])],
    );
    let unselected_sg8 = group("SG8", vec![seg("SEQ", Some("00999"))], vec![]);
    let parent = group(
        "SG4",
        vec![seg("IDE", Some("00020"))],
        vec![selected_sg8, unselected_sg8],
    );

    let mig = MigSchema {
        message_type: "UTILMD".to_string(),
        variant: None,
        version: "S2.1".to_string(),
        publication_date: String::new(),
        author: String::new(),
        format_version: "FV2504".to_string(),
        source_file: String::new(),
        segments: vec![],
        segment_groups: vec![parent],
    };

    let mut ahb_numbers: HashSet<String> = HashSet::new();
    for number in ["00020", "00081", "00083"].iter() {
        ahb_numbers.insert((*number).to_owned());
    }

    let filtered = filter_mig_for_pid(&mig, &ahb_numbers);
    let sg4 = &filtered.segment_groups[0];
    assert_eq!(sg4.nested_groups.len(), 1);

    let surviving_sg8 = &sg4.nested_groups[0];
    assert_eq!(surviving_sg8.segments[0].number, Some("00081".to_string()));
}
/// When same-ID groups can be told apart by their entry qualifier, each is kept and
/// tagged: `variant_code` is the first code and `variant_codes` lists every code of
/// the qualifier element.
#[test]
fn test_variant_codes_include_all_entry_qualifier_codes() {
    use mig_types::schema::common::CodeDefinition;
    use mig_types::schema::mig::MigDataElement;

    // Builds the qualifier element with the given (value, name) code pairs.
    let qualifier = |codes: &[(&str, &str)]| MigDataElement {
        id: "1153".to_string(),
        name: "Qualifier".to_string(),
        description: None,
        status_std: None,
        status_spec: None,
        format_std: None,
        format_spec: None,
        position: 0,
        codes: codes
            .iter()
            .map(|&(value, name)| CodeDefinition {
                value: value.to_string(),
                name: name.to_string(),
                description: None,
            })
            .collect(),
    };

    let mut single_code = group("SG6", vec![seg("RFF", Some("00056"))], vec![]);
    single_code.segments[0]
        .data_elements
        .push(qualifier(&[("Z13", "PI")]));

    let mut two_codes = group("SG6", vec![seg("RFF", Some("00073"))], vec![]);
    two_codes.segments[0]
        .data_elements
        .push(qualifier(&[("Z47", "A"), ("Z49", "B")]));

    let tagged = tag_or_merge_same_id_groups(vec![single_code, two_codes]);

    assert_eq!(tagged.len(), 2);
    assert_eq!(tagged[0].variant_code, Some("Z13".to_string()));
    assert_eq!(tagged[0].variant_codes, vec!["Z13"]);
    assert_eq!(tagged[1].variant_code, Some("Z47".to_string()));
    assert_eq!(tagged[1].variant_codes, vec!["Z47", "Z49"]);
}
}