use std::sync::Arc;
use edifact_primitives::{Control, EdifactDelimiters, RawSegment};
pub use mig_types::segment::OwnedSegment;
/// A single EDIFACT message (`UNH` … `UNT`) together with the interchange
/// envelope it was found in.
///
/// Chunks produced by [`split_messages`] share one envelope allocation through
/// the [`Arc`], so cloning a chunk bumps a reference count instead of copying
/// the envelope segments.
#[derive(Clone, Debug)]
pub struct MessageChunk {
    /// Segments that preceded the first `UNH` of the interchange.
    pub envelope: Arc<Vec<OwnedSegment>>,
    /// The `UNH` segment that opens the message.
    pub unh: OwnedSegment,
    /// Segments collected between `unh` and `unt`.
    pub body: Vec<OwnedSegment>,
    /// The `UNT` segment that closes the message.
    pub unt: OwnedSegment,
}
impl MessageChunk {
    /// Returns cloned segments in the order envelope, `UNH`, body, `UNT`.
    pub fn all_segments(&self) -> Vec<OwnedSegment> {
        let total = self.envelope.len() + 2 + self.body.len();
        let mut out = Vec::with_capacity(total);
        out.extend(
            self.envelope
                .iter()
                .chain(std::iter::once(&self.unh))
                .chain(&self.body)
                .chain(std::iter::once(&self.unt))
                .cloned(),
        );
        out
    }

    /// Returns cloned segments in the order `UNH`, body, `UNT`, leaving the
    /// envelope out.
    pub fn message_segments(&self) -> Vec<OwnedSegment> {
        let total = 2 + self.body.len();
        let mut out = Vec::with_capacity(total);
        out.extend(
            std::iter::once(&self.unh)
                .chain(&self.body)
                .chain(std::iter::once(&self.unt))
                .cloned(),
        );
        out
    }

    /// Picks the segment list matching what `mig` reports via
    /// `includes_envelope()`: [`Self::all_segments`] when it returns `true`,
    /// [`Self::message_segments`] otherwise.
    pub fn segments_for_mig(&self, mig: &mig_types::schema::mig::MigSchema) -> Vec<OwnedSegment> {
        if !mig.includes_envelope() {
            return self.message_segments();
        }
        self.all_segments()
    }
}
/// Result of splitting an interchange into its individual messages.
#[derive(Clone, Debug)]
pub struct InterchangeChunks {
    /// Segments seen before the first `UNH`.
    pub envelope: Vec<OwnedSegment>,
    /// One chunk per completed `UNH`/`UNT` pair, in input order.
    pub messages: Vec<MessageChunk>,
    /// The last `UNZ` segment encountered, or `None` if the input had none.
    pub unz: Option<OwnedSegment>,
}
/// Removes EDIFACT release (escape) characters from `value`.
///
/// Every occurrence of `release` is dropped and the character that follows it
/// is copied verbatim, so `?+` becomes `+` and `??` becomes `?` when `release`
/// is `b'?'`. A release character at the very end of `value` has nothing to
/// escape and is kept as a literal character.
///
/// The scan works on `char`s rather than bytes, so multi-byte UTF-8 characters
/// (umlauts, for example) pass through unchanged, whether they are escaped or
/// not.
///
/// `release` is expected to be an ASCII byte. A non-ASCII byte can never be a
/// complete character inside a UTF-8 `str`, so in that case the value is
/// returned unchanged.
fn unescape_edifact(value: &str, release: u8) -> String {
    // Fast path: nothing to unescape, a plain copy is enough.
    if !release.is_ascii() || !value.as_bytes().contains(&release) {
        return value.to_string();
    }

    let release = char::from(release);
    let mut result = String::with_capacity(value.len());
    let mut chars = value.chars();
    while let Some(current) = chars.next() {
        if current == release {
            // Emit the escaped character; a dangling release char at the end
            // of the input is kept as-is.
            result.push(chars.next().unwrap_or(current));
        } else {
            result.push(current);
        }
    }
    result
}
/// Parser handler that copies every reported segment into an owned list.
struct SegmentCollector {
    /// Segments gathered so far, in the order the parser reported them.
    segments: Vec<OwnedSegment>,
    /// Release (escape) byte used when unescaping component values; replaced
    /// whenever the parser reports delimiters.
    release: u8,
}
impl edifact_parser::EdifactHandler for SegmentCollector {
    /// Converts the borrowed segment into an [`OwnedSegment`], unescaping each
    /// component with the current release character, and stores it.
    fn on_segment(&mut self, segment: &RawSegment<'_>) -> Control {
        let release_byte = self.release;
        let owned = OwnedSegment {
            id: segment.id.to_string(),
            elements: segment
                .elements
                .iter()
                .map(|element| {
                    element
                        .iter()
                        .map(|component| unescape_edifact(component, release_byte))
                        .collect()
                })
                .collect(),
            segment_number: segment.position.segment_number,
        };
        self.segments.push(owned);
        Control::Continue
    }

    /// Remembers the release character reported by the parser so that
    /// segments handled afterwards are unescaped with it.
    fn on_delimiters(&mut self, delimiters: &EdifactDelimiters, _explicit_una: bool) {
        self.release = delimiters.release;
    }

    /// No-op: the collector does not react to the interchange start.
    fn on_interchange_start(&mut self, _unb: &RawSegment<'_>) -> Control {
        Control::Continue
    }

    /// No-op: the collector does not react to a message start.
    fn on_message_start(&mut self, _unh: &RawSegment<'_>) -> Control {
        Control::Continue
    }

    /// No-op: the collector does not react to a message end.
    fn on_message_end(&mut self, _unt: &RawSegment<'_>) {}

    /// No-op: the collector does not react to the interchange end.
    fn on_interchange_end(&mut self, _unz: &RawSegment<'_>) {}
}
/// Parses raw EDIFACT bytes into a flat list of owned segments.
///
/// Unescaping starts with the release character of
/// `EdifactDelimiters::default()` and switches to whatever the parser reports
/// through `on_delimiters`.
///
/// # Errors
///
/// Returns `AssemblyError::ParseError` carrying the parser's error message
/// when the parser rejects the input.
pub fn parse_to_segments(input: &[u8]) -> Result<Vec<OwnedSegment>, crate::AssemblyError> {
    let default_release = EdifactDelimiters::default().release;
    let mut handler = SegmentCollector {
        segments: Vec::new(),
        release: default_release,
    };

    let outcome = edifact_parser::EdifactStreamParser::parse(input, &mut handler);
    if let Err(err) = outcome {
        return Err(crate::AssemblyError::ParseError(err.to_string()));
    }

    Ok(handler.segments)
}
/// Splits a flat segment list into the interchange envelope and one
/// [`MessageChunk`] per `UNH`/`UNT` pair.
///
/// Segment ids are compared after upper-casing. The walk behaves as follows:
///
/// * Segments before the first `UNH` (other than `UNH`, `UNT` and `UNZ`) form
///   the envelope.
/// * `UNH` opens a message and discards any body collected so far; an earlier
///   `UNH` that was never closed is dropped.
/// * `UNT` closes the open message; a `UNT` with no open `UNH` is ignored.
/// * `UNZ` is remembered; when several occur, the last one wins.
/// * Segments that follow a `UNT` but precede the next `UNH` never end up in
///   any chunk.
///
/// # Errors
///
/// Returns `AssemblyError::ParseError` when no complete `UNH`/`UNT` pair was
/// found.
pub fn split_messages(
    segments: Vec<OwnedSegment>,
) -> Result<InterchangeChunks, crate::AssemblyError> {
    let mut header: Vec<OwnedSegment> = Vec::with_capacity(4);
    let mut completed: Vec<(OwnedSegment, Vec<OwnedSegment>, OwnedSegment)> = Vec::new();
    let mut trailer: Option<OwnedSegment> = None;
    let mut open_unh: Option<OwnedSegment> = None;
    let mut pending_body: Vec<OwnedSegment> = Vec::with_capacity(32);
    let mut inside_messages = false;

    for segment in segments {
        let tag = segment.id.to_uppercase();
        if tag == "UNH" {
            inside_messages = true;
            open_unh = Some(segment);
            pending_body.clear();
        } else if tag == "UNT" {
            if let Some(unh) = open_unh.take() {
                let body = std::mem::take(&mut pending_body);
                completed.push((unh, body, segment));
            }
        } else if tag == "UNZ" {
            trailer = Some(segment);
        } else if inside_messages {
            pending_body.push(segment);
        } else {
            header.push(segment);
        }
    }

    if completed.is_empty() {
        return Err(crate::AssemblyError::ParseError(
            "No UNH/UNT message pairs found in interchange".to_string(),
        ));
    }

    // One shared allocation for the envelope; each chunk only holds a handle.
    let shared_envelope = Arc::new(header);
    let mut messages = Vec::with_capacity(completed.len());
    for (unh, body, unt) in completed {
        messages.push(MessageChunk {
            envelope: Arc::clone(&shared_envelope),
            unh,
            body,
            unt,
        });
    }

    Ok(InterchangeChunks {
        envelope: shared_envelope.to_vec(),
        messages,
        unz: trailer,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and splits it into chunks, panicking on any failure.
    fn parse_and_split(input: &[u8]) -> InterchangeChunks {
        let parsed = parse_to_segments(input).unwrap();
        split_messages(parsed).unwrap()
    }

    #[test]
    fn test_parse_to_segments_minimal() {
        let parsed = parse_to_segments(b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC001'UNT+3+MSG001'UNZ+1+REF001'").unwrap();

        // Five segments are expected, starting with UNB (no entry for UNA).
        assert_eq!(parsed.len(), 5);
        let expected_tags = ["UNB", "UNH", "BGM", "UNT", "UNZ"];
        for (segment, tag) in parsed.iter().zip(expected_tags.iter().copied()) {
            assert!(segment.is(tag));
        }
    }

    #[test]
    fn test_parse_to_segments_element_access() {
        let parsed =
            parse_to_segments(b"UNA:+.? 'UNB+UNOC:3'UNH+001+UTILMD:D:11A'BGM+E03+DOC001'UNT+2+001'UNZ+1'")
                .unwrap();

        let bgm = &parsed[2];
        assert_eq!(bgm.id, "BGM");
        assert_eq!(bgm.get_element(0), "E03");
        assert_eq!(bgm.get_element(1), "DOC001");
        // An out-of-range element index yields an empty string.
        assert_eq!(bgm.get_element(99), "");
    }

    #[test]
    fn test_parse_to_segments_composite_access() {
        let parsed = parse_to_segments(b"UNA:+.? 'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'").unwrap();

        let unh = &parsed[0];
        assert_eq!(unh.get_component(1, 0), "UTILMD");
        assert_eq!(unh.get_component(1, 1), "D");
        assert_eq!(unh.get_component(1, 4), "S2.1");
    }

    #[test]
    fn test_message_chunk_struct_exists() {
        let opening = OwnedSegment {
            id: "UNH".to_string(),
            elements: vec![],
            segment_number: 0,
        };
        let closing = OwnedSegment {
            id: "UNT".to_string(),
            elements: vec![],
            segment_number: 1,
        };
        let chunk = MessageChunk {
            envelope: Arc::new(vec![]),
            unh: opening,
            body: vec![],
            unt: closing,
        };

        assert_eq!(chunk.unh.id, "UNH");
        assert_eq!(chunk.unt.id, "UNT");
        assert!(chunk.envelope.is_empty());
        assert!(chunk.body.is_empty());
    }

    #[test]
    fn test_interchange_chunks_struct_exists() {
        let empty = InterchangeChunks {
            envelope: vec![],
            messages: vec![],
            unz: None,
        };

        assert!(empty.messages.is_empty());
        assert!(empty.unz.is_none());
    }

    #[test]
    fn test_split_messages_single_message() {
        let chunks = parse_and_split(b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC001'UNT+3+MSG001'UNZ+1+REF001'");

        assert_eq!(chunks.messages.len(), 1);
        assert_eq!(chunks.envelope.len(), 1);
        assert!(chunks.unz.is_some());

        let message = &chunks.messages[0];
        assert!(message.unh.is("UNH"));
        assert!(message.unt.is("UNT"));
        assert_eq!(message.body.len(), 1);
        assert!(message.body[0].is("BGM"));

        // Envelope + UNH + body + UNT.
        let flattened = message.all_segments();
        assert_eq!(flattened.len(), 4);
        let expected_tags = ["UNB", "UNH", "BGM", "UNT"];
        for (segment, tag) in flattened.iter().zip(expected_tags.iter().copied()) {
            assert!(segment.is(tag));
        }
    }

    #[test]
    fn test_split_messages_two_messages() {
        let chunks = parse_and_split(b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001'UNT+2+001'UNH+002+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC002'DTM+137:20250101:102'UNT+3+002'UNZ+2+REF001'");

        assert_eq!(chunks.messages.len(), 2);

        let first = &chunks.messages[0];
        assert_eq!(first.unh.get_element(0), "001");
        assert_eq!(first.body.len(), 1);
        assert!(first.body[0].is("BGM"));

        let second = &chunks.messages[1];
        assert_eq!(second.unh.get_element(0), "002");
        assert_eq!(second.body.len(), 2);
        assert!(second.body[0].is("BGM"));
        assert!(second.body[1].is("DTM"));

        // Both messages see the same envelope.
        assert_eq!(first.envelope.len(), second.envelope.len());
        assert!(first.envelope[0].is("UNB"));
    }

    #[test]
    fn test_split_messages_envelope_preserved_per_message() {
        let chunks = parse_and_split(b"UNA:+.? 'UNB+UNOC:3+SEND+RECV+210101:1200+REF'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'UNH+002+UTILMD:D:11A:UN:S2.1'UNT+1+002'UNZ+2+REF'");

        for message in chunks.messages.iter() {
            let flattened = message.all_segments();
            assert!(flattened[0].is("UNB"), "First segment should be UNB");
            assert!(flattened[1].is("UNH"), "Second segment should be UNH");
            assert!(
                flattened.last().unwrap().is("UNT"),
                "Last segment should be UNT"
            );
        }
    }

    #[test]
    fn test_split_messages_no_messages_errors() {
        // An interchange with an envelope but no UNH/UNT pair must be rejected.
        let parsed = parse_to_segments(b"UNA:+.? 'UNB+UNOC:3+S+R+210101:1200+REF'UNZ+0+REF'").unwrap();
        let outcome = split_messages(parsed);

        assert!(outcome.is_err());
    }

    #[test]
    fn test_owned_segment_is_case_insensitive() {
        let parsed = parse_to_segments(b"UNA:+.? 'UNB+UNOC:3'UNZ+0'").unwrap();
        let unb = &parsed[0];

        assert!(unb.is("unb"));
        assert!(unb.is("UNB"));
        assert!(unb.is("Unb"));
    }
}