1use std::sync::Arc;
11
12use edifact_primitives::{Control, EdifactDelimiters, RawSegment};
13
14pub use mig_types::segment::OwnedSegment;
16
17#[derive(Debug, Clone)]
19pub struct MessageChunk {
20 pub envelope: Arc<Vec<OwnedSegment>>,
22 pub unh: OwnedSegment,
24 pub body: Vec<OwnedSegment>,
26 pub unt: OwnedSegment,
28}
29
30impl MessageChunk {
31 pub fn all_segments(&self) -> Vec<OwnedSegment> {
36 let mut segs = Vec::with_capacity(self.envelope.len() + 2 + self.body.len());
37 segs.extend_from_slice(&self.envelope);
38 segs.push(self.unh.clone());
39 segs.extend(self.body.iter().cloned());
40 segs.push(self.unt.clone());
41 segs
42 }
43
44 pub fn message_segments(&self) -> Vec<OwnedSegment> {
49 let mut segs = Vec::with_capacity(2 + self.body.len());
50 segs.push(self.unh.clone());
51 segs.extend(self.body.iter().cloned());
52 segs.push(self.unt.clone());
53 segs
54 }
55
56 pub fn segments_for_mig(&self, mig: &mig_types::schema::mig::MigSchema) -> Vec<OwnedSegment> {
59 let starts_with_envelope = mig
60 .segments
61 .first()
62 .map(|s| s.id == "UNA" || s.id == "UNB")
63 .unwrap_or(false);
64 if starts_with_envelope {
65 self.all_segments()
66 } else {
67 self.message_segments()
68 }
69 }
70}
71
72#[derive(Debug, Clone)]
74pub struct InterchangeChunks {
75 pub envelope: Vec<OwnedSegment>,
77 pub messages: Vec<MessageChunk>,
79 pub unz: Option<OwnedSegment>,
81}
82
/// Removes EDIFACT release (escape) characters from `value`.
///
/// Every occurrence of the release character is dropped and the character
/// that follows it is copied verbatim (so `?+` becomes `+` and `??` becomes
/// `?` when the release character is `?`). A release character that is the
/// very last character of `value` has nothing to escape and is kept as-is.
///
/// The input is processed per `char`, not per byte, so multi-byte UTF-8
/// characters such as German umlauts pass through unchanged. `release` is
/// interpreted as the code point with the same numeric value.
///
/// Returns a new `String`; when `value` contains no release character the
/// result is a plain copy.
fn unescape_edifact(value: &str, release: u8) -> String {
    let release = char::from(release);

    // Fast path: nothing to unescape.
    if !value.contains(release) {
        return value.to_string();
    }

    let mut result = String::with_capacity(value.len());
    let mut chars = value.chars();
    while let Some(current) = chars.next() {
        if current != release {
            result.push(current);
            continue;
        }
        match chars.next() {
            // The escaped character is taken literally, even if it is
            // itself the release character.
            Some(escaped) => result.push(escaped),
            // Dangling release character at the end: keep it.
            None => result.push(current),
        }
    }
    result
}
108
109struct SegmentCollector {
111 segments: Vec<OwnedSegment>,
112 release: u8,
114}
115
116impl edifact_parser::EdifactHandler for SegmentCollector {
117 fn on_segment(&mut self, segment: &RawSegment<'_>) -> Control {
118 let release = self.release;
119 self.segments.push(OwnedSegment {
120 id: segment.id.to_string(),
121 elements: segment
122 .elements
123 .iter()
124 .map(|e| e.iter().map(|c| unescape_edifact(c, release)).collect())
125 .collect(),
126 segment_number: segment.position.segment_number,
127 });
128 Control::Continue
129 }
130
131 fn on_delimiters(&mut self, delimiters: &EdifactDelimiters, _explicit_una: bool) {
132 self.release = delimiters.release;
133 }
134
135 fn on_interchange_start(&mut self, _unb: &RawSegment<'_>) -> Control {
136 Control::Continue
137 }
138
139 fn on_message_start(&mut self, _unh: &RawSegment<'_>) -> Control {
140 Control::Continue
141 }
142
143 fn on_message_end(&mut self, _unt: &RawSegment<'_>) {}
144
145 fn on_interchange_end(&mut self, _unz: &RawSegment<'_>) {}
146}
147
148pub fn parse_to_segments(input: &[u8]) -> Result<Vec<OwnedSegment>, crate::AssemblyError> {
154 let mut collector = SegmentCollector {
155 segments: Vec::new(),
156 release: EdifactDelimiters::default().release,
157 };
158 edifact_parser::EdifactStreamParser::parse(input, &mut collector)
159 .map_err(|e| crate::AssemblyError::ParseError(e.to_string()))?;
160 Ok(collector.segments)
161}
162
163pub fn split_messages(
172 segments: Vec<OwnedSegment>,
173) -> Result<InterchangeChunks, crate::AssemblyError> {
174 let mut envelope: Vec<OwnedSegment> = Vec::with_capacity(4);
175 let mut raw_messages: Vec<(OwnedSegment, Vec<OwnedSegment>, OwnedSegment)> = Vec::new();
177 let mut unz: Option<OwnedSegment> = None;
178
179 let mut current_unh: Option<OwnedSegment> = None;
181 let mut current_body: Vec<OwnedSegment> = Vec::with_capacity(32);
182 let mut seen_first_unh = false;
183
184 for seg in segments {
185 let id_upper = seg.id.to_uppercase();
186 match id_upper.as_str() {
187 "UNH" => {
188 seen_first_unh = true;
189 current_unh = Some(seg);
190 current_body.clear();
191 }
192 "UNT" => {
193 if let Some(unh) = current_unh.take() {
194 raw_messages.push((unh, std::mem::take(&mut current_body), seg));
195 }
196 }
197 "UNZ" => {
198 unz = Some(seg);
199 }
200 _ => {
201 if seen_first_unh {
202 current_body.push(seg);
203 } else {
204 envelope.push(seg);
205 }
206 }
207 }
208 }
209
210 if raw_messages.is_empty() {
211 return Err(crate::AssemblyError::ParseError(
212 "No UNH/UNT message pairs found in interchange".to_string(),
213 ));
214 }
215
216 let envelope_arc = Arc::new(envelope);
218 let messages = raw_messages
219 .into_iter()
220 .map(|(unh, body, unt)| MessageChunk {
221 envelope: Arc::clone(&envelope_arc),
222 unh,
223 body,
224 unt,
225 })
226 .collect();
227
228 Ok(InterchangeChunks {
229 envelope: (*envelope_arc).clone(),
230 messages,
231 unz,
232 })
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Interchange with a single UTILMD message containing one BGM segment.
    const SINGLE_MESSAGE: &[u8] = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC001'UNT+3+MSG001'UNZ+1+REF001'";

    /// Interchange with two messages; the second one has an extra DTM segment.
    const TWO_MESSAGES: &[u8] = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001'UNT+2+001'UNH+002+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC002'DTM+137:20250101:102'UNT+3+002'UNZ+2+REF001'";

    /// Asserts that `segments` consists of exactly the given tags, in order.
    fn assert_tags(segments: &[OwnedSegment], expected: &[&str]) {
        assert_eq!(segments.len(), expected.len());
        for (segment, tag) in segments.iter().zip(expected.iter().copied()) {
            assert!(segment.is(tag));
        }
    }

    #[test]
    fn test_parse_to_segments_minimal() {
        let parsed = parse_to_segments(SINGLE_MESSAGE).unwrap();

        // UNA is not reported as a segment, so five remain.
        assert_tags(&parsed, &["UNB", "UNH", "BGM", "UNT", "UNZ"]);
    }

    #[test]
    fn test_parse_to_segments_element_access() {
        let parsed = parse_to_segments(
            b"UNA:+.? 'UNB+UNOC:3'UNH+001+UTILMD:D:11A'BGM+E03+DOC001'UNT+2+001'UNZ+1'",
        )
        .unwrap();

        let bgm = &parsed[2];
        assert_eq!(bgm.id, "BGM");
        assert_eq!(bgm.get_element(0), "E03");
        assert_eq!(bgm.get_element(1), "DOC001");
        // Out-of-range element access yields an empty string.
        assert_eq!(bgm.get_element(99), "");
    }

    #[test]
    fn test_parse_to_segments_composite_access() {
        let parsed =
            parse_to_segments(b"UNA:+.? 'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'").unwrap();

        let unh = &parsed[0];
        assert_eq!(unh.get_component(1, 0), "UTILMD");
        assert_eq!(unh.get_component(1, 1), "D");
        assert_eq!(unh.get_component(1, 4), "S2.1");
    }

    #[test]
    fn test_message_chunk_struct_exists() {
        let unh = OwnedSegment {
            id: "UNH".to_string(),
            elements: Vec::new(),
            segment_number: 0,
        };
        let unt = OwnedSegment {
            id: "UNT".to_string(),
            elements: Vec::new(),
            segment_number: 1,
        };
        let chunk = MessageChunk {
            envelope: Arc::new(Vec::new()),
            unh,
            body: Vec::new(),
            unt,
        };

        assert_eq!(chunk.unh.id, "UNH");
        assert_eq!(chunk.unt.id, "UNT");
        assert!(chunk.envelope.is_empty());
        assert!(chunk.body.is_empty());
    }

    #[test]
    fn test_interchange_chunks_struct_exists() {
        let chunks = InterchangeChunks {
            envelope: Vec::new(),
            messages: Vec::new(),
            unz: None,
        };

        assert!(chunks.messages.is_empty());
        assert!(chunks.unz.is_none());
    }

    #[test]
    fn test_split_messages_single_message() {
        let parsed = parse_to_segments(SINGLE_MESSAGE).unwrap();
        let chunks = split_messages(parsed).unwrap();

        assert_eq!(chunks.messages.len(), 1);
        assert_eq!(chunks.envelope.len(), 1);
        assert!(chunks.unz.is_some());

        let message = &chunks.messages[0];
        assert!(message.unh.is("UNH"));
        assert!(message.unt.is("UNT"));
        assert_eq!(message.body.len(), 1);
        assert!(message.body[0].is("BGM"));

        // Envelope + UNH + body + UNT; UNZ is not part of a message.
        assert_tags(&message.all_segments(), &["UNB", "UNH", "BGM", "UNT"]);
    }

    #[test]
    fn test_split_messages_two_messages() {
        let parsed = parse_to_segments(TWO_MESSAGES).unwrap();
        let chunks = split_messages(parsed).unwrap();

        assert_eq!(chunks.messages.len(), 2);

        let first = &chunks.messages[0];
        assert_eq!(first.unh.get_element(0), "001");
        assert_eq!(first.body.len(), 1);
        assert!(first.body[0].is("BGM"));

        let second = &chunks.messages[1];
        assert_eq!(second.unh.get_element(0), "002");
        assert_eq!(second.body.len(), 2);
        assert!(second.body[0].is("BGM"));
        assert!(second.body[1].is("DTM"));

        // Both messages see the same envelope.
        assert_eq!(first.envelope.len(), second.envelope.len());
        assert!(first.envelope[0].is("UNB"));
    }

    #[test]
    fn test_split_messages_envelope_preserved_per_message() {
        let parsed = parse_to_segments(
            b"UNA:+.? 'UNB+UNOC:3+SEND+RECV+210101:1200+REF'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'UNH+002+UTILMD:D:11A:UN:S2.1'UNT+1+002'UNZ+2+REF'",
        )
        .unwrap();
        let chunks = split_messages(parsed).unwrap();

        for message in chunks.messages.iter() {
            let all = message.all_segments();
            assert!(all[0].is("UNB"), "First segment should be UNB");
            assert!(all[1].is("UNH"), "Second segment should be UNH");
            assert!(all.last().unwrap().is("UNT"), "Last segment should be UNT");
        }
    }

    #[test]
    fn test_split_messages_no_messages_errors() {
        let parsed =
            parse_to_segments(b"UNA:+.? 'UNB+UNOC:3+S+R+210101:1200+REF'UNZ+0+REF'").unwrap();

        assert!(split_messages(parsed).is_err());
    }

    #[test]
    fn test_owned_segment_is_case_insensitive() {
        let parsed = parse_to_segments(b"UNA:+.? 'UNB+UNOC:3'UNZ+0'").unwrap();

        let unb = &parsed[0];
        assert!(unb.is("unb"));
        assert!(unb.is("UNB"));
        assert!(unb.is("Unb"));
    }
}