1use std::sync::Arc;
11
12use edifact_primitives::{Control, EdifactDelimiters, RawSegment};
13
14pub use mig_types::segment::OwnedSegment;
16
/// One message cut out of an interchange: its `UNH` header, the body
/// segments, its `UNT` trailer, and a handle to the interchange envelope.
#[derive(Debug, Clone)]
pub struct MessageChunk {
    /// Envelope segments of the surrounding interchange, held behind an
    /// [`Arc`] so cloning a chunk only bumps a reference count for this field.
    pub envelope: Arc<Vec<OwnedSegment>>,
    /// The `UNH` segment of this message.
    pub unh: OwnedSegment,
    /// The segments between `UNH` and `UNT`.
    pub body: Vec<OwnedSegment>,
    /// The `UNT` segment of this message.
    pub unt: OwnedSegment,
}
29
30impl MessageChunk {
31 pub fn all_segments(&self) -> Vec<OwnedSegment> {
36 let mut segs = Vec::with_capacity(self.envelope.len() + 2 + self.body.len());
37 segs.extend_from_slice(&self.envelope);
38 segs.push(self.unh.clone());
39 segs.extend(self.body.iter().cloned());
40 segs.push(self.unt.clone());
41 segs
42 }
43
44 pub fn message_segments(&self) -> Vec<OwnedSegment> {
49 let mut segs = Vec::with_capacity(2 + self.body.len());
50 segs.push(self.unh.clone());
51 segs.extend(self.body.iter().cloned());
52 segs.push(self.unt.clone());
53 segs
54 }
55
56 pub fn segments_for_mig(&self, mig: &mig_types::schema::mig::MigSchema) -> Vec<OwnedSegment> {
59 if mig.includes_envelope() {
60 self.all_segments()
61 } else {
62 self.message_segments()
63 }
64 }
65}
66
/// Result of splitting a flat segment list into per-message chunks.
#[derive(Debug, Clone)]
pub struct InterchangeChunks {
    /// Segments that appeared before the first `UNH` (other than `UNT`/`UNZ`).
    pub envelope: Vec<OwnedSegment>,
    /// One chunk per completed `UNH`/`UNT` pair, in input order.
    pub messages: Vec<MessageChunk>,
    /// The last `UNZ` segment encountered, if there was one.
    pub unz: Option<OwnedSegment>,
}
77
/// Removes EDIFACT release (escape) characters from `value`.
///
/// Every occurrence of `release` is dropped and the character that follows
/// it is copied verbatim, so `??` becomes `?` and `?+` becomes `+` when the
/// release character is `?`. A release character at the very end of the
/// input has nothing to escape and is kept as-is.
///
/// The input is processed per `char`, not per byte, so multi-byte UTF-8
/// sequences (umlauts, `ß`, ...) survive unchanged.
///
/// `release` is converted with `char::from`; it is expected to be an ASCII
/// delimiter.
fn unescape_edifact(value: &str, release: u8) -> String {
    let release = char::from(release);

    // Fast path: nothing to unescape, a plain copy is enough.
    if !value.contains(release) {
        return value.to_string();
    }

    let mut result = String::with_capacity(value.len());
    let mut chars = value.chars();
    while let Some(c) = chars.next() {
        if c == release {
            // Emit the escaped character; a trailing lone release character
            // is emitted literally.
            result.push(chars.next().unwrap_or(c));
        } else {
            result.push(c);
        }
    }
    result
}
103
/// Parser handler that accumulates every reported segment as an owned copy.
struct SegmentCollector {
    /// Segments collected so far, in the order they were reported.
    segments: Vec<OwnedSegment>,
    /// Release (escape) byte used when unescaping component values; replaced
    /// whenever the parser reports its delimiters.
    release: u8,
}
110
111impl edifact_parser::EdifactHandler for SegmentCollector {
112 fn on_segment(&mut self, segment: &RawSegment<'_>) -> Control {
113 let release = self.release;
114 self.segments.push(OwnedSegment {
115 id: segment.id.to_string(),
116 elements: segment
117 .elements
118 .iter()
119 .map(|e| e.iter().map(|c| unescape_edifact(c, release)).collect())
120 .collect(),
121 segment_number: segment.position.segment_number,
122 });
123 Control::Continue
124 }
125
126 fn on_delimiters(&mut self, delimiters: &EdifactDelimiters, _explicit_una: bool) {
127 self.release = delimiters.release;
128 }
129
130 fn on_interchange_start(&mut self, _unb: &RawSegment<'_>) -> Control {
131 Control::Continue
132 }
133
134 fn on_message_start(&mut self, _unh: &RawSegment<'_>) -> Control {
135 Control::Continue
136 }
137
138 fn on_message_end(&mut self, _unt: &RawSegment<'_>) {}
139
140 fn on_interchange_end(&mut self, _unz: &RawSegment<'_>) {}
141}
142
143pub fn parse_to_segments(input: &[u8]) -> Result<Vec<OwnedSegment>, crate::AssemblyError> {
149 let mut collector = SegmentCollector {
150 segments: Vec::new(),
151 release: EdifactDelimiters::default().release,
152 };
153 edifact_parser::EdifactStreamParser::parse(input, &mut collector)
154 .map_err(|e| crate::AssemblyError::ParseError(e.to_string()))?;
155 Ok(collector.segments)
156}
157
158pub fn split_messages(
167 segments: Vec<OwnedSegment>,
168) -> Result<InterchangeChunks, crate::AssemblyError> {
169 let mut envelope: Vec<OwnedSegment> = Vec::with_capacity(4);
170 let mut raw_messages: Vec<(OwnedSegment, Vec<OwnedSegment>, OwnedSegment)> = Vec::new();
172 let mut unz: Option<OwnedSegment> = None;
173
174 let mut current_unh: Option<OwnedSegment> = None;
176 let mut current_body: Vec<OwnedSegment> = Vec::with_capacity(32);
177 let mut seen_first_unh = false;
178
179 for seg in segments {
180 let id_upper = seg.id.to_uppercase();
181 match id_upper.as_str() {
182 "UNH" => {
183 seen_first_unh = true;
184 current_unh = Some(seg);
185 current_body.clear();
186 }
187 "UNT" => {
188 if let Some(unh) = current_unh.take() {
189 raw_messages.push((unh, std::mem::take(&mut current_body), seg));
190 }
191 }
192 "UNZ" => {
193 unz = Some(seg);
194 }
195 _ => {
196 if seen_first_unh {
197 current_body.push(seg);
198 } else {
199 envelope.push(seg);
200 }
201 }
202 }
203 }
204
205 if raw_messages.is_empty() {
206 return Err(crate::AssemblyError::ParseError(
207 "No UNH/UNT message pairs found in interchange".to_string(),
208 ));
209 }
210
211 let envelope_arc = Arc::new(envelope);
213 let messages = raw_messages
214 .into_iter()
215 .map(|(unh, body, unt)| MessageChunk {
216 envelope: Arc::clone(&envelope_arc),
217 unh,
218 body,
219 unt,
220 })
221 .collect();
222
223 Ok(InterchangeChunks {
224 envelope: (*envelope_arc).clone(),
225 messages,
226 unz,
227 })
228}
229
// Unit tests for segment parsing and message splitting.
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal interchange yields UNB, UNH, BGM, UNT, UNZ; the UNA service
    // string is not expected among the returned segments.
    #[test]
    fn test_parse_to_segments_minimal() {
        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC001'UNT+3+MSG001'UNZ+1+REF001'";
        let segments = parse_to_segments(input).unwrap();

        assert_eq!(segments.len(), 5);
        assert!(segments[0].is("UNB"));
        assert!(segments[1].is("UNH"));
        assert!(segments[2].is("BGM"));
        assert!(segments[3].is("UNT"));
        assert!(segments[4].is("UNZ"));
    }

    // Elements are addressable by index; an out-of-range index is expected
    // to yield an empty string.
    #[test]
    fn test_parse_to_segments_element_access() {
        let input = b"UNA:+.? 'UNB+UNOC:3'UNH+001+UTILMD:D:11A'BGM+E03+DOC001'UNT+2+001'UNZ+1'";
        let segments = parse_to_segments(input).unwrap();

        let bgm = &segments[2];
        assert_eq!(bgm.id, "BGM");
        assert_eq!(bgm.get_element(0), "E03");
        assert_eq!(bgm.get_element(1), "DOC001");
        assert_eq!(bgm.get_element(99), "");
    }

    // Components of a composite element are addressable by
    // (element index, component index).
    #[test]
    fn test_parse_to_segments_composite_access() {
        let input = b"UNA:+.? 'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'";
        let segments = parse_to_segments(input).unwrap();

        let unh = &segments[0];
        assert_eq!(unh.get_component(1, 0), "UTILMD");
        assert_eq!(unh.get_component(1, 1), "D");
        assert_eq!(unh.get_component(1, 4), "S2.1");
    }

    // Construction smoke test for `MessageChunk`.
    #[test]
    fn test_message_chunk_struct_exists() {
        let chunk = MessageChunk {
            envelope: Arc::new(vec![]),
            unh: OwnedSegment {
                id: "UNH".to_string(),
                elements: vec![],
                segment_number: 0,
            },
            body: vec![],
            unt: OwnedSegment {
                id: "UNT".to_string(),
                elements: vec![],
                segment_number: 1,
            },
        };
        assert_eq!(chunk.unh.id, "UNH");
        assert_eq!(chunk.unt.id, "UNT");
        assert!(chunk.envelope.is_empty());
        assert!(chunk.body.is_empty());
    }

    // Construction smoke test for `InterchangeChunks`.
    #[test]
    fn test_interchange_chunks_struct_exists() {
        let chunks = InterchangeChunks {
            envelope: vec![],
            messages: vec![],
            unz: None,
        };
        assert!(chunks.messages.is_empty());
        assert!(chunks.unz.is_none());
    }

    // One UNH/UNT pair produces one chunk with UNB as its only envelope
    // segment and BGM as its only body segment.
    #[test]
    fn test_split_messages_single_message() {
        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+MSG001+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC001'UNT+3+MSG001'UNZ+1+REF001'";
        let segments = parse_to_segments(input).unwrap();
        let chunks = split_messages(segments).unwrap();

        assert_eq!(chunks.messages.len(), 1);
        assert_eq!(chunks.envelope.len(), 1);
        assert!(chunks.unz.is_some());

        let msg = &chunks.messages[0];
        assert!(msg.unh.is("UNH"));
        assert!(msg.unt.is("UNT"));
        assert_eq!(msg.body.len(), 1);
        assert!(msg.body[0].is("BGM"));

        // `all_segments` prepends the envelope and leaves UNZ out.
        let all = msg.all_segments();
        assert_eq!(all.len(), 4);
        assert!(all[0].is("UNB"));
        assert!(all[1].is("UNH"));
        assert!(all[2].is("BGM"));
        assert!(all[3].is("UNT"));
    }

    // Two UNH/UNT pairs produce two chunks with separate bodies and the
    // same envelope contents.
    #[test]
    fn test_split_messages_two_messages() {
        let input = b"UNA:+.? 'UNB+UNOC:3+SENDER+RECEIVER+210101:1200+REF001'UNH+001+UTILMD:D:11A:UN:S2.1'BGM+E01+DOC001'UNT+2+001'UNH+002+UTILMD:D:11A:UN:S2.1'BGM+E03+DOC002'DTM+137:20250101:102'UNT+3+002'UNZ+2+REF001'";
        let segments = parse_to_segments(input).unwrap();
        let chunks = split_messages(segments).unwrap();

        assert_eq!(chunks.messages.len(), 2);

        let msg1 = &chunks.messages[0];
        assert_eq!(msg1.unh.get_element(0), "001");
        assert_eq!(msg1.body.len(), 1);
        assert!(msg1.body[0].is("BGM"));

        let msg2 = &chunks.messages[1];
        assert_eq!(msg2.unh.get_element(0), "002");
        assert_eq!(msg2.body.len(), 2);
        assert!(msg2.body[0].is("BGM"));
        assert!(msg2.body[1].is("DTM"));

        assert_eq!(msg1.envelope.len(), msg2.envelope.len());
        assert!(msg1.envelope[0].is("UNB"));
    }

    // Every chunk, even one with an empty body, expands to
    // UNB, UNH, ..., UNT via `all_segments`.
    #[test]
    fn test_split_messages_envelope_preserved_per_message() {
        let input = b"UNA:+.? 'UNB+UNOC:3+SEND+RECV+210101:1200+REF'UNH+001+UTILMD:D:11A:UN:S2.1'UNT+1+001'UNH+002+UTILMD:D:11A:UN:S2.1'UNT+1+002'UNZ+2+REF'";
        let segments = parse_to_segments(input).unwrap();
        let chunks = split_messages(segments).unwrap();

        for msg in &chunks.messages {
            let all = msg.all_segments();
            assert!(all[0].is("UNB"), "First segment should be UNB");
            assert!(all[1].is("UNH"), "Second segment should be UNH");
            assert!(all.last().unwrap().is("UNT"), "Last segment should be UNT");
        }
    }

    // An interchange without any UNH/UNT pair is rejected.
    #[test]
    fn test_split_messages_no_messages_errors() {
        let input = b"UNA:+.? 'UNB+UNOC:3+S+R+210101:1200+REF'UNZ+0+REF'";
        let segments = parse_to_segments(input).unwrap();
        let result = split_messages(segments);
        assert!(result.is_err());
    }

    // `OwnedSegment::is` matches the segment id regardless of letter case.
    #[test]
    fn test_owned_segment_is_case_insensitive() {
        let input = b"UNA:+.? 'UNB+UNOC:3'UNZ+0'";
        let segments = parse_to_segments(input).unwrap();
        assert!(segments[0].is("unb"));
        assert!(segments[0].is("UNB"));
        assert!(segments[0].is("Unb"));
    }
}