1use edifact_primitives::{Control, RawSegment};
7use thiserror::Error;
8
9use crate::{EdifactHandler, EdifactStreamParser};
10
/// Outcome of a successful [`detect_format_version`] call.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DetectResult {
    /// Format version identifier taken from the lookup table (for example `"FV2604"`).
    ///
    /// When several table rows match, this is the greatest identifier in string order.
    pub format_version: &'static str,
    /// Message type read from the first component of the UNH S009 composite.
    pub message_type: String,
    /// Version string read from the fifth component (index 4) of the UNH S009 composite.
    pub unh_version: String,
    /// `Some("also matches …")` listing the other matching format versions when more
    /// than one row matched; `None` when exactly one row matched.
    pub note: Option<String>,
}
27
28#[derive(Debug, Clone, Error, PartialEq, Eq)]
30pub enum DetectError {
31 #[error("input contains no UNH segment")]
33 NoUnh,
34 #[error("unknown version '{unh_version}' for {message_type} (known: {known:?})")]
38 UnknownVersion {
39 message_type: String,
40 unh_version: String,
41 known: Vec<String>,
42 },
43 #[error("message type '{message_type}' not supported by auto-detection")]
45 UnsupportedMessageType { message_type: String },
46 #[error("failed to parse EDIFACT input: {0}")]
48 ParseFailure(String),
49}
50
51pub fn detect_format_version(edifact: &str) -> Result<DetectResult, DetectError> {
58 let trimmed = edifact.trim_start_matches([' ', '\t', '\r', '\n', '\u{feff}']);
59 if trimmed.is_empty() {
60 return Err(DetectError::NoUnh);
61 }
62
63 let mut handler = UnhCapture::default();
64 EdifactStreamParser::parse(trimmed.as_bytes(), &mut handler)
65 .map_err(|e| DetectError::ParseFailure(e.to_string()))?;
66
67 let message_type = handler.message_type.ok_or(DetectError::NoUnh)?;
68 let unh_version = handler
69 .unh_version
70 .ok_or_else(|| DetectError::ParseFailure("UNH S009 missing version component".into()))?;
71
72 if !message_type_supported(&message_type) {
73 return Err(DetectError::UnsupportedMessageType { message_type });
74 }
75
76 let mut candidates: Vec<&'static str> = VERSION_TABLE
77 .iter()
78 .filter(|(mt, ver, _)| *mt == message_type && *ver == unh_version)
79 .map(|(_, _, fv)| *fv)
80 .collect();
81 candidates.sort_by(|a, b| b.cmp(a));
83
84 match candidates.as_slice() {
85 [] => {
86 let known = known_versions_for(&message_type);
87 Err(DetectError::UnknownVersion {
88 message_type,
89 unh_version,
90 known,
91 })
92 }
93 [single] => Ok(DetectResult {
94 format_version: single,
95 message_type,
96 unh_version,
97 note: None,
98 }),
99 [newest, rest @ ..] => {
100 let other = rest.to_vec().join(", ");
101 Ok(DetectResult {
102 format_version: newest,
103 message_type,
104 unh_version,
105 note: Some(format!("also matches {}", other)),
106 })
107 }
108 }
109}
110
/// Lookup table of `(message type, UNH S009 version, format version)` rows.
///
/// Rows are grouped by format version. The same `(message type, version)` pair may
/// appear under several format versions; `detect_format_version` resolves that by
/// picking the identifier that sorts greatest as a string.
const VERSION_TABLE: &[(&str, &str, &str)] = &[
    // --- FV2504 ---
    ("APERAK", "2.1i", "FV2504"),
    ("COMDIS", "1.0e", "FV2504"),
    ("IFTSTA", "2.0f", "FV2504"),
    ("INVOIC", "2.8d", "FV2504"),
    ("MSCONS", "2.4c", "FV2504"),
    ("ORDERS", "1.4a", "FV2504"),
    ("ORDRSP", "1.4", "FV2504"),
    ("PARTIN", "1.0e", "FV2504"),
    ("PRICAT", "2.0d", "FV2504"),
    ("QUOTES", "1.3a", "FV2504"),
    ("REMADV", "2.9c", "FV2504"),
    ("REQOTE", "1.3b", "FV2504"),
    ("UTILMD", "G1.0a", "FV2504"),
    ("UTILMD", "S2.1", "FV2504"),
    ("UTILTS", "1.1e", "FV2504"),
    // --- FV2510 ---
    ("APERAK", "2.1i", "FV2510"),
    ("COMDIS", "1.0f", "FV2510"),
    ("IFTSTA", "2.0g", "FV2510"),
    ("INVOIC", "2.8e", "FV2510"),
    ("MSCONS", "2.4c", "FV2510"),
    ("ORDERS", "1.4b", "FV2510"),
    ("ORDRSP", "1.4a", "FV2510"),
    ("PARTIN", "1.0e", "FV2510"),
    ("PRICAT", "2.0e", "FV2510"),
    ("QUOTES", "1.3b", "FV2510"),
    ("REMADV", "2.9d", "FV2510"),
    ("REQOTE", "1.3c", "FV2510"),
    ("UTILMD", "G1.0a", "FV2510"),
    ("UTILMD", "S2.1", "FV2510"),
    ("UTILTS", "1.1e", "FV2510"),
    // --- FV2604 ---
    ("APERAK", "2.1i", "FV2604"),
    ("COMDIS", "1.0g", "FV2604"),
    ("IFTSTA", "2.0g", "FV2604"),
    ("INVOIC", "2.8e", "FV2604"),
    ("MSCONS", "2.4c", "FV2604"),
    ("ORDERS", "1.4b", "FV2604"),
    ("ORDRSP", "1.4b", "FV2604"),
    ("PARTIN", "1.0f", "FV2604"),
    ("PRICAT", "2.0e", "FV2604"),
    ("QUOTES", "1.3b", "FV2604"),
    ("REMADV", "2.9e", "FV2604"),
    ("REQOTE", "1.3c", "FV2604"),
    ("UTILMD", "G1.1", "FV2604"),
    ("UTILMD", "S2.1", "FV2604"),
    ("UTILTS", "1.1e", "FV2604"),
    // --- FV2610 ---
    ("APERAK", "2.2", "FV2610"),
    ("COMDIS", "1.0g", "FV2610"),
    ("IFTSTA", "2.1", "FV2610"),
    ("INVOIC", "2.8e", "FV2610"),
    ("MSCONS", "2.5", "FV2610"),
    ("ORDERS", "1.4c", "FV2610"),
    ("ORDRSP", "1.4c", "FV2610"),
    ("PARTIN", "1.1", "FV2610"),
    ("PRICAT", "2.1", "FV2610"),
    ("QUOTES", "1.3c", "FV2610"),
    ("REMADV", "2.9e", "FV2610"),
    ("REQOTE", "1.3c", "FV2610"),
    ("UTILMD", "G1.2", "FV2610"),
    ("UTILMD", "S2.2", "FV2610"),
    ("UTILTS", "1.1e", "FV2610"),
];
182
183fn known_versions_for(message_type: &str) -> Vec<String> {
184 let mut versions: Vec<String> = VERSION_TABLE
185 .iter()
186 .filter(|(mt, _, _)| *mt == message_type)
187 .map(|(_, ver, _)| (*ver).to_string())
188 .collect();
189 versions.sort();
190 versions.dedup();
191 versions
192}
193
194fn message_type_supported(message_type: &str) -> bool {
195 VERSION_TABLE.iter().any(|(mt, _, _)| *mt == message_type)
196}
197
/// Parser handler state that remembers what the first UNH segment announced.
///
/// Both fields start out as `None` and are filled in by `on_message_start`.
#[derive(Default)]
struct UnhCapture {
    /// First component of the UNH S009 composite, if one was seen.
    message_type: Option<String>,
    /// Component at index 4 of the UNH S009 composite, if present.
    unh_version: Option<String>,
}
203
204impl EdifactHandler for UnhCapture {
205 fn on_message_start(&mut self, unh: &RawSegment) -> Control {
206 if let Some(s009) = unh.elements.get(1) {
210 self.message_type = s009.first().map(|s| s.to_string());
211 self.unh_version = s009.get(4).map(|s| s.to_string());
212 }
213 Control::Stop
214 }
215}
216
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a single-message interchange with sender `s`, recipient `r` and reference
    /// `R`, using the given UNB timestamp and UNH S009 composite.
    fn interchange(timestamp: &str, s009: &str) -> String {
        format!("UNB+UNOC:3+s+r+{timestamp}+R'UNH+R+{s009}'UNT+1+R'UNZ+1+R'")
    }

    #[test]
    fn empty_input_returns_no_unh() {
        let err = detect_format_version("").unwrap_err();
        assert_eq!(err, DetectError::NoUnh);
    }

    #[test]
    fn extracts_unh_s009_for_utilmd_s2_1() {
        let input = concat!(
            "UNB+UNOC:3+sender+recv+250505:0826+REF'",
            "UNH+REF+UTILMD:D:11A:UN:S2.1'",
            "UNT+1+REF'",
            "UNZ+1+REF'",
        );
        let detected = detect_format_version(input).unwrap();
        assert_eq!(detected.message_type, "UTILMD");
        assert_eq!(detected.unh_version, "S2.1");
    }

    #[test]
    fn maps_utilmd_g1_1_to_fv2604() {
        let input = interchange("260211:1006", "UTILMD:D:11A:UN:G1.1");
        let detected = detect_format_version(&input).unwrap();
        assert_eq!(detected.format_version, "FV2604");
        assert_eq!(detected.note, None);
    }

    #[test]
    fn unknown_version_returns_known_list() {
        let input = interchange("250505:0826", "UTILMD:D:11A:UN:S2.0a");
        let err = detect_format_version(&input).unwrap_err();
        match err {
            DetectError::UnknownVersion {
                message_type,
                unh_version,
                known,
            } => {
                assert_eq!(message_type, "UTILMD");
                assert_eq!(unh_version, "S2.0a");
                assert!(known.contains(&"S2.1".to_string()));
            }
            other => panic!("expected UnknownVersion, got {other:?}"),
        }
    }

    #[test]
    fn unsupported_message_type() {
        let input = interchange("250505:0826", "FOOBAR:D:01A:UN:1.0");
        let err = detect_format_version(&input).unwrap_err();
        let expected = DetectError::UnsupportedMessageType {
            message_type: "FOOBAR".into(),
        };
        assert_eq!(err, expected);
    }

    #[test]
    fn utilmd_s2_1_picks_newest_with_note() {
        let input = interchange("251201:0826", "UTILMD:D:11A:UN:S2.1");
        let detected = detect_format_version(&input).unwrap();
        assert_eq!(detected.format_version, "FV2604");
        let note = detected.note.as_deref().unwrap_or("");
        for older in ["FV2504", "FV2510"] {
            assert!(note.contains(older), "note was: {note}");
        }
    }

    #[test]
    fn mscons_2_4c_picks_newest_with_note() {
        let input = interchange("260301:0826", "MSCONS:D:04B:UN:2.4c");
        let detected = detect_format_version(&input).unwrap();
        assert_eq!(detected.format_version, "FV2604");
        let note = detected.note.as_deref().unwrap_or("");
        assert!(note.contains("FV2504"));
    }

    #[test]
    fn handles_leading_whitespace_and_bom() {
        let body = interchange("250505:0826", "UTILMD:D:11A:UN:S2.1");
        let input = format!("\u{feff}\r\n {body}");
        let detected = detect_format_version(&input).unwrap();
        assert_eq!(detected.message_type, "UTILMD");
    }

    #[test]
    fn handles_una_with_default_delimiters() {
        let body = interchange("250505:0826", "UTILMD:D:11A:UN:S2.1");
        let input = format!("UNA:+.? '{body}");
        let detected = detect_format_version(&input).unwrap();
        assert_eq!(detected.format_version, "FV2604");
    }

    #[test]
    fn empty_s009_version_is_unknown_version_not_parse_failure() {
        let input = interchange("250505:0826", "UTILMD:D:11A:UN:");
        let err = detect_format_version(&input).unwrap_err();
        match err {
            DetectError::UnknownVersion { unh_version, .. } => assert_eq!(unh_version, ""),
            // NOTE(review): also accepted, despite the test name. Presumably the parser
            // may drop the trailing empty component; confirm against the parser.
            DetectError::ParseFailure(_) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn multi_message_interchange_uses_first_unh() {
        let input = concat!(
            "UNB+UNOC:3+s+r+250505:0826+R'",
            "UNH+R1+UTILMD:D:11A:UN:S2.1'UNT+1+R1'",
            "UNH+R2+APERAK:D:07B:UN:2.1i'UNT+1+R2'",
            "UNZ+2+R'",
        );
        let detected = detect_format_version(input).unwrap();
        assert_eq!(detected.message_type, "UTILMD");
    }

    #[test]
    fn no_unh_just_unb() {
        let input = "UNB+UNOC:3+s+r+250505:0826+R'UNZ+0+R'";
        let err = detect_format_version(input).unwrap_err();
        assert_eq!(err, DetectError::NoUnh);
    }
}