1use crate::validator::{Validator, report_error};
4use crate::{EdifactError, Segment, ValidationIssue, ValidationReport, ValidationSeverity};
5
/// Requirement status of a data element inside a segment definition.
///
/// The validator only enforces presence for [`Status::Mandatory`] entries;
/// [`Status::Conditional`] entries may be absent or empty.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Status {
    /// The element must be present with at least one non-empty component.
    Mandatory,
    /// The element may be omitted.
    Conditional,
}
14
15#[derive(Debug, Clone, Copy)]
17pub struct ElementRef {
18 pub position: u8,
20 pub data_element: &'static str,
22 pub status: Status,
24 pub max_repeat: u8,
26}
27
28#[derive(Debug)]
30pub struct SegmentDefinition {
31 pub tag: &'static str,
33 pub name: &'static str,
35 pub elements: &'static [ElementRef],
37}
38
39type SegmentLookupFn = fn(&str) -> Option<&'static SegmentDefinition>;
40type IsCodeValidFn = fn(&str, &str) -> bool;
41type SuggestCodeFn = fn(&str, &str) -> Option<&'static str>;
42type ExpectedComponentsFn = fn(&str, usize) -> Option<u8>;
43type AdditionalStructureRuleFn = fn(&Segment<'_>) -> Result<(), EdifactError>;
44type CodeListRulesFn = fn(tag: &str) -> &'static [(usize, usize, &'static str)];
47
/// Returns the default code-list rules for `tag`.
///
/// Each rule is an `(element_index, component_index, data_element)` triple.
/// Every tag known here has exactly one rule, covering the first component of
/// the first element. Unknown tags (the comparison is case-sensitive) yield
/// an empty slice.
pub(crate) fn base_code_list_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
    // One row per supported tag; kept in a static so the returned slices are
    // `'static`.
    static QUALIFIER_RULES: [(&str, [(usize, usize, &str); 1]); 8] = [
        ("BGM", [(0, 0, "1001")]),
        ("DTM", [(0, 0, "2005")]),
        ("NAD", [(0, 0, "3035")]),
        ("QTY", [(0, 0, "6063")]),
        ("RFF", [(0, 0, "1153")]),
        ("MOA", [(0, 0, "5025")]),
        ("PRI", [(0, 0, "5125")]),
        ("LOC", [(0, 0, "3227")]),
    ];

    match QUALIFIER_RULES.iter().find(|(known, _)| *known == tag) {
        Some((_, rules)) => &rules[..],
        None => &[],
    }
}
68
69#[derive(Debug, Clone)]
82pub struct DirectoryValidator {
83 directory_id: &'static str,
84 segment_lookup: SegmentLookupFn,
85 is_code_valid: IsCodeValidFn,
86 suggest_code: SuggestCodeFn,
87 expected_components: ExpectedComponentsFn,
88 code_list_rules: CodeListRulesFn,
89 additional_structure_rule: Option<AdditionalStructureRuleFn>,
90 message_type: Option<String>,
91 enforce_known_tags: bool,
92 structure_checks: bool,
93 code_list_checks: bool,
94}
95
96impl DirectoryValidator {
97 pub fn new(
99 directory_id: &'static str,
100 segment_lookup: SegmentLookupFn,
101 is_code_valid: IsCodeValidFn,
102 suggest_code: SuggestCodeFn,
103 expected_components: ExpectedComponentsFn,
104 additional_structure_rule: Option<AdditionalStructureRuleFn>,
105 ) -> Self {
106 Self {
107 directory_id,
108 segment_lookup,
109 is_code_valid,
110 suggest_code,
111 expected_components,
112 code_list_rules: base_code_list_rules,
113 additional_structure_rule,
114 message_type: None,
115 enforce_known_tags: true,
116 structure_checks: true,
117 code_list_checks: true,
118 }
119 }
120
121 pub fn with_code_list_rules(mut self, f: CodeListRulesFn) -> Self {
126 self.code_list_rules = f;
127 self
128 }
129
130 pub fn structure_only(mut self) -> Self {
132 self.structure_checks = true;
133 self.code_list_checks = false;
134 self
135 }
136
137 pub fn code_list_only(mut self) -> Self {
139 self.structure_checks = false;
140 self.code_list_checks = true;
141 self
142 }
143
144 pub fn enforce_known_tags(mut self, enforce: bool) -> Self {
146 self.enforce_known_tags = enforce;
147 self
148 }
149
150 fn detect_message_type(&self, segments: &[Segment<'_>]) -> Option<String> {
151 if let Some(explicit) = self.message_type.as_deref() {
152 return Some(explicit.to_owned());
153 }
154
155 segments
156 .iter()
157 .find(|s| s.tag == "UNH")
158 .and_then(|s| s.get_element(1))
159 .and_then(|e| e.get_component(0))
160 .map(str::to_owned)
161 }
162
163 fn required_segments_for(message_type: &str) -> &'static [&'static str] {
174 match message_type {
175 "UTILMD" | "ORDERS" | "INVOIC" => &["UNH", "BGM", "UNT"],
176 _ => &["UNH", "UNT"],
177 }
178 }
179
180 fn effective_component_count(seg: &Segment<'_>, element_idx: usize) -> Option<u8> {
192 let elem = seg.elements.get(element_idx)?;
193 let mut count = elem.components.len();
194 while count > 0 && elem.components[count - 1].as_ref().is_empty() {
195 count -= 1;
196 }
197 debug_assert!(
198 count <= u8::MAX as usize,
199 "effective_component_count: element has >255 components, which is invalid EDIFACT"
200 );
201 Some(count as u8)
202 }
203
204 fn validate_component_counts(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
205 for idx in 0..seg.elements.len() {
206 if let Some(expected) = (self.expected_components)(seg.tag, idx) {
207 let actual = Self::effective_component_count(seg, idx).unwrap_or(0);
208 if actual != expected {
209 return Err(EdifactError::InvalidComponentCount {
210 tag: seg.tag.to_owned(),
211 element_index: idx,
212 expected,
213 actual,
214 offset: seg.span.start,
215 });
216 }
217 }
218 }
219 Ok(())
220 }
221
222 fn validate_code_lists(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
223 let rules = (self.code_list_rules)(seg.tag);
224
225 for (elem_idx, comp_idx, de) in rules {
226 let value = seg
227 .get_element(*elem_idx)
228 .and_then(|e| e.get_component(*comp_idx))
229 .unwrap_or("");
230 if !value.is_empty() && !(self.is_code_valid)(de, value) {
231 let suggestion = (self.suggest_code)(de, value);
232 return Err(EdifactError::InvalidCodeValue {
233 tag: seg.tag.to_owned(),
234 element_index: *elem_idx,
235 value: value.to_owned(),
236 code_list: (*de).to_owned(),
237 offset: seg.span.start,
238 suggestion,
239 });
240 }
241 }
242
243 Ok(())
244 }
245}
246
247impl DirectoryValidator {
248 fn validate_segment(&self, seg: &Segment<'_>) -> Result<(), EdifactError> {
249 if !self.structure_checks && !self.code_list_checks {
250 return Ok(());
251 }
252
253 let Some(def) = (self.segment_lookup)(seg.tag) else {
254 if self.structure_checks && self.enforce_known_tags {
255 return Err(EdifactError::InvalidSegmentForMessage {
256 tag: seg.tag.to_owned(),
257 message_type: self
258 .message_type
259 .clone()
260 .unwrap_or_else(|| self.directory_id.to_owned()),
261 offset: seg.tag_span.start,
262 });
263 }
264 return Ok(());
265 };
266
267 let max_elements = def.elements.len();
268 let min_elements = def
269 .elements
270 .iter()
271 .rposition(|e| e.status == Status::Mandatory)
272 .map(|idx| idx + 1)
273 .unwrap_or(0);
274 let actual = seg.elements.len();
275
276 if self.structure_checks && (actual < min_elements || actual > max_elements) {
277 return Err(EdifactError::InvalidElementCount {
278 tag: seg.tag.to_owned(),
279 min: min_elements,
280 max: max_elements,
281 actual,
282 offset: seg.span.start,
283 });
284 }
285
286 if self.structure_checks {
287 for element in def
288 .elements
289 .iter()
290 .filter(|e| e.status == Status::Mandatory)
291 {
292 let idx = (element.position as usize).saturating_sub(1);
293 let is_present = seg
294 .elements
295 .get(idx)
296 .is_some_and(|elem| elem.components.iter().any(|c| !c.as_ref().is_empty()));
297 if !is_present {
298 return Err(EdifactError::MissingRequiredElement {
299 tag: seg.tag.to_owned(),
300 element_index: idx,
301 });
302 }
303 }
304 self.validate_component_counts(seg)?;
305
306 if let Some(rule) = self.additional_structure_rule {
307 rule(seg)?;
308 }
309 }
310
311 if self.code_list_checks {
312 self.validate_code_lists(seg)?;
313 }
314
315 Ok(())
316 }
317}
318
319impl Validator for DirectoryValidator {
320 fn set_message_type(&mut self, message_type: Option<&str>) {
321 self.message_type = message_type.map(str::to_owned);
322 }
323
324 fn validate_batch(&self, segments: &[Segment<'_>], report: &mut ValidationReport) {
325 for seg in segments {
326 if let Err(err) = self.validate_segment(seg) {
327 report_error(report, err);
328 }
329 }
330
331 if self.structure_checks {
332 if let Some(message_type) = self.detect_message_type(segments) {
333 for required_tag in Self::required_segments_for(&message_type) {
334 if segments.iter().all(|s| s.tag != *required_tag) {
335 report.add_error(
336 ValidationIssue::new(
337 ValidationSeverity::Error,
338 format!(
339 "required segment {} missing for message type {}",
340 required_tag, message_type
341 ),
342 )
343 .with_segment(*required_tag)
344 .with_suggestion("Add the mandatory segment at the correct position"),
345 );
346 }
347 }
348
349 let seq = Self::required_segments_for(&message_type);
350 let mut last_idx = None;
351 for tag in seq {
352 if let Some(idx) = segments.iter().position(|s| s.tag == *tag) {
353 if let Some(prev) = last_idx {
354 if idx < prev {
355 report.add_error(
356 ValidationIssue::new(
357 ValidationSeverity::Error,
358 format!(
359 "segment sequence violation for message type {}: '{}' appears out of order",
360 message_type, tag
361 ),
362 )
363 .with_segment(*tag)
364 .with_suggestion(
365 "Ensure required segments follow UN/EDIFACT canonical order",
366 ),
367 );
368 }
369 }
370 last_idx = Some(idx);
371 }
372 }
373 }
374 }
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;

    /// Single mandatory composite at position 1.
    static TEST_ELEMENTS: &[ElementRef] = &[ElementRef {
        position: 1,
        data_element: "C507",
        status: Status::Mandatory,
        max_repeat: 1,
    }];

    /// Minimal directory entry for the made-up `TST` segment.
    static TEST_SEGMENT: SegmentDefinition = SegmentDefinition {
        tag: "TST",
        name: "Test segment",
        elements: TEST_ELEMENTS,
    };

    /// Directory lookup that only knows `TST`.
    fn segment_lookup(tag: &str) -> Option<&'static SegmentDefinition> {
        match tag {
            "TST" => Some(&TEST_SEGMENT),
            _ => None,
        }
    }

    /// Accepts every code value.
    fn code_valid(_de: &str, _code: &str) -> bool {
        true
    }

    /// Never offers a suggestion.
    fn suggest_code(_de: &str, _code: &str) -> Option<&'static str> {
        None
    }

    /// States no component-count expectation for any element.
    fn expected_components(_tag: &str, _idx: usize) -> Option<u8> {
        None
    }

    #[test]
    fn mandatory_composite_present_when_any_component_non_empty() {
        // First component empty, second filled: still counts as present.
        let input = b"TST+:ABC'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            code_valid,
            suggest_code,
            expected_components,
            None,
        );

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        assert!(!report.has_errors());
    }

    /// Parses `input` and returns its first segment.
    ///
    /// Takes a `'static` slice so the returned segment can borrow from the
    /// input directly; every caller passes a byte-string literal, so no copy
    /// or `Box::leak` is needed.
    fn parse_single(input: &'static [u8]) -> crate::model::Segment<'static> {
        crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed")
            .into_iter()
            .next()
            .expect("at least one segment")
    }

    #[test]
    fn trailing_empty_component_stripped_from_dtm() {
        let seg = parse_single(b"DTM+137:20200101:'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(2), "trailing empty component should be stripped");
    }

    #[test]
    fn all_empty_components_result_in_zero() {
        let seg = parse_single(b"NAD+MS++:'");
        let count = DirectoryValidator::effective_component_count(&seg, 2);
        assert_eq!(count, Some(0), "all-empty composite should have effective count 0");
    }

    #[test]
    fn non_empty_component_not_stripped() {
        let seg = parse_single(b"DTM+137:20200101:102'");
        let count = DirectoryValidator::effective_component_count(&seg, 0);
        assert_eq!(count, Some(3), "no components should be stripped when all non-empty");
    }

    #[test]
    fn with_code_list_rules_overrides_base() {
        /// Checks the first component of `TST` against a custom code list.
        fn custom_rules(tag: &str) -> &'static [(usize, usize, &'static str)] {
            match tag {
                "TST" => &[(0, 0, "CUSTOM_DE")],
                _ => &[],
            }
        }
        /// Only the literal `VALID` is an accepted code.
        fn custom_code_valid(_de: &str, code: &str) -> bool {
            code == "VALID"
        }
        fn no_suggestion(_de: &str, _code: &str) -> Option<&'static str> {
            None
        }

        let input = b"TST+INVALID'";
        let segments: Vec<_> = crate::from_bytes(input)
            .collect::<Result<Vec<_>, _>>()
            .expect("parse should succeed");

        let validator = DirectoryValidator::new(
            "TEST",
            segment_lookup,
            custom_code_valid,
            no_suggestion,
            expected_components,
            None,
        )
        .with_code_list_rules(custom_rules);

        let mut report = ValidationReport::default();
        validator.validate_batch(&segments, &mut report);
        assert!(
            report.has_warnings(),
            "INVALID is not in the custom code list so validation must warn"
        );
    }
}