Skip to main content

copybook_corruption_detectors/
lib.rs

1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//! Focused field-level corruption detectors.
4//!
5//! This microcrate owns only packed-decimal and EBCDIC-byte scanners used by higher-level
6//! copybook workflows.
7
8use copybook_core::{Error, ErrorCode};
9use copybook_corruption_predicates::{
10    is_invalid_comp3_high_nibble, is_invalid_comp3_low_nibble, is_invalid_comp3_sign_nibble,
11    is_likely_corrupted_ebcdic_byte,
12};
13
14/// Detect potential EBCDIC corruption in text fields.
15#[inline]
16#[must_use = "Inspect the returned errors to handle corruption findings"]
17pub fn detect_ebcdic_corruption(data: &[u8], field_path: &str) -> Vec<Error> {
18    let mut errors = Vec::new();
19
20    for (i, &byte) in data.iter().enumerate() {
21        if is_likely_corrupted_ebcdic_byte(byte) {
22            let error = Error::new(
23                ErrorCode::CBKC301_INVALID_EBCDIC_BYTE,
24                format!(
25                    "Potentially corrupted EBCDIC byte 0x{byte:02X} at position {i} in field {field_path}"
26                ),
27            )
28            .with_field(field_path)
29            .with_offset(i as u64);
30
31            errors.push(error);
32        }
33    }
34
35    errors
36}
37
38/// Detect patterns in packed decimal that suggest corruption.
39#[inline]
40#[must_use = "Inspect the returned errors to handle corruption findings"]
41pub fn detect_packed_corruption(data: &[u8], field_path: &str) -> Vec<Error> {
42    let mut errors = Vec::new();
43
44    for (i, &byte) in data.iter().enumerate() {
45        let high_nibble = (byte >> 4) & 0x0F;
46        let low_nibble = byte & 0x0F;
47
48        if is_invalid_comp3_high_nibble(byte) {
49            let error = Error::new(
50                ErrorCode::CBKD401_COMP3_INVALID_NIBBLE,
51                format!(
52                    "invalid high nibble 0x{high_nibble:X} in packed decimal at byte {i} (full byte: 0x{byte:02X})"
53                ),
54            )
55            .with_field(field_path)
56            .with_offset(i as u64);
57
58            errors.push(error);
59        }
60
61        if i == data.len() - 1 {
62            if is_invalid_comp3_sign_nibble(byte) {
63                let error = Error::new(
64                    ErrorCode::CBKD401_COMP3_INVALID_NIBBLE,
65                    format!(
66                        "invalid sign nibble 0x{low_nibble:X} in packed decimal (should be C/D/F), byte {i} (full byte: 0x{byte:02X})"
67                    ),
68                )
69                .with_field(field_path)
70                .with_offset(i as u64);
71
72                errors.push(error);
73            }
74        } else if is_invalid_comp3_low_nibble(byte) {
75            let error = Error::new(
76                ErrorCode::CBKD401_COMP3_INVALID_NIBBLE,
77                format!(
78                    "invalid low nibble 0x{low_nibble:X} in packed decimal at byte {i} (full byte: 0x{byte:02X})"
79                ),
80            )
81            .with_field(field_path)
82            .with_offset(i as u64);
83
84            errors.push(error);
85        }
86    }
87
88    errors
89}
90
#[cfg(test)]
// Test code may unwrap/expect freely: a panic here is simply a failed test.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Field path stored in an error's context, if one was recorded.
    fn field_path_of(error: &Error) -> Option<&str> {
        error
            .context
            .as_ref()
            .and_then(|c| c.field_path.as_deref())
    }

    /// True when at least one finding's message contains `needle`.
    fn any_message_contains(errors: &[Error], needle: &str) -> bool {
        errors.iter().any(|e| e.message.contains(needle))
    }

    // ── EBCDIC corruption tests ──────────────────────────────────────

    #[test]
    fn detect_ebcdic_corruption_finds_corrupted_bytes() {
        let corrupted_data = [0xC1, 0x00, 0x7F, 0xC2];
        let errors = detect_ebcdic_corruption(&corrupted_data, "TEXT.FIELD");
        assert_eq!(errors.len(), 2);
    }

    #[test]
    fn detect_ebcdic_corruption_ignores_clean_input() {
        let normal_data = [0xC1, 0xC2, 0xC3];
        assert!(detect_ebcdic_corruption(&normal_data, "TEXT.FIELD").is_empty());
    }

    #[test]
    fn ebcdic_empty_input_returns_no_errors() {
        assert!(detect_ebcdic_corruption(&[], "EMPTY").is_empty());
    }

    #[test]
    fn ebcdic_single_clean_byte() {
        assert!(detect_ebcdic_corruption(&[0xF0], "F").is_empty());
    }

    #[test]
    fn ebcdic_single_corrupted_byte() {
        let errors = detect_ebcdic_corruption(&[0x01], "F");
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].code, ErrorCode::CBKC301_INVALID_EBCDIC_BYTE);
    }

    #[test]
    fn ebcdic_boundary_0x1f_is_corrupted() {
        let errors = detect_ebcdic_corruption(&[0x1F], "BND");
        assert_eq!(errors.len(), 1);
    }

    #[test]
    fn ebcdic_boundary_0x20_is_clean() {
        assert!(detect_ebcdic_corruption(&[0x20], "BND").is_empty());
    }

    #[test]
    fn ebcdic_boundary_0x7e_is_clean() {
        assert!(detect_ebcdic_corruption(&[0x7E], "BND").is_empty());
    }

    #[test]
    fn ebcdic_boundary_0x7f_is_corrupted() {
        let errors = detect_ebcdic_corruption(&[0x7F], "BND");
        assert_eq!(errors.len(), 1);
    }

    #[test]
    fn ebcdic_boundary_0x9f_is_corrupted() {
        let errors = detect_ebcdic_corruption(&[0x9F], "BND");
        assert_eq!(errors.len(), 1);
    }

    #[test]
    fn ebcdic_boundary_0xa0_is_clean() {
        assert!(detect_ebcdic_corruption(&[0xA0], "BND").is_empty());
    }

    #[test]
    fn ebcdic_error_field_path_propagated() {
        let errors = detect_ebcdic_corruption(&[0x00], "REC.GROUP.FIELD");
        // `first()` yields `None` for an empty result, so a missing finding fails the
        // comparison with a readable diff instead of an index panic.
        assert_eq!(
            errors.first().and_then(field_path_of),
            Some("REC.GROUP.FIELD")
        );
    }

    #[test]
    fn ebcdic_error_offset_is_correct() {
        let data = [0xC1, 0xC2, 0x05, 0xC3];
        let errors = detect_ebcdic_corruption(&data, "F");
        assert_eq!(errors.len(), 1);
        assert_eq!(
            errors[0].context.as_ref().and_then(|c| c.byte_offset),
            Some(2)
        );
    }

    #[test]
    fn ebcdic_all_c0_controls_flagged() {
        let data: Vec<u8> = (0x00..=0x1F).collect();
        let errors = detect_ebcdic_corruption(&data, "C0");
        assert_eq!(errors.len(), 32);
    }

    #[test]
    fn ebcdic_all_c1_controls_flagged() {
        let data: Vec<u8> = (0x7F..=0x9F).collect();
        let errors = detect_ebcdic_corruption(&data, "C1");
        assert_eq!(errors.len(), 33);
    }

    // ── Packed decimal corruption tests ──────────────────────────────

    #[test]
    fn detect_packed_corruption_flags_invalid_sign() {
        let invalid_sign = [0x12, 0x34, 0x56];
        let errors = detect_packed_corruption(&invalid_sign, "TEST.FIELD");
        assert!(!errors.is_empty());
        assert!(any_message_contains(&errors, "invalid sign nibble"));
    }

    #[test]
    fn detect_packed_corruption_flags_invalid_high_nibble() {
        let invalid = [0xA2, 0x34, 0x5C];
        let errors = detect_packed_corruption(&invalid, "TEST.FIELD");
        assert!(!errors.is_empty());
        assert!(any_message_contains(&errors, "invalid high nibble"));
    }

    #[test]
    fn detect_packed_corruption_tolerates_valid_payload() {
        let valid_packed = [0x12, 0x34, 0x5C];
        let errors = detect_packed_corruption(&valid_packed, "TEST.FIELD");
        assert!(errors.is_empty());
    }

    #[test]
    fn packed_empty_input_returns_no_errors() {
        assert!(detect_packed_corruption(&[], "EMPTY").is_empty());
    }

    #[test]
    fn packed_single_byte_valid_positive() {
        // 0x1C = digit 1, sign C (positive)
        assert!(detect_packed_corruption(&[0x1C], "S").is_empty());
    }

    #[test]
    fn packed_single_byte_valid_negative() {
        // 0x5D = digit 5, sign D (negative)
        assert!(detect_packed_corruption(&[0x5D], "S").is_empty());
    }

    #[test]
    fn packed_single_byte_valid_unsigned() {
        // 0x3F = digit 3, sign F (unsigned)
        assert!(detect_packed_corruption(&[0x3F], "S").is_empty());
    }

    #[test]
    fn packed_single_byte_invalid_sign() {
        // 0x17 = digit 1, sign nibble 7 (invalid)
        let errors = detect_packed_corruption(&[0x17], "S");
        assert_eq!(errors.len(), 1);
        assert!(errors[0].message.contains("invalid sign nibble"));
    }

    #[test]
    fn packed_invalid_low_nibble_non_terminal() {
        // 0x1A in non-terminal position: low nibble A is invalid
        let errors = detect_packed_corruption(&[0x1A, 0x5C], "F");
        assert!(any_message_contains(&errors, "invalid low nibble"));
    }

    #[test]
    fn packed_both_nibbles_invalid() {
        // 0xAB: high nibble A invalid, low nibble B invalid (non-terminal)
        let errors = detect_packed_corruption(&[0xAB, 0x1C], "F");
        assert!(errors.len() >= 2);
        // Pin down *which* findings were produced, not just how many.
        assert!(any_message_contains(&errors, "invalid high nibble"));
        assert!(any_message_contains(&errors, "invalid low nibble"));
    }

    #[test]
    fn packed_error_code_is_comp3_invalid_nibble() {
        let errors = detect_packed_corruption(&[0xA0, 0x1C], "X");
        // `all` is vacuously true on an empty iterator, so require a finding first.
        assert!(
            !errors.is_empty(),
            "expected at least one finding for high nibble A"
        );
        assert!(
            errors
                .iter()
                .all(|e| e.code == ErrorCode::CBKD401_COMP3_INVALID_NIBBLE)
        );
    }

    #[test]
    fn packed_sign_c_d_f_all_valid() {
        assert!(detect_packed_corruption(&[0x1C], "V").is_empty()); // C = positive
        assert!(detect_packed_corruption(&[0x1D], "V").is_empty()); // D = negative
        assert!(detect_packed_corruption(&[0x1F], "V").is_empty()); // F = unsigned
    }

    #[test]
    fn packed_field_path_in_error() {
        let errors = detect_packed_corruption(&[0xBB], "MY.PACKED.FIELD");
        // `all` is vacuously true on an empty iterator, so require a finding first.
        assert!(
            !errors.is_empty(),
            "expected at least one finding for byte 0xBB"
        );
        assert!(
            errors
                .iter()
                .all(|e| field_path_of(e) == Some("MY.PACKED.FIELD"))
        );
    }
}