Skip to main content

copybook_corruption_rdw/
lib.rs

1#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
2// SPDX-License-Identifier: AGPL-3.0-or-later
3//! RDW corruption heuristics.
4//!
5//! This crate owns only RDW ASCII-corruption detection.
6
7use copybook_core::{Error, ErrorCode};
8use copybook_corruption_predicates::is_ascii_printable;
9use copybook_rdw_predicates::rdw_is_suspect_ascii_corruption_slice;
10
11/// Heuristics for detecting ASCII transfer corruption in RDW headers.
12///
13/// This function implements the `CBKF104_RDW_SUSPECT_ASCII` detection logic by
14/// checking for patterns that suggest binary data was converted through ASCII
15/// transfer by mistake (for example, EBCDIC/ASCII confusion around the length
16/// field).
17#[inline]
18#[must_use = "Handle the returned error when corruption is detected"]
19pub fn detect_rdw_ascii_corruption(rdw_bytes: &[u8]) -> Option<Error> {
20    if rdw_bytes.len() < 4 {
21        return None;
22    }
23
24    // Extract the length field (first 2 bytes, big-endian).
25    let length_bytes = [rdw_bytes[0], rdw_bytes[1]];
26    let length = u16::from_be_bytes(length_bytes);
27
28    // Heuristic 1: Length field contains ASCII digits.
29    if rdw_is_suspect_ascii_corruption_slice(rdw_bytes) {
30        return Some(Error::new(
31            ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
32            format!(
33                "RDW length field appears to contain ASCII digits: 0x{:02X}{:02X} ('{}{}')",
34                rdw_bytes[0],
35                rdw_bytes[1],
36                ascii_char_or_dot(rdw_bytes[0]),
37                ascii_char_or_dot(rdw_bytes[1])
38            ),
39        ));
40    }
41
42    // Heuristic 2: Unreasonably large length values that could be ASCII.
43    if length > 0x3030 && length <= 0x3939 {
44        // Range covers ASCII '00'..='99' when interpreted as binary.
45        return Some(Error::new(
46            ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
47            format!(
48                "RDW length field suspiciously large ({length}), may be ASCII-corrupted: 0x{length:04X}"
49            ),
50        ));
51    }
52
53    // Heuristic 3: Reserved bytes contain ASCII-like printable bytes.
54    if is_ascii_printable(rdw_bytes[2])
55        && is_ascii_printable(rdw_bytes[3])
56        && rdw_bytes[2..4] != [0x00, 0x00]
57    {
58        return Some(Error::new(
59            ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
60            format!(
61                "RDW reserved bytes contain ASCII-like data: 0x{:02X}{:02X} ('{}{}')",
62                rdw_bytes[2],
63                rdw_bytes[3],
64                ascii_char_or_dot(rdw_bytes[2]),
65                ascii_char_or_dot(rdw_bytes[3])
66            ),
67        ));
68    }
69
70    None
71}
72
73fn ascii_char_or_dot(byte: u8) -> char {
74    if is_ascii_printable(byte) {
75        byte as char
76    } else {
77        '.'
78    }
79}
80
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;
    use copybook_core::ErrorCode;
    use copybook_rdw_predicates::rdw_is_suspect_ascii_corruption_slice;
    use proptest::prelude::*;

    /// Reference model: `true` exactly when any of the three heuristics is
    /// expected to fire for `data`.
    fn expected_corruption_present(data: &[u8]) -> bool {
        let (len_hi, len_lo, rsv_hi, rsv_lo) = match *data {
            [a, b, c, d, ..] => (a, b, c, d),
            _ => return false,
        };

        if rdw_is_suspect_ascii_corruption_slice(data) {
            return true;
        }

        let length = u16::from_be_bytes([len_hi, len_lo]);
        if (0x3030u16..=0x3939u16).contains(&length) {
            return true;
        }

        is_ascii_printable(rsv_hi)
            && is_ascii_printable(rsv_lo)
            && [rsv_hi, rsv_lo] != [0x00, 0x00]
    }

    /// Shorthand for "the detector reported corruption for this header".
    fn is_flagged(header: &[u8]) -> bool {
        detect_rdw_ascii_corruption(header).is_some()
    }

    #[test]
    fn detects_ascii_digit_header() {
        let outcome = detect_rdw_ascii_corruption(&[b'1', b'2', 0x00, 0x00]);
        let error = outcome.expect("ASCII digits should be flagged");
        assert_eq!(error.code, ErrorCode::CBKF104_RDW_SUSPECT_ASCII);
    }

    #[test]
    fn short_headers_do_not_error() {
        // Two bytes are not enough to form a header, even if both are digits.
        assert!(!is_flagged(&[0x31, 0x32]));
    }

    #[test]
    fn detects_reserved_printables() {
        let outcome = detect_rdw_ascii_corruption(&[0x00, 0x50, b'A', b'B']);
        let error = outcome.expect("reserved bytes should be flagged");
        assert_eq!(error.code, ErrorCode::CBKF104_RDW_SUSPECT_ASCII);
    }

    #[test]
    fn ascii_heuristic_and_reference_match() {
        let header = *b"0123";
        let actual = is_flagged(&header);
        let expected = expected_corruption_present(&header);
        assert_eq!(actual, expected);
    }

    #[test]
    fn empty_input_returns_none() {
        let nothing: [u8; 0] = [];
        assert!(!is_flagged(&nothing));
    }

    #[test]
    fn exactly_3_bytes_returns_none() {
        // One byte short of a full header.
        assert!(!is_flagged(&[b'1', b'2', 0x00]));
    }

    #[test]
    fn clean_binary_header_returns_none() {
        // Ordinary RDW: length = 80 (0x0050), reserved = 0x0000.
        let header = [0x00, 0x50, 0x00, 0x00];
        assert!(!is_flagged(&header));
    }

    #[test]
    fn heuristic2_length_in_ascii_range_0x3031() {
        // 0x3031 (ASCII "01") sits inside 0x3030..=0x3939. Both bytes are
        // ASCII digits, so heuristic 1 is the one that reports it.
        let outcome = detect_rdw_ascii_corruption(&[0x30, 0x31, 0x00, 0x00]);
        assert_eq!(
            outcome.as_ref().map(|error| &error.code),
            Some(&ErrorCode::CBKF104_RDW_SUSPECT_ASCII)
        );
    }

    #[test]
    fn heuristic2_length_0x3939_upper_bound() {
        // 0x3939 (ASCII "99") is the top of the suspect range.
        assert!(is_flagged(&[0x39, 0x39, 0x00, 0x00]));
    }

    #[test]
    fn heuristic3_reserved_both_printable() {
        // The length bytes are not ASCII digits; the printable reserved bytes
        // are what must trigger the report.
        let outcome = detect_rdw_ascii_corruption(&[0x00, 0x10, b'X', b'Y']);
        assert!(matches!(
            outcome,
            Some(ref error) if error.message.contains("reserved bytes")
        ));
    }

    #[test]
    fn heuristic3_reserved_one_printable_one_not() {
        // Heuristic 3 needs both reserved bytes to be printable; one is not enough.
        assert!(!is_flagged(&[0x00, 0x10, b'X', 0x01]));
    }

    #[test]
    fn heuristic3_reserved_both_zero_not_flagged() {
        // All-zero reserved bytes must never trip heuristic 3.
        assert!(!is_flagged(&[0x00, 0x10, 0x00, 0x00]));
    }

    #[test]
    fn longer_input_only_first_4_bytes_matter() {
        // Trailing payload bytes after the header do not affect detection.
        let record = [b'5', b'6', 0x00, 0x00, 0xFF, 0xFF, 0xFF];
        assert!(is_flagged(&record));
    }

    #[test]
    fn all_zeros_returns_none() {
        assert!(!is_flagged(&[0x00; 4]));
    }

    #[test]
    fn all_0xff_returns_none() {
        // 0xFF is neither an ASCII digit nor ASCII printable.
        assert!(!is_flagged(&[0xFF; 4]));
    }

    proptest! {
        #[test]
        fn matches_reference_model(data in prop::collection::vec(any::<u8>(), 0..128)) {
            // The detector and the reference model must agree on arbitrary input.
            prop_assert_eq!(
                detect_rdw_ascii_corruption(&data).is_some(),
                expected_corruption_present(&data)
            );
        }
    }
}