#![cfg_attr(not(test), deny(clippy::unwrap_used, clippy::expect_used))]
use copybook_core::{Error, ErrorCode};
use copybook_corruption_predicates::is_ascii_printable;
use copybook_rdw_predicates::rdw_is_suspect_ascii_corruption_slice;
#[inline]
#[must_use = "Handle the returned error when corruption is detected"]
pub fn detect_rdw_ascii_corruption(rdw_bytes: &[u8]) -> Option<Error> {
if rdw_bytes.len() < 4 {
return None;
}
let length_bytes = [rdw_bytes[0], rdw_bytes[1]];
let length = u16::from_be_bytes(length_bytes);
if rdw_is_suspect_ascii_corruption_slice(rdw_bytes) {
return Some(Error::new(
ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
format!(
"RDW length field appears to contain ASCII digits: 0x{:02X}{:02X} ('{}{}')",
rdw_bytes[0],
rdw_bytes[1],
ascii_char_or_dot(rdw_bytes[0]),
ascii_char_or_dot(rdw_bytes[1])
),
));
}
if length > 0x3030 && length <= 0x3939 {
return Some(Error::new(
ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
format!(
"RDW length field suspiciously large ({length}), may be ASCII-corrupted: 0x{length:04X}"
),
));
}
if is_ascii_printable(rdw_bytes[2])
&& is_ascii_printable(rdw_bytes[3])
&& rdw_bytes[2..4] != [0x00, 0x00]
{
return Some(Error::new(
ErrorCode::CBKF104_RDW_SUSPECT_ASCII,
format!(
"RDW reserved bytes contain ASCII-like data: 0x{:02X}{:02X} ('{}{}')",
rdw_bytes[2],
rdw_bytes[3],
ascii_char_or_dot(rdw_bytes[2]),
ascii_char_or_dot(rdw_bytes[3])
),
));
}
None
}
fn ascii_char_or_dot(byte: u8) -> char {
if is_ascii_printable(byte) {
byte as char
} else {
'.'
}
}
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
use super::*;
use copybook_core::ErrorCode;
use copybook_rdw_predicates::rdw_is_suspect_ascii_corruption_slice;
use proptest::prelude::*;
fn expected_corruption_present(data: &[u8]) -> bool {
if data.len() < 4 {
return false;
}
let length = u16::from_be_bytes([data[0], data[1]]);
let reserved_bytes = [data[2], data[3]];
rdw_is_suspect_ascii_corruption_slice(data)
|| (0x3030u16..=0x3939u16).contains(&length)
|| (is_ascii_printable(data[2])
&& is_ascii_printable(data[3])
&& reserved_bytes != [0x00, 0x00])
}
#[test]
fn detects_ascii_digit_header() {
let error = detect_rdw_ascii_corruption(b"12\0\0").expect("ASCII digits should be flagged");
assert_eq!(error.code, ErrorCode::CBKF104_RDW_SUSPECT_ASCII);
}
#[test]
fn short_headers_do_not_error() {
assert!(detect_rdw_ascii_corruption(b"\x31\x32").is_none());
}
#[test]
fn detects_reserved_printables() {
let error = detect_rdw_ascii_corruption(&[0x00, 0x50, b'A', b'B'])
.expect("reserved bytes should be flagged");
assert_eq!(error.code, ErrorCode::CBKF104_RDW_SUSPECT_ASCII);
}
#[test]
fn ascii_heuristic_and_reference_match() {
let header = [b'0', b'1', b'2', b'3'];
assert_eq!(
detect_rdw_ascii_corruption(&header).is_some(),
expected_corruption_present(&header)
);
}
#[test]
fn empty_input_returns_none() {
assert!(detect_rdw_ascii_corruption(&[]).is_none());
}
#[test]
fn exactly_3_bytes_returns_none() {
assert!(detect_rdw_ascii_corruption(&[b'1', b'2', 0x00]).is_none());
}
#[test]
fn clean_binary_header_returns_none() {
assert!(detect_rdw_ascii_corruption(&[0x00, 0x50, 0x00, 0x00]).is_none());
}
#[test]
fn heuristic2_length_in_ascii_range_0x3031() {
let result = detect_rdw_ascii_corruption(&[0x30, 0x31, 0x00, 0x00]);
assert!(result.is_some());
assert_eq!(result.unwrap().code, ErrorCode::CBKF104_RDW_SUSPECT_ASCII);
}
#[test]
fn heuristic2_length_0x3939_upper_bound() {
let result = detect_rdw_ascii_corruption(&[0x39, 0x39, 0x00, 0x00]);
assert!(result.is_some());
}
#[test]
fn heuristic3_reserved_both_printable() {
let result = detect_rdw_ascii_corruption(&[0x00, 0x10, b'X', b'Y']);
assert!(result.is_some());
assert!(result.unwrap().message.contains("reserved bytes"));
}
#[test]
fn heuristic3_reserved_one_printable_one_not() {
assert!(detect_rdw_ascii_corruption(&[0x00, 0x10, b'X', 0x01]).is_none());
}
#[test]
fn heuristic3_reserved_both_zero_not_flagged() {
assert!(detect_rdw_ascii_corruption(&[0x00, 0x10, 0x00, 0x00]).is_none());
}
#[test]
fn longer_input_only_first_4_bytes_matter() {
let data = [b'5', b'6', 0x00, 0x00, 0xFF, 0xFF, 0xFF];
let result = detect_rdw_ascii_corruption(&data);
assert!(result.is_some());
}
#[test]
fn all_zeros_returns_none() {
assert!(detect_rdw_ascii_corruption(&[0x00, 0x00, 0x00, 0x00]).is_none());
}
#[test]
fn all_0xff_returns_none() {
assert!(detect_rdw_ascii_corruption(&[0xFF, 0xFF, 0xFF, 0xFF]).is_none());
}
proptest! {
#[test]
fn matches_reference_model(data in prop::collection::vec(any::<u8>(), 0..128)) {
let expected = expected_corruption_present(&data);
let actual = detect_rdw_ascii_corruption(&data).is_some();
prop_assert_eq!(actual, expected);
}
}
}