/// Location of one restart marker (`0xFF 0xD0` ..= `0xFF 0xD7`) found by the
/// scanners in this file.
///
/// `PartialEq`/`Eq` are derived so that markers (and vectors of them) can be
/// compared directly, e.g. with `assert_eq!`, instead of field by field.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// NOTE(review): presumably kept because not every field is read outside of
// tests in every build configuration — confirm before removing.
#[allow(dead_code)]
pub(crate) struct RstMarkerPos {
    /// Index, within the scanned slice, of the marker's leading `0xFF` byte.
    pub offset: usize,
    /// Restart number: the marker's second byte minus `0xD0`, so `0..=7`.
    pub rst_num: u8,
}
/// Outcome of scanning a byte slice for restart markers.
///
/// `Debug` and `Clone` are derived so results can be logged, printed in test
/// failures and duplicated without hand-written code.
#[derive(Debug, Clone)]
pub(crate) struct RstScanResult {
    /// Every restart marker found, in ascending `offset` order.
    pub markers: Vec<RstMarkerPos>,
    /// Position at which scanning stopped: the index of the `0xFF` that
    /// begins the first marker other than a restart marker, or — when no such
    /// marker is met — the scanner's final cursor position. Because the
    /// scanners look at bytes in pairs, that final position can be one less
    /// than the slice length.
    pub entropy_end: usize,
}
/// Finds the restart markers in `scan_data`, choosing an implementation at
/// run time.
///
/// On `x86_64` builds, if `archmage::X64V3Token::summon()` yields a token the
/// AVX2 scanner is used; in every other case the scalar scanner runs.
/// `capacity_hint` is passed through unchanged and becomes the initial
/// capacity of the returned marker vector.
pub(crate) fn scan_rst_markers(scan_data: &[u8], capacity_hint: usize) -> RstScanResult {
    #[cfg(target_arch = "x86_64")]
    {
        use archmage::SimdToken;
        let summoned = archmage::X64V3Token::summon();
        if let Some(avx2_token) = summoned {
            return scan_rst_markers_avx2(avx2_token, scan_data, capacity_hint);
        }
    }
    // Portable fallback: other architectures, or no token available.
    scan_rst_markers_scalar(scan_data, capacity_hint)
}
/// Portable, byte-at-a-time restart-marker scanner.
///
/// Each `0xFF` byte is classified by the byte that follows it:
///
/// * `0xD0..=0xD7` — restart marker: recorded, and both bytes are consumed.
/// * `0x00` — stuffed byte: both bytes are consumed.
/// * `0xFF` — fill byte: only the first `0xFF` is consumed, so the second one
///   is examined as a possible marker prefix in its own right. Consuming both
///   would hide a marker such as `FF FF D0` or `FF FF D9`.
/// * anything else — a different marker: scanning stops there.
///
/// `capacity_hint` is the initial capacity of the marker vector.
///
/// The returned `entropy_end` is the index of the `0xFF` that stopped the
/// scan, or the final cursor position if the data ran out first (which is
/// `data.len() - 1` when the last byte was not consumed as part of a pair).
fn scan_rst_markers_scalar(data: &[u8], capacity_hint: usize) -> RstScanResult {
    let mut markers = Vec::with_capacity(capacity_hint);
    let len = data.len();
    let mut i = 0;
    // A marker needs two bytes, so stop once fewer than two remain.
    while i + 1 < len {
        if data[i] != 0xFF {
            i += 1;
            continue;
        }
        match data[i + 1] {
            code @ 0xD0..=0xD7 => {
                markers.push(RstMarkerPos {
                    offset: i,
                    rst_num: code - 0xD0,
                });
                i += 2;
            }
            // Byte stuffing: a literal 0xFF in the entropy data.
            0x00 => i += 2,
            // Fill byte: step one byte only so the next 0xFF gets checked.
            0xFF => i += 1,
            // Any other marker ends the scan.
            _ => break,
        }
    }
    RstScanResult {
        markers,
        entropy_end: i,
    }
}
/// AVX2 restart-marker scanner.
///
/// Works in two phases:
///
/// 1. A vector loop handles 32-byte chunks while more than 32 bytes remain.
///    Each chunk is compared against `0xFF`, and every matching byte is
///    classified by the byte that follows it.
/// 2. A scalar loop handles the remaining bytes (at most 32).
///
/// Classification of the byte after a `0xFF` is the same in both phases:
/// `0xD0..=0xD7` records a restart marker, `0x00` (stuffing) and `0xFF`
/// (fill) are ignored, anything else stops the scan. In the scalar phase a
/// fill byte advances the cursor by one byte only, so the following `0xFF` is
/// still examined — matching the vector phase, which looks at every `0xFF`
/// independently.
///
/// * `_token` — presumably proof that the required CPU features are
///   available; it is not read by this body (see `#[archmage::arcane]`).
/// * `data` — bytes to scan.
/// * `capacity_hint` — initial capacity of the marker vector.
///
/// Returns the markers in ascending offset order. `entropy_end` is the index
/// of the `0xFF` that stopped the scan, otherwise the final cursor position;
/// it is `0` for inputs shorter than two bytes.
#[cfg(target_arch = "x86_64")]
#[archmage::arcane]
fn scan_rst_markers_avx2(
    _token: archmage::X64V3Token,
    data: &[u8],
    capacity_hint: usize,
) -> RstScanResult {
    use core::arch::x86_64::*;
    use safe_unaligned_simd::x86_64 as safe_simd;
    let mut markers = Vec::with_capacity(capacity_hint);
    let len = data.len();
    // A marker is two bytes long; nothing can be found in less.
    if len < 2 {
        return RstScanResult {
            markers,
            entropy_end: 0,
        };
    }
    let chunk_size = 32;
    // Strictly-less-than below guarantees data[i + 32] exists, so the byte
    // following the last lane of a chunk can always be read.
    let simd_end = len.saturating_sub(chunk_size);
    let mut i = 0;
    let ff_vec = _mm256_set1_epi8(-1i8);
    while i < simd_end {
        let bytes: [u8; 32] = data[i..i + 32].try_into().unwrap();
        let bytes_as_i16: [i16; 16] = bytemuck::cast(bytes);
        let chunk = safe_simd::_mm256_loadu_si256(&bytes_as_i16);
        // One mask bit per byte lane that equals 0xFF.
        let cmp = _mm256_cmpeq_epi8(chunk, ff_vec);
        let mut mask = _mm256_movemask_epi8(cmp) as u32;
        while mask != 0 {
            let bit_pos = mask.trailing_zeros() as usize;
            let ff_offset = i + bit_pos;
            // Always true inside this loop (see `simd_end`); kept as a cheap
            // guard for the index below.
            if ff_offset + 1 < len {
                let next = data[ff_offset + 1];
                if (0xD0..=0xD7).contains(&next) {
                    markers.push(RstMarkerPos {
                        offset: ff_offset,
                        rst_num: next - 0xD0,
                    });
                } else if next != 0x00 && next != 0xFF {
                    // Some other marker: the scan ends at its 0xFF.
                    return RstScanResult {
                        markers,
                        entropy_end: ff_offset,
                    };
                }
            }
            // Clear the lowest set bit and move on to the next 0xFF lane.
            mask &= mask - 1;
        }
        i += chunk_size;
    }
    // Scalar tail for the bytes the vector loop did not cover.
    while i + 1 < len {
        if data[i] == 0xFF {
            let next = data[i + 1];
            if (0xD0..=0xD7).contains(&next) {
                markers.push(RstMarkerPos {
                    offset: i,
                    rst_num: next - 0xD0,
                });
                i += 2;
                continue;
            }
            if next == 0x00 {
                // Stuffed byte: consume the pair.
                i += 2;
                continue;
            }
            if next == 0xFF {
                // Fill byte: step one byte only so the next 0xFF is examined
                // as a potential marker prefix, as the vector loop does.
                i += 1;
                continue;
            }
            break;
        }
        i += 1;
    }
    RstScanResult {
        markers,
        entropy_end: i,
    }
}
/// Derives segment boundaries from a list of restart markers.
///
/// Returns `(starts, ends)`, two vectors of length `markers.len() + 1` where
/// `starts[k]` / `ends[k]` are the bounds of segment `k`:
///
/// * the first segment starts at `0`;
/// * each marker ends the current segment at its own `offset` and starts the
///   next one two bytes later, i.e. just past the two-byte marker;
/// * the last segment ends at `scan_data_len`.
pub(crate) fn compute_segments(
    markers: &[RstMarkerPos],
    scan_data_len: usize,
) -> (Vec<usize>, Vec<usize>) {
    let segment_count = markers.len() + 1;

    // Starts: 0, then the byte after every marker.
    let mut segment_starts = Vec::with_capacity(segment_count);
    segment_starts.push(0);
    segment_starts.extend(markers.iter().map(|marker| marker.offset + 2));

    // Ends: every marker offset, then the end of the data.
    let mut segment_ends = Vec::with_capacity(segment_count);
    segment_ends.extend(markers.iter().map(|marker| marker.offset));
    segment_ends.push(scan_data_len);

    (segment_starts, segment_ends)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Flattens the markers of a scan result into `(offset, rst_num)` pairs.
    fn marker_pairs(result: &RstScanResult) -> Vec<(usize, u8)> {
        result
            .markers
            .iter()
            .map(|m| (m.offset, m.rst_num))
            .collect()
    }

    /// Data without any `0xFF` byte contains no markers.
    #[test]
    fn test_no_markers() {
        let scanned = scan_rst_markers(&[0x00, 0x01, 0x02, 0x03], 0);
        assert!(scanned.markers.is_empty());
        // The scan stops as soon as fewer than two bytes remain.
        assert_eq!(scanned.entropy_end, 3);
    }

    /// A lone RST0 in the middle of the data is reported with its offset.
    #[test]
    fn test_single_rst0() {
        let scanned = scan_rst_markers(&[0x12, 0x34, 0xFF, 0xD0, 0x56, 0x78], 0);
        assert_eq!(marker_pairs(&scanned), vec![(2, 0)]);
        assert_eq!(scanned.entropy_end, 5);
    }

    /// Consecutive restart markers are reported in order of appearance.
    #[test]
    fn test_multiple_rst_markers() {
        let data = [
            0x11, 0x22, 0x33, // payload
            0xFF, 0xD0, // RST0 at offset 3
            0x44, 0x55, // payload
            0xFF, 0xD1, // RST1 at offset 7
            0x66, // payload
        ];
        let scanned = scan_rst_markers(&data, 0);
        assert_eq!(marker_pairs(&scanned), vec![(3, 0), (7, 1)]);
    }

    /// `0xFF 0x00` is byte stuffing and must not be reported as a marker.
    #[test]
    fn test_stuffed_bytes_ignored() {
        let scanned = scan_rst_markers(&[0xFF, 0x00, 0x12, 0xFF, 0xD0, 0x34], 0);
        assert_eq!(marker_pairs(&scanned), vec![(3, 0)]);
    }

    /// A marker other than RSTn (here `0xFF 0xD9`) ends the scan at its `0xFF`.
    #[test]
    fn test_stops_at_non_rst_marker() {
        let scanned = scan_rst_markers(&[0x11, 0xFF, 0xD0, 0x22, 0xFF, 0xD9], 0);
        assert_eq!(scanned.markers.len(), 1);
        assert_eq!(scanned.markers[0].offset, 1);
        assert_eq!(scanned.entropy_end, 4);
    }

    /// Segment bounds skip the two marker bytes between segments.
    #[test]
    fn test_compute_segments() {
        let markers = [
            RstMarkerPos {
                offset: 100,
                rst_num: 0,
            },
            RstMarkerPos {
                offset: 250,
                rst_num: 1,
            },
        ];
        let (starts, ends) = compute_segments(&markers, 400);
        assert_eq!(starts, [0, 102, 252]);
        assert_eq!(ends, [100, 250, 400]);
    }

    /// Restart numbers wrap around after RST7.
    #[test]
    fn test_rst_cycling() {
        let mut data = Vec::new();
        for n in 0..10u8 {
            // One payload byte followed by RST(n mod 8).
            data.extend_from_slice(&[0x11, 0xFF, 0xD0 + (n & 7)]);
        }
        let scanned = scan_rst_markers(&data, 0);
        assert_eq!(scanned.markers.len(), 10);
        for (index, marker) in scanned.markers.iter().enumerate() {
            assert_eq!(marker.rst_num, (index as u8) & 7);
        }
    }

    /// The dispatched scanner and the scalar scanner agree on data that
    /// places markers inside chunks, across 32-byte boundaries and in the
    /// tail.
    #[cfg(target_arch = "x86_64")]
    #[test]
    fn test_avx2_matches_scalar() {
        let mut data = vec![0u8; 200];
        // (position of 0xFF, byte that follows it)
        let pairs: [(usize, u8); 5] = [
            (10, 0xD0),  // inside the first chunk
            (31, 0xD1),  // straddles the 31/32 boundary
            (63, 0xD2),  // straddles the 63/64 boundary
            (100, 0x00), // stuffed byte, not a marker
            (150, 0xD3), // later chunk
        ];
        for &(at, second) in pairs.iter() {
            data[at] = 0xFF;
            data[at + 1] = second;
        }

        let scalar = scan_rst_markers_scalar(&data, 0);
        let simd = scan_rst_markers(&data, 0);

        assert_eq!(marker_pairs(&scalar), marker_pairs(&simd));
        assert_eq!(scalar.entropy_end, simd.entropy_end);
    }
}