/// Occurrence counts for each of the 256 possible byte values.
///
/// Slot `b` of the table holds how many times byte value `b` was observed.
/// Histograms built with [`ByteHistogram::from_block`] saturate at
/// `u32::MAX` instead of wrapping or panicking on overflow.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ByteHistogram {
    /// `counts[b]` is the number of occurrences of byte value `b`.
    counts: [u32; 256],
}

impl ByteHistogram {
    /// Largest number of bytes tallied before the lane tables are folded into
    /// the running totals.
    ///
    /// A lane sees at most one byte out of every four, so within a segment of
    /// this length no lane counter can exceed `u32::MAX`. The cast is lossless
    /// on 32- and 64-bit targets; on narrower targets the truncated value is
    /// simply a smaller, still valid, bound.
    const MAX_SEGMENT_LEN: usize = u32::MAX as usize;

    /// Creates a histogram in which every byte value has a count of zero.
    #[must_use]
    pub const fn new() -> Self {
        Self { counts: [0; 256] }
    }

    /// Counts the occurrences of every byte value in `data`.
    ///
    /// Bytes are consumed four at a time and each position within the group
    /// updates its own lane table, so the four increments of one group never
    /// target the same table. The lanes are then summed into the result.
    ///
    /// A count that would exceed `u32::MAX` saturates at `u32::MAX`. To keep
    /// that guarantee for arbitrarily large inputs, `data` is processed in
    /// segments short enough that no lane counter can overflow before it is
    /// folded into the saturating totals.
    #[must_use]
    pub fn from_block(data: &[u8]) -> Self {
        let mut counts = [0u32; 256];

        for segment in data.chunks(Self::MAX_SEGMENT_LEN) {
            let mut lanes = [[0u32; 256]; 4];
            let groups = segment.chunks_exact(4);
            let tail = groups.remainder();

            let [lane0, lane1, lane2, lane3] = &mut lanes;
            for group in groups {
                lane0[usize::from(group[0])] += 1;
                lane1[usize::from(group[1])] += 1;
                lane2[usize::from(group[2])] += 1;
                lane3[usize::from(group[3])] += 1;
            }

            // The tail holds at most three bytes; byte `i` goes to lane `i`.
            for (lane, &byte) in lanes.iter_mut().zip(tail) {
                lane[usize::from(byte)] += 1;
            }

            for lane in &lanes {
                for (total, &partial) in counts.iter_mut().zip(lane) {
                    *total = total.saturating_add(partial);
                }
            }
        }

        Self { counts }
    }

    /// Wraps an existing table of per-byte counts without modifying it.
    #[must_use]
    pub const fn from_raw_counts(counts: [u32; 256]) -> Self {
        Self { counts }
    }

    /// Returns the recorded number of occurrences of `byte`.
    #[must_use]
    #[inline]
    pub fn count(&self, byte: u8) -> u32 {
        self.counts[usize::from(byte)]
    }

    /// Returns the underlying table, indexed by byte value.
    #[must_use]
    #[inline]
    pub const fn raw_counts(&self) -> &[u32; 256] {
        &self.counts
    }

    /// Returns `true` when every byte value flagged in `required_bytes` has a
    /// non-zero count.
    ///
    /// An all-`false` mask is vacuously satisfied and yields `true`.
    #[must_use]
    pub fn contains_all(&self, required_bytes: &[bool; 256]) -> bool {
        required_bytes
            .iter()
            .zip(&self.counts)
            .all(|(&required, &count)| !required || count > 0)
    }

    /// Returns `true` when at least one byte value flagged in `byte_set` has a
    /// non-zero count.
    ///
    /// An all-`false` mask yields `false`.
    #[must_use]
    pub fn contains_any(&self, byte_set: &[bool; 256]) -> bool {
        byte_set
            .iter()
            .zip(&self.counts)
            .any(|(&wanted, &count)| wanted && count > 0)
    }
}

impl Default for ByteHistogram {
    /// Returns an empty histogram, identical to [`ByteHistogram::new`].
    fn default() -> Self {
        Self::new()
    }
}
#[cfg(test)]
mod tests {
    use super::ByteHistogram;

    /// Builds a byte-set mask in which exactly the listed byte values are flagged.
    fn mask_of(flagged: &[u8]) -> [bool; 256] {
        let mut mask = [false; 256];
        for &value in flagged {
            mask[usize::from(value)] = true;
        }
        mask
    }

    /// An empty input must leave every count at zero.
    #[test]
    fn histogram_empty_block() {
        let empty = ByteHistogram::from_block(b"");
        assert!((u8::MIN..=u8::MAX).all(|value| empty.count(value) == 0));
    }

    /// A block made of one repeated value counts only that value.
    #[test]
    fn histogram_single_byte_repeated() {
        let block = [0x41u8; 64];
        let tally = ByteHistogram::from_block(&block);
        assert_eq!(tally.count(0x41), 64);
        assert_eq!(tally.count(0x42), 0);
    }

    /// A block holding each byte value once counts every value exactly once.
    #[test]
    fn histogram_all_256_values() {
        let every_value: Vec<u8> = (u8::MIN..=u8::MAX).collect();
        let tally = ByteHistogram::from_block(&every_value);
        assert!(every_value.iter().all(|&value| tally.count(value) == 1));
    }

    /// All flagged bytes occur in the block, so the query succeeds.
    #[test]
    fn histogram_contains_all_true() {
        let tally = ByteHistogram::from_block(b"abcdef");
        let required = mask_of(b"acf");
        assert!(tally.contains_all(&required));
    }

    /// One flagged byte (`z`) is absent from the block, so the query fails.
    #[test]
    fn histogram_contains_all_false() {
        let tally = ByteHistogram::from_block(b"abcdef");
        let required = mask_of(b"az");
        assert!(!tally.contains_all(&required));
    }
}