vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! Shannon entropy scoring and high-entropy window helpers.

use super::spans::{span, ByteSpan};
use super::{to_u32, validate_input, DetectionError};

/// Compute the Shannon entropy of `input` in bits per byte.
///
/// Builds a 256-bucket histogram of the byte values and evaluates
/// `-Σ p · log2(p)` over the buckets that occur at least once, where `p` is
/// the bucket count divided by the input length.
///
/// # Returns
///
/// A non-negative `f32`. Empty input and input consisting of one repeated
/// byte value both yield positive `0.0` (never `-0.0`); 256 equally frequent
/// byte values yield approximately `8.0`.
#[must_use]
pub fn shannon_bits(input: &[u8]) -> f32 {
    if input.is_empty() {
        return 0.0;
    }
    // `usize` buckets cannot overflow: no bucket can exceed `input.len()`.
    let mut histogram = [0_usize; 256];
    for &byte in input {
        histogram[usize::from(byte)] += 1;
    }
    let total = input.len() as f32;
    histogram
        .iter()
        .filter(|&&count| count != 0)
        .map(|&count| {
            let p = count as f32 / total;
            -p * p.log2()
        })
        // Fold from an explicit +0.0 instead of calling `sum()`: when `p` is
        // 1.0 the only term is `-1.0 * 0.0 == -0.0`, and a sum seeded with
        // -0.0 would hand that negative zero back to the caller.
        .fold(0.0_f32, |bits, term| bits + term)
}

/// Return the start offsets of every `window`-byte window of `input` whose
/// Shannon entropy is at or above `threshold`.
///
/// Windows advance one byte at a time, so consecutive windows overlap. When
/// `window` is larger than `input` there are no windows and the result is an
/// empty vector.
///
/// # Errors
///
/// Returns a `Fix: ...` message when `validate_input` rejects `input`, when
/// `window` is zero or does not fit `usize`, when `threshold` lies outside
/// `0.0..=8.0` (NaN included), or when `to_u32` rejects an offset. The
/// settings are checked before any scanning, so an invalid threshold is
/// reported even if `input` is too short to contain a single window.
pub fn high_entropy_offsets(
    input: &[u8],
    window: u32,
    threshold: f32,
) -> Result<Vec<u32>, DetectionError> {
    validate_input(input)?;
    if window == 0 {
        return Err("Fix: set entropy window to at least 1 byte.".into());
    }
    let window = usize::try_from(window)
        .map_err(|_| "Fix: set entropy window to a value that fits usize.".to_string())?;
    // Reject a bad threshold up front; a short input must not hide the
    // misconfiguration behind an empty, successful result.
    if !(0.0..=8.0).contains(&threshold) {
        return Err(format!(
            "Fix: set entropy threshold between 0.0 and 8.0 bits per byte, got {threshold}"
        ));
    }

    let mut offsets = Vec::new();
    // `windows` yields nothing when `window > input.len()`, which covers the
    // short-input case without subtracting lengths. `window` is non-zero
    // here, so `windows` cannot panic.
    for (index, bytes) in input.windows(window).enumerate() {
        if shannon_bits(bytes) >= threshold {
            offsets.push(to_u32(index, "offset")?);
        }
    }
    Ok(offsets)
}

/// Return one span per window whose entropy is at or above `threshold`.
///
/// Narrows `window` to `u32`, asks `high_entropy_offsets` for the matching
/// start offsets, and builds a span of `window` bytes at each offset via
/// `span`.
///
/// # Errors
///
/// Returns a `Fix: ...` message when `window` does not fit `u32`, when
/// `high_entropy_offsets` fails, or when `span` rejects an offset/length pair.
pub fn high_entropy_spans(
    input: &[u8],
    window: usize,
    threshold: f32,
) -> Result<Vec<ByteSpan>, DetectionError> {
    // The offsets API takes a `u32` window, so narrow it before delegating.
    let narrow_window = u32::try_from(window)
        .map_err(|_| "Fix: reduce entropy window so it fits U32.".to_string())?;
    let starts = high_entropy_offsets(input, narrow_window, threshold)?;

    let mut spans = Vec::with_capacity(starts.len());
    for start in starts {
        // An offset that cannot be widened falls back to `usize::MAX`, and
        // `span` decides what to do with it.
        let begin = usize::try_from(start).unwrap_or(usize::MAX);
        spans.push(span(begin, window)?);
    }
    Ok(spans)
}