vyre 0.4.0

GPU compute intermediate representation with a standard operation library
Documentation
//! Single-byte XOR scoring and range de-duplication.

use super::spans::span;
use super::{validate_input, DetectionError};

// Smallest number of bytes that is still scanned: inputs shorter than this
// yield no findings, and the sliding scan stops once fewer bytes remain.
const MIN_WINDOW: usize = 32;
// Upper bound on the length of a single scored window.
const MAX_WINDOW: usize = 256;
// Minimum `english_score` the best key of a window must reach to be reported.
const SCORE_THRESHOLD: i32 = 115;

/// Single-byte XOR candidate range and key.
///
/// Produced by `detect_xor_single_byte` for each scanned window whose best
/// key scores at or above the reporting threshold.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct XorSingleByteFinding {
    /// Candidate XOR key.
    pub key: u8,
    /// Inclusive byte offset where the decoded-looking range starts.
    pub offset: u32,
    /// Candidate range length in bytes, counted from `offset`.
    pub len: u32,
}

/// Return true when bytes are mostly printable plaintext.
///
/// A byte counts as printable when it is an ASCII graphic character, a
/// space, a newline, or a tab. The input qualifies when at least 90% of its
/// bytes (integer percentage, rounded down) are printable.
///
/// Empty input is never considered plaintext and returns `false`.
#[must_use]
pub fn looks_like_plaintext(input: &[u8]) -> bool {
    // Guard the percentage division below: an empty slice would divide by zero.
    if input.is_empty() {
        return false;
    }
    let printable = input
        .iter()
        .filter(|&&b| b.is_ascii_graphic() || b == b' ' || b == b'\n' || b == b'\t')
        .count();
    printable * 100 / input.len() >= 90
}

/// Score a candidate XOR key against English-like plaintext.
///
/// Every byte of `input` is XORed with `key` and the decoded byte contributes
/// to the score:
///
/// * non-printable bytes (not ASCII graphic, space, newline, or tab): -8
/// * the letters `e t a o i n s h r` (case-insensitive): +4
/// * space: +5
/// * the punctuation `. , : ; ' "` and ASCII digits: +1
///
/// If at least 85% of the decoded bytes are printable the score gains 25.
/// If the *undecoded* input already looks like plaintext the score loses 60,
/// so that data which is readable without any key is penalised.
///
/// Empty input carries no evidence and scores 0.
#[must_use]
pub fn english_score(input: &[u8], key: u8) -> i32 {
    // Return early on empty input: the printable-ratio check below divides by
    // `input.len()` and would otherwise panic with a division by zero.
    if input.is_empty() {
        return 0;
    }
    let mut score = 0i32;
    let mut printable = 0usize;
    for &byte in input {
        let decoded = byte ^ key;
        if decoded.is_ascii_graphic() || decoded == b' ' || decoded == b'\n' || decoded == b'\t' {
            printable += 1;
        } else {
            score -= 8;
        }
        match decoded.to_ascii_lowercase() {
            b'e' | b't' | b'a' | b'o' | b'i' | b'n' | b's' | b'h' | b'r' => score += 4,
            b' ' => score += 5,
            b'.' | b',' | b':' | b';' | b'\'' | b'"' => score += 1,
            b'0'..=b'9' => score += 1,
            _ => {}
        }
    }
    if printable * 100 / input.len() >= 85 {
        score += 25;
    }
    // Checked against the raw bytes on purpose: this is independent of `key`.
    if looks_like_plaintext(input) {
        score -= 60;
    }
    score
}

/// Find the nonzero XOR key whose decoding of `input` scores highest.
///
/// Keys `1..=255` are scored in ascending order with `english_score`. When
/// several keys share the top score the smallest key wins, because a later
/// candidate only replaces the current best on a strictly greater score.
#[must_use]
pub fn best_key(input: &[u8]) -> Option<(u8, i32)> {
    (1u8..=u8::MAX)
        .map(|key| (key, english_score(input, key)))
        .reduce(|leader, challenger| {
            if challenger.1 > leader.1 {
                challenger
            } else {
                leader
            }
        })
}

/// Return true when two public ranges overlap.
///
/// Each range is half-open: it covers `start..start + len`. Two ranges
/// overlap when they share at least one byte offset, so ranges that merely
/// touch end-to-start do not overlap.
#[must_use]
pub fn ranges_overlap(a_start: u32, a_len: u32, b_start: u32, b_len: u32) -> bool {
    // Compute the exclusive ends in u64 so that `start + len` can never wrap
    // or be clamped. Clamping at u32::MAX would make ranges that reach the top
    // of the u32 offset space compare as disjoint.
    let a_lo = u64::from(a_start);
    let b_lo = u64::from(b_start);
    let a_hi = a_lo + u64::from(a_len);
    let b_hi = b_lo + u64::from(b_len);
    a_lo < b_hi && b_lo < a_hi
}

/// Drop findings that repeat an earlier finding's key over an overlapping range.
///
/// Findings are visited in input order. A finding is kept unless a finding
/// already kept has the same key and a range that overlaps it (as decided by
/// `ranges_overlap`); the first such finding wins and later ones are dropped.
///
/// # Errors
///
/// The current implementation always returns `Ok`. The `Result` return type
/// exists for API symmetry and future checked conversions.
pub fn dedupe(
    findings: Vec<XorSingleByteFinding>,
) -> Result<Vec<XorSingleByteFinding>, DetectionError> {
    let kept = findings.into_iter().fold(
        Vec::<XorSingleByteFinding>::new(),
        |mut kept, candidate| {
            // Unique when every kept finding differs in key or is disjoint.
            let is_unique = kept.iter().all(|seen| {
                seen.key != candidate.key
                    || !ranges_overlap(seen.offset, seen.len, candidate.offset, candidate.len)
            });
            if is_unique {
                kept.push(candidate);
            }
            kept
        },
    );
    Ok(kept)
}

/// Scan `input` for windows that look like single-byte-XOR-encoded text.
///
/// Inputs shorter than `MIN_WINDOW` produce no findings. Otherwise a window
/// of `min(input.len(), MAX_WINDOW)` bytes slides over the input in steps of
/// a quarter window (at least 8 bytes). The final windows are truncated at
/// the end of the input, and scanning stops once a window reaches the end or
/// fewer than `MIN_WINDOW` bytes remain. Each window whose best key scores at
/// least `SCORE_THRESHOLD` is reported, and the results are passed through
/// `dedupe` before being returned.
///
/// # Errors
///
/// Propagates the error from `validate_input` when the input is rejected, or
/// from `span` when a window's offset/length cannot be converted.
pub fn detect_xor_single_byte(input: &[u8]) -> Result<Vec<XorSingleByteFinding>, DetectionError> {
    validate_input(input)?;

    let total = input.len();
    if total < MIN_WINDOW {
        return Ok(Vec::new());
    }

    let window = total.min(MAX_WINDOW);
    let stride = (window / 4).max(8);

    let mut candidates = Vec::new();
    let mut cursor = 0usize;
    loop {
        // Stop once fewer than MIN_WINDOW bytes remain.
        if cursor + MIN_WINDOW > total {
            break;
        }
        let stop = total.min(cursor + window);
        match best_key(&input[cursor..stop]) {
            Some((key, score)) if score >= SCORE_THRESHOLD && key != 0 => {
                let located = span(cursor, stop - cursor)?;
                candidates.push(XorSingleByteFinding {
                    key,
                    offset: located.offset,
                    len: located.len,
                });
            }
            _ => {}
        }
        // The window already reached the end of the input; nothing left to scan.
        if stop == total {
            break;
        }
        cursor += stride;
    }

    dedupe(candidates)
}