// keyhog-scanner 0.5.40
//
// keyhog-scanner: high-performance SIMD-accelerated secret detection engine
// Documentation
use super::pipeline::{extract_encoded_values, push_decoded_text_chunk_spliced};
use super::{Decoder, EncodedString};
use keyhog_core::Chunk;

/// Stateless decoder for hex-encoded text; its behavior lives in the
/// `Decoder` implementation for this type.
pub(super) struct HexDecoder;

impl Decoder for HexDecoder {
    /// Label for this decoder; also handed to
    /// `push_decoded_text_chunk_spliced` for every chunk emitted below.
    fn name(&self) -> &'static str {
        "hex"
    }

    /// Scans `chunk.data` for hex runs and returns one derived chunk per run
    /// that decodes to valid UTF-8.
    ///
    /// Runs that fail hex decoding or do not decode to UTF-8 are skipped
    /// silently; an empty vector means nothing decodable was found.
    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
        let mut decoded_chunks = Vec::new();
        // Floor lowered from 32→16 hex chars (8 decoded bytes) so
        // short API keys encode-through in `encoding_explosion_runner`.
        for hex_match in find_hex_strings(&chunk.data, 16) {
            // `hex_decode` drops `_` separators itself, so the raw match is
            // passed straight through instead of building a stripped copy
            // here only for it to be stripped a second time.
            let Ok(decoded) = hex_decode(&hex_match.value) else {
                continue;
            };
            let Ok(text) = String::from_utf8(decoded) else {
                continue;
            };
            // Splice over the *original* encoded blob (with `_` if present)
            // so companion context survives - passing the cleaned form
            // misses the parent substring and drops the anchor.
            push_decoded_text_chunk_spliced(
                &mut decoded_chunks,
                chunk,
                &hex_match.value,
                text,
                self.name(),
            );
        }
        decoded_chunks
    }
}

/// Returns every candidate from `extract_encoded_values(text)` that is a hex
/// string of at least `min_length` digits, not counting `_` separators.
///
/// Each accepted candidate is returned unmodified (separators included) as
/// the `value` of an [`EncodedString`].
pub fn find_hex_strings(text: &str, min_length: usize) -> Vec<EncodedString> {
    extract_encoded_values(text)
        .into_iter()
        .filter(|candidate| is_separated_hex(candidate, min_length))
        .map(|value| EncodedString { value })
        .collect()
}

/// Reports whether `candidate`, ignoring `_`, consists solely of ASCII hex
/// digits, with an even digit count of at least `min_length`.
///
/// Hex literals in firmware dumps and config files commonly use `_`
/// every 2/4/8 chars for readability (`A1_B2_C3_...`). Those are skipped
/// before validating - audit class #5 (release-2026-04-26) noted
/// the previous all-hex check missed this evasion entirely.
fn is_separated_hex(candidate: &str, min_length: usize) -> bool {
    let mut digit_count = 0usize;
    for ch in candidate.chars().filter(|&ch| ch != '_') {
        if !ch.is_ascii_hexdigit() {
            return false;
        }
        digit_count += 1;
    }
    // Every counted char is ASCII, so this count equals the byte length of
    // the separator-free string.
    digit_count >= min_length && digit_count.is_multiple_of(2)
}

/// Maximum hex input length, in bytes, that `hex_decode` will decode
/// (prevents OOM from malicious input). The limit is compared against the
/// input's length after `_` separators have been removed.
const MAX_HEX_INPUT_LEN: usize = 32 * 1024 * 1024; // 32 MiB of hex digits -> at most 16 MiB decoded

/// Decodes a hex string into raw bytes, ignoring any `_` separators.
///
/// # Errors
///
/// Returns `Err(())` when, with `_` removed, the input has an odd length,
/// is longer than [`MAX_HEX_INPUT_LEN`] bytes, or is rejected by
/// `hex_simd::decode_to_vec`.
// The unit error is kept for interface compatibility; the caller in this
// file only distinguishes success from failure.
#[allow(clippy::result_unit_err)]
pub fn hex_decode(input: &str) -> Result<Vec<u8>, ()> {
    // Measure before allocating: the size cap exists to bound memory use, so
    // it must be enforced before any copy of the input is made. `_` is a
    // single ASCII byte that never occurs inside a multi-byte UTF-8 sequence,
    // so counting bytes matches the length of the char-filtered string.
    let separator_count = input.bytes().filter(|&b| b == b'_').count();
    let digit_len = input.len() - separator_count;
    if !digit_len.is_multiple_of(2) || digit_len > MAX_HEX_INPUT_LEN {
        return Err(());
    }

    // Common case: no separators, so decode the borrowed input directly.
    if separator_count == 0 {
        return hex_simd::decode_to_vec(input).map_err(|_| ());
    }

    let cleaned: String = input.chars().filter(|c| *c != '_').collect();
    hex_simd::decode_to_vec(&cleaned).map_err(|_| ())
}

/// Converts one ASCII hex digit (`0-9`, `a-f`, `A-F`) to its value `0..=15`.
///
/// Any other byte yields `Err(())`.
pub(super) fn hex_val(byte: u8) -> Result<u8, ()> {
    // `char::from(u8)` maps bytes >= 0x80 to U+0080..=U+00FF, none of which
    // are radix-16 digits, so non-ASCII bytes are rejected as before.
    char::from(byte)
        .to_digit(16)
        .and_then(|nibble| u8::try_from(nibble).ok())
        .ok_or(())
}