Skip to main content

keyhog_scanner/decode/
hex.rs

1use super::pipeline::{extract_encoded_values, push_decoded_text_chunk_spliced};
2use super::{Decoder, EncodedString};
3use keyhog_core::Chunk;
4
5pub(super) struct HexDecoder;
6
7impl Decoder for HexDecoder {
8    fn name(&self) -> &'static str {
9        "hex"
10    }
11
12    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
13        let mut decoded_chunks = Vec::new();
14        // Floor lowered from 32→16 hex chars (8 decoded bytes) so
15        // short API keys encode-through in `encoding_explosion_runner`.
16        for hex_match in find_hex_strings(&chunk.data, 16) {
17            let cleaned: String = hex_match.value.chars().filter(|c| *c != '_').collect();
18            if let Ok(decoded) = hex_decode(&cleaned) {
19                if let Ok(text) = String::from_utf8(decoded) {
20                    // Splice over the *original* encoded blob (with `_` if present)
21                    // so companion context survives - passing the cleaned form
22                    // misses the parent substring and drops the anchor.
23                    push_decoded_text_chunk_spliced(
24                        &mut decoded_chunks,
25                        chunk,
26                        &hex_match.value,
27                        text,
28                        self.name(),
29                    );
30                }
31            }
32        }
33        decoded_chunks
34    }
35}
36
37pub fn find_hex_strings(text: &str, min_length: usize) -> Vec<EncodedString> {
38    let mut results = Vec::new();
39    for candidate in extract_encoded_values(text) {
40        // Hex literals in firmware dumps and config files commonly use `_`
41        // every 2/4/8 chars for readability (`A1_B2_C3_...`). Strip those
42        // before validating - audit class #5 (release-2026-04-26) noted
43        // the previous all-hex check missed this evasion entirely.
44        let cleaned: String = candidate.chars().filter(|c| *c != '_').collect();
45        if cleaned.len() >= min_length
46            && cleaned.len().is_multiple_of(2)
47            && cleaned.chars().all(|ch| ch.is_ascii_hexdigit())
48        {
49            results.push(EncodedString { value: candidate });
50        }
51    }
52    results
53}
54
55/// Maximum hex input length we'll decode (prevents OOM from malicious input).
56const MAX_HEX_INPUT_LEN: usize = 32 * 1024 * 1024; // 32 MB -> 16 MB decoded
57
58#[allow(clippy::result_unit_err)]
59pub fn hex_decode(input: &str) -> Result<Vec<u8>, ()> {
60    let cleaned: String = input.chars().filter(|c| *c != '_').collect();
61    if !cleaned.len().is_multiple_of(2) || cleaned.len() > MAX_HEX_INPUT_LEN {
62        return Err(());
63    }
64    hex_simd::decode_to_vec(&cleaned).map_err(|_| ())
65}
66
67pub(super) fn hex_val(byte: u8) -> Result<u8, ()> {
68    match byte {
69        b'0'..=b'9' => Ok(byte - b'0'),
70        b'a'..=b'f' => Ok(byte - b'a' + 10),
71        b'A'..=b'F' => Ok(byte - b'A' + 10),
72        _ => Err(()),
73    }
74}