Skip to main content

keyhog_scanner/decode/
base64.rs

1use super::pipeline::{extract_encoded_values, push_decoded_text_chunk_spliced};
2use super::{Decoder, EncodedString};
3use keyhog_core::Chunk;
4
5pub(super) struct Base64Decoder;
6
7impl Decoder for Base64Decoder {
8    fn name(&self) -> &'static str {
9        "base64"
10    }
11
12    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
13        let mut decoded_chunks = Vec::new();
14        // Floor lowered from 20→12 so short contract credentials (7–15
15        // chars) survive encode-through in `encoding_explosion_runner`.
16        // `extract_encoded_values` already rejects noise shorter than 4.
17        for b64_match in find_base64_strings(&chunk.data, 12) {
18            if let Ok(decoded) = base64_decode(&b64_match.value) {
19                if let Ok(text) = String::from_utf8(decoded) {
20                    // Splice the decoded text back over the original
21                    // base64 blob in the parent so companion context
22                    // (e.g. `aws_secret = "…"`) stays adjacent to the
23                    // decoded credential. Without this the decoded
24                    // chunk is bare-bytes-only and every detector
25                    // anchored on an adjacent keyword misses.
26                    push_decoded_text_chunk_spliced(
27                        &mut decoded_chunks,
28                        chunk,
29                        &b64_match.value,
30                        text,
31                        self.name(),
32                    );
33                }
34            }
35        }
36        decoded_chunks
37    }
38}
39
40pub(super) struct Z85Decoder;
41
42impl Decoder for Z85Decoder {
43    fn name(&self) -> &'static str {
44        "z85"
45    }
46
47    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
48        let mut decoded_chunks = Vec::new();
49        for z_match in find_z85_strings(&chunk.data, 20) {
50            if let Ok(decoded) = z85_decode(&z_match.value) {
51                if let Ok(text) = String::from_utf8(decoded) {
52                    push_decoded_text_chunk_spliced(
53                        &mut decoded_chunks,
54                        chunk,
55                        &z_match.value,
56                        text.trim_end_matches('\0').to_string(),
57                        self.name(),
58                    );
59                }
60            }
61        }
62        decoded_chunks
63    }
64}
65
/// Which base64 codec a candidate string should be decoded with.
#[derive(Clone, Copy)]
enum Base64Variant {
    /// Decoded with `base64_simd::STANDARD`.
    Standard,
    /// Decoded with `base64_simd::STANDARD_NO_PAD`.
    StandardNoPad,
    /// Decoded with `base64_simd::URL_SAFE`.
    UrlSafe,
    /// Decoded with `base64_simd::URL_SAFE_NO_PAD`.
    UrlSafeNoPad,
}
73
74pub fn find_base64_strings(text: &str, min_length: usize) -> Vec<EncodedString> {
75    let mut results = Vec::new();
76    let b64_chars = |ch: char| {
77        ch.is_ascii_alphanumeric() || ch == '+' || ch == '/' || ch == '=' || ch == '-' || ch == '_'
78    };
79
80    for candidate in extract_encoded_values(text) {
81        if candidate.len() >= min_length
82            && candidate.chars().all(b64_chars)
83            && classify_base64(&candidate).is_some()
84        {
85            results.push(EncodedString { value: candidate });
86        }
87    }
88    results
89}
90
91fn classify_base64(candidate: &str) -> Option<Base64Variant> {
92    if !has_valid_base64_padding(candidate) {
93        return None;
94    }
95
96    let has_standard = candidate.contains('+') || candidate.contains('/');
97    let has_urlsafe = candidate.contains('-') || candidate.contains('_');
98    if has_standard && has_urlsafe {
99        return None;
100    }
101
102    let padded = candidate.contains('=');
103    match (has_urlsafe, padded, candidate.len() % 4) {
104        (_, true, 0) => Some(if has_urlsafe {
105            Base64Variant::UrlSafe
106        } else {
107            Base64Variant::Standard
108        }),
109        (_, true, _) => None,
110        (_, false, 1) => Some(if has_urlsafe {
111            Base64Variant::UrlSafeNoPad
112        } else {
113            Base64Variant::StandardNoPad
114        }),
115        (true, false, _) => Some(Base64Variant::UrlSafeNoPad),
116        (false, false, 0) => Some(Base64Variant::Standard),
117        (false, false, _) => Some(Base64Variant::StandardNoPad),
118    }
119}
120
/// Checks that any `=` in `candidate` forms a well-placed padding suffix.
///
/// A candidate without `=` is always accepted. Otherwise the first `=` must
/// not be the first character, and everything from it to the end must be
/// one or two `=` characters.
fn has_valid_base64_padding(candidate: &str) -> bool {
    let Some((body, after_first_pad)) = candidate.split_once('=') else {
        return true;
    };

    // `after_first_pad` excludes the first `=`, so at most one more `=` may
    // follow and nothing else.
    !body.is_empty() && matches!(after_first_pad, "" | "=")
}
133
134/// Maximum base64 input length we'll decode (prevents OOM from malicious input).
135const MAX_BASE64_INPUT_LEN: usize = 16 * 1024 * 1024; // 16 MB -> ~12 MB decoded
136
137#[allow(clippy::result_unit_err)]
138pub fn base64_decode(input: &str) -> Result<Vec<u8>, ()> {
139    if input.len() > MAX_BASE64_INPUT_LEN {
140        return Err(());
141    }
142
143    let variant = classify_base64(input).ok_or(())?;
144    match variant {
145        Base64Variant::Standard => base64_simd::STANDARD.decode_to_vec(input.as_bytes()),
146        Base64Variant::StandardNoPad => {
147            base64_simd::STANDARD_NO_PAD.decode_to_vec(input.as_bytes())
148        }
149        Base64Variant::UrlSafe => base64_simd::URL_SAFE.decode_to_vec(input.as_bytes()),
150        Base64Variant::UrlSafeNoPad => base64_simd::URL_SAFE_NO_PAD.decode_to_vec(input.as_bytes()),
151    }
152    .map_err(|_| ())
153}
154
155fn find_z85_strings(text: &str, min_length: usize) -> Vec<EncodedString> {
156    let mut results = Vec::new();
157    let is_z85_char =
158        |ch: char| ch.is_ascii_alphanumeric() || ".-:+=^!/*?&<>()[]{}@%$#".contains(ch);
159
160    for candidate in extract_encoded_values(text) {
161        let cleaned: String = candidate.chars().filter(|ch| !ch.is_whitespace()).collect();
162        if cleaned.len() >= min_length
163            && cleaned.len().is_multiple_of(5)
164            && cleaned.chars().all(is_z85_char)
165        {
166            results.push(EncodedString { value: cleaned });
167        }
168    }
169    results
170}
171
172/// Maximum Z85 input length we'll decode.
173const MAX_Z85_INPUT_LEN: usize = 16 * 1024 * 1024;
174
175#[allow(clippy::result_unit_err)]
176pub fn z85_decode(input: &str) -> Result<Vec<u8>, ()> {
177    if !input.len().is_multiple_of(5) || input.len() > MAX_Z85_INPUT_LEN {
178        return Err(());
179    }
180    let mut decoded = Vec::with_capacity(input.len() * 4 / 5);
181    let bytes = input.as_bytes();
182    for chunk in bytes.chunks_exact(5) {
183        let mut value = 0u64;
184        for &byte in chunk {
185            value = value * 85 + z85_val(byte)? as u64;
186        }
187        if value > u32::MAX as u64 {
188            return Err(());
189        }
190        let value = value as u32;
191        decoded.push((value >> 24) as u8);
192        decoded.push((value >> 16) as u8);
193        decoded.push((value >> 8) as u8);
194        decoded.push(value as u8);
195    }
196    Ok(decoded)
197}
198
/// Maps one Z85 symbol to its digit value in `0..=84`.
///
/// Digits take 0–9, lowercase letters 10–35, uppercase letters 36–61, and
/// the punctuation symbols 62–84 in the order listed in the table below.
/// Any other byte yields `Err(())`.
fn z85_val(byte: u8) -> Result<u8, ()> {
    /// Punctuation part of the alphabet; the symbol at index `i` has value `62 + i`.
    const Z85_PUNCTUATION: &[u8] = b".-:+=^!/*?&<>()[]{}@%$#";

    match byte {
        b'0'..=b'9' => Ok(byte - b'0'),
        b'a'..=b'z' => Ok(byte - b'a' + 10),
        b'A'..=b'Z' => Ok(byte - b'A' + 36),
        _ => Z85_PUNCTUATION
            .iter()
            .zip(62u8..)
            .find_map(|(&symbol, digit)| (symbol == byte).then_some(digit))
            .ok_or(()),
    }
}