use base64::Engine;
/// Structural summary of the bytes obtained by decoding a candidate string.
///
/// The `Default` value (all `false` / `0` / `None`) is what `analyze` returns
/// when a candidate is too short, cannot be decoded, or decodes to nothing.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct DecodeStructure {
    /// `true` when the candidate decoded to a non-empty byte sequence.
    pub decodable: bool,
    /// Number of bytes produced by decoding.
    pub decoded_len: usize,
    /// Fraction of decoded bytes that are printable ASCII (0x20..=0x7E) or
    /// tab / line feed / carriage return.
    pub printable_ratio: f32,
    /// Name of the file-format signature the decoded bytes start with, if any.
    pub magic: Option<&'static str>,
    /// `true` when the decoded bytes parse end-to-end as protobuf wire fields.
    pub protobuf_wire: bool,
}

impl DecodeStructure {
    /// Reports whether the decoded bytes look like a binary payload.
    ///
    /// A recognised magic signature is sufficient on its own. Otherwise the
    /// bytes must have parsed as protobuf wire format and be at least 8 bytes
    /// long.
    #[must_use]
    pub fn is_binary_payload(&self) -> bool {
        if self.magic.is_some() {
            return true;
        }
        self.protobuf_wire && self.decoded_len >= 8
    }
}
/// Shortest candidate length, in bytes after trimming, that `analyze` and the
/// placeholder check will attempt to decode. It is also the minimum length
/// required by the hex branch of `decode_candidate`.
const MIN_DECODE_LEN: usize = 16;
/// Returns `true` when `candidate` decodes to something that looks like a
/// binary payload (see `DecodeStructure::is_binary_payload`).
///
/// Verdicts are memoised per thread. The cache key is a 64-bit FNV-1a hash of
/// the candidate's bytes, so two candidates with the same hash share a
/// verdict. When the cache reaches `MAX_CACHE_ENTRIES` it is emptied before
/// the new verdict is stored.
#[must_use]
pub fn is_encoded_binary(candidate: &str) -> bool {
    use std::cell::RefCell;
    use std::collections::HashMap;

    const MAX_CACHE_ENTRIES: usize = 4096;
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    thread_local! {
        static CACHE: RefCell<HashMap<u64, bool>> = RefCell::new(HashMap::with_capacity(256));
    }

    let key = candidate.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });

    CACHE.with(|cell| {
        // Copy the hit out so the shared borrow ends before any mutation.
        let cached = cell.borrow().get(&key).copied();
        match cached {
            Some(verdict) => verdict,
            None => {
                let verdict = analyze(candidate).is_binary_payload();
                let mut entries = cell.borrow_mut();
                if entries.len() >= MAX_CACHE_ENTRIES {
                    entries.clear();
                }
                entries.insert(key, verdict);
                verdict
            }
        }
    })
}
/// Words that `compute_decoded_contains_placeholder` searches for, ASCII
/// case-insensitively, inside the decoded bytes of a candidate.
const DECODED_PLACEHOLDER_WORDS: &[&[u8]] = &[
b"example",
b"dummy",
b"fake",
b"sample",
b"placeholder",
b"changeme",
];
/// Heuristically decides whether `value` has the shape of a base64 blob.
///
/// All of the following must hold:
/// * the length is between 60 and 300 bytes inclusive;
/// * the value ends with `=` or its length is a multiple of four;
/// * every byte is an ASCII letter, digit, `+`, `/` or `=`;
/// * it contains at least one `+` or `/`, or it ends with `=`.
#[must_use]
pub fn looks_like_uniform_base64_blob(value: &str) -> bool {
    let len = value.len();
    if !(60..=300).contains(&len) {
        return false;
    }

    let padded = value.ends_with('=');
    if !(padded || len % 4 == 0) {
        return false;
    }

    let in_alphabet = value
        .bytes()
        .all(|byte| byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'/' | b'='));
    if !in_alphabet {
        return false;
    }

    // Purely alphanumeric, unpadded text is not treated as a blob.
    padded || value.bytes().any(|byte| matches!(byte, b'+' | b'/'))
}
/// Returns `true` when the decoded form of `candidate` contains one of the
/// `DECODED_PLACEHOLDER_WORDS`.
///
/// Results are memoised per thread, keyed by a 64-bit FNV-1a hash of the
/// candidate's bytes; candidates whose hashes coincide share a result. The
/// cache is wiped once it holds `MAX_CACHE_ENTRIES` entries.
#[must_use]
pub fn decoded_contains_placeholder(candidate: &str) -> bool {
    use std::cell::RefCell;
    use std::collections::HashMap;

    const MAX_CACHE_ENTRIES: usize = 4096;
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    thread_local! {
        static CACHE: RefCell<HashMap<u64, bool>> = RefCell::new(HashMap::with_capacity(256));
    }

    let mut key = FNV_OFFSET_BASIS;
    for byte in candidate.bytes() {
        key = (key ^ u64::from(byte)).wrapping_mul(FNV_PRIME);
    }

    CACHE.with(|cell| {
        // The lookup's shared borrow is released at the end of this statement.
        let hit = cell.borrow().get(&key).copied();
        if let Some(verdict) = hit {
            return verdict;
        }

        let verdict = compute_decoded_contains_placeholder(candidate);

        let mut entries = cell.borrow_mut();
        if entries.len() >= MAX_CACHE_ENTRIES {
            entries.clear();
        }
        entries.insert(key, verdict);
        verdict
    })
}
/// Uncached worker for `decoded_contains_placeholder`.
///
/// Trims `candidate`, decodes it with `decode_candidate`, and scans the
/// decoded bytes for any entry of `DECODED_PLACEHOLDER_WORDS`, comparing
/// ASCII case-insensitively. Returns `false` when the trimmed candidate is
/// shorter than `MIN_DECODE_LEN`, cannot be decoded, or decodes to nothing.
fn compute_decoded_contains_placeholder(candidate: &str) -> bool {
    let input = candidate.trim();
    if input.len() < MIN_DECODE_LEN {
        return false;
    }

    match decode_candidate(input) {
        Some(decoded) if !decoded.is_empty() => {
            DECODED_PLACEHOLDER_WORDS.iter().copied().any(|needle| {
                decoded
                    .windows(needle.len())
                    .any(|slice| slice.eq_ignore_ascii_case(needle))
            })
        }
        _ => false,
    }
}
/// Decodes `candidate` and summarises the structure of the resulting bytes.
///
/// Surrounding whitespace is trimmed first. If the trimmed text is shorter
/// than `MIN_DECODE_LEN`, cannot be decoded by `decode_candidate`, or decodes
/// to zero bytes, the all-default `DecodeStructure` is returned. Otherwise the
/// result records the decoded length, the share of printable bytes, any
/// recognised magic signature, and whether the bytes parse as protobuf wire
/// format.
#[must_use]
pub fn analyze(candidate: &str) -> DecodeStructure {
    let input = candidate.trim();
    if input.len() < MIN_DECODE_LEN {
        return DecodeStructure::default();
    }

    let decoded = match decode_candidate(input) {
        Some(decoded) if !decoded.is_empty() => decoded,
        _ => return DecodeStructure::default(),
    };

    // Printable means visible ASCII plus space, tab, line feed and carriage return.
    let printable_count = decoded
        .iter()
        .copied()
        .filter(|byte| matches!(byte, b'\t' | b'\n' | b'\r' | 0x20..=0x7E))
        .count();
    let total = decoded.len();

    DecodeStructure {
        decodable: true,
        decoded_len: total,
        printable_ratio: printable_count as f32 / total as f32,
        magic: magic_format(&decoded),
        protobuf_wire: parse_protobuf_wire(&decoded),
    }
}
/// Tries to decode `s` as base64 and, failing that, as hexadecimal.
///
/// Base64 is attempted when every byte is an ASCII letter, digit, `+`, `/`,
/// `-`, `_` or `=`. The text is padded with `=` to a multiple of four and fed
/// to the `STANDARD` engine, then the `URL_SAFE` engine. Hex is attempted only
/// if no base64 decode succeeded, and requires an even length of at least
/// `MIN_DECODE_LEN` consisting solely of hex digits. Because hex digits are
/// also base64 characters, an all-hex string reaches the hex branch only when
/// both base64 attempts fail.
///
/// Returns `None` when neither decoding applies.
fn decode_candidate(s: &str) -> Option<Vec<u8>> {
    let in_b64_alphabet =
        |byte: u8| byte.is_ascii_alphanumeric() || matches!(byte, b'+' | b'/' | b'-' | b'_' | b'=');

    if s.bytes().all(in_b64_alphabet) {
        let mut padded = String::from(s);
        match padded.len() % 4 {
            0 => {}
            rem => padded.push_str(&"=".repeat(4 - rem)),
        }

        let standard = base64::engine::general_purpose::STANDARD.decode(padded.as_bytes());
        if let Ok(bytes) = standard {
            return Some(bytes);
        }
        let url_safe = base64::engine::general_purpose::URL_SAFE.decode(padded.as_bytes());
        if let Ok(bytes) = url_safe {
            return Some(bytes);
        }
    }

    let hex_eligible = s.len() >= MIN_DECODE_LEN
        && s.len() % 2 == 0
        && s.bytes().all(|byte| byte.is_ascii_hexdigit());
    if !hex_eligible {
        return None;
    }

    // Each pair of hex digits becomes one output byte.
    s.as_bytes()
        .chunks_exact(2)
        .map(|pair| {
            let hi = char::from(pair[0]).to_digit(16)?;
            let lo = char::from(pair[1]).to_digit(16)?;
            Some(((hi << 4) | lo) as u8)
        })
        .collect()
}
/// Identifies a well-known binary format from the leading bytes of `b`.
///
/// Returns the name paired with the first signature in `SIGS` that `b` starts
/// with, or `None` when no signature matches (including when `b` is shorter
/// than every signature).
fn magic_format(b: &[u8]) -> Option<&'static str> {
    const SIGS: &[(&[u8], &str)] = &[
        (b"\x89PNG\r\n\x1a\n", "png"),
        (b"\xff\xd8\xff", "jpeg"),
        (b"GIF87a", "gif"),
        (b"GIF89a", "gif"),
        (b"\x1f\x8b", "gzip"),
        (b"BZh", "bzip2"),
        (b"\xfd7zXZ\x00", "xz"),
        (b"\x28\xb5\x2f\xfd", "zstd"),
        (b"PK\x03\x04", "zip"),
        (b"PK\x05\x06", "zip"),
        (b"7z\xbc\xaf\x27\x1c", "7z"),
        (b"Rar!\x1a\x07", "rar"),
        (b"%PDF-", "pdf"),
        (b"\x7fELF", "elf"),
        // Mach-O: 32-bit and 64-bit magics, each in both byte orders.
        (b"\xfe\xed\xfa\xce", "mach-o"),
        (b"\xfe\xed\xfa\xcf", "mach-o"),
        (b"\xce\xfa\xed\xfe", "mach-o"),
        (b"\xcf\xfa\xed\xfe", "mach-o"),
        (b"\xca\xfe\xba\xbe", "java-class"),
        (b"MZ", "pe"),
        (b"SQLite format 3\x00", "sqlite"),
        (b"OggS", "ogg"),
        (b"RIFF", "riff"),
        (b"\x00\x61\x73\x6d", "wasm"),
        (b"\x78\x01", "zlib"),
        (b"\x78\x9c", "zlib"),
        (b"\x78\xda", "zlib"),
        (b"\x78\x5e", "zlib"),
    ];

    SIGS.iter()
        .find(|(sig, _)| b.starts_with(sig))
        .map(|(_, name)| *name)
}
/// Checks whether `data` parses, from first byte to last, as a sequence of
/// protobuf wire-format fields.
///
/// Returns `true` only when all of the following hold:
/// * `data` is at least 8 bytes long;
/// * every field has a non-zero field number and wire type 0 (varint),
///   1 (8 fixed bytes), 2 (length-delimited) or 5 (4 fixed bytes);
/// * every payload lies entirely inside `data`;
/// * at least three fields were read.
///
/// Any other wire type, a truncated varint, or a payload that would run past
/// the end of `data` yields `false`.
fn parse_protobuf_wire(data: &[u8]) -> bool {
    // Local import keeps `usize::try_from` available on editions before 2021.
    use std::convert::TryFrom;

    let n = data.len();
    if n < 8 {
        return false;
    }

    // Move `pos` forward by `count` bytes, refusing to overflow or pass the end.
    let skip = |pos: usize, count: usize| pos.checked_add(count).filter(|&end| end <= n);

    let mut i = 0usize;
    let mut fields = 0u32;
    while i < n {
        let Some((tag, after_tag)) = read_varint(data, i) else {
            return false;
        };
        // Field number 0 is rejected.
        if tag >> 3 == 0 {
            return false;
        }

        let next = match tag & 0x07 {
            0 => read_varint(data, after_tag).map(|(_, end)| end),
            1 => skip(after_tag, 8),
            2 => read_varint(data, after_tag).and_then(|(len, body)| {
                // A plain `as usize` cast would truncate on targets where
                // `usize` is narrower than 64 bits, letting an oversized
                // length wrap around to a small, in-bounds one.
                skip(body, usize::try_from(len).ok()?)
            }),
            5 => skip(after_tag, 4),
            _ => None,
        };
        let Some(next) = next else {
            return false;
        };

        i = next;
        fields += 1;
    }

    i == n && fields >= 3
}
/// Reads one base-128 varint from `data`, beginning at index `start`.
///
/// Each byte contributes its low seven bits, least-significant group first; a
/// clear high bit marks the final byte. On success returns the decoded value
/// and the index just past the varint.
///
/// Returns `None` when `start` is at or beyond the end of `data`, when the
/// data ends before a terminating byte, or when ten bytes have been read
/// without one. Bits of the tenth byte that do not fit in 64 bits are dropped.
fn read_varint(data: &[u8], start: usize) -> Option<(u64, usize)> {
    let tail = data.get(start..)?;
    let mut value = 0u64;

    // Shifts 0, 7, ..., 63 cap the scan at ten bytes.
    for (offset, (&byte, shift)) in tail.iter().zip((0u32..=63).step_by(7)).enumerate() {
        value |= u64::from(byte & 0x7F) << shift;
        if byte & 0x80 == 0 {
            return Some((value, start + offset + 1));
        }
    }

    None
}