use super::pipeline::{extract_encoded_values, push_decoded_text_chunk};
use super::Decoder;
use aho_corasick::AhoCorasick;
use keyhog_core::Chunk;
use std::sync::LazyLock;
/// Decoder that applies every Caesar (letter-rotation) shift from 1 to 25 to
/// encoded-looking values in a chunk and keeps the rotations that look like
/// credentials. See the `Decoder` implementation for the filtering steps.
pub struct CaesarDecoder;
/// Candidates shorter than this length are skipped by `CaesarDecoder::decode_chunk`.
const MIN_CAESAR_LEN: usize = 16;
/// Minimum number of consecutive ASCII alphanumeric bytes that the shape checks
/// (`candidate_shape_invariant`, `looks_credential_shaped`) require.
const MIN_ALNUM_RUN: usize = 8;
/// Lazily built Aho-Corasick automaton over every non-identity Caesar rotation
/// (shifts 25 down to 1) of each entry in `crate::confidence::KNOWN_PREFIXES`.
///
/// Holds `None` when the automaton cannot be constructed; `decode_chunk` then
/// skips this pre-filter instead of rejecting candidates.
static ROTATED_PREFIX_AC: LazyLock<Option<AhoCorasick>> = LazyLock::new(|| {
    let rotated_prefixes: Vec<String> = crate::confidence::KNOWN_PREFIXES
        .iter()
        .flat_map(|prefix| (1..=25u8).map(move |k| caesar_shift(prefix, 26 - k)))
        .collect();
    AhoCorasick::new(&rotated_prefixes).ok()
});
/// Lowercase file-name suffixes (including the leading dot) that
/// `is_source_code_path` treats as source code or documentation.
const SOURCE_CODE_EXTENSIONS: &[&str] = &[
    ".rs", ".py", ".go", ".js", ".jsx", ".ts", ".tsx", ".java", ".kt", ".scala", ".c", ".cc",
    ".cpp", ".cxx", ".h", ".hh", ".hpp", ".cs", ".rb", ".php", ".swift", ".m", ".mm", ".sh",
    ".bash", ".zsh", ".fish", ".lua", ".pl", ".pm", ".sql", ".html", ".htm", ".css", ".scss",
    ".sass", ".vue", ".svelte", ".md", ".rst", ".txt", ".adoc", ".tbl", ".mk", ".cmake",
];

/// Lowercase extension-less file names that `is_source_code_path` also accepts.
const SOURCE_CODE_FILENAMES: &[&str] = &["kconfig", "makefile", "cmakelists.txt"];

/// Reports whether `path` names a file whose name or extension is in the
/// source-code tables above.
///
/// Backslashes are treated as path separators and the comparison is ASCII
/// case-insensitive. A missing path (`None`) is never source code.
pub fn is_source_code_path(path: Option<&str>) -> bool {
    path.is_some_and(|raw| {
        let normalized = raw.replace('\\', "/").to_ascii_lowercase();
        // Everything after the last separator, or the whole string when there is none.
        let file_name = normalized
            .rsplit_once('/')
            .map_or(normalized.as_str(), |(_, name)| name);
        SOURCE_CODE_FILENAMES.contains(&file_name)
            || SOURCE_CODE_EXTENSIONS
                .iter()
                .any(|ext| normalized.ends_with(ext))
    })
}
/// Reports whether `line` contains a URL with embedded credentials, i.e. the
/// shape `scheme://user:password@host`.
///
/// Every `://` occurrence on the line is examined, so a credential URL that
/// follows a plain URL on the same line is still detected. An occurrence
/// qualifies when:
/// * it is immediately preceded by at least two ASCII letters or `+` (the scheme), and
/// * the text after it, up to the first `/`, `?`, `#` or ASCII whitespace,
///   contains an `@` with a `:` somewhere before it.
pub(crate) fn line_has_credential_url(line: &str) -> bool {
    line.match_indices("://").any(|(scheme_end, _)| {
        // Length of the scheme-like run that ends right before "://".
        let scheme_len = line[..scheme_end]
            .bytes()
            .rev()
            .take_while(|b| b.is_ascii_alphabetic() || *b == b'+')
            .count();
        if scheme_len < 2 {
            return false;
        }

        // "://" is three ASCII bytes, so this slice starts on a char boundary.
        let rest = &line[scheme_end + 3..];
        let authority_end = rest
            .find(|c: char| matches!(c, '/' | '?' | '#') || c.is_ascii_whitespace())
            .unwrap_or(rest.len());
        let authority = &rest[..authority_end];

        // Credentials are present only when a ':' precedes the first '@'.
        authority
            .find('@')
            .is_some_and(|at_pos| authority[..at_pos].contains(':'))
    })
}
impl Decoder for CaesarDecoder {
    /// Identifier passed to `push_decoded_text_chunk` for chunks this decoder emits.
    fn name(&self) -> &'static str {
        "caesar"
    }

    /// Produces one decoded chunk for every (candidate, shift) pair whose
    /// rotation looks like a credential.
    ///
    /// Returns an empty vector without inspecting candidates when the chunk's
    /// source type already contains `/caesar`, when its path is a source-code
    /// path, or when any line holds a credential-bearing URL.
    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
        let already_caesar = chunk.metadata.source_type.contains("/caesar");
        if already_caesar || is_source_code_path(chunk.metadata.path.as_deref()) {
            return Vec::new();
        }
        if chunk.data.lines().any(line_has_credential_url) {
            return Vec::new();
        }

        let mut decoded_chunks = Vec::new();
        for candidate in extract_encoded_values(&chunk.data) {
            // Cheap shift-independent filters first; the rotated-prefix
            // automaton is consulted last and only when it was built.
            let worth_rotating = candidate.len() >= MIN_CAESAR_LEN
                && candidate_shape_invariant(&candidate)
                && ROTATED_PREFIX_AC
                    .as_ref()
                    .map_or(true, |ac| ac.is_match(candidate.as_str()));
            if !worth_rotating {
                continue;
            }

            for decoded in (1..=25u8).map(|shift| caesar_shift(&candidate, shift)) {
                if looks_credential_shaped(&decoded) {
                    push_decoded_text_chunk(&mut decoded_chunks, chunk, decoded, self.name());
                }
            }
        }
        decoded_chunks
    }
}
/// Checks the properties of a candidate that no Caesar shift can change.
///
/// Returns `true` only when `s` contains at least one ASCII digit, at least
/// one ASCII letter, and a run of `MIN_ALNUM_RUN` or more consecutive ASCII
/// alphanumeric bytes. Rotation maps letters to letters and leaves every other
/// byte alone, so a candidate failing here fails for every shift.
fn candidate_shape_invariant(s: &str) -> bool {
    let has_digit = s.bytes().any(|b| b.is_ascii_digit());
    let has_letter = s.bytes().any(|b| b.is_ascii_alphabetic());
    has_digit
        && has_letter
        // Splitting on non-alphanumeric bytes yields the maximal alphanumeric runs.
        && s.as_bytes()
            .split(|b| !b.is_ascii_alphanumeric())
            .any(|run| run.len() >= MIN_ALNUM_RUN)
}
/// Rotates every ASCII letter in `input` forward by `shift` positions,
/// wrapping within its own case; all other characters are copied unchanged.
///
/// `shift` is reduced modulo 26 before use, so any `u8` value is accepted:
/// `26` is the identity and `27` behaves like `1`. Without the reduction the
/// intermediate `u8` sum overflows for shifts above 230.
pub fn caesar_shift(input: &str, shift: u8) -> String {
    let shift = shift % 26;
    // Offset within the alphabet is at most 25 and `shift` is at most 25,
    // so the sum stays well inside `u8`.
    let rotate = |ch: char, base: u8| char::from(base + (ch as u8 - base + shift) % 26);

    input
        .chars()
        .map(|ch| match ch {
            'A'..='Z' => rotate(ch, b'A'),
            'a'..='z' => rotate(ch, b'a'),
            _ => ch,
        })
        .collect()
}
/// Decides whether a decoded string is worth emitting as a credential candidate.
///
/// All three conditions must hold: `s` contains an ASCII digit, it has a run
/// of at least `MIN_ALNUM_RUN` consecutive ASCII alphanumeric bytes, and it
/// contains one of `crate::confidence::KNOWN_PREFIXES` as a substring.
pub fn looks_credential_shaped(s: &str) -> bool {
    let has_digit = s.bytes().any(|b| b.is_ascii_digit());
    has_digit
        // Splitting on non-alphanumeric bytes yields the maximal alphanumeric runs.
        && s.as_bytes()
            .split(|b| !b.is_ascii_alphanumeric())
            .any(|run| run.len() >= MIN_ALNUM_RUN)
        && crate::confidence::KNOWN_PREFIXES
            .iter()
            .any(|prefix| s.contains(prefix))
}