use super::pipeline::{extract_encoded_values, push_decoded_text_chunk};
use super::Decoder;
use keyhog_core::Chunk;
/// Stateless decoder that tries Caesar (letter-rotation) shifts on candidate values
/// found in a chunk; see its `Decoder` implementation in this file.
pub struct CaesarDecoder;
/// Minimum `len()` a candidate must have; `decode_chunk` skips anything shorter
/// before trying any shift.
const MIN_CAESAR_LEN: usize = 16;
/// Number of consecutive ASCII-alphanumeric bytes `looks_credential_shaped` requires
/// somewhere in a string before it will accept it.
const MIN_ALNUM_RUN: usize = 8;
/// File-name suffixes (lowercase, leading dot included) that `is_source_code_path`
/// recognises. Besides programming-language extensions the list also contains
/// prose/markup formats such as `.md`, `.rst`, `.txt` and `.adoc`.
const SOURCE_CODE_EXTENSIONS: &[&str] = &[
    ".rs", ".py", ".go",
    ".js", ".jsx", ".ts", ".tsx",
    ".java", ".kt", ".scala",
    ".c", ".cc", ".cpp", ".cxx", ".h", ".hh", ".hpp",
    ".cs", ".rb", ".php", ".swift", ".m", ".mm",
    ".sh", ".bash", ".zsh", ".fish",
    ".lua", ".pl", ".pm", ".sql",
    ".html", ".htm", ".css", ".scss", ".sass", ".vue", ".svelte",
    ".md", ".rst", ".txt", ".adoc",
];

/// Reports whether `path` ends with one of [`SOURCE_CODE_EXTENSIONS`].
///
/// The comparison ignores ASCII case, so `README.MD` matches `.md`.
/// Returns `false` when `path` is `None` or when no suffix matches.
pub fn is_source_code_path(path: Option<&str>) -> bool {
    let raw = match path {
        Some(p) => p.as_bytes(),
        None => return false,
    };

    SOURCE_CODE_EXTENSIONS.iter().any(|ext| {
        let suffix = ext.as_bytes();
        // Compare the trailing bytes directly; working on bytes keeps this safe for
        // paths containing multi-byte characters (no char-boundary slicing).
        match raw.len().checked_sub(suffix.len()) {
            Some(start) => raw[start..].eq_ignore_ascii_case(suffix),
            None => false,
        }
    })
}
impl Decoder for CaesarDecoder {
    /// Returns the decoder's identifier, `"caesar"`. The same value is handed to
    /// `push_decoded_text_chunk` for every chunk this decoder emits.
    fn name(&self) -> &'static str {
        "caesar"
    }

    /// Tries every non-trivial Caesar shift (1..=25) on each candidate extracted
    /// from `chunk.data` and emits a chunk for each shifted text that
    /// `looks_credential_shaped` accepts.
    ///
    /// Returns an empty vector without scanning when the chunk's `source_type`
    /// contains `"/caesar"` (presumably output this decoder already produced —
    /// confirm against `push_decoded_text_chunk`) or when the chunk's path is
    /// matched by `is_source_code_path`.
    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
        if chunk.metadata.source_type.contains("/caesar")
            || is_source_code_path(chunk.metadata.path.as_deref())
        {
            return Vec::new();
        }

        let mut out = Vec::new();
        for candidate in extract_encoded_values(&chunk.data) {
            if candidate.len() < MIN_CAESAR_LEN {
                continue;
            }

            // Without an ASCII letter every shift would reproduce the input unchanged.
            let has_letter = candidate.chars().any(|c| c.is_ascii_alphabetic());
            // `caesar_shift` only rotates ASCII letters, so the digits of the input are
            // exactly the digits of every shifted output. `looks_credential_shaped`
            // rejects digit-free text, so a digit-free candidate can never match:
            // skip it here instead of allocating and scanning 25 doomed rotations.
            let has_digit = candidate.chars().any(|c| c.is_ascii_digit());
            if !has_letter || !has_digit {
                continue;
            }

            for shift in 1..=25u8 {
                let decoded = caesar_shift(&candidate, shift);
                if looks_credential_shaped(&decoded) {
                    push_decoded_text_chunk(&mut out, chunk, decoded, self.name());
                }
            }
        }
        out
    }
}
/// Rotates every ASCII letter of `input` forward by `shift` positions in the
/// alphabet, wrapping from `z` to `a` (and `Z` to `A`) and preserving case.
///
/// All other characters — digits, punctuation, whitespace, non-ASCII — are copied
/// through unchanged. Any `shift` value is accepted; it is taken modulo 26, so
/// `0` and `26` are both the identity.
pub fn caesar_shift(input: &str, shift: u8) -> String {
    // Reduce up front: a letter offset (0..=25) plus a raw shift above 230 would
    // overflow `u8`, panicking in debug builds and yielding a wrong letter in release.
    let shift = shift % 26;

    // `letter` is always an ASCII letter at or above `base`, so both the cast and
    // the subtraction are lossless; the sum is at most 25 + 25.
    let rotate = |letter: char, base: u8| char::from(base + (letter as u8 - base + shift) % 26);

    let mut out = String::with_capacity(input.len());
    out.extend(input.chars().map(|ch| match ch {
        'A'..='Z' => rotate(ch, b'A'),
        'a'..='z' => rotate(ch, b'a'),
        _ => ch,
    }));
    out
}
/// Heuristic filter deciding whether `s` is worth reporting as a decoded credential.
///
/// Returns `true` only when all three conditions hold:
/// 1. `s` contains at least one ASCII digit,
/// 2. `s` contains a run of at least `MIN_ALNUM_RUN` consecutive ASCII-alphanumeric
///    bytes,
/// 3. `s` contains at least one entry of `crate::confidence::KNOWN_PREFIXES`
///    (defined outside this file; presumably known credential prefixes — confirm there).
pub fn looks_credential_shaped(s: &str) -> bool {
    let raw = s.as_bytes();

    let contains_digit = || raw.iter().any(u8::is_ascii_digit);

    // Splitting on every non-alphanumeric byte leaves exactly the maximal
    // alphanumeric runs, so it is enough to look for one that is long enough.
    let contains_long_run = || {
        raw.split(|b| !b.is_ascii_alphanumeric())
            .any(|segment| segment.len() >= MIN_ALNUM_RUN)
    };

    let contains_known_prefix = || {
        crate::confidence::KNOWN_PREFIXES
            .iter()
            .any(|prefix| s.contains(prefix))
    };

    // Cheapest test first; `&&` stops at the first failing condition.
    contains_digit() && contains_long_run() && contains_known_prefix()
}