use super::pipeline::push_decoded_text_chunk;
use super::Decoder;
use keyhog_core::Chunk;
/// Decoder that pulls double-quoted string literals out of a chunk's text and
/// emits their JSON-unescaped contents as additional chunks.
pub(super) struct JsonDecoder;
impl Decoder for JsonDecoder {
    /// Identifier passed along with every chunk this decoder produces.
    fn name(&self) -> &'static str {
        "json"
    }

    /// Scans `chunk.data` for quoted string literals, JSON-unescapes each one,
    /// and hands every successfully unescaped string to
    /// `push_decoded_text_chunk`. Literals that fail to unescape are skipped.
    fn decode_chunk(&self, chunk: &Chunk) -> Vec<Chunk> {
        let mut out = Vec::new();
        extract_json_strings(&chunk.data)
            .iter()
            .filter_map(|raw| json_unescape(raw).ok())
            .for_each(|text| {
                push_decoded_text_chunk(&mut out, chunk, text, self.name());
            });
        out
    }
}
/// Collects the raw (still escaped) contents of every double-quoted string
/// literal found in `text`.
///
/// The scan is a lightweight tokenizer, not a JSON parser:
/// * a backslash protects the following byte, so `\"` does not end a literal;
/// * an unescaped `\n` or `\r` aborts the current literal (it is discarded);
/// * a literal still open at the end of input is discarded;
/// * literals shorter than four bytes are dropped.
///
/// The returned strings are exact slices of `text`, so multi-byte UTF-8
/// content is preserved verbatim.
fn extract_json_strings(text: &str) -> Vec<String> {
    /// Literals with fewer source bytes than this are not reported.
    const MIN_CONTENT_LEN: usize = 4;

    let bytes = text.as_bytes();
    let mut strings = Vec::new();
    let mut index = 0;

    while index < bytes.len() {
        if bytes[index] != b'"' {
            index += 1;
            continue;
        }

        // `index` sits on an opening quote; the content starts right after it.
        let start = index + 1;
        index = start;
        let mut escaping = false;
        let mut closed = false;

        while index < bytes.len() {
            let current = bytes[index];
            if escaping {
                // Whatever follows a backslash is part of the content.
                escaping = false;
            } else if current == b'\\' {
                escaping = true;
            } else if current == b'"' {
                closed = true;
                break;
            } else if current == b'\n' || current == b'\r' {
                break;
            }
            index += 1;
        }

        if closed {
            // Both delimiters are ASCII quote bytes, which never occur inside
            // a multi-byte UTF-8 sequence, so this range is on char boundaries.
            let content = &text[start..index];
            if content.len() >= MIN_CONTENT_LEN {
                strings.push(content.to_owned());
            }
        }

        // Step past the closing quote, the line break, or the end of input.
        index += 1;
    }

    strings
}
/// Resolves JSON backslash escapes in `input` (the raw text between the
/// quotes of a string literal) and returns the decoded string.
///
/// Supported escapes are `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` and
/// `\uXXXX`. Characters outside the Basic Multilingual Plane, which JSON
/// encodes as a UTF-16 surrogate pair (`\uD83D\uDE00`), are combined into a
/// single `char`.
///
/// # Errors
///
/// Returns `Err(())` for an unknown escape, a trailing backslash, a `\u`
/// escape with fewer than four hex digits, or an unpaired surrogate.
fn json_unescape(input: &str) -> Result<String, ()> {
    let mut decoded = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            decoded.push(ch);
            continue;
        }
        let unescaped = match chars.next().ok_or(())? {
            '"' => '"',
            '\\' => '\\',
            '/' => '/',
            'b' => '\x08',
            'f' => '\x0C',
            'n' => '\n',
            'r' => '\r',
            't' => '\t',
            'u' => take_unicode_escape(&mut chars)?,
            _ => return Err(()),
        };
        decoded.push(unescaped);
    }
    Ok(decoded)
}

/// Decodes the remainder of a `\u` escape; `chars` is positioned just after
/// the `u`. When the first code unit is a high surrogate, the following
/// `\uXXXX` escape is consumed too and must be a low surrogate.
fn take_unicode_escape<I>(chars: &mut std::iter::Peekable<I>) -> Result<char, ()>
where
    I: Iterator<Item = char>,
{
    let first = take_hex_digits(chars, 4)?;
    if !(0xD800..=0xDBFF).contains(&first) {
        // Ordinary scalar value. A lone low surrogate is rejected here
        // because `char::from_u32` returns `None` for it.
        return char::from_u32(first).ok_or(());
    }

    // High surrogate: only valid as the first half of a `\uHHHH\uLLLL` pair.
    if chars.next() != Some('\\') || chars.next() != Some('u') {
        return Err(());
    }
    let second = take_hex_digits(chars, 4)?;
    if !(0xDC00..=0xDFFF).contains(&second) {
        return Err(());
    }
    let scalar = 0x10000 + ((first - 0xD800) << 10) + (second - 0xDC00);
    char::from_u32(scalar).ok_or(())
}

/// Consumes exactly `count` hexadecimal digits from `chars` and returns the
/// value they spell, most significant digit first.
///
/// # Errors
///
/// Returns `Err(())` if the input ends early or a non-hex character is met.
fn take_hex_digits<I>(chars: &mut std::iter::Peekable<I>, count: usize) -> Result<u32, ()>
where
    I: Iterator<Item = char>,
{
    let mut value = 0u32;
    for _ in 0..count {
        let ch = chars.next().ok_or(())?;
        value = (value << 4) | ch.to_digit(16).ok_or(())?;
    }
    Ok(value)
}
#[cfg(test)]
mod tests {
    use super::*;
    use keyhog_core::ChunkMetadata;

    /// Builds a chunk holding `data` with placeholder test metadata.
    fn chunk_with(data: &str) -> Chunk {
        Chunk {
            data: data.to_string(),
            metadata: ChunkMetadata {
                source_type: "test".into(),
                path: None,
                commit: None,
                author: None,
                date: None,
            },
        }
    }

    #[test]
    fn unescapes_json_string() {
        let decoded = json_unescape(r#"hello \"world\""#).unwrap();
        assert_eq!(decoded, r#"hello "world""#);
    }

    #[test]
    fn unescapes_json_with_backslash() {
        let decoded = json_unescape(r#"path\\to\\file"#).unwrap();
        assert_eq!(decoded, r#"path\to\file"#);
    }

    #[test]
    fn extracts_and_unescapes_from_json_object() {
        let input =
            chunk_with(r#"{"api_key": "TESTKEY_aK7xP9mQ2wE5rT8yU1iO", "host": "localhost"}"#);
        let produced = JsonDecoder.decode_chunk(&input);
        assert_eq!(produced.len(), 2);
        assert_eq!(produced[0].data, "TESTKEY_aK7xP9mQ2wE5rT8yU1iO");
        assert_eq!(produced[1].data, "localhost");
    }

    #[test]
    fn extracts_secret_with_escaped_quotes() {
        let input = chunk_with(r#"{"message": "use \"TESTKEY_aK7xP9mQ2wE5rT8yU1iO\""}"#);
        let produced = JsonDecoder.decode_chunk(&input);
        let expected = r#"use "TESTKEY_aK7xP9mQ2wE5rT8yU1iO""#;
        assert!(produced.iter().any(|c| c.data == expected));
    }
}