use crate::error::Result;
use crate::processor::{FileTypeProfile, Processor};
use crate::processor::json_proc::JsonProcessor;
use crate::store::MappingStore;
/// Upper bound, in bytes, on the input accepted by `LogLineProcessor::process` (256 MiB).
///
/// Inputs longer than this are rejected with `SanitizeError::InputTooLarge`
/// instead of being processed.
const MAX_LOG_INPUT_SIZE: usize = 256 * 1024 * 1024;
/// [`Processor`] registered under the name `"log"` that handles log content
/// one line at a time.
///
/// JSON found inside a line is handed to the wrapped [`JsonProcessor`]; the
/// text around it is kept as part of the line.
pub struct LogLineProcessor {
/// JSON processor used for the JSON portion of each line.
json_proc: JsonProcessor,
}
impl LogLineProcessor {
pub fn new() -> Self {
Self {
json_proc: JsonProcessor,
}
}
}
impl Default for LogLineProcessor {
fn default() -> Self {
Self::new()
}
}
impl Processor for LogLineProcessor {
fn name(&self) -> &'static str {
"log"
}
fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
profile.processor == "log"
}
fn process(
&self,
content: &[u8],
profile: &FileTypeProfile,
store: &MappingStore,
) -> Result<Vec<u8>> {
if content.len() > MAX_LOG_INPUT_SIZE {
use crate::error::SanitizeError;
return Err(SanitizeError::InputTooLarge {
size: content.len(),
limit: MAX_LOG_INPUT_SIZE,
});
}
let text = String::from_utf8_lossy(content);
let mut output = String::with_capacity(text.len());
let raw_lines: Vec<&str> = text.split('\n').collect();
let lines = if raw_lines.last().map_or(false, |l| l.is_empty()) {
&raw_lines[..raw_lines.len() - 1]
} else {
&raw_lines[..]
};
for line in lines {
let processed_line = process_log_line(line, profile, store, &self.json_proc);
output.push_str(&processed_line);
output.push('\n');
}
if !text.ends_with('\n') && output.ends_with('\n') {
output.pop();
}
Ok(output.into_bytes())
}
}
/// Sanitises the JSON objects embedded in a single log line.
///
/// Every `{` in `line` is treated as a candidate start of a JSON object. When
/// the brace-balanced span starting there is accepted by `json_proc` (run with
/// a `"json"` profile that reuses `profile.fields` and sets the `compact`
/// option to `"true"`), the span is replaced by the processor's output and
/// scanning resumes after it. Candidates that are unbalanced or rejected by
/// the JSON processor are left untouched and the scan moves on to the next
/// `{`, so a non-JSON brace early in the line (for example `span{id=3}`) does
/// not stop a later JSON payload from being sanitised.
///
/// Text outside the replaced spans is returned unchanged. A line without any
/// accepted JSON object is returned verbatim.
fn process_log_line(
    line: &str,
    profile: &FileTypeProfile,
    store: &MappingStore,
    json_proc: &JsonProcessor,
) -> String {
    // Each failed candidate may rescan the rest of the line, so cap the number
    // of failures to keep adversarial input (e.g. a long run of `{`) from
    // costing quadratic time. Successful spans are skipped over and do not
    // count against the cap.
    const MAX_FAILED_CANDIDATES: usize = 64;

    let Some(first_brace) = line.find('{') else {
        return line.to_string();
    };

    // Built once per line and shared by every candidate span.
    let compact_profile =
        FileTypeProfile::new("json", profile.fields.clone()).with_option("compact", "true");

    let mut output = String::with_capacity(line.len());
    // Byte offset up to which `line` has already been copied into `output`.
    let mut emitted_upto = 0;
    let mut failures = 0;
    let mut candidate = Some(first_brace);

    while let Some(start) = candidate {
        let sanitised = find_matching_brace(&line[start..]).and_then(|relative_end| {
            let end = start + relative_end;
            json_proc
                .process(line[start..=end].as_bytes(), &compact_profile, store)
                .ok()
                .map(|bytes| (end, bytes))
        });

        let resume_at = match sanitised {
            Some((end, bytes)) => {
                output.push_str(&line[emitted_upto..start]);
                output.push_str(&String::from_utf8_lossy(&bytes));
                emitted_upto = end + 1;
                end + 1
            }
            None => {
                failures += 1;
                if failures >= MAX_FAILED_CANDIDATES {
                    break;
                }
                // `{` is a single ASCII byte, so `start + 1` is a char boundary.
                start + 1
            }
        };

        candidate = line[resume_at..]
            .find('{')
            .map(|offset| resume_at + offset);
    }

    output.push_str(&line[emitted_upto..]);
    output
}
/// Locates the `}` that closes the object opening at the first byte of `s`.
///
/// Returns the byte index of that closing brace, or `None` when `s` does not
/// begin with `{` or the braces never balance. Braces inside double-quoted
/// strings are ignored, and a backslash inside a string escapes the byte that
/// follows it.
fn find_matching_brace(s: &str) -> Option<usize> {
    if s.as_bytes().first() != Some(&b'{') {
        return None;
    }

    let mut nesting: usize = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    for (index, byte) in s.bytes().enumerate() {
        // The byte right after a backslash in a string is never structural.
        if skip_next {
            skip_next = false;
            continue;
        }

        if inside_string {
            match byte {
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => nesting += 1,
            b'}' => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(index);
                }
            }
            _ => {}
        }
    }

    None
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Builds a mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        // Not named `gen`: that identifier is a reserved keyword in the 2024 edition.
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Profile for the `"log"` processor with a single `*` field rule
    /// (presumably matching every field; confirm against `FieldRule`).
    fn wildcard_profile() -> FileTypeProfile {
        FileTypeProfile::new("log", vec![FieldRule::new("*")])
    }

    /// Runs a fresh `LogLineProcessor` over `content` with the wildcard profile.
    fn sanitise(content: &[u8]) -> Vec<u8> {
        let store = make_store();
        LogLineProcessor::new()
            .process(content, &wildcard_profile(), &store)
            .unwrap()
    }

    /// Same as `sanitise`, but decodes the result as UTF-8 text.
    fn sanitise_to_string(content: &[u8]) -> String {
        String::from_utf8(sanitise(content)).unwrap()
    }

    #[test]
    fn pure_ndjson_line() {
        let text =
            sanitise_to_string(b"{\"level\":\"info\",\"token\":\"abc123\",\"msg\":\"ok\"}\n");
        assert!(!text.contains("abc123"));
        assert!(text.contains("\"level\""));
    }

    #[test]
    fn log_prefix_before_json() {
        let text = sanitise_to_string(
            b"2024-01-01T00:00:00Z INFO {\"token\":\"secret\",\"user\":\"bob\"}\n",
        );
        assert!(text.contains("2024-01-01T00:00:00Z INFO "));
        assert!(!text.contains("secret"));
        assert!(!text.contains("bob"));
    }

    #[test]
    fn non_json_line_preserved() {
        let content = b"plain text log line with no json\n";
        assert_eq!(sanitise(content), content);
    }

    #[test]
    fn malformed_json_line_preserved() {
        let content = b"ERROR: template {name} not found\n";
        assert_eq!(sanitise(content), content);
    }

    #[test]
    fn multi_line_ndjson() {
        let text = sanitise_to_string(b"{\"token\":\"abc\"}\n{\"key\":\"xyz\"}\n");
        assert!(!text.contains("abc"));
        assert!(!text.contains("xyz"));
        assert_eq!(text.lines().count(), 2);
    }

    #[test]
    fn empty_input_yields_empty_output() {
        assert!(sanitise(b"").is_empty());
    }

    #[test]
    fn missing_trailing_newline_preserved() {
        let content = b"first line\nsecond line";
        assert_eq!(sanitise(content), content);
    }

    #[test]
    fn blank_lines_preserved() {
        let content = b"a\n\nb\n\n";
        assert_eq!(sanitise(content), content);
    }

    #[test]
    fn find_matching_brace_simple() {
        assert_eq!(find_matching_brace("{\"a\":\"b\"}"), Some(8));
    }

    #[test]
    fn find_matching_brace_nested() {
        assert_eq!(find_matching_brace("{\"a\":{\"b\":\"c\"}}"), Some(14));
    }

    #[test]
    fn find_matching_brace_brace_in_string() {
        assert_eq!(find_matching_brace("{\"a\":\"{not_nested}\"}"), Some(19));
    }

    #[test]
    fn find_matching_brace_escaped_quote_in_string() {
        assert_eq!(find_matching_brace("{\"a\":\"\\\"}\"}"), Some(10));
    }

    #[test]
    fn find_matching_brace_unbalanced_or_unanchored() {
        assert_eq!(find_matching_brace("no brace"), None);
        assert_eq!(find_matching_brace(" {}"), None);
        assert_eq!(find_matching_brace("{\"a\":1"), None);
    }
}