sanitize_engine/processor/
log_line.rs1use crate::error::Result;
34use crate::processor::{FileTypeProfile, Processor};
35use crate::processor::json_proc::JsonProcessor;
36use crate::store::MappingStore;
37
38const MAX_LOG_INPUT_SIZE: usize = 256 * 1024 * 1024; pub struct LogLineProcessor {
43 json_proc: JsonProcessor,
44}
45
46impl LogLineProcessor {
47 pub fn new() -> Self {
48 Self {
49 json_proc: JsonProcessor,
50 }
51 }
52}
53
54impl Default for LogLineProcessor {
55 fn default() -> Self {
56 Self::new()
57 }
58}
59
60impl Processor for LogLineProcessor {
61 fn name(&self) -> &'static str {
62 "log"
63 }
64
65 fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
66 profile.processor == "log"
67 }
68
69 fn process(
70 &self,
71 content: &[u8],
72 profile: &FileTypeProfile,
73 store: &MappingStore,
74 ) -> Result<Vec<u8>> {
75 if content.len() > MAX_LOG_INPUT_SIZE {
76 use crate::error::SanitizeError;
77 return Err(SanitizeError::InputTooLarge {
78 size: content.len(),
79 limit: MAX_LOG_INPUT_SIZE,
80 });
81 }
82
83 let text = String::from_utf8_lossy(content);
84 let mut output = String::with_capacity(text.len());
85
86 let raw_lines: Vec<&str> = text.split('\n').collect();
89 let lines = if raw_lines.last().map_or(false, |l| l.is_empty()) {
90 &raw_lines[..raw_lines.len() - 1]
91 } else {
92 &raw_lines[..]
93 };
94
95 for line in lines {
96 let processed_line = process_log_line(line, profile, store, &self.json_proc);
97 output.push_str(&processed_line);
98 output.push('\n');
99 }
100
101 if !text.ends_with('\n') && output.ends_with('\n') {
103 output.pop();
104 }
105
106 Ok(output.into_bytes())
107 }
108}
109
110fn process_log_line(
113 line: &str,
114 profile: &FileTypeProfile,
115 store: &MappingStore,
116 json_proc: &JsonProcessor,
117) -> String {
118 let Some(json_start) = line.find('{') else {
120 return line.to_string();
121 };
122
123 let json_end = match find_matching_brace(&line[json_start..]) {
125 Some(relative_end) => json_start + relative_end,
126 None => return line.to_string(),
127 };
128
129 let json_span = &line[json_start..=json_end];
130 let prefix = &line[..json_start];
131 let suffix = &line[json_end + 1..];
132
133 let compact_profile = FileTypeProfile::new(
135 "json",
136 profile.fields.clone(),
137 )
138 .with_option("compact", "true");
139
140 match json_proc.process(json_span.as_bytes(), &compact_profile, store) {
142 Ok(sanitised_bytes) => {
143 let sanitised = String::from_utf8_lossy(&sanitised_bytes);
144 format!("{}{}{}", prefix, sanitised, suffix)
145 }
146 Err(_) => line.to_string(),
149 }
150}
151
/// Finds the `}` that closes the `{` at the very start of `s`.
///
/// Returns the byte offset of that closing brace within `s`, or `None` when
/// `s` does not begin with `{` or the brace is never closed. Braces between
/// double quotes are ignored, and a backslash inside a quoted section escapes
/// the byte that follows it.
fn find_matching_brace(s: &str) -> Option<usize> {
    if s.as_bytes().first() != Some(&b'{') {
        return None;
    }

    let mut open_braces: usize = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    for (idx, byte) in s.bytes().enumerate() {
        if skip_next {
            // This byte was escaped by the preceding backslash.
            skip_next = false;
            continue;
        }

        if inside_string {
            match byte {
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                // Cannot underflow: the leading `{` was counted first and the
                // function returns as soon as the count drops back to zero.
                open_braces -= 1;
                if open_braces == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
183
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Mapping store backed by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Log profile with a single `*` field rule.
    fn wildcard_profile() -> FileTypeProfile {
        FileTypeProfile::new("log", vec![FieldRule::new("*")])
    }

    /// Runs a fresh processor over `content` and returns the raw output bytes.
    fn run_bytes(content: &[u8]) -> Vec<u8> {
        let store = make_store();
        let proc = LogLineProcessor::new();
        proc.process(content, &wildcard_profile(), &store).unwrap()
    }

    /// Same as `run_bytes`, but decodes the output as UTF-8 text.
    fn run_text(content: &[u8]) -> String {
        String::from_utf8(run_bytes(content)).unwrap()
    }

    #[test]
    fn pure_ndjson_line() {
        let text = run_text(b"{\"level\":\"info\",\"token\":\"abc123\",\"msg\":\"ok\"}\n");
        assert!(!text.contains("abc123"));
        assert!(text.contains("\"level\""));
    }

    #[test]
    fn log_prefix_before_json() {
        let text =
            run_text(b"2024-01-01T00:00:00Z INFO {\"token\":\"secret\",\"user\":\"bob\"}\n");
        // The free-form prefix survives untouched.
        assert!(text.contains("2024-01-01T00:00:00Z INFO "));
        // Values inside the JSON payload are replaced.
        assert!(!text.contains("secret"));
        assert!(!text.contains("bob"));
    }

    #[test]
    fn non_json_line_preserved() {
        let content = b"plain text log line with no json\n";
        assert_eq!(run_bytes(content), content);
    }

    #[test]
    fn malformed_json_line_preserved() {
        // `{name}` is balanced but the line is expected to come back unchanged.
        let content = b"ERROR: template {name} not found\n";
        assert_eq!(run_bytes(content), content);
    }

    #[test]
    fn multi_line_ndjson() {
        let text = run_text(b"{\"token\":\"abc\"}\n{\"key\":\"xyz\"}\n");
        assert!(!text.contains("abc"));
        assert!(!text.contains("xyz"));
        assert_eq!(text.lines().count(), 2);
    }

    #[test]
    fn find_matching_brace_simple() {
        let closing = find_matching_brace("{\"a\":\"b\"}");
        assert_eq!(closing, Some(8));
    }

    #[test]
    fn find_matching_brace_nested() {
        let closing = find_matching_brace("{\"a\":{\"b\":\"c\"}}");
        assert_eq!(closing, Some(14));
    }

    #[test]
    fn find_matching_brace_brace_in_string() {
        let closing = find_matching_brace("{\"a\":\"{not_nested}\"}");
        assert_eq!(closing, Some(19));
    }
}