sanitize_engine/processor/
log_line.rs1use crate::error::Result;
34use crate::processor::json_proc::JsonProcessor;
35use crate::processor::limits::DEFAULT_INPUT_SIZE;
36use crate::processor::{FileTypeProfile, Processor};
37use crate::store::MappingStore;
38
39pub struct LogLineProcessor {
41 json_proc: JsonProcessor,
42}
43
44impl LogLineProcessor {
45 pub fn new() -> Self {
46 Self {
47 json_proc: JsonProcessor,
48 }
49 }
50}
51
52impl Default for LogLineProcessor {
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl Processor for LogLineProcessor {
59 fn name(&self) -> &'static str {
60 "log"
61 }
62
63 fn can_handle(&self, _content: &[u8], profile: &FileTypeProfile) -> bool {
64 profile.processor == "log"
65 }
66
67 fn process(
68 &self,
69 content: &[u8],
70 profile: &FileTypeProfile,
71 store: &MappingStore,
72 ) -> Result<Vec<u8>> {
73 if content.len() > DEFAULT_INPUT_SIZE {
74 use crate::error::SanitizeError;
75 return Err(SanitizeError::InputTooLarge {
76 size: content.len(),
77 limit: DEFAULT_INPUT_SIZE,
78 });
79 }
80
81 let text = String::from_utf8_lossy(content);
82 let mut output = String::with_capacity(text.len());
83
84 let raw_lines: Vec<&str> = text.split('\n').collect();
87 let lines = if raw_lines.last().is_some_and(|l| l.is_empty()) {
88 &raw_lines[..raw_lines.len() - 1]
89 } else {
90 &raw_lines[..]
91 };
92
93 for line in lines {
94 let processed_line = process_log_line(line, profile, store, &self.json_proc);
95 output.push_str(&processed_line);
96 output.push('\n');
97 }
98
99 if !text.ends_with('\n') && output.ends_with('\n') {
101 output.pop();
102 }
103
104 Ok(output.into_bytes())
105 }
106}
107
108fn process_log_line(
111 line: &str,
112 profile: &FileTypeProfile,
113 store: &MappingStore,
114 json_proc: &JsonProcessor,
115) -> String {
116 let Some(json_start) = line.find('{') else {
118 return line.to_string();
119 };
120
121 let json_end = match find_matching_brace(&line[json_start..]) {
123 Some(relative_end) => json_start + relative_end,
124 None => return line.to_string(),
125 };
126
127 let json_span = &line[json_start..=json_end];
128 let prefix = &line[..json_start];
129 let suffix = &line[json_end + 1..];
130
131 let compact_profile =
133 FileTypeProfile::new("json", profile.fields.clone()).with_option("compact", "true");
134
135 match json_proc.process(json_span.as_bytes(), &compact_profile, store) {
137 Ok(sanitised_bytes) => {
138 let sanitised = String::from_utf8_lossy(&sanitised_bytes);
139 format!("{}{}{}", prefix, sanitised, suffix)
140 }
141 Err(_) => line.to_string(),
144 }
145}
146
/// Returns the byte index of the `}` that closes the `{` at the start of `s`.
///
/// Braces inside double-quoted strings are ignored, and a backslash inside a
/// string escapes the byte that follows it. Returns `None` when `s` does not
/// begin with `{` or when the opening brace is never closed.
fn find_matching_brace(s: &str) -> Option<usize> {
    if s.as_bytes().first() != Some(&b'{') {
        return None;
    }

    let mut open_braces: usize = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    for (idx, byte) in s.bytes().enumerate() {
        // The byte right after a backslash inside a string is taken literally.
        if skip_next {
            skip_next = false;
            continue;
        }

        if inside_string {
            match byte {
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
            continue;
        }

        match byte {
            b'"' => inside_string = true,
            b'{' => open_braces += 1,
            b'}' => {
                // Cannot underflow: the first byte is `{`, and the function
                // returns as soon as the count drops back to zero.
                open_braces -= 1;
                if open_braces == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
178
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generator::HmacGenerator;
    use crate::processor::profile::FieldRule;
    use std::sync::Arc;

    /// Mapping store driven by an HMAC generator with a fixed key.
    fn make_store() -> MappingStore {
        let generator = Arc::new(HmacGenerator::new([42u8; 32]));
        MappingStore::new(generator, None)
    }

    /// Log profile with a single `*` field rule.
    fn wildcard_profile() -> FileTypeProfile {
        let rules = vec![FieldRule::new("*")];
        FileTypeProfile::new("log", rules)
    }

    /// Runs a fresh processor over `input` with the wildcard profile and
    /// returns the raw output bytes.
    fn run(input: &[u8]) -> Vec<u8> {
        let store = make_store();
        let proc = LogLineProcessor::new();
        proc.process(input, &wildcard_profile(), &store).unwrap()
    }

    /// Same as [`run`], with the output decoded as UTF-8.
    fn run_text(input: &[u8]) -> String {
        String::from_utf8(run(input)).unwrap()
    }

    #[test]
    fn pure_ndjson_line() {
        let text = run_text(b"{\"level\":\"info\",\"token\":\"abc123\",\"msg\":\"ok\"}\n");
        assert!(!text.contains("abc123"));
        assert!(text.contains("\"level\""));
    }

    #[test]
    fn log_prefix_before_json() {
        let text = run_text(b"2024-01-01T00:00:00Z INFO {\"token\":\"secret\",\"user\":\"bob\"}\n");
        // The non-JSON prefix must pass through untouched.
        assert!(text.contains("2024-01-01T00:00:00Z INFO "));
        // Values inside the JSON payload must be replaced.
        assert!(!text.contains("secret"));
        assert!(!text.contains("bob"));
    }

    #[test]
    fn non_json_line_preserved() {
        let content = b"plain text log line with no json\n";
        assert_eq!(run(content), content);
    }

    #[test]
    fn malformed_json_line_preserved() {
        // `{name}` is balanced but is not valid JSON.
        let content = b"ERROR: template {name} not found\n";
        assert_eq!(run(content), content);
    }

    #[test]
    fn multi_line_ndjson() {
        let text = run_text(b"{\"token\":\"abc\"}\n{\"key\":\"xyz\"}\n");
        assert!(!text.contains("abc"));
        assert!(!text.contains("xyz"));
        assert_eq!(text.lines().count(), 2);
    }

    #[test]
    fn find_matching_brace_simple() {
        let closing = find_matching_brace("{\"a\":\"b\"}");
        assert_eq!(closing, Some(8));
    }

    #[test]
    fn find_matching_brace_nested() {
        let closing = find_matching_brace("{\"a\":{\"b\":\"c\"}}");
        assert_eq!(closing, Some(14));
    }

    #[test]
    fn find_matching_brace_brace_in_string() {
        let closing = find_matching_brace("{\"a\":\"{not_nested}\"}");
        assert_eq!(closing, Some(19));
    }
}