Skip to main content

hyperi_rustlib/logger/
masking.rs

1// Project:   hyperi-rustlib
2// File:      src/logger/masking.rs
3// Purpose:   Sensitive data masking for log output
4// Language:  Rust
5//
6// License:   FSL-1.1-ALv2
7// Copyright: (c) 2026 HYPERI PTY LIMITED
8
9//! Sensitive data masking for tracing log output.
10//!
11//! Provides a [`MaskingWriter`] that intercepts formatted log output and
12//! redacts sensitive field values before writing to the underlying destination.
13
14use std::collections::HashSet;
15use std::io;
16use std::sync::Arc;
17
/// Placeholder text written in place of every redacted value.
pub const REDACTED: &str = "[REDACTED]";
20
/// Built-in field-name patterns that are treated as sensitive.
///
/// Returns a freshly allocated list on every call, grouped (in order) as
/// passwords, tokens and keys, auth, OAuth, and other sensitive identifiers.
#[must_use]
pub fn default_sensitive_fields() -> Vec<String> {
    const DEFAULT_NAMES: &[&str] = &[
        // Passwords
        "password",
        "passwd",
        "pwd",
        "pass",
        // Tokens and keys
        "token",
        "secret",
        "api_key",
        "apikey",
        "api-key",
        "access_key",
        "secret_key",
        "private_key",
        "privatekey",
        // Auth
        "auth",
        "authorization",
        "bearer",
        "credential",
        "credentials",
        // OAuth
        "client_secret",
        "refresh_token",
        "access_token",
        // Other sensitive
        "ssn",
        "credit_card",
        "creditcard",
        "cvv",
        "pin",
    ];

    DEFAULT_NAMES
        .iter()
        .map(|name| (*name).to_owned())
        .collect()
}
61
62/// Configuration for sensitive field detection.
63///
64/// Holds the set of field name patterns considered sensitive. Used both as a
65/// standalone detector and as configuration for [`make_masking_writer`].
66#[derive(Debug, Clone)]
67pub struct MaskingLayer {
68    sensitive_fields: HashSet<String>,
69}
70
71impl MaskingLayer {
72    /// Create a new masking layer with default sensitive fields.
73    #[must_use]
74    pub fn new() -> Self {
75        Self::with_fields(default_sensitive_fields())
76    }
77
78    /// Create a masking layer with custom sensitive fields.
79    #[must_use]
80    pub fn with_fields(fields: Vec<String>) -> Self {
81        Self {
82            sensitive_fields: fields.into_iter().map(|s| s.to_lowercase()).collect(),
83        }
84    }
85
86    /// Add additional sensitive fields.
87    #[must_use]
88    pub fn add_fields(mut self, fields: Vec<String>) -> Self {
89        for field in fields {
90            self.sensitive_fields.insert(field.to_lowercase());
91        }
92        self
93    }
94
95    /// Check if a field name should be masked.
96    #[must_use]
97    pub fn should_mask(&self, field_name: &str) -> bool {
98        should_mask_field(field_name, &self.sensitive_fields)
99    }
100}
101
102impl Default for MaskingLayer {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108// ---------------------------------------------------------------------------
109// Writer-based masking
110// ---------------------------------------------------------------------------
111
112/// Create a masking writer factory for use with tracing-subscriber's `with_writer`.
113///
114/// Returns a closure that produces [`MaskingWriter`] instances wrapping stderr.
115/// When the sensitive fields set is empty and no service fields are set, the
116/// writer passes through without buffering or redaction.
117pub fn make_masking_writer(
118    sensitive_fields: HashSet<String>,
119    is_json: bool,
120    service_name: Option<String>,
121    service_version: Option<String>,
122) -> impl Fn() -> MaskingWriter<io::Stderr> + Send + Sync {
123    let fields = Arc::new(sensitive_fields);
124    let name = service_name.map(Arc::from);
125    let version = service_version.map(Arc::from);
126    move || MaskingWriter {
127        inner: io::stderr(),
128        buffer: Vec::with_capacity(512),
129        sensitive_fields: Arc::clone(&fields),
130        is_json,
131        service_name: name.clone(),
132        service_version: version.clone(),
133    }
134}
135
136/// A writer that redacts sensitive field values from formatted log output.
137///
138/// Buffers each log line (tracing-subscriber writes complete lines via
139/// `write_all`), applies field-level redaction, then flushes to the inner
140/// writer. When the sensitive fields set is empty and no service fields are
141/// set, writes pass through directly with no buffering overhead.
142pub struct MaskingWriter<W: io::Write> {
143    inner: W,
144    buffer: Vec<u8>,
145    sensitive_fields: Arc<HashSet<String>>,
146    is_json: bool,
147    /// Service name injected into JSON log output (JSON mode only).
148    service_name: Option<Arc<str>>,
149    /// Service version injected into JSON log output (JSON mode only).
150    service_version: Option<Arc<str>>,
151}
152
153impl<W: io::Write> MaskingWriter<W> {
154    /// Create a new masking writer wrapping the given writer.
155    ///
156    /// When `sensitive_fields` is empty, writes pass through with no overhead.
157    /// Set `is_json` to `true` for JSON-format redaction, `false` for text.
158    #[must_use]
159    pub fn new(inner: W, sensitive_fields: Arc<HashSet<String>>, is_json: bool) -> Self {
160        Self {
161            inner,
162            buffer: Vec::with_capacity(512),
163            sensitive_fields,
164            is_json,
165            service_name: None,
166            service_version: None,
167        }
168    }
169
170    fn flush_buffer(&mut self) -> io::Result<()> {
171        if self.buffer.is_empty() {
172            return Ok(());
173        }
174        let line = String::from_utf8_lossy(&self.buffer);
175        let redacted = if self.is_json {
176            inject_and_redact_json_line(
177                &line,
178                &self.sensitive_fields,
179                self.service_name.as_deref(),
180                self.service_version.as_deref(),
181            )
182        } else {
183            redact_text_line(&line, &self.sensitive_fields)
184        };
185        self.inner.write_all(redacted.as_bytes())?;
186        self.buffer.clear();
187        Ok(())
188    }
189
190    /// Returns `true` if this writer must buffer output (masking or injection active).
191    fn needs_buffering(&self) -> bool {
192        !self.sensitive_fields.is_empty()
193            || self.service_name.is_some()
194            || self.service_version.is_some()
195    }
196}
197
198impl<W: io::Write> io::Write for MaskingWriter<W> {
199    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
200        if !self.needs_buffering() {
201            return self.inner.write(buf);
202        }
203        self.buffer.extend_from_slice(buf);
204        Ok(buf.len())
205    }
206
207    fn flush(&mut self) -> io::Result<()> {
208        if self.needs_buffering() {
209            self.flush_buffer()?;
210        }
211        self.inner.flush()
212    }
213}
214
215impl<W: io::Write> Drop for MaskingWriter<W> {
216    fn drop(&mut self) {
217        if self.needs_buffering() {
218            let _ = self.flush_buffer();
219        }
220    }
221}
222
223// ---------------------------------------------------------------------------
224// Redaction functions
225// ---------------------------------------------------------------------------
226
/// Check if a field name contains any sensitive pattern (substring match).
///
/// `field_name` is lowercased before the comparison; the entries of
/// `sensitive` are compared exactly as stored, so they are expected to be
/// lowercase already.
///
/// Empty patterns are ignored. Every string contains the empty string, so a
/// single empty entry would otherwise mark every field as sensitive and
/// redact the whole log line.
fn should_mask_field(field_name: &str, sensitive: &HashSet<String>) -> bool {
    let lower = field_name.to_lowercase();
    sensitive
        .iter()
        .any(|pattern| !pattern.is_empty() && lower.contains(pattern.as_str()))
}
232
233/// Inject service fields and redact sensitive fields in a JSON log line.
234///
235/// Parses the line as JSON, inserts `service` and `version` at the root level
236/// (if provided), walks the object tree and replaces values of sensitive keys
237/// with `[REDACTED]`, then re-serialises.
238fn inject_and_redact_json_line(
239    line: &str,
240    sensitive: &HashSet<String>,
241    service_name: Option<&str>,
242    service_version: Option<&str>,
243) -> String {
244    let trimmed = line.trim_end_matches('\n');
245    if let Ok(mut value) = serde_json::from_str::<serde_json::Value>(trimmed) {
246        if let serde_json::Value::Object(ref mut map) = value {
247            if let Some(name) = service_name {
248                map.insert(
249                    "service".to_string(),
250                    serde_json::Value::String(name.to_string()),
251                );
252            }
253            if let Some(ver) = service_version {
254                map.insert(
255                    "version".to_string(),
256                    serde_json::Value::String(ver.to_string()),
257                );
258            }
259        }
260        redact_json_value(&mut value, sensitive);
261        let mut result = serde_json::to_string(&value).unwrap_or_else(|_| trimmed.to_string());
262        if line.ends_with('\n') {
263            result.push('\n');
264        }
265        result
266    } else {
267        line.to_string()
268    }
269}
270
271/// Recursively redact sensitive keys in a JSON value.
272fn redact_json_value(value: &mut serde_json::Value, sensitive: &HashSet<String>) {
273    match value {
274        serde_json::Value::Object(map) => {
275            for (key, val) in map.iter_mut() {
276                if should_mask_field(key, sensitive) {
277                    *val = serde_json::Value::String(REDACTED.to_string());
278                } else {
279                    redact_json_value(val, sensitive);
280                }
281            }
282        }
283        serde_json::Value::Array(arr) => {
284            for item in arr {
285                redact_json_value(item, sensitive);
286            }
287        }
288        _ => {}
289    }
290}
291
292/// Redact sensitive fields in a text-format log line.
293///
294/// Tracing-subscriber's text formatter outputs fields as `name=value` (Debug)
295/// or `name="string value"` (quoted strings). This function finds sensitive
296/// field names and replaces their values with `[REDACTED]`.
297fn redact_text_line(line: &str, sensitive: &HashSet<String>) -> String {
298    let mut result = String::with_capacity(line.len());
299    let mut pos = 0;
300
301    while pos < line.len() {
302        match line[pos..].find('=') {
303            None => {
304                result.push_str(&line[pos..]);
305                break;
306            }
307            Some(rel_eq) => {
308                let eq_pos = pos + rel_eq;
309
310                // Scan backwards from '=' to find the field name start
311                let field_start = line[pos..eq_pos]
312                    .rfind(|c: char| !c.is_alphanumeric() && c != '_' && c != '-' && c != '.')
313                    .map_or(pos, |rp| pos + rp + 1);
314                let field_name = &line[field_start..eq_pos];
315
316                if !field_name.is_empty() && should_mask_field(field_name, sensitive) {
317                    // Copy everything up to and including '='
318                    result.push_str(&line[pos..=eq_pos]);
319
320                    // Skip the value and replace with redacted placeholder
321                    let after_eq = eq_pos + 1;
322                    let value_end = skip_field_value(line, after_eq);
323                    result.push_str(REDACTED);
324                    pos = value_end;
325                } else {
326                    // Not sensitive — copy through the '=' and continue
327                    result.push_str(&line[pos..=eq_pos]);
328                    pos = eq_pos + 1;
329                }
330            }
331        }
332    }
333
334    result
335}
336
/// Skip past a field value in text-format output, returning the position after the value.
///
/// `start` is the byte offset of the first character of the value. If it is
/// at or beyond the end of `line`, it is returned unchanged.
///
/// * A value that starts with `"` ends just after its closing quote. A
///   backslash escapes the character that follows it, so `\"` does not close
///   the value while `\\"` does (the backslash itself is escaped). An
///   unterminated quoted value runs to the end of the line.
/// * Any other value ends at the next whitespace character, or at the end of
///   the line.
fn skip_field_value(line: &str, start: usize) -> usize {
    let bytes = line.as_bytes();
    match bytes.get(start).copied() {
        None => start,
        Some(b'"') => {
            // Scanning bytes is safe here: '"' and '\\' are ASCII and never
            // occur inside a multi-byte UTF-8 sequence.
            let content_start = start + 1;
            let mut escaped = false;
            for (offset, &byte) in bytes[content_start..].iter().enumerate() {
                if escaped {
                    escaped = false;
                } else if byte == b'\\' {
                    escaped = true;
                } else if byte == b'"' {
                    return content_start + offset + 1;
                }
            }
            line.len()
        }
        Some(_) => line[start..]
            .find(char::is_whitespace)
            .map_or(line.len(), |wp| start + wp),
    }
}
359
360/// Mask sensitive values in a string.
361///
362/// Replaces values that look like tokens, keys, or passwords with `[REDACTED]`.
363#[must_use]
364pub fn mask_sensitive_string(input: &str, patterns: &[&str]) -> String {
365    let mut result = input.to_string();
366
367    for pattern in patterns {
368        // Simple pattern matching for key=value or "key": "value"
369        let search_patterns = [
370            format!("{pattern}="),
371            format!("{pattern}:"),
372            format!("\"{pattern}\""),
373        ];
374
375        for search in &search_patterns {
376            if let Some(start) = result.to_lowercase().find(&search.to_lowercase()) {
377                // Find the value after the pattern
378                let value_start = start + search.len();
379                if let Some(rest) = result.get(value_start..) {
380                    // Find end of value (space, comma, quote, or end of string)
381                    let value_end = rest
382                        .find(|c: char| c.is_whitespace() || c == ',' || c == '"' || c == '}')
383                        .unwrap_or(rest.len());
384
385                    let before = &result[..value_start];
386                    let after = &rest[value_end..];
387                    result = format!("{before}{REDACTED}{after}");
388                }
389            }
390        }
391    }
392
393    result
394}
395
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// In-memory sink whose contents stay readable after the writer is dropped.
    struct TestWriter(Arc<Mutex<Vec<u8>>>);

    impl io::Write for TestWriter {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            self.0.lock().unwrap().extend_from_slice(buf);
            Ok(buf.len())
        }

        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
    }

    /// Build an owned sensitive-field set from string literals.
    fn field_set(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    /// Push `payload` through a `MaskingWriter`, drop the writer, and return
    /// everything that reached the sink.
    fn written_through(names: &[&str], is_json: bool, payload: &[u8]) -> String {
        let sink = Arc::new(Mutex::new(Vec::new()));
        {
            let mut writer = MaskingWriter {
                inner: TestWriter(Arc::clone(&sink)),
                buffer: Vec::new(),
                sensitive_fields: Arc::new(field_set(names)),
                is_json,
                service_name: None,
                service_version: None,
            };
            io::Write::write_all(&mut writer, payload).unwrap();
        }
        let guard = sink.lock().unwrap();
        // Bound to a local so the borrow of `guard` ends before it is dropped.
        let text = String::from_utf8_lossy(&guard).into_owned();
        text
    }

    #[test]
    fn test_default_sensitive_fields() {
        let fields = default_sensitive_fields();
        for expected in ["password", "token", "api_key", "secret"].iter() {
            assert!(fields.contains(&(*expected).to_string()));
        }
    }

    #[test]
    fn test_masking_layer_should_mask() {
        let layer = MaskingLayer::new();

        let masked = ["password", "PASSWORD", "user_password", "api_key", "secret_token"];
        for name in masked.iter() {
            assert!(layer.should_mask(name));
        }

        let plain = ["username", "host", "port"];
        for name in plain.iter() {
            assert!(!layer.should_mask(name));
        }
    }

    #[test]
    fn test_masking_layer_custom_fields() {
        let layer = MaskingLayer::with_fields(vec!["custom_secret".to_string()]);

        assert!(layer.should_mask("custom_secret"));
        // The defaults are not part of a custom list.
        assert!(!layer.should_mask("password"));
    }

    #[test]
    fn test_masking_layer_add_fields() {
        let layer = MaskingLayer::new().add_fields(vec!["my_custom_field".to_string()]);

        assert!(layer.should_mask("my_custom_field"));
        // Adding fields keeps the defaults.
        assert!(layer.should_mask("password"));
    }

    #[test]
    fn test_mask_sensitive_string() {
        let result = mask_sensitive_string("password=secret123 username=john", &["password"]);
        assert!(result.contains("[REDACTED]"));
        assert!(result.contains("username=john"));
    }

    // --- JSON redaction tests ---

    #[test]
    fn test_redact_json_line_sensitive_field() {
        let sensitive = field_set(&["password"]);
        let input =
            "{\"level\":\"INFO\",\"fields\":{\"message\":\"hello\",\"password\":\"secret123\"}}\n";
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("secret123"));
        assert!(result.contains("hello"));
        assert!(result.ends_with('\n'));
    }

    #[test]
    fn test_redact_json_line_nested() {
        let sensitive = field_set(&["token"]);
        let input = r#"{"fields":{"config":{"token":"abc123","host":"localhost"}}}"#;
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(!result.contains("abc123"));
        assert!(result.contains("localhost"));
    }

    #[test]
    fn test_redact_json_line_preserves_non_sensitive() {
        let sensitive = field_set(&["password"]);
        let input = r#"{"level":"INFO","fields":{"username":"john","host":"db.example.com"}}"#;
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(result.contains("john"));
        assert!(result.contains("db.example.com"));
    }

    #[test]
    fn test_redact_json_line_invalid_json_passthrough() {
        let sensitive = field_set(&["password"]);
        let input = "this is not json\n";
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert_eq!(result, input);
    }

    // --- Text redaction tests ---

    #[test]
    fn test_redact_text_line_quoted_value() {
        let sensitive = field_set(&["password"]);
        let input = r#"2026-01-01T00:00:00Z  INFO target: hello password="secret123" user="john""#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("secret123"));
        assert!(result.contains("password=[REDACTED]"));
        assert!(result.contains(r#"user="john""#));
    }

    #[test]
    fn test_redact_text_line_unquoted_value() {
        let sensitive = field_set(&["token"]);
        let input = "2026-01-01T00:00:00Z  INFO target: msg token=abc123 count=42";
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("abc123"));
        assert!(result.contains("token=[REDACTED]"));
        assert!(result.contains("count=42"));
    }

    #[test]
    fn test_redact_text_line_no_sensitive_fields() {
        let sensitive = field_set(&["password"]);
        let input = "2026-01-01T00:00:00Z  INFO target: hello username=john count=42";
        let result = redact_text_line(input, &sensitive);
        assert_eq!(result, input);
    }

    #[test]
    fn test_redact_text_line_case_insensitive() {
        let sensitive = field_set(&["password"]);
        let input = r#"2026-01-01T00:00:00Z  INFO target: msg PASSWORD="secret""#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("secret"));
        assert!(result.contains("[REDACTED]"));
    }

    #[test]
    fn test_redact_text_line_multiple_sensitive() {
        let sensitive = field_set(&["password", "token"]);
        let input = r#"password="pass1" host=localhost token=tok123"#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("pass1"));
        assert!(!result.contains("tok123"));
        assert!(result.contains("host=localhost"));
        assert_eq!(result.matches("[REDACTED]").count(), 2);
    }

    // --- MaskingWriter tests ---

    #[test]
    fn test_masking_writer_passthrough_when_empty() {
        let output = written_through(&[], false, b"password=secret\n");
        assert_eq!(output, "password=secret\n");
    }

    #[test]
    fn test_masking_writer_redacts_text_on_drop() {
        let output = written_through(&["password"], false, b"password=secret123 user=john\n");
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("user=john"));
    }

    #[test]
    fn test_masking_writer_redacts_json_on_drop() {
        let json = b"{\"message\":\"hello\",\"password\":\"secret123\"}\n";
        let output = written_through(&["password"], true, json);
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("hello"));
    }
}