Skip to main content

hyperi_rustlib/logger/
masking.rs

1// Project:   hyperi-rustlib
2// File:      src/logger/masking.rs
3// Purpose:   Sensitive data masking for log output
4// Language:  Rust
5//
6// License:   FSL-1.1-ALv2
7// Copyright: (c) 2026 HYPERI PTY LIMITED
8
9//! Sensitive data masking for tracing log output.
10//!
11//! Provides a [`MaskingWriter`] that intercepts formatted log output and
12//! redacts sensitive field values before writing to the underlying destination.
13
14use std::collections::HashSet;
15use std::io;
16use std::sync::Arc;
17
18/// Redacted value placeholder.
19pub const REDACTED: &str = "[REDACTED]";
20
/// Built-in sensitive field name patterns.
///
/// Returns a fresh, owned list on every call so callers can extend or filter
/// it freely. The entries are lowercase; matching elsewhere in this module is
/// done by case-insensitive substring against the field name.
#[must_use]
pub fn default_sensitive_fields() -> Vec<String> {
    // Grouped by category; order is preserved in the returned vector.
    const DEFAULTS: &[&str] = &[
        // Passwords
        "password",
        "passwd",
        "pwd",
        "pass",
        // Tokens and keys
        "token",
        "secret",
        "api_key",
        "apikey",
        "api-key",
        "access_key",
        "secret_key",
        "private_key",
        "privatekey",
        // Auth
        "auth",
        "authorization",
        "bearer",
        "credential",
        "credentials",
        // OAuth
        "client_secret",
        "refresh_token",
        "access_token",
        // Other sensitive
        "ssn",
        "credit_card",
        "creditcard",
        "cvv",
        "pin",
    ];

    DEFAULTS.iter().map(|name| (*name).to_string()).collect()
}
61
62/// Configuration for sensitive field detection.
63///
64/// Holds the set of field name patterns considered sensitive. Used both as a
65/// standalone detector and as configuration for [`make_masking_writer`].
66#[derive(Debug, Clone)]
67pub struct MaskingLayer {
68    sensitive_fields: HashSet<String>,
69}
70
71impl MaskingLayer {
72    /// Create a new masking layer with default sensitive fields.
73    #[must_use]
74    pub fn new() -> Self {
75        Self::with_fields(default_sensitive_fields())
76    }
77
78    /// Create a masking layer with custom sensitive fields.
79    #[must_use]
80    pub fn with_fields(fields: Vec<String>) -> Self {
81        Self {
82            sensitive_fields: fields.into_iter().map(|s| s.to_lowercase()).collect(),
83        }
84    }
85
86    /// Add additional sensitive fields.
87    #[must_use]
88    pub fn add_fields(mut self, fields: Vec<String>) -> Self {
89        for field in fields {
90            self.sensitive_fields.insert(field.to_lowercase());
91        }
92        self
93    }
94
95    /// Check if a field name should be masked.
96    #[must_use]
97    pub fn should_mask(&self, field_name: &str) -> bool {
98        should_mask_field(field_name, &self.sensitive_fields)
99    }
100}
101
102impl Default for MaskingLayer {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108// ---------------------------------------------------------------------------
109// Writer-based masking
110// ---------------------------------------------------------------------------
111
112/// Create a masking writer factory for use with tracing-subscriber's `with_writer`.
113///
114/// Returns a closure that produces [`MaskingWriter`] instances wrapping stderr.
115/// When the sensitive fields set is empty and no service fields are set, the
116/// writer passes through without buffering or redaction.
117pub fn make_masking_writer(
118    sensitive_fields: HashSet<String>,
119    is_json: bool,
120    service_name: Option<String>,
121    service_version: Option<String>,
122) -> impl Fn() -> MaskingWriter<io::Stderr> + Send + Sync {
123    let fields = Arc::new(sensitive_fields);
124    let name = service_name.map(Arc::from);
125    let version = service_version.map(Arc::from);
126    move || MaskingWriter {
127        inner: io::stderr(),
128        buffer: Vec::with_capacity(512),
129        sensitive_fields: Arc::clone(&fields),
130        is_json,
131        service_name: name.clone(),
132        service_version: version.clone(),
133    }
134}
135
136/// A writer that redacts sensitive field values from formatted log output.
137///
138/// Buffers each log line (tracing-subscriber writes complete lines via
139/// `write_all`), applies field-level redaction, then flushes to the inner
140/// writer. When the sensitive fields set is empty and no service fields are
141/// set, writes pass through directly with no buffering overhead.
142pub struct MaskingWriter<W: io::Write> {
143    inner: W,
144    buffer: Vec<u8>,
145    sensitive_fields: Arc<HashSet<String>>,
146    is_json: bool,
147    /// Service name injected into JSON log output (JSON mode only).
148    service_name: Option<Arc<str>>,
149    /// Service version injected into JSON log output (JSON mode only).
150    service_version: Option<Arc<str>>,
151}
152
153impl<W: io::Write> MaskingWriter<W> {
154    /// Create a new masking writer wrapping the given writer.
155    ///
156    /// When `sensitive_fields` is empty, writes pass through with no overhead.
157    /// Set `is_json` to `true` for JSON-format redaction, `false` for text.
158    #[must_use]
159    pub fn new(inner: W, sensitive_fields: Arc<HashSet<String>>, is_json: bool) -> Self {
160        Self {
161            inner,
162            buffer: Vec::with_capacity(512),
163            sensitive_fields,
164            is_json,
165            service_name: None,
166            service_version: None,
167        }
168    }
169
170    fn flush_buffer(&mut self) -> io::Result<()> {
171        if self.buffer.is_empty() {
172            return Ok(());
173        }
174        let line = String::from_utf8_lossy(&self.buffer);
175        let redacted = if self.is_json {
176            inject_and_redact_json_line(
177                &line,
178                &self.sensitive_fields,
179                self.service_name.as_deref(),
180                self.service_version.as_deref(),
181            )
182        } else {
183            redact_text_line(&line, &self.sensitive_fields)
184        };
185        self.inner.write_all(redacted.as_bytes())?;
186        self.buffer.clear();
187        Ok(())
188    }
189
190    /// Returns `true` if this writer must buffer output (masking or injection active).
191    fn needs_buffering(&self) -> bool {
192        !self.sensitive_fields.is_empty()
193            || self.service_name.is_some()
194            || self.service_version.is_some()
195    }
196}
197
198impl<W: io::Write> io::Write for MaskingWriter<W> {
199    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
200        if !self.needs_buffering() {
201            return self.inner.write(buf);
202        }
203        self.buffer.extend_from_slice(buf);
204        Ok(buf.len())
205    }
206
207    fn flush(&mut self) -> io::Result<()> {
208        if self.needs_buffering() {
209            self.flush_buffer()?;
210        }
211        self.inner.flush()
212    }
213}
214
215impl<W: io::Write> Drop for MaskingWriter<W> {
216    fn drop(&mut self) {
217        if self.needs_buffering() {
218            let _ = self.flush_buffer();
219        }
220    }
221}
222
223// ---------------------------------------------------------------------------
224// Redaction functions
225// ---------------------------------------------------------------------------
226
/// Check if a field name matches any sensitive pattern (case-insensitive substring).
///
/// Only `field_name` is lowercased here; the patterns in `sensitive` are
/// expected to be lowercase already (as `MaskingLayer::with_fields` stores
/// them). Empty patterns are ignored: every string contains `""`, so a stray
/// empty entry would otherwise cause every field to be redacted.
fn should_mask_field(field_name: &str, sensitive: &HashSet<String>) -> bool {
    let lower = field_name.to_lowercase();
    sensitive
        .iter()
        .any(|pattern| !pattern.is_empty() && lower.contains(pattern.as_str()))
}
232
233/// Inject service fields and redact sensitive fields in a JSON log line.
234///
235/// Parses the line as JSON, inserts `service` and `version` at the root level
236/// (if provided), walks the object tree and replaces values of sensitive keys
237/// with `[REDACTED]`, then re-serialises.
238fn inject_and_redact_json_line(
239    line: &str,
240    sensitive: &HashSet<String>,
241    service_name: Option<&str>,
242    service_version: Option<&str>,
243) -> String {
244    let trimmed = line.trim_end_matches('\n');
245    if let Ok(mut value) = serde_json::from_str::<serde_json::Value>(trimmed) {
246        if let serde_json::Value::Object(ref mut map) = value {
247            if let Some(name) = service_name {
248                map.insert(
249                    "service".to_string(),
250                    serde_json::Value::String(name.to_string()),
251                );
252            }
253            if let Some(ver) = service_version {
254                map.insert(
255                    "version".to_string(),
256                    serde_json::Value::String(ver.to_string()),
257                );
258            }
259
260            // Inject K8s context fields (no-op on bare metal — fields are None)
261            let ctx = crate::env::runtime_context();
262            if let Some(ref pod) = ctx.pod_name {
263                map.insert(
264                    "pod_name".to_string(),
265                    serde_json::Value::String(pod.clone()),
266                );
267            }
268            if let Some(ref ns) = ctx.namespace {
269                map.insert(
270                    "namespace".to_string(),
271                    serde_json::Value::String(ns.clone()),
272                );
273            }
274            if let Some(ref node) = ctx.node_name {
275                map.insert(
276                    "node_name".to_string(),
277                    serde_json::Value::String(node.clone()),
278                );
279            }
280        }
281        redact_json_value(&mut value, sensitive);
282        let mut result = serde_json::to_string(&value).unwrap_or_else(|_| trimmed.to_string());
283        if line.ends_with('\n') {
284            result.push('\n');
285        }
286        result
287    } else {
288        line.to_string()
289    }
290}
291
292/// Recursively redact sensitive keys in a JSON value.
293fn redact_json_value(value: &mut serde_json::Value, sensitive: &HashSet<String>) {
294    match value {
295        serde_json::Value::Object(map) => {
296            for (key, val) in map.iter_mut() {
297                if should_mask_field(key, sensitive) {
298                    *val = serde_json::Value::String(REDACTED.to_string());
299                } else {
300                    redact_json_value(val, sensitive);
301                }
302            }
303        }
304        serde_json::Value::Array(arr) => {
305            for item in arr {
306                redact_json_value(item, sensitive);
307            }
308        }
309        _ => {}
310    }
311}
312
313/// Redact sensitive fields in a text-format log line.
314///
315/// Tracing-subscriber's text formatter outputs fields as `name=value` (Debug)
316/// or `name="string value"` (quoted strings). This function finds sensitive
317/// field names and replaces their values with `[REDACTED]`.
318fn redact_text_line(line: &str, sensitive: &HashSet<String>) -> String {
319    let mut result = String::with_capacity(line.len());
320    let mut pos = 0;
321
322    while pos < line.len() {
323        match line[pos..].find('=') {
324            None => {
325                result.push_str(&line[pos..]);
326                break;
327            }
328            Some(rel_eq) => {
329                let eq_pos = pos + rel_eq;
330
331                // Scan backwards from '=' to find the field name start
332                let field_start = line[pos..eq_pos]
333                    .rfind(|c: char| !c.is_alphanumeric() && c != '_' && c != '-' && c != '.')
334                    .map_or(pos, |rp| pos + rp + 1);
335                let field_name = &line[field_start..eq_pos];
336
337                if !field_name.is_empty() && should_mask_field(field_name, sensitive) {
338                    // Copy everything up to and including '='
339                    result.push_str(&line[pos..=eq_pos]);
340
341                    // Skip the value and replace with redacted placeholder
342                    let after_eq = eq_pos + 1;
343                    let value_end = skip_field_value(line, after_eq);
344                    result.push_str(REDACTED);
345                    pos = value_end;
346                } else {
347                    // Not sensitive — copy through the '=' and continue
348                    result.push_str(&line[pos..=eq_pos]);
349                    pos = eq_pos + 1;
350                }
351            }
352        }
353    }
354
355    result
356}
357
/// Skip past a field value in text-format output, returning the position after the value.
///
/// `start` is the byte offset of the first character of the value and must lie
/// on a character boundary. Three cases:
///
/// * `start` at or past the end of `line`: returns `start`.
/// * Value begins with `"`: returns the offset just after the closing quote,
///   or `line.len()` if the quote is never closed. Backslash escapes are
///   consumed as pairs, so `\"` does not end the value while a quote following
///   an escaped backslash (`\\"`) does.
/// * Otherwise: the value ends at the next whitespace character (or at the end
///   of the line).
fn skip_field_value(line: &str, start: usize) -> usize {
    let bytes = line.as_bytes();
    match bytes.get(start) {
        None => start,
        Some(b'"') => {
            let mut i = start + 1;
            while let Some(&byte) = bytes.get(i) {
                match byte {
                    // Skip the escaped byte too, so "\\" is consumed as one unit.
                    b'\\' => i += 2,
                    b'"' => return i + 1,
                    _ => i += 1,
                }
            }
            // Unterminated quote: treat the rest of the line as the value.
            line.len()
        }
        Some(_) => line[start..]
            .find(char::is_whitespace)
            .map_or(line.len(), |wp| start + wp),
    }
}
380
381/// Mask sensitive values in a string.
382///
383/// Replaces values that look like tokens, keys, or passwords with `[REDACTED]`.
384#[must_use]
385pub fn mask_sensitive_string(input: &str, patterns: &[&str]) -> String {
386    let mut result = input.to_string();
387
388    for pattern in patterns {
389        // Simple pattern matching for key=value or "key": "value"
390        let search_patterns = [
391            format!("{pattern}="),
392            format!("{pattern}:"),
393            format!("\"{pattern}\""),
394        ];
395
396        for search in &search_patterns {
397            if let Some(start) = result.to_lowercase().find(&search.to_lowercase()) {
398                // Find the value after the pattern
399                let value_start = start + search.len();
400                if let Some(rest) = result.get(value_start..) {
401                    // Find end of value (space, comma, quote, or end of string)
402                    let value_end = rest
403                        .find(|c: char| c.is_whitespace() || c == ',' || c == '"' || c == '}')
404                        .unwrap_or(rest.len());
405
406                    let before = &result[..value_start];
407                    let after = &rest[value_end..];
408                    result = format!("{before}{REDACTED}{after}");
409                }
410            }
411        }
412    }
413
414    result
415}
416
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    /// Writer backed by a shared buffer, so output can still be inspected
    /// after the `MaskingWriter` that owns it has been dropped.
    struct TestWriter(Arc<Mutex<Vec<u8>>>);

    impl io::Write for TestWriter {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            let mut sink = self.0.lock().unwrap();
            sink.extend_from_slice(buf);
            Ok(buf.len())
        }

        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
    }

    /// Build a sensitive-pattern set from string literals.
    fn patterns(names: &[&str]) -> HashSet<String> {
        names.iter().map(|name| (*name).to_string()).collect()
    }

    /// Push `payload` through a `MaskingWriter`, drop the writer, and return
    /// everything that reached the underlying sink.
    fn write_and_drop(sensitive: HashSet<String>, is_json: bool, payload: &[u8]) -> String {
        let sink = Arc::new(Mutex::new(Vec::new()));
        {
            let mut writer = MaskingWriter {
                inner: TestWriter(Arc::clone(&sink)),
                buffer: Vec::new(),
                sensitive_fields: Arc::new(sensitive),
                is_json,
                service_name: None,
                service_version: None,
            };
            io::Write::write_all(&mut writer, payload).unwrap();
        }
        let bytes = sink.lock().unwrap();
        String::from_utf8_lossy(&bytes).into_owned()
    }

    #[test]
    fn test_default_sensitive_fields() {
        let fields = default_sensitive_fields();
        for expected in ["password", "token", "api_key", "secret"] {
            assert!(fields.contains(&expected.to_string()));
        }
    }

    #[test]
    fn test_masking_layer_should_mask() {
        let layer = MaskingLayer::new();

        for masked in ["password", "PASSWORD", "user_password", "api_key", "secret_token"] {
            assert!(layer.should_mask(masked));
        }
        for clear in ["username", "host", "port"] {
            assert!(!layer.should_mask(clear));
        }
    }

    #[test]
    fn test_masking_layer_custom_fields() {
        let layer = MaskingLayer::with_fields(vec!["custom_secret".to_string()]);

        assert!(layer.should_mask("custom_secret"));
        // Custom list replaces the defaults.
        assert!(!layer.should_mask("password"));
    }

    #[test]
    fn test_masking_layer_add_fields() {
        let layer = MaskingLayer::new().add_fields(vec!["my_custom_field".to_string()]);

        assert!(layer.should_mask("my_custom_field"));
        // Defaults are kept when adding.
        assert!(layer.should_mask("password"));
    }

    #[test]
    fn test_mask_sensitive_string() {
        let result = mask_sensitive_string("password=secret123 username=john", &["password"]);
        assert!(result.contains("[REDACTED]"));
        assert!(result.contains("username=john"));
    }

    // --- JSON redaction tests ---

    #[test]
    fn test_redact_json_line_sensitive_field() {
        let input =
            "{\"level\":\"INFO\",\"fields\":{\"message\":\"hello\",\"password\":\"secret123\"}}\n";
        let result = inject_and_redact_json_line(input, &patterns(&["password"]), None, None);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("secret123"));
        assert!(result.contains("hello"));
        assert!(result.ends_with('\n'));
    }

    #[test]
    fn test_redact_json_line_nested() {
        let input = r#"{"fields":{"config":{"token":"abc123","host":"localhost"}}}"#;
        let result = inject_and_redact_json_line(input, &patterns(&["token"]), None, None);
        assert!(!result.contains("abc123"));
        assert!(result.contains("localhost"));
    }

    #[test]
    fn test_redact_json_line_preserves_non_sensitive() {
        let input = r#"{"level":"INFO","fields":{"username":"john","host":"db.example.com"}}"#;
        let result = inject_and_redact_json_line(input, &patterns(&["password"]), None, None);
        assert!(result.contains("john"));
        assert!(result.contains("db.example.com"));
    }

    #[test]
    fn test_redact_json_line_invalid_json_passthrough() {
        let input = "this is not json\n";
        let result = inject_and_redact_json_line(input, &patterns(&["password"]), None, None);
        assert_eq!(result, input);
    }

    // --- Text redaction tests ---

    #[test]
    fn test_redact_text_line_quoted_value() {
        let input = r#"2026-01-01T00:00:00Z  INFO target: hello password="secret123" user="john""#;
        let result = redact_text_line(input, &patterns(&["password"]));
        assert!(!result.contains("secret123"));
        assert!(result.contains("password=[REDACTED]"));
        assert!(result.contains(r#"user="john""#));
    }

    #[test]
    fn test_redact_text_line_unquoted_value() {
        let input = "2026-01-01T00:00:00Z  INFO target: msg token=abc123 count=42";
        let result = redact_text_line(input, &patterns(&["token"]));
        assert!(!result.contains("abc123"));
        assert!(result.contains("token=[REDACTED]"));
        assert!(result.contains("count=42"));
    }

    #[test]
    fn test_redact_text_line_no_sensitive_fields() {
        let input = "2026-01-01T00:00:00Z  INFO target: hello username=john count=42";
        let result = redact_text_line(input, &patterns(&["password"]));
        assert_eq!(result, input);
    }

    #[test]
    fn test_redact_text_line_case_insensitive() {
        let input = r#"2026-01-01T00:00:00Z  INFO target: msg PASSWORD="secret""#;
        let result = redact_text_line(input, &patterns(&["password"]));
        assert!(!result.contains("secret"));
        assert!(result.contains("[REDACTED]"));
    }

    #[test]
    fn test_redact_text_line_multiple_sensitive() {
        let input = r#"password="pass1" host=localhost token=tok123"#;
        let result = redact_text_line(input, &patterns(&["password", "token"]));
        assert!(!result.contains("pass1"));
        assert!(!result.contains("tok123"));
        assert!(result.contains("host=localhost"));
        assert_eq!(result.matches("[REDACTED]").count(), 2);
    }

    // --- MaskingWriter tests ---

    #[test]
    fn test_masking_writer_passthrough_when_empty() {
        let output = write_and_drop(HashSet::new(), false, b"password=secret\n");
        assert_eq!(output, "password=secret\n");
    }

    #[test]
    fn test_masking_writer_redacts_text_on_drop() {
        let output = write_and_drop(
            patterns(&["password"]),
            false,
            b"password=secret123 user=john\n",
        );
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("user=john"));
    }

    #[test]
    fn test_masking_writer_redacts_json_on_drop() {
        let json = b"{\"message\":\"hello\",\"password\":\"secret123\"}\n";
        let output = write_and_drop(patterns(&["password"]), true, json);
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("hello"));
    }
}