Skip to main content

hyperi_rustlib/logger/
masking.rs

1// Project:   hyperi-rustlib
2// File:      src/logger/masking.rs
3// Purpose:   Sensitive data masking for log output
4// Language:  Rust
5//
6// License:   BUSL-1.1
7// Copyright: (c) 2026 HYPERI PTY LIMITED
8
9//! Sensitive data masking for tracing log output.
10//!
11//! Provides a [`MaskingWriter`] that intercepts formatted log output and
12//! redacts sensitive field values before writing to the underlying destination.
13
14use std::collections::HashSet;
15use std::io;
16use std::sync::Arc;
17
/// Placeholder text written in place of every redacted value.
///
/// Used by the JSON, text and free-form string redaction routines in this
/// module, so all three emit the same marker.
pub const REDACTED: &str = "[REDACTED]";
20
/// Build the built-in list of sensitive field-name patterns.
///
/// The names are returned as owned strings in a fixed order, grouped by
/// category: passwords, tokens and keys, auth, OAuth, then other sensitive
/// identifiers.
#[must_use]
pub fn default_sensitive_fields() -> Vec<String> {
    let passwords = ["password", "passwd", "pwd", "pass"];
    let tokens_and_keys = [
        "token",
        "secret",
        "api_key",
        "apikey",
        "api-key",
        "access_key",
        "secret_key",
        "private_key",
        "privatekey",
    ];
    let auth = ["auth", "authorization", "bearer", "credential", "credentials"];
    let oauth = ["client_secret", "refresh_token", "access_token"];
    let other_sensitive = ["ssn", "credit_card", "creditcard", "cvv", "pin"];

    passwords
        .iter()
        .chain(&tokens_and_keys)
        .chain(&auth)
        .chain(&oauth)
        .chain(&other_sensitive)
        .map(|name| (*name).to_owned())
        .collect()
}
61
62/// Configuration for sensitive field detection.
63///
64/// Holds the set of field name patterns considered sensitive. Used both as a
65/// standalone detector and as configuration for the masking writer factory.
66#[derive(Debug, Clone)]
67pub struct MaskingLayer {
68    sensitive_fields: HashSet<String>,
69}
70
71impl MaskingLayer {
72    /// Create a new masking layer with default sensitive fields.
73    #[must_use]
74    pub fn new() -> Self {
75        Self::with_fields(default_sensitive_fields())
76    }
77
78    /// Create a masking layer with custom sensitive fields.
79    #[must_use]
80    pub fn with_fields(fields: Vec<String>) -> Self {
81        Self {
82            sensitive_fields: fields.into_iter().map(|s| s.to_lowercase()).collect(),
83        }
84    }
85
86    /// Add additional sensitive fields.
87    #[must_use]
88    pub fn add_fields(mut self, fields: Vec<String>) -> Self {
89        for field in fields {
90            self.sensitive_fields.insert(field.to_lowercase());
91        }
92        self
93    }
94
95    /// Check if a field name should be masked.
96    #[must_use]
97    pub fn should_mask(&self, field_name: &str) -> bool {
98        should_mask_field(field_name, &self.sensitive_fields)
99    }
100}
101
102impl Default for MaskingLayer {
103    fn default() -> Self {
104        Self::new()
105    }
106}
107
108// ---------------------------------------------------------------------------
109// Writer-based masking
110// ---------------------------------------------------------------------------
111
112/// Create a masking writer factory for use with tracing-subscriber's `with_writer`.
113///
114/// Returns a closure that produces [`MaskingWriter`] instances wrapping stderr.
115/// When the sensitive fields set is empty and no service fields are set, the
116/// writer passes through without buffering or redaction.
117pub fn make_masking_writer(
118    sensitive_fields: HashSet<String>,
119    is_json: bool,
120    service_name: Option<String>,
121    service_version: Option<String>,
122) -> impl Fn() -> MaskingWriter<io::Stderr> + Send + Sync {
123    let fields = Arc::new(sensitive_fields);
124    let name = service_name.map(Arc::from);
125    let version = service_version.map(Arc::from);
126    move || MaskingWriter {
127        inner: io::stderr(),
128        buffer: Vec::with_capacity(512),
129        sensitive_fields: Arc::clone(&fields),
130        is_json,
131        service_name: name.clone(),
132        service_version: version.clone(),
133    }
134}
135
136/// A writer that redacts sensitive field values from formatted log output.
137///
138/// Buffers each log line (tracing-subscriber writes complete lines via
139/// `write_all`), applies field-level redaction, then flushes to the inner
140/// writer. When the sensitive fields set is empty and no service fields are
141/// set, writes pass through directly with no buffering overhead.
142pub struct MaskingWriter<W: io::Write> {
143    inner: W,
144    buffer: Vec<u8>,
145    sensitive_fields: Arc<HashSet<String>>,
146    is_json: bool,
147    /// Service name injected into JSON log output (JSON mode only).
148    service_name: Option<Arc<str>>,
149    /// Service version injected into JSON log output (JSON mode only).
150    service_version: Option<Arc<str>>,
151}
152
153impl<W: io::Write> MaskingWriter<W> {
154    /// Create a new masking writer wrapping the given writer.
155    ///
156    /// When `sensitive_fields` is empty, writes pass through with no overhead.
157    /// Set `is_json` to `true` for JSON-format redaction, `false` for text.
158    #[must_use]
159    pub fn new(inner: W, sensitive_fields: Arc<HashSet<String>>, is_json: bool) -> Self {
160        Self {
161            inner,
162            buffer: Vec::with_capacity(512),
163            sensitive_fields,
164            is_json,
165            service_name: None,
166            service_version: None,
167        }
168    }
169
170    fn flush_buffer(&mut self) -> io::Result<()> {
171        if self.buffer.is_empty() {
172            return Ok(());
173        }
174        let line = String::from_utf8_lossy(&self.buffer);
175        let redacted = if self.is_json {
176            inject_and_redact_json_line(
177                &line,
178                &self.sensitive_fields,
179                self.service_name.as_deref(),
180                self.service_version.as_deref(),
181            )
182        } else {
183            redact_text_line(&line, &self.sensitive_fields)
184        };
185        self.inner.write_all(redacted.as_bytes())?;
186        self.buffer.clear();
187        Ok(())
188    }
189
190    /// Returns `true` if this writer must buffer output (masking or injection active).
191    fn needs_buffering(&self) -> bool {
192        !self.sensitive_fields.is_empty()
193            || self.service_name.is_some()
194            || self.service_version.is_some()
195    }
196}
197
198impl<W: io::Write> io::Write for MaskingWriter<W> {
199    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
200        if !self.needs_buffering() {
201            return self.inner.write(buf);
202        }
203        self.buffer.extend_from_slice(buf);
204        Ok(buf.len())
205    }
206
207    fn flush(&mut self) -> io::Result<()> {
208        if self.needs_buffering() {
209            self.flush_buffer()?;
210        }
211        self.inner.flush()
212    }
213}
214
215impl<W: io::Write> Drop for MaskingWriter<W> {
216    fn drop(&mut self) {
217        if self.needs_buffering() {
218            let _ = self.flush_buffer();
219        }
220    }
221}
222
223// ---------------------------------------------------------------------------
224// Redaction functions
225// ---------------------------------------------------------------------------
226
/// Check if a field name matches any sensitive pattern (case-insensitive substring).
///
/// `field_name` is lowercased and then tested for containing each pattern.
/// The patterns themselves are compared as stored, so they are expected to be
/// lowercase already.
///
/// Empty patterns are ignored: every string contains `""`, so an empty entry
/// would otherwise mark every field as sensitive.
fn should_mask_field(field_name: &str, sensitive: &HashSet<String>) -> bool {
    // Nothing can match; skip the lowercase allocation.
    if sensitive.is_empty() {
        return false;
    }
    let lower = field_name.to_lowercase();
    sensitive
        .iter()
        .any(|pattern| !pattern.is_empty() && lower.contains(pattern.as_str()))
}
232
233/// Inject service fields and redact sensitive fields in a JSON log line.
234///
235/// Parses the line as JSON, inserts `service` and `version` at the root level
236/// (if provided), walks the object tree and replaces values of sensitive keys
237/// with `[REDACTED]`, then re-serialises.
238fn inject_and_redact_json_line(
239    line: &str,
240    sensitive: &HashSet<String>,
241    service_name: Option<&str>,
242    service_version: Option<&str>,
243) -> String {
244    let trimmed = line.trim_end_matches('\n');
245    if let Ok(mut value) = serde_json::from_str::<serde_json::Value>(trimmed) {
246        if let serde_json::Value::Object(ref mut map) = value {
247            if let Some(name) = service_name {
248                map.insert(
249                    "service".to_string(),
250                    serde_json::Value::String(name.to_string()),
251                );
252            }
253            if let Some(ver) = service_version {
254                map.insert(
255                    "version".to_string(),
256                    serde_json::Value::String(ver.to_string()),
257                );
258            }
259
260            // Inject K8s context fields (no-op on bare metal -- fields are None)
261            let ctx = crate::env::runtime_context();
262            if let Some(ref pod) = ctx.pod_name {
263                map.insert(
264                    "pod_name".to_string(),
265                    serde_json::Value::String(pod.clone()),
266                );
267            }
268            if let Some(ref ns) = ctx.namespace {
269                map.insert(
270                    "namespace".to_string(),
271                    serde_json::Value::String(ns.clone()),
272                );
273            }
274            if let Some(ref node) = ctx.node_name {
275                map.insert(
276                    "node_name".to_string(),
277                    serde_json::Value::String(node.clone()),
278                );
279            }
280        }
281        redact_json_value(&mut value, sensitive);
282        let mut result = serde_json::to_string(&value).unwrap_or_else(|_| trimmed.to_string());
283        if line.ends_with('\n') {
284            result.push('\n');
285        }
286        result
287    } else {
288        line.to_string()
289    }
290}
291
292/// Recursively redact sensitive keys in a JSON value.
293fn redact_json_value(value: &mut serde_json::Value, sensitive: &HashSet<String>) {
294    match value {
295        serde_json::Value::Object(map) => {
296            for (key, val) in map.iter_mut() {
297                if should_mask_field(key, sensitive) {
298                    *val = serde_json::Value::String(REDACTED.to_string());
299                } else {
300                    redact_json_value(val, sensitive);
301                }
302            }
303        }
304        serde_json::Value::Array(arr) => {
305            for item in arr {
306                redact_json_value(item, sensitive);
307            }
308        }
309        _ => {}
310    }
311}
312
313/// Redact sensitive fields in a text-format log line.
314///
315/// Tracing-subscriber's text formatter outputs fields as `name=value` (Debug)
316/// or `name="string value"` (quoted strings). This function finds sensitive
317/// field names and replaces their values with `[REDACTED]`.
318fn redact_text_line(line: &str, sensitive: &HashSet<String>) -> String {
319    let mut result = String::with_capacity(line.len());
320    let mut pos = 0;
321
322    while pos < line.len() {
323        match line[pos..].find('=') {
324            None => {
325                result.push_str(&line[pos..]);
326                break;
327            }
328            Some(rel_eq) => {
329                let eq_pos = pos + rel_eq;
330
331                // Scan backwards from '=' to find the field name start
332                let field_start = line[pos..eq_pos]
333                    .rfind(|c: char| !c.is_alphanumeric() && c != '_' && c != '-' && c != '.')
334                    .map_or(pos, |rp| pos + rp + 1);
335                let field_name = &line[field_start..eq_pos];
336
337                if !field_name.is_empty() && should_mask_field(field_name, sensitive) {
338                    // Copy everything up to and including '='
339                    result.push_str(&line[pos..=eq_pos]);
340
341                    // Skip the value and replace with redacted placeholder
342                    let after_eq = eq_pos + 1;
343                    let value_end = skip_field_value(line, after_eq);
344                    result.push_str(REDACTED);
345                    pos = value_end;
346                } else {
347                    // Not sensitive -- copy through the '=' and continue
348                    result.push_str(&line[pos..=eq_pos]);
349                    pos = eq_pos + 1;
350                }
351            }
352        }
353    }
354
355    result
356}
357
/// Skip past a field value in text-format output, returning the position after the value.
///
/// `start` is the byte offset of the first character of the value. A value
/// that begins with `"` runs to its closing quote; a backslash escapes the
/// character that follows it, so `\"` does not end the value while the `"`
/// after `\\` does. An unterminated quoted value runs to the end of the line.
/// Any other value ends at the next whitespace character.
///
/// Returns `start` unchanged when it is at or past the end of `line`.
fn skip_field_value(line: &str, start: usize) -> usize {
    let bytes = line.as_bytes();
    match bytes.get(start) {
        None => start,
        Some(b'"') => {
            // Track escape state instead of peeking at the previous byte:
            // a lone look-behind cannot tell `\"` from `\\` followed by `"`.
            let mut escaped = false;
            for (offset, &byte) in bytes[start + 1..].iter().enumerate() {
                if escaped {
                    escaped = false;
                } else if byte == b'\\' {
                    escaped = true;
                } else if byte == b'"' {
                    return start + 1 + offset + 1;
                }
            }
            line.len()
        }
        Some(_) => line[start..]
            .find(char::is_whitespace)
            .map_or(line.len(), |wp| start + wp),
    }
}
380
381/// Mask sensitive values in a string.
382///
383/// Replaces values that look like tokens, keys, or passwords with `[REDACTED]`.
384#[must_use]
385pub fn mask_sensitive_string(input: &str, patterns: &[&str]) -> String {
386    // Build a flat list of search needles. We sweep the input once per
387    // needle, replacing every occurrence -- not just the first -- so a
388    // `Debug` dump of a config with several secret-shaped fields gets
389    // every value redacted, not only the leftmost one.
390    let mut needles: Vec<String> = Vec::with_capacity(patterns.len() * 3);
391    for pattern in patterns {
392        let p_lower = pattern.to_lowercase();
393        needles.push(format!("{p_lower}="));
394        needles.push(format!("{p_lower}:"));
395        needles.push(format!("\"{p_lower}\""));
396    }
397
398    // Operate on a lowercased view for case-insensitive matching but keep
399    // the original casing in the output (Rust `Debug` and YAML both vary).
400    let lower = input.to_lowercase();
401    let mut result = String::with_capacity(input.len());
402    let mut cursor = 0usize;
403
404    while cursor < lower.len() {
405        // Find the earliest occurrence of any needle from `cursor` onwards.
406        let next_hit = needles
407            .iter()
408            .filter_map(|n| {
409                lower[cursor..]
410                    .find(n.as_str())
411                    .map(|off| (cursor + off, n.len()))
412            })
413            .min_by_key(|(start, _len)| *start);
414        let Some((match_start, needle_len)) = next_hit else {
415            // No more matches -- copy the rest verbatim.
416            result.push_str(&input[cursor..]);
417            break;
418        };
419        // Copy bytes up to AND INCLUDING the needle (e.g. `password=`).
420        let value_start = match_start + needle_len;
421        result.push_str(&input[cursor..value_start]);
422        // Determine where the value ends. JSON/YAML/Rust-Debug all use
423        // similar value delimiters. For quoted values (`"key": "..."`)
424        // the opening quote is part of the needle ("\"key\""); the
425        // closing delimiter is the matching `"`. For unquoted values
426        // (`key=val` / `key: val`) the value ends at whitespace or a
427        // structural character.
428        let rest = &input[value_start..];
429        let value_end = if rest.starts_with(": ") || rest.starts_with(':') || rest.starts_with('=')
430        {
431            // Skip the separator(s) themselves so the redaction replaces
432            // ONLY the value, not the colon/equals.
433            let sep_skip = rest
434                .find(|c: char| !matches!(c, ' ' | ':' | '='))
435                .unwrap_or(rest.len());
436            sep_skip
437                + rest[sep_skip..]
438                    .find([',', '\n', '}', ')', ';'])
439                    .unwrap_or(rest.len() - sep_skip)
440        } else {
441            rest.find(|c: char| c.is_whitespace() || c == ',' || c == '"' || c == '}')
442                .unwrap_or(rest.len())
443        };
444        // Re-emit the separator (if any) before the REDACTED placeholder.
445        let value_bytes = &rest[..value_end];
446        if let Some(sep_end) = value_bytes.find(|c: char| !matches!(c, ' ' | ':' | '=')) {
447            result.push_str(&value_bytes[..sep_end]);
448        }
449        result.push_str(REDACTED);
450        cursor = value_start + value_end;
451    }
452
453    result
454}
455
// Unit tests for field detection, the JSON and text redaction functions, and
// `MaskingWriter` driven through an in-memory writer.
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Mutex;

    // Shared buffer for testing MaskingWriter (survives writer drop)
    struct TestWriter(Arc<Mutex<Vec<u8>>>);

    impl io::Write for TestWriter {
        // Append everything to the shared buffer; never a short write.
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            self.0.lock().unwrap().extend_from_slice(buf);
            Ok(buf.len())
        }

        // Nothing to flush: bytes are already in the shared buffer.
        fn flush(&mut self) -> io::Result<()> {
            Ok(())
        }
    }

    // The default list contains the core password/token/key names.
    #[test]
    fn test_default_sensitive_fields() {
        let fields = default_sensitive_fields();
        assert!(fields.contains(&"password".to_string()));
        assert!(fields.contains(&"token".to_string()));
        assert!(fields.contains(&"api_key".to_string()));
        assert!(fields.contains(&"secret".to_string()));
    }

    // Matching is case-insensitive and by substring; unrelated names pass.
    #[test]
    fn test_masking_layer_should_mask() {
        let layer = MaskingLayer::new();

        assert!(layer.should_mask("password"));
        assert!(layer.should_mask("PASSWORD"));
        assert!(layer.should_mask("user_password"));
        assert!(layer.should_mask("api_key"));
        assert!(layer.should_mask("secret_token"));

        assert!(!layer.should_mask("username"));
        assert!(!layer.should_mask("host"));
        assert!(!layer.should_mask("port"));
    }

    // `with_fields` replaces the defaults rather than extending them.
    #[test]
    fn test_masking_layer_custom_fields() {
        let layer = MaskingLayer::with_fields(vec!["custom_secret".to_string()]);

        assert!(layer.should_mask("custom_secret"));
        assert!(!layer.should_mask("password")); // Not in custom list
    }

    // `add_fields` extends the existing set.
    #[test]
    fn test_masking_layer_add_fields() {
        let layer = MaskingLayer::new().add_fields(vec!["my_custom_field".to_string()]);

        assert!(layer.should_mask("my_custom_field"));
        assert!(layer.should_mask("password")); // Still has defaults
    }

    // Free-form masking redacts the matching key and leaves the others alone.
    #[test]
    fn test_mask_sensitive_string() {
        let input = "password=secret123 username=john";
        let result = mask_sensitive_string(input, &["password"]);
        assert!(result.contains("[REDACTED]"));
        assert!(result.contains("username=john"));
    }

    // --- JSON redaction tests ---

    // A sensitive key nested under `fields` is masked; the trailing newline
    // is preserved.
    #[test]
    fn test_redact_json_line_sensitive_field() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input =
            "{\"level\":\"INFO\",\"fields\":{\"message\":\"hello\",\"password\":\"secret123\"}}\n";
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(result.contains("[REDACTED]"));
        assert!(!result.contains("secret123"));
        assert!(result.contains("hello"));
        assert!(result.ends_with('\n'));
    }

    // Redaction reaches keys several objects deep.
    #[test]
    fn test_redact_json_line_nested() {
        let sensitive: HashSet<String> = ["token".to_string()].into_iter().collect();
        let input = r#"{"fields":{"config":{"token":"abc123","host":"localhost"}}}"#;
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(!result.contains("abc123"));
        assert!(result.contains("localhost"));
    }

    // Values of non-sensitive keys survive unchanged.
    #[test]
    fn test_redact_json_line_preserves_non_sensitive() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input = r#"{"level":"INFO","fields":{"username":"john","host":"db.example.com"}}"#;
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert!(result.contains("john"));
        assert!(result.contains("db.example.com"));
    }

    // A line that is not JSON and holds no sensitive field comes back as-is.
    #[test]
    fn test_redact_json_line_invalid_json_passthrough() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input = "this is not json\n";
        let result = inject_and_redact_json_line(input, &sensitive, None, None);
        assert_eq!(result, input);
    }

    // --- Text redaction tests ---

    // A quoted value is replaced whole, quotes included.
    #[test]
    fn test_redact_text_line_quoted_value() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input = r#"2026-01-01T00:00:00Z  INFO target: hello password="secret123" user="john""#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("secret123"));
        assert!(result.contains("password=[REDACTED]"));
        assert!(result.contains(r#"user="john""#));
    }

    // An unquoted value ends at the next whitespace.
    #[test]
    fn test_redact_text_line_unquoted_value() {
        let sensitive: HashSet<String> = ["token".to_string()].into_iter().collect();
        let input = "2026-01-01T00:00:00Z  INFO target: msg token=abc123 count=42";
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("abc123"));
        assert!(result.contains("token=[REDACTED]"));
        assert!(result.contains("count=42"));
    }

    // A line without sensitive fields is returned byte-for-byte.
    #[test]
    fn test_redact_text_line_no_sensitive_fields() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input = "2026-01-01T00:00:00Z  INFO target: hello username=john count=42";
        let result = redact_text_line(input, &sensitive);
        assert_eq!(result, input);
    }

    // Field names match regardless of case.
    #[test]
    fn test_redact_text_line_case_insensitive() {
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        let input = r#"2026-01-01T00:00:00Z  INFO target: msg PASSWORD="secret""#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("secret"));
        assert!(result.contains("[REDACTED]"));
    }

    // Every sensitive field on the line is redacted, not just the first.
    #[test]
    fn test_redact_text_line_multiple_sensitive() {
        let sensitive: HashSet<String> = ["password".to_string(), "token".to_string()]
            .into_iter()
            .collect();
        let input = r#"password="pass1" host=localhost token=tok123"#;
        let result = redact_text_line(input, &sensitive);
        assert!(!result.contains("pass1"));
        assert!(!result.contains("tok123"));
        assert!(result.contains("host=localhost"));
        assert_eq!(result.matches("[REDACTED]").count(), 2);
    }

    // --- MaskingWriter tests ---

    // With no patterns and no service fields the writer is a pass-through,
    // so even `password=...` reaches the inner writer unchanged.
    #[test]
    fn test_masking_writer_passthrough_when_empty() {
        let buf = Arc::new(Mutex::new(Vec::new()));
        let sensitive = Arc::new(HashSet::new());
        {
            let mut writer = MaskingWriter {
                inner: TestWriter(Arc::clone(&buf)),
                buffer: Vec::new(),
                sensitive_fields: sensitive,
                is_json: false,
                service_name: None,
                service_version: None,
            };
            io::Write::write_all(&mut writer, b"password=secret\n").unwrap();
        }
        let guard = buf.lock().unwrap();
        let output = String::from_utf8_lossy(&guard);
        assert_eq!(output, "password=secret\n");
    }

    // Text mode: the buffered line is redacted and emitted when the writer
    // is dropped at the end of the inner scope.
    #[test]
    fn test_masking_writer_redacts_text_on_drop() {
        let buf = Arc::new(Mutex::new(Vec::new()));
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        {
            let mut writer = MaskingWriter {
                inner: TestWriter(Arc::clone(&buf)),
                buffer: Vec::new(),
                sensitive_fields: Arc::new(sensitive),
                is_json: false,
                service_name: None,
                service_version: None,
            };
            io::Write::write_all(&mut writer, b"password=secret123 user=john\n").unwrap();
        }
        let guard = buf.lock().unwrap();
        let output = String::from_utf8_lossy(&guard);
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("user=john"));
    }

    // JSON mode: same drop-time flush, with JSON-aware redaction.
    #[test]
    fn test_masking_writer_redacts_json_on_drop() {
        let buf = Arc::new(Mutex::new(Vec::new()));
        let sensitive: HashSet<String> = ["password".to_string()].into_iter().collect();
        {
            let mut writer = MaskingWriter {
                inner: TestWriter(Arc::clone(&buf)),
                buffer: Vec::new(),
                sensitive_fields: Arc::new(sensitive),
                is_json: true,
                service_name: None,
                service_version: None,
            };
            let json = b"{\"message\":\"hello\",\"password\":\"secret123\"}\n";
            io::Write::write_all(&mut writer, json).unwrap();
        }
        let guard = buf.lock().unwrap();
        let output = String::from_utf8_lossy(&guard);
        assert!(output.contains("[REDACTED]"));
        assert!(!output.contains("secret123"));
        assert!(output.contains("hello"));
    }
}
681}