Skip to main content

ralph/
redaction.rs

1//! Redaction logic for sensitive data in strings, errors, and logs.
2//!
3//! This module provides functionality to redact sensitive information from text,
4//! including API keys, tokens, passwords, and other secrets. It uses a cached
5//! approach for environment variable redaction to avoid repeatedly scanning
6//! all environment variables.
7//!
8//! # Invariants
9//! - Sensitive environment variable values are cached at first access using a thread-safe `RwLock`
10//! - The cache is never refreshed during runtime; environment variable changes
11//!   after first redaction call will not be detected
12//! - All redaction functions are pure (no side effects except cache initialization)
13
14use crate::constants::defaults::REDACTED;
15use crate::constants::limits::MIN_ENV_VALUE_LEN;
16use std::collections::HashSet;
17use std::fmt;
18use std::sync::RwLock;
19
/// Cache for sensitive environment variable values.
///
/// Starts out as `None`; the non-test `get_sensitive_env_values` stores
/// `Some(set)` on first use and `clear_sensitive_env_cache` resets it to `None`.
/// Uses RwLock to allow cache clearing in tests while maintaining
/// thread-safe access in production.
static SENSITIVE_ENV_CACHE: RwLock<Option<HashSet<String>>> = RwLock::new(None);
24
25/// Initializes the sensitive environment variable cache.
26///
27/// Scans all environment variables once, filtering for:
28/// - Keys that look sensitive (contain KEY, SECRET, TOKEN, PASSWORD, etc.)
29/// - Non-path-like keys (excludes PATH, HOME, etc.)
30/// - Values meeting minimum length requirement (6 chars)
31///
32/// Returns a HashSet of sensitive values for O(1) lookup during redaction.
33fn init_sensitive_env_cache() -> HashSet<String> {
34    let mut sensitive_values = HashSet::new();
35    for (key, value) in std::env::vars() {
36        if !looks_sensitive_env_key(&key) {
37            continue;
38        }
39        if is_path_like_env_key(&key) {
40            continue;
41        }
42        let trimmed = value.trim();
43        if trimmed.len() < MIN_ENV_VALUE_LEN {
44            continue;
45        }
46        sensitive_values.insert(trimmed.to_string());
47    }
48    sensitive_values
49}
50
/// Test-build variant: returns the current set of sensitive environment
/// variable values by rescanning the environment on every call.
///
/// Unlike the non-test variant, this does not read or populate
/// `SENSITIVE_ENV_CACHE`.
#[cfg(test)]
fn get_sensitive_env_values() -> HashSet<String> {
    // Tests mutate process environment variables and run in parallel with
    // unrelated tests. Avoid stale-cache behavior in test builds so each call
    // reflects current env state deterministically.
    init_sensitive_env_cache()
}
60
61/// Returns the cached set of sensitive environment variable values.
62/// Initializes the cache on first call if not already populated.
63#[cfg(not(test))]
64fn get_sensitive_env_values() -> HashSet<String> {
65    // Fast path: check if cache is already populated
66    if let Ok(guard) = SENSITIVE_ENV_CACHE.read()
67        && let Some(ref values) = *guard
68    {
69        return values.clone();
70    }
71
72    // Slow path: initialize cache
73    if let Ok(mut guard) = SENSITIVE_ENV_CACHE.write() {
74        if guard.is_none() {
75            *guard = Some(init_sensitive_env_cache());
76        }
77        // Return cached values or empty set if somehow still None after initialization
78        guard.as_ref().cloned().unwrap_or_default()
79    } else {
80        // Fallback if lock fails: compute on the fly
81        init_sensitive_env_cache()
82    }
83}
84
/// Resets `SENSITIVE_ENV_CACHE` to its empty (`None`) state.
///
/// Only compiled for tests, where it gives tests that modify environment
/// variables a clean starting point. Does nothing if the lock cannot be
/// acquired.
#[cfg(test)]
fn clear_sensitive_env_cache() {
    match SENSITIVE_ENV_CACHE.write() {
        Ok(mut cache) => *cache = None,
        // Lock unavailable: leave the cache untouched.
        Err(_) => {}
    }
}
95
96/// A wrapper around `String` that applies redaction when displayed via `Display` or `Debug`.
97#[derive(Clone, Default, PartialEq, Eq)]
98pub struct RedactedString(pub String);
99
100impl From<String> for RedactedString {
101    fn from(s: String) -> Self {
102        Self(s)
103    }
104}
105
106impl From<&str> for RedactedString {
107    fn from(s: &str) -> Self {
108        Self(s.to_string())
109    }
110}
111
112impl fmt::Display for RedactedString {
113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114        write!(f, "{}", redact_text(&self.0))
115    }
116}
117
118impl fmt::Debug for RedactedString {
119    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
120        if f.alternate() {
121            write!(f, "RedactedString({:#?})", redact_text(&self.0))
122        } else {
123            write!(f, "RedactedString({:?})", redact_text(&self.0))
124        }
125    }
126}
127
128/// A `log::Log` implementation that wraps another logger and redacts all log messages.
129pub struct RedactedLogger {
130    inner: Box<dyn log::Log>,
131}
132
133impl RedactedLogger {
134    /// Creates a new `RedactedLogger` wrapping the given logger.
135    pub fn new(inner: Box<dyn log::Log>) -> Self {
136        Self { inner }
137    }
138
139    /// wraps the provided logger and sets it as the global logger.
140    /// This is a convenience for `log::set_boxed_logger(Box::new(RedactedLogger::new(inner)))`.
141    pub fn init(
142        inner: Box<dyn log::Log>,
143        max_level: log::LevelFilter,
144    ) -> Result<(), log::SetLoggerError> {
145        log::set_boxed_logger(Box::new(Self::new(inner)))?;
146        log::set_max_level(max_level);
147        Ok(())
148    }
149}
150
151impl log::Log for RedactedLogger {
152    fn enabled(&self, metadata: &log::Metadata) -> bool {
153        self.inner.enabled(metadata)
154    }
155
156    fn log(&self, record: &log::Record) {
157        if self.enabled(record.metadata()) {
158            crate::debuglog::write_log_record(record);
159            let redacted_msg = redact_text(&format!("{}", record.args()));
160            self.inner.log(
161                &log::Record::builder()
162                    .args(format_args!("{}", redacted_msg))
163                    .level(record.level())
164                    .target(record.target())
165                    .file(record.file())
166                    .line(record.line())
167                    .module_path(record.module_path())
168                    .build(),
169            );
170        }
171    }
172
173    fn flush(&self) {
174        self.inner.flush();
175    }
176}
177
/// Logs at `info` level: the arguments are formatted with `format!` and the
/// result is passed through `redact_text` before being handed to `log::info!`.
///
/// One of the macros for logging with immediate redaction.
/// Note: These are mostly redundant if `RedactedLogger` is used, but provide
/// explicit intent and can be used when the global logger is not wrapped.
#[macro_export]
macro_rules! rinfo {
    ($($arg:tt)+) => {
        log::info!("{}", $crate::redaction::redact_text(&format!($($arg)+)))
    }
}
187
/// Logs at `warn` level: the arguments are formatted with `format!` and the
/// result is passed through `redact_text` before being handed to `log::warn!`.
#[macro_export]
macro_rules! rwarn {
    ($($arg:tt)+) => {
        log::warn!("{}", $crate::redaction::redact_text(&format!($($arg)+)))
    }
}
194
/// Logs at `error` level: the arguments are formatted with `format!` and the
/// result is passed through `redact_text` before being handed to `log::error!`.
#[macro_export]
macro_rules! rerror {
    ($($arg:tt)+) => {
        log::error!("{}", $crate::redaction::redact_text(&format!($($arg)+)))
    }
}
201
/// Logs at `debug` level: the arguments are formatted with `format!` and the
/// result is passed through `redact_text` before being handed to `log::debug!`.
#[macro_export]
macro_rules! rdebug {
    ($($arg:tt)+) => {
        log::debug!("{}", $crate::redaction::redact_text(&format!($($arg)+)))
    }
}
208
/// Logs at `trace` level: the arguments are formatted with `format!` and the
/// result is passed through `redact_text` before being handed to `log::trace!`.
#[macro_export]
macro_rules! rtrace {
    ($($arg:tt)+) => {
        log::trace!("{}", $crate::redaction::redact_text(&format!($($arg)+)))
    }
}
215
216pub fn redact_text(value: &str) -> String {
217    if value.trim().is_empty() {
218        return value.to_string();
219    }
220
221    let with_pairs = redact_key_value_pairs(value);
222    let with_bearer = redact_bearer_tokens(&with_pairs);
223    let with_aws = redact_aws_keys(&with_bearer);
224    let with_ssh = redact_ssh_keys(&with_aws);
225    let with_hex = redact_hex_tokens(&with_ssh);
226    redact_sensitive_env_values(&with_hex)
227}
228
229pub fn looks_sensitive_env_key(key: &str) -> bool {
230    let normalized = normalize_key(key);
231    if normalized == "APIKEY" || normalized == "PRIVATEKEY" {
232        return true;
233    }
234    for token in normalized.split(['_', '-']) {
235        if token.is_empty() {
236            continue;
237        }
238        if is_sensitive_token(token) {
239            return true;
240        }
241    }
242    false
243}
244
245pub fn is_path_like_env_key(key: &str) -> bool {
246    matches!(
247        normalize_key(key).as_str(),
248        "CWD" | "HOME" | "OLDPWD" | "PATH" | "PWD" | "TEMP" | "TMP" | "TMPDIR"
249    )
250}
251
/// Copies the character of `text` that starts at byte offset `*index` into
/// `out` and advances `*index` past it.
///
/// `*index` must lie on a UTF-8 character boundary of `text`; this is checked
/// with a `debug_assert!`, and slicing panics otherwise. When `*index` is at
/// the end of `text` nothing is copied and the index is still advanced by one
/// so that calling loops always make progress.
fn push_next_char(out: &mut String, text: &str, index: &mut usize) {
    debug_assert!(text.is_char_boundary(*index));

    match text[*index..].chars().next() {
        Some(ch) => {
            out.push(ch);
            *index += ch.len_utf8();
        }
        // Nothing left to copy: step forward anyway to avoid an infinite loop.
        None => *index += 1,
    }
}
264
265fn redact_aws_keys(text: &str) -> String {
266    let mut out = String::with_capacity(text.len());
267    let bytes = text.as_bytes();
268    let mut i = 0;
269
270    while i < bytes.len() {
271        // Look for AKIA...
272        if i + 20 <= bytes.len() && &bytes[i..i + 4] == b"AKIA" {
273            let mut all_caps_alphanum = true;
274            for j in 0..16 {
275                let b = bytes[i + 4 + j];
276                if !(b.is_ascii_uppercase() || b.is_ascii_digit()) {
277                    all_caps_alphanum = false;
278                    break;
279                }
280            }
281            if all_caps_alphanum {
282                let word_boundary_start = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
283                let word_boundary_end =
284                    i + 20 == bytes.len() || !bytes[i + 20].is_ascii_alphanumeric();
285
286                if word_boundary_start && word_boundary_end {
287                    out.push_str(REDACTED);
288                    i += 20;
289                    continue;
290                }
291            }
292        }
293
294        // Generic AWS secret lookahead (40 chars)
295        // [0-9a-zA-Z/+=]{40}
296        if i + 40 <= bytes.len() {
297            let mut is_secret = true;
298            for j in 0..40 {
299                let b = bytes[i + j];
300                if !(b.is_ascii_alphanumeric() || b == b'/' || b == b'+' || b == b'=') {
301                    is_secret = false;
302                    break;
303                }
304            }
305            if is_secret {
306                let word_boundary_start = i == 0 || !bytes[i - 1].is_ascii_alphanumeric();
307                let word_boundary_end =
308                    i + 40 == bytes.len() || !bytes[i + 40].is_ascii_alphanumeric();
309
310                if word_boundary_start && word_boundary_end {
311                    // Check if it's near "secret" or "key" or "aws" or "akia"
312                    // to reduce false positives if we wanted, but for now let's be aggressive.
313                    out.push_str(REDACTED);
314                    i += 40;
315                    continue;
316                }
317            }
318        }
319
320        push_next_char(&mut out, text, &mut i);
321    }
322    out
323}
324
325fn redact_ssh_keys(text: &str) -> String {
326    let mut out = String::with_capacity(text.len());
327    let mut i = 0;
328
329    while i < text.len() {
330        if text[i..].starts_with("-----BEGIN")
331            && let Some(end_marker_pos) = text[i..].find("-----END")
332            && let Some(final_dash_pos) = text[i + end_marker_pos + 8..].find("-----")
333        {
334            let total_end = i + end_marker_pos + 8 + final_dash_pos + 5;
335            out.push_str(REDACTED);
336            i = total_end;
337            continue;
338        }
339        push_next_char(&mut out, text, &mut i);
340    }
341    out
342}
343
344fn redact_hex_tokens(text: &str) -> String {
345    let mut out = String::with_capacity(text.len());
346    let bytes = text.as_bytes();
347    let mut i = 0;
348
349    while i < bytes.len() {
350        if bytes[i].is_ascii_hexdigit() {
351            let start = i;
352            while i < bytes.len() && bytes[i].is_ascii_hexdigit() {
353                i += 1;
354            }
355            let len = i - start;
356            if len >= 32 {
357                let word_boundary_start = start == 0 || !bytes[start - 1].is_ascii_alphanumeric();
358                let word_boundary_end = i == bytes.len() || !bytes[i].is_ascii_alphanumeric();
359
360                if word_boundary_start && word_boundary_end {
361                    out.push_str(REDACTED);
362                    continue;
363                }
364            }
365            out.push_str(&text[start..i]);
366        } else {
367            push_next_char(&mut out, text, &mut i);
368        }
369    }
370    out
371}
372
373fn redact_key_value_pairs(text: &str) -> String {
374    let chars: Vec<char> = text.chars().collect();
375    let mut out = String::with_capacity(text.len());
376    let mut i = 0;
377
378    while i < chars.len() {
379        let ch = chars[i];
380        if !is_key_char(ch) {
381            out.push(ch);
382            i += 1;
383            continue;
384        }
385
386        let start = i;
387        let mut end = i;
388        while end < chars.len() && is_key_char(chars[end]) {
389            end += 1;
390        }
391
392        let key: String = chars[start..end].iter().collect();
393        if looks_sensitive_label(&key) {
394            let mut cursor = end;
395            while cursor < chars.len() && chars[cursor].is_whitespace() && chars[cursor] != '\n' {
396                cursor += 1;
397            }
398            if cursor < chars.len() && (chars[cursor] == ':' || chars[cursor] == '=') {
399                cursor += 1;
400                while cursor < chars.len() && chars[cursor].is_whitespace() && chars[cursor] != '\n'
401                {
402                    cursor += 1;
403                }
404
405                let value_start = cursor;
406                let mut value_end = value_start;
407                if value_start < chars.len()
408                    && (chars[value_start] == '"' || chars[value_start] == '\'')
409                {
410                    let quote = chars[value_start];
411                    value_end += 1;
412                    while value_end < chars.len() && chars[value_end] != quote {
413                        value_end += 1;
414                    }
415                    if value_end < chars.len() {
416                        value_end += 1;
417                    }
418                } else {
419                    while value_end < chars.len() && !chars[value_end].is_whitespace() {
420                        value_end += 1;
421                    }
422                }
423
424                out.extend(chars[i..value_start].iter());
425                out.push_str(REDACTED);
426                i = value_end;
427                continue;
428            }
429        }
430
431        out.extend(chars[i..end].iter());
432        i = end;
433    }
434
435    out
436}
437
438fn redact_bearer_tokens(text: &str) -> String {
439    let lower = text.to_ascii_lowercase();
440    let needle = "bearer ";
441    let mut out = String::with_capacity(text.len());
442    let mut index = 0;
443
444    while let Some(pos) = lower[index..].find(needle) {
445        let abs = index + pos;
446        if abs > 0 {
447            let prev = text.as_bytes()[abs - 1];
448            if prev.is_ascii_alphanumeric() {
449                let next_index = abs + 1;
450                out.push_str(&text[index..next_index]);
451                index = next_index;
452                continue;
453            }
454        }
455
456        let start = abs + needle.len();
457        let bytes = text.as_bytes();
458        let mut end = start;
459        while end < bytes.len() && !bytes[end].is_ascii_whitespace() {
460            end += 1;
461        }
462
463        out.push_str(&text[index..start]);
464        out.push_str(REDACTED);
465        index = end;
466    }
467
468    out.push_str(&text[index..]);
469    out
470}
471
472/// Redacts sensitive environment variable values from text.
473///
474/// Uses a cached set of sensitive values for efficient O(k) lookup where k
475/// is the number of sensitive environment variables (typically 0-5),
476/// rather than O(n) where n is all environment variables (typically 50-100+).
477fn redact_sensitive_env_values(text: &str) -> String {
478    let sensitive_values = get_sensitive_env_values();
479    if sensitive_values.is_empty() {
480        return text.to_string();
481    }
482    let mut redacted = text.to_string();
483    for value in &sensitive_values {
484        redacted = redacted.replace(value.as_str(), REDACTED);
485    }
486    redacted
487}
488
489fn looks_sensitive_label(key: &str) -> bool {
490    let normalized = normalize_key(key);
491    if normalized == "APIKEY" || normalized == "PRIVATEKEY" {
492        return true;
493    }
494    if normalized == "API_KEY" || normalized == "API-KEY" {
495        return true;
496    }
497    if normalized == "PRIVATE_KEY" || normalized == "PRIVATE-KEY" {
498        return true;
499    }
500    looks_sensitive_env_key(&normalized)
501}
502
/// Reports whether a single key fragment names a secret.
///
/// Compared ASCII-case-insensitively, the fragment must be one of `KEY`,
/// `SECRET`, `TOKEN` or `PASSWORD`, optionally followed by ASCII digits only
/// (`TOKEN1` matches; `KEYS` and `MONKEY` do not).
fn is_sensitive_token(token: &str) -> bool {
    const BASES: [&str; 4] = ["KEY", "SECRET", "TOKEN", "PASSWORD"];

    let upper = token.to_ascii_uppercase();
    BASES.iter().any(|base| match upper.strip_prefix(*base) {
        // Exactly the base word.
        Some("") => true,
        // Base word plus a purely numeric suffix.
        Some(suffix) => suffix.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    })
}
518
/// Reports whether `ch` may appear inside a key name: an ASCII letter, an
/// ASCII digit, `_` or `-`.
fn is_key_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-')
}
522
/// Returns `key` with surrounding whitespace removed and every character
/// converted to uppercase.
fn normalize_key(key: &str) -> String {
    key.trim().chars().flat_map(char::to_uppercase).collect()
}
526
// Unit tests for key classification, the text redaction stages and the
// redacting logger.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::debuglog::{
        enable as enable_debug_log, reset_for_tests as reset_debug_log, test_lock as debug_lock,
    };
    use std::sync::{Mutex, OnceLock};
    use tempfile::tempdir;

    /// Process-wide lock taken by the tests below that set or remove
    /// environment variables, so those tests do not interleave.
    fn env_lock() -> &'static Mutex<()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        LOCK.get_or_init(|| Mutex::new(()))
    }

    // Table-driven check of which key names count as sensitive.
    #[test]
    fn looks_sensitive_env_key_matches_expected_values() {
        let cases = [
            ("API_KEY", true),
            ("password", true),
            ("auth-token", true),
            ("TOKEN1", true),
            ("  secret  ", true),
            ("PATH", false),
            ("HOME", false),
            ("SHELL", false),
            ("MONKEY", false),
            ("PRIVATEKEY", true),
            ("APIKEY", true),
        ];

        for (key, expected) in cases {
            assert_eq!(looks_sensitive_env_key(key), expected, "key={key}");
        }
    }

    // Table-driven check of the exact-name path-like key list.
    #[test]
    fn is_path_like_env_key_matches_expected_values() {
        let cases = [
            ("PATH", true),
            ("HOME", true),
            ("TMPDIR", true),
            ("  pwd  ", true),
            ("SHELL", false),
            ("PATH_INFO", false),
        ];

        for (key, expected) in cases {
            assert_eq!(is_path_like_env_key(key), expected, "key={key}");
        }
    }

    // `=`/`:` separators, with and without surrounding spaces, are all handled
    // and the key plus separator are preserved.
    #[test]
    fn redact_text_masks_key_value_pairs() {
        let input = "API_KEY=abc12345 token:xyz98765 password = hunter2";
        let output = redact_text(input);
        assert!(!output.contains("abc12345"));
        assert!(!output.contains("xyz98765"));
        assert!(!output.contains("hunter2"));
        assert!(output.contains("API_KEY=[REDACTED]"));
        assert!(output.contains("token:[REDACTED]"));
        assert!(output.contains("password = [REDACTED]"));
    }

    #[test]
    fn redact_text_masks_bearer_tokens() {
        let input = "Authorization: Bearer abcdef123456";
        let output = redact_text(input);
        assert!(!output.contains("abcdef123456"));
        assert!(output.contains("Bearer [REDACTED]"));
    }

    // Multi-byte characters must pass through every stage unchanged.
    #[test]
    fn redact_text_handles_non_ascii() {
        let input = "Read AGENTS.md — voila âêîö 你好";
        let output = redact_text(input);
        assert_eq!(output, input);
    }

    #[test]
    fn redact_text_masks_sensitive_env_values() {
        let _guard = env_lock().lock().expect("env lock");
        clear_sensitive_env_cache();
        unsafe { std::env::set_var("API_TOKEN", "supersecretvalue") };

        let input = "token is supersecretvalue";
        let output = redact_text(input);

        // Remove the variable before asserting so a failure does not leave it set.
        unsafe { std::env::remove_var("API_TOKEN") };

        assert!(!output.contains("supersecretvalue"));
        assert!(output.contains(REDACTED));
    }

    // A variable whose name is not sensitive must not have its value redacted.
    #[test]
    fn redact_text_leaves_non_sensitive_env_values() {
        let _guard = env_lock().lock().expect("env lock");
        clear_sensitive_env_cache();
        let key = "RALPH_NON_SENSITIVE_ENV";
        let value = "visible_plain_value";
        unsafe { std::env::set_var(key, value) };

        let input = "value is visible_plain_value";
        let output = redact_text(input);

        unsafe { std::env::remove_var(key) };

        assert!(output.contains(value));
    }

    #[test]
    fn redact_text_masks_privatekey_env_value() {
        let _guard = env_lock().lock().expect("env lock");
        clear_sensitive_env_cache();
        unsafe { std::env::set_var("PRIVATEKEY", "supersecretkeyvalue") };

        let input = "key is supersecretkeyvalue";
        let output = redact_text(input);

        unsafe { std::env::remove_var("PRIVATEKEY") };

        assert!(!output.contains("supersecretkeyvalue"));
        assert!(output.contains(REDACTED));
    }

    // In test builds the environment is rescanned on every call, so a value
    // changed between two calls is picked up without clearing the cache.
    #[test]
    fn redact_text_reads_latest_sensitive_env_values_without_manual_cache_clear() {
        let _guard = env_lock().lock().expect("env lock");
        clear_sensitive_env_cache();
        unsafe { std::env::set_var("API_TOKEN", "initialsecretvalue") };
        let first = redact_text("token is initialsecretvalue");
        unsafe { std::env::set_var("API_TOKEN", "updatedsecretvalue") };
        let second = redact_text("token is updatedsecretvalue");
        unsafe { std::env::remove_var("API_TOKEN") };

        assert!(!first.contains("initialsecretvalue"));
        assert!(!second.contains("updatedsecretvalue"));
        assert!(first.contains(REDACTED));
        assert!(second.contains(REDACTED));
    }

    /// Logger double that is always enabled and remembers the text of the
    /// most recent record it received.
    struct MockLogger {
        last_msg: std::sync::Arc<std::sync::Mutex<String>>,
    }

    impl log::Log for MockLogger {
        fn enabled(&self, _: &log::Metadata) -> bool {
            true
        }
        fn log(&self, record: &log::Record) {
            let mut lock = self.last_msg.lock().unwrap();
            *lock = format!("{}", record.args());
        }
        fn flush(&self) {}
    }

    // The wrapped logger must only ever see the redacted message.
    #[test]
    fn redacted_logger_masks_output() {
        let last_msg = std::sync::Arc::new(std::sync::Mutex::new(String::new()));
        let mock = Box::new(MockLogger {
            last_msg: last_msg.clone(),
        });

        let wrapper = RedactedLogger::new(mock);

        let record = log::Record::builder()
            .args(format_args!("Connecting with API_KEY=secret123"))
            .level(log::Level::Info)
            .build();

        use log::Log;
        wrapper.log(&record);

        let msg = last_msg.lock().unwrap();
        assert!(!msg.contains("secret123"));
        assert!(msg.contains("API_KEY=[REDACTED]"));
    }

    // The debug log receives the record before redaction, so it contains the
    // raw message.
    #[test]
    fn redacted_logger_writes_raw_log_to_debug_log() {
        let _guard = debug_lock().lock().expect("debug log lock");
        reset_debug_log();
        let dir = tempdir().expect("tempdir");
        enable_debug_log(dir.path()).expect("enable debug log");

        let last_msg = std::sync::Arc::new(std::sync::Mutex::new(String::new()));
        let mock = Box::new(MockLogger {
            last_msg: last_msg.clone(),
        });

        let wrapper = RedactedLogger::new(mock);

        let record = log::Record::builder()
            .args(format_args!("Connecting with API_KEY=secret123"))
            .level(log::Level::Info)
            .build();

        use log::Log;
        wrapper.log(&record);

        let debug_log = dir.path().join(".ralph/logs/debug.log");
        let contents = std::fs::read_to_string(&debug_log).expect("read log");
        assert!(contents.contains("API_KEY=secret123"), "log: {contents}");
        reset_debug_log();
    }
}