Skip to main content

ralph_workflow/cloud/
redaction.rs

1//! Redaction utilities for cloud-mode logging/payloads.
2//!
3//! Cloud mode must never log or report secrets. Git and HTTP error strings can
4//! contain embedded credentials (for example, URLs with `user:pass@host`).
5//!
6//! This module provides a conservative sanitizer for untrusted error strings.
7
8use itertools::Itertools;
9
10use crate::cloud::io_redaction::redact_bearer_tokens;
11use crate::cloud::io_redaction::redact_common_query_params;
12use crate::cloud::io_redaction::redact_token_like_substrings;
13
14/// Redact likely secrets from an untrusted, user-controlled string.
15///
16/// This is intentionally conservative. It may redact non-secret strings if they
17/// resemble tokens.
18#[must_use]
19pub fn redact_secrets(input: &str) -> String {
20    truncate_redacted(&redact_token_like_substrings(&redact_bearer_tokens(
21        &redact_common_query_params(&redact_http_url_userinfo(input)),
22    )))
23}
24
/// Cap `input` at `MAX_LEN` bytes, appending `...<truncated>` when text was cut.
///
/// Inputs of at most `MAX_LEN` bytes are returned unchanged. Longer inputs are
/// cut at the largest UTF-8 character boundary that does not exceed `MAX_LEN`,
/// so the kept prefix may be up to three bytes shorter than the cap. Cutting at
/// a fixed byte offset would panic whenever that offset falls inside a
/// multi-byte character.
fn truncate_redacted(input: &str) -> String {
    const MAX_LEN: usize = 4096;

    if input.len() <= MAX_LEN {
        return input.to_string();
    }

    // Walk back from the cap to the nearest character boundary. Offset 0 is
    // always a boundary, so the search always succeeds; the fallback only
    // exists to avoid an unwrap.
    let cut = (0..=MAX_LEN)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0);

    format!("{}...<truncated>", &input[..cut])
}
34
/// Replace the userinfo part of every `http://` / `https://` URL in `input`
/// with `<redacted>`, leaving the scheme, the host, and all other text intact.
///
/// A URL's authority is taken to run from the end of its scheme up to the
/// first `/`, the first ASCII whitespace character, the start of the next
/// `http://` / `https://` scheme, or the end of `input`, whichever comes
/// first. If the authority contains an `@`, everything up to and including the
/// last `@` becomes `<redacted>@`. This is intentionally over-eager: any `@`
/// before the first `/` is treated as a userinfo separator.
///
/// Bounding the authority at the next scheme keeps the output well-formed when
/// one scheme appears inside another URL's authority (for example
/// `http://http://x`): each byte of the input is emitted at most once, and the
/// inner URL still gets its own userinfo redacted.
///
/// Scheme matching is case-sensitive; `HTTP://` is not recognised.
fn redact_http_url_userinfo(input: &str) -> String {
    const SCHEME_STEM: &str = "http";

    // (scheme start, authority start) for every scheme occurrence, already in
    // ascending order because `match_indices` scans left to right.
    let schemes: Vec<(usize, usize)> = input
        .match_indices(SCHEME_STEM)
        .filter_map(|(start, _)| {
            let rest = &input[start + SCHEME_STEM.len()..];
            if rest.starts_with("s://") {
                Some((start, start + "https://".len()))
            } else if rest.starts_with("://") {
                Some((start, start + "http://".len()))
            } else {
                None
            }
        })
        .collect();

    let first_start = match schemes.first() {
        Some(&(start, _)) => start,
        None => return input.to_string(),
    };

    // Each URL owns the text up to the start of the next scheme; the last one
    // owns everything up to the end of the input.
    let limits = schemes
        .iter()
        .skip(1)
        .map(|&(start, _)| start)
        .chain(std::iter::once(input.len()));

    let rewritten = schemes
        .iter()
        .zip(limits)
        .flat_map(|(&(start, authority_start), limit)| {
            let scheme = &input[start..authority_start];
            let window = &input[authority_start..limit];

            let authority_len = window
                .find(|c: char| c == '/' || c.is_ascii_whitespace())
                .unwrap_or(window.len());
            let (authority, trailing) = window.split_at(authority_len);

            // Split on the LAST '@' so that an '@' inside the password is
            // redacted along with the rest of the userinfo.
            let (marker, host) = match authority.rfind('@') {
                Some(at_pos) => ("<redacted>@", &authority[at_pos + 1..]),
                None => ("", authority),
            };

            [scheme, marker, host, trailing]
        });

    std::iter::once(&input[..first_start])
        .chain(rewritten)
        .collect()
}
107
#[cfg(test)]
mod tests {
    use super::redact_secrets;

    /// A bare token used as URL userinfo is replaced; scheme and host survive.
    #[test]
    fn redacts_http_url_userinfo() {
        let redacted = redact_secrets(
            "fatal: could not read Username for 'https://token@github.com/org/repo.git': terminal prompts disabled",
        );

        assert!(
            !redacted.contains("token@github.com"),
            "should remove userinfo from URL authority"
        );
        assert!(
            redacted.contains("https://<redacted>@github.com"),
            "should preserve scheme and host"
        );
    }

    /// A `user:pass` pair used as URL userinfo is replaced as a whole.
    #[test]
    fn redacts_http_url_user_and_password() {
        let redacted = redact_secrets("remote: https://user:pass@github.com/org/repo.git");

        assert!(!redacted.contains("user:pass@"));
        assert!(redacted.contains("https://<redacted>@github.com"));
    }

    /// The value following `Bearer` is replaced, the header name is kept.
    #[test]
    fn redacts_bearer_tokens() {
        let redacted = redact_secrets("Authorization: Bearer abcdef123456");

        assert_eq!(redacted, "Authorization: Bearer <redacted>");
    }

    /// Only the sensitive query parameter is redacted; others pass through.
    #[test]
    fn redacts_common_query_token_params() {
        let redacted = redact_secrets("GET /?access_token=abc123&other=ok");

        assert!(redacted.contains("access_token=<redacted>"));
        assert!(redacted.contains("other=ok"));
    }

    /// A GitHub-style `ghp_` token is removed from free-form text.
    #[test]
    fn redacts_github_like_tokens() {
        let redacted = redact_secrets("error: ghp_abcdefghijklmnopqrstuvwxyz0123456789");

        assert!(!redacted.contains("ghp_"));
        assert!(redacted.contains("<redacted>"));
    }

    /// Oversized messages are shortened and marked as truncated.
    #[test]
    fn truncates_very_long_messages() {
        let oversized = "x".repeat(10_000);
        let redacted = redact_secrets(&oversized);

        assert!(redacted.len() < oversized.len());
        assert!(redacted.ends_with("...<truncated>"));
    }
}