cuenv_ci/executor/
redact.rs

1//! Log Redaction
2//!
3//! Provides secret redaction for stdout/stderr streams to prevent
4//! accidental secret exposure in CI logs.
5
6use std::collections::HashSet;
7
/// Minimum secret length, in bytes, that will be redacted.
///
/// Secrets shorter than this are skipped (shorter secrets may cause false
/// positives); the length compared against it is the byte length of the
/// secret string.
pub const MIN_SECRET_LENGTH: usize = 4;
10
/// Placeholder text substituted for every occurrence of a redacted secret.
pub const REDACTED_PLACEHOLDER: &str = "[REDACTED]";
13
/// Warning for a short secret that won't be redacted.
///
/// Returned by [`LogRedactor::new`] and [`LogRedactor::with_names`] for every
/// secret whose value is shorter than [`MIN_SECRET_LENGTH`] bytes. The secret
/// value itself is not stored here, only its name and length.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShortSecretWarning {
    /// Secret key name: the name passed to `with_names`, or a generated
    /// `secret_<index>` (position in the input list) when built via `new`
    pub key: String,
    /// Actual length of the secret value, in bytes
    pub length: usize,
}
22
/// Log redactor that replaces secret values with placeholders
///
/// Uses a sliding window buffer to handle secrets that span chunk boundaries.
///
/// # Streaming Usage
///
/// When processing streaming input via [`redact`], the redactor buffers content
/// to detect secrets that span chunk boundaries. **Callers must call [`flush`]
/// when the stream ends** to retrieve any remaining buffered content.
///
/// ```ignore
/// let (mut redactor, _) = LogRedactor::new(secrets);
/// for chunk in stream {
///     let redacted = redactor.redact(&chunk);
///     output.push_str(&redacted);
/// }
/// // IMPORTANT: Don't forget to flush!
/// output.push_str(&redactor.flush());
/// ```
///
/// For complete strings where buffering isn't needed, use [`redact_immediate`] instead.
///
/// # Debug output
///
/// The `Debug` implementation is written by hand so that formatting a
/// redactor with `{:?}` never prints secret values or buffered (not yet
/// redacted) log text; only counts and sizes are shown.
///
/// [`redact`]: LogRedactor::redact
/// [`flush`]: LogRedactor::flush
/// [`redact_immediate`]: LogRedactor::redact_immediate
pub struct LogRedactor {
    /// Secret values to redact (sorted by length descending for greedy matching)
    secrets: Vec<String>,
    /// Buffer for handling cross-boundary secrets: input that has been
    /// received but not yet returned to the caller
    buffer: String,
    /// Maximum secret length in bytes (determines buffer size); 0 when there
    /// are no secrets
    max_secret_len: usize,
}

impl std::fmt::Debug for LogRedactor {
    /// Formats the redactor without revealing secrets or buffered text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // A derived impl would dump `secrets` and `buffer` verbatim, which is
        // exactly the data this type exists to keep out of logs.
        f.debug_struct("LogRedactor")
            .field("secret_count", &self.secrets.len())
            .field("buffered_bytes", &self.buffer.len())
            .field("max_secret_len", &self.max_secret_len)
            .finish()
    }
}
57
58impl LogRedactor {
59    /// Create a new log redactor with the given secret values
60    ///
61    /// # Arguments
62    /// * `secrets` - Secret values to redact
63    ///
64    /// # Returns
65    /// Tuple of (redactor, warnings for short secrets)
66    #[must_use]
67    pub fn new(secrets: Vec<String>) -> (Self, Vec<ShortSecretWarning>) {
68        let mut warnings = Vec::new();
69        let mut valid_secrets: Vec<String> = Vec::new();
70
71        for (idx, secret) in secrets.into_iter().enumerate() {
72            if secret.len() < MIN_SECRET_LENGTH {
73                warnings.push(ShortSecretWarning {
74                    key: format!("secret_{idx}"),
75                    length: secret.len(),
76                });
77            } else {
78                valid_secrets.push(secret);
79            }
80        }
81
82        // Sort by length descending for greedy matching (longer secrets first)
83        valid_secrets.sort_by_key(|s| std::cmp::Reverse(s.len()));
84
85        let max_secret_len = valid_secrets.iter().map(String::len).max().unwrap_or(0);
86
87        (
88            Self {
89                secrets: valid_secrets,
90                buffer: String::new(),
91                max_secret_len,
92            },
93            warnings,
94        )
95    }
96
97    /// Create a redactor with named secrets for better warnings
98    ///
99    /// # Arguments
100    /// * `secrets` - Map of secret names to values
101    #[must_use]
102    pub fn with_names(
103        secrets: impl IntoIterator<Item = (String, String)>,
104    ) -> (Self, Vec<ShortSecretWarning>) {
105        let mut warnings = Vec::new();
106        let mut valid_secrets: Vec<String> = Vec::new();
107
108        for (key, value) in secrets {
109            if value.len() < MIN_SECRET_LENGTH {
110                warnings.push(ShortSecretWarning {
111                    key,
112                    length: value.len(),
113                });
114            } else {
115                valid_secrets.push(value);
116            }
117        }
118
119        // Sort by length descending for greedy matching
120        valid_secrets.sort_by_key(|s| std::cmp::Reverse(s.len()));
121
122        // Deduplicate secrets (same value may appear under different names)
123        let unique: HashSet<String> = valid_secrets.into_iter().collect();
124        let mut valid_secrets: Vec<String> = unique.into_iter().collect();
125        valid_secrets.sort_by_key(|s| std::cmp::Reverse(s.len()));
126
127        let max_secret_len = valid_secrets.iter().map(String::len).max().unwrap_or(0);
128
129        (
130            Self {
131                secrets: valid_secrets,
132                buffer: String::new(),
133                max_secret_len,
134            },
135            warnings,
136        )
137    }
138
139    /// Redact secrets from the input string
140    ///
141    /// This method handles streaming input by buffering to catch secrets
142    /// that span chunk boundaries.
143    ///
144    /// # Arguments
145    /// * `input` - Input chunk to process
146    ///
147    /// # Returns
148    /// Redacted output (may be shorter than input due to buffering)
149    pub fn redact(&mut self, input: &str) -> String {
150        if self.secrets.is_empty() {
151            return input.to_string();
152        }
153
154        // Append input to buffer
155        self.buffer.push_str(input);
156
157        // Keep enough in buffer to catch spanning secrets (2x max length)
158        let buffer_threshold = self.max_secret_len * 2;
159
160        if self.buffer.len() <= buffer_threshold {
161            // Not enough data yet, return empty and keep buffering
162            return String::new();
163        }
164
165        // Process all but the last buffer_threshold bytes
166        let process_len = self.buffer.len() - buffer_threshold;
167        let to_process: String = self.buffer.drain(..process_len).collect();
168
169        self.redact_immediate(&to_process)
170    }
171
172    /// Flush any remaining buffered content
173    ///
174    /// Call this when the stream ends to get any remaining output.
175    pub fn flush(&mut self) -> String {
176        if self.buffer.is_empty() {
177            return String::new();
178        }
179
180        let remaining = std::mem::take(&mut self.buffer);
181        self.redact_immediate(&remaining)
182    }
183
184    /// Redact secrets immediately without buffering
185    ///
186    /// Use this for complete strings where buffering isn't needed.
187    #[must_use]
188    pub fn redact_immediate(&self, input: &str) -> String {
189        let mut result = input.to_string();
190
191        for secret in &self.secrets {
192            result = result.replace(secret, REDACTED_PLACEHOLDER);
193        }
194
195        result
196    }
197
198    /// Check if any secrets are configured
199    #[must_use]
200    pub fn has_secrets(&self) -> bool {
201        !self.secrets.is_empty()
202    }
203
204    /// Get the number of secrets being redacted
205    #[must_use]
206    pub fn secret_count(&self) -> usize {
207        self.secrets.len()
208    }
209}
210
211/// Redact secrets from a complete string (convenience function)
212///
213/// # Arguments
214/// * `input` - String to redact
215/// * `secrets` - Secret values to redact
216#[must_use]
217pub fn redact_secrets(input: &str, secrets: &[String]) -> String {
218    if secrets.is_empty() {
219        return input.to_string();
220    }
221
222    let mut result = input.to_string();
223
224    // Sort by length descending for greedy matching
225    let mut sorted_secrets: Vec<&String> = secrets.iter().collect();
226    sorted_secrets.sort_by_key(|s| std::cmp::Reverse(s.len()));
227
228    for secret in sorted_secrets {
229        if secret.len() >= MIN_SECRET_LENGTH {
230            result = result.replace(secret.as_str(), REDACTED_PLACEHOLDER);
231        }
232    }
233
234    result
235}
236
// Unit tests for `LogRedactor` and `redact_secrets`.
#[cfg(test)]
mod tests {
    use super::*;

    // A single secret inside a sentence is replaced by the placeholder.
    #[test]
    fn test_simple_redaction() {
        let (redactor, _) = LogRedactor::new(vec!["secret123".to_string()]);
        let result = redactor.redact_immediate("The password is secret123, don't share it");
        assert_eq!(result, "The password is [REDACTED], don't share it");
    }

    // Two different secrets in the same input are both replaced.
    #[test]
    fn test_multiple_secrets() {
        let (redactor, _) =
            LogRedactor::new(vec!["password123".to_string(), "api_key_xyz".to_string()]);
        let result = redactor.redact_immediate("password123 and api_key_xyz are both secrets");
        assert_eq!(result, "[REDACTED] and [REDACTED] are both secrets");
    }

    // Every occurrence of a secret is replaced, not just the first one.
    #[test]
    fn test_repeated_secret() {
        let (redactor, _) = LogRedactor::new(vec!["secret".to_string()]);
        let result = redactor.redact_immediate("secret appears twice: secret");
        assert_eq!(result, "[REDACTED] appears twice: [REDACTED]");
    }

    // Secrets shorter than MIN_SECRET_LENGTH produce a warning each and are
    // not kept for redaction.
    #[test]
    fn test_short_secret_warning() {
        let (redactor, warnings) = LogRedactor::new(vec![
            "ab".to_string(),   // Too short
            "abc".to_string(),  // Too short
            "abcd".to_string(), // Just right
        ]);

        assert_eq!(warnings.len(), 2);
        assert_eq!(redactor.secret_count(), 1);
    }

    // With named secrets, the warning carries the caller-supplied name and
    // the length of the short value.
    #[test]
    fn test_named_secrets_warning() {
        let secrets = vec![
            ("DB_PASS".to_string(), "longpassword".to_string()),
            ("SHORT".to_string(), "ab".to_string()),
        ];
        let (_, warnings) = LogRedactor::with_names(secrets);

        assert_eq!(warnings.len(), 1);
        assert_eq!(warnings[0].key, "SHORT");
        assert_eq!(warnings[0].length, 2);
    }

    // A secret split across two chunks is still redacted once the stream is
    // flushed.
    #[test]
    fn test_streaming_redaction() {
        let (mut redactor, _) = LogRedactor::new(vec!["secretpassword".to_string()]);

        // Simulate streaming chunks where secret spans boundary
        let chunk1 = "The password is secret";
        let chunk2 = "password which is bad";

        let out1 = redactor.redact(chunk1);
        let out2 = redactor.redact(chunk2);
        let out3 = redactor.flush();

        let combined = format!("{out1}{out2}{out3}");
        assert!(combined.contains("[REDACTED]"));
        assert!(!combined.contains("secretpassword"));
    }

    // A redactor without secrets reports none and leaves input untouched.
    #[test]
    fn test_no_secrets() {
        let (redactor, warnings) = LogRedactor::new(vec![]);
        assert!(warnings.is_empty());
        assert!(!redactor.has_secrets());

        let result = redactor.redact_immediate("nothing to redact here");
        assert_eq!(result, "nothing to redact here");
    }

    // When one secret is a prefix of another, the longer one wins.
    #[test]
    fn test_greedy_matching() {
        // Longer secret should be matched first
        let (redactor, _) = LogRedactor::new(vec!["pass".to_string(), "password".to_string()]);
        let result = redactor.redact_immediate("the password is set");
        // Should redact "password" not just "pass"
        assert_eq!(result, "the [REDACTED] is set");
    }

    // The free-standing convenience function redacts all given secrets.
    #[test]
    fn test_redact_secrets_function() {
        let secrets = vec!["mysecret".to_string(), "another".to_string()];
        let result = redact_secrets("mysecret and another value", &secrets);
        assert_eq!(result, "[REDACTED] and [REDACTED] value");
    }

    // Empty input yields empty output.
    #[test]
    fn test_empty_input() {
        let (redactor, _) = LogRedactor::new(vec!["secret".to_string()]);
        let result = redactor.redact_immediate("");
        assert_eq!(result, "");
    }

    // Secrets at the very start or very end of the input are redacted.
    #[test]
    fn test_secret_at_boundaries() {
        let (redactor, _) = LogRedactor::new(vec!["secret".to_string()]);

        // Secret at start
        let result = redactor.redact_immediate("secret is here");
        assert_eq!(result, "[REDACTED] is here");

        // Secret at end
        let result = redactor.redact_immediate("here is secret");
        assert_eq!(result, "here is [REDACTED]");
    }

    // The same value under two names is stored only once by `with_names`.
    #[test]
    fn test_duplicate_secrets_deduplicated() {
        let secrets = vec![
            ("KEY1".to_string(), "samevalue".to_string()),
            ("KEY2".to_string(), "samevalue".to_string()),
        ];
        let (redactor, _) = LogRedactor::with_names(secrets);

        // Should only have one secret after deduplication
        assert_eq!(redactor.secret_count(), 1);
    }

    // Punctuation in a secret is matched literally.
    #[test]
    fn test_special_characters() {
        let (redactor, _) = LogRedactor::new(vec!["pass$word!@#".to_string()]);
        let result = redactor.redact_immediate("the pass$word!@# is special");
        assert_eq!(result, "the [REDACTED] is special");
    }

    // Redaction works across newlines and leaves the line structure intact.
    #[test]
    fn test_multiline_content() {
        let (redactor, _) = LogRedactor::new(vec!["secretkey".to_string()]);
        let input = "line1\nsecretkey\nline3";
        let result = redactor.redact_immediate(input);
        assert_eq!(result, "line1\n[REDACTED]\nline3");
    }
}
377}