Skip to main content

punch_types/
taint.rs

1//! Taint tracking and shell bleed detection.
2//!
3//! Every piece of data entering the ring carries taint labels — markers that
4//! track where the data came from and how sensitive it is. When a fighter
5//! attempts a shell move, the **bleed detector** scans the command for leaked
6//! secrets before the punch lands. If a tainted move bleeds confidential data,
7//! it gets blocked before it can do damage.
8
9use chrono::{DateTime, Utc};
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13// ---------------------------------------------------------------------------
14// Taint source and sensitivity
15// ---------------------------------------------------------------------------
16
17/// Where a piece of data originated — every tainted move has an origin story.
18#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
19pub enum TaintSource {
20    /// Data entered directly by a human operator.
21    UserInput,
22    /// Output from a tool (move) execution. The string identifies the tool.
23    ToolOutput(String),
24    /// Generated by an LLM during the bout.
25    LlmGenerated,
26    /// Received from an external API. The string identifies the API.
27    ExternalApi(String),
28    /// Read from the filesystem. The string is the file path.
29    FileSystem(String),
30    /// Sourced from the process environment.
31    Environment,
32}
33
34/// Sensitivity level for tainted data — determines how hard the bleed
35/// detection hits when it finds a leak.
36#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
37pub enum Sensitivity {
38    /// Public data — no bleed risk.
39    Public,
40    /// Internal data — log a warning but let the move land.
41    Internal,
42    /// Confidential data — block the move.
43    Confidential,
44    /// Secret data — block the move and raise an alert.
45    Secret,
46}
47
48impl std::fmt::Display for Sensitivity {
49    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50        match self {
51            Self::Public => write!(f, "public"),
52            Self::Internal => write!(f, "internal"),
53            Self::Confidential => write!(f, "confidential"),
54            Self::Secret => write!(f, "secret"),
55        }
56    }
57}
58
59// ---------------------------------------------------------------------------
60// Taint labels and tainted values
61// ---------------------------------------------------------------------------
62
63/// A taint label attached to a value — records the origin, sensitivity, and
64/// how the taint propagated through the ring.
65#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct TaintLabel {
67    /// Where this data came from.
68    pub source: TaintSource,
69    /// How sensitive this data is.
70    pub sensitivity: Sensitivity,
71    /// When the taint was first applied.
72    pub timestamp: DateTime<Utc>,
73    /// Chain of operations that propagated this taint (e.g. tool names,
74    /// transformation steps).
75    pub propagation_chain: Vec<String>,
76}
77
78/// A value carrying taint labels — knows where it has been and what it
79/// carries from the ring.
80#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct TaintedValue {
82    /// The raw string value.
83    pub value: String,
84    /// All taint labels attached to this value.
85    pub labels: Vec<TaintLabel>,
86}
87
88// ---------------------------------------------------------------------------
89// TaintTracker
90// ---------------------------------------------------------------------------
91
92/// Tracks taint propagation across tool calls in a bout.
93///
94/// The tracker keeps a registry of tainted values and their labels. When data
95/// flows from one move to another, `propagate` extends the chain so we can
96/// trace the full path a tainted value took through the ring.
97#[derive(Debug, Clone, Default)]
98pub struct TaintTracker {
99    /// All registered tainted values.
100    values: Vec<TaintedValue>,
101}
102
103impl TaintTracker {
104    /// Create a fresh tracker — no tainted moves recorded yet.
105    pub fn new() -> Self {
106        Self { values: Vec::new() }
107    }
108
109    /// Register a value as tainted with the given source and sensitivity.
110    ///
111    /// This marks the value as carrying sensitive data from the specified
112    /// origin. Like marking a fighter's gloves before a bout.
113    pub fn taint(&mut self, value: &str, source: TaintSource, sensitivity: Sensitivity) {
114        // Check if this exact value is already tracked.
115        for tv in &mut self.values {
116            if tv.value == value {
117                tv.labels.push(TaintLabel {
118                    source,
119                    sensitivity,
120                    timestamp: Utc::now(),
121                    propagation_chain: Vec::new(),
122                });
123                return;
124            }
125        }
126
127        self.values.push(TaintedValue {
128            value: value.to_string(),
129            labels: vec![TaintLabel {
130                source,
131                sensitivity,
132                timestamp: Utc::now(),
133                propagation_chain: Vec::new(),
134            }],
135        });
136    }
137
138    /// Check if a value (or any tainted substring) is tainted.
139    ///
140    /// Returns all matching taint labels. Uses simple substring matching —
141    /// if a tainted value appears anywhere inside the checked string, or if
142    /// the checked string appears inside a tainted value, the taint is
143    /// detected. No fancy footwork needed.
144    pub fn check_taint(&self, value: &str) -> Vec<&TaintLabel> {
145        let mut labels = Vec::new();
146        for tv in &self.values {
147            // Match if the tainted value is a substring of the checked value,
148            // or the checked value is a substring of the tainted value.
149            if value.contains(&tv.value) || tv.value.contains(value) {
150                for label in &tv.labels {
151                    labels.push(label);
152                }
153            }
154        }
155        labels
156    }
157
158    /// Propagate taint from one value to another.
159    ///
160    /// When data flows from one move's output to another move's input, this
161    /// copies all taint labels from the source to the destination, extending
162    /// each label's propagation chain. Like blood on the canvas spreading
163    /// from one round to the next.
164    pub fn propagate(&mut self, from: &str, to: &str) {
165        // Collect labels from the source value.
166        let mut propagated_labels: Vec<TaintLabel> = Vec::new();
167        for tv in &self.values {
168            if tv.value == from || from.contains(&tv.value) || tv.value.contains(from) {
169                for label in &tv.labels {
170                    let mut new_label = label.clone();
171                    new_label
172                        .propagation_chain
173                        .push(format!("{} -> {}", from, to));
174                    propagated_labels.push(new_label);
175                }
176            }
177        }
178
179        if propagated_labels.is_empty() {
180            return;
181        }
182
183        // Apply labels to the destination value.
184        for tv in &mut self.values {
185            if tv.value == to {
186                tv.labels.extend(propagated_labels);
187                return;
188            }
189        }
190
191        // Destination not yet tracked — create it.
192        self.values.push(TaintedValue {
193            value: to.to_string(),
194            labels: propagated_labels,
195        });
196    }
197}
198
199// ---------------------------------------------------------------------------
200// Shell bleed detection
201// ---------------------------------------------------------------------------
202
203/// A warning raised by the shell bleed detector — something leaked where it
204/// shouldn't have, like blood between rounds.
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct ShellBleedWarning {
207    /// Name of the pattern that matched (e.g. "aws_access_key").
208    pub pattern_name: String,
209    /// The text that matched the pattern.
210    pub matched_text: String,
211    /// How severe the bleed is.
212    pub severity: Sensitivity,
213    /// Where in the command/environment the match was found.
214    pub location: String,
215}
216
217/// Internal representation of a compiled secret pattern.
218#[derive(Debug, Clone)]
219struct SecretPattern {
220    /// Human-readable name for this pattern.
221    name: String,
222    /// Compiled regex.
223    regex: Regex,
224    /// Severity if this pattern matches.
225    severity: Sensitivity,
226}
227
228/// Scans shell commands and environment variables for leaked secrets before
229/// a shell move lands.
230///
231/// The bleed detector watches for sensitive data escaping into shell commands
232/// — API keys, tokens, private keys, and other secrets that should never
233/// leave the ring unprotected.
234#[derive(Debug, Clone)]
235pub struct ShellBleedDetector {
236    /// Registered secret patterns.
237    patterns: Vec<SecretPattern>,
238}
239
240impl Default for ShellBleedDetector {
241    fn default() -> Self {
242        Self::new()
243    }
244}
245
246impl ShellBleedDetector {
247    /// Create a new bleed detector with built-in patterns for common secret
248    /// types. Ready to scan from the first bell.
249    pub fn new() -> Self {
250        let mut detector = Self {
251            patterns: Vec::new(),
252        };
253        detector.register_builtin_patterns();
254        detector
255    }
256
257    /// Register built-in patterns for common secret formats.
258    fn register_builtin_patterns(&mut self) {
259        let builtins: &[(&str, &str, Sensitivity)] = &[
260            // AWS access key IDs start with AKIA followed by 16 alphanumeric chars.
261            ("aws_access_key", r"AKIA[0-9A-Z]{16}", Sensitivity::Secret),
262            // Bearer tokens in command arguments.
263            (
264                "bearer_token",
265                r"[Bb]earer\s+[A-Za-z0-9\-._~+/]+=*",
266                Sensitivity::Secret,
267            ),
268            // Passwords embedded in URLs (https://user:pass@host).
269            (
270                "password_in_url",
271                r"://[^/\s]+:[^@/\s]+@",
272                Sensitivity::Confidential,
273            ),
274            // PEM private key markers.
275            (
276                "private_key",
277                r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
278                Sensitivity::Secret,
279            ),
280            // Generic API key patterns (key=..., api_key=..., apikey=...).
281            (
282                "generic_api_key",
283                r#"(?i)(api[_\-]?key|api[_\-]?secret|access[_\-]?token|auth[_\-]?token)\s*[=:]\s*['\"]?[A-Za-z0-9\-._~+/]{16,}"#,
284                Sensitivity::Confidential,
285            ),
286            // Long base64-encoded strings (40+ chars) that look like encoded secrets.
287            (
288                "base64_secret",
289                r"[A-Za-z0-9+/]{40,}={0,2}",
290                Sensitivity::Internal,
291            ),
292        ];
293
294        for (name, pattern, severity) in builtins {
295            if let Ok(regex) = Regex::new(pattern) {
296                self.patterns.push(SecretPattern {
297                    name: name.to_string(),
298                    regex,
299                    severity: *severity,
300                });
301            }
302        }
303    }
304
305    /// Add a custom secret pattern to scan for.
306    ///
307    /// The pattern is a regex string. If the regex fails to compile, the
308    /// pattern is silently ignored (no unwrap — we don't panic in the ring).
309    pub fn add_secret_pattern(&mut self, name: &str, pattern: &str) {
310        if let Ok(regex) = Regex::new(pattern) {
311            self.patterns.push(SecretPattern {
312                name: name.to_string(),
313                regex,
314                severity: Sensitivity::Confidential,
315            });
316        }
317    }
318
319    /// Scan a shell command string for leaked secrets.
320    ///
321    /// Returns a list of bleed warnings for every pattern that matched.
322    /// If the command is clean, the vec is empty — no blood on the canvas.
323    pub fn scan_command(&self, command: &str) -> Vec<ShellBleedWarning> {
324        let mut warnings = Vec::new();
325
326        for pattern in &self.patterns {
327            for m in pattern.regex.find_iter(command) {
328                warnings.push(ShellBleedWarning {
329                    pattern_name: pattern.name.clone(),
330                    matched_text: m.as_str().to_string(),
331                    severity: pattern.severity,
332                    location: "command".to_string(),
333                });
334            }
335        }
336
337        warnings
338    }
339
340    /// Scan environment variables that would be passed to a subprocess.
341    ///
342    /// Checks both variable names and values for secret patterns —
343    /// bleed detection covers every angle of the ring.
344    pub fn scan_environment(&self, env_vars: &[(String, String)]) -> Vec<ShellBleedWarning> {
345        let mut warnings = Vec::new();
346
347        for (key, value) in env_vars {
348            let combined = format!("{}={}", key, value);
349            for pattern in &self.patterns {
350                for m in pattern.regex.find_iter(&combined) {
351                    warnings.push(ShellBleedWarning {
352                        pattern_name: pattern.name.clone(),
353                        matched_text: m.as_str().to_string(),
354                        severity: pattern.severity,
355                        location: format!("env:{}", key),
356                    });
357                }
358            }
359        }
360
361        warnings
362    }
363}
364
365// ---------------------------------------------------------------------------
366// Tests
367// ---------------------------------------------------------------------------
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Assemble a fake AWS access key at runtime so static scanners don't
    /// flag this source file.
    fn fake_aws_key() -> String {
        // "AKIA" plus 16 uppercase alphanumeric chars matches the AWS pattern.
        ["AKIA", "IOSFODNN7EXAMPLE"].concat()
    }

    /// Wrap `marker` in the five-dash PEM delimiters.
    fn pem_header(marker: &str) -> String {
        format!("-----{}-----", marker)
    }

    /// PEM "begin" line for an RSA private key, assembled at runtime to avoid
    /// static scanner flags on the source code.
    fn pem_begin_header() -> String {
        pem_header("BEGIN RSA PRIVATE KEY")
    }

    /// Matching PEM "end" line.
    fn pem_end_header() -> String {
        pem_header("END RSA PRIVATE KEY")
    }

    /// First warning raised by the pattern called `name`, if any.
    fn warning_named<'a>(
        warnings: &'a [ShellBleedWarning],
        name: &str,
    ) -> Option<&'a ShellBleedWarning> {
        warnings.iter().find(|w| w.pattern_name == name)
    }

    // -- TaintTracker tests -------------------------------------------------

    #[test]
    fn test_taint_basic_taint_and_check() {
        let mut tracker = TaintTracker::new();
        tracker.taint("my-secret-value", TaintSource::UserInput, Sensitivity::Secret);

        let found = tracker.check_taint("my-secret-value");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].source, TaintSource::UserInput);
        assert_eq!(found[0].sensitivity, Sensitivity::Secret);
    }

    #[test]
    fn test_taint_substring_detection() {
        let mut tracker = TaintTracker::new();
        tracker.taint(
            "API_KEY=abc123secret",
            TaintSource::Environment,
            Sensitivity::Secret,
        );

        // A fragment of the tainted value is enough to match.
        let found = tracker.check_taint("abc123secret");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].source, TaintSource::Environment);
    }

    #[test]
    fn test_taint_propagation_chain() {
        let mut tracker = TaintTracker::new();
        tracker.taint("original-secret", TaintSource::UserInput, Sensitivity::Secret);
        tracker.propagate("original-secret", "derived-value");

        let found = tracker.check_taint("derived-value");
        assert_eq!(found.len(), 1);

        let chain = &found[0].propagation_chain;
        assert_eq!(chain.len(), 1);
        assert!(chain[0].contains("original-secret"));
        assert!(chain[0].contains("derived-value"));
    }

    #[test]
    fn test_taint_no_taint_on_clean_value() {
        let mut tracker = TaintTracker::new();
        tracker.taint("secret-data", TaintSource::UserInput, Sensitivity::Secret);

        assert!(tracker.check_taint("completely-unrelated").is_empty());
    }

    #[test]
    fn test_taint_multiple_labels_on_same_value() {
        let mut tracker = TaintTracker::new();
        tracker.taint("shared-value", TaintSource::UserInput, Sensitivity::Internal);
        tracker.taint(
            "shared-value",
            TaintSource::ExternalApi("stripe".to_string()),
            Sensitivity::Secret,
        );

        let found = tracker.check_taint("shared-value");
        assert_eq!(found.len(), 2);

        let stripe = TaintSource::ExternalApi("stripe".to_string());
        assert!(found.iter().any(|l| l.source == TaintSource::UserInput));
        assert!(found.iter().any(|l| l.source == stripe));
    }

    // -- ShellBleedDetector tests -------------------------------------------

    #[test]
    fn test_bleed_detect_aws_access_key() {
        let detector = ShellBleedDetector::new();
        let cmd = format!("aws s3 cp --access-key {} s3://bucket", fake_aws_key());
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = warning_named(&warnings, "aws_access_key").expect("should detect AWS access key");
        assert_eq!(hit.severity, Sensitivity::Secret);
        assert!(hit.matched_text.starts_with("AKIA"));
    }

    #[test]
    fn test_bleed_detect_bearer_token() {
        let detector = ShellBleedDetector::new();
        let cmd = "curl -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.test'";
        let warnings = detector.scan_command(cmd);

        assert!(!warnings.is_empty());
        let hit = warning_named(&warnings, "bearer_token").expect("should detect bearer token");
        assert_eq!(hit.severity, Sensitivity::Secret);
    }

    #[test]
    fn test_bleed_detect_password_in_url() {
        let detector = ShellBleedDetector::new();
        let cmd = "curl https://admin:supersecret@db.example.com/data";
        let warnings = detector.scan_command(cmd);

        assert!(!warnings.is_empty());
        let hit =
            warning_named(&warnings, "password_in_url").expect("should detect password in URL");
        assert_eq!(hit.severity, Sensitivity::Confidential);
    }

    #[test]
    fn test_bleed_detect_private_key() {
        let detector = ShellBleedDetector::new();
        let cmd = format!(
            "echo '{}\nMIIEow...\n{}' > /tmp/key",
            pem_begin_header(),
            pem_end_header()
        );
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit =
            warning_named(&warnings, "private_key").expect("should detect private key marker");
        assert_eq!(hit.severity, Sensitivity::Secret);
    }

    #[test]
    fn test_bleed_clean_command_passes() {
        let detector = ShellBleedDetector::new();
        let warnings = detector.scan_command("ls -la /tmp");

        assert!(
            warnings.is_empty(),
            "clean command should produce no warnings"
        );
    }

    #[test]
    fn test_bleed_detect_base64_encoded_secret() {
        let detector = ShellBleedDetector::new();
        // Long enough (40+ chars) to trip the base64 pattern.
        let encoded = "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXoxMjM0NTY3ODk=";
        let warnings = detector.scan_command(&format!("export SECRET={}", encoded));

        assert!(!warnings.is_empty());
        let hit = warning_named(&warnings, "base64_secret")
            .expect("should detect base64-encoded secret");
        assert_eq!(hit.severity, Sensitivity::Internal);
    }

    #[test]
    fn test_bleed_env_var_scanning() {
        let detector = ShellBleedDetector::new();
        let env_vars = vec![
            ("PATH".to_string(), "/usr/bin:/usr/local/bin".to_string()),
            ("AWS_ACCESS_KEY_ID".to_string(), fake_aws_key()),
        ];

        let warnings = detector.scan_environment(&env_vars);
        assert!(!warnings.is_empty());

        let hit = warning_named(&warnings, "aws_access_key")
            .expect("should detect AWS key in env vars");
        assert!(hit.location.starts_with("env:"));
    }

    #[test]
    fn test_bleed_custom_pattern_registration() {
        let mut detector = ShellBleedDetector::new();
        detector.add_secret_pattern("github_token", r"ghp_[A-Za-z0-9]{36}");

        let token = format!("ghp_{}", "A".repeat(36));
        let cmd = format!("git clone https://{}@github.com/repo", token);
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = warning_named(&warnings, "github_token")
            .expect("should detect custom GitHub token pattern");
        assert_eq!(hit.severity, Sensitivity::Confidential);
    }

    #[test]
    fn test_sensitivity_ordering() {
        // Expected order: Public < Internal < Confidential < Secret.
        assert!(Sensitivity::Public < Sensitivity::Internal);
        assert!(Sensitivity::Internal < Sensitivity::Confidential);
        assert!(Sensitivity::Confidential < Sensitivity::Secret);
        assert!(Sensitivity::Public < Sensitivity::Secret);
    }

    #[test]
    fn test_integration_shell_command_with_leaked_secret_blocked() {
        // End-to-end flow: taint a value, build a command containing it, and
        // confirm both the tracker and the bleed detector notice.
        let secret_key = fake_aws_key();
        let mut tracker = TaintTracker::new();
        tracker.taint(&secret_key, TaintSource::Environment, Sensitivity::Secret);

        let command = format!("aws s3 ls --access-key {}", secret_key);

        // The tracker sees the tainted value inside the command.
        assert!(
            !tracker.check_taint(&command).is_empty(),
            "command containing tainted value should be detected"
        );

        // The bleed detector independently flags the key.
        let warnings = ShellBleedDetector::new().scan_command(&command);
        assert!(
            !warnings.is_empty(),
            "bleed detector should catch the AWS key"
        );

        // At least one warning must be Confidential or higher.
        let blocks_move = warnings
            .iter()
            .any(|w| w.severity >= Sensitivity::Confidential);
        assert!(
            blocks_move,
            "leaked secret should produce at least Confidential severity"
        );
    }
}