1use chrono::{DateTime, Utc};
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
19pub enum TaintSource {
20 UserInput,
22 ToolOutput(String),
24 LlmGenerated,
26 ExternalApi(String),
28 FileSystem(String),
30 Environment,
32}
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
37pub enum Sensitivity {
38 Public,
40 Internal,
42 Confidential,
44 Secret,
46}
47
48impl std::fmt::Display for Sensitivity {
49 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
50 match self {
51 Self::Public => write!(f, "public"),
52 Self::Internal => write!(f, "internal"),
53 Self::Confidential => write!(f, "confidential"),
54 Self::Secret => write!(f, "secret"),
55 }
56 }
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
66pub struct TaintLabel {
67 pub source: TaintSource,
69 pub sensitivity: Sensitivity,
71 pub timestamp: DateTime<Utc>,
73 pub propagation_chain: Vec<String>,
76}
77
78#[derive(Debug, Clone, Serialize, Deserialize)]
81pub struct TaintedValue {
82 pub value: String,
84 pub labels: Vec<TaintLabel>,
86}
87
88#[derive(Debug, Clone, Default)]
98pub struct TaintTracker {
99 values: Vec<TaintedValue>,
101}
102
103impl TaintTracker {
104 pub fn new() -> Self {
106 Self { values: Vec::new() }
107 }
108
109 pub fn taint(&mut self, value: &str, source: TaintSource, sensitivity: Sensitivity) {
114 for tv in &mut self.values {
116 if tv.value == value {
117 tv.labels.push(TaintLabel {
118 source,
119 sensitivity,
120 timestamp: Utc::now(),
121 propagation_chain: Vec::new(),
122 });
123 return;
124 }
125 }
126
127 self.values.push(TaintedValue {
128 value: value.to_string(),
129 labels: vec![TaintLabel {
130 source,
131 sensitivity,
132 timestamp: Utc::now(),
133 propagation_chain: Vec::new(),
134 }],
135 });
136 }
137
138 pub fn check_taint(&self, value: &str) -> Vec<&TaintLabel> {
145 let mut labels = Vec::new();
146 for tv in &self.values {
147 if value.contains(&tv.value) || tv.value.contains(value) {
150 for label in &tv.labels {
151 labels.push(label);
152 }
153 }
154 }
155 labels
156 }
157
158 pub fn propagate(&mut self, from: &str, to: &str) {
165 let mut propagated_labels: Vec<TaintLabel> = Vec::new();
167 for tv in &self.values {
168 if tv.value == from || from.contains(&tv.value) || tv.value.contains(from) {
169 for label in &tv.labels {
170 let mut new_label = label.clone();
171 new_label
172 .propagation_chain
173 .push(format!("{} -> {}", from, to));
174 propagated_labels.push(new_label);
175 }
176 }
177 }
178
179 if propagated_labels.is_empty() {
180 return;
181 }
182
183 for tv in &mut self.values {
185 if tv.value == to {
186 tv.labels.extend(propagated_labels);
187 return;
188 }
189 }
190
191 self.values.push(TaintedValue {
193 value: to.to_string(),
194 labels: propagated_labels,
195 });
196 }
197}
198
199#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct ShellBleedWarning {
207 pub pattern_name: String,
209 pub matched_text: String,
211 pub severity: Sensitivity,
213 pub location: String,
215}
216
217#[derive(Debug, Clone)]
219struct SecretPattern {
220 name: String,
222 regex: Regex,
224 severity: Sensitivity,
226}
227
228#[derive(Debug, Clone)]
235pub struct ShellBleedDetector {
236 patterns: Vec<SecretPattern>,
238}
239
240impl Default for ShellBleedDetector {
241 fn default() -> Self {
242 Self::new()
243 }
244}
245
246impl ShellBleedDetector {
247 pub fn new() -> Self {
250 let mut detector = Self {
251 patterns: Vec::new(),
252 };
253 detector.register_builtin_patterns();
254 detector
255 }
256
257 fn register_builtin_patterns(&mut self) {
259 let builtins: &[(&str, &str, Sensitivity)] = &[
260 ("aws_access_key", r"AKIA[0-9A-Z]{16}", Sensitivity::Secret),
262 (
264 "bearer_token",
265 r"[Bb]earer\s+[A-Za-z0-9\-._~+/]+=*",
266 Sensitivity::Secret,
267 ),
268 (
270 "password_in_url",
271 r"://[^/\s]+:[^@/\s]+@",
272 Sensitivity::Confidential,
273 ),
274 (
276 "private_key",
277 r"-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY-----",
278 Sensitivity::Secret,
279 ),
280 (
282 "generic_api_key",
283 r#"(?i)(api[_\-]?key|api[_\-]?secret|access[_\-]?token|auth[_\-]?token)\s*[=:]\s*['\"]?[A-Za-z0-9\-._~+/]{16,}"#,
284 Sensitivity::Confidential,
285 ),
286 (
288 "base64_secret",
289 r"[A-Za-z0-9+/]{40,}={0,2}",
290 Sensitivity::Internal,
291 ),
292 ];
293
294 for (name, pattern, severity) in builtins {
295 if let Ok(regex) = Regex::new(pattern) {
296 self.patterns.push(SecretPattern {
297 name: name.to_string(),
298 regex,
299 severity: *severity,
300 });
301 }
302 }
303 }
304
305 pub fn add_secret_pattern(&mut self, name: &str, pattern: &str) {
310 if let Ok(regex) = Regex::new(pattern) {
311 self.patterns.push(SecretPattern {
312 name: name.to_string(),
313 regex,
314 severity: Sensitivity::Confidential,
315 });
316 }
317 }
318
319 pub fn scan_command(&self, command: &str) -> Vec<ShellBleedWarning> {
324 let mut warnings = Vec::new();
325
326 for pattern in &self.patterns {
327 for m in pattern.regex.find_iter(command) {
328 warnings.push(ShellBleedWarning {
329 pattern_name: pattern.name.clone(),
330 matched_text: m.as_str().to_string(),
331 severity: pattern.severity,
332 location: "command".to_string(),
333 });
334 }
335 }
336
337 warnings
338 }
339
340 pub fn scan_environment(&self, env_vars: &[(String, String)]) -> Vec<ShellBleedWarning> {
345 let mut warnings = Vec::new();
346
347 for (key, value) in env_vars {
348 let combined = format!("{}={}", key, value);
349 for pattern in &self.patterns {
350 for m in pattern.regex.find_iter(&combined) {
351 warnings.push(ShellBleedWarning {
352 pattern_name: pattern.name.clone(),
353 matched_text: m.as_str().to_string(),
354 severity: pattern.severity,
355 location: format!("env:{}", key),
356 });
357 }
358 }
359 }
360
361 warnings
362 }
363}
364
#[cfg(test)]
mod tests {
    use super::*;

    // The fixtures below are assembled from separate pieces so that this file
    // never contains a contiguous key-shaped literal (presumably to keep
    // secret scanners from flagging the test source itself).

    /// Builds a syntactically valid but fake AWS access key id.
    fn fake_aws_key() -> String {
        ["AKIA", "IOSFODNN7EXAMPLE"].concat()
    }

    /// Wraps `label` in the five-dash PEM delimiter.
    fn pem_marker(label: &str) -> String {
        format!("-----{}-----", label)
    }

    /// Opening line of a PEM-encoded RSA private key.
    fn pem_begin_header() -> String {
        pem_marker("BEGIN RSA PRIVATE KEY")
    }

    /// Closing line of a PEM-encoded RSA private key.
    fn pem_end_header() -> String {
        pem_marker("END RSA PRIVATE KEY")
    }

    /// Returns the first warning produced by `pattern_name`, panicking with
    /// `why` when there is none.
    fn expect_warning<'a>(
        warnings: &'a [ShellBleedWarning],
        pattern_name: &str,
        why: &str,
    ) -> &'a ShellBleedWarning {
        warnings
            .iter()
            .find(|w| w.pattern_name == pattern_name)
            .expect(why)
    }

    // ---- TaintTracker ----

    #[test]
    fn test_taint_basic_taint_and_check() {
        let mut tracker = TaintTracker::new();
        tracker.taint(
            "my-secret-value",
            TaintSource::UserInput,
            Sensitivity::Secret,
        );

        let found = tracker.check_taint("my-secret-value");
        assert_eq!(found.len(), 1);

        let only = found[0];
        assert_eq!(only.source, TaintSource::UserInput);
        assert_eq!(only.sensitivity, Sensitivity::Secret);
    }

    #[test]
    fn test_taint_substring_detection() {
        let mut tracker = TaintTracker::new();
        tracker.taint(
            "API_KEY=abc123secret",
            TaintSource::Environment,
            Sensitivity::Secret,
        );

        // A fragment of a tracked value must still be reported as tainted.
        let found = tracker.check_taint("abc123secret");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].source, TaintSource::Environment);
    }

    #[test]
    fn test_taint_propagation_chain() {
        let mut tracker = TaintTracker::new();
        tracker.taint(
            "original-secret",
            TaintSource::UserInput,
            Sensitivity::Secret,
        );
        tracker.propagate("original-secret", "derived-value");

        let found = tracker.check_taint("derived-value");
        assert_eq!(found.len(), 1);

        let chain = &found[0].propagation_chain;
        assert_eq!(chain.len(), 1);
        assert!(chain[0].contains("original-secret"));
        assert!(chain[0].contains("derived-value"));
    }

    #[test]
    fn test_taint_no_taint_on_clean_value() {
        let mut tracker = TaintTracker::new();
        tracker.taint("secret-data", TaintSource::UserInput, Sensitivity::Secret);

        assert!(tracker.check_taint("completely-unrelated").is_empty());
    }

    #[test]
    fn test_taint_multiple_labels_on_same_value() {
        let mut tracker = TaintTracker::new();
        tracker.taint(
            "shared-value",
            TaintSource::UserInput,
            Sensitivity::Internal,
        );
        tracker.taint(
            "shared-value",
            TaintSource::ExternalApi("stripe".to_string()),
            Sensitivity::Secret,
        );

        let found = tracker.check_taint("shared-value");
        assert_eq!(found.len(), 2);

        let has_source = |wanted: TaintSource| found.iter().any(|label| label.source == wanted);
        assert!(has_source(TaintSource::UserInput));
        assert!(has_source(TaintSource::ExternalApi("stripe".to_string())));
    }

    // ---- ShellBleedDetector ----

    #[test]
    fn test_bleed_detect_aws_access_key() {
        let detector = ShellBleedDetector::new();
        let cmd = format!("aws s3 cp --access-key {} s3://bucket", fake_aws_key());
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(&warnings, "aws_access_key", "should detect AWS access key");
        assert_eq!(hit.severity, Sensitivity::Secret);
        assert!(hit.matched_text.starts_with("AKIA"));
    }

    #[test]
    fn test_bleed_detect_bearer_token() {
        let detector = ShellBleedDetector::new();
        let cmd = "curl -H 'Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.test'";
        let warnings = detector.scan_command(cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(&warnings, "bearer_token", "should detect bearer token");
        assert_eq!(hit.severity, Sensitivity::Secret);
    }

    #[test]
    fn test_bleed_detect_password_in_url() {
        let detector = ShellBleedDetector::new();
        let cmd = "curl https://admin:supersecret@db.example.com/data";
        let warnings = detector.scan_command(cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(
            &warnings,
            "password_in_url",
            "should detect password in URL",
        );
        assert_eq!(hit.severity, Sensitivity::Confidential);
    }

    #[test]
    fn test_bleed_detect_private_key() {
        let detector = ShellBleedDetector::new();
        let cmd = format!(
            "echo '{}\nMIIEow...\n{}' > /tmp/key",
            pem_begin_header(),
            pem_end_header()
        );
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(
            &warnings,
            "private_key",
            "should detect private key marker",
        );
        assert_eq!(hit.severity, Sensitivity::Secret);
    }

    #[test]
    fn test_bleed_clean_command_passes() {
        let warnings = ShellBleedDetector::new().scan_command("ls -la /tmp");

        assert!(
            warnings.is_empty(),
            "clean command should produce no warnings"
        );
    }

    #[test]
    fn test_bleed_detect_base64_encoded_secret() {
        let detector = ShellBleedDetector::new();
        let cmd = format!(
            "export SECRET={}",
            "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXoxMjM0NTY3ODk="
        );
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(
            &warnings,
            "base64_secret",
            "should detect base64-encoded secret",
        );
        assert_eq!(hit.severity, Sensitivity::Internal);
    }

    #[test]
    fn test_bleed_env_var_scanning() {
        let detector = ShellBleedDetector::new();
        let env_vars = vec![
            ("PATH".to_string(), "/usr/bin:/usr/local/bin".to_string()),
            ("AWS_ACCESS_KEY_ID".to_string(), fake_aws_key()),
        ];

        let warnings = detector.scan_environment(&env_vars);
        assert!(!warnings.is_empty());

        let hit = expect_warning(
            &warnings,
            "aws_access_key",
            "should detect AWS key in env vars",
        );
        assert!(hit.location.starts_with("env:"));
    }

    #[test]
    fn test_bleed_custom_pattern_registration() {
        let mut detector = ShellBleedDetector::new();
        detector.add_secret_pattern("github_token", r"ghp_[A-Za-z0-9]{36}");

        let token = format!("ghp_{}", "A".repeat(36));
        let cmd = format!("git clone https://{}@github.com/repo", token);
        let warnings = detector.scan_command(&cmd);

        assert!(!warnings.is_empty());
        let hit = expect_warning(
            &warnings,
            "github_token",
            "should detect custom GitHub token pattern",
        );
        assert_eq!(hit.severity, Sensitivity::Confidential);
    }

    // ---- Sensitivity ----

    #[test]
    fn test_sensitivity_ordering() {
        let ascending = [
            Sensitivity::Public,
            Sensitivity::Internal,
            Sensitivity::Confidential,
            Sensitivity::Secret,
        ];

        // Each level is strictly below the one that follows it.
        assert!(ascending.windows(2).all(|pair| pair[0] < pair[1]));
        assert!(Sensitivity::Public < Sensitivity::Secret);
    }

    // ---- Both components together ----

    #[test]
    fn test_integration_shell_command_with_leaked_secret_blocked() {
        let secret_key = fake_aws_key();
        let mut tracker = TaintTracker::new();
        tracker.taint(&secret_key, TaintSource::Environment, Sensitivity::Secret);

        let command = format!("aws s3 ls --access-key {}", secret_key);

        // The taint tracker sees the tracked key embedded in the command.
        assert!(
            !tracker.check_taint(&command).is_empty(),
            "command containing tainted value should be detected"
        );

        // The pattern scanner flags the same command independently.
        let warnings = ShellBleedDetector::new().scan_command(&command);
        assert!(
            !warnings.is_empty(),
            "bleed detector should catch the AWS key"
        );

        let has_secret = warnings
            .iter()
            .any(|w| w.severity >= Sensitivity::Confidential);
        assert!(
            has_secret,
            "leaked secret should produce at least Confidential severity"
        );
    }
}