use std::collections::{HashMap, HashSet};
use std::path::Path;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Serialize};
use crate::error::{Result, BrrrError};
/// Classification assigned to a detected secret.
///
/// In this file the provider-specific variants are attached to entries of
/// `PROVIDER_PATTERNS`, `Password` / `ApiKey` / `GenericSecret` to entries of
/// `GENERIC_PATTERNS`, and `HighEntropyString` to findings from the entropy scan.
/// The human-readable label for each variant comes from its `Display` impl.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum SecretType {
// Cloud / SaaS provider credentials.
AwsAccessKey,
AwsSecretKey,
GitHubToken,
GitLabToken,
SlackToken,
StripeKey,
GoogleApiKey,
GoogleServiceAccount,
TwilioKey,
SendGridKey,
MailgunKey,
HerokuKey,
NpmToken,
PyPiToken,
DiscordToken,
TelegramToken,
FirebaseKey,
AzureKey,
DigitalOceanToken,
DatadogKey,
OpenAiKey,
AnthropicKey,
// Key material identified by its PEM/armor header.
PrivateKey,
PgpPrivateKey,
SshPrivateKey,
// Structured tokens and URLs.
Jwt,
ConnectionString,
// Matches driven by the name of the variable being assigned.
Password,
ApiKey,
GenericSecret,
// Quoted string flagged only because of its Shannon entropy.
HighEntropyString,
}
impl std::fmt::Display for SecretType {
    /// Writes the human-readable label of the secret type (e.g. `"AWS Access Key"`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            SecretType::AwsAccessKey => "AWS Access Key",
            SecretType::AwsSecretKey => "AWS Secret Access Key",
            SecretType::GitHubToken => "GitHub Token",
            SecretType::GitLabToken => "GitLab Token",
            SecretType::SlackToken => "Slack Token",
            SecretType::StripeKey => "Stripe API Key",
            SecretType::GoogleApiKey => "Google API Key",
            SecretType::GoogleServiceAccount => "Google Service Account Key",
            SecretType::TwilioKey => "Twilio Key",
            SecretType::SendGridKey => "SendGrid API Key",
            SecretType::MailgunKey => "Mailgun API Key",
            SecretType::HerokuKey => "Heroku API Key",
            SecretType::NpmToken => "npm Token",
            SecretType::PyPiToken => "PyPI Token",
            SecretType::DiscordToken => "Discord Token",
            SecretType::TelegramToken => "Telegram Bot Token",
            SecretType::FirebaseKey => "Firebase/FCM Key",
            SecretType::AzureKey => "Azure Key",
            SecretType::DigitalOceanToken => "DigitalOcean Token",
            SecretType::DatadogKey => "Datadog API Key",
            SecretType::OpenAiKey => "OpenAI API Key",
            SecretType::AnthropicKey => "Anthropic API Key",
            SecretType::PrivateKey => "Private Key",
            SecretType::PgpPrivateKey => "PGP Private Key",
            SecretType::SshPrivateKey => "SSH Private Key",
            SecretType::Jwt => "JSON Web Token",
            SecretType::ConnectionString => "Connection String",
            SecretType::Password => "Password",
            SecretType::ApiKey => "API Key",
            SecretType::GenericSecret => "Generic Secret",
            SecretType::HighEntropyString => "High-Entropy String",
        };
        f.write_str(label)
    }
}
/// Severity attached to a finding.
///
/// Variants are declared from least to most severe, so the derived
/// `PartialOrd`/`Ord` compare as `Info < Low < Medium < High < Critical`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Severity {
Info,
Low,
Medium,
High,
Critical,
}
impl std::fmt::Display for Severity {
    /// Writes the upper-case severity name (e.g. `"CRITICAL"`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Severity::Info => "INFO",
            Severity::Low => "LOW",
            Severity::Medium => "MEDIUM",
            Severity::High => "HIGH",
            Severity::Critical => "CRITICAL",
        };
        f.write_str(name)
    }
}
/// How confident the detector is that a finding is a real secret.
///
/// Variants are declared in increasing order, so the derived ordering is
/// `Low < Medium < High`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Confidence {
Low,
Medium,
High,
}
impl std::fmt::Display for Confidence {
    /// Writes the upper-case confidence name (e.g. `"MEDIUM"`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match *self {
            Confidence::Low => "LOW",
            Confidence::Medium => "MEDIUM",
            Confidence::High => "HIGH",
        };
        f.write_str(name)
    }
}
/// Position of a finding inside a scanned file.
///
/// As populated by the scanner in this file: `line`/`end_line` are 1-based line
/// numbers, `column` is the 1-based byte offset of the match start within the line,
/// and `end_column` is the byte offset of the match end plus one.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Location {
// Path exactly as it was passed to the scanner.
pub file: String,
pub line: usize,
pub column: usize,
pub end_line: usize,
pub end_column: usize,
}
/// A single potential secret reported by the scanner.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretFinding {
// Where the matched value sits in the file.
pub location: Location,
pub secret_type: SecretType,
pub severity: Severity,
pub confidence: Confidence,
// Matched value with most characters replaced by `*`; the raw secret is not stored.
pub masked_value: String,
// NOTE(review): the scanning code visible in this file always sets this to `None`.
pub variable_name: Option<String>,
pub description: String,
// Multi-line, numbered remediation steps for this secret type.
pub remediation: String,
// Whether the containing file was classified as a test file.
pub is_test_file: bool,
// Shannon entropy of the matched value, in bits per byte.
pub entropy: Option<f64>,
}
/// Aggregated outcome of scanning a file or directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanResult {
pub findings: Vec<SecretFinding>,
pub files_scanned: usize,
// Number of findings keyed by the `Display` label of their `SecretType`.
pub type_counts: HashMap<String, usize>,
// Number of findings keyed by the `Display` label of their `Severity`.
pub severity_counts: HashMap<String, usize>,
}
/// A regex that recognises one provider-specific secret format, together with the
/// fixed classification reported when it matches.
struct ProviderPattern {
// The scanner reports capture group 1 when the regex defines one, otherwise the
// whole match.
regex: Regex,
secret_type: SecretType,
// Severity reported for non-test files; the scanner downgrades it for test files.
severity: Severity,
confidence: Confidence,
description: &'static str,
}
/// A regex that recognises an assignment to a suspiciously named variable
/// (password, token, ...) and captures the quoted value in group 1.
struct GenericPattern {
var_regex: Regex,
secret_type: SecretType,
// Captured values shorter than this many bytes are ignored by the scanner.
min_value_length: usize,
description: &'static str,
}
static PROVIDER_PATTERNS: Lazy<Vec<ProviderPattern>> = Lazy::new(|| {
vec![
ProviderPattern {
regex: Regex::new(r"(?i)\b(AKIA[0-9A-Z]{16})\b").expect("Invalid regex"),
secret_type: SecretType::AwsAccessKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "AWS Access Key ID detected",
},
ProviderPattern {
regex: Regex::new(r#"(?i)(?:aws_secret_access_key|aws_secret_key|secret_access_key)\s*[=:]\s*["']?([A-Za-z0-9/+=]{40})["']?"#).expect("Invalid regex"),
secret_type: SecretType::AwsSecretKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "AWS Secret Access Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(ghp_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub Personal Access Token (classic) detected",
},
ProviderPattern {
regex: Regex::new(r"\b(gho_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub OAuth Access Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(ghu_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub User-to-Server Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(ghs_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub Server-to-Server Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(ghr_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub Refresh Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})\b").expect("Invalid regex"),
secret_type: SecretType::GitHubToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitHub Fine-Grained Personal Access Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(glpat-[a-zA-Z0-9_-]{20,})\b").expect("Invalid regex"),
secret_type: SecretType::GitLabToken,
severity: Severity::High,
confidence: Confidence::High,
description: "GitLab Personal Access Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(xox[baprs]-[0-9a-zA-Z-]{10,})\b").expect("Invalid regex"),
secret_type: SecretType::SlackToken,
severity: Severity::High,
confidence: Confidence::High,
description: "Slack Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(sk_live_[0-9a-zA-Z]{24,})\b").expect("Invalid regex"),
secret_type: SecretType::StripeKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "Stripe Live Secret Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(sk_test_[0-9a-zA-Z]{24,})\b").expect("Invalid regex"),
secret_type: SecretType::StripeKey,
severity: Severity::Medium,
confidence: Confidence::High,
description: "Stripe Test Secret Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(rk_live_[0-9a-zA-Z]{24,})\b").expect("Invalid regex"),
secret_type: SecretType::StripeKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "Stripe Live Restricted Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(AIza[0-9A-Za-z_-]{35})\b").expect("Invalid regex"),
secret_type: SecretType::GoogleApiKey,
severity: Severity::High,
confidence: Confidence::High,
description: "Google API Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(SK[0-9a-fA-F]{32})\b").expect("Invalid regex"),
secret_type: SecretType::TwilioKey,
severity: Severity::High,
confidence: Confidence::Medium,
description: "Twilio API Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43})\b").expect("Invalid regex"),
secret_type: SecretType::SendGridKey,
severity: Severity::High,
confidence: Confidence::High,
description: "SendGrid API Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(npm_[a-zA-Z0-9]{36})\b").expect("Invalid regex"),
secret_type: SecretType::NpmToken,
severity: Severity::High,
confidence: Confidence::High,
description: "npm Access Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(pypi-[a-zA-Z0-9_-]{50,})\b").expect("Invalid regex"),
secret_type: SecretType::PyPiToken,
severity: Severity::High,
confidence: Confidence::High,
description: "PyPI API Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b([MN][A-Za-z0-9]{23,}\.[A-Za-z0-9_-]{6}\.[A-Za-z0-9_-]{27,})\b").expect("Invalid regex"),
secret_type: SecretType::DiscordToken,
severity: Severity::High,
confidence: Confidence::Medium,
description: "Discord Bot Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(\d{8,10}:[a-zA-Z0-9_-]{35})\b").expect("Invalid regex"),
secret_type: SecretType::TelegramToken,
severity: Severity::High,
confidence: Confidence::Medium,
description: "Telegram Bot Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(AAAA[a-zA-Z0-9_-]{140,})\b").expect("Invalid regex"),
secret_type: SecretType::FirebaseKey,
severity: Severity::High,
confidence: Confidence::Medium,
description: "Firebase Cloud Messaging Server Key detected",
},
ProviderPattern {
regex: Regex::new(r#"(?i)(?:AccountKey|SharedAccessKey)\s*=\s*([A-Za-z0-9+/=]{44,})"#).expect("Invalid regex"),
secret_type: SecretType::AzureKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "Azure Storage/Service Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(dop_v1_[a-f0-9]{64})\b").expect("Invalid regex"),
secret_type: SecretType::DigitalOceanToken,
severity: Severity::High,
confidence: Confidence::High,
description: "DigitalOcean Personal Access Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b(doo_v1_[a-f0-9]{64})\b").expect("Invalid regex"),
secret_type: SecretType::DigitalOceanToken,
severity: Severity::High,
confidence: Confidence::High,
description: "DigitalOcean OAuth Token detected",
},
ProviderPattern {
regex: Regex::new(r"\b([a-f0-9]{32})\b").expect("Invalid regex"),
secret_type: SecretType::DatadogKey,
severity: Severity::Medium,
confidence: Confidence::Low,
description: "Possible Datadog API Key detected (32-char hex)",
},
ProviderPattern {
regex: Regex::new(r"\b(sk-[a-zA-Z0-9]{20}T3BlbkFJ[a-zA-Z0-9]{20})\b").expect("Invalid regex"),
secret_type: SecretType::OpenAiKey,
severity: Severity::High,
confidence: Confidence::High,
description: "OpenAI API Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(sk-proj-[a-zA-Z0-9_-]{40,})\b").expect("Invalid regex"),
secret_type: SecretType::OpenAiKey,
severity: Severity::High,
confidence: Confidence::High,
description: "OpenAI Project API Key detected",
},
ProviderPattern {
regex: Regex::new(r"\b(sk-ant-api[0-9]{2}-[a-zA-Z0-9_-]{90,})\b").expect("Invalid regex"),
secret_type: SecretType::AnthropicKey,
severity: Severity::High,
confidence: Confidence::High,
description: "Anthropic API Key detected",
},
ProviderPattern {
regex: Regex::new(r#"(?i)heroku[a-z_-]*\s*[=:]\s*["']?([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})["']?"#).expect("Invalid regex"),
secret_type: SecretType::HerokuKey,
severity: Severity::High,
confidence: Confidence::Medium,
description: "Heroku API Key detected",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----").expect("Invalid regex"),
secret_type: SecretType::PrivateKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "RSA Private Key detected",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+EC\s+PRIVATE\s+KEY-----").expect("Invalid regex"),
secret_type: SecretType::PrivateKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "EC Private Key detected",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+DSA\s+PRIVATE\s+KEY-----").expect("Invalid regex"),
secret_type: SecretType::PrivateKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "DSA Private Key detected",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+ENCRYPTED\s+PRIVATE\s+KEY-----").expect("Invalid regex"),
secret_type: SecretType::PrivateKey,
severity: Severity::High,
confidence: Confidence::High,
description: "Encrypted Private Key detected (still sensitive)",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+OPENSSH\s+PRIVATE\s+KEY-----").expect("Invalid regex"),
secret_type: SecretType::SshPrivateKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "OpenSSH Private Key detected",
},
ProviderPattern {
regex: Regex::new(r"-----BEGIN\s+PGP\s+PRIVATE\s+KEY\s+BLOCK-----").expect("Invalid regex"),
secret_type: SecretType::PgpPrivateKey,
severity: Severity::Critical,
confidence: Confidence::High,
description: "PGP Private Key Block detected",
},
ProviderPattern {
regex: Regex::new(r"\b(eyJ[A-Za-z0-9_-]{10,}\.eyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]+)\b").expect("Invalid regex"),
secret_type: SecretType::Jwt,
severity: Severity::Medium,
confidence: Confidence::High,
description: "JSON Web Token detected",
},
ProviderPattern {
regex: Regex::new(r##"(?i)(postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:[^@]+@[^\s"'`]+"##).expect("Invalid regex"),
secret_type: SecretType::ConnectionString,
severity: Severity::Critical,
confidence: Confidence::High,
description: "Database connection string with embedded credentials detected",
},
]
});
static GENERIC_PATTERNS: Lazy<Vec<GenericPattern>> = Lazy::new(|| {
vec![
GenericPattern {
var_regex: Regex::new(r#"(?i)(?:^|[^a-zA-Z0-9_])(?:password|passwd|pwd)\s*[=:]\s*["']([^"']+)["']"#).expect("Invalid regex"),
secret_type: SecretType::Password,
min_value_length: 4,
description: "Hardcoded password detected",
},
GenericPattern {
var_regex: Regex::new(r#"(?i)(?:^|[^a-zA-Z0-9_])(?:api_?key|apikey|api_?token)\s*[=:]\s*["']([^"']+)["']"#).expect("Invalid regex"),
secret_type: SecretType::ApiKey,
min_value_length: 8,
description: "Hardcoded API key detected",
},
GenericPattern {
var_regex: Regex::new(r#"(?i)(?:^|[^a-zA-Z0-9_])(?:secret|secret_?key|private_?key|auth_?key|access_?key)\s*[=:]\s*["']([^"']+)["']"#).expect("Invalid regex"),
secret_type: SecretType::GenericSecret,
min_value_length: 8,
description: "Hardcoded secret detected",
},
GenericPattern {
var_regex: Regex::new(r#"(?i)(?:^|[^a-zA-Z0-9_])(?:token|auth_?token|access_?token|bearer_?token)\s*[=:]\s*["']([^"']+)["']"#).expect("Invalid regex"),
secret_type: SecretType::GenericSecret,
min_value_length: 8,
description: "Hardcoded token detected",
},
GenericPattern {
var_regex: Regex::new(r#"(?i)(?:^|[^a-zA-Z0-9_])(?:credentials?|creds?)\s*[=:]\s*["']([^"']+)["']"#).expect("Invalid regex"),
secret_type: SecretType::GenericSecret,
min_value_length: 6,
description: "Hardcoded credentials detected",
},
]
});
/// Regexes that mark a candidate value as an obvious placeholder rather than a secret.
static PLACEHOLDER_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources: [&str; 9] = [
        // "your_api_key"-style fillers at the start of the value.
        r"(?i)^(your[_-]?api[_-]?key|your[_-]?secret|your[_-]?token|your[_-]?password)",
        // Instructions to the reader, anywhere in the value.
        r"(?i)(changeme|change[_-]?me|replace[_-]?me|insert[_-]?here|put[_-]?here|add[_-]?here)",
        // Values that start with a dummy word.
        r"(?i)^(xxx+|placeholder|dummy|fake|test|example|sample|demo)",
        // Markers for "not filled in", anywhere in the value.
        r"(?i)(todo|fixme|tbd|none|null|undefined|empty)",
        // Template syntaxes: <...>, ${...}, {{...}}, %...%
        r"(?i)^<[^>]+>$",
        r"(?i)^\$\{[^}]+\}$",
        r"(?i)^\{\{[^}]+\}\}$",
        r"(?i)^%[^%]+%$",
        // Trivial values (case-sensitive).
        r"^(1234|abcd|pass|password|secret|token|key)",
    ];

    sources
        .iter()
        .map(|source| Regex::new(source).expect("Invalid regex"))
        .collect()
});
/// Regexes recognising a read from the process environment in various languages.
/// A line matching any of them is treated as "value comes from the environment".
static ENV_VAR_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources: [&str; 19] = [
        // Python
        r#"os\.environ\s*\["#,
        r#"os\.environ\.get\s*\("#,
        r#"os\.getenv\s*\("#,
        r#"environ\s*\["#,
        // JavaScript / TypeScript runtimes
        r#"process\.env\."#,
        r#"process\.env\["#,
        r#"Deno\.env\.get\("#,
        r#"import\.meta\.env\."#,
        // Go
        r#"os\.Getenv\s*\("#,
        r#"os\.LookupEnv\s*\("#,
        // Rust
        r#"std::env::var\s*\("#,
        r#"env::var\s*\("#,
        r#"env!\s*\("#,
        // Java
        r#"System\.getenv\s*\("#,
        r#"System\.getProperty\s*\("#,
        // Ruby
        r#"ENV\s*\["#,
        r#"ENV\.fetch\s*\("#,
        // C-style getenv
        r#"getenv\s*\("#,
        // .NET
        r#"Environment\.GetEnvironmentVariable\s*\("#,
    ];

    sources
        .iter()
        .map(|source| Regex::new(source).expect("Invalid regex"))
        .collect()
});
/// Regexes recognising a read from a configuration/settings object.
/// A line matching any of them is treated as "value comes from configuration".
static CONFIG_READ_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources: [&str; 7] = [
        r#"config\s*\.\s*get\s*\("#,
        r#"config\s*\["#,
        r#"settings\s*\.\s*get\s*\("#,
        r#"settings\s*\["#,
        r#"\.config\("#,
        r#"viper\.Get"#,
        r#"configuration\."#,
    ];

    sources
        .iter()
        .map(|source| Regex::new(source).expect("Invalid regex"))
        .collect()
});
/// Shannon entropy of `s`, in bits per byte, computed over its UTF-8 bytes.
///
/// Returns `0.0` for the empty string.
fn calculate_entropy(s: &str) -> f64 {
    let data = s.as_bytes();
    if data.is_empty() {
        return 0.0;
    }

    let total = data.len() as f64;
    // Accumulate -p * log2(p) over the byte values that actually occur, in byte order.
    build_byte_histogram(data)
        .iter()
        .filter(|&&occurrences| occurrences > 0)
        .fold(0.0_f64, |bits, &occurrences| {
            let p = f64::from(occurrences) / total;
            bits - p * p.log2()
        })
}
/// Counts how often each byte value occurs in `bytes`.
///
/// On x86_64 the AVX2 implementation is used when the CPU reports AVX2 support at
/// runtime; everywhere else the scalar implementation is used.
#[inline]
fn build_byte_histogram(bytes: &[u8]) -> [u32; 256] {
    #[cfg(target_arch = "x86_64")]
    {
        let avx2_available = is_x86_feature_detected!("avx2");
        if avx2_available {
            // SAFETY: the AVX2 routine is only reached after the runtime feature check above.
            let histogram = unsafe { build_byte_histogram_avx2(bytes) };
            return histogram;
        }
    }

    build_byte_histogram_scalar(bytes)
}
/// Portable byte histogram: `result[b]` is the number of occurrences of byte `b`.
#[inline]
fn build_byte_histogram_scalar(bytes: &[u8]) -> [u32; 256] {
    let mut counts = [0u32; 256];
    for &byte in bytes {
        counts[usize::from(byte)] += 1;
    }
    counts
}
/// AVX2 byte histogram: `result[b]` is the number of occurrences of byte `b`.
///
/// Inputs shorter than 128 bytes are delegated to the scalar implementation. Longer
/// inputs are processed in 32-byte blocks: each block is compared against every
/// possible byte value and the number of equal lanes is added to that value's count.
/// The trailing bytes that do not fill a block are counted one by one.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
unsafe fn build_byte_histogram_avx2(bytes: &[u8]) -> [u32; 256] {
    use std::arch::x86_64::{
        __m256i, _mm256_cmpeq_epi8, _mm256_loadu_si256, _mm256_movemask_epi8, _mm256_set1_epi8,
    };

    if bytes.len() < 128 {
        return build_byte_histogram_scalar(bytes);
    }

    let mut counts = [0u32; 256];
    let mut blocks = bytes.chunks_exact(32);
    for block in blocks.by_ref() {
        // SAFETY: `block` is exactly 32 readable bytes and the load is unaligned-tolerant.
        let lanes = _mm256_loadu_si256(block.as_ptr().cast::<__m256i>());
        for (value, slot) in (0u8..=255u8).zip(counts.iter_mut()) {
            let needle = _mm256_set1_epi8(value as i8);
            let equal_lanes = _mm256_movemask_epi8(_mm256_cmpeq_epi8(lanes, needle)) as u32;
            *slot += equal_lanes.count_ones();
        }
    }

    for &byte in blocks.remainder() {
        counts[byte as usize] += 1;
    }
    counts
}
/// Default entropy (bits per byte) at or above which a quoted string is reported as
/// a high-entropy string; used as the detector's initial `entropy_threshold`.
const MIN_ENTROPY_THRESHOLD: f64 = 4.5;
/// Minimum length, in bytes, of a quoted string considered by the entropy scan.
const MIN_ENTROPY_STRING_LENGTH: usize = 16;
/// Configurable scanner for hardcoded secrets in source and configuration files.
pub struct SecretsDetector {
// When true, files classified as test files produce no findings at all.
skip_test_files: bool,
// When true, quoted strings are additionally checked for high entropy.
include_entropy: bool,
// Minimum entropy (bits per byte) for a quoted string to be reported.
entropy_threshold: f64,
}
impl Default for SecretsDetector {
fn default() -> Self {
Self::new()
}
}
/// Returns `true` when every byte in `bytes` equals the first one.
///
/// An empty slice is considered uniform and yields `true`.
///
/// Implemented with stable standard-library iterators only: the inputs are short
/// candidate secret values, so the check does not need the nightly-only `std::simd`
/// API.
#[inline]
fn is_all_same_byte(bytes: &[u8]) -> bool {
    match bytes.split_first() {
        None => true,
        Some((first, rest)) => rest.iter().all(|byte| byte == first),
    }
}
/// Counts the bytes of `s` that are not ASCII space, tab, line feed or carriage return.
///
/// The `_simd` suffix is kept so existing callers keep working; the implementation
/// uses stable standard-library iterators only and no longer requires the
/// nightly-only `std::simd` API.
#[inline]
fn count_non_whitespace_simd(s: &str) -> usize {
    s.bytes()
        .filter(|&byte| !matches!(byte, b' ' | b'\t' | b'\n' | b'\r'))
        .count()
}
impl SecretsDetector {
pub fn new() -> Self {
Self {
skip_test_files: false,
include_entropy: true,
entropy_threshold: MIN_ENTROPY_THRESHOLD,
}
}
#[must_use]
pub fn skip_test_files(mut self, skip: bool) -> Self {
self.skip_test_files = skip;
self
}
#[must_use]
pub fn include_entropy(mut self, include: bool) -> Self {
self.include_entropy = include;
self
}
#[must_use]
pub fn entropy_threshold(mut self, threshold: f64) -> Self {
self.entropy_threshold = threshold;
self
}
/// Heuristically decides whether `path` belongs to test code.
///
/// A path is a test path when it contains a well-known test directory
/// (`test`, `tests`, `__tests__`, `spec`, `specs`, `_test`, `testdata`, `fixtures`,
/// `mocks`), contains a component starting with `test_`, or when its file name
/// follows a common test naming convention (`test_*`, `*_test.py`, `*.spec.ts`, ...).
/// Matching is case-insensitive.
///
/// The path is normalised before matching: backslashes are treated as separators and
/// a leading `/` is prepended, so relative paths such as `tests/foo.py` and
/// Windows-style paths are recognised as well.
fn is_test_file(path: &Path) -> bool {
    const TEST_PATH_MARKERS: [&str; 10] = [
        "/test/",
        "/tests/",
        "/__tests__/",
        "/spec/",
        "/specs/",
        "/_test/",
        "/test_",
        "/testdata/",
        "/fixtures/",
        "/mocks/",
    ];
    const TEST_NAME_SUFFIXES: [&str; 13] = [
        "_test.py",
        "_test.go",
        "_test.rs",
        "_test.ts",
        "_test.js",
        ".test.ts",
        ".test.js",
        ".test.tsx",
        ".test.jsx",
        ".spec.ts",
        ".spec.js",
        "_spec.rb",
        "test.java",
    ];

    let lowered = path.to_string_lossy().to_lowercase();
    // Leading '/' lets the first component of a relative path match a marker too.
    let normalized = format!("/{}", lowered.replace('\\', "/"));
    if TEST_PATH_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker))
    {
        return true;
    }

    path.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| {
            let name = name.to_lowercase();
            name.starts_with("test_")
                || TEST_NAME_SUFFIXES
                    .iter()
                    .any(|suffix| name.ends_with(*suffix))
        })
}
/// Returns `true` when `value` looks like a placeholder rather than a real secret:
/// shorter than 4 bytes, at least 8 bytes of one repeated byte, or matching any of
/// the `PLACEHOLDER_PATTERNS`.
fn is_placeholder(value: &str) -> bool {
    let too_short = value.len() < 4;
    too_short
        || (value.len() >= 8 && is_all_same_byte(value.as_bytes()))
        || PLACEHOLDER_PATTERNS
            .iter()
            .any(|placeholder| placeholder.is_match(value))
}
/// Returns `true` when `line` reads a value from the environment or from a
/// configuration object, i.e. matches any of `ENV_VAR_PATTERNS` or
/// `CONFIG_READ_PATTERNS`.
fn is_env_or_config_read(line: &str) -> bool {
    ENV_VAR_PATTERNS
        .iter()
        .chain(CONFIG_READ_PATTERNS.iter())
        .any(|reader| reader.is_match(line))
}
/// Masks a secret for display.
///
/// Values of up to 8 characters are replaced entirely by `*`. Longer values keep
/// `min(4, len / 4)` characters at each end and have everything in between replaced
/// by `*`, so the result has the same number of characters as the input.
///
/// Lengths are measured in characters rather than bytes: slicing by byte offset
/// would panic when a non-ASCII value puts a cut inside a multi-byte character.
/// For ASCII input the result is the same as with byte-based masking.
fn mask_value(value: &str) -> String {
    let char_count = value.chars().count();
    if char_count <= 8 {
        return "*".repeat(char_count);
    }

    let visible = std::cmp::min(4, char_count / 4);
    let masked_len = char_count - visible * 2;

    let mut masked = String::with_capacity(value.len());
    masked.extend(value.chars().take(visible));
    masked.extend(std::iter::repeat('*').take(masked_len));
    masked.extend(value.chars().skip(char_count - visible));
    masked
}
/// Returns numbered, newline-separated remediation steps for `secret_type`.
///
/// Types without dedicated advice fall back to a generic "remove, store securely,
/// rotate" checklist.
fn get_remediation(secret_type: SecretType) -> String {
    let steps: &'static str = match secret_type {
        SecretType::AwsAccessKey | SecretType::AwsSecretKey => {
            "1. Immediately rotate the AWS credentials in IAM console\n\
             2. Use environment variables or AWS Secrets Manager\n\
             3. Consider using IAM roles for EC2/Lambda/ECS instead of keys"
        }
        SecretType::GitHubToken | SecretType::GitLabToken => {
            "1. Revoke the token immediately in repository settings\n\
             2. Generate a new token with minimal required permissions\n\
             3. Use GITHUB_TOKEN in Actions or environment variables"
        }
        SecretType::PrivateKey | SecretType::SshPrivateKey | SecretType::PgpPrivateKey => {
            "1. Remove the private key from source control\n\
             2. Generate new key pair and distribute new public key\n\
             3. Store private keys in secure key management systems\n\
             4. Use ssh-agent or similar for authentication"
        }
        SecretType::ConnectionString => {
            "1. Remove credentials from connection string\n\
             2. Use environment variables for database credentials\n\
             3. Consider using managed identity or IAM authentication\n\
             4. Use connection pooling with credential injection"
        }
        SecretType::Jwt => {
            "1. Determine if JWT is a hardcoded test token\n\
             2. If production: rotate signing keys and invalidate tokens\n\
             3. Generate JWTs dynamically, never hardcode"
        }
        SecretType::Password | SecretType::ApiKey | SecretType::GenericSecret => {
            "1. Remove hardcoded credential from source code\n\
             2. Store in environment variables or secrets manager\n\
             3. Rotate the credential if it was exposed in VCS history\n\
             4. Consider using a .env file (not committed to VCS)"
        }
        SecretType::HighEntropyString => {
            "1. Review if this is actually a secret\n\
             2. If secret: move to environment variable or secrets manager\n\
             3. If not a secret: consider adding to ignore list"
        }
        // Every remaining provider type shares the generic checklist.
        _ => {
            "1. Remove hardcoded credential from source code\n\
             2. Store securely using environment variables or secrets manager\n\
             3. Rotate the credential if exposed in VCS history"
        }
    };
    steps.to_string()
}
/// Scans one file line by line and returns every potential secret found.
///
/// For each line that is not an environment/configuration read, three passes run in
/// order: provider-specific patterns, variable-name patterns, and (when enabled) the
/// high-entropy string scan. Each pattern contributes at most its first match per
/// line. Findings in test files are reported with a reduced severity.
///
/// Returns an empty list without reading the file when the file is a test file and
/// test files are skipped.
///
/// # Errors
///
/// Returns an I/O error (carrying the path) when the file cannot be read as UTF-8 text.
pub fn scan_file(&self, file_path: &str) -> Result<Vec<SecretFinding>> {
    let path = Path::new(file_path);
    let is_test = Self::is_test_file(path);
    if is_test && self.skip_test_files {
        return Ok(Vec::new());
    }

    let source = std::fs::read_to_string(path).map_err(|e| BrrrError::io_with_path(e, path))?;
    let mut findings = Vec::new();

    for (index, line) in source.lines().enumerate() {
        let line_number = index + 1;
        if Self::is_env_or_config_read(line) {
            continue;
        }

        // Converts 0-based byte offsets within the line into a single-line location.
        let span = |start: usize, end: usize| Location {
            file: file_path.to_string(),
            line: line_number,
            column: start + 1,
            end_line: line_number,
            end_column: end + 1,
        };

        // Pass 1: provider-specific formats.
        for provider in PROVIDER_PATTERNS.iter() {
            let Some(captures) = provider.regex.captures(line) else {
                continue;
            };
            // Prefer the explicit capture group; fall back to the whole match.
            let Some(hit) = captures.get(1).or_else(|| captures.get(0)) else {
                continue;
            };
            let value = hit.as_str();
            if Self::is_placeholder(value)
                || (provider.confidence == Confidence::Low && value.len() < 20)
            {
                continue;
            }

            let severity = match (is_test, provider.severity) {
                (false, declared) => declared,
                (true, Severity::Critical) => Severity::Medium,
                (true, Severity::High) => Severity::Low,
                (true, _) => Severity::Info,
            };

            findings.push(SecretFinding {
                location: span(hit.start(), hit.end()),
                secret_type: provider.secret_type,
                severity,
                confidence: provider.confidence,
                masked_value: Self::mask_value(value),
                variable_name: None,
                description: provider.description.to_string(),
                remediation: Self::get_remediation(provider.secret_type),
                is_test_file: is_test,
                entropy: Some(calculate_entropy(value)),
            });
        }

        // Pass 2: assignments to suspiciously named variables.
        for generic in GENERIC_PATTERNS.iter() {
            let Some(value_match) = generic
                .var_regex
                .captures(line)
                .and_then(|captures| captures.get(1))
            else {
                continue;
            };
            let value = value_match.as_str();
            if Self::is_placeholder(value) || value.len() < generic.min_value_length {
                continue;
            }

            let entropy = calculate_entropy(value);
            if entropy < 3.0 {
                continue;
            }

            // Confidence and base severity rise together with the value's entropy.
            let (confidence, base_severity) = if entropy > 4.5 {
                (Confidence::High, Severity::High)
            } else if entropy > 3.5 {
                (Confidence::Medium, Severity::Medium)
            } else {
                (Confidence::Low, Severity::Low)
            };
            let severity = match (is_test, base_severity) {
                (false, base) => base,
                (true, Severity::High) => Severity::Low,
                (true, _) => Severity::Info,
            };

            findings.push(SecretFinding {
                location: span(value_match.start(), value_match.end()),
                secret_type: generic.secret_type,
                severity,
                confidence,
                masked_value: Self::mask_value(value),
                variable_name: None,
                description: generic.description.to_string(),
                remediation: Self::get_remediation(generic.secret_type),
                is_test_file: is_test,
                entropy: Some(entropy),
            });
        }

        // Pass 3: quoted strings that merely look random.
        if self.include_entropy {
            self.detect_high_entropy_strings(line, line_number, file_path, is_test, &mut findings);
        }
    }

    Ok(findings)
}
/// Appends a `HighEntropyString` finding for every quoted string on `line` whose
/// entropy reaches the detector's threshold.
///
/// Lines that start like a comment are ignored. A quoted string is skipped when it
/// is shorter than `MIN_ENTROPY_STRING_LENGTH`, is a placeholder, looks like a URL,
/// path or domain, consists of less than 80% non-whitespace bytes, or lies within
/// the span of a finding already recorded for this line.
fn detect_high_entropy_strings(
    &self,
    line: &str,
    line_number: usize,
    file_path: &str,
    is_test: bool,
    findings: &mut Vec<SecretFinding>,
) {
    static STRING_PATTERN: Lazy<Regex> = Lazy::new(|| {
        Regex::new(r#"["']([^"']{16,})["']"#).expect("Invalid regex")
    });
    const COMMENT_PREFIXES: [&str; 4] = ["#", "//", "*", "/*"];
    const URL_OR_PATH_FRAGMENTS: [&str; 7] = [
        "http://", "https://", "file://", "\\\\", ".com", ".org", ".io",
    ];

    let code = line.trim();
    if COMMENT_PREFIXES
        .iter()
        .any(|prefix| code.starts_with(*prefix))
    {
        return;
    }

    let quoted_values = STRING_PATTERN
        .captures_iter(line)
        .filter_map(|captures| captures.get(1));
    for value_match in quoted_values {
        let value = value_match.as_str();
        if value.len() < MIN_ENTROPY_STRING_LENGTH || Self::is_placeholder(value) {
            continue;
        }

        let looks_like_url_or_path = value.starts_with('/')
            || URL_OR_PATH_FRAGMENTS
                .iter()
                .any(|fragment| value.contains(*fragment));
        if looks_like_url_or_path {
            continue;
        }

        // Mostly-whitespace strings are prose, not tokens.
        let non_space: usize = count_non_whitespace_simd(value);
        if (non_space as f64) / (value.len() as f64) < 0.8 {
            continue;
        }

        let entropy = calculate_entropy(value);
        let reaches_threshold = entropy >= self.entropy_threshold;
        if !reaches_threshold {
            continue;
        }

        let column = value_match.start() + 1;
        let end_column = value_match.end() + 1;

        // Do not report a string that an earlier finding on this line already covers.
        let covered_by_existing = findings.iter().any(|existing| {
            existing.location.line == line_number
                && existing.location.column <= column
                && existing.location.end_column >= end_column
        });
        if covered_by_existing {
            continue;
        }

        let severity = match (is_test, entropy > 5.0) {
            (true, _) => Severity::Info,
            (false, true) => Severity::Medium,
            (false, false) => Severity::Low,
        };

        findings.push(SecretFinding {
            location: Location {
                file: file_path.to_string(),
                line: line_number,
                column,
                end_line: line_number,
                end_column,
            },
            secret_type: SecretType::HighEntropyString,
            severity,
            confidence: Confidence::Low,
            masked_value: Self::mask_value(value),
            variable_name: None,
            description: format!(
                "High-entropy string detected (entropy: {:.2} bits/char)",
                entropy
            ),
            remediation: Self::get_remediation(SecretType::HighEntropyString),
            is_test_file: is_test,
            entropy: Some(entropy),
        });
    }
}
/// Recursively scans `dir_path` and aggregates all findings.
///
/// The walk honours the `ignore` crate's standard ignore files plus `.brrrignore`.
/// Hidden files and directories are skipped, except regular files whose name starts
/// with `.env`: these are dot-files, so the walker's built-in hidden filter would
/// drop them before the `.env` name check below could ever select them.
///
/// `language` restricts scanning to that language's source extensions; any other
/// value (including `None`) scans all known source and configuration extensions.
/// `.env*` / `*.env` files, `Dockerfile` and `docker-compose.y[a]ml` are always
/// scanned.
///
/// Scanning is best-effort: files that cannot be read (for example binary files) are
/// counted in `files_scanned` but contribute no findings. Paths that are not valid
/// UTF-8 are skipped and not counted.
///
/// # Errors
///
/// Returns `BrrrError::InvalidArgument` when `dir_path` is not a directory.
pub fn scan_directory(&self, dir_path: &str, language: Option<&str>) -> Result<ScanResult> {
    let path = Path::new(dir_path);
    if !path.is_dir() {
        return Err(BrrrError::InvalidArgument(format!(
            "Not a directory: {}",
            dir_path
        )));
    }

    let mut findings = Vec::new();
    let mut files_scanned = 0;

    let mut builder = ignore::WalkBuilder::new(path);
    builder.add_custom_ignore_filename(".brrrignore");
    // Hidden entries are filtered manually so that `.env*` files stay visible.
    builder.hidden(false);
    builder.filter_entry(|entry| {
        // Never filter the root the caller explicitly asked for.
        if entry.depth() == 0 {
            return true;
        }
        let name = entry.file_name().to_string_lossy();
        if !name.starts_with('.') {
            return true;
        }
        // Hidden: keep only regular `.env*` files; directories such as `.git` are pruned.
        name.starts_with(".env") && entry.file_type().is_some_and(|kind| kind.is_file())
    });

    let extensions: HashSet<&str> = match language {
        Some("python") => ["py"].iter().copied().collect(),
        Some("typescript") | Some("javascript") => ["ts", "tsx", "js", "jsx", "mjs", "cjs"]
            .iter()
            .copied()
            .collect(),
        Some("go") => ["go"].iter().copied().collect(),
        Some("rust") => ["rs"].iter().copied().collect(),
        Some("java") => ["java"].iter().copied().collect(),
        Some("c") => ["c", "h"].iter().copied().collect(),
        Some("cpp") => ["cpp", "cc", "cxx", "hpp", "h"].iter().copied().collect(),
        Some("ruby") => ["rb", "erb"].iter().copied().collect(),
        Some("php") => ["php"].iter().copied().collect(),
        Some("csharp") => ["cs"].iter().copied().collect(),
        _ => [
            "py", "ts", "tsx", "js", "jsx", "mjs", "cjs", "go", "rs", "java", "c", "h", "cpp",
            "cc", "cxx", "hpp", "rb", "erb", "php", "cs", "yaml", "yml", "json", "toml", "xml",
            "properties", "ini", "cfg", "conf", "env", "sh", "bash", "zsh", "ps1", "bat", "cmd",
        ]
        .iter()
        .copied()
        .collect(),
    };

    for entry in builder.build().flatten() {
        let entry_path = entry.path();
        if !entry_path.is_file() {
            continue;
        }

        let ext = entry_path
            .extension()
            .and_then(|e| e.to_str())
            .unwrap_or("");
        let file_name = entry_path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("");
        let should_scan = extensions.contains(ext)
            || file_name.starts_with(".env")
            || file_name.ends_with(".env")
            || file_name == "Dockerfile"
            || file_name == "docker-compose.yml"
            || file_name == "docker-compose.yaml";
        if !should_scan {
            continue;
        }

        // `scan_file` takes a `&str`; a non-UTF-8 path cannot be passed on faithfully.
        let Some(path_str) = entry_path.to_str() else {
            continue;
        };

        files_scanned += 1;
        // Best-effort: unreadable files are skipped rather than aborting the whole scan.
        if let Ok(file_findings) = self.scan_file(path_str) {
            findings.extend(file_findings);
        }
    }

    let mut type_counts: HashMap<String, usize> = HashMap::new();
    let mut severity_counts: HashMap<String, usize> = HashMap::new();
    for finding in &findings {
        *type_counts
            .entry(finding.secret_type.to_string())
            .or_default() += 1;
        *severity_counts
            .entry(finding.severity.to_string())
            .or_default() += 1;
    }

    Ok(ScanResult {
        findings,
        files_scanned,
        type_counts,
        severity_counts,
    })
}
}
/// Scans `path` for secrets, accepting either a single file or a directory.
///
/// When `path` is a regular file it is scanned directly and the result reports
/// `files_scanned: 1`; `language` is not consulted in that case. Any other path is
/// handed to `SecretsDetector::scan_directory` together with `language`.
///
/// # Errors
/// Propagates the error from `scan_file` (file case) or `scan_directory`
/// (every other case).
pub fn scan_secrets(path: &str, language: Option<&str>) -> Result<ScanResult> {
    let detector = SecretsDetector::new();

    // Anything that is not a regular file goes through the directory walker.
    if !Path::new(path).is_file() {
        return detector.scan_directory(path, language);
    }

    let findings = detector.scan_file(path)?;

    // Per-type and per-severity tallies, keyed by display name.
    let mut type_counts: HashMap<String, usize> = HashMap::new();
    let mut severity_counts: HashMap<String, usize> = HashMap::new();
    findings.iter().for_each(|hit| {
        *type_counts.entry(hit.secret_type.to_string()).or_default() += 1;
        *severity_counts.entry(hit.severity.to_string()).or_default() += 1;
    });

    Ok(ScanResult {
        findings,
        files_scanned: 1,
        type_counts,
        severity_counts,
    })
}
/// Scans a single file for secrets and returns the raw findings.
///
/// `_language` is accepted but unused. A `path` that is not valid UTF-8 is passed
/// to `scan_file` as the empty string.
///
/// # Errors
/// Propagates whatever error `SecretsDetector::scan_file` returns.
pub fn scan_file_secrets(path: &Path, _language: Option<&str>) -> Result<Vec<SecretFinding>> {
    let path_text = path.to_str().unwrap_or("");
    SecretsDetector::new().scan_file(path_text)
}
#[cfg(test)]
mod tests {
    //! Unit tests for the secrets detector: provider-specific patterns, generic
    //! password/API-key heuristics, false-positive suppression, severity handling,
    //! entropy, masking, `Display` impls and the whitespace counter.
    //!
    //! Fixture raw strings start at column 0 on purpose so their content is exactly
    //! what gets written to the temporary file.
    use super::*;
    use std::io::Write;

    /// Writes `content` to a new temporary file whose name ends with `extension`.
    ///
    /// The returned handle must be kept alive while the file is scanned.
    /// Panics if the file cannot be created or written.
    fn create_temp_file(content: &str, extension: &str) -> tempfile::NamedTempFile {
        let mut file = tempfile::Builder::new()
            .suffix(extension)
            .tempfile()
            .expect("Failed to create temp file");
        file.write_all(content.as_bytes())
            .expect("Failed to write temp file");
        file
    }

    /// An AWS access key id in a Python file is reported as `AwsAccessKey`.
    #[test]
    fn test_detect_aws_access_key() {
        let source = r#"
AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE"
aws_secret_key = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(!findings.is_empty(), "Should detect AWS credentials");
        let aws_key = findings.iter().find(|f| f.secret_type == SecretType::AwsAccessKey);
        assert!(aws_key.is_some(), "Should detect AWS Access Key");
    }

    /// A classic `ghp_` token is the first finding, typed `GitHubToken`, high confidence.
    #[test]
    fn test_detect_github_token_classic() {
        let source = r#"
GITHUB_TOKEN = "ghp_1234567890abcdefghijklmnopqrstuvwxyz"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(!findings.is_empty(), "Should detect GitHub token");
        assert_eq!(findings[0].secret_type, SecretType::GitHubToken);
        assert_eq!(findings[0].confidence, Confidence::High);
    }

    /// A fine-grained `github_pat_` token is reported as `GitHubToken`.
    #[test]
    fn test_detect_github_fine_grained_token() {
        let source = r#"
token = "github_pat_11ABCD1234567890abcdef_1234567890abcdefghijklmnopqrstuvwxyz1234567890abcdefghijklm"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let gh_token = findings.iter().find(|f| f.secret_type == SecretType::GitHubToken);
        assert!(gh_token.is_some(), "Should detect fine-grained GitHub token");
    }

    /// An `xoxb-` token in a TypeScript file is the first finding, typed `SlackToken`.
    #[test]
    fn test_detect_slack_token() {
        let source = r#"
SLACK_TOKEN = "xoxb-FAKE-TEST-TOKEN-FOR-UNIT-TESTS-ONLY-abcd"
"#;
        let file = create_temp_file(source, ".ts");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(!findings.is_empty(), "Should detect Slack token");
        assert_eq!(findings[0].secret_type, SecretType::SlackToken);
    }

    /// A live-mode Stripe secret key is reported as `StripeKey` with critical severity.
    #[test]
    fn test_detect_stripe_live_key() {
        // The fixture must carry the `sk_live_` prefix: the sibling test below pins
        // `sk_test_` keys to `Severity::Medium`, so a test-mode key cannot satisfy
        // the `Critical` assertion here. The key is assembled from two pieces so the
        // file holds no literal live-key lookalike (presumably the reason the
        // fixture had been replaced with a test-mode placeholder; unconfirmed).
        let source = format!(
            "\nstripe.api_key = \"{}{}\"\n",
            "sk_live_", "1234567890abcdefghijklmnop"
        );
        let file = create_temp_file(&source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let stripe_key = findings.iter().find(|f| f.secret_type == SecretType::StripeKey);
        assert!(stripe_key.is_some(), "Should detect Stripe live key");
        assert_eq!(stripe_key.unwrap().severity, Severity::Critical);
    }

    /// A test-mode Stripe key is still reported, but only at medium severity.
    #[test]
    fn test_detect_stripe_test_key_lower_severity() {
        let source = r#"
stripe.api_key = "sk_test_1234567890abcdefghijklmnop"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let stripe_key = findings.iter().find(|f| f.secret_type == SecretType::StripeKey);
        assert!(stripe_key.is_some(), "Should detect Stripe test key");
        assert_eq!(stripe_key.unwrap().severity, Severity::Medium);
    }

    /// An `AIza`-prefixed key is reported as `GoogleApiKey`.
    #[test]
    fn test_detect_google_api_key() {
        let source = r#"
const apiKey = "AIzaSyDaGmWKa4JsXZ-HjGw7ISLn_3namBGewQe"
"#;
        let file = create_temp_file(source, ".ts");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let google_key = findings.iter().find(|f| f.secret_type == SecretType::GoogleApiKey);
        assert!(google_key.is_some(), "Should detect Google API key");
    }

    /// A PEM RSA private-key block is reported as `PrivateKey` with critical severity.
    #[test]
    fn test_detect_rsa_private_key() {
        let source = r#"
-----BEGIN RSA PRIVATE KEY-----
MIIEpQIBAAKCAQEA2Z3qX2BTLS4e...
-----END RSA PRIVATE KEY-----
"#;
        let file = create_temp_file(source, ".pem");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let priv_key = findings.iter().find(|f| f.secret_type == SecretType::PrivateKey);
        assert!(priv_key.is_some(), "Should detect RSA private key");
        assert_eq!(priv_key.unwrap().severity, Severity::Critical);
    }

    /// An OpenSSH private-key block is reported as `SshPrivateKey`.
    #[test]
    fn test_detect_openssh_private_key() {
        let source = r#"
-----BEGIN OPENSSH PRIVATE KEY-----
b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAA...
-----END OPENSSH PRIVATE KEY-----
"#;
        let file = create_temp_file(source, ".key");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let ssh_key = findings.iter().find(|f| f.secret_type == SecretType::SshPrivateKey);
        assert!(ssh_key.is_some(), "Should detect OpenSSH private key");
    }

    /// A three-segment JSON Web Token literal is reported as `Jwt`.
    #[test]
    fn test_detect_jwt() {
        let source = r#"
const token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"
"#;
        let file = create_temp_file(source, ".js");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let jwt = findings.iter().find(|f| f.secret_type == SecretType::Jwt);
        assert!(jwt.is_some(), "Should detect JWT");
    }

    /// A Postgres URL with embedded credentials is a critical `ConnectionString`.
    #[test]
    fn test_detect_postgres_connection_string() {
        let source = r#"
DATABASE_URL = "postgres://user:password123@localhost:5432/mydb"
"#;
        let file = create_temp_file(source, ".env");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let conn_str = findings
            .iter()
            .find(|f| f.secret_type == SecretType::ConnectionString);
        assert!(conn_str.is_some(), "Should detect Postgres connection string");
        assert_eq!(conn_str.unwrap().severity, Severity::Critical);
    }

    /// A `mongodb+srv://` URL with embedded credentials is a `ConnectionString`.
    #[test]
    fn test_detect_mongodb_connection_string() {
        let source = r#"
MONGO_URI = "mongodb+srv://admin:secretpass@cluster0.abc123.mongodb.net/mydb"
"#;
        let file = create_temp_file(source, ".env");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let conn_str = findings
            .iter()
            .find(|f| f.secret_type == SecretType::ConnectionString);
        assert!(conn_str.is_some(), "Should detect MongoDB connection string");
    }

    /// A literal assigned to a `password` variable is reported as `Password`.
    #[test]
    fn test_detect_hardcoded_password() {
        let source = r#"
password = "MyS3cr3tP@ssw0rd!"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let password = findings.iter().find(|f| f.secret_type == SecretType::Password);
        assert!(password.is_some(), "Should detect hardcoded password");
    }

    /// A literal assigned to an `api_key` variable is reported as `ApiKey`.
    #[test]
    fn test_detect_hardcoded_api_key() {
        let source = r#"
api_key = "abc123xyz789secretkey456"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let api_key = findings.iter().find(|f| f.secret_type == SecretType::ApiKey);
        assert!(api_key.is_some(), "Should detect hardcoded API key");
    }

    /// Python reads from `os.environ` / `os.getenv` produce no findings.
    #[test]
    fn test_ignore_environment_variable_read_python() {
        let source = r#"
API_KEY = os.environ["API_KEY"]
SECRET = os.getenv("SECRET_KEY")
TOKEN = os.environ.get("AUTH_TOKEN")
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(findings.is_empty(), "Should NOT flag environment variable reads");
    }

    /// TypeScript reads from `process.env` produce no findings.
    #[test]
    fn test_ignore_environment_variable_read_typescript() {
        let source = r#"
const apiKey = process.env.API_KEY;
const secret = process.env["SECRET_KEY"];
"#;
        let file = create_temp_file(source, ".ts");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(findings.is_empty(), "Should NOT flag process.env reads");
    }

    /// Obvious placeholder values assigned to secret-like names produce no findings.
    #[test]
    fn test_ignore_placeholder_values() {
        let source = r#"
API_KEY = "YOUR_API_KEY_HERE"
password = "changeme"
secret = "xxxxxxxxxxxxxxxx"
token = "<your-token>"
key = "placeholder"
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(findings.is_empty(), "Should NOT flag placeholder values");
    }

    /// Values looked up from a config/settings object produce no findings.
    #[test]
    fn test_ignore_config_reads() {
        let source = r#"
password = config.get("password")
api_key = settings["api_key"]
"#;
        let file = create_temp_file(source, ".py");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(findings.is_empty(), "Should NOT flag config reads");
    }

    /// A token in a `*_test.py` file is flagged as test-file and rated below `High`.
    #[test]
    fn test_reduced_severity_in_test_files() {
        let source = r#"
GITHUB_TOKEN = "ghp_1234567890abcdefghijklmnopqrstuvwxyz"
"#;
        // Built directly (not via `create_temp_file`) with a `_test.py` suffix.
        let mut file = tempfile::Builder::new()
            .suffix("_test.py")
            .tempfile()
            .expect("Failed to create temp file");
        file.write_all(source.as_bytes())
            .expect("Failed to write temp file");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        assert!(!findings.is_empty(), "Should detect token in test file");
        assert!(findings[0].is_test_file, "Should mark as test file");
        assert!(
            findings[0].severity < Severity::High,
            "Severity should be reduced for test files"
        );
    }

    /// `calculate_entropy` orders repeated, plain-text and random-looking strings.
    #[test]
    fn test_entropy_calculation() {
        let low_entropy = calculate_entropy("aaaaaaaaaaaaaaaaaaaa");
        assert!(low_entropy < 1.0, "Repeating chars should have low entropy");
        let medium_entropy = calculate_entropy("hello world test");
        assert!(
            medium_entropy > 2.0 && medium_entropy < 4.0,
            "Simple text should have medium entropy"
        );
        let high_entropy = calculate_entropy("aB3$kL9#mN2@pQ5&rT8");
        assert!(high_entropy > 4.0, "Random-looking string should have high entropy");
    }

    /// A random-looking literal with no known prefix is a `HighEntropyString`.
    #[test]
    fn test_detect_high_entropy_string() {
        let source = r#"
const key = "aB3kL9mN2pQ5rT8xY1cD4eF7gH0iJ"
"#;
        let file = create_temp_file(source, ".js");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let high_entropy = findings
            .iter()
            .find(|f| f.secret_type == SecretType::HighEntropyString);
        assert!(
            high_entropy.is_some(),
            "Should detect high-entropy string as potential secret"
        );
    }

    /// `mask_value` fully masks short values and keeps 4 chars at each end of long ones.
    #[test]
    fn test_mask_value() {
        assert_eq!(SecretsDetector::mask_value("short"), "*****");
        assert_eq!(SecretsDetector::mask_value("12345678"), "********");
        assert_eq!(
            SecretsDetector::mask_value("1234567890123456"),
            "1234********3456"
        );
        assert_eq!(
            SecretsDetector::mask_value("ghp_1234567890abcdefghijklmnopqrstuvwxyz"),
            "ghp_********************************wxyz"
        );
    }

    /// An `sk-proj-` key in a dotenv-style file is reported as `OpenAiKey`.
    #[test]
    fn test_detect_openai_key() {
        let source = r#"
OPENAI_API_KEY = "sk-proj-abcdefghijklmnopqrstuvwxyz1234567890ABCD"
"#;
        let file = create_temp_file(source, ".env");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let openai_key = findings.iter().find(|f| f.secret_type == SecretType::OpenAiKey);
        assert!(openai_key.is_some(), "Should detect OpenAI API key");
    }

    /// An `SG.`-prefixed key in a dotenv-style file is reported as `SendGridKey`.
    #[test]
    fn test_detect_sendgrid_key() {
        let source = r#"
SENDGRID_API_KEY = "SG.FAKE_TEST_KEY_ONLY.THIS_IS_NOT_A_REAL_SENDGRID_API_KEY_FOR_TESTS"
"#;
        let file = create_temp_file(source, ".env");
        let detector = SecretsDetector::new();
        let findings = detector
            .scan_file(file.path().to_str().unwrap())
            .expect("Scan should succeed");
        let sg_key = findings.iter().find(|f| f.secret_type == SecretType::SendGridKey);
        assert!(sg_key.is_some(), "Should detect SendGrid API key");
    }

    /// `Severity` renders as its upper-case name.
    #[test]
    fn test_severity_display() {
        assert_eq!(Severity::Critical.to_string(), "CRITICAL");
        assert_eq!(Severity::High.to_string(), "HIGH");
        assert_eq!(Severity::Medium.to_string(), "MEDIUM");
        assert_eq!(Severity::Low.to_string(), "LOW");
        assert_eq!(Severity::Info.to_string(), "INFO");
    }

    /// `Confidence` renders as its upper-case name.
    #[test]
    fn test_confidence_display() {
        assert_eq!(Confidence::High.to_string(), "HIGH");
        assert_eq!(Confidence::Medium.to_string(), "MEDIUM");
        assert_eq!(Confidence::Low.to_string(), "LOW");
    }

    /// `SecretType` renders as a human-readable label.
    #[test]
    fn test_secret_type_display() {
        assert_eq!(SecretType::AwsAccessKey.to_string(), "AWS Access Key");
        assert_eq!(SecretType::GitHubToken.to_string(), "GitHub Token");
        assert_eq!(SecretType::PrivateKey.to_string(), "Private Key");
        assert_eq!(SecretType::ConnectionString.to_string(), "Connection String");
    }

    /// A hand-built `ScanResult` exposes its counters unchanged.
    #[test]
    fn test_scan_result_counts() {
        let result = ScanResult {
            findings: vec![],
            files_scanned: 10,
            type_counts: [("AWS Access Key".to_string(), 2)]
                .into_iter()
                .collect(),
            severity_counts: [("CRITICAL".to_string(), 2), ("HIGH".to_string(), 3)]
                .into_iter()
                .collect(),
        };
        assert_eq!(result.files_scanned, 10);
        assert_eq!(result.type_counts.get("AWS Access Key"), Some(&2));
        assert_eq!(result.severity_counts.get("CRITICAL"), Some(&2));
    }

    /// `count_non_whitespace_simd` counts correctly for empty, whitespace-only and
    /// mixed inputs, including lengths 31, 32, 33 and 100.
    #[test]
    fn test_count_non_whitespace_simd() {
        // Empty and whitespace-free inputs.
        assert_eq!(count_non_whitespace_simd(""), 0);
        assert_eq!(count_non_whitespace_simd("hello"), 5);
        assert_eq!(count_non_whitespace_simd("abc123!@#"), 9);
        // Whitespace-only inputs: space, tab, LF, CRLF and a mix.
        assert_eq!(count_non_whitespace_simd(" "), 0);
        assert_eq!(count_non_whitespace_simd("\t\t\t"), 0);
        assert_eq!(count_non_whitespace_simd("\n\n\n"), 0);
        assert_eq!(count_non_whitespace_simd("\r\n\r\n"), 0);
        assert_eq!(count_non_whitespace_simd(" \t\n\r"), 0);
        // Mixed content.
        assert_eq!(count_non_whitespace_simd("hello world"), 10);
        assert_eq!(count_non_whitespace_simd(" hello "), 5);
        assert_eq!(count_non_whitespace_simd("\thello\n"), 5);
        assert_eq!(count_non_whitespace_simd("a b c d e"), 5);
        // Lengths at and around 32 bytes, plus longer inputs.
        let s32 = "abcdefghijklmnopqrstuvwxyz123456";
        assert_eq!(s32.len(), 32);
        assert_eq!(count_non_whitespace_simd(s32), 32);
        let s33_ws = "abcd efgh ijkl mnop qrst uvwx yz1";
        assert_eq!(s33_ws.len(), 33);
        assert_eq!(count_non_whitespace_simd(s33_ws), 27);
        let long = "a".repeat(100);
        assert_eq!(count_non_whitespace_simd(&long), 100);
        let long_ws = "a ".repeat(50);
        assert_eq!(count_non_whitespace_simd(&long_ws), 50);
        let s31 = "abcdefghijklmnopqrstuvwxyz12345";
        assert_eq!(s31.len(), 31);
        assert_eq!(count_non_whitespace_simd(s31), 31);
        let s33 = "abcdefghijklmnopqrstuvwxyz1234567";
        assert_eq!(s33.len(), 33);
        assert_eq!(count_non_whitespace_simd(s33), 33);
        // A realistic secret with no whitespace counts every byte.
        let secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
        assert_eq!(count_non_whitespace_simd(secret), secret.len());
    }
}