use std::path::Path;
use crate::error::Result;
pub struct Finding {
pub file: String,
pub line: usize,
pub pattern_name: String,
pub preview: String,
}
struct Pattern {
name: &'static str,
detect: fn(&str) -> bool,
}
fn mask(value: &str) -> String {
let chars: Vec<char> = value.chars().collect();
if chars.len() <= 8 {
return "*".repeat(chars.len());
}
let visible = 4;
format!("{}{}",
&chars[..visible].iter().collect::<String>(),
"*".repeat(chars.len() - visible)
)
}
const PATTERNS: &[Pattern] = &[
Pattern {
name: "Private key (PEM)",
detect: |l| l.contains("-----BEGIN") && (
l.contains("PRIVATE KEY") ||
l.contains("RSA PRIVATE") ||
l.contains("EC PRIVATE")
),
},
Pattern {
name: "JWT token",
detect: |l| {
l.split_whitespace().any(|w| {
let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '.' && c != '_' && c != '-');
let parts: Vec<&str> = w.split('.').collect();
parts.len() == 3
&& parts[0].starts_with("eyJ")
&& parts[0].len() > 10
&& parts[1].len() > 10
})
},
},
Pattern {
name: "AWS access key",
detect: |l| {
l.split_whitespace().any(|w| {
let w = w.trim_matches(|c: char| !c.is_alphanumeric());
(w.starts_with("AKIA") || w.starts_with("ASIA") || w.starts_with("AROA"))
&& w.len() == 20
&& w.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
})
},
},
Pattern {
name: "AWS secret key",
detect: |l| {
let lower = l.to_lowercase();
(lower.contains("aws_secret") || lower.contains("aws secret"))
&& (l.contains('=') || l.contains(':'))
&& l.len() > 40
},
},
Pattern {
name: "GitHub/GitLab token",
detect: |l| {
l.split_whitespace().any(|w| {
let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-');
w.starts_with("ghp_") || w.starts_with("gho_") ||
w.starts_with("ghs_") || w.starts_with("github_pat_") ||
w.starts_with("glpat-") || w.starts_with("glptt-")
})
},
},
Pattern {
name: "Generic API key / token",
detect: |l| {
let lower = l.to_lowercase();
let has_key_word =
lower.contains("api_key") || lower.contains("api_secret") ||
lower.contains("auth_token") || lower.contains("access_token") ||
lower.contains("secret_key") || lower.contains("private_key") ||
lower.contains("password") || lower.contains("passwd") ||
lower.contains("auth_token");
let has_assignment = l.contains('=') || l.contains(':');
let has_value = l.split(&['=', ':'][..])
.nth(1)
.map(|v| {
let v = v.trim().trim_matches(|c: char| c == '"' || c == '\'' || c == '`');
let vl = v.to_lowercase();
let looks_like_secret = v.len() >= 16
&& !v.contains(' ')
&& !v.contains('.') && !v.starts_with("${")
&& !v.starts_with("$(")
&& !v.starts_with("process.env")
&& !v.starts_with("env.")
&& !v.starts_with("os.environ")
&& !v.starts_with("<")
&& !vl.eq("your_secret_here")
&& !vl.eq("changeme")
&& !vl.eq("placeholder")
&& !vl.eq("todo")
&& !vl.starts_with("your_")
&& !vl.starts_with("my_")
&& !vl.contains("example")
&& !vl.contains("sample")
&& !vl.contains("replace")
&& !vl.contains("change_me")
&& !vl.contains("insert")
&& !vl.starts_with("tu_")
&& !vl.starts_with("mi_")
&& !vl.contains("cambiar")
&& !vl.contains("reemplazar")
&& !vl.contains("ejemplo")
&& !vl.contains("aqui")
&& !vl.contains("pon_")
&& !vl.contains("escribe");
looks_like_secret
})
.unwrap_or(false);
has_key_word && has_assignment && has_value
},
},
Pattern {
name: "Database connection string with credentials",
detect: |l| {
let lower = l.to_lowercase();
(lower.contains("postgresql://") || lower.contains("mysql://") ||
lower.contains("mongodb://") || lower.contains("redis://") ||
lower.contains("libsql://") || lower.contains("turso://"))
&& l.contains('@')
&& !l.contains("user:password@")
&& !l.contains("user:pass@")
&& !l.contains("<password>")
},
},
Pattern {
name: "Stripe key",
detect: |l| {
l.split_whitespace().any(|w| {
let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '_');
w.starts_with("sk_live_") || w.starts_with("pk_live_") ||
w.starts_with("rk_live_")
})
},
},
Pattern {
name: "Twilio / SendGrid / Brevo key",
detect: |l| {
l.split_whitespace().any(|w| {
let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '-');
(w.starts_with("SG.") && w.len() > 40) ||
(w.starts_with("AC") && w.len() == 34 && w.chars().all(|c| c.is_ascii_alphanumeric()))
})
},
},
];
fn is_example_file(path: &str) -> bool {
let lower = path.to_lowercase();
lower.ends_with(".example")
|| lower.ends_with(".sample")
|| lower.ends_with(".template")
|| lower.ends_with(".example.env")
|| lower.ends_with(".env.example")
|| lower.ends_with(".env.sample")
|| lower.ends_with(".env.template")
|| lower.contains(".example.")
|| lower.contains(".sample.")
}
fn is_sensitive_file(path: &str) -> bool {
let lower = path.to_lowercase();
let filename = lower.split('/').last().unwrap_or(&lower);
matches!(filename,
".env" | ".envrc" | "secrets.json" | "secrets.yaml" | "secrets.yml" |
"credentials.json" | "credentials.yml" | "credentials.yaml" |
".netrc" | ".npmrc" | ".pypirc"
)
|| (filename.starts_with(".env.") && !is_example_file(path))
|| lower.ends_with("_rsa")
|| lower.ends_with("_ed25519")
|| lower.ends_with("_ecdsa")
|| lower.ends_with(".pem")
|| lower.ends_with(".p12")
|| lower.ends_with(".pfx")
|| lower.ends_with(".key")
|| lower.ends_with(".keystore")
|| filename == "id_rsa"
|| filename == "id_ed25519"
|| filename == "id_ecdsa"
}
fn should_skip_file(path: &str) -> bool {
let lower = path.to_lowercase();
lower.ends_with(".lock")
|| lower.ends_with(".png")
|| lower.ends_with(".jpg")
|| lower.ends_with(".jpeg")
|| lower.ends_with(".gif")
|| lower.ends_with(".svg")
|| lower.ends_with(".ico")
|| lower.ends_with(".wasm")
|| lower.ends_with(".pdf")
|| lower.ends_with(".zip")
|| lower.contains("bun.lock")
|| lower.contains("package-lock")
|| lower.contains("yarn.lock")
}
pub fn scan_staged(repo_path: &Path) -> Result<Vec<Finding>> {
use git2::Repository;
let mut findings = Vec::new();
let repo = Repository::discover(repo_path)
.map_err(|e| crate::error::ToriiError::Git(e))?;
let index = repo.index()
.map_err(|e| crate::error::ToriiError::Git(e))?;
let head_tree = repo.head().ok()
.and_then(|h| h.peel_to_tree().ok());
let diff = match &head_tree {
Some(tree) => repo.diff_tree_to_index(Some(tree), Some(&index), None),
None => repo.diff_tree_to_index(None, Some(&index), None),
}.map_err(|e| crate::error::ToriiError::Git(e))?;
let mut staged_files: Vec<String> = Vec::new();
diff.foreach(
&mut |delta, _| {
if let Some(path) = delta.new_file().path() {
staged_files.push(path.to_string_lossy().to_string());
}
true
},
None, None, None,
).map_err(|e| crate::error::ToriiError::Git(e))?;
for file_path in &staged_files {
let file_path_str = file_path.as_str();
if is_example_file(file_path_str) || should_skip_file(file_path_str) {
continue;
}
if is_sensitive_file(file_path_str) {
findings.push(Finding {
file: file_path.clone(),
line: 0,
pattern_name: "Sensitive file — should not be committed".to_string(),
preview: format!("⚠ {} should not be tracked by version control", file_path),
});
continue;
}
let entry = index.get_path(std::path::Path::new(file_path_str), 0);
let content = match entry {
Some(e) => {
match repo.find_blob(e.id) {
Ok(blob) => String::from_utf8_lossy(blob.content()).to_string(),
Err(_) => continue,
}
}
None => continue,
};
for (line_num, line) in content.lines().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.starts_with("/*") {
continue;
}
for pattern in PATTERNS {
if (pattern.detect)(line) {
let preview = mask(line.trim());
findings.push(Finding {
file: file_path.clone(),
line: line_num + 1,
pattern_name: pattern.name.to_string(),
preview,
});
break;
}
}
}
}
Ok(findings)
}
pub fn scan_history(repo_path: &Path) -> Result<Vec<(String, Vec<Finding>)>> {
use git2::Repository;
let mut results = Vec::new();
let repo = Repository::discover(repo_path)
.map_err(|e| crate::error::ToriiError::Git(e))?;
let mut revwalk = repo.revwalk()
.map_err(|e| crate::error::ToriiError::Git(e))?;
revwalk.push_glob("*").map_err(|e| crate::error::ToriiError::Git(e))?;
let commits: Vec<(git2::Oid, String)> = revwalk
.filter_map(|id| id.ok())
.filter_map(|id| {
repo.find_commit(id).ok().map(|c| {
let subject = c.summary().unwrap_or("").to_string();
(id, subject)
})
})
.collect();
println!("🔍 Scanning {} commits...", commits.len());
for (oid, subject) in &commits {
let commit = match repo.find_commit(*oid) {
Ok(c) => c,
Err(_) => continue,
};
let commit_tree = match commit.tree() {
Ok(t) => t,
Err(_) => continue,
};
let parent_tree = commit.parent(0).ok().and_then(|p| p.tree().ok());
let diff = match repo.diff_tree_to_tree(
parent_tree.as_ref(),
Some(&commit_tree),
None,
) {
Ok(d) => d,
Err(_) => continue,
};
let mut commit_findings = Vec::new();
let mut changed_files: Vec<String> = Vec::new();
let _ = diff.foreach(
&mut |delta, _| {
if let Some(path) = delta.new_file().path() {
changed_files.push(path.to_string_lossy().to_string());
}
true
},
None, None, None,
);
for file_path in &changed_files {
if is_example_file(file_path) || should_skip_file(file_path) {
continue;
}
let entry = commit_tree.get_path(std::path::Path::new(file_path));
let content = match entry {
Ok(e) => match repo.find_blob(e.id()) {
Ok(blob) => String::from_utf8_lossy(blob.content()).to_string(),
Err(_) => continue,
},
Err(_) => continue,
};
for (line_num, line) in content.lines().enumerate() {
let trimmed = line.trim();
if trimmed.starts_with('#') || trimmed.starts_with("//") {
continue;
}
for pattern in PATTERNS {
if (pattern.detect)(line) {
commit_findings.push(Finding {
file: file_path.clone(),
line: line_num + 1,
pattern_name: pattern.name.to_string(),
preview: mask(line.trim()),
});
break;
}
}
}
}
if !commit_findings.is_empty() {
results.push((
format!("{} — {}", &oid.to_string()[..8], subject),
commit_findings,
));
}
}
Ok(results)
}