1use std::path::Path;
3use crate::error::Result;
4
/// A single potential secret (or sensitive file) reported by a scan.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Finding {
    /// Path of the offending file, as reported by the git diff.
    pub file: String,
    /// 1-based line number of the match. The staged scan uses `0` for
    /// whole-file findings (a sensitive file that should not be tracked).
    pub line: usize,
    /// Human-readable name of the detector that fired.
    pub pattern_name: String,
    /// Short description of the hit; line-level findings carry the masked line.
    pub preview: String,
}
12
/// A built-in secret detector: a display name plus a line-level predicate.
struct Pattern {
    /// Name reported in `Finding::pattern_name` when the detector fires.
    name: &'static str,
    /// Returns `true` when the given source line looks like it contains
    /// the kind of secret this pattern describes.
    detect: fn(&str) -> bool,
}
19
/// Redacts `value` for display.
///
/// Values of eight characters or fewer are replaced entirely by `*`.
/// Longer values keep their first four characters and have every remaining
/// character replaced by `*`, so the result has the same character count as
/// the input.
fn mask(value: &str) -> String {
    const VISIBLE: usize = 4;

    // Count characters (not bytes) so multi-byte input is never split.
    let total = value.chars().count();
    if total <= 8 {
        return "*".repeat(total);
    }

    let mut masked: String = value.chars().take(VISIBLE).collect();
    masked.push_str(&"*".repeat(total - VISIBLE));
    masked
}
31
/// Blob size limit (5 MiB) used when `TORII_SCAN_MAX_BYTES` is unset or invalid.
const DEFAULT_MAX_BLOB_BYTES: usize = 5 * 1024 * 1024;

/// Returns the largest blob size, in bytes, that the scanners will read.
///
/// The value comes from the `TORII_SCAN_MAX_BYTES` environment variable when
/// it is set and parses as a `usize`; otherwise the default is used.
fn max_blob_bytes() -> usize {
    match std::env::var("TORII_SCAN_MAX_BYTES") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(DEFAULT_MAX_BLOB_BYTES),
        Err(_) => DEFAULT_MAX_BLOB_BYTES,
    }
}
44
45const PATTERNS: &[Pattern] = &[
46 Pattern {
47 name: "Private key (PEM)",
48 detect: |l| l.contains("-----BEGIN") && (
49 l.contains("PRIVATE KEY") ||
50 l.contains("RSA PRIVATE") ||
51 l.contains("EC PRIVATE")
52 ),
53 },
54 Pattern {
55 name: "JWT token",
56 detect: |l| {
57 l.split_whitespace().any(|w| {
59 let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '.' && c != '_' && c != '-');
60 let parts: Vec<&str> = w.split('.').collect();
61 parts.len() == 3
62 && parts[0].starts_with("eyJ")
63 && parts[0].len() > 10
64 && parts[1].len() > 10
65 })
66 },
67 },
68 Pattern {
69 name: "AWS access key",
70 detect: |l| {
71 l.split_whitespace().any(|w| {
72 let w = w.trim_matches(|c: char| !c.is_alphanumeric());
73 (w.starts_with("AKIA") || w.starts_with("ASIA") || w.starts_with("AROA"))
74 && w.len() == 20
75 && w.chars().all(|c| c.is_ascii_uppercase() || c.is_ascii_digit())
76 })
77 },
78 },
79 Pattern {
80 name: "AWS secret key",
81 detect: |l| {
82 let lower = l.to_lowercase();
83 (lower.contains("aws_secret") || lower.contains("aws secret"))
84 && (l.contains('=') || l.contains(':'))
85 && l.len() > 40
86 },
87 },
88 Pattern {
89 name: "GitHub/GitLab token",
90 detect: |l| {
91 let trimmed = l.trim();
92 if trimmed.starts_with('<') || trimmed.starts_with("//") || trimmed.starts_with("*") {
94 return false;
95 }
96 l.split_whitespace().any(|w| {
97 let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-');
98 if w.ends_with("xxx") || w.ends_with("_xxx") || w.contains("xxxx") {
100 return false;
101 }
102 let is_prefix_only = (w.starts_with("ghp_") && w.len() <= 5) ||
104 (w.starts_with("gho_") && w.len() <= 5) ||
105 (w.starts_with("ghs_") && w.len() <= 5) ||
106 (w.starts_with("glpat-") && w.len() <= 7) ||
107 (w.starts_with("glptt-") && w.len() <= 7) ||
108 (w.starts_with("github_pat_") && w.len() <= 12);
109 if is_prefix_only { return false; }
110 (w.starts_with("ghp_") || w.starts_with("gho_") ||
111 w.starts_with("ghs_") || w.starts_with("github_pat_") ||
112 w.starts_with("glpat-") || w.starts_with("glptt-")) && w.len() > 20
113 })
114 },
115 },
116 Pattern {
117 name: "Generic API key / token",
118 detect: |l| {
119 let lower = l.to_lowercase();
120 let has_key_word =
121 lower.contains("api_key") || lower.contains("api_secret") ||
122 lower.contains("auth_token") || lower.contains("access_token") ||
123 lower.contains("secret_key") || lower.contains("private_key") ||
124 lower.contains("password") || lower.contains("passwd") ||
125 lower.contains("auth_token");
126 let has_assignment = l.contains('=') || l.contains(':');
127 let has_value = l.split(&['=', ':'][..])
128 .nth(1)
129 .map(|v| {
130 let v = v.trim().trim_matches(|c: char| c == '"' || c == '\'' || c == '`');
131 let vl = v.to_lowercase();
132 let looks_like_secret = v.len() >= 16
134 && !v.contains(' ')
135 && !v.contains('.') && !v.starts_with("${")
137 && !v.starts_with("$(")
138 && !v.starts_with("process.env")
139 && !v.starts_with("env.")
140 && !v.starts_with("os.environ")
141 && !v.starts_with("<")
142 && !vl.eq("your_secret_here")
144 && !vl.eq("changeme")
145 && !vl.eq("placeholder")
146 && !vl.eq("todo")
147 && !vl.starts_with("your_")
148 && !vl.starts_with("my_")
149 && !vl.contains("example")
150 && !vl.contains("sample")
151 && !vl.contains("replace")
152 && !vl.contains("change_me")
153 && !vl.contains("insert")
154 && !vl.starts_with("tu_")
156 && !vl.starts_with("mi_")
157 && !vl.contains("cambiar")
158 && !vl.contains("reemplazar")
159 && !vl.contains("ejemplo")
160 && !vl.contains("aqui")
161 && !vl.contains("pon_")
162 && !vl.contains("escribe");
163 looks_like_secret
164 })
165 .unwrap_or(false);
166 has_key_word && has_assignment && has_value
167 },
168 },
169 Pattern {
170 name: "Database connection string with credentials",
171 detect: |l| {
172 let lower = l.to_lowercase();
173 (lower.contains("postgresql://") || lower.contains("mysql://") ||
174 lower.contains("mongodb://") || lower.contains("redis://") ||
175 lower.contains("libsql://") || lower.contains("turso://"))
176 && l.contains('@')
177 && !l.contains("user:password@")
178 && !l.contains("user:pass@")
179 && !l.contains("<password>")
180 },
181 },
182 Pattern {
183 name: "Stripe key",
184 detect: |l| {
185 l.split_whitespace().any(|w| {
186 let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '_');
187 (w.starts_with("sk_live_") || w.starts_with("pk_live_") ||
188 w.starts_with("rk_live_")) && w.len() > 16
189 })
190 },
191 },
192 Pattern {
193 name: "Twilio / SendGrid / Brevo key",
194 detect: |l| {
195 l.split_whitespace().any(|w| {
196 let w = w.trim_matches(|c: char| !c.is_alphanumeric() && c != '-');
197 (w.starts_with("SG.") && w.len() > 40) ||
199 (w.starts_with("AC") && w.len() == 34 && w.chars().all(|c| c.is_ascii_alphanumeric()))
201 })
202 },
203 },
204];
205
/// Returns `true` when `path` names an example/sample/template file, which
/// the scanners skip. Matching is case-insensitive.
fn is_example_file(path: &str) -> bool {
    // `.env.example`, `.env.sample` and `.env.template` are covered by these
    // general suffixes.
    const SUFFIXES: [&str; 3] = [".example", ".sample", ".template"];
    // Marker in the middle of the name, e.g. `config.example.toml` or
    // `settings.example.env`.
    const INFIXES: [&str; 2] = [".example.", ".sample."];

    let lowered = path.to_lowercase();
    SUFFIXES.iter().any(|suffix| lowered.ends_with(*suffix))
        || INFIXES.iter().any(|infix| lowered.contains(*infix))
}
219
220fn is_sensitive_file(path: &str) -> bool {
222 let lower = path.to_lowercase();
223 let filename = lower.split('/').last().unwrap_or(&lower);
224
225 matches!(filename,
227 ".env" | ".envrc" | "secrets.json" | "secrets.yaml" | "secrets.yml" |
228 "credentials.json" | "credentials.yml" | "credentials.yaml" |
229 ".netrc" | ".npmrc" | ".pypirc"
230 )
231 || (filename.starts_with(".env.") && !is_example_file(path))
233 || lower.ends_with("_rsa")
235 || lower.ends_with("_ed25519")
236 || lower.ends_with("_ecdsa")
237 || lower.ends_with(".pem")
238 || lower.ends_with(".p12")
239 || lower.ends_with(".pfx")
240 || lower.ends_with(".key")
241 || lower.ends_with(".keystore")
242 || filename == "id_rsa"
244 || filename == "id_ed25519"
245 || filename == "id_ecdsa"
246}
247
/// Returns `true` for paths the scanners never read: lock files, common
/// binary/asset formats and translation directories. Matching is
/// case-insensitive.
fn should_skip_file(path: &str) -> bool {
    const SKIPPED_SUFFIXES: [&str; 10] = [
        ".lock", ".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".wasm", ".pdf", ".zip",
    ];
    // Matched anywhere in the path; the last two cover `i18n` directories
    // with either path separator.
    const SKIPPED_FRAGMENTS: [&str; 5] = [
        "bun.lock",
        "package-lock",
        "yarn.lock",
        "/i18n/",
        "\\i18n\\",
    ];

    let lowered = path.to_lowercase();
    SKIPPED_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(*suffix))
        || SKIPPED_FRAGMENTS
            .iter()
            .any(|fragment| lowered.contains(*fragment))
}
267
268pub fn staged_paths(repo_path: &Path) -> Result<Vec<String>> {
270 use git2::Repository;
271 let repo = Repository::discover(repo_path).map_err(crate::error::ToriiError::Git)?;
272 let index = repo.index().map_err(crate::error::ToriiError::Git)?;
273 let head_tree = repo.head().ok().and_then(|h| h.peel_to_tree().ok());
274 let diff = match &head_tree {
275 Some(tree) => repo.diff_tree_to_index(Some(tree), Some(&index), None),
276 None => repo.diff_tree_to_index(None, Some(&index), None),
277 }.map_err(crate::error::ToriiError::Git)?;
278 let mut out = Vec::new();
279 diff.foreach(&mut |delta, _| {
280 if let Some(p) = delta.new_file().path() {
281 out.push(p.to_string_lossy().to_string());
282 }
283 true
284 }, None, None, None).map_err(crate::error::ToriiError::Git)?;
285 Ok(out)
286}
287
288pub fn scan_staged_with_custom(
291 repo_path: &Path,
292 rules: &[crate::toriignore::SecretRule],
293) -> Result<Vec<Finding>> {
294 use git2::Repository;
295 if rules.is_empty() { return Ok(Vec::new()); }
296
297 let mut findings = Vec::new();
298 let repo = Repository::discover(repo_path).map_err(crate::error::ToriiError::Git)?;
299 let index = repo.index().map_err(crate::error::ToriiError::Git)?;
300 let paths = staged_paths(repo_path)?;
301
302 for file_path in &paths {
303 let p = std::path::Path::new(file_path);
304 if is_example_file(file_path) || should_skip_file(file_path) { continue; }
305 let entry = match index.get_path(p, 0) { Some(e) => e, None => continue };
306 let blob = match repo.find_blob(entry.id) { Ok(b) => b, Err(_) => continue };
307 if blob.size() > max_blob_bytes() { continue; }
308 let content = String::from_utf8_lossy(blob.content()).to_string();
309
310 for (i, line) in content.lines().enumerate() {
311 let trimmed = line.trim();
312 if trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with('*') {
316 continue;
317 }
318 for rule in rules {
319 if rule.regex.is_match(line) {
320 findings.push(Finding {
321 file: file_path.clone(),
322 line: i + 1,
323 pattern_name: format!("custom: {}", rule.name),
324 preview: mask(line.trim()),
325 });
326 break;
327 }
328 }
329 }
330 }
331 Ok(findings)
332}
333
334pub fn scan_staged(repo_path: &Path) -> Result<Vec<Finding>> {
337 use git2::Repository;
338
339 let mut findings = Vec::new();
340
341 let repo = Repository::discover(repo_path)
342 .map_err(|e| crate::error::ToriiError::Git(e))?;
343 let index = repo.index()
344 .map_err(|e| crate::error::ToriiError::Git(e))?;
345
346 let head_tree = repo.head().ok()
348 .and_then(|h| h.peel_to_tree().ok());
349
350 let diff = match &head_tree {
351 Some(tree) => repo.diff_tree_to_index(Some(tree), Some(&index), None),
352 None => repo.diff_tree_to_index(None, Some(&index), None),
353 }.map_err(|e| crate::error::ToriiError::Git(e))?;
354
355 let mut staged_files: Vec<String> = Vec::new();
356 diff.foreach(
357 &mut |delta, _| {
358 if let Some(path) = delta.new_file().path() {
359 staged_files.push(path.to_string_lossy().to_string());
360 }
361 true
362 },
363 None, None, None,
364 ).map_err(|e| crate::error::ToriiError::Git(e))?;
365
366 for file_path in &staged_files {
367 let file_path_str = file_path.as_str();
368
369 if is_example_file(file_path_str) || should_skip_file(file_path_str) {
370 continue;
371 }
372
373 if is_sensitive_file(file_path_str) {
374 findings.push(Finding {
375 file: file_path.clone(),
376 line: 0,
377 pattern_name: "Sensitive file — should not be committed".to_string(),
378 preview: format!("⚠ {} should not be tracked by version control", file_path),
379 });
380 continue;
381 }
382
383 let entry = index.get_path(std::path::Path::new(file_path_str), 0);
385 let content = match entry {
386 Some(e) => {
387 match repo.find_blob(e.id) {
388 Ok(blob) => {
389 if blob.size() > max_blob_bytes() { continue; }
390 String::from_utf8_lossy(blob.content()).to_string()
391 }
392 Err(_) => continue,
393 }
394 }
395 None => continue,
396 };
397
398 for (line_num, line) in content.lines().enumerate() {
399 let trimmed = line.trim();
400 if trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with('*') {
401 continue;
402 }
403
404 for pattern in PATTERNS {
405 if (pattern.detect)(line) {
406 let preview = mask(line.trim());
407 findings.push(Finding {
408 file: file_path.clone(),
409 line: line_num + 1,
410 pattern_name: pattern.name.to_string(),
411 preview,
412 });
413 break;
414 }
415 }
416 }
417 }
418
419 Ok(findings)
420}
421
422pub fn scan_history(repo_path: &Path) -> Result<Vec<(String, Vec<Finding>)>> {
425 use git2::Repository;
426
427 let mut results = Vec::new();
428
429 let repo = Repository::discover(repo_path)
430 .map_err(|e| crate::error::ToriiError::Git(e))?;
431
432 let mut revwalk = repo.revwalk()
434 .map_err(|e| crate::error::ToriiError::Git(e))?;
435 revwalk.push_glob("*").map_err(|e| crate::error::ToriiError::Git(e))?;
436
437 let commits: Vec<(git2::Oid, String)> = revwalk
438 .filter_map(|id| id.ok())
439 .filter_map(|id| {
440 repo.find_commit(id).ok().map(|c| {
441 let subject = c.summary().unwrap_or("").to_string();
442 (id, subject)
443 })
444 })
445 .collect();
446
447 println!("🔍 Scanning {} commits...", commits.len());
448
449 for (oid, subject) in &commits {
450 let commit = match repo.find_commit(*oid) {
451 Ok(c) => c,
452 Err(_) => continue,
453 };
454
455 let commit_tree = match commit.tree() {
457 Ok(t) => t,
458 Err(_) => continue,
459 };
460 let parent_tree = commit.parent(0).ok().and_then(|p| p.tree().ok());
461
462 let diff = match repo.diff_tree_to_tree(
463 parent_tree.as_ref(),
464 Some(&commit_tree),
465 None,
466 ) {
467 Ok(d) => d,
468 Err(_) => continue,
469 };
470
471 let mut commit_findings = Vec::new();
472
473 let mut changed_files: Vec<String> = Vec::new();
475 let _ = diff.foreach(
476 &mut |delta, _| {
477 if let Some(path) = delta.new_file().path() {
478 changed_files.push(path.to_string_lossy().to_string());
479 }
480 true
481 },
482 None, None, None,
483 );
484
485 for file_path in &changed_files {
486 if is_example_file(file_path) || should_skip_file(file_path) {
487 continue;
488 }
489
490 let entry = commit_tree.get_path(std::path::Path::new(file_path));
492 let content = match entry {
493 Ok(e) => match repo.find_blob(e.id()) {
494 Ok(blob) => {
495 if blob.size() > max_blob_bytes() { continue; }
496 String::from_utf8_lossy(blob.content()).to_string()
497 }
498 Err(_) => continue,
499 },
500 Err(_) => continue,
501 };
502
503 for (line_num, line) in content.lines().enumerate() {
504 let trimmed = line.trim();
505 if trimmed.starts_with('#') || trimmed.starts_with("//") || trimmed.starts_with("/*") || trimmed.starts_with('*') {
506 continue;
507 }
508
509 for pattern in PATTERNS {
510 if (pattern.detect)(line) {
511 commit_findings.push(Finding {
512 file: file_path.clone(),
513 line: line_num + 1,
514 pattern_name: pattern.name.to_string(),
515 preview: mask(line.trim()),
516 });
517 break;
518 }
519 }
520 }
521 }
522
523 if !commit_findings.is_empty() {
524 results.push((
525 format!("{} — {}", &oid.to_string()[..8], subject),
526 commit_findings,
527 ));
528 }
529 }
530
531 Ok(results)
532}