1use std::path::Path;
8
9use anyhow::Result;
10use once_cell::sync::Lazy;
11use regex::Regex;
12use walkdir::WalkDir;
13
14use crate::finding::{redact, Category, Finding, Severity};
15use crate::scanner::{ScanContext, Scanner};
16
17struct SecretPattern {
20 name: &'static str,
21 severity: Severity,
22 keep: usize,
24 re: Regex,
25}
26
27static PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
28 vec![
29 SecretPattern {
31 name: "Anthropic API key",
32 severity: Severity::Critical,
33 keep: 10,
34 re: Regex::new(r"sk-ant-[a-z]{2,8}\d{0,6}-[A-Za-z0-9_-]{20,}").unwrap(),
36 },
37 SecretPattern {
38 name: "OpenAI API key",
39 severity: Severity::Critical,
40 keep: 7,
41 re: Regex::new(r"sk-(?:proj-)?[A-Za-z0-9_-]{20}T3BlbkFJ[A-Za-z0-9_-]{20}").unwrap(),
42 },
43 SecretPattern {
44 name: "OpenAI project key",
45 severity: Severity::Critical,
46 keep: 8,
47 re: Regex::new(r"sk-proj-[A-Za-z0-9_-]{48,}").unwrap(),
48 },
49 SecretPattern {
50 name: "xAI / Grok API key",
51 severity: Severity::Critical,
52 keep: 4,
53 re: Regex::new(r"xai-[A-Za-z0-9_-]{32,}").unwrap(),
54 },
55 SecretPattern {
56 name: "OpenRouter API key",
57 severity: Severity::Critical,
58 keep: 9,
59 re: Regex::new(r"sk-or-v1-[A-Za-z0-9_-]{48,}").unwrap(),
60 },
61 SecretPattern {
62 name: "Google AI / Gemini key",
63 severity: Severity::Critical,
64 keep: 4,
65 re: Regex::new(r"AIza[0-9A-Za-z\-_]{35}").unwrap(),
66 },
67 SecretPattern {
68 name: "Hugging Face token",
69 severity: Severity::High,
70 keep: 3,
71 re: Regex::new(r"hf_[A-Za-z0-9]{34}").unwrap(),
72 },
73 SecretPattern {
75 name: "AWS access key ID",
76 severity: Severity::Critical,
77 keep: 4,
78 re: Regex::new(r"(?:A3T[A-Z0-9]|AKIA|ASIA|ABIA|ACCA)[A-Z0-9]{16}").unwrap(),
79 },
80 SecretPattern {
81 name: "GitHub personal access token",
82 severity: Severity::High,
83 keep: 4,
84 re: Regex::new(r"ghp_[0-9a-zA-Z]{36}").unwrap(),
85 },
86 SecretPattern {
87 name: "GitHub OAuth token",
88 severity: Severity::High,
89 keep: 4,
90 re: Regex::new(r"gho_[0-9a-zA-Z]{36}").unwrap(),
91 },
92 SecretPattern {
93 name: "GitHub fine-grained PAT",
94 severity: Severity::High,
95 keep: 11,
96 re: Regex::new(r"github_pat_[0-9a-zA-Z_]{82}").unwrap(),
97 },
98 SecretPattern {
99 name: "GitLab personal access token",
100 severity: Severity::High,
101 keep: 7,
102 re: Regex::new(r"glpat-[0-9a-zA-Z\-_]{20}").unwrap(),
103 },
104 SecretPattern {
106 name: "PEM private key",
107 severity: Severity::Critical,
108 keep: 11,
109 re: Regex::new(r"-----BEGIN (?:RSA |DSA |EC |OPENSSH )?PRIVATE KEY").unwrap(),
110 },
111 SecretPattern {
112 name: "JWT token",
113 severity: Severity::High,
114 keep: 5,
115 re: Regex::new(r"eyJ[A-Za-z0-9_-]{4,}\.eyJ[A-Za-z0-9_-]{4,}\.[A-Za-z0-9_-]{4,}").unwrap(),
116 },
117 SecretPattern {
118 name: "Database connection string with credentials",
119 severity: Severity::High,
120 keep: 8,
121 re: Regex::new(r"(?i)(?:postgres|mysql|mongodb|redis)://[^:@\s]{1,64}:[^@\s]{1,64}@").unwrap(),
122 },
123 SecretPattern {
124 name: "Generic high-entropy secret",
125 severity: Severity::Medium,
126 keep: 6,
127 re: Regex::new(
130 r#"(?i)(?:api[_-]?key|secret[_-]?key?|auth[_-]?token|access[_-]?token|password|passwd|pwd)\s*[=:]\s*['"]?([A-Za-z0-9/+!@#$%^&*]{32,})['"]?"#,
131 ).unwrap(),
132 },
133 ]
136});
137
/// File extensions (lowercase, without the leading dot) whose files the
/// directory walk skips without reading them.
const SKIP_EXTENSIONS: &[&str] = &[
    // Images
    "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "svg",
    // Audio and video
    "mp3", "mp4", "wav", "ogg", "flac",
    // Archives and compressed data
    "zip", "gz", "bz2", "tar", "xz", "7z",
    // Office documents
    "pdf", "doc", "docx", "xls", "xlsx",
    // Executables, libraries and other compiled artifacts
    "bin", "exe", "dll", "so", "dylib", "wasm", "class",
];
146
/// Size ceiling in bytes (8 MiB); files whose metadata reports a larger size
/// are not read.
const MAX_FILE_SIZE: u64 = 8 << 20;
149
/// Scanner that searches the files under the scan root for credentials
/// stored in plain text. Carries no state of its own.
pub struct SecretsScanner;
153
154impl Scanner for SecretsScanner {
155 fn name(&self) -> &'static str {
156 "secrets"
157 }
158
159 fn scan(&self, ctx: &ScanContext) -> Result<Vec<Finding>> {
160 let mut findings = Vec::new();
161 scan_dir(&ctx.root, &mut findings);
162 Ok(findings)
163 }
164}
165
166fn scan_dir(root: &Path, findings: &mut Vec<Finding>) {
167 for entry in WalkDir::new(root)
168 .follow_links(false)
169 .into_iter()
170 .filter_map(|e| e.ok())
171 .filter(|e| e.file_type().is_file())
172 {
173 let path = entry.path();
174
175 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
177 if SKIP_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
178 continue;
179 }
180 }
181
182 if let Ok(meta) = entry.metadata() {
184 if meta.len() > MAX_FILE_SIZE {
185 continue;
186 }
187 }
188
189 if let Ok(bytes) = std::fs::read(path) {
191 let content = String::from_utf8_lossy(&bytes);
192 scan_content(&content, path, findings);
193 }
194 }
195}
196
197fn scan_content(content: &str, path: &Path, findings: &mut Vec<Finding>) {
198 for (line_no, line) in content.lines().enumerate() {
199 for pattern in PATTERNS.iter() {
200 let Some(caps) = pattern.re.captures(line) else {
204 continue;
205 };
206 let matched = caps.get(1).or_else(|| caps.get(0));
207 let Some(m) = matched else { continue };
208
209 let evidence = redact(m.as_str(), pattern.keep);
210 findings.push(
211 Finding::new(
212 pattern.severity,
213 Category::SecretDetection,
214 format!("{} detected", pattern.name),
215 format!(
216 "A {} was found in '{}'. This credential may have been \
217 pasted into a conversation or written by an agent and \
218 is now stored in plain text.",
219 pattern.name,
220 path.display()
221 ),
222 path,
223 "Rotate this credential immediately. Remove the file or \
224 redact the line. Consider running `ocls` again after \
225 rotation to verify the credential no longer appears.",
226 )
227 .with_line(line_no + 1)
228 .with_evidence(evidence),
229 );
230 break;
232 }
233 }
234}
235
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `scan_content` over `content` with a dummy path and returns the findings.
    fn findings_for(content: &str) -> Vec<Finding> {
        let mut findings = Vec::new();
        scan_content(content, &PathBuf::from("/test/file.json"), &mut findings);
        findings
    }

    // --- Positive detections ---

    #[test]
    fn detects_anthropic_key() {
        let content = r#"{"token": "sk-ant-api03-abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGH"}"#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect Anthropic key");
        assert_eq!(findings[0].severity, Severity::Critical);
        assert!(findings[0].title.contains("Anthropic"));
    }

    #[test]
    fn detects_openai_key() {
        let content = r#"api_key = "sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect OpenAI project key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_xai_key() {
        let content = r#"key = "xai-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect xAI key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_openrouter_key() {
        let content =
            r#"key = "sk-or-v1-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz12345678""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect OpenRouter key");
    }

    #[test]
    fn detects_google_ai_key() {
        let content = r#"key = "AIzaABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghi""#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect Google AI key");
    }

    #[test]
    fn detects_aws_key() {
        let content = "access_key = AKIAIOSFODNN7EXAMPLE";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect AWS key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_github_pat() {
        let content = "token = ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect GitHub PAT");
        assert_eq!(findings[0].severity, Severity::High);
    }

    #[test]
    fn detects_gitlab_token() {
        let content = "token = glpat-abcdefghijklmnopqrst";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect GitLab token");
    }

    #[test]
    fn detects_private_key_header() {
        let content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEo...";
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect PEM private key");
        assert_eq!(findings[0].severity, Severity::Critical);
    }

    #[test]
    fn detects_database_url_with_credentials() {
        let content = r#"DATABASE_URL=postgres://admin:supersecret@localhost:5432/mydb"#;
        let findings = findings_for(content);
        assert!(!findings.is_empty(), "should detect DB connection string");
    }

    // --- False-positive guards ---

    #[test]
    fn no_false_positive_on_empty_line() {
        assert!(findings_for("").is_empty());
    }

    #[test]
    fn no_false_positive_on_safe_json() {
        let content = r#"{"model": "claude-3-5-sonnet-20241022", "max_tokens": 4096}"#;
        assert!(findings_for(content).is_empty());
    }

    #[test]
    fn no_false_positive_on_env_variable_reference() {
        // `${...}` contains braces, which the generic rule's value class does
        // not accept, so a placeholder must never be reported as a secret.
        let content = r#"api_key = "${OPENAI_API_KEY}""#;
        let findings = findings_for(content);
        assert!(
            findings.is_empty(),
            "an environment-variable placeholder is not a secret"
        );
    }

    // --- Finding metadata ---

    #[test]
    fn evidence_is_redacted() {
        let content = "token = AKIAIOSFODNN7EXAMPLE";
        let findings = findings_for(content);
        // Fail loudly if the finding or its evidence is missing, instead of
        // silently skipping the redaction checks.
        let first = findings
            .first()
            .expect("AWS key should produce a finding");
        let ev = first
            .evidence
            .as_deref()
            .expect("finding should carry evidence");
        assert!(ev.contains("****"), "evidence must be redacted: {}", ev);
        assert!(
            !ev.contains("EXAMPLE"),
            "evidence must not contain full secret"
        );
    }

    #[test]
    fn finding_has_correct_line_number() {
        let content = "normal line\ntoken = AKIAIOSFODNN7EXAMPLE\nanother line";
        let findings = findings_for(content);
        assert!(!findings.is_empty());
        assert_eq!(findings[0].line, Some(2));
    }
}