1use serde::Serialize;
22
23#[derive(Debug, Clone, Serialize)]
26pub struct Match {
27 pub category: Category,
28 pub step_index: usize,
31 pub snippet: String,
34}
35
36#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, Hash)]
39#[serde(rename_all = "snake_case")]
40pub enum Category {
41 Email,
42 Ipv4,
43 AwsAccessKey,
44 StripeSecretKey,
45 StripePublishableKey,
46 GithubToken,
47 OpenaiKey,
48 AnthropicKey,
49 SshPrivateKeyHeader,
50 JwtToken,
51}
52
53impl Category {
54 pub fn label(self) -> &'static str {
55 match self {
56 Category::Email => "email",
57 Category::Ipv4 => "ipv4",
58 Category::AwsAccessKey => "aws_access_key",
59 Category::StripeSecretKey => "stripe_secret_key",
60 Category::StripePublishableKey => "stripe_publishable_key",
61 Category::GithubToken => "github_token",
62 Category::OpenaiKey => "openai_key",
63 Category::AnthropicKey => "anthropic_key",
64 Category::SshPrivateKeyHeader => "ssh_private_key_header",
65 Category::JwtToken => "jwt_token",
66 }
67 }
68}
69
70#[must_use]
72pub fn scan(text: &str) -> Vec<Match> {
73 scan_with_step(text, 0)
74}
75
76#[must_use]
80pub fn scan_with_step(text: &str, step_index: usize) -> Vec<Match> {
81 let mut out = Vec::new();
82 const PREFIXES: &[(Category, &str, usize)] = &[
89 (Category::AwsAccessKey, "AKIA", 16),
90 (Category::AwsAccessKey, "ASIA", 16),
91 (Category::StripeSecretKey, "sk_live_", 24),
92 (Category::StripeSecretKey, "sk_test_", 24),
93 (Category::StripePublishableKey, "pk_live_", 24),
94 (Category::StripePublishableKey, "pk_test_", 24),
95 (Category::GithubToken, "ghp_", 36),
96 (Category::GithubToken, "gho_", 36),
97 (Category::GithubToken, "ghu_", 36),
98 (Category::GithubToken, "ghs_", 36),
99 (Category::GithubToken, "ghr_", 36),
100 (Category::AnthropicKey, "sk-ant-", 32),
101 ];
102 for &(cat, prefix, min_tail) in PREFIXES {
103 scan_prefix(text, step_index, cat, prefix, min_tail, &mut out);
104 }
105
106 scan_openai_key(text, step_index, &mut out);
109
110 scan_email(text, step_index, &mut out);
112
113 scan_ipv4(text, step_index, &mut out);
115
116 const SSH_HEADERS: &[&str] = &[
118 "-----BEGIN OPENSSH PRIVATE KEY-----",
119 "-----BEGIN RSA PRIVATE KEY-----",
120 "-----BEGIN DSA PRIVATE KEY-----",
121 "-----BEGIN EC PRIVATE KEY-----",
122 "-----BEGIN PRIVATE KEY-----",
123 ];
124 for header in SSH_HEADERS {
125 if text.contains(header) {
126 out.push(Match {
127 category: Category::SshPrivateKeyHeader,
128 step_index,
129 snippet: (*header).to_string(),
130 });
131 }
132 }
133
134 scan_jwt(text, step_index, &mut out);
138
139 out
140}
141
142#[must_use]
146pub fn scan_steps(steps: &[crate::timeline::Step]) -> Vec<Match> {
147 let mut all = Vec::new();
148 for (i, step) in steps.iter().enumerate() {
149 all.extend(scan_with_step(&step.detail, i));
150 all.extend(scan_with_step(&step.label, i));
151 }
152 all
153}
154
/// Returns `true` for bytes allowed in the body of a key or token: ASCII
/// letters, ASCII digits, `_` and `-`.
fn is_token_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-')
}
160
161fn scan_prefix(
162 text: &str,
163 step_index: usize,
164 cat: Category,
165 prefix: &str,
166 min_tail: usize,
167 out: &mut Vec<Match>,
168) {
169 let bytes = text.as_bytes();
170 let prefix_bytes = prefix.as_bytes();
171 let mut i = 0;
172 while i + prefix_bytes.len() <= bytes.len() {
173 if &bytes[i..i + prefix_bytes.len()] == prefix_bytes {
174 let tail_start = i + prefix_bytes.len();
176 let mut tail = 0;
177 while tail_start + tail < bytes.len() && is_token_byte(bytes[tail_start + tail]) {
178 tail += 1;
179 }
180 if tail >= min_tail {
181 let end = tail_start + tail;
182 let snippet = snippet_around(text, i, end);
183 out.push(Match {
184 category: cat,
185 step_index,
186 snippet,
187 });
188 i = end;
189 continue;
190 }
191 }
192 i += 1;
193 }
194}
195
196fn scan_openai_key(text: &str, step_index: usize, out: &mut Vec<Match>) {
197 let bytes = text.as_bytes();
199 let mut i = 0;
200 while i + 3 <= bytes.len() {
201 if &bytes[i..i + 3] == b"sk-" {
202 if bytes[i..].starts_with(b"sk-ant-") {
204 i += 1;
205 continue;
206 }
207 let tail_start = i + 3;
208 let mut tail = 0;
209 while tail_start + tail < bytes.len() && is_token_byte(bytes[tail_start + tail]) {
210 tail += 1;
211 }
212 if tail >= 32 {
213 let end = tail_start + tail;
214 out.push(Match {
215 category: Category::OpenaiKey,
216 step_index,
217 snippet: snippet_around(text, i, end),
218 });
219 i = end;
220 continue;
221 }
222 }
223 i += 1;
224 }
225}
226
227fn scan_email(text: &str, step_index: usize, out: &mut Vec<Match>) {
228 let bytes = text.as_bytes();
232 for (i, &b) in bytes.iter().enumerate() {
233 if b != b'@' {
234 continue;
235 }
236 let mut start = i;
238 while start > 0 && is_email_local_byte(bytes[start - 1]) {
239 start -= 1;
240 }
241 if start == i {
242 continue;
243 }
244 let mut end = i + 1;
246 while end < bytes.len() && is_email_domain_byte(bytes[end]) {
247 end += 1;
248 }
249 if end == i + 1 {
250 continue;
251 }
252 let domain = &text[i + 1..end];
254 if !domain.contains('.') {
255 continue;
256 }
257 out.push(Match {
258 category: Category::Email,
259 step_index,
260 snippet: text[start..end].to_string(),
261 });
262 }
263}
264
/// Returns `true` for bytes accepted in the local part of an e-mail address
/// (the text before `@`): ASCII letters, ASCII digits, `.`, `_`, `-` and `+`.
fn is_email_local_byte(b: u8) -> bool {
    match b {
        b'.' | b'_' | b'-' | b'+' => true,
        other => other.is_ascii_alphanumeric(),
    }
}
268
/// Returns `true` for bytes accepted in the domain part of an e-mail address
/// (the text after `@`): ASCII letters, ASCII digits, `.` and `-`.
fn is_email_domain_byte(b: u8) -> bool {
    matches!(b, b'.' | b'-') || b.is_ascii_alphanumeric()
}
272
273fn scan_ipv4(text: &str, step_index: usize, out: &mut Vec<Match>) {
274 let bytes = text.as_bytes();
275 let mut i = 0;
276 while i < bytes.len() {
277 if !bytes[i].is_ascii_digit() {
278 i += 1;
279 continue;
280 }
281 if let Some(end) = parse_ipv4_at(bytes, i) {
283 out.push(Match {
284 category: Category::Ipv4,
285 step_index,
286 snippet: text[i..end].to_string(),
287 });
288 i = end;
289 } else {
290 while i < bytes.len() && bytes[i].is_ascii_digit() {
292 i += 1;
293 }
294 }
295 }
296}
297
/// Tries to parse a dotted-quad IPv4 address beginning at `bytes[start]`.
///
/// Accepts four groups of one to three ASCII digits, each with a value of at
/// most 255, separated by single dots. The address must not be followed
/// directly by another digit. Leading zeros are accepted.
///
/// Returns the index one past the last byte of the address, or `None` when no
/// address starts at `start` (including when `start` is out of range).
fn parse_ipv4_at(bytes: &[u8], start: usize) -> Option<usize> {
    const OCTETS: usize = 4;
    const MAX_OCTET_DIGITS: usize = 3;
    const MAX_OCTET_VALUE: u32 = 255;

    let mut cursor = start;
    for octet in 0..OCTETS {
        // Consume up to three digits, accumulating their decimal value.
        let (len, value) = bytes
            .get(cursor..)?
            .iter()
            .take(MAX_OCTET_DIGITS)
            .take_while(|b| b.is_ascii_digit())
            .fold((0usize, 0u32), |(len, value), &b| {
                (len + 1, value * 10 + u32::from(b - b'0'))
            });
        if len == 0 || value > MAX_OCTET_VALUE {
            return None;
        }
        cursor += len;

        let is_last = octet + 1 == OCTETS;
        if !is_last {
            if bytes.get(cursor) != Some(&b'.') {
                return None;
            }
            cursor += 1;
        }
    }

    // A digit right after the fourth group means that group was longer than
    // three digits, so this is not an address.
    match bytes.get(cursor) {
        Some(next) if next.is_ascii_digit() => None,
        _ => Some(cursor),
    }
}
328
329fn scan_jwt(text: &str, step_index: usize, out: &mut Vec<Match>) {
330 let bytes = text.as_bytes();
333 let mut i = 0;
334 while i + 3 <= bytes.len() {
335 if &bytes[i..i + 3] == b"eyJ" {
336 if let Some(end) = parse_jwt_at(bytes, i) {
337 out.push(Match {
338 category: Category::JwtToken,
339 step_index,
340 snippet: text[i..end].to_string(),
341 });
342 i = end;
343 continue;
344 }
345 }
346 i += 1;
347 }
348}
349
350fn parse_jwt_at(bytes: &[u8], start: usize) -> Option<usize> {
351 let mut pos = start;
352 for seg in 0..3 {
353 let seg_start = pos;
354 while pos < bytes.len() && is_base64url_byte(bytes[pos]) {
355 pos += 1;
356 }
357 if pos - seg_start < 16 {
358 return None;
359 }
360 if seg < 2 {
361 if pos >= bytes.len() || bytes[pos] != b'.' {
362 return None;
363 }
364 pos += 1;
365 }
366 }
367 Some(pos)
368}
369
/// Returns `true` for bytes of the base64url alphabet: ASCII letters, ASCII
/// digits, `-` and `_`.
fn is_base64url_byte(b: u8) -> bool {
    matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_')
}
373
/// Returns an owned copy of `text[start..end]`, i.e. exactly the matched bytes
/// with no surrounding context and no redaction.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range on character boundaries of
/// `text`.
fn snippet_around(text: &str, start: usize, end: usize) -> String {
    let matched: &str = &text[start..end];
    String::from(matched)
}
380
#[cfg(test)]
mod tests {
    use super::*;

    /// True when at least one finding in `found` has category `wanted`.
    fn has(found: &[Match], wanted: Category) -> bool {
        found.iter().any(|m| m.category == wanted)
    }

    /// Number of findings in `found` with category `wanted`.
    fn count(found: &[Match], wanted: Category) -> usize {
        found.iter().filter(|m| m.category == wanted).count()
    }

    #[test]
    fn finds_aws_access_key() {
        let found = scan("export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
        assert!(has(&found, Category::AwsAccessKey));
    }

    #[test]
    fn finds_stripe_keys() {
        let secret = scan("key: sk_live_aaaaaaaaaaaaaaaaaaaaaaaaaa");
        assert!(has(&secret, Category::StripeSecretKey));

        let publishable = scan("pub: pk_test_bbbbbbbbbbbbbbbbbbbbbbbbbb");
        assert!(has(&publishable, Category::StripePublishableKey));
    }

    #[test]
    fn finds_github_tokens() {
        let token = format!("ghp_{}", "a".repeat(36));
        assert!(has(&scan(&token), Category::GithubToken));
    }

    #[test]
    fn distinguishes_openai_from_anthropic() {
        let openai = format!("sk-{}", "a".repeat(48));
        let anthropic = format!("sk-ant-{}", "a".repeat(40));

        let from_openai = scan(&openai);
        assert!(has(&from_openai, Category::OpenaiKey));
        assert!(!has(&from_openai, Category::AnthropicKey));

        let from_anthropic = scan(&anthropic);
        assert!(has(&from_anthropic, Category::AnthropicKey));
        assert!(!has(&from_anthropic, Category::OpenaiKey));
    }

    #[test]
    fn finds_emails() {
        let found = scan("contact alice+test@example.com and bob@x.io");
        assert_eq!(count(&found, Category::Email), 2);
    }

    #[test]
    fn rejects_bare_at_without_domain_dot() {
        let found = scan("twitter handle @alice here");
        assert!(!has(&found, Category::Email));
    }

    #[test]
    fn finds_ipv4_but_rejects_out_of_range() {
        let valid = scan("connect to 10.0.0.1 and 192.168.1.50");
        assert_eq!(count(&valid, Category::Ipv4), 2);

        let invalid = scan("fake 999.999.999.999 and 300.1.1.1");
        assert!(!has(&invalid, Category::Ipv4));
    }

    #[test]
    fn finds_ssh_private_key_header() {
        let found = scan("-----BEGIN OPENSSH PRIVATE KEY-----\nfake");
        assert!(has(&found, Category::SshPrivateKeyHeader));
    }

    #[test]
    fn finds_jwt() {
        // Three 20+ byte base64url segments, the first starting with "eyJ".
        let segment = "a".repeat(20);
        let jwt = format!("eyJ{}.{}.{}", segment, segment, segment);
        assert!(has(&scan(&jwt), Category::JwtToken));
    }

    #[test]
    fn rejects_non_jwt_starting_with_eyj() {
        let found = scan("eyJ{not a jwt");
        assert!(!has(&found, Category::JwtToken));
    }

    #[test]
    fn scan_steps_indexes_by_step_position() {
        use crate::timeline::{tool_result_step, user_text_step};

        let clean = user_text_step("clean input");
        let leaky = tool_result_step(
            "t1",
            "secret AKIAIOSFODNN7EXAMPLE found",
            Some("Bash"),
            None,
        );
        let steps = vec![clean, leaky];

        let found = scan_steps(&steps);
        let hit_in_second_step = found
            .iter()
            .any(|m| m.step_index == 1 && m.category == Category::AwsAccessKey);
        assert!(hit_in_second_step);
    }

    #[test]
    fn empty_input_returns_no_matches() {
        let found = scan("");
        assert!(found.is_empty());
    }
}