keyhog_scanner/context/
inference.rs1use super::{documentation::documentation_line_flags, CodeContext};
2
/// Byte length of the `test_` filename prefix that `is_test_file` looks for.
const TEST_PREFIX_LEN: usize = 5;
/// How many lines above the current one are searched for an encrypted-block marker.
const ENCRYPTED_BLOCK_LOOKBACK_LINES: usize = 10;
/// How many lines above the current one are searched for a test-function marker.
const TEST_FUNCTION_LOOKBACK_LINES: usize = 100;
8
9pub fn infer_context(lines: &[&str], line_idx: usize, file_path: Option<&str>) -> CodeContext {
11 let documentation_lines = documentation_line_flags(lines);
12 infer_context_with_documentation(lines, line_idx, file_path, &documentation_lines)
13}
14
15pub fn is_known_example_credential(credential: &str) -> bool {
19 let upper = credential.to_uppercase();
20
21 if upper.ends_with("EXAMPLE") || upper.ends_with("EXAMPLEKEY") {
23 return true;
24 }
25
26 let body = credential.as_bytes();
28 let x_count = body.iter().filter(|&&b| b == b'x' || b == b'X').count();
29 if body.len() >= 16 && x_count > body.len() * 3 / 4 {
30 return true;
31 }
32
33 if is_hex_sequential_placeholder(credential) {
35 return true;
36 }
37
38 if is_empty_input_hash(credential) {
40 return true;
41 }
42
43 is_sequential_placeholder(credential)
45}
46
/// Returns `true` when `credential` is, ignoring ASCII case, the MD5, SHA-1 or
/// SHA-256 hex digest of the empty input.
fn is_empty_input_hash(credential: &str) -> bool {
    // Digests of zero-length input, in order: MD5 (32), SHA-1 (40), SHA-256 (64).
    const EMPTY_INPUT_DIGESTS: [&str; 3] = [
        "d41d8cd98f00b204e9800998ecf8427e",
        "da39a3ee5e6b4b0d3255bfef95601890afd80709",
        "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
    ];

    EMPTY_INPUT_DIGESTS
        .iter()
        .any(|digest| credential.eq_ignore_ascii_case(digest))
}
59
60pub fn is_sequential_placeholder(credential: &str) -> bool {
61 let body = crate::confidence::KNOWN_PREFIXES
65 .iter()
66 .find_map(|prefix| credential.strip_prefix(prefix))
67 .unwrap_or(credential);
68 if body.len() < 8 {
69 return false;
70 }
71
72 let bytes = body.as_bytes();
73 if bytes.iter().all(|&byte| byte == bytes[0]) {
74 return true;
75 }
76 if bytes.len() >= 8 {
77 let pair = &bytes[..2];
78 if bytes
79 .chunks(2)
80 .all(|chunk| chunk == pair || (chunk.len() < 2 && chunk[0] == pair[0]))
81 {
82 return true;
83 }
84 }
85 false
86}
87
88fn is_hex_sequential_placeholder(credential: &str) -> bool {
89 let body = crate::confidence::KNOWN_PREFIXES
93 .iter()
94 .find_map(|prefix| credential.strip_prefix(prefix))
95 .unwrap_or(credential);
96
97 if body.len() < 16 || !body.bytes().all(|b| b.is_ascii_hexdigit()) {
98 return false;
99 }
100
101 let bytes: Vec<u8> = body.bytes().collect();
102
103 if bytes.len() >= 16 {
105 let ascending = bytes
106 .windows(2)
107 .filter(|w| {
108 w[1] == w[0] + 1 || (w[0] == b'9' && w[1] == b'a') || (w[0] == b'f' && w[1] == b'0')
109 })
110 .count();
111 let descending = bytes
112 .windows(2)
113 .filter(|w| {
114 w[1] + 1 == w[0] || (w[0] == b'a' && w[1] == b'9') || (w[0] == b'0' && w[1] == b'f')
115 })
116 .count();
117 let threshold = (bytes.len() - 1) * 9 / 10;
118 if ascending > threshold || descending > threshold {
119 return true;
120 }
121 }
122
123 let pairs: Vec<&[u8]> = bytes.chunks(2).filter(|chunk| chunk.len() == 2).collect();
124 if pairs.len() < 8 {
125 return false;
126 }
127
128 let first_chars: Vec<u8> = pairs
129 .iter()
130 .map(|pair| pair[0].to_ascii_lowercase())
131 .collect();
132 let ascending = first_chars
133 .windows(2)
134 .filter(|window| {
135 window[1] == window[0] + 1
136 || (window[0] == b'f' && window[1] == b'a')
137 || (window[0] == b'9' && window[1] == b'a')
138 || (window[0] == b'9' && window[1] == b'0')
139 })
140 .count();
141
142 let second_chars: Vec<u8> = pairs
143 .iter()
144 .map(|pair| pair[1].to_ascii_lowercase())
145 .collect();
146 let ascending2 = second_chars
147 .windows(2)
148 .filter(|window| {
149 window[1] == window[0] + 1
150 || (window[0] == b'f' && window[1] == b'0')
151 || (window[0] == b'9' && window[1] == b'0')
152 || (window[0] == b'9' && window[1] == b'a')
153 })
154 .count();
155
156 let threshold = pairs.len() * 9 / 10;
157 ascending > threshold && ascending2 > threshold
158}
159
160pub struct ContextRegions {
168 encrypted: Vec<bool>,
169 test_function: Vec<bool>,
170}
171
172impl ContextRegions {
173 pub fn new(lines: &[&str]) -> Self {
175 ContextRegions {
176 encrypted: encrypted_block_flags(lines),
177 test_function: test_function_flags(lines),
178 }
179 }
180
181 fn is_encrypted(&self, line_idx: usize) -> bool {
182 self.encrypted.get(line_idx).copied().unwrap_or(false)
183 }
184
185 fn is_test_function(&self, line_idx: usize) -> bool {
186 self.test_function.get(line_idx).copied().unwrap_or(false)
187 }
188}
189
190fn encrypted_block_flags(lines: &[&str]) -> Vec<bool> {
197 let mut flags = vec![false; lines.len()];
198 let mut lines_since_marker = ENCRYPTED_BLOCK_LOOKBACK_LINES + 1;
199 for (idx, line) in lines.iter().enumerate() {
200 if is_encrypted_marker_line(line.trim()) {
201 lines_since_marker = 0;
202 }
203 flags[idx] = lines_since_marker <= ENCRYPTED_BLOCK_LOOKBACK_LINES;
204 lines_since_marker = lines_since_marker.saturating_add(1);
205 }
206 flags
207}
208
209fn is_encrypted_marker_line(trimmed: &str) -> bool {
210 trimmed.starts_with("$ANSIBLE_VAULT")
211 || trimmed.starts_with("ENC[")
212 || memchr::memmem::find(trimmed.as_bytes(), b"sops:").is_some()
213 || memchr::memmem::find(trimmed.as_bytes(), b"sealed-secrets").is_some()
214 || trimmed.starts_with("-----BEGIN PGP MESSAGE-----")
215 || trimmed.starts_with("-----BEGIN AGE ENCRYPTED")
216}
217
/// Classification of a single line when scanning for test functions,
/// as produced by `classify_test_scan_line`.
#[derive(Clone, Copy, PartialEq)]
enum TestScanMark {
    /// The line opens a test (test definition or test attribute).
    TestStart,
    /// The line opens a non-test definition, ending any preceding test scope.
    Boundary,
    /// The line is neither a test start nor a boundary.
    Neither,
}
228
229fn test_function_flags(lines: &[&str]) -> Vec<bool> {
237 let marks: Vec<TestScanMark> = (0..lines.len())
238 .map(|idx| classify_test_scan_line(lines, idx))
239 .collect();
240
241 let mut flags = vec![false; lines.len()];
242 let mut last_interesting: Option<usize> = None;
244 for line_idx in 0..lines.len() {
245 if let Some(prev_idx) = last_interesting {
246 if line_idx - prev_idx <= TEST_FUNCTION_LOOKBACK_LINES {
247 flags[line_idx] = marks[prev_idx] == TestScanMark::TestStart;
248 }
249 }
250 if marks[line_idx] != TestScanMark::Neither {
254 last_interesting = Some(line_idx);
255 }
256 }
257 flags
258}
259
260fn classify_test_scan_line(lines: &[&str], candidate_line_idx: usize) -> TestScanMark {
264 let trimmed = lines[candidate_line_idx].trim();
265
266 if trimmed.starts_with("def test_")
267 || trimmed.starts_with("class Test")
268 || trimmed.starts_with("it(")
269 || trimmed.starts_with("describe(")
270 || trimmed.starts_with("test(")
271 || trimmed == "#[test]"
272 || trimmed == concat!("#[cfg(", "test)]")
273 || trimmed.starts_with("#[tokio::test")
274 || trimmed.starts_with("func Test")
275 || trimmed == "@Test"
276 {
277 return TestScanMark::TestStart;
278 }
279
280 if trimmed.starts_with("class ") {
282 return TestScanMark::Boundary;
283 }
284
285 if (trimmed.starts_with("def ") || trimmed.starts_with("async def "))
286 && !trimmed.contains("def test_")
287 {
288 return TestScanMark::Boundary;
289 }
290
291 if trimmed.starts_with("func ") && !trimmed.contains("func Test") {
292 return TestScanMark::Boundary;
293 }
294
295 if (trimmed.starts_with("fn ")
296 || trimmed.starts_with("pub fn ")
297 || trimmed.starts_with("async fn ")
298 || trimmed.starts_with("pub async fn "))
299 && !trimmed.contains("fn test_")
300 {
301 let pre_start = candidate_line_idx.saturating_sub(3);
302 for pre_line in &lines[pre_start..candidate_line_idx] {
303 let pre_trimmed = pre_line.trim();
304 if pre_trimmed == "#[test]"
305 || pre_trimmed == concat!("#[cfg(", "test)]")
306 || pre_trimmed.starts_with("#[tokio::test")
307 || pre_trimmed.starts_with("#[test")
308 || pre_trimmed == "@Test"
309 {
310 return TestScanMark::TestStart;
311 }
312 }
313 return TestScanMark::Boundary;
314 }
315
316 if trimmed.starts_with("function ") && !trimmed.contains("function test") {
317 return TestScanMark::Boundary;
318 }
319
320 TestScanMark::Neither
321}
322
323pub fn infer_context_with_documentation(
325 lines: &[&str],
326 line_idx: usize,
327 file_path: Option<&str>,
328 documentation_lines: &[bool],
329) -> CodeContext {
330 if line_idx >= lines.len() {
331 return CodeContext::Unknown;
332 }
333
334 let line = lines[line_idx];
335 let trimmed = line.trim();
336
337 if file_path.is_some_and(is_test_file) {
338 return CodeContext::TestCode;
339 }
340 if is_in_encrypted_block(lines, line_idx) {
341 return CodeContext::Encrypted;
342 }
343 if is_commented_assignment_line(trimmed) {
344 return CodeContext::Assignment;
345 }
346 if is_comment_line(trimmed) {
347 return CodeContext::Comment;
348 }
349 if documentation_lines.get(line_idx).copied().unwrap_or(false) {
350 return CodeContext::Documentation;
351 }
352 if is_in_test_function(lines, line_idx) {
353 return CodeContext::TestCode;
354 }
355 if is_assignment_line(trimmed) {
356 return CodeContext::Assignment;
357 }
358 infer_default_context(trimmed)
359}
360
361pub fn infer_context_with_regions(
365 lines: &[&str],
366 line_idx: usize,
367 file_path: Option<&str>,
368 documentation_lines: &[bool],
369 regions: &ContextRegions,
370) -> CodeContext {
371 if line_idx >= lines.len() {
372 return CodeContext::Unknown;
373 }
374
375 let trimmed = lines[line_idx].trim();
376
377 if file_path.is_some_and(is_test_file) {
378 return CodeContext::TestCode;
379 }
380 if regions.is_encrypted(line_idx) {
381 return CodeContext::Encrypted;
382 }
383 if is_commented_assignment_line(trimmed) {
384 return CodeContext::Assignment;
385 }
386 if is_comment_line(trimmed) {
387 return CodeContext::Comment;
388 }
389 if documentation_lines.get(line_idx).copied().unwrap_or(false) {
390 return CodeContext::Documentation;
391 }
392 if regions.is_test_function(line_idx) {
393 return CodeContext::TestCode;
394 }
395 if is_assignment_line(trimmed) {
396 return CodeContext::Assignment;
397 }
398 infer_default_context(trimmed)
399}
400
401fn is_test_file(path: &str) -> bool {
402 let filename = path.rsplit(['/', '\\']).next().unwrap_or(path);
404 let stem = filename.split('.').next().unwrap_or(filename);
405
406 stem.len() > TEST_PREFIX_LEN
407 && stem
408 .as_bytes()
409 .get(..TEST_PREFIX_LEN)
410 .is_some_and(|bytes| bytes.eq_ignore_ascii_case(b"test_"))
411 || filename.ends_with("_test.go")
412 || filename.ends_with("_test.rs")
413 || filename.ends_with("_test.py")
414 || filename.ends_with("_test.rb")
415 || filename.ends_with("_test.java")
416 || filename.ends_with("Test.java")
417 || filename.ends_with("Tests.java")
418 || filename.ends_with(".test.js")
419 || filename.ends_with(".test.ts")
420 || filename.ends_with(".spec.js")
421 || filename.ends_with(".spec.ts")
422 || path.split(['/', '\\']).any(|component| {
423 component.eq_ignore_ascii_case("test")
424 || component.eq_ignore_ascii_case("tests")
425 || component.eq_ignore_ascii_case("__tests__")
426 || component.eq_ignore_ascii_case("fixtures")
427 || component.eq_ignore_ascii_case("testdata")
428 || component.eq_ignore_ascii_case("spec")
429 })
430}
431
432fn infer_default_context(trimmed: &str) -> CodeContext {
433 if memchr::memchr(b'"', trimmed.as_bytes()).is_some()
434 || memchr::memchr(b'\'', trimmed.as_bytes()).is_some()
435 {
436 CodeContext::StringLiteral
437 } else {
438 CodeContext::Unknown
439 }
440}
441
/// Returns `true` when an already-trimmed line starts with a recognised
/// comment opener.
///
/// `--` counts as a comment opener, but `---` does not.
fn is_comment_line(trimmed: &str) -> bool {
    const COMMENT_OPENERS: [&str; 9] = [
        "//", "#", "/*", "<!--", "<#", "* ", "*/", "rem ", "REM ",
    ];

    if COMMENT_OPENERS
        .iter()
        .any(|opener| trimmed.starts_with(*opener))
    {
        return true;
    }

    // Exactly two leading dashes: a third dash disqualifies the line.
    trimmed.starts_with("--") && !trimmed.starts_with("---")
}
454
455fn is_commented_assignment_line(trimmed: &str) -> bool {
456 let Some(comment_body) = strip_comment_prefix(trimmed) else {
457 return false;
458 };
459 let body = comment_body
460 .trim_start()
461 .trim_end_matches("*/")
462 .trim_end_matches("-->")
463 .trim();
464 has_assignment_operator(body) || has_yaml_mapping(body)
465}
466
/// Removes a leading comment opener from an already-trimmed line.
///
/// Returns the text after the opener, or `None` when the line does not start
/// with one. A line starting with `---` is never treated as a `--` comment.
fn strip_comment_prefix(trimmed: &str) -> Option<&str> {
    const COMMENT_OPENERS: [&str; 9] = [
        "//", "#", "--", "/*", "<!--", "<#", "* ", "rem ", "REM ",
    ];

    // No other opener begins with a dash, so a `---` line can be rejected up front.
    if trimmed.starts_with("---") {
        return None;
    }

    COMMENT_OPENERS
        .iter()
        .find_map(|opener| trimmed.strip_prefix(*opener))
}
488
489fn is_assignment_line(trimmed: &str) -> bool {
490 has_assignment_operator(trimmed) || has_yaml_mapping(trimmed)
491}
492
493pub(crate) fn has_assignment_operator(trimmed: &str) -> bool {
494 for operator in [":=", "->", "="] {
495 if let Some(pos) = trimmed.find(operator) {
496 if !is_comparison_operator(trimmed, pos, operator) {
497 return true;
498 }
499 }
500 }
501 false
502}
503
504fn has_yaml_mapping(trimmed: &str) -> bool {
505 memchr::memmem::find(trimmed.as_bytes(), b": ").is_some() && !trimmed.starts_with("- ")
506}
507
/// Decides whether the `operator` found at byte offset `pos` in `trimmed` is
/// part of a comparison rather than an assignment.
///
/// Only `=` can be a comparison: it is one when the character before it is
/// `=`, `!`, `>` or `<`, or when the character after it is `=`. Any other
/// operator always yields `false`.
fn is_comparison_operator(trimmed: &str, pos: usize, operator: &str) -> bool {
    if operator != "=" {
        return false;
    }

    let preceding = &trimmed[..pos];
    let following = &trimmed[pos + operator.len()..];
    preceding.ends_with(['=', '!', '>', '<']) || following.starts_with('=')
}
517
518fn is_in_encrypted_block(lines: &[&str], line_idx: usize) -> bool {
519 let start = line_idx.saturating_sub(ENCRYPTED_BLOCK_LOOKBACK_LINES);
520 lines
521 .iter()
522 .take(line_idx + 1)
523 .skip(start)
524 .any(|line| is_encrypted_marker_line(line.trim()))
525}
526
527fn is_in_test_function(lines: &[&str], line_idx: usize) -> bool {
528 let start = line_idx.saturating_sub(TEST_FUNCTION_LOOKBACK_LINES);
529 for candidate_line_idx in (start..line_idx).rev() {
530 let trimmed = lines[candidate_line_idx].trim();
531
532 if trimmed.starts_with("def test_")
533 || trimmed.starts_with("class Test")
534 || trimmed.starts_with("it(")
535 || trimmed.starts_with("describe(")
536 || trimmed.starts_with("test(")
537 || trimmed == "#[test]"
538 || trimmed == concat!("#[cfg(", "test)]")
539 || trimmed.starts_with("#[tokio::test")
540 || trimmed.starts_with("func Test")
541 || trimmed == "@Test"
542 {
543 return true;
544 }
545
546 if trimmed.starts_with("class ") {
548 return false;
549 }
550
551 if (trimmed.starts_with("def ") || trimmed.starts_with("async def "))
552 && !trimmed.contains("def test_")
553 {
554 return false;
555 }
556
557 if trimmed.starts_with("func ") && !trimmed.contains("func Test") {
558 return false;
559 }
560
561 if (trimmed.starts_with("fn ")
562 || trimmed.starts_with("pub fn ")
563 || trimmed.starts_with("async fn ")
564 || trimmed.starts_with("pub async fn "))
565 && !trimmed.contains("fn test_")
566 {
567 let pre_start = candidate_line_idx.saturating_sub(3);
568 let mut is_test_attr = false;
569 for pre_line in &lines[pre_start..candidate_line_idx] {
570 let pre_trimmed = pre_line.trim();
571 if pre_trimmed == "#[test]"
572 || pre_trimmed == concat!("#[cfg(", "test)]")
573 || pre_trimmed.starts_with("#[tokio::test")
574 || pre_trimmed.starts_with("#[test")
575 || pre_trimmed == "@Test"
576 {
577 is_test_attr = true;
578 break;
579 }
580 }
581 if is_test_attr {
582 return true;
583 }
584 return false;
585 }
586
587 if trimmed.starts_with("function ") && !trimmed.contains("function test") {
588 return false;
589 }
590 }
591 false
592}
593
/// Returns the slice of `text` made of the line containing byte `offset` plus
/// up to `radius` lines on either side.
///
/// * `offset` is clamped to the length of `text`.
/// * The scan in each direction stops after `MAX_WINDOW_BYTES` bytes, so very
///   long lines cannot produce an unbounded window.
/// * The result is adjusted inwards to UTF-8 character boundaries.
/// * An empty `text` yields `""`.
pub(crate) fn surrounding_line_window(text: &str, offset: usize, radius: usize) -> &str {
    const MAX_WINDOW_BYTES: usize = 2 * 1024;

    if text.is_empty() {
        return "";
    }
    let bytes = text.as_bytes();
    let anchor = offset.min(bytes.len());

    // Walk backwards until `radius + 1` newlines have been seen, the byte cap
    // is reached, or the start of the text is hit.
    let mut start = anchor;
    let mut newlines_before = 0usize;
    for (idx, &byte) in bytes[..anchor].iter().enumerate().rev() {
        if newlines_before > radius || anchor - start >= MAX_WINDOW_BYTES {
            break;
        }
        start = idx;
        if byte == b'\n' {
            newlines_before += 1;
        }
    }
    // Step past the byte the backward walk stopped on (normally the newline
    // that ends the line just outside the window).
    if start != 0 || bytes[0] == b'\n' {
        start += 1;
    }

    // Walk forwards the same way; the newline that ends the last line is kept.
    let mut end = anchor;
    let mut newlines_after = 0usize;
    for &byte in &bytes[anchor..] {
        if newlines_after > radius || end - anchor >= MAX_WINDOW_BYTES {
            break;
        }
        if byte == b'\n' {
            newlines_after += 1;
        }
        end += 1;
    }

    // Never slice through the middle of a multi-byte character.
    while start < text.len() && !text.is_char_boundary(start) {
        start += 1;
    }
    while end > start && !text.is_char_boundary(end) {
        end -= 1;
    }
    &text[start..end]
}