1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
//! Boundary test for program-identifier rejection (keywords.rs:438-461).
//!
//! Source-code identifiers in mainstream languages follow patterns:
//! - snake_case: lowercase + underscores (e.g., "my_helper_name")
//! - camelCase / PascalCase: mixed case with internal lower→Upper transitions
//! (e.g., "BulkUpdateApiKeyResponse")
//!
//! Real API keys almost never match these patterns. The gate rejects identifiers
//! by checking: (1) underscore + all-lowercase OR (2) camelCase/PascalCase
//! with 1+ internal case transitions. This test pins the exact boundary.
use keyhog_scanner::entropy::keywords::is_secret_plausible;
#[test]
fn program_identifier_snake_case_with_underscores_rejected() {
// snake_case: lowercase letters + underscores. "my_long_helper_name"
// matches the pattern and must be rejected.
let snake_case = "my_long_helper_name";
assert!(snake_case.contains('_'));
assert!(snake_case
.chars()
.all(|c| c.is_ascii_lowercase() || c == '_'));
assert!(!is_secret_plausible(snake_case, &[]));
}
#[test]
fn program_identifier_camelcase_with_transitions_rejected() {
// camelCase: at least one lower→Upper transition (e.g., "getValue").
// "convertSearchHitToVersionedApiKeyDoc" has multiple transitions.
let camel_case = "convertSearchHitToVersionedApiKeyDoc";
let bytes = camel_case.as_bytes();
let transitions = bytes
.windows(2)
.filter(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
.count();
assert!(transitions >= 1);
assert!(!is_secret_plausible(camel_case, &[]));
}
#[test]
fn program_identifier_pascalcase_with_transitions_rejected() {
// PascalCase: starts uppercase, has internal transitions (e.g., "BulkUpdateApiKey").
let pascal_case = "BulkUpdateApiKeyResponse";
let bytes = pascal_case.as_bytes();
let transitions = bytes
.windows(2)
.filter(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
.count();
assert!(transitions >= 1);
assert!(!is_secret_plausible(pascal_case, &[]));
}
#[test]
fn program_identifier_boundary_zero_transitions_not_identifier() {
// "ALLUPPERCASE" is all uppercase (no lower→Upper transitions).
// The gate requires transitions >= 1, so this is NOT rejected as an identifier.
let all_upper = "ALLUPPERCASE";
let bytes = all_upper.as_bytes();
let transitions = bytes
.windows(2)
.filter(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
.count();
assert_eq!(transitions, 0);
// Must NOT be rejected by the program-identifier gate.
}
#[test]
fn program_identifier_boundary_single_uppercase_letter() {
// "Foo" is a single uppercase letter at the start. It has the shape of
// PascalCase but NO internal lower→Upper transitions (length 3 means
// windows(2) checks "Fo" and "oo"; "Fo" is U→l, "oo" is l→l).
// Transitions == 0, so NOT an identifier per the gate.
let single_upper = "Foo";
let bytes = single_upper.as_bytes();
let transitions = bytes
.windows(2)
.filter(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
.count();
assert_eq!(transitions, 0);
// Must NOT be rejected by the program-identifier gate.
}
#[test]
fn program_identifier_with_digits_breaks_pattern() {
// "myHelper123Key" has digits mixed in. The identifier gate checks:
// "!value.chars().all(|ch| ch.is_ascii_alphabetic() || ch == '_')"
// This fails the check (has digits), so it's not rejected as an identifier.
let with_digits = "myHelper123Key";
assert!(with_digits.chars().any(|c| c.is_ascii_digit()));
// Must NOT match the identifier pattern (all [A-Za-z_]).
}
#[test]
fn program_identifier_with_symbols_breaks_pattern() {
// "my-Helper_Key" has symbols other than underscore (the hyphen).
// The gate checks for "[A-Za-z_] only". This breaks the pattern.
let with_symbols = "my-Helper_Key";
assert!(with_symbols
.chars()
.any(|c| !c.is_ascii_alphabetic() && c != '_'));
// Must NOT be rejected by the identifier gate.
}
#[test]
fn program_identifier_boundary_transitions_exactly_one() {
// Exactly 1 lower→Upper transition. "aB" or "camelCase" have exactly 1.
// The gate requires transitions >= 1, so this IS rejected.
let one_transition = "aB";
let bytes = one_transition.as_bytes();
let transitions = bytes
.windows(2)
.filter(|pair| pair[0].is_ascii_lowercase() && pair[1].is_ascii_uppercase())
.count();
assert_eq!(transitions, 1);
assert!(!is_secret_plausible(one_transition, &[]));
}
#[test]
fn program_identifier_snake_case_boundary_no_underscore() {
// "lowercaseonly" (all lowercase, no underscore). The gate checks:
// "contains('_') && all lowercase" → false (no underscore).
// So NOT rejected by the snake_case branch. Must pass the identifier gate.
let lowercase_only = "lowercaseonly";
assert!(!lowercase_only.contains('_'));
assert!(lowercase_only.chars().all(|c| c.is_ascii_lowercase()));
// NOT an identifier per the gate.
}