/// Starting value for the score accumulators in `compute_confidence`; also the
/// value it returns when the total available weight is zero.
const SCORE_ZERO: f64 = 0.0;
/// Lower bound the final confidence is clamped to.
const CONFIDENCE_MIN: f64 = 0.0;
/// Upper bound the final confidence is clamped to; also used as the neutral
/// multiplier when the low-entropy penalty does not apply.
const CONFIDENCE_MAX: f64 = 1.0;
/// Weight earned when `has_literal_prefix` is set.
const LITERAL_PREFIX_WEIGHT: f64 = 0.35;
/// Weight earned when `has_context_anchor` is set.
const CONTEXT_ANCHOR_WEIGHT: f64 = 0.20;
/// Full entropy weight, earned when entropy reaches `VERY_HIGH_ENTROPY_THRESHOLD`;
/// this is also the amount the entropy signal adds to the available total.
const ENTROPY_WEIGHT: f64 = 0.20;
/// Partial entropy credit, earned when entropy reaches `HIGH_ENTROPY_THRESHOLD`
/// but not `VERY_HIGH_ENTROPY_THRESHOLD`.
const HIGH_ENTROPY_PARTIAL_WEIGHT: f64 = 0.12;
/// Minimum entropy (inclusive) that earns any entropy credit.
const MODERATE_ENTROPY_THRESHOLD: f64 = 3.0;
/// Entropy credit earned when entropy reaches `MODERATE_ENTROPY_THRESHOLD`
/// but not `HIGH_ENTROPY_THRESHOLD`.
const MODERATE_ENTROPY_WEIGHT: f64 = 0.05;
/// Entropy strictly below this value makes a match eligible for the low-entropy penalty.
const LOW_ENTROPY_THRESHOLD: f64 = 2.0;
/// The low-entropy penalty only applies to matches strictly longer than this.
const LOW_ENTROPY_MIN_MATCH_LENGTH: usize = 10;
/// Multiplier applied to the normalized score when the low-entropy penalty applies.
const LOW_ENTROPY_PENALTY: f64 = 0.6;
/// Weight earned when `keyword_nearby` is set.
const KEYWORD_NEARBY_WEIGHT: f64 = 0.10;
/// Weight earned when `sensitive_file` is set.
const SENSITIVE_FILE_WEIGHT: f64 = 0.10;
/// Weight earned when `has_companion` is set.
const COMPANION_WEIGHT: f64 = 0.05;
/// Minimum entropy (inclusive) for the partial entropy credit.
const HIGH_ENTROPY_THRESHOLD: f64 = 4.5;
/// Minimum entropy (inclusive) for the full entropy weight.
const VERY_HIGH_ENTROPY_THRESHOLD: f64 = 5.5;
21
/// Inputs consumed by [`compute_confidence`] when scoring a single match.
///
/// The boolean fields each earn a fixed weight when `true`; `entropy` earns a
/// tiered weight, and `entropy` together with `match_length` decides whether
/// the low-entropy penalty is applied.
///
/// `Default` yields a value with every flag off, zero entropy and zero length,
/// which is convenient with struct-update syntax (`..Default::default()`).
#[derive(Debug, Clone, PartialEq, Default)]
pub struct ConfidenceSignals {
    /// Earns the literal-prefix weight when `true`.
    /// NOTE(review): presumably means the match starts with a known literal
    /// token prefix; confirm with the code that fills this in.
    pub has_literal_prefix: bool,
    /// Earns the context-anchor weight when `true`.
    pub has_context_anchor: bool,
    /// Entropy estimate compared against the entropy thresholds.
    /// NOTE(review): presumably the entropy of the matched text; the unit is
    /// not visible in this file.
    pub entropy: f64,
    /// Earns the keyword-nearby weight when `true`.
    pub keyword_nearby: bool,
    /// Earns the sensitive-file weight when `true`.
    /// NOTE(review): presumably the result of [`is_sensitive_path`]; confirm
    /// against the caller.
    pub sensitive_file: bool,
    /// Length of the match; only used to gate the low-entropy penalty.
    /// NOTE(review): bytes vs. characters is not visible in this file.
    pub match_length: usize,
    /// Earns the companion weight when `true`.
    pub has_companion: bool,
}
56
57pub fn compute_confidence(signals: &ConfidenceSignals) -> f64 {
81 let mut score = SCORE_ZERO;
82 let mut max_possible = SCORE_ZERO;
83
84 max_possible += LITERAL_PREFIX_WEIGHT;
88 if signals.has_literal_prefix {
89 score += LITERAL_PREFIX_WEIGHT;
90 }
91
92 max_possible += CONTEXT_ANCHOR_WEIGHT;
94 if signals.has_context_anchor {
95 score += CONTEXT_ANCHOR_WEIGHT;
96 }
97
98 max_possible += ENTROPY_WEIGHT;
100 if signals.entropy >= VERY_HIGH_ENTROPY_THRESHOLD {
101 score += ENTROPY_WEIGHT;
102 } else if signals.entropy >= HIGH_ENTROPY_THRESHOLD {
103 score += HIGH_ENTROPY_PARTIAL_WEIGHT;
104 } else if signals.entropy >= MODERATE_ENTROPY_THRESHOLD {
105 score += MODERATE_ENTROPY_WEIGHT;
106 }
107 let low_entropy_penalty = if signals.entropy < LOW_ENTROPY_THRESHOLD
110 && signals.match_length > LOW_ENTROPY_MIN_MATCH_LENGTH
111 {
112 LOW_ENTROPY_PENALTY
113 } else {
114 CONFIDENCE_MAX
115 };
116
117 max_possible += KEYWORD_NEARBY_WEIGHT;
119 if signals.keyword_nearby {
120 score += KEYWORD_NEARBY_WEIGHT;
121 }
122
123 max_possible += SENSITIVE_FILE_WEIGHT;
125 if signals.sensitive_file {
126 score += SENSITIVE_FILE_WEIGHT;
127 }
128
129 max_possible += COMPANION_WEIGHT;
131 if signals.has_companion {
132 score += COMPANION_WEIGHT;
133 }
134
135 if max_possible == SCORE_ZERO {
137 return SCORE_ZERO;
138 }
139 let normalized_score: f64 = (score / max_possible) * low_entropy_penalty;
140 normalized_score.clamp(CONFIDENCE_MIN, CONFIDENCE_MAX)
141}
142
/// Reports whether `path` looks like a file that commonly holds secrets.
///
/// Two ASCII case-insensitive checks run against the raw bytes of `path`:
///
/// 1. the path *contains* one of a fixed set of well-known names anywhere in
///    it (directory components included), or
/// 2. the path *ends with* one of a fixed set of extensions.
///
/// Returns `true` as soon as either check succeeds and `false` otherwise,
/// including for the empty string.
pub fn is_sensitive_path(path: &str) -> bool {
    const NAME_FRAGMENTS: &[&[u8]] = &[
        b".env",
        b".env.local",
        b".env.production",
        b".env.staging",
        b"credentials",
        b"secrets",
        b"apikeys",
        b"api_keys",
        b".npmrc",
        b".pypirc",
        b".netrc",
        b".pgpass",
        b"terraform.tfvars",
        b"variables.tf",
        b"docker-compose",
        b"application.yml",
        b"application.properties",
        b"config.json",
        b"config.yaml",
    ];
    const TRAILING_EXTENSIONS: &[&[u8]] =
        &[b".env", b".pem", b".key", b".p12", b".pfx", b".jks"];

    let haystack = path.as_bytes();

    // Substring search: slide a window of the needle's length over the path.
    let contains_fragment = |needle: &[u8]| {
        haystack
            .windows(needle.len())
            .any(|candidate| candidate.eq_ignore_ascii_case(needle))
    };

    // Suffix check: a path shorter than the suffix can never match.
    let ends_with_extension = |suffix: &[u8]| match haystack.len().checked_sub(suffix.len()) {
        Some(start) => haystack[start..].eq_ignore_ascii_case(suffix),
        None => false,
    };

    NAME_FRAGMENTS
        .iter()
        .any(|&fragment| contains_fragment(fragment))
        || TRAILING_EXTENSIONS
            .iter()
            .any(|&extension| ends_with_extension(extension))
}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Signals with every boolean flag off; individual tests switch flags on
    /// with struct-update syntax.
    fn quiet(entropy: f64, match_length: usize) -> ConfidenceSignals {
        ConfidenceSignals {
            has_literal_prefix: false,
            has_context_anchor: false,
            entropy,
            keyword_nearby: false,
            sensitive_file: false,
            match_length,
            has_companion: false,
        }
    }

    /// Asserts that the confidence for `signals` is within `1e-9` of `expected`.
    #[track_caller]
    fn assert_score_near(signals: &ConfidenceSignals, expected: f64) {
        let score = compute_confidence(signals);
        assert!((score - expected).abs() < 1e-9, "score was {}", score);
    }

    /// Asserts that `path` is classified as sensitive.
    #[track_caller]
    fn expect_sensitive(path: &str) {
        assert!(is_sensitive_path(path));
    }

    /// Asserts that `path` is not classified as sensitive.
    #[track_caller]
    fn expect_not_sensitive(path: &str) {
        assert!(!is_sensitive_path(path));
    }

    #[test]
    fn high_confidence_with_prefix_and_entropy() {
        let score = compute_confidence(&ConfidenceSignals {
            has_literal_prefix: true,
            keyword_nearby: true,
            sensitive_file: true,
            ..quiet(5.2, 50)
        });
        assert!(score > 0.6, "score was {}", score);
    }

    #[test]
    fn low_confidence_generic_hex() {
        let score = compute_confidence(&quiet(3.5, 32));
        assert!(score < 0.3, "score was {}", score);
    }

    #[test]
    fn medium_confidence_with_context() {
        let score = compute_confidence(&ConfidenceSignals {
            has_context_anchor: true,
            keyword_nearby: true,
            ..quiet(4.8, 40)
        });
        assert!(score > 0.4 && score < 0.8, "score was {}", score);
    }

    #[test]
    fn sensitive_paths() {
        expect_sensitive(".env.production");
        expect_sensitive("config/credentials.json");
        expect_sensitive("server.key");
        expect_not_sensitive("src/main.rs");
        expect_not_sensitive("README.md");
    }

    #[test]
    fn low_entropy_penalty() {
        let score = compute_confidence(&ConfidenceSignals {
            has_literal_prefix: true,
            ..quiet(1.5, 32)
        });
        assert!(score < 0.5, "score was {}", score);
    }

    #[test]
    fn confidence_is_zero_without_positive_signals() {
        assert_eq!(compute_confidence(&quiet(0.0, 0)), 0.0);
    }

    #[test]
    fn confidence_clamps_to_one_for_all_positive_signals() {
        let everything = ConfidenceSignals {
            has_literal_prefix: true,
            has_context_anchor: true,
            entropy: 8.0,
            keyword_nearby: true,
            sensitive_file: true,
            match_length: 128,
            has_companion: true,
        };
        assert_eq!(compute_confidence(&everything), 1.0);
    }

    #[test]
    fn very_high_entropy_gets_full_entropy_weight() {
        assert_score_near(&quiet(VERY_HIGH_ENTROPY_THRESHOLD, 32), 0.2);
    }

    #[test]
    fn high_entropy_gets_partial_entropy_weight() {
        assert_score_near(&quiet(HIGH_ENTROPY_THRESHOLD, 32), 0.12);
    }

    #[test]
    fn moderate_entropy_gets_small_weight() {
        assert_score_near(&quiet(3.0, 32), 0.05);
    }

    #[test]
    fn entropy_below_moderate_threshold_adds_no_weight() {
        assert_eq!(compute_confidence(&quiet(2.99, 32)), 0.0);
    }

    #[test]
    fn low_entropy_penalty_requires_length_above_threshold() {
        // Length exactly at the minimum is not "above" it, so no penalty.
        assert_score_near(
            &ConfidenceSignals {
                has_literal_prefix: true,
                ..quiet(1.0, 10)
            },
            0.35,
        );
    }

    #[test]
    fn low_entropy_penalty_applies_only_below_threshold() {
        // Entropy exactly at the threshold is not "below" it, so no penalty.
        assert_score_near(
            &ConfidenceSignals {
                has_literal_prefix: true,
                ..quiet(2.0, 64)
            },
            0.35,
        );
    }

    #[test]
    fn low_entropy_penalty_scales_nonzero_score() {
        assert_score_near(
            &ConfidenceSignals {
                has_literal_prefix: true,
                has_context_anchor: true,
                ..quiet(1.0, 11)
            },
            0.33,
        );
    }

    #[test]
    fn companion_signal_adds_expected_weight() {
        // Zero entropy with length 24 also triggers the low-entropy penalty.
        assert_score_near(
            &ConfidenceSignals {
                has_companion: true,
                ..quiet(0.0, 24)
            },
            0.03,
        );
    }

    #[test]
    fn context_and_keyword_signals_stack_linearly() {
        // Zero entropy with length 20 also triggers the low-entropy penalty.
        assert_score_near(
            &ConfidenceSignals {
                has_context_anchor: true,
                keyword_nearby: true,
                ..quiet(0.0, 20)
            },
            0.18,
        );
    }

    #[test]
    fn sensitive_path_matches_case_insensitively() {
        expect_sensitive("CONFIG/.ENV.PRODUCTION");
        expect_sensitive("Secrets/CREDENTIALS.JSON");
        expect_sensitive("keys/CLIENT.P12");
    }

    #[test]
    fn sensitive_path_rejects_empty_and_non_sensitive_values() {
        expect_not_sensitive("");
        expect_not_sensitive("notes/environment.txt");
        expect_not_sensitive("docs/secretary.txt");
    }

    #[test]
    fn sensitive_path_detects_embedded_sensitive_names_with_special_characters() {
        expect_sensitive("deploy/docker-compose.override.yml");
        expect_sensitive("dir/my api_keys-backup.txt");
        expect_sensitive("nested/application.properties.template");
    }

    #[test]
    fn sensitive_path_handles_huge_input() {
        let long_prefix = "a/".repeat(4096);
        let long_sensitive = format!("{long_prefix}terraform.tfvars");
        let long_non_sensitive = format!("{long_prefix}plain-text-file.txt");
        expect_sensitive(&long_sensitive);
        expect_not_sensitive(&long_non_sensitive);
    }
}