Skip to main content

keyhog_scanner/
lib.rs

1//! KeyHog Scanner: A high-performance, multi-layered secret detection engine.
2//!
3//! This crate implements the core scanning logic, combining SIMD pre-filtering,
4//! Aho-Corasick literal matching, regex fallback, and ML-based confidence scoring.
5
6#![deny(unsafe_op_in_unsafe_fn)]
7#![allow(clippy::too_many_arguments)]
8
9// ── Public API ──────────────────────────────────────────────────────
10pub mod checksum;
11pub mod compiler;
12pub mod confidence;
13pub mod context;
14pub mod decode;
15pub mod engine;
16pub mod entropy;
17pub mod error;
18pub mod gpu;
19pub mod hw_probe;
20pub mod ml_scorer;
21pub mod multiline;
22pub mod resolution;
23pub mod types;
24
25// ── Internal modules ────────────────────────────────────────────────
26// These provide internal utilities; not all functions are consumed yet.
27#[allow(dead_code)]
28pub mod alphabet_filter;
29#[allow(dead_code)]
30pub(crate) mod entropy_fast;
31#[allow(dead_code)]
32pub(crate) mod fragment_cache;
33#[allow(dead_code)]
34pub(crate) mod homoglyph;
35pub mod pipeline;
36#[allow(dead_code)]
37pub(crate) mod prefix_trie;
38#[allow(dead_code)]
39pub(crate) mod probabilistic_gate;
40pub(crate) mod structured;
41#[allow(dead_code)]
42pub(crate) mod unicode_hardening;
43
44pub(crate) fn sha256_hash(s: &str) -> String {
45    use sha2::{Digest, Sha256};
46    let mut hasher = Sha256::new();
47    hasher.update(s.as_bytes());
48    hex::encode(hasher.finalize())
49}
50
51#[cfg(feature = "simd")]
52pub(crate) mod simd;
53#[cfg(feature = "simdsieve")]
54mod simdsieve_prefilter;
55
56pub use engine::CompiledScanner;
57pub use error::{Result, ScanError};
58pub use hw_probe::{HardwareCaps, ScanBackend, probe_hardware, select_backend};
59pub use types::ScannerConfig;
60
61use std::borrow::Cow;
62
63/// Normalize scannable text by removing evasion characters and handling homoglyphs.
64pub fn normalize_chunk_data(data: &str) -> Cow<'_, str> {
65    if data.is_ascii() {
66        return Cow::Borrowed(data);
67    }
68    let mut normalized = String::with_capacity(data.len());
69    let mut changed = false;
70    for ch in data.chars() {
71        if !unicode_hardening::is_evasion_char(ch) {
72            normalized.push(ch);
73        } else {
74            changed = true;
75        }
76    }
77    if changed {
78        Cow::Owned(normalized)
79    } else {
80        Cow::Borrowed(data)
81    }
82}
83
84/// Pre-process a chunk of text for scanning.
85pub fn normalize_scannable_chunk<'a>(
86    chunk: &'a keyhog_core::Chunk,
87    owned: &'a mut Option<keyhog_core::Chunk>,
88) -> &'a keyhog_core::Chunk {
89    pipeline::normalize_scannable_chunk(chunk, owned)
90}
91
92/// Compute line offsets for a block of text.
93pub fn compute_line_offsets(text: &str) -> Vec<usize> {
94    pipeline::compute_line_offsets(text)
95}
96
97/// Map a byte offset to a line number using pre-computed offsets.
98pub fn match_line_number(
99    preprocessed: &types::ScannerPreprocessedText,
100    line_offsets: &[usize],
101    offset: usize,
102) -> usize {
103    pipeline::match_line_number(preprocessed, line_offsets, offset)
104}
105
106/// measure shannon entropy of a byte slice.
107pub fn match_entropy(data: &[u8]) -> f64 {
108    pipeline::match_entropy(data)
109}
110
111/// Find the largest char boundary <= index.
112pub fn floor_char_boundary(text: &str, index: usize) -> usize {
113    engine::floor_char_boundary(text, index)
114}
115
116/// Check if a match is within a hex-encoded context.
117pub fn is_within_hex_context(data: &str, match_start: usize, match_end: usize) -> bool {
118    pipeline::is_within_hex_context(data, match_start, match_end)
119}
120
121/// Check if a credential should be suppressed because it is a known example.
122pub fn should_suppress_known_example_credential(
123    credential: &str,
124    path: Option<&str>,
125    context: context::CodeContext,
126) -> bool {
127    pipeline::should_suppress_known_example_credential(credential, path, context)
128}
129
130/// Search for a companion pattern near a primary match.
131pub fn find_companion(
132    preprocessed: &types::ScannerPreprocessedText,
133    primary_line: usize,
134    companion: &types::CompiledCompanion,
135) -> Option<String> {
136    pipeline::find_companion(preprocessed, primary_line, companion)
137}