//! Crate root: declares the scanner's modules and their visibility.

// Operations that require `unsafe` must sit inside an explicit `unsafe` block,
// even when they appear in the body of an `unsafe fn`.
#![deny(unsafe_op_in_unsafe_fn)]
// Crate-wide opt-out of clippy's argument-count lint.
#![allow(clippy::too_many_arguments)]

// Public modules.
pub mod checksum;
pub mod compiler;
pub mod confidence;
pub mod context;
pub mod decode;
pub mod engine;
pub mod entropy;
pub mod error;
pub mod gpu;
pub mod hw_probe;
pub mod ml_scorer;
pub mod multiline;
pub mod resolution;
pub mod types;

// Second group of modules, mostly crate-private. Several carry
// `#[allow(dead_code)]`.
// NOTE(review): the reason for each `dead_code` allowance is not recorded here —
// presumably some items are only reachable under optional features or are not
// wired in yet. Confirm and document per module.
#[allow(dead_code)]
pub mod alphabet_filter;
#[allow(dead_code)]
pub(crate) mod entropy_fast;
#[allow(dead_code)]
pub(crate) mod fragment_cache;
#[allow(dead_code)]
pub(crate) mod homoglyph;
pub mod pipeline;
#[allow(dead_code)]
pub(crate) mod prefix_trie;
#[allow(dead_code)]
pub(crate) mod probabilistic_gate;
pub(crate) mod structured;
#[allow(dead_code)]
pub(crate) mod unicode_hardening;
43
44pub(crate) fn sha256_hash(s: &str) -> String {
45 use sha2::{Digest, Sha256};
46 let mut hasher = Sha256::new();
47 hasher.update(s.as_bytes());
48 hex::encode(hasher.finalize())
49}
50
51#[cfg(feature = "simd")]
52pub(crate) mod simd;
53#[cfg(feature = "simdsieve")]
54mod simdsieve_prefilter;
55
56pub use engine::CompiledScanner;
57pub use error::{Result, ScanError};
58pub use hw_probe::{HardwareCaps, ScanBackend, probe_hardware, select_backend};
59pub use types::ScannerConfig;
60
61use std::borrow::Cow;
62
63pub fn normalize_chunk_data(data: &str) -> Cow<'_, str> {
65 if data.is_ascii() {
66 return Cow::Borrowed(data);
67 }
68 let mut normalized = String::with_capacity(data.len());
69 let mut changed = false;
70 for ch in data.chars() {
71 if !unicode_hardening::is_evasion_char(ch) {
72 normalized.push(ch);
73 } else {
74 changed = true;
75 }
76 }
77 if changed {
78 Cow::Owned(normalized)
79 } else {
80 Cow::Borrowed(data)
81 }
82}
83
/// Public wrapper that forwards to `pipeline::normalize_scannable_chunk`.
///
/// The returned reference shares the lifetime `'a` with both `chunk` and
/// `owned`, so it may point at either one.
///
/// NOTE(review): presumably `owned` is caller-provided storage that is filled
/// only when normalization has to build a modified chunk, with the original
/// `chunk` returned otherwise — confirm against `pipeline`.
pub fn normalize_scannable_chunk<'a>(
    chunk: &'a keyhog_core::Chunk,
    owned: &'a mut Option<keyhog_core::Chunk>,
) -> &'a keyhog_core::Chunk {
    pipeline::normalize_scannable_chunk(chunk, owned)
}
91
/// Public wrapper that forwards to `pipeline::compute_line_offsets`.
///
/// NOTE(review): presumably the result holds the byte offset at which each
/// line of `text` starts — confirm against `pipeline`.
pub fn compute_line_offsets(text: &str) -> Vec<usize> {
    pipeline::compute_line_offsets(text)
}
96
/// Public wrapper that forwards to `pipeline::match_line_number`.
///
/// NOTE(review): presumably maps the match `offset` to a line number using
/// `line_offsets` (as produced by `compute_line_offsets`) and any mapping
/// carried by `preprocessed`; whether the result is 0- or 1-based is not
/// visible here — confirm against `pipeline`.
pub fn match_line_number(
    preprocessed: &types::ScannerPreprocessedText,
    line_offsets: &[usize],
    offset: usize,
) -> usize {
    pipeline::match_line_number(preprocessed, line_offsets, offset)
}
105
/// Public wrapper that forwards to `pipeline::match_entropy`.
///
/// NOTE(review): presumably an entropy estimate over the bytes of `data`; the
/// exact formula and value range are defined in `pipeline` — confirm there.
pub fn match_entropy(data: &[u8]) -> f64 {
    pipeline::match_entropy(data)
}
110
/// Public wrapper that forwards to `engine::floor_char_boundary`.
///
/// NOTE(review): presumably returns the largest UTF-8 character boundary in
/// `text` that is `<= index`; behavior for `index > text.len()` is not visible
/// here — confirm against `engine`.
pub fn floor_char_boundary(text: &str, index: usize) -> usize {
    engine::floor_char_boundary(text, index)
}
115
/// Public wrapper that forwards to `pipeline::is_within_hex_context`.
///
/// NOTE(review): presumably `match_start..match_end` is a byte range into
/// `data` and the result says whether that match sits inside a longer
/// hexadecimal run — confirm against `pipeline`.
pub fn is_within_hex_context(data: &str, match_start: usize, match_end: usize) -> bool {
    pipeline::is_within_hex_context(data, match_start, match_end)
}
120
/// Public wrapper that forwards to
/// `pipeline::should_suppress_known_example_credential`.
///
/// `path` is optional; `context` is passed through by value.
///
/// NOTE(review): presumably returns `true` when `credential` is recognised as
/// a documentation/example value that should not be reported — the matching
/// rules live in `pipeline`; confirm there.
pub fn should_suppress_known_example_credential(
    credential: &str,
    path: Option<&str>,
    context: context::CodeContext,
) -> bool {
    pipeline::should_suppress_known_example_credential(credential, path, context)
}
129
/// Public wrapper that forwards to `pipeline::find_companion`.
///
/// Returns `Some(String)` when the pipeline finds a companion value and `None`
/// otherwise.
///
/// NOTE(review): presumably searches `preprocessed` near `primary_line` for
/// text matching `companion`; the search window and line indexing are defined
/// in `pipeline` — confirm there.
pub fn find_companion(
    preprocessed: &types::ScannerPreprocessedText,
    primary_line: usize,
    companion: &types::CompiledCompanion,
) -> Option<String> {
    pipeline::find_companion(preprocessed, primary_line, companion)
}
137}