1#![deny(unsafe_op_in_unsafe_fn)]
7#![allow(clippy::too_many_arguments)]
8
9pub mod checksum;
12pub mod compiler;
14pub mod confidence;
16pub mod context;
18pub mod decode;
20pub mod decode_structure;
23pub mod engine;
25pub mod entropy;
27pub mod error;
29pub mod gpu;
31pub mod hw_probe;
33pub mod ml_scorer;
35pub mod multiline;
37pub mod resolution;
39pub mod scanner_config;
41pub mod static_intern;
46pub mod types;
48
49pub mod alphabet_filter;
52pub(crate) mod ascii_ci;
55pub mod bigram_bloom;
57pub(crate) mod entropy_avx512;
59pub mod entropy_fast;
61#[cfg(target_arch = "aarch64")]
62pub(crate) mod entropy_fast_neon;
63#[cfg(target_arch = "x86_64")]
64pub(crate) mod entropy_fast_x86;
65pub mod jwt;
67pub use multiline::fragment_cache;
71pub(crate) mod homoglyph;
72pub mod pipeline;
74pub mod prefix_trie;
76pub(crate) mod probabilistic_gate;
77pub(crate) mod structured;
78pub(crate) mod suppression;
79pub mod telemetry;
81pub mod unicode_hardening;
83
84pub(crate) fn sha256_hash(s: &str) -> String {
85 use sha2::{Digest, Sha256};
86 let mut hasher = Sha256::new();
87 hasher.update(s.as_bytes());
88 hex::encode(hasher.finalize())
89}
90
91#[cfg(feature = "simd")]
92pub(crate) mod simd;
93#[cfg(feature = "simdsieve")]
94mod simdsieve_prefilter;
95
96pub(crate) mod shared_regexes;
97
98pub use engine::CompiledScanner;
99pub use engine::GpuPhase1Output;
100pub use error::{Result, ScanError};
101pub use hw_probe::{probe_hardware, select_backend, HardwareCaps, ScanBackend};
102pub use types::ScannerConfig;
103
104use std::borrow::Cow;
105
106pub fn normalize_chunk_data(data: &str) -> Cow<'_, str> {
108 if data.is_ascii() {
109 return Cow::Borrowed(data);
110 }
111 let mut normalized = String::with_capacity(data.len());
112 let mut changed = false;
113 for ch in data.chars() {
114 if !unicode_hardening::is_evasion_char(ch) {
115 normalized.push(ch);
116 } else {
117 changed = true;
118 }
119 }
120 if changed {
121 Cow::Owned(normalized)
122 } else {
123 Cow::Borrowed(data)
124 }
125}
126
127pub fn normalize_scannable_chunk<'a>(
129 chunk: &'a keyhog_core::Chunk,
130 owned: &'a mut Option<keyhog_core::Chunk>,
131) -> &'a keyhog_core::Chunk {
132 pipeline::normalize_scannable_chunk(chunk, owned)
133}
134
135pub fn compute_line_offsets(text: &str) -> Vec<usize> {
137 pipeline::compute_line_offsets(text)
138}
139
140pub fn match_line_number(
142 preprocessed: &types::ScannerPreprocessedText,
143 line_offsets: &[usize],
144 offset: usize,
145) -> usize {
146 pipeline::match_line_number(preprocessed, line_offsets, offset)
147}
148
149pub fn match_entropy(data: &[u8]) -> f64 {
151 pipeline::match_entropy(data)
152}
153
154pub fn floor_char_boundary(text: &str, index: usize) -> usize {
156 engine::floor_char_boundary(text, index)
157}
158
159pub fn is_within_hex_context(data: &str, match_start: usize, match_end: usize) -> bool {
161 pipeline::is_within_hex_context(data, match_start, match_end)
162}
163
164pub fn should_suppress_known_example_credential(
166 credential: &str,
167 path: Option<&str>,
168 context: context::CodeContext,
169) -> bool {
170 pipeline::should_suppress_known_example_credential(credential, path, context)
171}
172
173pub fn find_companion(
175 preprocessed: &types::ScannerPreprocessedText,
176 primary_line: usize,
177 companion: &types::CompiledCompanion,
178) -> Option<String> {
179 pipeline::find_companion(preprocessed, primary_line, companion)
180}
181
182pub mod testing {
183 pub use crate::compiler::{rewrite_alternation_prefix, split_leading_inline_flag};
184 pub use crate::confidence::penalties::finalize_confidence;
185 pub use crate::ml_scorer::compute_features_with_config;
189 pub use crate::engine::boundary::scan_chunk_boundaries;
190 pub use crate::engine::gpu_postprocess::{
191 attribute_matches_to_chunks, fold_overlapping_same_pid_inplace,
192 };
193 pub use crate::engine::gpu_regex_dfa::extract_literal_core;
194 pub use crate::entropy::keywords::looks_like_program_identifier;
195 pub use crate::probabilistic_gate::ProbabilisticGate;
196 pub use crate::static_intern::seed_source_type_count;
197
198 pub mod ascii_ci {
199 pub use crate::ascii_ci::{ci_find, contains_path_segment, contains_path_segment_two};
200 }
201
202 pub mod shape {
203 pub use crate::suppression::shape::{
204 looks_like_credential_colliding_punctuation,
205 looks_like_punctuation_decorated_identifier, looks_like_syntactic_punctuation_marker,
206 };
207 }
208
209 pub mod compiler_prefix {
210 pub use crate::compiler::compiler_prefix::{
211 extract_literal_prefixes, strip_leading_boundary_guard, strip_leading_inline_flags,
212 };
213 }
214
215 pub use crate::decode::caesar::{
216 caesar_shift, is_source_code_path, looks_credential_shaped, CaesarDecoder,
217 };
218 pub use crate::decode::hex::find_hex_strings;
219 pub use crate::decode::reverse::{looks_reversible, reverse_str, ReverseDecoder};
220 pub use crate::decode::util::take_hex_digits;
221 pub use crate::gpu::{env_no_gpu, is_ci_environment};
222
223 pub unsafe fn calculate_shannon_entropy(chunk: &[u8]) -> f64 {
240 #[cfg(target_arch = "x86_64")]
241 {
242 unsafe { crate::entropy_avx512::calculate_shannon_entropy(chunk) }
243 }
244 #[cfg(not(target_arch = "x86_64"))]
245 {
246 crate::entropy_fast::shannon_entropy_simd(chunk)
247 }
248 }
249
250 #[cfg(feature = "simd")]
251 pub use crate::simd::backend::HsScanner;
252
253 #[cfg(feature = "simdsieve")]
254 pub use crate::simdsieve_prefilter::{
255 HOT_PATTERNS, HOT_PATTERN_DETECTOR_IDS, HOT_PATTERN_DISPLAY_NAMES, HOT_PATTERN_NAMES,
256 };
257
258 pub use crate::structured::parsers::{
259 parse_docker_compose, parse_env, parse_jupyter, parse_k8s_secret, parse_tfstate,
260 };
261}