1#![deny(unsafe_op_in_unsafe_fn)]
7#![allow(clippy::too_many_arguments)]
8
9pub mod aws;
12pub mod checksum;
14pub mod compiler;
16pub mod confidence;
18pub mod context;
20pub mod decode;
22pub mod decode_structure;
25pub mod engine;
27pub mod entropy;
29pub mod error;
31pub mod gpu;
33pub mod hw_probe;
35pub mod ml_scorer;
37pub mod multiline;
39pub mod resolution;
41pub mod scanner_config;
43pub mod static_intern;
48pub mod types;
50
51pub mod alphabet_filter;
54pub(crate) mod ascii_ci;
57pub mod bigram_bloom;
59pub(crate) mod entropy_avx512;
61pub mod entropy_fast;
63#[cfg(target_arch = "aarch64")]
64pub(crate) mod entropy_fast_neon;
65#[cfg(target_arch = "x86_64")]
66pub(crate) mod entropy_fast_x86;
67pub mod jwt;
69pub use multiline::fragment_cache;
73pub(crate) mod homoglyph;
74pub mod pipeline;
76pub mod prefix_trie;
78pub(crate) mod probabilistic_gate;
79pub(crate) mod structured;
80pub(crate) mod suppression;
81pub mod telemetry;
83pub mod unicode_hardening;
85pub(crate) mod util_hash;
89
90pub(crate) fn sha256_hash(s: &str) -> [u8; 32] {
96 use sha2::{Digest, Sha256};
97 let mut hasher = Sha256::new();
98 hasher.update(s.as_bytes());
99 hasher.finalize().into()
100}
101
102#[cfg(feature = "simd")]
103pub(crate) mod simd;
104#[cfg(feature = "simdsieve")]
105mod simdsieve_prefilter;
106
107pub(crate) mod shared_regexes;
108
109pub use engine::GpuPhase1Output;
110pub use engine::{CompiledScanner, GpuInitPolicy};
111pub use error::{Result, ScanError};
112pub use hw_probe::{probe_hardware, select_backend, HardwareCaps, ScanBackend};
113pub use types::ScannerConfig;
114
115use std::borrow::Cow;
116
117pub fn normalize_chunk_data(data: &str) -> Cow<'_, str> {
119 if data.is_ascii() {
120 return Cow::Borrowed(data);
121 }
122 let mut normalized = String::with_capacity(data.len());
123 let mut changed = false;
124 for ch in data.chars() {
125 if !unicode_hardening::is_evasion_char(ch) {
126 normalized.push(ch);
127 } else {
128 changed = true;
129 }
130 }
131 if changed {
132 Cow::Owned(normalized)
133 } else {
134 Cow::Borrowed(data)
135 }
136}
137
138pub fn normalize_scannable_chunk<'a>(
140 chunk: &'a keyhog_core::Chunk,
141 owned: &'a mut Option<keyhog_core::Chunk>,
142) -> &'a keyhog_core::Chunk {
143 pipeline::normalize_scannable_chunk(chunk, owned)
144}
145
146pub fn compute_line_offsets(text: &str) -> Vec<usize> {
148 pipeline::compute_line_offsets(text)
149}
150
151pub fn match_line_number(
153 preprocessed: &types::ScannerPreprocessedText<'_>,
154 line_offsets: &[usize],
155 offset: usize,
156) -> usize {
157 pipeline::match_line_number(preprocessed, line_offsets, offset)
158}
159
160pub fn match_entropy(data: &[u8]) -> f64 {
162 pipeline::match_entropy(data)
163}
164
165pub fn floor_char_boundary(text: &str, index: usize) -> usize {
167 engine::floor_char_boundary(text, index)
168}
169
170pub fn is_within_hex_context(data: &str, match_start: usize, match_end: usize) -> bool {
172 pipeline::is_within_hex_context(data, match_start, match_end)
173}
174
175pub fn should_suppress_known_example_credential(
177 credential: &str,
178 path: Option<&str>,
179 context: context::CodeContext,
180) -> bool {
181 pipeline::should_suppress_known_example_credential(credential, path, context)
182}
183
184pub fn find_companion(
186 preprocessed: &types::ScannerPreprocessedText<'_>,
187 primary_line: usize,
188 companion: &types::CompiledCompanion,
189) -> Option<String> {
190 pipeline::find_companion(preprocessed, primary_line, companion)
191}
192
193pub mod testing {
194 pub use crate::compiler::{rewrite_alternation_prefix, split_leading_inline_flag};
195 pub use crate::confidence::penalties::finalize_confidence;
196 pub use crate::engine::boundary::scan_chunk_boundaries;
197 pub use crate::engine::gpu_postprocess::{
198 attribute_matches_to_chunks, fold_overlapping_same_pid_inplace, gpu_phase2_hits_are_dense,
199 };
200 pub use crate::engine::gpu_regex_dfa::extract_literal_core;
201 pub use crate::entropy::keywords::looks_like_program_identifier;
202
203 pub mod entropy_scanner {
208 pub use crate::entropy::keywords::KeywordContext;
209 pub use crate::entropy::scanner::{
210 candidate_is_plausible, credential_keyword_context, is_canonical_non_secret_shape,
211 };
212 }
213
214 pub mod entropy_keywords {
217 pub use crate::entropy::keywords::{
218 entropy_value_looks_like_prose, is_dash_segmented_alnum_decoy,
219 looks_like_english_prose, passes_strict_secret_checks,
220 };
221 }
222 pub use crate::ml_scorer::compute_features_with_config;
226 pub use crate::probabilistic_gate::ProbabilisticGate;
227 pub use crate::static_intern::seed_source_type_count;
228 pub use crate::suppression::shape_gates::looks_like_standard_base64_blob;
229 pub use crate::util_hash::{hash_fast, memoize_by_hash};
230
231 pub mod ascii_ci {
232 pub use crate::ascii_ci::{ci_find, contains_path_segment, contains_path_segment_two};
233 }
234
235 pub mod shape {
236 pub use crate::suppression::shape::{
237 looks_like_credential_colliding_punctuation,
238 looks_like_punctuation_decorated_identifier, looks_like_syntactic_punctuation_marker,
239 };
240 }
241
242 pub mod compiler_prefix {
243 pub use crate::compiler::compiler_prefix::{
244 extract_literal_prefixes, strip_leading_boundary_guard, strip_leading_inline_flags,
245 };
246 }
247
248 pub use crate::decode::caesar::{
249 caesar_shift, is_source_code_path, looks_credential_shaped, CaesarDecoder,
250 };
251 pub use crate::decode::hex::find_hex_strings;
252 pub use crate::decode::reverse::{looks_reversible, reverse_str, ReverseDecoder};
253 pub use crate::decode::util::take_hex_digits;
254 pub use crate::gpu::{env_no_gpu, is_ci_environment};
255
256 pub unsafe fn calculate_shannon_entropy(chunk: &[u8]) -> f64 {
273 #[cfg(target_arch = "x86_64")]
274 {
275 unsafe { crate::entropy_avx512::calculate_shannon_entropy(chunk) }
276 }
277 #[cfg(not(target_arch = "x86_64"))]
278 {
279 crate::entropy_fast::shannon_entropy_simd(chunk)
280 }
281 }
282
283 #[cfg(feature = "simd")]
284 pub use crate::simd::backend::HsScanner;
285
286 #[cfg(feature = "simdsieve")]
287 pub use crate::simdsieve_prefilter::{
288 HOT_PATTERNS, HOT_PATTERN_DETECTOR_IDS, HOT_PATTERN_DISPLAY_NAMES, HOT_PATTERN_NAMES,
289 };
290
291 pub use crate::structured::parsers::{
292 parse_docker_compose, parse_env, parse_hcl, parse_jupyter, parse_k8s_secret, parse_tfstate,
293 };
294}