// Crate-wide lint configuration.
// Inside an `unsafe fn`, each unsafe operation must still sit in its own explicit
// `unsafe` block; violating this is a hard error.
#![deny(unsafe_op_in_unsafe_fn)]
// Silences Clippy's `too_many_arguments` lint for the whole crate.
// NOTE(review): the reason for the blanket allow is not recorded here.
#![allow(clippy::too_many_arguments)]
// Module declarations. `pub mod` items are part of the public API; `pub(crate) mod`
// items are only reachable from inside this crate (or via explicit re-exports).
pub mod checksum;
pub mod compiler;
pub mod confidence;
pub mod context;
pub mod decode;
pub mod decode_structure;
pub mod engine;
pub mod entropy;
pub mod error;
pub mod gpu;
pub mod hw_probe;
pub mod ml_scorer;
pub mod multiline;
pub mod resolution;
pub mod scanner_config;
pub mod static_intern;
pub mod types;
pub mod alphabet_filter;
pub(crate) mod ascii_ci;
pub mod bigram_bloom;
// NOTE(review): declared without a `target_arch` gate, while the only use visible in this
// file (`testing::calculate_shannon_entropy`) is x86_64-only — confirm the module gates
// its own contents on other architectures.
pub(crate) mod entropy_avx512;
pub mod entropy_fast;
// Compiled only when targeting aarch64.
#[cfg(target_arch = "aarch64")]
pub(crate) mod entropy_fast_neon;
// Compiled only when targeting x86_64.
#[cfg(target_arch = "x86_64")]
pub(crate) mod entropy_fast_x86;
pub mod jwt;
// Makes `multiline::fragment_cache` additionally reachable as `crate::fragment_cache`.
pub use multiline::fragment_cache;
pub(crate) mod homoglyph;
pub mod pipeline;
pub mod prefix_trie;
pub(crate) mod probabilistic_gate;
pub(crate) mod structured;
pub(crate) mod suppression;
pub mod telemetry;
pub mod unicode_hardening;
/// Returns the SHA-256 digest of `s` (hashed as its UTF-8 bytes), hex-encoded.
pub(crate) fn sha256_hash(s: &str) -> String {
    use sha2::{Digest, Sha256};

    // The whole input is available up front, so hash it in a single one-shot call.
    let digest = Sha256::digest(s.as_bytes());
    hex::encode(digest)
}
// Compiled only when the `simd` Cargo feature is enabled.
#[cfg(feature = "simd")]
pub(crate) mod simd;
// Compiled only when the `simdsieve` Cargo feature is enabled; private to the crate root.
#[cfg(feature = "simdsieve")]
mod simdsieve_prefilter;
pub(crate) mod shared_regexes;
// Crate-root re-exports so callers can name these items without the module path.
pub use engine::CompiledScanner;
pub use engine::GpuPhase1Output;
pub use error::{Result, ScanError};
pub use hw_probe::{probe_hardware, select_backend, HardwareCaps, ScanBackend};
pub use types::ScannerConfig;
use std::borrow::Cow;
pub fn normalize_chunk_data(data: &str) -> Cow<'_, str> {
if data.is_ascii() {
return Cow::Borrowed(data);
}
let mut normalized = String::with_capacity(data.len());
let mut changed = false;
for ch in data.chars() {
if !unicode_hardening::is_evasion_char(ch) {
normalized.push(ch);
} else {
changed = true;
}
}
if changed {
Cow::Owned(normalized)
} else {
Cow::Borrowed(data)
}
}
/// Crate-root entry point for `pipeline::normalize_scannable_chunk`; forwards both
/// arguments unchanged.
///
/// * `chunk` - the chunk to normalize.
/// * `owned` - caller-provided slot with the same lifetime as the returned reference.
///
/// NOTE(review): the signature suggests the result is either `chunk` itself or a
/// normalized copy stored in `owned` — confirm against the `pipeline` implementation.
pub fn normalize_scannable_chunk<'a>(
chunk: &'a keyhog_core::Chunk,
owned: &'a mut Option<keyhog_core::Chunk>,
) -> &'a keyhog_core::Chunk {
pipeline::normalize_scannable_chunk(chunk, owned)
}
/// Crate-root entry point for `pipeline::compute_line_offsets`; forwards `text` unchanged
/// and returns the resulting offset table.
///
/// NOTE(review): presumably the offsets are byte positions of line starts in `text` —
/// confirm against the `pipeline` implementation.
pub fn compute_line_offsets(text: &str) -> Vec<usize> {
pipeline::compute_line_offsets(text)
}
/// Crate-root entry point for `pipeline::match_line_number`; forwards all three arguments
/// in the same order.
///
/// * `preprocessed` - the preprocessed text the match was found in.
/// * `line_offsets` - offset table; presumably the output of [`compute_line_offsets`]
///   (TODO confirm).
/// * `offset` - position of the match to translate.
///
/// NOTE(review): whether the returned line number is 0- or 1-based is not visible here.
pub fn match_line_number(
preprocessed: &types::ScannerPreprocessedText,
line_offsets: &[usize],
offset: usize,
) -> usize {
pipeline::match_line_number(preprocessed, line_offsets, offset)
}
/// Crate-root entry point for `pipeline::match_entropy`; forwards `data` unchanged and
/// returns the `f64` score it computes.
///
/// NOTE(review): the exact entropy definition and value range are determined by
/// `pipeline::match_entropy` and are not visible here.
pub fn match_entropy(data: &[u8]) -> f64 {
pipeline::match_entropy(data)
}
/// Crate-root entry point for `engine::floor_char_boundary`; forwards `text` and `index`
/// unchanged.
///
/// NOTE(review): presumably returns the largest UTF-8 char boundary in `text` that is
/// `<= index` — confirm against the `engine` implementation.
pub fn floor_char_boundary(text: &str, index: usize) -> usize {
engine::floor_char_boundary(text, index)
}
/// Crate-root entry point for `pipeline::is_within_hex_context`; forwards all three
/// arguments in the same order.
///
/// * `data` - the text containing the match.
/// * `match_start` / `match_end` - bounds of the match within `data`.
///
/// NOTE(review): whether the bounds are byte offsets and whether `match_end` is
/// exclusive is decided by the `pipeline` implementation — confirm there.
pub fn is_within_hex_context(data: &str, match_start: usize, match_end: usize) -> bool {
pipeline::is_within_hex_context(data, match_start, match_end)
}
/// Crate-root entry point for `pipeline::should_suppress_known_example_credential`;
/// forwards all three arguments in the same order and returns its verdict.
///
/// * `credential` - the candidate credential text.
/// * `path` - optional path associated with the finding.
/// * `context` - the code context classification for the finding.
///
/// NOTE(review): the suppression rules themselves live in `pipeline` and are not
/// visible here.
pub fn should_suppress_known_example_credential(
credential: &str,
path: Option<&str>,
context: context::CodeContext,
) -> bool {
pipeline::should_suppress_known_example_credential(credential, path, context)
}
/// Crate-root entry point for `pipeline::find_companion`; forwards all three arguments in
/// the same order.
///
/// * `preprocessed` - the preprocessed text to search.
/// * `primary_line` - line of the primary match the companion is looked up for.
/// * `companion` - the compiled companion description to search with.
///
/// Returns `Some` with an owned string when `pipeline::find_companion` yields one, and
/// `None` otherwise.
pub fn find_companion(
preprocessed: &types::ScannerPreprocessedText,
primary_line: usize,
companion: &types::CompiledCompanion,
) -> Option<String> {
pipeline::find_companion(preprocessed, primary_line, companion)
}
/// Re-exports selected crate internals under a single public path.
///
/// Several of the source modules (`ascii_ci`, `probabilistic_gate`, `suppression`,
/// `structured`, `simd`) are declared `pub(crate)` at the crate root, so this module is
/// the only public route to the items listed below.
///
/// NOTE(review): the name suggests this surface exists for tests and benches only, but
/// nothing here (`#[cfg(test)]`, `#[doc(hidden)]`) enforces that — confirm intent.
pub mod testing {
// Compiler, confidence, ML-scorer and engine helpers.
pub use crate::compiler::{rewrite_alternation_prefix, split_leading_inline_flag};
pub use crate::confidence::penalties::finalize_confidence;
pub use crate::ml_scorer::compute_features_with_config;
pub use crate::engine::boundary::scan_chunk_boundaries;
pub use crate::engine::gpu_postprocess::{
attribute_matches_to_chunks, fold_overlapping_same_pid_inplace,
};
pub use crate::engine::gpu_regex_dfa::extract_literal_core;
pub use crate::entropy::keywords::looks_like_program_identifier;
pub use crate::probabilistic_gate::ProbabilisticGate;
pub use crate::static_intern::seed_source_type_count;
/// Public mirror of selected items from the crate-private `ascii_ci` module.
pub mod ascii_ci {
pub use crate::ascii_ci::{ci_find, contains_path_segment, contains_path_segment_two};
}
/// Public mirror of selected items from the crate-private `suppression::shape` module.
pub mod shape {
pub use crate::suppression::shape::{
looks_like_credential_colliding_punctuation,
looks_like_punctuation_decorated_identifier, looks_like_syntactic_punctuation_marker,
};
}
/// Public mirror of selected items from `compiler::compiler_prefix`.
pub mod compiler_prefix {
pub use crate::compiler::compiler_prefix::{
extract_literal_prefixes, strip_leading_boundary_guard, strip_leading_inline_flags,
};
}
// Decoder helpers and GPU environment probes.
pub use crate::decode::caesar::{
caesar_shift, is_source_code_path, looks_credential_shaped, CaesarDecoder,
};
pub use crate::decode::hex::find_hex_strings;
pub use crate::decode::reverse::{looks_reversible, reverse_str, ReverseDecoder};
pub use crate::decode::util::take_hex_digits;
pub use crate::gpu::{env_no_gpu, is_ci_environment};
/// Computes an entropy value for `chunk`, selecting the implementation by target
/// architecture at compile time.
///
/// On `x86_64` this calls `crate::entropy_avx512::calculate_shannon_entropy`; on every
/// other architecture it calls `crate::entropy_fast::shannon_entropy_simd`.
///
/// # Safety
///
/// On `x86_64` the call is made inside an `unsafe` block, so the caller must uphold
/// whatever contract `entropy_avx512::calculate_shannon_entropy` requires.
/// NOTE(review): presumably the running CPU must support the AVX-512 features that
/// kernel uses — confirm against `entropy_avx512` and state it here explicitly.
/// On other architectures only a safe function is called.
pub unsafe fn calculate_shannon_entropy(chunk: &[u8]) -> f64 {
// x86_64: dispatch to the AVX-512 implementation.
#[cfg(target_arch = "x86_64")]
{
// SAFETY: the preconditions of the callee are delegated to the caller of this
// `unsafe fn` (see `# Safety` above).
unsafe { crate::entropy_avx512::calculate_shannon_entropy(chunk) }
}
// Every other architecture: use the portable fast-entropy entry point.
#[cfg(not(target_arch = "x86_64"))]
{
crate::entropy_fast::shannon_entropy_simd(chunk)
}
}
// Only available when the `simd` Cargo feature is enabled.
#[cfg(feature = "simd")]
pub use crate::simd::backend::HsScanner;
// Only available when the `simdsieve` Cargo feature is enabled.
#[cfg(feature = "simdsieve")]
pub use crate::simdsieve_prefilter::{
HOT_PATTERNS, HOT_PATTERN_DETECTOR_IDS, HOT_PATTERN_DISPLAY_NAMES, HOT_PATTERN_NAMES,
};
// Structured-file parsers from the crate-private `structured` module.
pub use crate::structured::parsers::{
parse_docker_compose, parse_env, parse_jupyter, parse_k8s_secret, parse_tfstate,
};
}