keyhog-scanner 0.5.37

keyhog-scanner: high-performance SIMD-accelerated secret detection engine
Documentation
//! Post-match processing: raw match construction and placeholder suppression.

pub(crate) use crate::suppression::{
    contains_uuid_v4_substring, looks_like_email_address,
    looks_like_punctuation_decorated_identifier, looks_like_pure_identifier,
    looks_like_regex_literal_tail, looks_like_scheme_prefixed_uri,
    looks_like_secret_scanner_source, looks_like_url_or_path_segment,
    looks_like_vendored_minified_path, looks_like_word_separated_identifier,
};
pub use crate::suppression::{
    is_weakly_anchored_named_detector, should_suppress_known_example_credential,
    should_suppress_known_example_credential_with_source, should_suppress_named_detector_finding,
};

use crate::types::*;
use keyhog_core::{Chunk, MatchLocation, RawMatch};
use std::collections::HashMap;

/// Assembles the [`RawMatch`] reported for one detector hit inside `chunk`.
///
/// # Severity
///
/// The reported severity is resolved in this order:
///
/// 1. `pattern_client_safe` is set: always `Severity::ClientSafe`, no matter
///    what the detector declares or where the chunk came from. Such patterns
///    (Sentry DSN, Stripe `pk_*`, Firebase web key, ...) match real
///    credentials that were *meant* to ship in client bundles, so they rank
///    below `Low` by default and can be dropped with `--hide-client-safe`.
/// 2. The chunk's source type is `git/history`: the detector's severity
///    lowered by one tier. A secret seen only in non-HEAD history is still a
///    leak, but less urgent than one an attacker can grep in HEAD right now.
/// 3. Anything else (live filesystem, `git/head`, S3/Docker/Web, ...): the
///    detector's declared severity, unchanged.
///
/// # Other fields
///
/// Detector identity, the credential and the location metadata are interned
/// through `scan_state`; `credential_hash` comes from `crate::sha256_hash`.
/// The stored offset is `offset` plus the chunk's `base_offset`, and `line`,
/// `ent` and `confidence` are stored wrapped in `Some`.
pub fn build_raw_match(
    detector: &keyhog_core::DetectorSpec,
    chunk: &Chunk,
    credential: &str,
    companions: HashMap<String, String>,
    offset: usize,
    line: usize,
    ent: f64,
    confidence: f64,
    scan_state: &mut ScanState,
    pattern_client_safe: bool,
) -> RawMatch {
    // Source-type tag marking chunks that come from non-HEAD git history.
    const GIT_HISTORY_SOURCE: &str = "git/history";

    let metadata = &chunk.metadata;

    // The history check only matters when the client-safe override is off.
    let from_git_history = !pattern_client_safe && metadata.source_type == GIT_HISTORY_SOURCE;
    let severity = match (pattern_client_safe, from_git_history) {
        (true, _) => keyhog_core::Severity::ClientSafe,
        (false, true) => detector.severity.downgrade_one(),
        (false, false) => detector.severity,
    };

    // Interning happens in the same sequence as the fields of the result so
    // `scan_state` observes an unchanged call order.
    let detector_id = scan_state.intern_metadata(&detector.id);
    let detector_name = scan_state.intern_metadata(&detector.name);
    let service = scan_state.intern_metadata(&detector.service);

    // Hash the raw text first; `credential` is then shadowed by its interned form.
    let credential_hash = crate::sha256_hash(credential);
    let credential = scan_state.intern_credential(credential);

    let source = scan_state.intern_metadata(&metadata.source_type);
    let file_path = metadata.path.as_ref().map(|path| scan_state.intern_metadata(path));
    let commit = metadata.commit.as_ref().map(|sha| scan_state.intern_metadata(sha));
    let author = metadata.author.as_ref().map(|who| scan_state.intern_metadata(who));
    let date = metadata.date.as_ref().map(|when| scan_state.intern_metadata(when));

    let location = MatchLocation {
        source,
        file_path,
        line: Some(line),
        // Shift the chunk-relative offset by the chunk's base offset.
        offset: offset + metadata.base_offset,
        commit,
        author,
        date,
    };

    RawMatch {
        detector_id,
        detector_name,
        service,
        severity,
        credential_hash,
        credential,
        companions,
        location,
        entropy: Some(ent),
        confidence: Some(confidence),
    }
}