keyhog-scanner 0.5.37

keyhog-scanner: high-performance SIMD-accelerated secret detection engine
Documentation
//! Cross-chunk fragment cache for virtual secret reassembly.
//!
//! This allows KeyHog to detect secrets split across different files or
//! distant locations within a large file that exceed the chunk window.

use lru::LruCache;
use parking_lot::Mutex;
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::Arc;
use zeroize::Zeroizing;

const SHARD_COUNT: usize = 64;
const MAX_FRAGMENTS_PER_SCOPE: usize = 8;

/// A potential fragment of a secret (variable assignment part).
///
/// `value` is wrapped in `Zeroizing<String>` so that fragment text gets
/// scrubbed from the heap when an entry is evicted from the LRU or the
/// cache is dropped. kimi-wave1 audit finding 1.HIGH: previously the
/// credential text lived in a plain `String` for the lifetime of the
/// scan, and reassembled candidates were materialized into a `Chunk`
/// that re-embedded the secret in a `format!`-built dummy line. The
/// `Debug` derive is also intentionally NOT wired through `value`
/// - `Zeroizing<String>` prints redacted in `{:?}`.
#[derive(Clone)]
pub struct SecretFragment {
    pub prefix: String,
    pub var_name: String,
    pub value: Zeroizing<String>,
    pub line: usize,
    pub path: Option<Arc<str>>,
}

impl std::fmt::Debug for SecretFragment {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("SecretFragment")
            .field("prefix", &self.prefix)
            .field("var_name", &self.var_name)
            .field(
                "value",
                &format_args!("<redacted {} bytes>", self.value.len()),
            )
            .field("line", &self.line)
            .field("path", &self.path)
            .finish()
    }
}

/// Global cache for tracking fragmented secrets across the entire scan run.
pub struct FragmentCache {
    /// Maps normalized prefix (e.g. "aws_key") to a list of found fragments.
    /// Sharded to avoid a single global mutex becoming a bottleneck under rayon.
    shards: [Mutex<LruCache<String, Vec<SecretFragment>>>; SHARD_COUNT],
}

impl FragmentCache {
    pub fn new(capacity: usize) -> Self {
        let per_shard = (capacity / SHARD_COUNT).max(1);
        let nz = NonZeroUsize::new(per_shard).unwrap_or(NonZeroUsize::MIN);
        Self {
            shards: std::array::from_fn(|_| Mutex::new(LruCache::new(nz))),
        }
    }

    /// Record a fragment and return a list of "complete" candidates if any.
    /// The returned `Zeroizing<String>` lets the caller scope the
    /// reassembled credential's lifetime tightly - drop it (or pass it
    /// to a scan that consumes by reference) and the heap copy is zeroed.
    pub fn record_and_reassemble(&self, fragment: SecretFragment) -> Vec<Zeroizing<String>> {
        let key = scoped_key(&fragment);
        let shard_idx = shard_index(&key);
        let mut lock = self.shards[shard_idx].lock();

        let cluster = lock.get_or_insert_mut(key, Vec::new);

        // Don't add duplicate fragments (same path/line/value)
        if !cluster.iter().any(|f| {
            f.path == fragment.path && f.line == fragment.line && **f.value == **fragment.value
        }) {
            cluster.push(fragment);
            if cluster.len() > MAX_FRAGMENTS_PER_SCOPE {
                // LRU-style: drop the oldest. The Zeroizing<String> drop
                // impl scrubs the bytes before the allocator gets them.
                cluster.remove(0);
            }
        }

        // Senior Audit §Phase 8: Proximity-Aware Reassembly (God-Mode Taint)
        // Brute-force O(N^2) join is replaced with proximity gating.
        // Only join fragments that are physically near each other (<100 lines)
        // or logically related. This eliminates combinatorial explosion.
        if cluster.len() >= 2 {
            let mut candidates = Vec::new();
            for i in 0..cluster.len() {
                for j in 0..cluster.len() {
                    if i == j {
                        continue;
                    }
                    let f1 = &cluster[i];
                    let f2 = &cluster[j];

                    let near = if f1.path == f2.path {
                        (f1.line as isize - f2.line as isize).abs() < 100
                    } else {
                        // For cross-file, only join if they share the same directory scope
                        // (already handled by scoped_key usually, but we check again)
                        true
                    };

                    if near {
                        let mut joined = Zeroizing::new(String::new());
                        joined.push_str(f1.value.as_str());
                        joined.push_str(f2.value.as_str());
                        candidates.push(joined);
                    }
                }
            }
            candidates
        } else {
            Vec::new()
        }
    }

    pub fn clear(&self) {
        for shard in &self.shards {
            shard.lock().clear();
        }
    }
}

fn scoped_key(fragment: &SecretFragment) -> String {
    let scope = fragment
        .path
        .as_deref()
        .and_then(|path| Path::new(path).parent())
        .and_then(Path::to_str)
        .unwrap_or("");
    format!("{}\0{}", fragment.prefix, scope)
}

fn shard_index(key: &str) -> usize {
    key.bytes()
        .fold(0usize, |h, b| h.wrapping_mul(31).wrapping_add(b as usize))
        % SHARD_COUNT
}