moeix 0.4.1

High-performance trigram code search for humans and AI agents.
Documentation
//! Negative result cache.
//!
//! Stores "file X with `content_hash` H does NOT match query Q" results.
//! Prevents re-verification of unchanged files that previously had no matches.

use std::collections::HashSet;
use std::sync::RwLock;
use xxhash_rust::xxh64::xxh64;

#[derive(Debug, Default, Clone)]
pub struct NegCacheStats {
    pub hits: u64,
    pub misses: u64,
    pub evictions: u64,
}

#[derive(Debug)]
pub struct NegCache {
    set: RwLock<HashSet<(u64, u64)>>,
    max_entries: usize,
    admit: RwLock<bool>,
    stats: RwLock<NegCacheStats>,
}

impl NegCache {
    #[must_use]
    pub fn new(max_entries: usize) -> Self {
        Self {
            set: RwLock::new(HashSet::new()),
            max_entries,
            admit: RwLock::new(true),
            stats: RwLock::new(NegCacheStats::default()),
        }
    }

    /// Set whether new entries are admitted.
    ///
    /// When `false`, `record_negative` returns immediately without caching.
    /// Lookups still work on existing entries.
    ///
    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    pub fn set_admit(&self, allow: bool) {
        *self.admit.write().unwrap_or_else(std::sync::PoisonError::into_inner) = allow;
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    #[must_use]
    pub fn is_known_negative(&self, query_fingerprint: u64, content_hash: u64) -> bool {
        let set = self.set.read().unwrap_or_else(std::sync::PoisonError::into_inner);
        let found = set.contains(&(query_fingerprint, content_hash));
        drop(set);
        let mut stats = self.stats.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        if found {
            stats.hits += 1;
        } else {
            stats.misses += 1;
        }
        found
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    pub fn record_negative(&self, query_fingerprint: u64, content_hash: u64) {
        if !*self.admit.read().unwrap_or_else(std::sync::PoisonError::into_inner) {
            return;
        }

        let mut set = self.set.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        if set.len() >= self.max_entries {
            let to_evict = self.max_entries.checked_div(10usize).unwrap_or(1usize).max(1);
            let mut remaining = to_evict;
            set.retain(|_| {
                if remaining > 0 {
                    remaining -= 1;
                    false
                } else {
                    true
                }
            });
            self.stats.write().unwrap_or_else(std::sync::PoisonError::into_inner).evictions +=
                u64::try_from(to_evict).unwrap_or(0);
        }
        set.insert((query_fingerprint, content_hash));
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    pub fn invalidate_file(&self, content_hash: u64) {
        let mut set = self.set.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        set.retain(|&(_, ch)| ch != content_hash);
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    pub fn invalidate_query(&self, query_fingerprint: u64) {
        let mut set = self.set.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        set.retain(|&(qf, _)| qf != query_fingerprint);
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    pub fn clear(&self) {
        let mut set = self.set.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        set.clear();
    }

    /// Evict a fraction of entries (e.g., 0.5 evicts half).
    /// Used by adaptive cache policy under memory pressure.
    ///
    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    #[allow(clippy::cast_precision_loss, clippy::cast_sign_loss, clippy::cast_possible_truncation, clippy::as_conversions)]
    pub fn evict_fraction(&self, fraction: f64) {
        if fraction <= 0.0_f64 {
            return;
        }
        if fraction >= 1.0_f64 {
            self.clear();
            return;
        }
        let mut set = self.set.write().unwrap_or_else(std::sync::PoisonError::into_inner);
        let len = set.len();
        let to_remove = (len as f64 * fraction) as usize;
        let mut remaining = to_remove;
        set.retain(|_| {
            if remaining > 0 {
                remaining -= 1;
                false
            } else {
                true
            }
        });
        self.stats.write().unwrap_or_else(std::sync::PoisonError::into_inner).evictions +=
            u64::try_from(to_remove).unwrap_or(0);
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    #[must_use]
    pub fn stats(&self) -> NegCacheStats {
        self.stats.read().unwrap_or_else(std::sync::PoisonError::into_inner).clone()
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    #[must_use]
    pub fn len(&self) -> usize {
        self.set.read().unwrap_or_else(std::sync::PoisonError::into_inner).len()
    }

    /// # Panics
    ///
    /// Panics if the internal `RwLock` is poisoned.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.set.read().unwrap_or_else(std::sync::PoisonError::into_inner).is_empty()
    }
}

#[must_use]
pub fn query_fingerprint(pattern: &str) -> u64 {
    xxh64(pattern.as_bytes(), 0)
}

#[cfg(test)]
#[allow(clippy::as_conversions, clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;

    #[test]
    fn is_known_negative_hit_and_miss() {
        let cache = NegCache::new(100);
        assert!(!cache.is_known_negative(1, 2));
        let stats = cache.stats();
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hits, 0);

        cache.record_negative(1, 2);
        assert!(cache.is_known_negative(1, 2));
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
    }

    #[test]
    fn record_negative_adds_entry() {
        let cache = NegCache::new(100);
        assert!(!cache.is_known_negative(10, 20));
        cache.record_negative(10, 20);
        assert!(cache.is_known_negative(10, 20));
        assert_eq!(cache.len(), 1);
    }

    #[test]
    fn invalidate_file_removes_all_entries_for_hash() {
        let cache = NegCache::new(100);
        cache.record_negative(1, 100);
        cache.record_negative(2, 100);
        cache.record_negative(3, 200);
        assert_eq!(cache.len(), 3);

        cache.invalidate_file(100);
        assert_eq!(cache.len(), 1);
        assert!(!cache.is_known_negative(1, 100));
        assert!(!cache.is_known_negative(2, 100));
        assert!(cache.is_known_negative(3, 200));
    }

    #[test]
    fn eviction_at_max_entries() {
        let cache = NegCache::new(10);
        for i in 0..15u64 {
            cache.record_negative(i, i * 10);
        }
        assert!(cache.len() <= 10);
        assert!(cache.stats().evictions > 0);
    }

    #[test]
    fn clear_empties_cache() {
        let cache = NegCache::new(100);
        cache.record_negative(1, 2);
        cache.record_negative(3, 4);
        assert_eq!(cache.len(), 2);
        cache.clear();
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn invalidate_query_removes_all_entries_for_query() {
        let cache = NegCache::new(100);
        cache.record_negative(42, 1);
        cache.record_negative(42, 2);
        cache.record_negative(99, 1);
        cache.invalidate_query(42);
        assert!(!cache.is_known_negative(42, 1));
        assert!(!cache.is_known_negative(42, 2));
        assert!(cache.is_known_negative(99, 1));
    }

    #[test]
    fn query_fingerprint_is_deterministic() {
        let a = query_fingerprint("hello world");
        let b = query_fingerprint("hello world");
        let c = query_fingerprint("goodbye");
        assert_eq!(a, b);
        assert_ne!(a, c);
    }

    #[test]
    fn is_empty_works() {
        let cache = NegCache::new(100);
        assert!(cache.is_empty());
        cache.record_negative(1, 2);
        assert!(!cache.is_empty());
        cache.invalidate_file(2);
        assert!(cache.is_empty());
    }
}