// thread-flow 0.1.0

// SPDX-FileCopyrightText: 2025 Knitli Inc. <knitli@knit.li>
// SPDX-License-Identifier: AGPL-3.0-or-later

//! Query result caching for Thread pipeline
//!
//! This module provides LRU caching for frequently accessed query results,
//! reducing database round-trips and improving response times.
//!
//! ## Features
//!
//! - **Async-first**: Built on moka's async cache for tokio compatibility
//! - **Type-safe**: Generic caching with compile-time type checking
//! - **TTL support**: Configurable time-to-live for cache entries
//! - **Statistics**: Track cache hit/miss rates for monitoring
//! - **Size limits**: Automatic eviction when cache exceeds capacity
//!
//! ## Usage
//!
//! ```rust,ignore
//! use thread_flow::cache::{QueryCache, CacheConfig};
//! use thread_services::conversion::Fingerprint;
//!
//! // Create cache with 1000 entry limit, 5 minute TTL
//! let cache = QueryCache::new(CacheConfig {
//!     max_capacity: 1000,
//!     ttl_seconds: 300,
//! });
//!
//! // Cache symbol query results
//! let fingerprint = compute_content_fingerprint("fn main() {}");
//! cache.insert(fingerprint, symbols).await;
//!
//! // Retrieve from cache
//! if let Some(symbols) = cache.get(&fingerprint).await {
//!     // Cache hit - saved D1 query!
//! }
//! ```
//!
//! ## Performance Impact
//!
//! | Scenario | Without Cache | With Cache | Savings |
//! |----------|---------------|------------|---------|
//! | Symbol lookup | 50-100ms (D1) | <1µs (memory) | **99.9%** |
//! | Metadata query | 20-50ms (D1) | <1µs (memory) | **99.9%** |
//! | Re-analysis (90% hit) | 100ms total | 10ms total | **90%** |

#[cfg(feature = "caching")]
use moka::future::Cache;
#[cfg(feature = "caching")]
use std::hash::Hash;
#[cfg(feature = "caching")]
use std::sync::Arc;
#[cfg(feature = "caching")]
use std::time::Duration;
#[cfg(feature = "caching")]
use tokio::sync::RwLock;

/// Settings used to build a query result cache
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Upper bound on the number of entries kept in the cache
    pub max_capacity: u64,
    /// Time-to-live applied to cache entries, in seconds
    pub ttl_seconds: u64,
}

impl Default for CacheConfig {
    /// Default configuration: 10,000 entries with a 5 minute time-to-live.
    fn default() -> Self {
        // Named here so the two defaults are easy to spot and adjust.
        const DEFAULT_MAX_CAPACITY: u64 = 10_000;
        const DEFAULT_TTL_SECONDS: u64 = 5 * 60;

        CacheConfig {
            max_capacity: DEFAULT_MAX_CAPACITY,
            ttl_seconds: DEFAULT_TTL_SECONDS,
        }
    }
}

/// Cache statistics for monitoring
///
/// A plain value type holding lookup counters; the rate helpers below derive
/// percentages from these fields and never modify them.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Total number of cache lookups
    pub total_lookups: u64,
    /// Number of cache hits
    pub hits: u64,
    /// Number of cache misses
    pub misses: u64,
}

impl CacheStats {
    /// Calculate cache hit rate as percentage
    ///
    /// Returns `0.0` when no lookups have been recorded, so the division
    /// never sees a zero denominator.
    pub fn hit_rate(&self) -> f64 {
        if self.total_lookups == 0 {
            0.0
        } else {
            (self.hits as f64 / self.total_lookups as f64) * 100.0
        }
    }

    /// Calculate cache miss rate as percentage
    ///
    /// Returns `0.0` when no lookups have been recorded; otherwise it is the
    /// complement of [`hit_rate`](Self::hit_rate).
    pub fn miss_rate(&self) -> f64 {
        // Without this guard an untouched cache would report a 100% miss
        // rate (100 - 0), which contradicts `misses == 0`.
        if self.total_lookups == 0 {
            0.0
        } else {
            100.0 - self.hit_rate()
        }
    }
}

/// Generic query result cache
///
/// Provides capacity-bounded caching with TTL on top of a moka async cache,
/// plus hit/miss counters for monitoring.
///
/// Type requirements (enforced on the `impl`, not on the struct itself):
/// - Key: `Hash + Eq + Send + Sync + 'static`; `get` and `get_or_insert`
///   additionally require `Clone`
/// - Value: `Clone + Send + Sync + 'static`
///
/// # Examples
///
/// ```rust,ignore
/// use thread_flow::cache::{QueryCache, CacheConfig};
///
/// // Cache for symbol queries (Fingerprint -> Vec<Symbol>)
/// let symbol_cache = QueryCache::new(CacheConfig::default());
///
/// // Cache for metadata queries (String -> Metadata)
/// let metadata_cache = QueryCache::new(CacheConfig {
///     max_capacity: 5000,
///     ttl_seconds: 600,  // 10 minutes
/// });
/// ```
#[cfg(feature = "caching")]
pub struct QueryCache<K, V> {
    /// Underlying moka cache that stores the entries
    cache: Cache<K, V>,
    /// Lookup counters, updated by `get`
    stats: Arc<RwLock<CacheStats>>,
}

#[cfg(feature = "caching")]
impl<K, V> QueryCache<K, V>
where
    K: Hash + Eq + Send + Sync + 'static,
    V: Clone + Send + Sync + 'static,
{
    /// Create a new query cache with the given configuration
    ///
    /// `config.max_capacity` becomes the cache's maximum capacity and
    /// `config.ttl_seconds` its time-to-live. Statistics start at zero.
    ///
    /// NOTE(review): moka's builder documentation mentions a panic for
    /// extremely long durations; confirm whether `ttl_seconds` should be
    /// clamped before it reaches the builder.
    pub fn new(config: CacheConfig) -> Self {
        let cache = Cache::builder()
            .max_capacity(config.max_capacity)
            .time_to_live(Duration::from_secs(config.ttl_seconds))
            .build();

        Self {
            cache,
            stats: Arc::new(RwLock::new(CacheStats::default())),
        }
    }

    /// Insert a key-value pair into the cache
    ///
    /// If the key already exists, the value will be updated and TTL reset.
    /// Inserting does not touch the statistics.
    pub async fn insert(&self, key: K, value: V) {
        self.cache.insert(key, value).await;
    }

    /// Get a value from the cache
    ///
    /// Returns `None` if the key is not found or has expired.
    /// Updates cache statistics (hit/miss counters).
    pub async fn get(&self, key: &K) -> Option<V>
    where
        K: Clone,
    {
        // Perform the lookup before touching the stats lock. Holding the
        // write guard across the cache await would serialise every lookup
        // behind a single exclusive lock.
        let found = self.cache.get(key).await;

        // The guard now only covers the counter update.
        let mut stats = self.stats.write().await;
        stats.total_lookups += 1;
        if found.is_some() {
            stats.hits += 1;
        } else {
            stats.misses += 1;
        }

        found
    }

    /// Get a value from cache or compute it if missing
    ///
    /// This is the recommended way to use the cache as it handles
    /// cache misses transparently and updates statistics correctly
    /// (each call counts as exactly one lookup, via [`get`](Self::get)).
    ///
    /// The lookup, the computation and the insert are separate steps, so
    /// concurrent callers that miss on the same key may each run their
    /// closure; the value inserted last is the one that stays cached.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let symbols = cache.get_or_insert(fingerprint, || async {
    ///     // This closure only runs on cache miss
    ///     query_database_for_symbols(fingerprint).await
    /// }).await;
    /// ```
    pub async fn get_or_insert<F, Fut>(&self, key: K, f: F) -> V
    where
        K: Clone,
        F: FnOnce() -> Fut,
        Fut: std::future::Future<Output = V>,
    {
        // Check cache first
        if let Some(value) = self.get(&key).await {
            return value;
        }

        // Compute value on cache miss; one clone goes into the cache and the
        // original is handed back to the caller.
        let value = f().await;
        self.insert(key, value.clone()).await;
        value
    }

    /// Invalidate (remove) a specific cache entry
    pub async fn invalidate(&self, key: &K) {
        self.cache.invalidate(key).await;
    }

    /// Clear all cache entries
    ///
    /// Statistics are left untouched; use [`reset_stats`](Self::reset_stats)
    /// to zero them.
    pub async fn clear(&self) {
        self.cache.invalidate_all();
        // Sync to ensure all entries are actually removed before returning
        self.cache.run_pending_tasks().await;
    }

    /// Get current cache statistics
    ///
    /// Returns a copy of the counters taken under a read lock; later lookups
    /// do not affect the returned value.
    pub async fn stats(&self) -> CacheStats {
        self.stats.read().await.clone()
    }

    /// Reset cache statistics
    ///
    /// Sets every counter back to zero; cached entries are not affected.
    pub async fn reset_stats(&self) {
        let mut stats = self.stats.write().await;
        *stats = CacheStats::default();
    }

    /// Get the number of entries currently in the cache
    ///
    /// NOTE(review): this forwards moka's `entry_count`, which may lag behind
    /// recent inserts or invalidations until pending tasks have run — confirm
    /// before relying on it for exact counts.
    pub fn entry_count(&self) -> u64 {
        self.cache.entry_count()
    }
}

/// No-op cache for when caching feature is disabled
///
/// This provides the same API but doesn't actually cache anything,
/// allowing code to compile with or without the `caching` feature.
#[cfg(not(feature = "caching"))]
pub struct QueryCache<K, V> {
    _phantom: std::marker::PhantomData<(K, V)>,
}

#[cfg(not(feature = "caching"))]
impl<K, V> QueryCache<K, V> {
    pub fn new(_config: CacheConfig) -> Self {
        Self {
            _phantom: std::marker::PhantomData,
        }
    }

    pub async fn insert(&self, _key: K, _value: V) {}

    pub async fn get(&self, _key: &K) -> Option<V> {
        None
    }

    pub async fn get_or_insert<F, Fut>(&self, _key: K, f: F) -> V
    where
        F: FnOnce() -> Fut,
        Fut: std::future::Future<Output = V>,
    {
        f().await
    }

    pub async fn invalidate(&self, _key: &K) {}

    pub async fn clear(&self) {}

    pub async fn stats(&self) -> CacheStats {
        CacheStats::default()
    }

    pub async fn reset_stats(&self) {}

    pub fn entry_count(&self) -> u64 {
        0
    }
}

// Unit tests for the query cache. Tests gated on `feature = "caching"`
// exercise the moka-backed cache; the last test covers the no-op variant that
// is compiled when the feature is disabled.
#[cfg(test)]
mod tests {
    use super::*;

    // A stored value can be read back, and an unknown key yields `None`.
    #[tokio::test]
    #[cfg(feature = "caching")]
    async fn test_cache_basic_operations() {
        let cache = QueryCache::new(CacheConfig {
            max_capacity: 100,
            ttl_seconds: 60,
        });

        // Insert and retrieve
        cache.insert("key1".to_string(), "value1".to_string()).await;
        let value = cache.get(&"key1".to_string()).await;
        assert_eq!(value, Some("value1".to_string()));

        // Cache miss
        let missing = cache.get(&"nonexistent".to_string()).await;
        assert_eq!(missing, None);
    }

    // Counters start at zero and each `get` records exactly one hit or miss.
    // The expected values depend on the order of the `get` calls below.
    #[tokio::test]
    #[cfg(feature = "caching")]
    async fn test_cache_statistics() {
        let cache = QueryCache::new(CacheConfig::default());

        // Initial stats
        let stats = cache.stats().await;
        assert_eq!(stats.total_lookups, 0);
        assert_eq!(stats.hits, 0);
        assert_eq!(stats.misses, 0);

        // Insert and hit
        cache.insert(1, "one".to_string()).await;
        let _ = cache.get(&1).await;

        let stats = cache.stats().await;
        assert_eq!(stats.total_lookups, 1);
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.hit_rate(), 100.0);

        // Miss
        let _ = cache.get(&2).await;

        let stats = cache.stats().await;
        assert_eq!(stats.total_lookups, 2);
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hit_rate(), 50.0);
    }

    // `get_or_insert` runs the closure only on a miss and serves the cached
    // value afterwards; `call_count` tracks how often the closure body ran.
    #[tokio::test]
    #[cfg(feature = "caching")]
    async fn test_get_or_insert() {
        let cache = QueryCache::new(CacheConfig::default());

        let mut call_count = 0;

        // First call - cache miss, should execute closure
        let value1 = cache
            .get_or_insert(1, || async {
                call_count += 1;
                "computed".to_string()
            })
            .await;

        assert_eq!(value1, "computed");
        assert_eq!(call_count, 1);

        // Second call - cache hit, should NOT execute closure
        let value2 = cache
            .get_or_insert(1, || async {
                call_count += 1;
                "should_not_be_called".to_string()
            })
            .await;

        assert_eq!(value2, "computed");
        assert_eq!(call_count, 1); // Closure not called on cache hit

        // One miss from the first call, one hit from the second.
        let stats = cache.stats().await;
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
    }

    // Invalidating a key makes a previously stored value unavailable.
    #[tokio::test]
    #[cfg(feature = "caching")]
    async fn test_cache_invalidation() {
        let cache = QueryCache::new(CacheConfig::default());

        cache.insert("key", "value".to_string()).await;
        assert!(cache.get(&"key").await.is_some());

        cache.invalidate(&"key").await;
        assert!(cache.get(&"key").await.is_none());
    }

    // `clear` removes every entry, not just a single key.
    #[tokio::test]
    #[cfg(feature = "caching")]
    async fn test_cache_clear() {
        let cache = QueryCache::new(CacheConfig::default());

        cache.insert(1, "one".to_string()).await;
        cache.insert(2, "two".to_string()).await;
        cache.insert(3, "three".to_string()).await;

        // Verify entries exist
        assert!(cache.get(&1).await.is_some());
        assert!(cache.get(&2).await.is_some());
        assert!(cache.get(&3).await.is_some());

        cache.clear().await;

        // Verify entries are gone after clear
        assert!(cache.get(&1).await.is_none());
        assert!(cache.get(&2).await.is_none());
        assert!(cache.get(&3).await.is_none());
    }

    // With the `caching` feature disabled the cache stores nothing, always
    // computes, and reports empty statistics.
    #[tokio::test]
    #[cfg(not(feature = "caching"))]
    async fn test_no_op_cache() {
        let cache = QueryCache::new(CacheConfig::default());

        // Insert does nothing
        cache.insert("key", "value".to_string()).await;

        // Get always returns None
        assert_eq!(cache.get(&"key").await, None);

        // get_or_insert always computes
        let value = cache
            .get_or_insert("key", || async { "computed".to_string() })
            .await;
        assert_eq!(value, "computed");

        // Stats are always empty
        let stats = cache.stats().await;
        assert_eq!(stats.total_lookups, 0);
        assert_eq!(cache.entry_count(), 0);
    }
}