collet 0.1.1

Relentless agentic coding orchestrator with zero-drop agent loops
Documentation
//! Project-level cache for RepoMap and related indices.
//!
//! Keeps one `RepoMap` alive per project directory so that multiple sessions
//! (and resume) can share the same pre-built index instead of rebuilding from
//! scratch every time.
//!
//! Design constraints:
//! - All heavy operations (rebuild, incremental update) are CPU-bound, so we
//!   use `std::sync` locks (compatible with `spawn_blocking`).
//! - FileModified events are debounced: rapid edits are batched into a single
//!   incremental update.
//! - Idle-time (TTL) eviction drops caches for projects untouched for 30 minutes.

use std::collections::HashMap;
use std::path::Path;
use std::sync::{Arc, LazyLock, Mutex, RwLock};
use std::time::{Duration, Instant};

use crate::repo_map::RepoMap;

/// Process-wide `ProjectCacheManager`, created lazily on first access.
///
/// Shared across TUI, headless, remote, watch, ACP, and server modes; every
/// caller of [`global`] receives this same instance.
static GLOBAL: LazyLock<Arc<ProjectCacheManager>> =
    LazyLock::new(|| Arc::new(ProjectCacheManager::default()));

/// Access the global `ProjectCacheManager`.
pub fn global() -> &'static Arc<ProjectCacheManager> {
    &GLOBAL
}

/// One-call helper: look up (or build) the cache for `project_dir` on the
/// global manager and return a snapshot of it.
///
/// This is the primary entry point for all modes that just need the repo map
/// data for building a system prompt. The first call for a given project
/// performs the full build and may therefore be slow.
pub fn snapshot(project_dir: &str) -> RepoMapSnapshot {
    let cache = global().get_or_build(project_dir);
    cache.snapshot()
}

/// Lightweight, owned data extracted from a cached `RepoMap`.
///
/// Holds no locks or references into the cache, so it can be freely cloned,
/// compared, logged, and moved across threads.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RepoMapSnapshot {
    /// Rendered repo map text (output of `RepoMap::to_map_string`).
    pub map_string: String,
    /// Number of files in the index at snapshot time.
    pub file_count: usize,
    /// Number of symbols in the index at snapshot time.
    pub symbol_count: usize,
}

/// Per-project cached state.
///
/// Bundles a project's `RepoMap` with the bookkeeping needed for idle-time
/// eviction and batched incremental updates. Each field has its own
/// `std::sync` lock, so the struct can be shared behind an `Arc`.
pub struct ProjectCache {
    /// Project directory this cache was built for; also emitted as the
    /// `project` field in log events.
    project_dir: String,
    /// The index itself: read-locked by query/snapshot methods, write-locked
    /// while invalidating and rebuilding.
    repo_map: RwLock<RepoMap>,
    /// Tracks last time this cache was accessed (for idle-time eviction).
    last_access: Mutex<Instant>,
    /// File paths invalidated by FileModified events, awaiting flush.
    /// Kept free of duplicates by `queue_invalidation`.
    pending_invalidations: Mutex<Vec<String>>,
}

/// How long a project cache can sit unused before eviction.
///
/// A cache is dropped by `evict_stale` once its idle time reaches this value.
const EVICTION_TTL: Duration = Duration::from_secs(30 * 60); // 30 minutes

/// Minimum interval between incremental flushes (debounce).
///
/// The background flush loop sleeps for this long between passes, so edits
/// queued within one window are applied in a single incremental update.
const FLUSH_DEBOUNCE: Duration = Duration::from_millis(500);

/// Background maintenance interval.
///
/// The maintenance loop sleeps for this long before each maintenance pass.
const MAINTENANCE_INTERVAL: Duration = Duration::from_secs(300); // 5 minutes

impl ProjectCache {
    /// Creates the cache entry for `project_dir` and runs the initial scan.
    ///
    /// The scan is CPU-bound, so invoke this from `spawn_blocking` or another
    /// blocking context. The last-access timestamp is taken after the scan
    /// finishes, and the invalidation queue starts empty.
    fn build(project_dir: &str) -> Self {
        let root = Path::new(project_dir);
        let mut index = RepoMap::new(root);
        index.rebuild();

        Self {
            project_dir: project_dir.to_owned(),
            repo_map: RwLock::new(index),
            last_access: Mutex::new(Instant::now()),
            pending_invalidations: Mutex::default(),
        }
    }

    /// Resets the last-access timestamp to now, postponing eviction.
    ///
    /// Best-effort: if the timestamp mutex is poisoned the call is a no-op.
    fn touch(&self) {
        let Ok(mut stamp) = self.last_access.lock() else {
            return;
        };
        *stamp = Instant::now();
    }

    /// Returns how long ago this cache was last touched.
    ///
    /// Falls back to `Duration::ZERO` when the timestamp mutex is poisoned,
    /// which means such a cache is never considered idle.
    fn idle_duration(&self) -> Duration {
        match self.last_access.lock() {
            Ok(stamp) => stamp.elapsed(),
            Err(_) => Duration::ZERO,
        }
    }

    /// Renders the repo map as text for system prompt injection.
    ///
    /// Refreshes the last-access timestamp, then renders under a read lock.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    pub fn map_string(&self) -> String {
        self.touch();
        let index = self.repo_map.read().unwrap();
        index.to_map_string()
    }

    /// Returns the number of files currently in the index.
    ///
    /// Unlike the query methods, this does not refresh the last-access
    /// timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    pub fn file_count(&self) -> usize {
        let index = self.repo_map.read().unwrap();
        index.file_count()
    }

    /// Returns the number of symbols currently in the index.
    ///
    /// Unlike the query methods, this does not refresh the last-access
    /// timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    pub fn symbol_count(&self) -> usize {
        let index = self.repo_map.read().unwrap();
        index.symbol_count()
    }

    /// Collects everything needed for system prompt building under a single
    /// read-lock acquisition.
    ///
    /// Cheaper than calling `map_string()`, `file_count()` and
    /// `symbol_count()` one after another, and the three values are guaranteed
    /// to come from the same state of the index. Refreshes the last-access
    /// timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    pub fn snapshot(&self) -> RepoMapSnapshot {
        self.touch();
        let index = self.repo_map.read().unwrap();
        let map_string = index.to_map_string();
        let file_count = index.file_count();
        let symbol_count = index.symbol_count();
        RepoMapSnapshot {
            map_string,
            file_count,
            symbol_count,
        }
    }

    /// Reports whether the underlying repo map has been built and is usable.
    ///
    /// Does not refresh the last-access timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    #[allow(dead_code)]
    pub fn is_ready(&self) -> bool {
        let index = self.repo_map.read().unwrap();
        index.is_ready()
    }

    /// Ranks indexed files by BM25 relevance to `query` (read-only).
    ///
    /// Returns `(path, score)` pairs as produced by the underlying `RepoMap`,
    /// with `top_n` forwarded unchanged. Refreshes the last-access timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    #[allow(dead_code)]
    pub fn relevant_files_for_query(&self, query: &str, top_n: usize) -> Vec<(String, f64)> {
        self.touch();
        let index = self.repo_map.read().unwrap();
        index.relevant_files_for_query(query, top_n)
    }

    /// Runs a BM25 search over the indexed symbols (read-only).
    ///
    /// `query` and `top_n` are forwarded unchanged to the underlying
    /// `RepoMap`. Refreshes the last-access timestamp.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    #[allow(dead_code)]
    pub fn search(&self, query: &str, top_n: usize) -> Vec<crate::repo_map::bm25::SearchResult> {
        self.touch();
        let index = self.repo_map.read().unwrap();
        index.search(query, top_n)
    }

    /// Snapshot with BM25-based query-relevant file annotation.
    ///
    /// Unlike `snapshot()`, this appends a "Relevant files" section to the map
    /// string based on BM25 ranking, without mutating the shared `RepoMap`
    /// (which would cause contention on set_conversation_files).
    #[allow(dead_code)]
    pub fn snapshot_with_query(&self, query: &str, top_n: usize) -> RepoMapSnapshot {
        self.touch();
        let map = self.repo_map.read().unwrap();
        let mut map_string = map.to_map_string();
        let file_count = map.file_count();
        let symbol_count = map.symbol_count();

        // Append BM25 relevant files section if the index is ready
        if map.is_ready() {
            let intent = crate::repo_map::bm25::classify_query(query);
            if intent != crate::repo_map::bm25::QueryIntent::Skip {
                let relevant = map.relevant_files_for_query(query, top_n);
                if !relevant.is_empty() {
                    map_string.push_str("\n[Query-relevant files]\n");
                    for (path, score) in &relevant {
                        let _ = std::fmt::Write::write_fmt(
                            &mut map_string,
                            format_args!("  {path} (relevance: {score:.2})\n"),
                        );
                    }
                }
            }
        }

        RepoMapSnapshot {
            map_string,
            file_count,
            symbol_count,
        }
    }

    /// Queue a file for incremental invalidation (called from async context).
    pub fn queue_invalidation(&self, path: String) {
        if let Ok(mut pending) = self.pending_invalidations.lock()
            && !pending.contains(&path)
        {
            pending.push(path);
        }
    }

    /// Applies all queued invalidations and rebuilds the index incrementally.
    ///
    /// CPU-bound — call from blocking context. Returns immediately when
    /// nothing is queued. A debug event is emitted only if the rebuild
    /// re-parsed at least one file.
    ///
    /// # Panics
    /// Panics if either the queue mutex or the repo-map lock is poisoned.
    fn flush_invalidations(&self) {
        // Drain the queue in its own statement so the queue lock is released
        // before the (potentially slow) write-locked rebuild begins.
        let drained: Vec<String> =
            std::mem::take(&mut *self.pending_invalidations.lock().unwrap());
        if drained.is_empty() {
            return;
        }

        let mut index = self.repo_map.write().unwrap();
        for stale in &drained {
            index.invalidate(Path::new(stale));
        }

        let re_parsed = index.rebuild();
        if re_parsed > 0 {
            tracing::debug!(
                project = %self.project_dir,
                invalidated = drained.len(),
                re_parsed,
                "ProjectCache: incremental rebuild"
            );
        }
    }

    /// Rebuilds the index under a write lock (used by periodic maintenance).
    ///
    /// Always emits a debug event with the number of re-parsed files.
    ///
    /// # Panics
    /// Panics if the repo-map lock is poisoned.
    fn full_rebuild(&self) {
        let mut index = self.repo_map.write().unwrap();
        let re_parsed = index.rebuild();
        tracing::debug!(
            project = %self.project_dir,
            re_parsed,
            "ProjectCache: periodic rebuild"
        );
    }
}

// ---------------------------------------------------------------------------
// ProjectCacheManager — owns the HashMap and background tasks
// ---------------------------------------------------------------------------

/// Manages per-project caches with idle-time eviction and background
/// maintenance.
///
/// Caches are handed out as `Arc<ProjectCache>`, so an entry evicted from the
/// map stays alive for as long as a caller still holds a handle to it.
pub struct ProjectCacheManager {
    /// Project directory string (exactly as passed to `get_or_build`) mapped
    /// to its cache.
    caches: Mutex<HashMap<String, Arc<ProjectCache>>>,
    /// Ensures background tasks are only spawned once.
    bg_started: std::sync::atomic::AtomicBool,
}

impl Default for ProjectCacheManager {
    fn default() -> Self {
        Self::new()
    }
}

impl ProjectCacheManager {
    /// Creates an empty manager: no cached projects, background tasks not yet
    /// started.
    pub fn new() -> Self {
        let caches = Mutex::new(HashMap::new());
        let bg_started = std::sync::atomic::AtomicBool::new(false);
        Self { caches, bg_started }
    }

    /// Starts the background maintenance and flush loops (idempotent).
    ///
    /// Must be called from within a tokio runtime. The started flag is set
    /// atomically before anything is spawned, so only the first caller spawns
    /// the tasks and every later call returns immediately.
    pub fn ensure_background_tasks(self: &Arc<Self>) {
        use std::sync::atomic::Ordering;

        let already_started = self.bg_started.swap(true, Ordering::SeqCst);
        if !already_started {
            self.spawn_maintenance();
            self.spawn_flush_loop();
        }
    }

    /// Returns the cache for `project_dir`, building it on a miss.
    ///
    /// **CPU-bound on first call** (full repo scan). Subsequent calls for the
    /// same project return the cached `Arc` immediately and refresh its
    /// last-access timestamp.
    ///
    /// The build runs without holding the map lock. If another thread inserts
    /// an entry for the same project in the meantime, that entry wins and the
    /// freshly built cache is discarded.
    ///
    /// # Panics
    /// Panics if the cache-map mutex is poisoned.
    pub fn get_or_build(&self, project_dir: &str) -> Arc<ProjectCache> {
        // Fast path: cache hit. The guard is a temporary of the `if let`
        // statement, so the lock is released before any build starts.
        if let Some(hit) = self.caches.lock().unwrap().get(project_dir) {
            hit.touch();
            return Arc::clone(hit);
        }

        // Slow path: build outside the lock, then insert unless we lost a race.
        let built = Arc::new(ProjectCache::build(project_dir));
        let mut caches = self.caches.lock().unwrap();
        let slot = caches
            .entry(project_dir.to_string())
            .or_insert_with(|| Arc::clone(&built));
        Arc::clone(slot)
    }

    /// Notify that a file was modified (from FileModified event).
    ///
    /// Finds the matching project cache by path prefix and queues invalidation.
    pub fn notify_file_modified(&self, file_path: &str) {
        let caches = self.caches.lock().unwrap();
        for (project_dir, cache) in caches.iter() {
            if file_path.starts_with(project_dir.as_str()) {
                cache.queue_invalidation(file_path.to_string());
                return;
            }
        }
    }

    /// Flushes the pending invalidations of every cached project.
    ///
    /// CPU-bound — call from blocking context. The map lock is held only long
    /// enough to copy the `Arc` handles; the flushes themselves run without it.
    ///
    /// # Panics
    /// Panics if the cache-map mutex is poisoned.
    fn flush_all(&self) {
        let handles: Vec<Arc<ProjectCache>> = {
            let caches = self.caches.lock().unwrap();
            caches.values().map(Arc::clone).collect()
        };
        for cache in handles {
            cache.flush_invalidations();
        }
    }

    /// Drops every cache whose idle time has reached `EVICTION_TTL`.
    ///
    /// Returns the number of evicted entries and logs an info event when that
    /// number is non-zero.
    ///
    /// # Panics
    /// Panics if the cache-map mutex is poisoned.
    fn evict_stale(&self) -> usize {
        let mut caches = self.caches.lock().unwrap();
        let before = caches.len();
        caches.retain(|_, cache| cache.idle_duration() < EVICTION_TTL);
        let remaining = caches.len();

        let evicted = before - remaining;
        if evicted > 0 {
            tracing::info!(evicted, remaining, "ProjectCache: LRU eviction");
        }
        evicted
    }

    /// One maintenance pass: flush queued invalidations, evict idle caches,
    /// then rebuild every cache that survived eviction.
    ///
    /// The rebuild step picks up changes that never arrived as FileModified
    /// events (external edits).
    ///
    /// # Panics
    /// Panics if the cache-map mutex is poisoned.
    fn maintenance(&self) {
        self.flush_all();
        self.evict_stale();

        // Copy the handles so the map lock is not held during the rebuilds.
        let survivors: Vec<Arc<ProjectCache>> = {
            let caches = self.caches.lock().unwrap();
            caches.values().map(Arc::clone).collect()
        };
        for cache in survivors {
            cache.full_rebuild();
        }
    }

    /// Spawns the background maintenance loop on the current tokio runtime.
    ///
    /// The task waits 60 seconds so startup isn't slowed, then repeatedly
    /// sleeps for `MAINTENANCE_INTERVAL` and runs one maintenance pass on the
    /// blocking thread pool. The outcome of each pass (including a panic
    /// inside it) is ignored and the loop continues.
    pub fn spawn_maintenance(self: &Arc<Self>) {
        let manager = Arc::clone(self);
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_secs(60)).await;
            loop {
                tokio::time::sleep(MAINTENANCE_INTERVAL).await;
                // CPU-bound work: hand it to the blocking pool and wait for it
                // to finish before scheduling the next pass.
                let worker = Arc::clone(&manager);
                let pass = tokio::task::spawn_blocking(move || worker.maintenance());
                let _ = pass.await;
            }
        });
    }

    /// Spawns the debounced flush loop (runs more frequently than
    /// maintenance) on the current tokio runtime.
    ///
    /// The task repeatedly sleeps for `FLUSH_DEBOUNCE` and then flushes all
    /// pending invalidations on the blocking thread pool. The outcome of each
    /// flush (including a panic inside it) is ignored and the loop continues.
    pub fn spawn_flush_loop(self: &Arc<Self>) {
        let manager = Arc::clone(self);
        tokio::spawn(async move {
            loop {
                tokio::time::sleep(FLUSH_DEBOUNCE).await;
                let worker = Arc::clone(&manager);
                let flush = tokio::task::spawn_blocking(move || worker.flush_all());
                let _ = flush.await;
            }
        });
    }
}