lean-ctx 3.1.5

Context Runtime for AI Agents with CCP. 42 MCP tools, 10 read modes, 90+ compression patterns, cross-session memory (CCP), persistent AI knowledge with temporal facts + contradiction detection, multi-agent context sharing + diaries, LITM-aware positioning, AAAK compact format, adaptive compression with Thompson Sampling bandits. Supports 24 AI tools. Reduces LLM token consumption by up to 99%.
Documentation
//! File watcher for automatic incremental re-indexing.
//!
//! Monitors the project directory for file changes and triggers
//! incremental BM25 + embedding index updates. Uses debouncing
//! to avoid thrashing during rapid edits (e.g., auto-save).
//!
//! Architecture:
//! - Background task polls filesystem for changes (no native fs events dependency)
//! - Debounces rapid changes (configurable, default 2s)
//! - Only re-indexes changed files (via content hash comparison)
//! - Notifies subscribers when index is updated

use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::time::{Duration, SystemTime};

use md5::{Digest, Md5};

const DEFAULT_POLL_INTERVAL: Duration = Duration::from_secs(5);
const DEFAULT_DEBOUNCE: Duration = Duration::from_secs(2);
const MAX_TRACKED_FILES: usize = 5000;

pub struct WatcherConfig {
    pub poll_interval: Duration,
    pub debounce: Duration,
    pub root: PathBuf,
}

impl Default for WatcherConfig {
    fn default() -> Self {
        Self {
            poll_interval: DEFAULT_POLL_INTERVAL,
            debounce: DEFAULT_DEBOUNCE,
            root: PathBuf::from("."),
        }
    }
}

/// Tracks file modification state for change detection.
#[derive(Debug)]
pub struct FileTracker {
    states: HashMap<PathBuf, FileState>,
    root: PathBuf,
}

#[derive(Debug, Clone)]
struct FileState {
    modified: SystemTime,
    size: u64,
    content_hash: Option<String>,
}

/// Result of a file scan — lists which files changed.
#[derive(Debug, Clone)]
pub struct ScanResult {
    pub added: Vec<PathBuf>,
    pub modified: Vec<PathBuf>,
    pub removed: Vec<PathBuf>,
}

impl ScanResult {
    pub fn has_changes(&self) -> bool {
        !self.added.is_empty() || !self.modified.is_empty() || !self.removed.is_empty()
    }

    pub fn total_changes(&self) -> usize {
        self.added.len() + self.modified.len() + self.removed.len()
    }

    pub fn changed_files(&self) -> Vec<&PathBuf> {
        self.added.iter().chain(self.modified.iter()).collect()
    }
}

impl FileTracker {
    pub fn new(root: &Path) -> Self {
        Self {
            states: HashMap::new(),
            root: root.to_path_buf(),
        }
    }

    /// Scan the directory and detect changes since last scan.
    pub fn scan(&mut self) -> ScanResult {
        let mut current_files: HashMap<PathBuf, FileState> = HashMap::new();

        let walker = ignore::WalkBuilder::new(&self.root)
            .hidden(true)
            .git_ignore(true)
            .max_depth(Some(10))
            .build();

        let mut count = 0usize;
        for entry in walker.flatten() {
            if count >= MAX_TRACKED_FILES {
                break;
            }
            let path = entry.path().to_path_buf();
            if !path.is_file() || !is_indexable(&path) {
                continue;
            }

            if let Ok(meta) = std::fs::metadata(&path) {
                let modified = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);
                let size = meta.len();
                current_files.insert(
                    path,
                    FileState {
                        modified,
                        size,
                        content_hash: None,
                    },
                );
                count += 1;
            }
        }

        let mut added = Vec::new();
        let mut modified = Vec::new();
        let mut removed = Vec::new();

        for (path, state) in &current_files {
            match self.states.get(path) {
                None => added.push(path.clone()),
                Some(old) => {
                    if (old.modified != state.modified || old.size != state.size)
                        && has_content_changed(path, old)
                    {
                        modified.push(path.clone());
                    }
                }
            }
        }

        for path in self.states.keys() {
            if !current_files.contains_key(path) {
                removed.push(path.clone());
            }
        }

        self.states = current_files;

        ScanResult {
            added,
            modified,
            removed,
        }
    }

    pub fn tracked_count(&self) -> usize {
        self.states.len()
    }
}

/// Shared flag to control watcher lifecycle.
pub fn create_stop_flag() -> Arc<AtomicBool> {
    Arc::new(AtomicBool::new(false))
}

fn is_indexable(path: &Path) -> bool {
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    matches!(
        ext,
        "rs" | "ts"
            | "tsx"
            | "js"
            | "jsx"
            | "py"
            | "go"
            | "java"
            | "c"
            | "cpp"
            | "h"
            | "hpp"
            | "rb"
            | "cs"
            | "kt"
            | "swift"
            | "php"
            | "scala"
            | "ex"
            | "exs"
            | "zig"
            | "lua"
            | "dart"
            | "vue"
            | "svelte"
    )
}

fn has_content_changed(path: &Path, old_state: &FileState) -> bool {
    if let Some(ref old_hash) = old_state.content_hash {
        if let Ok(content) = std::fs::read(path) {
            let new_hash = hash_bytes(&content);
            return &new_hash != old_hash;
        }
    }
    true
}

fn hash_bytes(data: &[u8]) -> String {
    let mut hasher = Md5::new();
    hasher.update(data);
    format!("{:x}", hasher.finalize())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    #[test]
    fn is_indexable_code_files() {
        assert!(is_indexable(Path::new("main.rs")));
        assert!(is_indexable(Path::new("app.tsx")));
        assert!(is_indexable(Path::new("server.go")));
        assert!(!is_indexable(Path::new("readme.md")));
        assert!(!is_indexable(Path::new("image.png")));
        assert!(!is_indexable(Path::new("data.json")));
    }

    #[test]
    fn tracker_detects_new_files() {
        let dir = std::env::temp_dir().join("lean_ctx_watcher_test_new");
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("test.rs"), "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let result = tracker.scan();
        assert!(result.added.len() >= 1, "should detect new file");
        assert!(result.modified.is_empty());
        assert!(result.removed.is_empty());
        assert!(result.has_changes());

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn tracker_detects_no_changes_on_rescan() {
        let dir = std::env::temp_dir().join("lean_ctx_watcher_test_stable");
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("stable.rs"), "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let _ = tracker.scan();

        let result = tracker.scan();
        assert!(result.added.is_empty());
        assert!(result.modified.is_empty());
        assert!(result.removed.is_empty());
        assert!(!result.has_changes());

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn tracker_detects_removed_files() {
        let dir = std::env::temp_dir().join("lean_ctx_watcher_test_rm");
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        let file = dir.join("temp.rs");
        fs::write(&file, "fn main() {}").unwrap();

        let mut tracker = FileTracker::new(&dir);
        let _ = tracker.scan();

        fs::remove_file(&file).unwrap();
        let result = tracker.scan();
        assert!(!result.removed.is_empty(), "should detect removed file");

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn scan_result_methods() {
        let result = ScanResult {
            added: vec![PathBuf::from("a.rs")],
            modified: vec![PathBuf::from("b.rs")],
            removed: vec![PathBuf::from("c.rs")],
        };
        assert!(result.has_changes());
        assert_eq!(result.total_changes(), 3);
        assert_eq!(result.changed_files().len(), 2);
    }

    #[test]
    fn empty_scan_result() {
        let result = ScanResult {
            added: vec![],
            modified: vec![],
            removed: vec![],
        };
        assert!(!result.has_changes());
        assert_eq!(result.total_changes(), 0);
    }
}