use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use md5::{Digest, Md5};
const DEFAULT_POLL_INTERVAL: Duration = Duration::from_secs(5);
const DEFAULT_DEBOUNCE: Duration = Duration::from_secs(2);
const MAX_TRACKED_FILES: usize = 5000;
pub struct WatcherConfig {
pub poll_interval: Duration,
pub debounce: Duration,
pub root: PathBuf,
}
impl Default for WatcherConfig {
fn default() -> Self {
Self {
poll_interval: DEFAULT_POLL_INTERVAL,
debounce: DEFAULT_DEBOUNCE,
root: PathBuf::from("."),
}
}
}
#[derive(Debug)]
pub struct FileTracker {
states: HashMap<PathBuf, FileState>,
root: PathBuf,
}
#[derive(Debug, Clone)]
struct FileState {
modified: SystemTime,
size: u64,
content_hash: Option<String>,
}
#[derive(Debug, Clone)]
pub struct ScanResult {
pub added: Vec<PathBuf>,
pub modified: Vec<PathBuf>,
pub removed: Vec<PathBuf>,
}
impl ScanResult {
pub fn has_changes(&self) -> bool {
!self.added.is_empty() || !self.modified.is_empty() || !self.removed.is_empty()
}
pub fn total_changes(&self) -> usize {
self.added.len() + self.modified.len() + self.removed.len()
}
pub fn changed_files(&self) -> Vec<&PathBuf> {
self.added.iter().chain(self.modified.iter()).collect()
}
}
impl FileTracker {
pub fn new(root: &Path) -> Self {
Self {
states: HashMap::new(),
root: root.to_path_buf(),
}
}
pub fn scan(&mut self) -> ScanResult {
let mut current_files: HashMap<PathBuf, FileState> = HashMap::new();
let walker = ignore::WalkBuilder::new(&self.root)
.hidden(true)
.git_ignore(true)
.max_depth(Some(10))
.build();
let mut count = 0usize;
for entry in walker.flatten() {
if count >= MAX_TRACKED_FILES {
break;
}
let path = entry.path().to_path_buf();
if !path.is_file() || !is_indexable(&path) {
continue;
}
if let Ok(meta) = std::fs::metadata(&path) {
let modified = meta.modified().unwrap_or(SystemTime::UNIX_EPOCH);
let size = meta.len();
current_files.insert(
path,
FileState {
modified,
size,
content_hash: None,
},
);
count += 1;
}
}
let mut added = Vec::new();
let mut modified = Vec::new();
let mut removed = Vec::new();
for (path, state) in ¤t_files {
match self.states.get(path) {
None => added.push(path.clone()),
Some(old) => {
if (old.modified != state.modified || old.size != state.size)
&& has_content_changed(path, old)
{
modified.push(path.clone());
}
}
}
}
for path in self.states.keys() {
if !current_files.contains_key(path) {
removed.push(path.clone());
}
}
self.states = current_files;
ScanResult {
added,
modified,
removed,
}
}
pub fn tracked_count(&self) -> usize {
self.states.len()
}
}
pub fn create_stop_flag() -> Arc<AtomicBool> {
Arc::new(AtomicBool::new(false))
}
fn is_indexable(path: &Path) -> bool {
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
matches!(
ext,
"rs" | "ts"
| "tsx"
| "js"
| "jsx"
| "py"
| "go"
| "java"
| "c"
| "cpp"
| "h"
| "hpp"
| "rb"
| "cs"
| "kt"
| "swift"
| "php"
| "scala"
| "ex"
| "exs"
| "zig"
| "lua"
| "dart"
| "vue"
| "svelte"
)
}
fn has_content_changed(path: &Path, old_state: &FileState) -> bool {
if let Some(ref old_hash) = old_state.content_hash {
if let Ok(content) = std::fs::read(path) {
let new_hash = hash_bytes(&content);
return &new_hash != old_hash;
}
}
true
}
fn hash_bytes(data: &[u8]) -> String {
let mut hasher = Md5::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
#[test]
fn is_indexable_code_files() {
assert!(is_indexable(Path::new("main.rs")));
assert!(is_indexable(Path::new("app.tsx")));
assert!(is_indexable(Path::new("server.go")));
assert!(!is_indexable(Path::new("readme.md")));
assert!(!is_indexable(Path::new("image.png")));
assert!(!is_indexable(Path::new("data.json")));
}
#[test]
fn tracker_detects_new_files() {
let dir = std::env::temp_dir().join("lean_ctx_watcher_test_new");
let _ = fs::remove_dir_all(&dir);
fs::create_dir_all(&dir).unwrap();
fs::write(dir.join("test.rs"), "fn main() {}").unwrap();
let mut tracker = FileTracker::new(&dir);
let result = tracker.scan();
assert!(result.added.len() >= 1, "should detect new file");
assert!(result.modified.is_empty());
assert!(result.removed.is_empty());
assert!(result.has_changes());
let _ = fs::remove_dir_all(&dir);
}
#[test]
fn tracker_detects_no_changes_on_rescan() {
let dir = std::env::temp_dir().join("lean_ctx_watcher_test_stable");
let _ = fs::remove_dir_all(&dir);
fs::create_dir_all(&dir).unwrap();
fs::write(dir.join("stable.rs"), "fn main() {}").unwrap();
let mut tracker = FileTracker::new(&dir);
let _ = tracker.scan();
let result = tracker.scan();
assert!(result.added.is_empty());
assert!(result.modified.is_empty());
assert!(result.removed.is_empty());
assert!(!result.has_changes());
let _ = fs::remove_dir_all(&dir);
}
#[test]
fn tracker_detects_removed_files() {
let dir = std::env::temp_dir().join("lean_ctx_watcher_test_rm");
let _ = fs::remove_dir_all(&dir);
fs::create_dir_all(&dir).unwrap();
let file = dir.join("temp.rs");
fs::write(&file, "fn main() {}").unwrap();
let mut tracker = FileTracker::new(&dir);
let _ = tracker.scan();
fs::remove_file(&file).unwrap();
let result = tracker.scan();
assert!(!result.removed.is_empty(), "should detect removed file");
let _ = fs::remove_dir_all(&dir);
}
#[test]
fn scan_result_methods() {
let result = ScanResult {
added: vec![PathBuf::from("a.rs")],
modified: vec![PathBuf::from("b.rs")],
removed: vec![PathBuf::from("c.rs")],
};
assert!(result.has_changes());
assert_eq!(result.total_changes(), 3);
assert_eq!(result.changed_files().len(), 2);
}
#[test]
fn empty_scan_result() {
let result = ScanResult {
added: vec![],
modified: vec![],
removed: vec![],
};
assert!(!result.has_changes());
assert_eq!(result.total_changes(), 0);
}
}