dx_forge/watcher_legacy/
cache_warmer.rs

1use anyhow::Result;
2use colored::*;
3use once_cell::sync::Lazy;
4use parking_lot::RwLock;
5use rayon::prelude::*;
6use std::collections::HashMap;
7use std::fs;
8use std::fs::File;
9use std::path::{Path, PathBuf};
10use std::sync::atomic::{AtomicUsize, Ordering};
11use std::sync::Arc;
12use std::time::Instant;
13
/// Largest file size, in bytes, that `collect_trackable_files` will accept.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024; // 10MB

/// Process-wide pool of open file handles, keyed by path.
///
/// `warm_cache` inserts one entry for every file it successfully warms.
// NOTE(review): nothing in this file ever removes entries, so each pooled
// handle stays open for the life of the process — confirm this fits within
// the descriptor limit for large repositories.
pub static FILE_POOL: Lazy<RwLock<HashMap<PathBuf, Arc<File>>>> =
    Lazy::new(|| RwLock::new(HashMap::new()));
19
20/// Warm the OS page cache by reading all trackable files
21pub fn warm_cache(repo_root: &Path) -> Result<CacheStats> {
22    let start = Instant::now();
23
24    // println!("{}", "📦 Warming OS page cache...".bright_cyan());
25
26    // Collect all trackable files
27    let files = collect_trackable_files(repo_root)?;
28    let total_files = files.len();
29
30    if total_files == 0 {
31        println!("{} No files to cache", "✓".bright_green());
32        return Ok(CacheStats::default());
33    }
34
35    // Progress tracking
36    let cached_count = Arc::new(AtomicUsize::new(0));
37    let cached_bytes = Arc::new(AtomicUsize::new(0));
38
39    // Pre-open file handles and warm cache in parallel
40    // This ensures subsequent reads are instant
41    let handles: Vec<_> = files
42        .par_iter()
43        .filter_map(|path| {
44            // Try to open and read to warm cache
45            if let Ok(file) = File::open(path) {
46                // Read to warm OS cache
47                if let Ok(mmap) = unsafe { memmap2::Mmap::map(&file) } {
48                    let size = mmap.len();
49                    cached_count.fetch_add(1, Ordering::Relaxed);
50                    cached_bytes.fetch_add(size, Ordering::Relaxed);
51                    return Some((path.clone(), Arc::new(file)));
52                }
53            }
54            None
55        })
56        .collect();
57
58    // Populate pool with all opened handles
59    let mut pool = FILE_POOL.write();
60    for (path, file) in handles {
61        pool.insert(path, file);
62    }
63    drop(pool);
64
65    let final_count = cached_count.load(Ordering::Relaxed);
66    let final_bytes = cached_bytes.load(Ordering::Relaxed);
67    let elapsed = start.elapsed();
68
69    // println!(
70    //     "{} Cached {} files ({} KB) in {:?}",
71    //     "✓".bright_green(),
72    //     final_count,
73    //     final_bytes / 1024,
74    //     elapsed
75    // );
76
77    Ok(CacheStats {
78        files_cached: final_count,
79        bytes_cached: final_bytes,
80        duration_ms: elapsed.as_millis() as u64,
81    })
82}
83
84/// Incrementally warm cache for new files as they're discovered
85pub fn warm_file(path: &Path) -> Result<()> {
86    // Simply read the file to get it into OS cache
87    let _ = fs::read(path)?;
88    Ok(())
89}
90
91/// Collect all files that should be tracked (respecting .gitignore-like rules)
92fn collect_trackable_files(root: &Path) -> Result<Vec<PathBuf>> {
93    use ignore::WalkBuilder;
94
95    let mut files = Vec::new();
96
97    let walker = WalkBuilder::new(root)
98        .hidden(false)
99        .git_ignore(true)
100        .git_global(true)
101        .git_exclude(true)
102        .max_depth(None)
103        .follow_links(false)
104        .build();
105
106    for entry in walker {
107        if let Ok(entry) = entry {
108            let path = entry.path();
109
110            // Skip if not a file
111            if !path.is_file() {
112                continue;
113            }
114
115            // Skip if in ignored directories
116            if !is_trackable(path) {
117                continue;
118            }
119
120            // Skip if too large
121            if let Ok(metadata) = fs::metadata(path) {
122                if metadata.len() > MAX_FILE_SIZE {
123                    continue;
124                }
125            }
126
127            files.push(path.to_path_buf());
128        }
129    }
130
131    Ok(files)
132}
133
/// Returns `false` when any normal component of `path` names one of the
/// ignored directories, compared ASCII case-insensitively; `true` otherwise.
///
/// Components that are not valid UTF-8, and non-normal components such as
/// `..` or a root prefix, never disqualify a path.
fn is_trackable(path: &Path) -> bool {
    use std::path::Component;

    const IGNORED_COMPONENTS: [&str; 5] = [".git", ".dx", ".dx_client", "target", "node_modules"];

    let touches_ignored_dir = path
        .components()
        .filter_map(|part| match part {
            Component::Normal(name) => name.to_str(),
            _ => None,
        })
        .any(|name| {
            IGNORED_COMPONENTS
                .iter()
                .any(|skip| name.eq_ignore_ascii_case(skip))
        });

    !touches_ignored_dir
}
152
/// Summary of one `warm_cache` run.
#[derive(Clone, Debug, Default)]
// NOTE(review): presumably silences warnings for fields no caller reads yet —
// confirm whether this opt-out is still needed.
#[allow(dead_code)]
pub struct CacheStats {
    /// Number of files that were successfully warmed.
    pub files_cached: usize,
    /// Combined size, in bytes, of those files.
    pub bytes_cached: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
}
160
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `contents` to `relative` under `root`, creating any missing
    /// parent directories first.
    fn write_fixture(root: &std::path::Path, relative: &str, contents: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, contents).unwrap();
    }

    /// Regular sources are collected; anything under `.git` is not.
    #[test]
    fn test_collect_trackable_files() {
        let sandbox = TempDir::new().unwrap();
        let root = sandbox.path();

        write_fixture(root, "src/main.rs", "fn main() {}");
        write_fixture(root, "README.md", "# Test");
        write_fixture(root, ".git/config", "ignored");

        let found = collect_trackable_files(root).unwrap();
        let has_file = |name: &str| found.iter().any(|p| p.ends_with(name));

        assert!(has_file("main.rs"));
        assert!(has_file("README.md"));
        assert!(found
            .iter()
            .all(|p| !p.to_str().unwrap().contains(".git")));
    }

    /// Warming a directory with one non-empty file reports non-zero counters.
    #[test]
    fn test_warm_cache() {
        let sandbox = TempDir::new().unwrap();
        write_fixture(sandbox.path(), "test.txt", "test content");

        let stats = warm_cache(sandbox.path()).unwrap();

        assert!(stats.files_cached > 0);
        assert!(stats.bytes_cached > 0);
    }
}