candle_coreml/cache/
manager.rs

1//! Cache Management for CoreML Models and Runtime Data
2//!
3//! This module provides centralized cache management for:
4//! 1. Downloaded models from HuggingFace
5//! 2. CoreML runtime caches (e5rt)
6//! 3. Temporary build artifacts
7//!
8//! The goal is to provide better control over cache locations and cleanup.
9
10use anyhow::Result;
11use std::path::{Path, PathBuf};
12use tracing::{debug, info, warn};
13
/// Simple title case conversion for directory names.
///
/// Words are delimited by whitespace, `-`, or `_`. Each word has its first
/// character upper-cased and the remainder lower-cased, and the words are
/// joined with single spaces.
trait ToTitleCase {
    /// Returns the title-cased form of `self`.
    fn to_title_case(&self) -> String;
}

impl ToTitleCase for str {
    fn to_title_case(&self) -> String {
        let mut title = String::with_capacity(self.len());

        let words = self
            .split(|c: char| c.is_whitespace() || c == '-' || c == '_')
            // Consecutive, leading, or trailing separators yield empty
            // segments; skipping them keeps the output free of doubled,
            // leading, or trailing spaces.
            .filter(|word| !word.is_empty());

        for word in words {
            if !title.is_empty() {
                title.push(' ');
            }
            let mut chars = word.chars();
            if let Some(first) = chars.next() {
                // `to_uppercase` may expand to several chars (e.g. 'ß').
                title.extend(first.to_uppercase());
                title.push_str(&chars.as_str().to_lowercase());
            }
        }

        title
    }
}
35
36#[cfg(target_os = "macos")]
37use objc2_foundation::NSBundle;
38
/// Central cache manager for candle-coreml.
///
/// Tracks the root directory under which the library keeps its `models`,
/// `configs`, `coreml-runtime`, and `temp` subdirectories, together with the
/// bundle identifier of the running process.
#[derive(Debug)]
pub struct CacheManager {
    /// Base cache directory (the platform cache directory joined with
    /// `candle-coreml`, falling back to `~/.cache/candle-coreml`)
    cache_base: PathBuf,
    /// Current bundle identifier (affects CoreML cache locations); `None`
    /// when the process has no bundle identifier or is not running on macOS
    bundle_id: Option<String>,
}
46
47impl CacheManager {
48    /// Create a new cache manager with default settings
49    pub fn new() -> Result<Self> {
50        let cache_base = Self::default_cache_dir()?;
51        let bundle_id = Self::get_current_bundle_identifier();
52
53        std::fs::create_dir_all(&cache_base)?;
54
55        let manager = Self {
56            cache_base,
57            bundle_id,
58        };
59
60        // Initialize the unified cache structure
61        manager.initialize_cache_structure()?;
62
63        info!("๐Ÿ—‚๏ธ  Cache manager initialized");
64        info!("   Base directory: {}", manager.cache_base.display());
65        if let Some(ref id) = manager.bundle_id {
66            info!("   Bundle identifier: {}", id);
67        } else {
68            warn!("   Bundle identifier: nil (command-line process)");
69        }
70
71        Ok(manager)
72    }
73
74    /// Get the default cache directory
75    fn default_cache_dir() -> Result<PathBuf> {
76        if let Some(cache_dir) = dirs::cache_dir() {
77            Ok(cache_dir.join("candle-coreml"))
78        } else {
79            // Fallback for systems without standard cache dir
80            let home = dirs::home_dir()
81                .ok_or_else(|| anyhow::Error::msg("Cannot determine home directory"))?;
82            Ok(home.join(".cache").join("candle-coreml"))
83        }
84    }
85
86    /// Get the current bundle identifier using NSBundle (macOS only)
87    #[cfg(target_os = "macos")]
88    fn get_current_bundle_identifier() -> Option<String> {
89        unsafe {
90            let main_bundle = NSBundle::mainBundle();
91            let bundle_id = main_bundle.bundleIdentifier();
92
93            bundle_id.map(|id| {
94                let bundle_str = id.to_string();
95                debug!("๐Ÿ“ฑ Current bundle identifier: {}", bundle_str);
96                bundle_str
97            })
98        }
99    }
100
    /// Get the current bundle identifier (non-macOS fallback).
    ///
    /// Compiled only when the target OS is not macOS, where there is no
    /// `NSBundle` to query; always returns `None`.
    #[cfg(not(target_os = "macos"))]
    fn get_current_bundle_identifier() -> Option<String> {
        None
    }
106
    /// Get the models cache directory: `<cache_base>/models`.
    ///
    /// Only builds the path; nothing is created or checked on disk here.
    pub fn models_dir(&self) -> PathBuf {
        self.cache_base.join("models")
    }
111
    /// Get the configs cache directory: `<cache_base>/configs`.
    ///
    /// Only builds the path; nothing is created or checked on disk here.
    pub fn configs_dir(&self) -> PathBuf {
        self.cache_base.join("configs")
    }
116
    /// Get the CoreML runtime cache directory: `<cache_base>/coreml-runtime`.
    ///
    /// Only builds the path; nothing is created or checked on disk here.
    pub fn coreml_runtime_dir(&self) -> PathBuf {
        self.cache_base.join("coreml-runtime")
    }
121
    /// Get the temp directory for build artifacts: `<cache_base>/temp`.
    ///
    /// Only builds the path; nothing is created or checked on disk here.
    pub fn temp_dir(&self) -> PathBuf {
        self.cache_base.join("temp")
    }
126
127    /// Initialize the unified cache directory structure
128    pub fn initialize_cache_structure(&self) -> Result<()> {
129        let directories = [
130            ("models", "Downloaded models from HuggingFace"),
131            ("configs", "Auto-generated model configurations"),
132            ("coreml-runtime", "CoreML runtime session data"),
133            ("temp", "Temporary build and processing artifacts"),
134        ];
135
136        for (dir_name, description) in &directories {
137            let dir_path = self.cache_base.join(dir_name);
138            std::fs::create_dir_all(&dir_path)?;
139
140            // Create a README file in each directory
141            let readme_path = dir_path.join("README.md");
142            if !readme_path.exists() {
143                let readme_content = format!(
144                    "# {} Cache Directory\n\n{}\n\nThis directory is managed by candle-coreml's CacheManager.\n",
145                    dir_name.replace('-', " ").to_title_case(),
146                    description
147                );
148                std::fs::write(readme_path, readme_content)?;
149            }
150        }
151
152        // Create main cache directory README
153        let main_readme = self.cache_base.join("README.md");
154        if !main_readme.exists() {
155            let main_content = format!(
156                r#"# candle-coreml Cache Directory
157
158This directory contains cached data for the candle-coreml library:
159
160## Directory Structure
161
162- `models/` - Downloaded models from HuggingFace
163- `configs/` - Auto-generated model configurations  
164- `coreml-runtime/` - CoreML runtime session data
165- `temp/` - Temporary build and processing artifacts
166
167## Management
168
169Use the candle-coreml CacheManager API or cleanup scripts to manage this cache:
170
171```bash
172# Enhanced cleanup script
173./cleanup_coreml_caches_enhanced.sh
174
175# Rust API
176use candle_coreml::CacheManager;
177let manager = CacheManager::new()?;
178manager.cleanup_old_caches(7)?; // Clean files older than 7 days
179```
180
181## Bundle Identifier
182
183Current bundle identifier: {:?}
184
185---
186Generated by candle-coreml v{} at {}
187"#,
188                self.bundle_id,
189                env!("CARGO_PKG_VERSION"),
190                chrono::Utc::now().format("%Y-%m-%d %H:%M:%S UTC")
191            );
192            std::fs::write(main_readme, main_content)?;
193        }
194
195        info!(
196            "โœ… Cache directory structure initialized at {}",
197            self.cache_base.display()
198        );
199        Ok(())
200    }
201
    /// Get the current bundle identifier.
    ///
    /// Returns the identifier stored on this manager as a borrowed string,
    /// or `None` if no identifier is stored.
    pub fn bundle_identifier(&self) -> Option<&str> {
        self.bundle_id.as_deref()
    }
206
    /// Get the base cache directory path.
    ///
    /// This is the root that `models_dir`, `configs_dir`,
    /// `coreml_runtime_dir`, and `temp_dir` are joined onto.
    pub fn cache_base(&self) -> &Path {
        &self.cache_base
    }
211
212    /// Report potential CoreML cache locations based on bundle ID
213    pub fn report_coreml_cache_locations(&self) -> Vec<PathBuf> {
214        let mut locations = Vec::new();
215
216        // Standard system cache location
217        if let Some(cache_dir) = dirs::cache_dir() {
218            if let Some(bundle_id) = &self.bundle_id {
219                // CoreML typically creates: ~/Library/Caches/{bundle_id}/com.apple.e5rt.e5bundlecache
220                locations.push(
221                    cache_dir
222                        .join(bundle_id)
223                        .join("com.apple.e5rt.e5bundlecache"),
224                );
225            }
226
227            // Also check for process-name based caches (common pattern)
228            let process_name = std::env::current_exe()
229                .ok()
230                .and_then(|p| p.file_stem().map(|s| s.to_string_lossy().to_string()))
231                .unwrap_or_else(|| "unknown".to_string());
232
233            // Pattern: {process_name}-{hash}/com.apple.e5rt.e5bundlecache
234            if let Ok(entries) = std::fs::read_dir(&cache_dir) {
235                for entry in entries.flatten() {
236                    let name = entry.file_name().to_string_lossy().to_string();
237                    if name.starts_with(&format!("{process_name}-")) {
238                        locations.push(entry.path().join("com.apple.e5rt.e5bundlecache"));
239                    }
240                }
241            }
242        }
243
244        locations
245    }
246
247    /// Clean up old cache entries based on policy
248    pub fn cleanup_old_caches(&self, max_age_days: u64) -> Result<()> {
249        info!("๐Ÿงน Starting cache cleanup (max age: {} days)", max_age_days);
250
251        let cutoff_time = std::time::SystemTime::now()
252            - std::time::Duration::from_secs(max_age_days * 24 * 60 * 60);
253
254        // Clean up temp directory
255        self.cleanup_directory(&self.temp_dir(), cutoff_time)?;
256
257        // Report CoreML cache locations (but don't clean them - Apple manages these)
258        let coreml_locations = self.report_coreml_cache_locations();
259        if !coreml_locations.is_empty() {
260            info!(
261                "๐Ÿ“ Found {} potential CoreML cache locations:",
262                coreml_locations.len()
263            );
264            for location in &coreml_locations {
265                if location.exists() {
266                    info!("   โ€ข {}", location.display());
267                }
268            }
269            info!("   Note: CoreML caches are managed by Apple's system");
270        }
271
272        Ok(())
273    }
274
275    /// Find all candle-coreml related cache directories (for enhanced cleanup)
276    pub fn find_all_candle_coreml_caches(&self) -> Result<Vec<(PathBuf, u64)>> {
277        let mut caches = Vec::new();
278
279        let cache_dir = dirs::cache_dir()
280            .ok_or_else(|| anyhow::Error::msg("Cannot determine cache directory"))?;
281
282        // Patterns to search for based on investigation
283        let patterns = [
284            "candle_coreml-*",
285            "candle-coreml-*",
286            "integration_tests-*",
287            "performance_regression_tests-*",
288            "qwen_tests-*",
289            "typo_fixer_test*",
290            "typo_fixer_tests-*",
291            "flex_pipeline_tests-*",
292            "builder_tests-*",
293            "tensor_regression_tests-*",
294            "utils_tests-*",
295            "bundle_id_*",
296        ];
297
298        for pattern in &patterns {
299            if let Ok(output) = std::process::Command::new("find")
300                .args([
301                    &cache_dir.to_string_lossy(),
302                    "-maxdepth",
303                    "1",
304                    "-name",
305                    pattern,
306                    "-type",
307                    "d",
308                ])
309                .output()
310            {
311                let entries_str = String::from_utf8_lossy(&output.stdout);
312                for line in entries_str.lines() {
313                    let entry = PathBuf::from(line.trim());
314                    if entry.is_dir() {
315                        // Check if it contains CoreML-specific files
316                        let has_coreml = entry.join("com.apple.e5rt.e5bundlecache").exists()
317                            || entry.join(".coreml_cache").exists()
318                            || entry.to_string_lossy().contains("coreml");
319
320                        if has_coreml {
321                            let size = self.get_directory_size(&entry)?;
322                            caches.push((entry, size));
323                        }
324                    }
325                }
326            }
327        }
328
329        // Also find standalone e5rt caches
330        if let Ok(output) = std::process::Command::new("find")
331            .args([
332                &cache_dir.to_string_lossy(),
333                "-maxdepth",
334                "1",
335                "-name",
336                "*e5rt*",
337                "-type",
338                "d",
339            ])
340            .output()
341        {
342            let entries_str = String::from_utf8_lossy(&output.stdout);
343            for line in entries_str.lines() {
344                let entry = PathBuf::from(line.trim());
345                if entry.is_dir() && !caches.iter().any(|(path, _)| path == &entry) {
346                    let size = self.get_directory_size(&entry)?;
347                    caches.push((entry, size));
348                }
349            }
350        }
351
352        // Sort by size (largest first)
353        caches.sort_by(|a, b| b.1.cmp(&a.1));
354
355        Ok(caches)
356    }
357
358    /// Get the size of a directory in bytes
359    fn get_directory_size(&self, path: &Path) -> Result<u64> {
360        let mut total_size = 0;
361
362        fn visit_dir(dir: &Path, total: &mut u64) -> Result<()> {
363            if dir.is_dir() {
364                for entry in std::fs::read_dir(dir)? {
365                    let entry = entry?;
366                    let path = entry.path();
367                    if path.is_dir() {
368                        visit_dir(&path, total)?;
369                    } else {
370                        *total += entry.metadata()?.len();
371                    }
372                }
373            }
374            Ok(())
375        }
376
377        visit_dir(path, &mut total_size)?;
378        Ok(total_size)
379    }
380
381    /// Remove specific cache directories with safety checks
382    pub fn remove_cache_directories(
383        &self,
384        paths: &[PathBuf],
385        dry_run: bool,
386    ) -> Result<(usize, u64)> {
387        let mut removed_count = 0;
388        let mut freed_bytes = 0;
389
390        for path in paths {
391            if !path.exists() {
392                continue;
393            }
394
395            // Safety check: ensure we're only removing cache directories
396            if let Some(cache_dir) = dirs::cache_dir() {
397                if !path.starts_with(&cache_dir) {
398                    warn!("Skipping path outside cache directory: {}", path.display());
399                    continue;
400                }
401            }
402
403            // Additional safety: don't remove important system directories
404            let path_str = path.to_string_lossy();
405            if path_str.contains("System")
406                || path_str.contains("Applications")
407                || path_str.contains("/usr/")
408                || path_str.contains("/bin/")
409            {
410                warn!("Skipping system path: {}", path.display());
411                continue;
412            }
413
414            let size = self.get_directory_size(path).unwrap_or(0);
415
416            if dry_run {
417                info!("Would remove: {} ({} bytes)", path.display(), size);
418            } else {
419                info!("Removing: {}", path.display());
420                match std::fs::remove_dir_all(path) {
421                    Ok(()) => {
422                        removed_count += 1;
423                        freed_bytes += size;
424                        debug!("โœ… Removed: {}", path.display());
425                    }
426                    Err(e) => {
427                        warn!("โš ๏ธ  Failed to remove {}: {}", path.display(), e);
428                    }
429                }
430            }
431        }
432
433        Ok((removed_count, freed_bytes))
434    }
435
436    /// Clean up a specific directory based on age
437    fn cleanup_directory(&self, dir: &Path, cutoff_time: std::time::SystemTime) -> Result<()> {
438        if !dir.exists() {
439            return Ok(());
440        }
441
442        let entries = std::fs::read_dir(dir)?;
443        let mut cleaned_count = 0;
444
445        for entry in entries {
446            let entry = entry?;
447            let metadata = entry.metadata()?;
448
449            if let Ok(modified) = metadata.modified() {
450                if modified < cutoff_time {
451                    let path = entry.path();
452                    if path.is_dir() {
453                        std::fs::remove_dir_all(&path)?;
454                    } else {
455                        std::fs::remove_file(&path)?;
456                    }
457                    cleaned_count += 1;
458                    debug!("๐Ÿ—‘๏ธ  Cleaned: {}", path.display());
459                }
460            }
461        }
462
463        if cleaned_count > 0 {
464            info!("โœ… Cleaned {} items from {}", cleaned_count, dir.display());
465        }
466
467        Ok(())
468    }
469}
470
impl Default for CacheManager {
    /// Builds a manager by calling `CacheManager::new`.
    ///
    /// # Panics
    ///
    /// Panics with "Failed to initialize cache manager" if `new` returns an
    /// error. Call `CacheManager::new` directly to handle that error instead.
    fn default() -> Self {
        Self::new().expect("Failed to initialize cache manager")
    }
}
476
#[cfg(test)]
mod tests {
    use super::*;

    /// The manager can be constructed and its base directory exists on disk.
    #[test]
    fn test_cache_manager_creation() {
        let manager = CacheManager::new().expect("Failed to create cache manager");

        // The parent of models/ is the cache base, which `new` creates.
        let models = manager.models_dir();
        let base = models.parent().unwrap();
        assert!(base.exists());

        // Bundle identifier detection is environment dependent, so only print it.
        println!("Bundle ID: {:?}", manager.bundle_identifier());
    }

    /// Smoke test: listing CoreML cache candidates does not panic.
    #[test]
    fn test_coreml_cache_location_detection() {
        let manager = CacheManager::new().expect("Failed to create cache manager");

        println!("Potential CoreML cache locations:");
        manager
            .report_coreml_cache_locations()
            .iter()
            .for_each(|location| println!("  {}", location.display()));
    }

    /// Smoke test: cache discovery runs and its results can be summarised.
    #[test]
    fn test_find_all_candle_coreml_caches() {
        let manager = CacheManager::new().expect("Failed to create cache manager");

        let caches = match manager.find_all_candle_coreml_caches() {
            Ok(found) => found,
            Err(e) => {
                println!("Error finding caches: {e}");
                return;
            }
        };

        println!("Found {} candle-coreml cache directories:", caches.len());
        for (path, size) in &caches {
            let size_mb = *size as f64 / (1024.0 * 1024.0);
            println!("  {} ({:.1} MB)", path.display(), size_mb);
        }

        let total_size: u64 = caches.iter().map(|entry| entry.1).sum();
        let total_gb = total_size as f64 / (1024.0 * 1024.0 * 1024.0);
        println!("Total size: {total_gb:.2} GB");
    }
}