Skip to main content

marco_core/logic/
cache.rs

1//! Caching System for Marco
2//!
3//! Provides two types of caching:
4//! 1. **File Caching** (SimpleFileCache): Cache file content with modification time tracking
5//! 2. **Parser Caching** (ParserCache): Cache parsed AST and rendered HTML using moka
6//!
7//! ## File Caching
8//! - Cache file content in memory to avoid repeated disk I/O
9//! - Track file modification times for automatic cache invalidation
10//! - Use weak references to active DocumentBuffers for automatic cleanup
11//! - File monitoring removed to prevent memory leaks and threading issues
12//!
13//! ## Parser Caching (Moka-based)
14//! - **AST Caching**: Cache parsed Document structures keyed by markdown content hash
15//! - **HTML Caching**: Cache rendered HTML keyed by (content_hash, render_options_hash)
16//! - **Thread-safe**: Moka provides lock-free concurrent access
17//! - **Automatic eviction**: LRU-based eviction when cache size limits reached
18//! - **No manual cleanup**: Moka handles cleanup automatically on drop
19
20use crate::parser::{parse, Document};
21use crate::render::{render, RenderOptions};
22use moka::sync::Cache;
23use std::collections::HashMap;
24use std::fs;
25use std::hash::{Hash, Hasher};
26use std::path::{Path, PathBuf};
27use std::sync::{Arc, OnceLock, RwLock};
28use std::time::{SystemTime, UNIX_EPOCH};
29
/// One cached file: its text plus the metadata needed to decide whether the
/// text is still current.
#[derive(Debug, Clone)]
pub struct CachedFile {
    /// File text behind an `Arc`, so handing it to callers is a refcount bump
    /// rather than a copy of the string.
    pub content: Arc<String>,
    /// Modification time recorded for the file, in whole seconds since the
    /// Unix epoch.
    pub modification_time: u64,
    /// Wall-clock time captured when this entry was constructed.
    pub last_accessed: SystemTime,
}

impl CachedFile {
    /// Build an entry from already-loaded `content` and the file's
    /// modification time (whole seconds since the Unix epoch).
    pub fn new(content: String, modification_time: u64) -> Self {
        let last_accessed = SystemTime::now();
        Self {
            content: Arc::new(content),
            modification_time,
            last_accessed,
        }
    }

    /// Report whether the file at `path` still carries the modification time
    /// recorded in this entry.
    ///
    /// Returns `false` when the metadata cannot be read, when the platform
    /// cannot supply a modification time, or when that time precedes the
    /// Unix epoch. The comparison is made at one-second granularity, so a
    /// rewrite that lands in the same second as the recorded time is not
    /// detected.
    pub fn is_valid_for(&self, path: &Path) -> bool {
        fs::metadata(path)
            .ok()
            .and_then(|meta| meta.modified().ok())
            .and_then(|mtime| mtime.duration_since(UNIX_EPOCH).ok())
            .is_some_and(|since_epoch| since_epoch.as_secs() == self.modification_time)
    }
}
62
63/// Simple file cache with basic functionality as per spec
64pub struct SimpleFileCache {
65    /// File content cache (`RwLock<HashMap>` as per spec)
66    content_cache: Arc<RwLock<HashMap<PathBuf, CachedFile>>>,
67}
68
69impl Default for SimpleFileCache {
70    fn default() -> Self {
71        Self::new()
72    }
73}
74
75impl SimpleFileCache {
76    /// Create new simple file cache
77    pub fn new() -> Self {
78        Self {
79            content_cache: Arc::new(RwLock::new(HashMap::new())),
80        }
81    }
82
83    /// Load file fast using cache-first strategy (as per spec)
84    pub fn load_file_fast<P: AsRef<Path>>(
85        &self,
86        path: P,
87    ) -> Result<String, Box<dyn std::error::Error>> {
88        // Use the shared version and convert to String for backwards compatibility
89        let shared_content = self.load_file_fast_shared(path)?;
90        Ok((*shared_content).clone())
91    }
92
93    /// Load file fast with shared ownership - avoids cloning for better memory efficiency
94    pub fn load_file_fast_shared<P: AsRef<Path>>(
95        &self,
96        path: P,
97    ) -> Result<Arc<String>, Box<dyn std::error::Error>> {
98        let path = path.as_ref().to_path_buf();
99
100        // Check cache first
101        {
102            if let Ok(cache) = self.content_cache.read() {
103                if let Some(entry) = cache.get(&path) {
104                    if entry.is_valid_for(&path) {
105                        // Cache hit - return shared reference (no cloning!)
106                        return Ok(Arc::clone(&entry.content));
107                    }
108                }
109            }
110        }
111
112        // Cache miss - load from disk and cache
113        self.load_and_cache_file_shared(path)
114    }
115
116    /// Load file from disk and add to cache with shared ownership - avoids unnecessary cloning
117    fn load_and_cache_file_shared(
118        &self,
119        path: PathBuf,
120    ) -> Result<Arc<String>, Box<dyn std::error::Error>> {
121        // Read raw bytes and sanitize UTF-8 (prevents crashes from invalid UTF-8)
122        let raw_bytes = fs::read(&path)
123            .map_err(|e| format!("Failed to read file {}: {}", path.display(), e))?;
124
125        let (content, stats) = crate::logic::utf8::sanitize_input_with_stats(
126            &raw_bytes,
127            crate::logic::utf8::InputSource::File,
128        );
129
130        // Log any UTF-8 issues
131        if stats.had_issues() {
132            log::warn!(
133                "File '{}' had UTF-8 issues: {}",
134                path.display(),
135                stats.summary()
136            );
137        }
138
139        let metadata = fs::metadata(&path)
140            .map_err(|e| format!("Failed to get metadata for {}: {}", path.display(), e))?;
141
142        let modification_time = metadata
143            .modified()
144            .map_err(|e| format!("Failed to get modification time: {}", e))?
145            .duration_since(UNIX_EPOCH)
146            .map_err(|e| format!("Invalid system time: {}", e))?
147            .as_secs();
148
149        // Create Arc<String> directly - no clone needed!
150        let cached_file = CachedFile::new(content, modification_time);
151        let shared_content = Arc::clone(&cached_file.content);
152
153        // Add to cache
154        if let Ok(mut cache) = self.content_cache.write() {
155            cache.insert(path, cached_file);
156        }
157
158        Ok(shared_content)
159    }
160
161    /// Invalidate cache entry for specific file
162    pub fn invalidate_file<P: AsRef<Path>>(&self, path: P) {
163        let path = path.as_ref();
164
165        if let Ok(mut cache) = self.content_cache.write() {
166            cache.remove(path);
167        }
168    }
169
170    /// Clear all cached entries to free memory
171    /// This is called during application shutdown to prevent memory retention
172    pub fn clear(&self) {
173        log::info!("Clearing file cache");
174
175        let mut cleared_files = 0;
176
177        // Clear file content cache
178        if let Ok(mut cache) = self.content_cache.write() {
179            cleared_files = cache.len();
180            cache.clear();
181        }
182
183        log::info!("File cache cleared: {} file entries", cleared_files);
184    }
185}
186
187// ============================================================================
188// Parser Cache (Moka-based)
189// ============================================================================
190
/// Capacity handed to the AST cache: at most 1000 parsed documents.
const AST_CACHE_MAX_CAPACITY: u64 = 1_000;
/// Capacity handed to the HTML cache: at most 2000 rendered HTML strings.
const HTML_CACHE_MAX_CAPACITY: u64 = 2_000;
194
/// Global singleton parser cache instance.
///
/// Starts out uninitialized; `global_parser_cache()` fills it on first use via
/// `get_or_init`, and `shutdown_global_parser_cache()` clears it only if it
/// has already been initialized.
static GLOBAL_PARSER_CACHE: OnceLock<ParserCache> = OnceLock::new();
197
198/// High-performance parser cache using moka
199#[derive(Clone)]
200pub struct ParserCache {
201    /// Cache for parsed AST documents
202    ast_cache: Cache<u64, Document>,
203    /// Cache for rendered HTML (keyed by content hash + options hash)
204    html_cache: Cache<(u64, u64), String>,
205}
206
207impl ParserCache {
208    /// Create a new parser cache with default capacity
209    pub fn new() -> Self {
210        Self {
211            ast_cache: Cache::new(AST_CACHE_MAX_CAPACITY),
212            html_cache: Cache::new(HTML_CACHE_MAX_CAPACITY),
213        }
214    }
215
216    /// Parse markdown content with AST caching
217    pub fn parse_with_cache(&self, content: &str) -> Result<Document, Box<dyn std::error::Error>> {
218        let content_hash = hash_content(content);
219
220        // Try to get from cache
221        if let Some(doc) = self.ast_cache.get(&content_hash) {
222            return Ok(doc);
223        }
224
225        // Parse and cache
226        let doc = parse(content)?;
227        self.ast_cache.insert(content_hash, doc.clone());
228        Ok(doc)
229    }
230
231    /// Render markdown to HTML with full caching (AST + HTML)
232    pub fn render_with_cache(
233        &self,
234        content: &str,
235        options: RenderOptions,
236    ) -> Result<String, Box<dyn std::error::Error>> {
237        let content_hash = hash_content(content);
238        let options_hash = hash_options(&options);
239        let cache_key = (content_hash, options_hash);
240
241        // Try to get rendered HTML from cache
242        if let Some(html) = self.html_cache.get(&cache_key) {
243            return Ok(html);
244        }
245
246        // Parse (with AST caching)
247        let doc = self.parse_with_cache(content)?;
248
249        // Render and cache
250        let html = render(&doc, &options)?;
251        self.html_cache.insert(cache_key, html.clone());
252        Ok(html)
253    }
254
255    /// Clear all caches
256    pub fn clear(&self) {
257        self.ast_cache.invalidate_all();
258        self.html_cache.invalidate_all();
259    }
260
261    /// Get cache statistics
262    pub fn stats(&self) -> CacheStats {
263        CacheStats {
264            ast_entries: self.ast_cache.entry_count(),
265            html_entries: self.html_cache.entry_count(),
266            ast_capacity: AST_CACHE_MAX_CAPACITY,
267            html_capacity: HTML_CACHE_MAX_CAPACITY,
268        }
269    }
270}
271
272impl Default for ParserCache {
273    fn default() -> Self {
274        Self::new()
275    }
276}
277
/// Point-in-time snapshot of parser cache occupancy.
///
/// Implements `PartialEq`/`Eq` so two snapshots can be compared directly
/// (for example with `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CacheStats {
    /// Number of entries reported by the AST cache.
    pub ast_entries: u64,
    /// Number of entries reported by the HTML cache.
    pub html_entries: u64,
    /// Configured capacity of the AST cache.
    pub ast_capacity: u64,
    /// Configured capacity of the HTML cache.
    pub html_capacity: u64,
}
286
287/// Get the global parser cache instance (creates on first access)
288pub fn global_parser_cache() -> &'static ParserCache {
289    GLOBAL_PARSER_CACHE.get_or_init(ParserCache::new)
290}
291
292/// Shutdown and clear the global parser cache
293///
294/// Note: With moka, this is optional - the cache will be cleaned up
295/// automatically when the program exits. This function is provided
296/// for compatibility with old API.
297pub fn shutdown_global_parser_cache() {
298    if let Some(cache) = GLOBAL_PARSER_CACHE.get() {
299        cache.clear();
300    }
301}
302
303// === Helper functions ===
304
/// Derive the 64-bit cache key for a markdown source string.
///
/// Uses a freshly constructed `DefaultHasher`, so equal strings always map to
/// the same key within a process.
fn hash_content(content: &str) -> u64 {
    let mut state = std::collections::hash_map::DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
312
313/// Hash render options for cache key
314fn hash_options(options: &RenderOptions) -> u64 {
315    use std::collections::hash_map::DefaultHasher;
316    let mut hasher = DefaultHasher::new();
317
318    // Hash the relevant fields of RenderOptions
319    options.syntax_highlighting.hash(&mut hasher);
320    options.line_numbers.hash(&mut hasher);
321    options.theme.hash(&mut hasher);
322
323    hasher.finish()
324}
325
326// === Convenience Functions ===
327
328/// Parse markdown to HTML (uncached, for one-off conversions)
329pub fn parse_to_html(
330    content: &str,
331    options: RenderOptions,
332) -> Result<String, Box<dyn std::error::Error>> {
333    let doc = parse(content)?;
334    render(&doc, &options)
335}
336
337/// Parse markdown to HTML using global cache (recommended for UI)
338pub fn parse_to_html_cached(
339    content: &str,
340    options: RenderOptions,
341) -> Result<String, Box<dyn std::error::Error>> {
342    global_parser_cache().render_with_cache(content, options)
343}
344
345// ============================================================================
346// Tests
347// ============================================================================
348
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use std::io::Write;
    use tempfile::{tempdir, NamedTempFile};

    // === File Cache Tests ===

    /// Loading the same file twice through a fresh cache yields equal text.
    #[test]
    fn smoke_test_file_cache() {
        let file_cache = SimpleFileCache::new();

        // Scratch file that disappears when `scratch` is dropped
        let mut scratch = NamedTempFile::new().expect("Failed to create temp file");
        writeln!(scratch, "Test content for file cache").expect("Failed to write temp file");
        let scratch_path = scratch.path();

        // First load goes to disk
        let first = file_cache
            .load_file_fast(scratch_path)
            .expect("Failed to load file");
        assert!(first.contains("Test content for file cache"));

        // Second load is expected to be served from the cache; that is not
        // observable from here, so only the result is compared
        let second = file_cache
            .load_file_fast(scratch_path)
            .expect("Failed to load file");
        assert_eq!(first, second);
    }

    /// `clear()` must not panic and must leave the cache usable.
    #[test]
    fn smoke_test_file_cache_cleanup() {
        let file_cache = SimpleFileCache::new();

        let scratch_dir = tempdir().expect("Failed to create temp dir");
        let target = scratch_dir.path().join("test_file.txt");
        std::fs::write(&target, "Content for cleanup test").expect("Failed to write test file");

        // Populate the cache
        let _warm = file_cache
            .load_file_fast(&target)
            .expect("Failed to load file");

        // The cache internals are not inspected here; the test only checks
        // that clearing works without panicking (main focus of issue #16)
        file_cache.clear();

        // After clearing, the next load has to come from disk again
        let reloaded = file_cache
            .load_file_fast(&target)
            .expect("Cache should work after clear");
        assert!(reloaded.contains("Content for cleanup test"));
    }

    /// Shutting down the global file cache leaves it usable afterwards.
    #[test]
    #[serial(file_cache)]
    fn smoke_test_global_cache_cleanup() {
        let shared = global_cache();

        let scratch_dir = tempdir().expect("Failed to create temp dir");
        let target = scratch_dir.path().join("global_test.txt");
        std::fs::write(&target, "Global cache test content").expect("Failed to write test file");

        // Populate global cache
        let _warm = shared
            .load_file_fast(&target)
            .expect("Failed to load file");

        // Global cleanup (main focus of issue #16)
        shutdown_global_cache();

        // The same handle must keep working after shutdown
        let reloaded = shared
            .load_file_fast(&target)
            .expect("Global cache should work after shutdown");
        assert!(reloaded.contains("Global cache test content"));
    }

    // === Parser Cache Tests ===

    /// Parsing identical content twice leaves exactly one AST entry.
    #[test]
    fn smoke_test_parser_cache() {
        let parser_cache = ParserCache::new();
        let markdown = "# Hello World\n\nThis is **bold** text.";

        // Miss: parsed and stored
        let first = parser_cache.parse_with_cache(markdown).expect("Parse failed");
        assert!(format!("{:?}", first).contains("Heading"));

        // Flush pending work so the entry count is up to date
        parser_cache.ast_cache.run_pending_tasks();

        // Hit: same content again
        let second = parser_cache.parse_with_cache(markdown).expect("Parse failed");
        assert!(format!("{:?}", second).contains("Heading"));

        let snapshot = parser_cache.stats();
        assert_eq!(snapshot.ast_entries, 1); // Only one unique content cached
    }

    /// Rendering identical content twice leaves one AST and one HTML entry.
    #[test]
    fn smoke_test_render_cache() {
        let parser_cache = ParserCache::new();
        let markdown = "# Test\n\nSome content.";
        let render_options = RenderOptions::default();

        // Miss: parsed, rendered and stored
        let first = parser_cache
            .render_with_cache(markdown, render_options.clone())
            .expect("Render failed");
        assert!(first.contains("<h1"));

        // Flush pending work so the entry counts are up to date
        parser_cache.ast_cache.run_pending_tasks();
        parser_cache.html_cache.run_pending_tasks();

        // Hit: same content and options
        let second = parser_cache
            .render_with_cache(markdown, render_options)
            .expect("Render failed");
        assert_eq!(first, second);

        let snapshot = parser_cache.stats();
        assert_eq!(snapshot.ast_entries, 1);
        assert_eq!(snapshot.html_entries, 1);
    }

    /// The global accessor always returns the same instance, and it parses.
    #[test]
    fn smoke_test_global_parser_cache() {
        let markdown = "## Global Cache Test";
        let first_handle = global_parser_cache();
        let second_handle = global_parser_cache();

        // Should be same instance
        assert!(std::ptr::eq(first_handle, second_handle));

        // Should work
        let parsed = first_handle.parse_with_cache(markdown).expect("Parse failed");
        assert!(format!("{:?}", parsed).contains("Heading"));
    }

    /// Cached and uncached convenience functions agree on the output.
    #[test]
    fn smoke_test_convenience_functions() {
        let markdown = "Test content with **emphasis**.";
        let render_options = RenderOptions::default();

        // Uncached version
        let uncached = parse_to_html(markdown, render_options.clone()).expect("Parse failed");
        assert!(uncached.contains("<strong>"));

        // Cached version
        let cached = parse_to_html_cached(markdown, render_options).expect("Parse failed");
        assert_eq!(uncached, cached);
    }
}
509
510/// Global cache instance (singleton pattern as per spec)
511static GLOBAL_CACHE: OnceLock<SimpleFileCache> = OnceLock::new();
512
513/// Get global file cache instance (as per spec)
514pub fn global_cache() -> &'static SimpleFileCache {
515    GLOBAL_CACHE.get_or_init(SimpleFileCache::new)
516}
517
518/// Shutdown and cleanup the global file cache
519/// This clears all cached data to prevent memory retention on application exit
520pub fn shutdown_global_cache() {
521    // Only clear if the global cache has been initialized
522    if let Some(cache) = GLOBAL_CACHE.get() {
523        cache.clear();
524    } else {
525        log::info!("File cache was never initialized, no cleanup needed");
526    }
527}
528
529/// Simple cached file operations (as per spec)
530pub mod cached {
531    use super::*;
532
533    pub fn read_to_string<P: AsRef<Path>>(path: P) -> Result<String, Box<dyn std::error::Error>> {
534        global_cache().load_file_fast(path)
535    }
536}