Skip to main content

xchecker_utils/
cache.rs

1//! Insight cache system for performance optimization
2//!
3//! This module implements a BLAKE3-keyed cache for file summaries and core insights
4//! to avoid reprocessing unchanged files across multiple runs.
5
6use crate::logging::Logger;
7use crate::types::Priority;
8use anyhow::{Context, Result};
9use blake3::Hasher;
10use camino::{Utf8Path, Utf8PathBuf};
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13use std::collections::HashMap;
14use std::fs;
15
/// Cache for file insights based on BLAKE3 content hashes
/// Implements R3.4: reuse cached core insights based on BLAKE3 keys
///
/// Entries are held in two places: an in-memory map for the current session
/// and one `<key>.json` file per entry inside `cache_dir`.
#[derive(Debug)]
pub struct InsightCache {
    /// Directory that holds the on-disk cache files
    cache_dir: Utf8PathBuf,
    /// In-memory cache for the current session, keyed by `{content_hash}_{phase}`
    memory_cache: HashMap<String, CachedInsight>,
    /// Hit/miss/invalidation/write counters, exposed for verbose logging
    stats: CacheStats,
}
27
28/// Statistics for cache performance tracking
29#[derive(Debug, Default, Clone, Copy, serde::Serialize, serde::Deserialize)]
30pub struct CacheStats {
31    pub hits: usize,
32    pub misses: usize,
33    pub invalidations: usize,
34    pub writes: usize,
35}
36
37impl CacheStats {
38    /// Calculate cache hit ratio
39    #[must_use]
40    pub fn hit_ratio(&self) -> f64 {
41        let total = self.hits + self.misses;
42        if total == 0 {
43            0.0
44        } else {
45            self.hits as f64 / total as f64
46        }
47    }
48}
49
/// Cached insight data for a file
///
/// This is the record kept in the in-memory map and serialized to JSON on disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedInsight {
    /// BLAKE3 hash of the file content when insights were generated;
    /// compared against the requested hash when an entry is loaded from disk
    pub content_hash: String,
    /// File path (for reference)
    pub file_path: String,
    /// Priority level of the file
    pub priority: Priority,
    /// Core insights (10-25 bullet points per R3.5)
    pub insights: Vec<String>,
    /// Phase this insight was generated for
    pub phase: String,
    /// Timestamp when insight was cached
    pub cached_at: DateTime<Utc>,
    /// File size in bytes when cached; a different current size invalidates the entry
    pub file_size: u64,
    /// Last modified time when cached; a different current value invalidates the entry
    pub last_modified: DateTime<Utc>,
}
70
71impl InsightCache {
72    /// Create a new insight cache with the specified cache directory
73    pub fn new(cache_dir: Utf8PathBuf) -> Result<Self> {
74        // Ensure cache directory exists (ignore benign races)
75        crate::paths::ensure_dir_all(&cache_dir)
76            .with_context(|| format!("Failed to create cache directory: {cache_dir}"))?;
77
78        Ok(Self {
79            cache_dir,
80            memory_cache: HashMap::new(),
81            stats: CacheStats::default(),
82        })
83    }
84
    /// Get cache statistics for verbose logging
    ///
    /// Returns a shared reference to the live counters; the value reflects all
    /// lookups and writes performed on this instance so far.
    #[must_use]
    pub const fn stats(&self) -> &CacheStats {
        &self.stats
    }
90
91    /// Generate a cache key from file content hash and phase
92    fn cache_key(&self, content_hash: &str, phase: &str) -> String {
93        format!("{content_hash}_{phase}")
94    }
95
96    /// Get the cache file path for a given key
97    fn cache_file_path(&self, key: &str) -> Utf8PathBuf {
98        self.cache_dir.join(format!("{key}.json"))
99    }
100
101    /// Check if a file has changed since it was cached
102    fn has_file_changed(
103        &self,
104        file_path: &Utf8Path,
105        cached_insight: &CachedInsight,
106    ) -> Result<bool> {
107        let metadata = fs::metadata(file_path)
108            .with_context(|| format!("Failed to get metadata for file: {file_path}"))?;
109
110        let current_size = metadata.len();
111        let current_modified = DateTime::<Utc>::from(
112            metadata
113                .modified()
114                .with_context(|| format!("Failed to get modified time for file: {file_path}"))?,
115        );
116
117        // File has changed if size or modification time differs
118        Ok(current_size != cached_insight.file_size
119            || current_modified != cached_insight.last_modified)
120    }
121
122    /// Get cached insights for a file, or None if not cached or invalid
123    pub fn get_insights(
124        &mut self,
125        file_path: &Utf8Path,
126        content_hash: &str,
127        phase: &str,
128        logger: Option<&Logger>,
129    ) -> Result<Option<Vec<String>>> {
130        let key = self.cache_key(content_hash, phase);
131
132        // First check memory cache
133        if let Some(cached) = self.memory_cache.get(&key) {
134            // Validate that file hasn't changed
135            if self.has_file_changed(file_path, cached)? {
136                // File changed, invalidate memory cache entry
137                self.memory_cache.remove(&key);
138                self.stats.invalidations += 1;
139                if let Some(logger) = logger {
140                    logger.verbose(&format!(
141                        "Cache invalidated (file changed): {} [{}]",
142                        file_path,
143                        &content_hash[..8]
144                    ));
145                }
146            } else {
147                self.stats.hits += 1;
148                if let Some(logger) = logger {
149                    logger.verbose(&format!(
150                        "Cache hit (memory): {} [{}]",
151                        file_path,
152                        &content_hash[..8]
153                    ));
154                }
155                return Ok(Some(cached.insights.clone()));
156            }
157        }
158
159        // Check disk cache
160        let cache_file = self.cache_file_path(&key);
161        if cache_file.exists() {
162            if let Ok(cached) = self.load_cached_insight(&cache_file) {
163                // Validate content hash matches
164                if cached.content_hash == content_hash {
165                    // Validate file hasn't changed
166                    if self.has_file_changed(file_path, &cached)? {
167                        // File changed, remove stale cache file
168                        let _ = fs::remove_file(&cache_file);
169                        self.stats.invalidations += 1;
170                        if let Some(logger) = logger {
171                            logger.verbose(&format!(
172                                "Cache invalidated (file changed): {} [{}]",
173                                file_path,
174                                &content_hash[..8]
175                            ));
176                        }
177                    } else {
178                        // Cache hit - load into memory and return
179                        self.memory_cache.insert(key, cached.clone());
180                        self.stats.hits += 1;
181                        if let Some(logger) = logger {
182                            logger.verbose(&format!(
183                                "Cache hit (disk): {} [{}]",
184                                file_path,
185                                &content_hash[..8]
186                            ));
187                        }
188                        return Ok(Some(cached.insights));
189                    }
190                } else {
191                    // Content hash mismatch, remove stale cache file
192                    let _ = fs::remove_file(&cache_file);
193                    self.stats.invalidations += 1;
194                    if let Some(logger) = logger {
195                        logger.verbose(&format!(
196                            "Cache invalidated (hash mismatch): {} [{}]",
197                            file_path,
198                            &content_hash[..8]
199                        ));
200                    }
201                }
202            } else {
203                // Corrupted cache file, remove it
204                let _ = fs::remove_file(&cache_file);
205                if let Some(logger) = logger {
206                    logger.verbose(&format!("Cache file corrupted, removed: {cache_file}"));
207                }
208            }
209        }
210
211        // Cache miss
212        self.stats.misses += 1;
213        if let Some(logger) = logger {
214            logger.verbose(&format!(
215                "Cache miss: {} [{}]",
216                file_path,
217                &content_hash[..8]
218            ));
219        }
220        Ok(None)
221    }
222
223    /// Store insights in cache for a file
224    #[allow(clippy::too_many_arguments)]
225    pub fn store_insights(
226        &mut self,
227        file_path: &Utf8Path,
228        _content: &str,
229        content_hash: &str,
230        phase: &str,
231        priority: Priority,
232        insights: Vec<String>,
233        logger: Option<&Logger>,
234    ) -> Result<()> {
235        let key = self.cache_key(content_hash, phase);
236
237        // Get file metadata for validation
238        let metadata = fs::metadata(file_path)
239            .with_context(|| format!("Failed to get metadata for file: {file_path}"))?;
240
241        let cached_insight =
242            CachedInsight {
243                content_hash: content_hash.to_string(),
244                file_path: file_path.to_string(),
245                priority,
246                insights: insights.clone(),
247                phase: phase.to_string(),
248                cached_at: Utc::now(),
249                file_size: metadata.len(),
250                last_modified: DateTime::<Utc>::from(metadata.modified().with_context(|| {
251                    format!("Failed to get modified time for file: {file_path}")
252                })?),
253            };
254
255        // Store in memory cache
256        self.memory_cache
257            .insert(key.clone(), cached_insight.clone());
258
259        // Store in disk cache
260        let cache_file = self.cache_file_path(&key);
261        self.save_cached_insight(&cache_file, &cached_insight)?;
262
263        self.stats.writes += 1;
264        if let Some(logger) = logger {
265            logger.verbose(&format!(
266                "Cached insights: {} ({} insights) [{}]",
267                file_path,
268                insights.len(),
269                &content_hash[..8]
270            ));
271        }
272
273        Ok(())
274    }
275
276    /// Generate core insights for a file (R3.5: 10-25 bullet points per phase)
277    #[must_use]
278    pub fn generate_insights(
279        &self,
280        content: &str,
281        file_path: &Utf8Path,
282        phase: &str,
283        priority: Priority,
284    ) -> Vec<String> {
285        let mut insights = Vec::new();
286
287        // Basic file information
288        let line_count = content.lines().count();
289        let byte_count = content.len();
290        insights.push(format!(
291            "File: {file_path} ({line_count} lines, {byte_count} bytes)"
292        ));
293        insights.push(format!("Priority: {priority:?}"));
294
295        // Phase-specific insights
296        match phase.to_lowercase().as_str() {
297            "requirements" => {
298                self.generate_requirements_insights(content, &mut insights);
299            }
300            "design" => {
301                self.generate_design_insights(content, &mut insights);
302            }
303            "tasks" => {
304                self.generate_tasks_insights(content, &mut insights);
305            }
306            "review" => {
307                self.generate_review_insights(content, &mut insights);
308            }
309            _ => {
310                self.generate_generic_insights(content, &mut insights);
311            }
312        }
313
314        // Ensure we have 10-25 insights as per R3.5
315        let current_len = insights.len();
316        if current_len < 10 {
317            // Add more generic insights to reach minimum
318            self.add_generic_content_insights(content, &mut insights, 10 - current_len);
319        } else if insights.len() > 25 {
320            // Truncate to maximum
321            insights.truncate(25);
322        }
323
324        insights
325    }
326
327    /// Generate requirements-specific insights
328    fn generate_requirements_insights(&self, content: &str, insights: &mut Vec<String>) {
329        // Look for user stories
330        let user_story_count = content.matches("As a").count();
331        if user_story_count > 0 {
332            insights.push(format!("Contains {user_story_count} user stories"));
333        }
334
335        // Look for acceptance criteria
336        let acceptance_criteria_count =
337            content.matches("WHEN").count() + content.matches("THEN").count();
338        if acceptance_criteria_count > 0 {
339            insights.push(format!(
340                "Contains {acceptance_criteria_count} acceptance criteria statements"
341            ));
342        }
343
344        // Look for requirements sections
345        if content.contains("## Requirements") || content.contains("# Requirements") {
346            insights.push("Contains structured requirements section".to_string());
347        }
348
349        // Look for functional vs non-functional requirements
350        if content.contains("Non-Functional") || content.contains("NFR") {
351            insights.push("Includes non-functional requirements".to_string());
352        }
353
354        // Count requirement numbers
355        let req_numbers = content.matches("Requirement ").count();
356        if req_numbers > 0 {
357            insights.push(format!("Defines {req_numbers} numbered requirements"));
358        }
359    }
360
361    /// Generate design-specific insights
362    fn generate_design_insights(&self, content: &str, insights: &mut Vec<String>) {
363        // Look for architecture sections
364        if content.contains("## Architecture") || content.contains("# Architecture") {
365            insights.push("Contains architecture section".to_string());
366        }
367
368        // Look for component descriptions
369        if content.contains("Component") || content.contains("component") {
370            let component_count = content.matches("component").count();
371            insights.push(format!("References {component_count} components"));
372        }
373
374        // Look for interfaces
375        if content.contains("interface") || content.contains("Interface") {
376            insights.push("Describes interfaces".to_string());
377        }
378
379        // Look for data models
380        if content.contains("Data Model") || content.contains("data model") {
381            insights.push("Includes data model definitions".to_string());
382        }
383
384        // Look for diagrams
385        if content.contains("```mermaid") || content.contains("```plantuml") {
386            let diagram_count =
387                content.matches("```mermaid").count() + content.matches("```plantuml").count();
388            insights.push(format!("Contains {diagram_count} diagrams"));
389        }
390
391        // Look for error handling
392        if content.contains("Error") || content.contains("error") {
393            insights.push("Addresses error handling".to_string());
394        }
395    }
396
397    /// Generate tasks-specific insights
398    fn generate_tasks_insights(&self, content: &str, insights: &mut Vec<String>) {
399        // Count tasks
400        let task_count = content.matches("- [ ]").count() + content.matches("- [x]").count();
401        if task_count > 0 {
402            insights.push(format!("Contains {task_count} tasks"));
403        }
404
405        // Count completed tasks
406        let completed_count = content.matches("- [x]").count();
407        if completed_count > 0 {
408            insights.push(format!("{completed_count} tasks completed"));
409        }
410
411        // Look for milestones
412        let milestone_count = content.matches("Milestone").count();
413        if milestone_count > 0 {
414            insights.push(format!("Organized into {milestone_count} milestones"));
415        }
416
417        // Look for implementation phases
418        if content.contains("Phase") || content.contains("phase") {
419            insights.push("Includes phased implementation approach".to_string());
420        }
421
422        // Look for testing tasks
423        if content.contains("test") || content.contains("Test") {
424            let test_count = content.matches("test").count();
425            insights.push(format!("Includes {test_count} testing-related items"));
426        }
427    }
428
429    /// Generate review-specific insights
430    fn generate_review_insights(&self, content: &str, insights: &mut Vec<String>) {
431        // Look for review comments
432        if content.contains("FIXUP") || content.contains("fixup") {
433            insights.push("Contains fixup recommendations".to_string());
434        }
435
436        // Look for feedback
437        if content.contains("feedback") || content.contains("Feedback") {
438            insights.push("Includes feedback items".to_string());
439        }
440
441        // Look for issues or problems
442        if content.contains("issue") || content.contains("Issue") || content.contains("problem") {
443            insights.push("Identifies issues or problems".to_string());
444        }
445
446        // Look for recommendations
447        if content.contains("recommend") || content.contains("Recommend") {
448            insights.push("Contains recommendations".to_string());
449        }
450    }
451
452    /// Generate generic insights for any content
453    fn generate_generic_insights(&self, content: &str, insights: &mut Vec<String>) {
454        // Count sections
455        let section_count = content.matches("##").count() + content.matches('#').count();
456        if section_count > 0 {
457            insights.push(format!("Contains {section_count} sections"));
458        }
459
460        // Look for code blocks
461        let code_block_count = content.matches("```").count() / 2; // Each block has opening and closing
462        if code_block_count > 0 {
463            insights.push(format!("Contains {code_block_count} code blocks"));
464        }
465
466        // Look for links
467        let link_count = content.matches("](").count();
468        if link_count > 0 {
469            insights.push(format!("Contains {link_count} links"));
470        }
471
472        // Look for lists
473        let list_item_count = content.matches("- ").count() + content.matches("* ").count();
474        if list_item_count > 0 {
475            insights.push(format!("Contains {list_item_count} list items"));
476        }
477    }
478
479    /// Add additional generic content insights to reach minimum count
480    fn add_generic_content_insights(
481        &self,
482        content: &str,
483        insights: &mut Vec<String>,
484        needed: usize,
485    ) {
486        let mut added = 0;
487
488        // Word count
489        if added < needed {
490            let word_count = content.split_whitespace().count();
491            insights.push(format!("Word count: {word_count}"));
492            added += 1;
493        }
494
495        // Paragraph count
496        if added < needed {
497            let paragraph_count = content.split("\n\n").count();
498            insights.push(format!("Paragraph count: {paragraph_count}"));
499            added += 1;
500        }
501
502        // Character analysis
503        if added < needed {
504            let char_count = content.chars().count();
505            insights.push(format!("Character count: {char_count}"));
506            added += 1;
507        }
508
509        // Empty lines
510        if added < needed {
511            let empty_lines = content
512                .lines()
513                .filter(|line| line.trim().is_empty())
514                .count();
515            insights.push(format!("Empty lines: {empty_lines}"));
516            added += 1;
517        }
518
519        // File type indicators
520        if added < needed {
521            if content.contains("```rust") {
522                insights.push("Contains Rust code".to_string());
523                added += 1;
524            } else if content.contains("```yaml") || content.contains("```yml") {
525                insights.push("Contains YAML content".to_string());
526                added += 1;
527            } else if content.contains("```json") {
528                insights.push("Contains JSON content".to_string());
529                added += 1;
530            } else if content.contains("```toml") {
531                insights.push("Contains TOML content".to_string());
532                added += 1;
533            }
534        }
535
536        // Add generic filler insights if still needed
537        while added < needed && insights.len() < 25 {
538            match added {
539                0 => insights.push("Content analysis complete".to_string()),
540                1 => insights.push("Structured document format".to_string()),
541                2 => insights.push("Text-based content".to_string()),
542                3 => insights.push("UTF-8 encoded content".to_string()),
543                4 => insights.push("Markdown formatting detected".to_string()),
544                _ => insights.push(format!("Additional insight #{}", added + 1)),
545            }
546            added += 1;
547        }
548    }
549
550    /// Load cached insight from disk
551    fn load_cached_insight(&self, cache_file: &Utf8Path) -> Result<CachedInsight> {
552        let content = fs::read_to_string(cache_file)
553            .with_context(|| format!("Failed to read cache file: {cache_file}"))?;
554
555        let cached: CachedInsight = serde_json::from_str(&content)
556            .with_context(|| format!("Failed to parse cache file: {cache_file}"))?;
557
558        Ok(cached)
559    }
560
561    /// Save cached insight to disk
562    fn save_cached_insight(&self, cache_file: &Utf8Path, cached: &CachedInsight) -> Result<()> {
563        let content =
564            serde_json::to_string_pretty(cached).context("Failed to serialize cached insight")?;
565
566        fs::write(cache_file, content)
567            .with_context(|| format!("Failed to write cache file: {cache_file}"))?;
568
569        Ok(())
570    }
571
572    /// Clear all cached insights (for testing or cleanup)
573    #[allow(dead_code)] // Cache management utility
574    pub fn clear(&mut self) -> Result<()> {
575        self.memory_cache.clear();
576
577        // Remove all cache files
578        if self.cache_dir.exists() {
579            for entry in fs::read_dir(&self.cache_dir)? {
580                let entry = entry?;
581                if entry.path().extension().and_then(|s| s.to_str()) == Some("json") {
582                    fs::remove_file(entry.path())?;
583                }
584            }
585        }
586
587        self.stats = CacheStats::default();
588        Ok(())
589    }
590
591    /// Log cache statistics if verbose logging is enabled
592    #[allow(dead_code)] // Diagnostic logging utility
593    pub fn log_stats(&self, logger: &Logger) {
594        if self.stats.hits + self.stats.misses > 0 {
595            logger.verbose(&format!(
596                "Cache stats: {} hits, {} misses ({:.1}% hit rate), {} invalidations, {} writes",
597                self.stats.hits,
598                self.stats.misses,
599                self.stats.hit_ratio() * 100.0,
600                self.stats.invalidations,
601                self.stats.writes
602            ));
603        }
604    }
605}
606
607/// Calculate BLAKE3 hash of content for cache key generation
608#[must_use]
609pub fn calculate_content_hash(content: &str) -> String {
610    let mut hasher = Hasher::new();
611    hasher.update(content.as_bytes());
612    hasher.finalize().to_hex().to_string()
613}
614
615#[cfg(test)]
616mod tests {
617    use super::*;
618    use std::thread;
619    use std::time::Duration;
620    use tempfile::TempDir;
621
622    #[test]
623    fn test_cache_creation() -> Result<()> {
624        let temp_dir = TempDir::new()?;
625        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
626
627        let cache = InsightCache::new(cache_dir.clone())?;
628        assert!(cache_dir.exists());
629        assert_eq!(cache.stats().hits, 0);
630        assert_eq!(cache.stats().misses, 0);
631
632        Ok(())
633    }
634
635    #[test]
636    fn test_cache_miss_and_store() -> Result<()> {
637        let temp_dir = TempDir::new()?;
638        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
639        let mut cache = InsightCache::new(cache_dir)?;
640
641        // Create a test file
642        let test_file = temp_dir.path().join("test.md");
643        let content = "# Test\nThis is test content.";
644        fs::write(&test_file, content)?;
645        let file_path = Utf8PathBuf::try_from(test_file)?;
646
647        let content_hash = calculate_content_hash(content);
648
649        // Should be a cache miss initially
650        let result = cache.get_insights(&file_path, &content_hash, "requirements", None)?;
651        assert!(result.is_none());
652        assert_eq!(cache.stats().misses, 1);
653
654        // Generate and store insights
655        let insights =
656            cache.generate_insights(content, &file_path, "requirements", Priority::Medium);
657        assert!(insights.len() >= 10);
658        assert!(insights.len() <= 25);
659
660        cache.store_insights(
661            &file_path,
662            content,
663            &content_hash,
664            "requirements",
665            Priority::Medium,
666            insights.clone(),
667            None,
668        )?;
669        assert_eq!(cache.stats().writes, 1);
670
671        // Should be a cache hit now
672        let cached_insights =
673            cache.get_insights(&file_path, &content_hash, "requirements", None)?;
674        assert!(cached_insights.is_some());
675        assert_eq!(cached_insights.unwrap(), insights);
676        assert_eq!(cache.stats().hits, 1);
677
678        Ok(())
679    }
680
681    #[test]
682    fn test_cache_invalidation_on_file_change() -> Result<()> {
683        let temp_dir = TempDir::new()?;
684        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
685        let mut cache = InsightCache::new(cache_dir)?;
686
687        // Create a test file
688        let test_file = temp_dir.path().join("test.md");
689        let content1 = "# Test\nOriginal content.";
690        fs::write(&test_file, content1)?;
691        let file_path = Utf8PathBuf::try_from(test_file.clone())?;
692
693        let content_hash1 = calculate_content_hash(content1);
694        let insights1 =
695            cache.generate_insights(content1, &file_path, "requirements", Priority::Medium);
696        cache.store_insights(
697            &file_path,
698            content1,
699            &content_hash1,
700            "requirements",
701            Priority::Medium,
702            insights1,
703            None,
704        )?;
705
706        // Verify cache hit
707        let cached = cache.get_insights(&file_path, &content_hash1, "requirements", None)?;
708        assert!(cached.is_some());
709
710        // Wait a bit to ensure different modification time
711        thread::sleep(Duration::from_millis(10));
712
713        // Modify the file
714        let content2 = "# Test\nModified content.";
715        fs::write(&test_file, content2)?;
716        let content_hash2 = calculate_content_hash(content2);
717
718        // Should be a cache miss due to file change (even with old hash)
719        let result = cache.get_insights(&file_path, &content_hash1, "requirements", None)?;
720        assert!(result.is_none());
721        assert!(cache.stats().invalidations > 0);
722
723        // Should also be a miss with new hash
724        let result = cache.get_insights(&file_path, &content_hash2, "requirements", None)?;
725        assert!(result.is_none());
726
727        Ok(())
728    }
729
730    #[test]
731    fn test_disk_cache_persistence() -> Result<()> {
732        let temp_dir = TempDir::new()?;
733        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
734
735        // Create a test file
736        let test_file = temp_dir.path().join("test.md");
737        let content = "# Test\nPersistent content.";
738        fs::write(&test_file, content)?;
739        let file_path = Utf8PathBuf::try_from(test_file)?;
740
741        let content_hash = calculate_content_hash(content);
742        let insights = vec!["Test insight 1".to_string(), "Test insight 2".to_string()];
743
744        // Store in first cache instance
745        {
746            let mut cache1 = InsightCache::new(cache_dir.clone())?;
747            cache1.store_insights(
748                &file_path,
749                content,
750                &content_hash,
751                "requirements",
752                Priority::Medium,
753                insights.clone(),
754                None,
755            )?;
756        }
757
758        // Load from second cache instance (should read from disk)
759        {
760            let mut cache2 = InsightCache::new(cache_dir)?;
761            let cached_insights =
762                cache2.get_insights(&file_path, &content_hash, "requirements", None)?;
763            assert!(cached_insights.is_some());
764            assert_eq!(cached_insights.unwrap(), insights);
765            assert_eq!(cache2.stats().hits, 1);
766        }
767
768        Ok(())
769    }
770
771    #[test]
772    fn test_insight_generation_requirements() {
773        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
774        let content = r"
775# Requirements Document
776
777## Requirements
778
779### Requirement 1
780
781**User Story:** As a developer, I want to test, so that I can verify functionality.
782
783#### Acceptance Criteria
784
7851. WHEN I run tests THEN the system SHALL pass
7862. WHEN errors occur THEN the system SHALL report them
787
788### Requirement 2
789
790**User Story:** As a user, I want features, so that I can be productive.
791
792#### Acceptance Criteria
793
7941. WHEN I use features THEN they SHALL work
795";
796
797        let insights = cache.generate_insights(
798            content,
799            Utf8Path::new("requirements.md"),
800            "requirements",
801            Priority::High,
802        );
803
804        assert!(insights.len() >= 10);
805        assert!(insights.len() <= 25);
806
807        // Should contain requirements-specific insights
808        let insights_text = insights.join(" ");
809        assert!(insights_text.contains("user stories") || insights_text.contains("User Story"));
810        assert!(
811            insights_text.contains("acceptance criteria")
812                || insights_text.contains("WHEN")
813                || insights_text.contains("THEN")
814        );
815    }
816
817    #[test]
818    fn test_insight_generation_design() {
819        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
820        let content = r"
821# Design Document
822
823## Architecture
824
825The system consists of multiple components that interact through well-defined interfaces.
826
827## Components
828
829### Component A
830This component handles data processing.
831
832### Component B  
833This component manages the user interface.
834
835## Data Models
836
837```rust
838struct User {
839    id: u32,
840    name: String,
841}
842```
843
844## Error Handling
845
846The system handles errors gracefully through a structured error hierarchy.
847";
848
849        let insights = cache.generate_insights(
850            content,
851            Utf8Path::new("design.md"),
852            "design",
853            Priority::High,
854        );
855
856        assert!(insights.len() >= 10);
857        assert!(insights.len() <= 25);
858
859        // Should contain design-specific insights
860        let insights_text = insights.join(" ");
861        assert!(insights_text.contains("Architecture") || insights_text.contains("architecture"));
862        assert!(insights_text.contains("component") || insights_text.contains("Component"));
863        assert!(insights_text.contains("Error") || insights_text.contains("error"));
864    }
865
866    #[test]
867    fn test_cache_key_generation() {
868        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
869
870        let key1 = cache.cache_key("hash123", "requirements");
871        let key2 = cache.cache_key("hash123", "design");
872        let key3 = cache.cache_key("hash456", "requirements");
873
874        assert_ne!(key1, key2); // Different phases
875        assert_ne!(key1, key3); // Different hashes
876        assert_ne!(key2, key3); // Different hashes and phases
877
878        assert!(key1.contains("hash123"));
879        assert!(key1.contains("requirements"));
880    }
881
882    #[test]
883    fn test_cache_stats() {
884        let cache_dir = Utf8PathBuf::from("/tmp/test_cache");
885        let mut cache = InsightCache::new(cache_dir).unwrap();
886
887        // Initial stats
888        assert_eq!(cache.stats().hit_ratio(), 0.0);
889
890        // Simulate some cache operations
891        cache.stats.hits = 8;
892        cache.stats.misses = 2;
893        cache.stats.writes = 2;
894        cache.stats.invalidations = 1;
895
896        assert_eq!(cache.stats().hit_ratio(), 0.8);
897    }
898
899    #[test]
900    fn test_content_hash_calculation() {
901        let content1 = "test content";
902        let content2 = "test content";
903        let content3 = "different content";
904
905        let hash1 = calculate_content_hash(content1);
906        let hash2 = calculate_content_hash(content2);
907        let hash3 = calculate_content_hash(content3);
908
909        assert_eq!(hash1, hash2); // Same content = same hash
910        assert_ne!(hash1, hash3); // Different content = different hash
911        assert_eq!(hash1.len(), 64); // BLAKE3 hash length
912    }
913
914    #[test]
915    fn test_cache_clear() -> Result<()> {
916        let temp_dir = TempDir::new()?;
917        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
918        let mut cache = InsightCache::new(cache_dir)?;
919
920        // Add some cache entries
921        cache.memory_cache.insert(
922            "test_key".to_string(),
923            CachedInsight {
924                content_hash: "hash123".to_string(),
925                file_path: "test.md".to_string(),
926                priority: Priority::Medium,
927                insights: vec!["test".to_string()],
928                phase: "requirements".to_string(),
929                cached_at: Utc::now(),
930                file_size: 100,
931                last_modified: Utc::now(),
932            },
933        );
934        cache.stats.hits = 5;
935        cache.stats.misses = 2;
936
937        assert!(!cache.memory_cache.is_empty());
938        assert!(cache.stats.hits > 0);
939
940        // Clear cache
941        cache.clear()?;
942
943        assert!(cache.memory_cache.is_empty());
944        assert_eq!(cache.stats.hits, 0);
945        assert_eq!(cache.stats.misses, 0);
946
947        Ok(())
948    }
949
950    #[test]
951    fn test_insight_generation_tasks() {
952        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
953        let content = r"
954# Implementation Tasks
955
956## Milestone 1
957
958- [ ] Task 1: Implement feature A
959- [x] Task 2: Implement feature B
960- [ ] Task 3: Write tests for feature A
961- [x] Task 4: Write tests for feature B
962
963## Milestone 2
964
965- [ ] Task 5: Implement feature C
966- [ ] Task 6: Test feature C
967
968## Phase 1
969
970Implementation phase for core features.
971
972## Phase 2
973
974Testing phase for all features.
975";
976
977        let insights =
978            cache.generate_insights(content, Utf8Path::new("tasks.md"), "tasks", Priority::High);
979
980        assert!(insights.len() >= 10);
981        assert!(insights.len() <= 25);
982
983        // Should contain tasks-specific insights
984        let insights_text = insights.join(" ");
985        assert!(insights_text.contains("tasks") || insights_text.contains("Task"));
986        assert!(insights_text.contains("completed") || insights_text.contains("[x]"));
987        assert!(
988            insights_text.contains("Milestone")
989                || insights_text.contains("milestone")
990                || insights_text.contains("Phase")
991                || insights_text.contains("phase")
992        );
993    }
994
995    #[test]
996    fn test_insight_generation_review() {
997        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
998        let content = r"
999# Review Document
1000
1001## Feedback
1002
1003The implementation looks good overall, but there are some issues to address.
1004
1005## Issues
1006
10071. Issue with error handling in module A
10082. Problem with performance in module B
1009
1010## Recommendations
1011
1012- Recommend refactoring module A for better error handling
1013- Recommend optimizing module B for better performance
1014
1015## FIXUP
1016
1017The following fixups are needed:
1018- Fix error handling in module A
1019- Optimize performance in module B
1020";
1021
1022        let insights = cache.generate_insights(
1023            content,
1024            Utf8Path::new("review.md"),
1025            "review",
1026            Priority::High,
1027        );
1028
1029        assert!(insights.len() >= 10);
1030        assert!(insights.len() <= 25);
1031
1032        // Should contain review-specific insights
1033        let insights_text = insights.join(" ");
1034        assert!(
1035            insights_text.contains("FIXUP")
1036                || insights_text.contains("fixup")
1037                || insights_text.contains("feedback")
1038                || insights_text.contains("Feedback")
1039                || insights_text.contains("issue")
1040                || insights_text.contains("Issue")
1041                || insights_text.contains("recommend")
1042                || insights_text.contains("Recommend")
1043        );
1044    }
1045
1046    #[test]
1047    fn test_insight_generation_generic() {
1048        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
1049        let content = r#"
1050# Generic Document
1051
1052## Section 1
1053
1054This is some generic content with multiple paragraphs.
1055
1056This is another paragraph.
1057
1058## Section 2
1059
1060- List item 1
1061- List item 2
1062* List item 3
1063
1064[Link to something](https://example.com)
1065
1066```rust
1067fn example() {
1068    println!("Hello, world!");
1069}
1070```
1071
1072```json
1073{
1074    "key": "value"
1075}
1076```
1077"#;
1078
1079        let insights = cache.generate_insights(
1080            content,
1081            Utf8Path::new("generic.md"),
1082            "unknown",
1083            Priority::Medium,
1084        );
1085
1086        assert!(insights.len() >= 10);
1087        assert!(insights.len() <= 25);
1088
1089        // Should contain generic insights
1090        let insights_text = insights.join(" ");
1091        assert!(insights_text.contains("sections") || insights_text.contains("Section"));
1092        assert!(insights_text.contains("code blocks") || insights_text.contains("code"));
1093        assert!(insights_text.contains("list items") || insights_text.contains("List"));
1094    }
1095
1096    #[test]
1097    fn test_cache_statistics_logging() {
1098        use crate::logging::Logger;
1099
1100        let temp_dir = TempDir::new().unwrap();
1101        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf()).unwrap();
1102        let mut cache = InsightCache::new(cache_dir).unwrap();
1103
1104        // Simulate cache operations
1105        cache.stats.hits = 7;
1106        cache.stats.misses = 3;
1107        cache.stats.invalidations = 1;
1108        cache.stats.writes = 3;
1109
1110        // Create a logger (this will log to stderr in test mode)
1111        let logger = Logger::new(true); // verbose mode
1112
1113        // This should not panic and should log statistics
1114        cache.log_stats(&logger);
1115
1116        // Verify hit ratio calculation
1117        assert_eq!(cache.stats().hit_ratio(), 0.7);
1118    }
1119
1120    #[test]
1121    fn test_memory_cache_hit() -> Result<()> {
1122        let temp_dir = TempDir::new()?;
1123        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
1124        let mut cache = InsightCache::new(cache_dir)?;
1125
1126        // Create a test file
1127        let test_file = temp_dir.path().join("test.md");
1128        let content = "# Test\nMemory cache test.";
1129        fs::write(&test_file, content)?;
1130        let file_path = Utf8PathBuf::try_from(test_file)?;
1131
1132        let content_hash = calculate_content_hash(content);
1133        let insights = vec![
1134            "Insight 1".to_string(),
1135            "Insight 2".to_string(),
1136            "Insight 3".to_string(),
1137        ];
1138
1139        // Store in cache
1140        cache.store_insights(
1141            &file_path,
1142            content,
1143            &content_hash,
1144            "requirements",
1145            Priority::High,
1146            insights.clone(),
1147            None,
1148        )?;
1149
1150        // First retrieval should hit memory cache
1151        let result1 = cache.get_insights(&file_path, &content_hash, "requirements", None)?;
1152        assert!(result1.is_some());
1153        assert_eq!(result1.unwrap(), insights);
1154        assert_eq!(cache.stats().hits, 1);
1155        assert_eq!(cache.stats().misses, 0);
1156
1157        // Second retrieval should also hit memory cache
1158        let result2 = cache.get_insights(&file_path, &content_hash, "requirements", None)?;
1159        assert!(result2.is_some());
1160        assert_eq!(result2.unwrap(), insights);
1161        assert_eq!(cache.stats().hits, 2);
1162        assert_eq!(cache.stats().misses, 0);
1163
1164        Ok(())
1165    }
1166
1167    #[test]
1168    fn test_cache_key_uniqueness() {
1169        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
1170
1171        // Test that different combinations produce unique keys
1172        let keys = vec![
1173            cache.cache_key("hash1", "requirements"),
1174            cache.cache_key("hash1", "design"),
1175            cache.cache_key("hash1", "tasks"),
1176            cache.cache_key("hash1", "review"),
1177            cache.cache_key("hash2", "requirements"),
1178            cache.cache_key("hash2", "design"),
1179        ];
1180
1181        // All keys should be unique
1182        for i in 0..keys.len() {
1183            for j in (i + 1)..keys.len() {
1184                assert_ne!(
1185                    keys[i], keys[j],
1186                    "Keys at indices {i} and {j} should be different"
1187                );
1188            }
1189        }
1190
1191        // Keys should contain both hash and phase
1192        for key in &keys {
1193            assert!(key.contains('_'), "Key should contain underscore separator");
1194        }
1195    }
1196
1197    #[test]
1198    fn test_insight_count_bounds() {
1199        let cache = InsightCache::new(Utf8PathBuf::from("/tmp")).unwrap();
1200
1201        // Test with minimal content
1202        let minimal_content = "x";
1203        let insights_min = cache.generate_insights(
1204            minimal_content,
1205            Utf8Path::new("minimal.md"),
1206            "requirements",
1207            Priority::Low,
1208        );
1209        assert!(
1210            insights_min.len() >= 10,
1211            "Should have at least 10 insights, got {}",
1212            insights_min.len()
1213        );
1214        assert!(
1215            insights_min.len() <= 25,
1216            "Should have at most 25 insights, got {}",
1217            insights_min.len()
1218        );
1219
1220        // Test with rich content
1221        let rich_content = r"
1222# Rich Document
1223
1224## Section 1
1225Content here.
1226
1227## Section 2
1228More content.
1229
1230## Section 3
1231Even more content.
1232
1233- List item 1
1234- List item 2
1235- List item 3
1236
1237```rust
1238code here
1239```
1240
1241[Link](url)
1242
1243**User Story:** As a user, I want features.
1244
1245WHEN something THEN something else SHALL happen.
1246";
1247        let insights_rich = cache.generate_insights(
1248            rich_content,
1249            Utf8Path::new("rich.md"),
1250            "requirements",
1251            Priority::High,
1252        );
1253        assert!(
1254            insights_rich.len() >= 10,
1255            "Should have at least 10 insights, got {}",
1256            insights_rich.len()
1257        );
1258        assert!(
1259            insights_rich.len() <= 25,
1260            "Should have at most 25 insights, got {}",
1261            insights_rich.len()
1262        );
1263    }
1264
1265    #[test]
1266    fn test_corrupted_cache_file_handling() -> Result<()> {
1267        let temp_dir = TempDir::new()?;
1268        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
1269        let mut cache = InsightCache::new(cache_dir)?;
1270
1271        // Create a test file
1272        let test_file = temp_dir.path().join("test.md");
1273        let content = "# Test\nCorrupted cache test.";
1274        fs::write(&test_file, content)?;
1275        let file_path = Utf8PathBuf::try_from(test_file)?;
1276
1277        let content_hash = calculate_content_hash(content);
1278        let key = cache.cache_key(&content_hash, "requirements");
1279
1280        // Write a corrupted cache file
1281        let cache_file = cache.cache_file_path(&key);
1282        fs::write(&cache_file, "{ invalid json }")?;
1283
1284        // Should handle corrupted file gracefully (cache miss)
1285        let result = cache.get_insights(&file_path, &content_hash, "requirements", None)?;
1286        assert!(result.is_none());
1287        assert_eq!(cache.stats().misses, 1);
1288
1289        // Corrupted file should be removed
1290        assert!(!cache_file.exists());
1291
1292        Ok(())
1293    }
1294
1295    #[test]
1296    fn test_hash_mismatch_invalidation() -> Result<()> {
1297        let temp_dir = TempDir::new()?;
1298        let cache_dir = Utf8PathBuf::try_from(temp_dir.path().to_path_buf())?;
1299        let mut cache = InsightCache::new(cache_dir)?;
1300
1301        // Create a test file
1302        let test_file = temp_dir.path().join("test.md");
1303        let content = "# Test\nHash mismatch test.";
1304        fs::write(&test_file, content)?;
1305        let file_path = Utf8PathBuf::try_from(test_file.clone())?;
1306
1307        let content_hash1 = calculate_content_hash(content);
1308        let insights = vec!["Test insight".to_string()];
1309
1310        // Store with first hash
1311        cache.store_insights(
1312            &file_path,
1313            content,
1314            &content_hash1,
1315            "requirements",
1316            Priority::Medium,
1317            insights,
1318            None,
1319        )?;
1320
1321        // Wait a bit to ensure different modification time
1322        thread::sleep(Duration::from_millis(10));
1323
1324        // Modify the file content (this will change both hash and mtime)
1325        let new_content = "# Test\nDifferent content.";
1326        fs::write(&test_file, new_content)?;
1327
1328        // Try to retrieve with old hash (file has changed)
1329        let result = cache.get_insights(&file_path, &content_hash1, "requirements", None)?;
1330
1331        // Should be a miss due to file change, and cache should be invalidated
1332        assert!(result.is_none());
1333        assert!(cache.stats().invalidations > 0);
1334
1335        Ok(())
1336    }
1337}