raxit-core 0.1.2

Core security scanning engine for AI agent applications
Documentation
//! File cache for incremental scanning

use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use xxhash_rust::xxh3::xxh3_64;

use crate::error::Result;

/// File cache entry
///
/// One record per tracked file, stored as a value in [`FileCache::files`].
/// Entries are produced by [`FileCache::update`] and serialized as part of
/// the cache file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// File hash (xxh3)
    ///
    /// 64-bit xxh3 digest of the file's full contents; this is the value
    /// [`FileCache::has_changed`] compares against.
    pub hash: u64,

    /// Last modified timestamp
    ///
    /// RFC 3339 string in UTC. NOTE: [`FileCache::update`] fills this with the
    /// current time at the moment the entry is written, not with the file's
    /// filesystem modification time.
    pub modified_at: String,

    /// File size in bytes
    ///
    /// Taken from the file's metadata when the entry is written.
    pub size: u64,
}

/// File cache for incremental scanning
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct FileCache {
    /// Version of the cache format
    pub version: String,

    /// Map of file path -> cache entry
    pub files: HashMap<String, CacheEntry>,
}

impl FileCache {
    /// Create a new empty cache
    ///
    /// The cache is tagged with format version `"1.0.0"` and tracks no files.
    pub fn new() -> Self {
        Self {
            version: "1.0.0".to_string(),
            files: HashMap::new(),
        }
    }

    /// Load cache from file
    ///
    /// Returns a fresh, empty cache when `path` does not exist.
    ///
    /// # Errors
    ///
    /// Fails if the file exists but cannot be read, or if its contents are
    /// not valid JSON for a [`FileCache`].
    pub fn load(path: &Path) -> Result<Self> {
        // Read directly and inspect the error instead of probing with
        // `exists()` first: a separate existence check races with concurrent
        // deletion and would turn a vanished file into a hard error.
        let content = match fs::read_to_string(path) {
            Ok(content) => content,
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                tracing::debug!("Cache file not found, creating new cache");
                return Ok(Self::new());
            }
            Err(err) => return Err(err.into()),
        };

        let cache: FileCache = serde_json::from_str(&content)?;

        tracing::debug!("Loaded cache with {} entries", cache.files.len());
        Ok(cache)
    }

    /// Save cache to file
    ///
    /// Writes the cache as pretty-printed JSON, creating any missing parent
    /// directories of `path` first.
    ///
    /// # Errors
    ///
    /// Fails if the parent directory cannot be created, the cache cannot be
    /// serialized, or the file cannot be written.
    pub fn save(&self, path: &Path) -> Result<()> {
        // Create parent directory if needed
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)?;
        }

        let content = serde_json::to_string_pretty(self)?;
        fs::write(path, content)?;

        tracing::debug!(
            "Saved cache with {} entries to {}",
            self.files.len(),
            path.display()
        );
        Ok(())
    }

    /// Check if a file has changed since last scan
    ///
    /// A file that has no entry in the cache is reported as changed without
    /// touching the filesystem. Otherwise the file is re-hashed and compared
    /// with the stored hash.
    ///
    /// # Errors
    ///
    /// Fails if the file is tracked but can no longer be read.
    pub fn has_changed(&self, path: &Path) -> Result<bool> {
        // If file not in cache, it's new (changed)
        let Some(cached) = self.files.get(&Self::cache_key(path)) else {
            return Ok(true);
        };

        Ok(Self::hash_file(path)? != cached.hash)
    }

    /// Update cache entry for a file
    ///
    /// Hashes the file, reads its size from the filesystem metadata and
    /// stores both under the file's key, replacing any previous entry. The
    /// entry's `modified_at` is set to the current UTC time.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read or its metadata cannot be queried;
    /// the cache is left untouched in that case.
    pub fn update(&mut self, path: &Path) -> Result<()> {
        let hash = Self::hash_file(path)?;
        let metadata = fs::metadata(path)?;

        let entry = CacheEntry {
            hash,
            modified_at: chrono::Utc::now().to_rfc3339(),
            size: metadata.len(),
        };

        self.files.insert(Self::cache_key(path), entry);
        Ok(())
    }

    /// Compute xxh3 hash of a file
    ///
    /// Reads the whole file into memory and returns its 64-bit xxh3 digest.
    fn hash_file(path: &Path) -> Result<u64> {
        let content = fs::read(path)?;
        Ok(xxh3_64(&content))
    }

    /// Derive the map key used for `path`.
    ///
    /// Single source of truth for key derivation so that lookups, inserts and
    /// pruning always agree. Non-UTF-8 path components are replaced lossily.
    fn cache_key(path: &Path) -> String {
        path.to_string_lossy().into_owned()
    }

    /// Remove entries for files that no longer exist
    ///
    /// Keeps only the entries whose key matches one of `existing_files`;
    /// everything else is dropped from the cache. The filesystem is not
    /// consulted: the caller decides which files exist.
    pub fn prune(&mut self, existing_files: &[PathBuf]) {
        let existing: std::collections::HashSet<String> = existing_files
            .iter()
            .map(|p| Self::cache_key(p.as_path()))
            .collect();

        self.files.retain(|path, _| existing.contains(path));
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Builds a placeholder entry for tests that only care about map keys
    /// or a specific hash value.
    fn entry_with_hash(hash: u64) -> CacheEntry {
        CacheEntry {
            hash,
            modified_at: chrono::Utc::now().to_rfc3339(),
            size: 100,
        }
    }

    #[test]
    fn test_cache_creation() {
        let cache = FileCache::new();
        assert_eq!(cache.version, "1.0.0");
        assert_eq!(cache.files.len(), 0);
    }

    #[test]
    fn test_hash_file() -> Result<()> {
        let mut file = NamedTempFile::new()?;
        file.write_all(b"test content")?;

        let hash1 = FileCache::hash_file(file.path())?;
        let hash2 = FileCache::hash_file(file.path())?;

        // Same content should produce same hash
        assert_eq!(hash1, hash2);

        Ok(())
    }

    #[test]
    fn test_hash_file_differs_for_different_content() -> Result<()> {
        let mut first = NamedTempFile::new()?;
        first.write_all(b"test content")?;
        let mut second = NamedTempFile::new()?;
        second.write_all(b"other content")?;

        // Different content should produce different hashes
        assert_ne!(
            FileCache::hash_file(first.path())?,
            FileCache::hash_file(second.path())?
        );

        Ok(())
    }

    #[test]
    fn test_cache_save_load() -> Result<()> {
        let temp_dir = tempfile::tempdir()?;
        let cache_path = temp_dir.path().join("cache.json");

        let mut cache = FileCache::new();
        cache
            .files
            .insert("test.py".to_string(), entry_with_hash(12345));

        cache.save(&cache_path)?;

        let loaded = FileCache::load(&cache_path)?;
        assert_eq!(loaded.files.len(), 1);
        assert_eq!(loaded.files.get("test.py").unwrap().hash, 12345);

        Ok(())
    }

    #[test]
    fn test_load_missing_file_returns_empty_cache() -> Result<()> {
        let temp_dir = tempfile::tempdir()?;
        let cache_path = temp_dir.path().join("does-not-exist.json");

        // A missing cache file is not an error: scanning starts from scratch
        let loaded = FileCache::load(&cache_path)?;
        assert_eq!(loaded.version, "1.0.0");
        assert!(loaded.files.is_empty());

        Ok(())
    }

    #[test]
    fn test_has_changed() -> Result<()> {
        let mut file = NamedTempFile::new()?;
        file.write_all(b"original content")?;

        let mut cache = FileCache::new();
        cache.update(file.path())?;

        // File should not be marked as changed
        assert!(!cache.has_changed(file.path())?);

        // Modify file
        file.write_all(b" modified")?;

        // File should now be marked as changed
        assert!(cache.has_changed(file.path())?);

        Ok(())
    }

    #[test]
    fn test_has_changed_detects_same_size_edit() -> Result<()> {
        let temp_dir = tempfile::tempdir()?;
        let file_path = temp_dir.path().join("source.py");
        std::fs::write(&file_path, b"original content")?;

        let mut cache = FileCache::new();
        cache.update(&file_path)?;
        assert!(!cache.has_changed(&file_path)?);

        // Same byte length, different bytes: only the hash can tell them apart
        std::fs::write(&file_path, b"modified content")?;
        assert!(cache.has_changed(&file_path)?);

        Ok(())
    }

    #[test]
    fn test_untracked_file_is_reported_as_changed() -> Result<()> {
        let mut file = NamedTempFile::new()?;
        file.write_all(b"never cached")?;

        let cache = FileCache::new();
        assert!(cache.has_changed(file.path())?);

        Ok(())
    }

    #[test]
    fn test_prune_drops_entries_not_listed() {
        let mut cache = FileCache::new();
        cache.files.insert("keep.py".to_string(), entry_with_hash(1));
        cache.files.insert("drop.py".to_string(), entry_with_hash(2));

        cache.prune(&[std::path::PathBuf::from("keep.py")]);

        assert_eq!(cache.files.len(), 1);
        assert!(cache.files.contains_key("keep.py"));
        assert!(!cache.files.contains_key("drop.py"));
    }
}