vyre-libs 0.6.3

vyre Category A library ecosystem - pure-IR compositions over vyre-ops hardware primitives
Documentation
use std::path::{Path, PathBuf};
use std::sync::Arc;

use rustc_hash::FxHashMap as HashMap;

use crate::parsing::c::preprocess::gpu_pipeline::lru_index::LruIndex;
use crate::parsing::c::preprocess::gpu_pipeline::IncludeByteCacheStats;
use crate::parsing::c::preprocess::gpu_pipeline::IncludeLoader;

type IncludeFileCacheKey = (PathBuf, Vec<u8>, bool, bool);
const DEFAULT_MAX_INCLUDE_CACHE_ENTRIES: usize = 4096;
const DEFAULT_MAX_INCLUDE_CACHE_BYTES: usize = 64 * 1024 * 1024;

pub(super) enum IncludeFileResidency {
    Filesystem,
    RunCache,
}

pub(super) struct IncludeFile {
    pub(super) canonical_path: PathBuf,
    pub(super) bytes: Arc<[u8]>,
    pub(super) residency: IncludeFileResidency,
}

struct IncludeFileCacheEntry {
    canonical_path: PathBuf,
    bytes: Arc<[u8]>,
    retained_bytes: usize,
    last_used: u64,
}

pub(super) struct IncludeFileCache {
    entries: HashMap<IncludeFileCacheKey, IncludeFileCacheEntry>,
    lru: LruIndex<IncludeFileCacheKey>,
    clock: u64,
    retained_bytes: usize,
    max_entries: usize,
    max_bytes: usize,
    hits: u64,
    misses: u64,
    evictions: u64,
    loaded_bytes: u64,
    reused_bytes: u64,
}

impl Default for IncludeFileCache {
    fn default() -> Self {
        Self::with_limits(
            DEFAULT_MAX_INCLUDE_CACHE_ENTRIES,
            DEFAULT_MAX_INCLUDE_CACHE_BYTES,
        )
    }
}

impl IncludeFileCache {
    fn with_limits(max_entries: usize, max_bytes: usize) -> Self {
        Self {
            entries: HashMap::default(),
            lru: LruIndex::with_capacity(max_entries),
            clock: 0,
            retained_bytes: 0,
            max_entries,
            max_bytes,
            hits: 0,
            misses: 0,
            evictions: 0,
            loaded_bytes: 0,
            reused_bytes: 0,
        }
    }

    pub(super) fn resolve(
        &mut self,
        loader: &dyn IncludeLoader,
        from: &Path,
        path: &[u8],
        is_system: bool,
        is_next: bool,
    ) -> Result<Option<IncludeFile>, String> {
        let key = (
            cache_scope_path(from, is_system, is_next),
            path.to_vec(),
            is_system,
            is_next,
        );
        self.clock = self.clock.saturating_add(1);
        if let Some(entry) = self.entries.get_mut(&key) {
            self.hits = self.hits.saturating_add(1);
            self.reused_bytes = self
                .reused_bytes
                .saturating_add(u64::try_from(entry.bytes.len()).unwrap_or(u64::MAX));
            entry.last_used = self.clock;
            let canonical_path = entry.canonical_path.clone();
            let bytes = Arc::clone(&entry.bytes);
            self.record_lru(key, self.clock);
            return Ok(Some(IncludeFile {
                canonical_path,
                bytes,
                residency: IncludeFileResidency::RunCache,
            }));
        }
        self.misses = self.misses.saturating_add(1);
        let Some((canonical_path, bytes)) = loader.load(path, is_system, is_next, from)? else {
            return Ok(None);
        };
        self.loaded_bytes = self
            .loaded_bytes
            .saturating_add(u64::try_from(bytes.len()).unwrap_or(u64::MAX));
        self.insert(key, canonical_path.clone(), Arc::clone(&bytes));
        Ok(Some(IncludeFile {
            canonical_path,
            bytes,
            residency: IncludeFileResidency::Filesystem,
        }))
    }

    pub(super) fn stats(&self) -> IncludeByteCacheStats {
        IncludeByteCacheStats {
            hits: self.hits,
            misses: self.misses,
            entries: self.entries.len(),
            evictions: self.evictions,
            retained_bytes: u64::try_from(self.retained_bytes).unwrap_or(u64::MAX),
            loaded_bytes: self.loaded_bytes,
            reused_bytes: self.reused_bytes,
        }
    }

    fn insert(&mut self, key: IncludeFileCacheKey, canonical_path: PathBuf, bytes: Arc<[u8]>) {
        let retained_bytes = retained_entry_bytes(&key, &canonical_path, bytes.len());
        if self.max_entries == 0 || retained_bytes > self.max_bytes {
            return;
        }
        let last_used = self.clock;
        self.retained_bytes = self.retained_bytes.saturating_add(retained_bytes);
        self.entries.insert(
            key.clone(),
            IncludeFileCacheEntry {
                canonical_path,
                bytes,
                retained_bytes,
                last_used,
            },
        );
        self.record_lru(key, last_used);
        self.evict_to_limits();
    }

    fn evict_to_limits(&mut self) {
        while self.entries.len() > self.max_entries || self.retained_bytes > self.max_bytes {
            let Some(evict_key) = self.lru.pop_valid(|key, last_used| {
                self.entries
                    .get(key)
                    .is_some_and(|entry| entry.last_used == last_used)
            }) else {
                break;
            };
            if let Some(entry) = self.entries.remove(&evict_key) {
                self.retained_bytes = self.retained_bytes.saturating_sub(entry.retained_bytes);
                self.evictions = self.evictions.saturating_add(1);
            }
        }
    }

    fn record_lru(&mut self, key: IncludeFileCacheKey, last_used: u64) {
        self.lru.record(key, last_used);
        self.compact_lru_if_needed();
    }

    fn compact_lru_if_needed(&mut self) {
        self.lru.compact_if_needed(
            self.entries.len(),
            self.entries
                .iter()
                .map(|(key, entry)| (key.clone(), entry.last_used)),
        );
    }

    #[cfg(test)]
    fn lru_len_for_tests(&self) -> usize {
        self.lru.len()
    }
}

fn cache_scope_path(from: &Path, is_system: bool, is_next: bool) -> PathBuf {
    if is_system && !is_next {
        PathBuf::new()
    } else {
        from.to_path_buf()
    }
}

fn retained_entry_bytes(
    key: &IncludeFileCacheKey,
    canonical_path: &Path,
    bytes_len: usize,
) -> usize {
    key.0
        .to_string_lossy()
        .len()
        .saturating_add(key.1.len())
        .saturating_add(canonical_path.to_string_lossy().len())
        .saturating_add(bytes_len)
}

#[cfg(test)]
mod tests {
    use std::cell::Cell;
    use std::collections::HashMap;
    use std::path::{Path, PathBuf};
    use std::sync::Arc;

    use super::IncludeFileCache;
    use crate::parsing::c::preprocess::gpu_pipeline::IncludeLoader;

    struct TestLoader {
        files: HashMap<Vec<u8>, Vec<u8>>,
        loads: Cell<usize>,
    }

    impl TestLoader {
        fn new(files: &[(&[u8], &[u8])]) -> Self {
            Self {
                files: files
                    .iter()
                    .map(|(path, bytes)| (path.to_vec(), bytes.to_vec()))
                    .collect(),
                loads: Cell::new(0),
            }
        }
    }

    impl IncludeLoader for TestLoader {
        fn load(
            &self,
            path: &[u8],
            _is_system: bool,
            _is_next: bool,
            _from: &Path,
        ) -> Result<Option<(PathBuf, Arc<[u8]>)>, String> {
            self.loads.set(self.loads.get() + 1);
            Ok(self.files.get(path).map(|bytes| {
                (
                    PathBuf::from(String::from_utf8_lossy(path).into_owned()),
                    bytes.clone().into(),
                )
            }))
        }
    }

    #[test]
    fn include_file_cache_evicts_to_entry_budget() {
        let loader = TestLoader::new(&[(b"a.h", b"int a;"), (b"b.h", b"int b;")]);
        let mut cache = IncludeFileCache::with_limits(1, usize::MAX);

        assert!(cache
            .resolve(&loader, Path::new("<tu>"), b"a.h", false, false)
            .expect("Fix: resolve a")
            .is_some());
        assert!(cache
            .resolve(&loader, Path::new("<tu>"), b"b.h", false, false)
            .expect("Fix: resolve b")
            .is_some());

        let stats = cache.stats();
        assert_eq!(stats.entries, 1);
        assert_eq!(stats.evictions, 1);
        assert!(stats.retained_bytes > 0);
    }

    #[test]
    fn include_file_cache_does_not_retain_oversized_entries() {
        let loader = TestLoader::new(&[(b"huge.h", b"0123456789")]);
        let mut cache = IncludeFileCache::with_limits(8, 4);

        assert!(cache
            .resolve(&loader, Path::new("<tu>"), b"huge.h", false, false)
            .expect("Fix: resolve huge")
            .is_some());

        let stats = cache.stats();
        assert_eq!(stats.entries, 0);
        assert_eq!(stats.retained_bytes, 0);
        assert_eq!(stats.loaded_bytes, 10);
    }

    #[test]
    fn include_file_cache_lru_heap_compacts_hot_hits() {
        let loader = TestLoader::new(&[(b"hot.h", b"int hot;")]);
        let mut cache = IncludeFileCache::with_limits(8, usize::MAX);

        assert!(cache
            .resolve(&loader, Path::new("<tu>"), b"hot.h", false, false)
            .expect("Fix: resolve hot")
            .is_some());
        for _ in 0..160 {
            assert!(cache
                .resolve(&loader, Path::new("<tu>"), b"hot.h", false, false)
                .expect("Fix: resolve cached hot")
                .is_some());
        }

        assert_eq!(cache.stats().entries, 1);
        assert_eq!(cache.stats().hits, 160);
        assert_eq!(loader.loads.get(), 1);
        assert!(
            cache.lru_len_for_tests() <= cache.stats().entries.saturating_mul(4).saturating_add(32),
            "Fix: include-file cache LRU metadata must compact stale hot-hit records instead of growing with every include."
        );
    }
}