pdf_oxide 0.3.33

The fastest Rust PDF library with text extraction: 0.8ms mean, 100% pass rate on 3,830 PDFs. 5× faster than pdf_extract, 17× faster than oxidize_pdf. Extract, create, and edit PDFs.
Documentation
//! General-purpose caching utilities.
//!
//! This module contains cache types and helper traits that are used across
//! multiple modules (e.g., `document`, `fonts::cmap`).

use std::collections::HashMap;
use std::sync::Mutex;

/// Extension trait for Mutex to recover from poisoned locks.
///
/// When a thread panics while holding a Mutex, the lock becomes "poisoned"
/// and the standard `.lock().unwrap()` would cascade panics. This trait
/// provides `.lock_or_recover()` which discards the poison flag and returns
/// the inner data, since the Mutexes in PdfDocument protect caches and
/// bookkeeping (not safety-critical invariants).
pub(crate) trait MutexExt<T> {
    /// Lock the mutex, recovering from poison if needed.
    fn lock_or_recover(&self) -> std::sync::MutexGuard<'_, T>;
}

impl<T> MutexExt<T> for Mutex<T> {
    fn lock_or_recover(&self) -> std::sync::MutexGuard<'_, T> {
        self.lock().unwrap_or_else(|poisoned| {
            log::debug!("Mutex was poisoned, recovering");
            poisoned.into_inner()
        })
    }
}

/// Entry-count-bounded cache with LRU-style eviction.
///
/// Used for caches where estimating per-entry byte size is impractical
/// (e.g., `Vec<TextSpan>`, `Vec<PdfImage>`). On insert, when at capacity,
/// the least-recently-used entry is evicted. `get()` promotes the accessed
/// key to the most-recently-used position so hot entries survive eviction.
pub(crate) struct BoundedEntryCache<K: Eq + std::hash::Hash + Copy, V> {
    map: HashMap<K, V>,
    insertion_order: std::collections::VecDeque<K>,
    max_entries: usize,
}

impl<K: Eq + std::hash::Hash + Copy, V> BoundedEntryCache<K, V> {
    pub(crate) fn new(max_entries: usize) -> Self {
        Self {
            map: HashMap::new(),
            insertion_order: std::collections::VecDeque::new(),
            max_entries,
        }
    }

    pub(crate) fn get(&mut self, key: &K) -> Option<&V> {
        if self.map.contains_key(key) {
            if let Some(pos) = self.insertion_order.iter().position(|k| k == key) {
                self.insertion_order.remove(pos);
            }
            self.insertion_order.push_back(*key);
        }
        self.map.get(key)
    }

    pub(crate) fn insert(&mut self, key: K, value: V) {
        use std::collections::hash_map::Entry;
        // On re-insert: replace value and promote to most-recently-used position
        // so LRU eviction order stays accurate.
        if let Entry::Occupied(mut e) = self.map.entry(key) {
            e.insert(value);
            if let Some(pos) = self.insertion_order.iter().position(|k| k == &key) {
                self.insertion_order.remove(pos);
            }
            self.insertion_order.push_back(key);
            return;
        }
        // Evict oldest entries if at capacity
        while self.map.len() >= self.max_entries && !self.insertion_order.is_empty() {
            if let Some(old_key) = self.insertion_order.pop_front() {
                self.map.remove(&old_key);
            }
        }
        self.map.insert(key, value);
        self.insertion_order.push_back(key);
    }

    pub(crate) fn clear(&mut self) {
        self.map.clear();
        self.insertion_order.clear();
    }

    pub(crate) fn len(&self) -> usize {
        self.map.len()
    }
}