Skip to main content

lru_tokens/
lib.rs

1//! # lru-tokens
2//!
3//! LRU cache where eviction is **weighted by token count** (or any other
4//! size unit you supply), not entry count.
5//!
6//! Prompt caches are usually bounded by tokens, not by entries — a few
7//! 100k-token system prompts can dominate the budget that would
8//! otherwise hold thousands of small entries. This crate inverts the
9//! usual `LruCache<K, V>` policy: each entry carries a `weight` (you
10//! say what it means — tokens, bytes, dollars), and the cache evicts
11//! the least-recently-used entries until the cumulative weight fits.
12//!
13//! ## Example
14//!
15//! ```
16//! use lru_tokens::LruTokens;
17//!
18//! let mut cache: LruTokens<&str, String> = LruTokens::new(1_000);
19//!
20//! cache.put("system-prompt-a", "...".into(), 800);
21//! cache.put("system-prompt-b", "...".into(), 300); // total 1100 > 1000
22//! // Inserting `b` evicted `a` (the LRU) to bring total under 1000.
23//! assert!(cache.get(&"system-prompt-a").is_none());
24//! assert!(cache.get(&"system-prompt-b").is_some());
25//! assert_eq!(cache.weight(), 300);
26//! ```
27
28#![deny(missing_docs)]
29
30use std::collections::HashMap;
31use std::hash::Hash;
32
33/// A single cache entry's bookkeeping.
34struct Entry<V> {
35    value: V,
36    weight: u64,
37    /// Monotonic timestamp of last access; bigger = more recent.
38    last_access: u64,
39}
40
41/// LRU cache bounded by cumulative weight.
42pub struct LruTokens<K, V> {
43    capacity: u64,
44    weight: u64,
45    tick: u64,
46    map: HashMap<K, Entry<V>>,
47}
48
49impl<K, V> LruTokens<K, V>
50where
51    K: Eq + Hash + Clone,
52{
53    /// Build a cache that holds entries totalling at most `capacity`
54    /// units of weight.
55    pub fn new(capacity: u64) -> Self {
56        Self {
57            capacity,
58            weight: 0,
59            tick: 0,
60            map: HashMap::new(),
61        }
62    }
63
64    /// Capacity in weight units.
65    pub fn capacity(&self) -> u64 {
66        self.capacity
67    }
68
69    /// Current cumulative weight.
70    pub fn weight(&self) -> u64 {
71        self.weight
72    }
73
74    /// Number of cached entries.
75    pub fn len(&self) -> usize {
76        self.map.len()
77    }
78
79    /// True when no entries are cached.
80    pub fn is_empty(&self) -> bool {
81        self.map.is_empty()
82    }
83
84    /// Insert or update an entry. If the new weight pushes the total
85    /// over capacity, the least-recently-used entries are dropped
86    /// until the total fits (or only the new entry remains).
87    ///
88    /// If `weight` itself exceeds `capacity`, the cache will end up
89    /// holding just this one entry with `self.weight() == weight`.
90    pub fn put(&mut self, key: K, value: V, weight: u64) {
91        // Replace existing entry (free up its old weight first).
92        if let Some(old) = self.map.remove(&key) {
93            self.weight -= old.weight;
94        }
95        self.tick += 1;
96        self.map.insert(
97            key.clone(),
98            Entry {
99                value,
100                weight,
101                last_access: self.tick,
102            },
103        );
104        self.weight += weight;
105        self.evict_until_fits();
106    }
107
108    /// Look up an entry; bumps recency on hit.
109    pub fn get(&mut self, key: &K) -> Option<&V> {
110        self.tick += 1;
111        let tick = self.tick;
112        let entry = self.map.get_mut(key)?;
113        entry.last_access = tick;
114        Some(&entry.value)
115    }
116
117    /// Look up without bumping recency. For peek-only inspection.
118    pub fn peek(&self, key: &K) -> Option<&V> {
119        self.map.get(key).map(|e| &e.value)
120    }
121
122    /// Remove an entry. Returns its value if present.
123    pub fn remove(&mut self, key: &K) -> Option<V> {
124        let entry = self.map.remove(key)?;
125        self.weight -= entry.weight;
126        Some(entry.value)
127    }
128
129    /// Drop everything.
130    pub fn clear(&mut self) {
131        self.map.clear();
132        self.weight = 0;
133    }
134
135    fn evict_until_fits(&mut self) {
136        while self.weight > self.capacity && self.map.len() > 1 {
137            // Find the entry with the smallest last_access (LRU).
138            let lru_key = self
139                .map
140                .iter()
141                .min_by_key(|(_, e)| e.last_access)
142                .map(|(k, _)| k.clone());
143            if let Some(k) = lru_key {
144                if let Some(e) = self.map.remove(&k) {
145                    self.weight -= e.weight;
146                }
147            } else {
148                break;
149            }
150        }
151    }
152}