pingora_cache/predictor.rs

// Copyright 2025 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cacheability Predictor

use crate::hashtable::{ConcurrentLruCache, LruShard};

pub type CustomReasonPredicate = fn(&'static str) -> bool;

/// Cacheability Predictor
///
/// Remembers previously uncacheable assets.
/// Allows bypassing cache / cache lock early based on historical precedent.
///
/// NOTE: to simply avoid caching requests with certain characteristics,
/// add checks in request_cache_filter to avoid enabling cache in the first place.
/// The predictor's bypass mechanism handles cases where the request _looks_ cacheable
/// but its previous responses suggest otherwise. The request _could_ be cacheable in the future.
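///
/// # Example
///
/// A minimal sketch of the intended flow, shown as a non-compiled doc example
/// (the import paths are assumptions; adjust them to however your crate exposes
/// this module):
///
/// ```ignore
/// use pingora_cache::predictor::{CacheablePredictor, Predictor};
/// use pingora_cache::{CacheKey, NoCacheReason};
///
/// // 16 shards, each remembering up to 128 uncacheable keys, no custom-reason filter
/// let predictor = Predictor::<16>::new(128, None);
/// let key = CacheKey::new("ns", "/some/asset", "user_tag");
///
/// // No history yet: predicted cacheable, so cache (and cache lock) proceed as usual.
/// assert!(predictor.cacheable_prediction(&key));
///
/// // The origin said not to cache; remember that so the next request bypasses early.
/// predictor.mark_uncacheable(&key, NoCacheReason::OriginNotCache);
/// assert!(!predictor.cacheable_prediction(&key));
///
/// // A later cacheable response clears the bypass.
/// predictor.mark_cacheable(&key);
/// assert!(predictor.cacheable_prediction(&key));
/// ```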
pub struct Predictor<const N_SHARDS: usize> {
    uncacheable_keys: ConcurrentLruCache<(), N_SHARDS>,
    skip_custom_reasons_fn: Option<CustomReasonPredicate>,
}

use crate::{key::CacheHashKey, CacheKey, NoCacheReason};
use log::debug;

/// The cache predictor trait.
///
/// This trait allows a user-defined predictor to replace [Predictor].
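///
/// # Example
///
/// A minimal sketch of a custom implementation that never bypasses the cache.
/// The type name and import paths here are illustrative assumptions, not part of
/// this crate:
///
/// ```ignore
/// use pingora_cache::predictor::CacheablePredictor;
/// use pingora_cache::{CacheKey, NoCacheReason};
///
/// /// A no-op predictor: always predicts cacheable and remembers nothing.
/// struct AlwaysCacheable;
///
/// impl CacheablePredictor for AlwaysCacheable {
///     fn cacheable_prediction(&self, _key: &CacheKey) -> bool {
///         true
///     }
///
///     fn mark_cacheable(&self, _key: &CacheKey) -> bool {
///         // nothing is ever tracked, so there is never anything to clear
///         true
///     }
///
///     fn mark_uncacheable(&self, _key: &CacheKey, _reason: NoCacheReason) -> Option<bool> {
///         // never remember uncacheable keys, i.e. always skip marking
///         None
///     }
/// }
/// ```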
pub trait CacheablePredictor {
    /// Return true if likely cacheable, false if likely not.
    fn cacheable_prediction(&self, key: &CacheKey) -> bool;

    /// Mark cacheable to allow the next request to use the cache.
    /// Returns false if the key had previously been marked uncacheable
    /// (and has now been cleared); returns true if there was nothing to clear.
    fn mark_cacheable(&self, key: &CacheKey) -> bool;

    /// Mark uncacheable to actively bypass cache on the next request.
    /// May skip marking on certain NoCacheReasons.
    /// Returns None if we skipped marking uncacheable.
    /// Returns Some(false) if the key was already marked uncacheable.
    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool>;
}

// This particular bit of `where [LruShard...; N]: Default` nonsense arises from
// ConcurrentLruCache needing this trait bound, which in turn arises from the Rust
// compiler not being able to guarantee that all array sizes N implement `Default`.
// See https://github.com/rust-lang/rust/issues/61415
impl<const N_SHARDS: usize> Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    /// Create a new Predictor with `N_SHARDS * shard_capacity` total capacity for
    /// uncacheable cache keys.
    ///
    /// - `shard_capacity`: the number of keys remembered as uncacheable per LRU shard.
    /// - `skip_custom_reasons_fn`: an optional predicate used in `mark_uncacheable`
    ///   that can customize which `Custom` `NoCacheReason`s ought to be remembered as uncacheable.
    ///   If the predicate returns true, then the predictor will skip remembering the current
    ///   cache key as uncacheable (and avoid bypassing cache on the next request).
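    ///
    /// # Example
    ///
    /// A small sketch of constructing a predictor with a custom-reason filter
    /// (not compiled as a doctest; the module path and the "rate-limited" reason
    /// string are assumptions for illustration):
    ///
    /// ```ignore
    /// use pingora_cache::predictor::Predictor;
    ///
    /// // 8 shards * 256 keys per shard of uncacheable history; responses rejected
    /// // with Custom("rate-limited") are not remembered, so they won't cause
    /// // future cache bypasses.
    /// let predictor = Predictor::<8>::new(
    ///     256,
    ///     Some(|custom_reason| custom_reason == "rate-limited"),
    /// );
    /// ```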
    pub fn new(
        shard_capacity: usize,
        skip_custom_reasons_fn: Option<CustomReasonPredicate>,
    ) -> Predictor<N_SHARDS> {
        Predictor {
            uncacheable_keys: ConcurrentLruCache::<(), N_SHARDS>::new(shard_capacity),
            skip_custom_reasons_fn,
        }
    }
}

impl<const N_SHARDS: usize> CacheablePredictor for Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    fn cacheable_prediction(&self, key: &CacheKey) -> bool {
        // variance key is ignored because this check happens before cache lookup
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash); // Endianness doesn't matter

        // Note: LRU updated in mark_* functions only,
        // as we assume the caller always updates the cacheability of the response later
        !self.uncacheable_keys.read(key).contains(&key)
    }

    fn mark_cacheable(&self, key: &CacheKey) -> bool {
        // variance key is ignored because cacheable_prediction() is called before cache lookup
        // where the variance key is unknown
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash);

        let cache = self.uncacheable_keys.get(key);
        if !cache.read().contains(&key) {
            // not in uncacheable list, nothing to do
            return true;
        }

        let mut cache = cache.write();
        cache.pop(&key);
        debug!("bypassed request became cacheable");
        false
    }

    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool> {
        // only mark as uncacheable for the future on certain reasons,
        // skipping others (e.g. InternalError) that shouldn't trigger a future bypass
        use NoCacheReason::*;
        match reason {
            // CacheLockGiveUp: the writer will set OriginNotCache (if applicable),
            // readers don't need to do it
            NeverEnabled
            | StorageError
            | InternalError
            | Deferred
            | CacheLockGiveUp
            | CacheLockTimeout
            | DeclinedToUpstream
            | UpstreamError
            | PredictedResponseTooLarge => {
                return None;
            }
            // Skip certain NoCacheReason::Custom according to the user-provided predicate
            Custom(reason) if self.skip_custom_reasons_fn.is_some_and(|f| f(reason)) => {
                return None;
            }
            Custom(_) | OriginNotCache | ResponseTooLarge => { /* mark uncacheable for these only */
            }
        }

        // variance key is ignored because cacheable_prediction() is called before cache lookup
        // where the variance key is unknown
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash);

        let mut cache = self.uncacheable_keys.get(key).write();
        // put() returns Some(old_value) if the key existed, else None
        let new_key = cache.put(key, ()).is_none();
        if new_key {
            debug!("request marked uncacheable");
        }
        Some(new_key)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_mark_cacheability() {
        let predictor = Predictor::<1>::new(10, None);
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // don't remember internal / storage errors
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));
        predictor.mark_uncacheable(&key, NoCacheReason::StorageError);
        assert!(predictor.cacheable_prediction(&key));

        // origin explicitly said uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key));

        // mark cacheable again
        predictor.mark_cacheable(&key);
        assert!(predictor.cacheable_prediction(&key));
    }

    #[test]
    fn test_custom_skip_predicate() {
        let predictor = Predictor::<1>::new(
            10,
            Some(|custom_reason| matches!(custom_reason, "Skipping")),
        );
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // custom predicate still uses default skip reasons
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));

        // other custom reasons can still be marked uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("DontCacheMe"));
        assert!(!predictor.cacheable_prediction(&key));

        let key = CacheKey::new("a", "c", "d");
        assert!(predictor.cacheable_prediction(&key));
        // specific custom reason is skipped
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("Skipping"));
        assert!(predictor.cacheable_prediction(&key));
    }

    #[test]
    fn test_mark_uncacheable_lru() {
        let predictor = Predictor::<1>::new(3, None);
        let key1 = CacheKey::new("a", "b", "c");
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key1));

        let key2 = CacheKey::new("a", "bc", "c");
        predictor.mark_uncacheable(&key2, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key2));

        let key3 = CacheKey::new("a", "cd", "c");
        predictor.mark_uncacheable(&key3, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key3));

        // promote / reinsert key1
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);

        let key4 = CacheKey::new("a", "de", "c");
        predictor.mark_uncacheable(&key4, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key4));

        // key 1 was recently used
        assert!(!predictor.cacheable_prediction(&key1));
        // key 2 was evicted
        assert!(predictor.cacheable_prediction(&key2));
        assert!(!predictor.cacheable_prediction(&key3));
        assert!(!predictor.cacheable_prediction(&key4));
    }
}