// pingora_cache/predictor.rs

1// Copyright 2025 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Cacheability Predictor
16
17use crate::hashtable::{ConcurrentLruCache, LruShard};
18
/// Predicate over the reason string of a `NoCacheReason::Custom`.
/// Returning `true` tells the predictor to skip remembering the current
/// cache key as uncacheable (see `Predictor::new`).
pub type CustomReasonPredicate = fn(&'static str) -> bool;
20
/// Cacheability Predictor
///
/// Remembers previously uncacheable assets.
/// Allows bypassing cache / cache lock early based on historical precedent.
///
/// NOTE: to simply avoid caching requests with certain characteristics,
/// add checks in request_cache_filter to avoid enabling cache in the first place.
/// The predictor's bypass mechanism handles cases where the request _looks_ cacheable
/// but its previous responses suggest otherwise. The request _could_ be cacheable in the future.
pub struct Predictor<const N_SHARDS: usize> {
    // Sharded LRU set of hashed cache keys previously seen to be uncacheable.
    // The value type is `()` because only key membership matters.
    uncacheable_keys: ConcurrentLruCache<(), N_SHARDS>,
    // Optional user predicate consulted in `mark_uncacheable`: when it returns
    // true for a `NoCacheReason::Custom` reason, that key is NOT remembered.
    skip_custom_reasons_fn: Option<CustomReasonPredicate>,
}
34
35use crate::{key::CacheHashKey, CacheKey, NoCacheReason};
36use log::debug;
37
/// The cache predictor trait.
///
/// This trait allows user defined predictor to replace [Predictor].
pub trait CacheablePredictor {
    /// Return true if likely cacheable, false if likely not.
    fn cacheable_prediction(&self, key: &CacheKey) -> bool;

    /// Mark cacheable to allow next request to cache.
    /// Returns false if the key was already marked cacheable.
    fn mark_cacheable(&self, key: &CacheKey) -> bool;

    /// Mark uncacheable to actively bypass cache on the next request.
    /// May skip marking on certain NoCacheReasons.
    /// Returns None if we skipped marking uncacheable.
    /// Returns Some(true) if the key was newly marked uncacheable.
    /// Returns Some(false) if the key was already marked uncacheable.
    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool>;
}
55
56// This particular bit of `where [LruShard...; N]: Default` nonsense arises from
57// ConcurrentLruCache needing this trait bound, which in turns arises from the Rust
58// compiler not being able to guarantee that all array sizes N implement `Default`.
59// See https://github.com/rust-lang/rust/issues/61415
60impl<const N_SHARDS: usize> Predictor<N_SHARDS>
61where
62    [LruShard<()>; N_SHARDS]: Default,
63{
64    /// Create a new Predictor with `N_SHARDS * shard_capacity` total capacity for
65    /// uncacheable cache keys.
66    ///
67    /// - `shard_capacity`: defines number of keys remembered as uncacheable per LRU shard.
68    /// - `skip_custom_reasons_fn`: an optional predicate used in `mark_uncacheable`
69    ///   that can customize which `Custom` `NoCacheReason`s ought to be remembered as uncacheable.
70    ///   If the predicate returns true, then the predictor will skip remembering the current
71    ///   cache key as uncacheable (and avoid bypassing cache on the next request).
72    pub fn new(
73        shard_capacity: usize,
74        skip_custom_reasons_fn: Option<CustomReasonPredicate>,
75    ) -> Predictor<N_SHARDS> {
76        Predictor {
77            uncacheable_keys: ConcurrentLruCache::<(), N_SHARDS>::new(shard_capacity),
78            skip_custom_reasons_fn,
79        }
80    }
81}
82
83impl<const N_SHARDS: usize> CacheablePredictor for Predictor<N_SHARDS>
84where
85    [LruShard<()>; N_SHARDS]: Default,
86{
87    fn cacheable_prediction(&self, key: &CacheKey) -> bool {
88        // variance key is ignored because this check happens before cache lookup
89        let hash = key.primary_bin();
90        let key = u128::from_be_bytes(hash); // Endianness doesn't matter
91
92        // Note: LRU updated in mark_* functions only,
93        // as we assume the caller always updates the cacheability of the response later
94        !self.uncacheable_keys.read(key).contains(&key)
95    }
96
97    fn mark_cacheable(&self, key: &CacheKey) -> bool {
98        // variance key is ignored because cacheable_prediction() is called before cache lookup
99        // where the variance key is unknown
100        let hash = key.primary_bin();
101        let key = u128::from_be_bytes(hash);
102
103        let cache = self.uncacheable_keys.get(key);
104        if !cache.read().contains(&key) {
105            // not in uncacheable list, nothing to do
106            return true;
107        }
108
109        let mut cache = cache.write();
110        cache.pop(&key);
111        debug!("bypassed request became cacheable");
112        false
113    }
114
115    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool> {
116        // only mark as uncacheable for the future on certain reasons,
117        // (e.g. InternalErrors)
118        use NoCacheReason::*;
119        match reason {
120            // CacheLockGiveUp: the writer will set OriginNotCache (if applicable)
121            // readers don't need to do it
122            NeverEnabled | StorageError | InternalError | Deferred | CacheLockGiveUp
123            | CacheLockTimeout | DeclinedToUpstream | UpstreamError => {
124                return None;
125            }
126            // Skip certain NoCacheReason::Custom according to user
127            Custom(reason) if self.skip_custom_reasons_fn.is_some_and(|f| f(reason)) => {
128                return None;
129            }
130            Custom(_) | OriginNotCache | ResponseTooLarge => { /* mark uncacheable for these only */
131            }
132        }
133
134        // variance key is ignored because cacheable_prediction() is called before cache lookup
135        // where the variance key is unknown
136        let hash = key.primary_bin();
137        let key = u128::from_be_bytes(hash);
138
139        let mut cache = self.uncacheable_keys.get(key).write();
140        // put() returns Some(old_value) if the key existed, else None
141        let new_key = cache.put(key, ()).is_none();
142        if new_key {
143            debug!("request marked uncacheable");
144        }
145        Some(new_key)
146    }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    // State transitions: unknown -> uncacheable (only for remembered reasons) -> cacheable.
    #[test]
    fn test_mark_cacheability() {
        let predictor = Predictor::<1>::new(10, None);
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // don't remember internal / storage errors
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));
        predictor.mark_uncacheable(&key, NoCacheReason::StorageError);
        assert!(predictor.cacheable_prediction(&key));

        // origin explicitly said uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key));

        // mark cacheable again
        predictor.mark_cacheable(&key);
        assert!(predictor.cacheable_prediction(&key));
    }

    // The user-supplied skip predicate only filters Custom reasons; built-in
    // skip reasons (e.g. InternalError) are unaffected by it.
    #[test]
    fn test_custom_skip_predicate() {
        let predictor = Predictor::<1>::new(
            10,
            // skip remembering only the Custom("Skipping") reason
            Some(|custom_reason| matches!(custom_reason, "Skipping")),
        );
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // custom predicate still uses default skip reasons
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));

        // other custom reasons can still be marked uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("DontCacheMe"));
        assert!(!predictor.cacheable_prediction(&key));

        let key = CacheKey::new("a", "c", "d");
        assert!(predictor.cacheable_prediction(&key));
        // specific custom reason is skipped
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("Skipping"));
        assert!(predictor.cacheable_prediction(&key));
    }

    // LRU behavior of the uncacheable set: capacity 3 in a single shard;
    // re-marking a key promotes it, so the least-recently-used entry is evicted.
    // NOTE: assertion order matters — each mark_uncacheable/prediction call
    // can change recency, so do not reorder these statements.
    #[test]
    fn test_mark_uncacheable_lru() {
        let predictor = Predictor::<1>::new(3, None);
        let key1 = CacheKey::new("a", "b", "c");
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key1));

        let key2 = CacheKey::new("a", "bc", "c");
        predictor.mark_uncacheable(&key2, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key2));

        let key3 = CacheKey::new("a", "cd", "c");
        predictor.mark_uncacheable(&key3, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key3));

        // promote / reinsert key1
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);

        // inserting a 4th key evicts the LRU entry (key2, since key1 was promoted)
        let key4 = CacheKey::new("a", "de", "c");
        predictor.mark_uncacheable(&key4, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key4));

        // key 1 was recently used
        assert!(!predictor.cacheable_prediction(&key1));
        // key 2 was evicted
        assert!(predictor.cacheable_prediction(&key2));
        assert!(!predictor.cacheable_prediction(&key3));
        assert!(!predictor.cacheable_prediction(&key4));
    }
}