pingora_cache/
key.rs

1// Copyright 2025 Cloudflare, Inc.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Cache key
16
17use super::*;
18
19use blake2::{Blake2b, Digest};
20use http::Extensions;
21use serde::{Deserialize, Serialize};
22use std::fmt::{Display, Formatter, Result as FmtResult};
23
24// 16-byte / 128-bit key: large enough to avoid collision
25const KEY_SIZE: usize = 16;
26
27/// An 128 bit hash binary
28pub type HashBinary = [u8; KEY_SIZE];
29
30fn hex2str(hex: &[u8]) -> String {
31    use std::fmt::Write;
32    let mut s = String::with_capacity(KEY_SIZE * 2);
33    for c in hex {
34        write!(s, "{:02x}", c).unwrap(); // safe, just dump hex to string
35    }
36    s
37}
38
39/// Decode the hex str into [HashBinary].
40///
41/// Return `None` when the decode fails or the input is not exact 32 (to decode to 16 bytes).
42pub fn str2hex(s: &str) -> Option<HashBinary> {
43    if s.len() != KEY_SIZE * 2 {
44        return None;
45    }
46    let mut output = [0; KEY_SIZE];
47    // no need to bubble the error, it should be obvious why the decode fails
48    hex::decode_to_slice(s.as_bytes(), &mut output).ok()?;
49    Some(output)
50}
51
52/// The trait for cache key
53pub trait CacheHashKey {
54    /// Return the hash of the cache key
55    fn primary_bin(&self) -> HashBinary;
56
57    /// Return the variance hash of the cache key.
58    ///
59    /// `None` if no variance.
60    fn variance_bin(&self) -> Option<HashBinary>;
61
62    /// Return the hash including both primary and variance keys
63    fn combined_bin(&self) -> HashBinary {
64        let key = self.primary_bin();
65        if let Some(v) = self.variance_bin() {
66            let mut hasher = Blake2b128::new();
67            hasher.update(key);
68            hasher.update(v);
69            hasher.finalize().into()
70        } else {
71            // if there is no variance, combined_bin should return the same as primary_bin
72            key
73        }
74    }
75
76    /// An extra tag for identifying users
77    ///
78    /// For example, if the storage backend implements per user quota, this tag can be used.
79    fn user_tag(&self) -> &str;
80
81    /// The hex string of [Self::primary_bin()]
82    fn primary(&self) -> String {
83        hex2str(&self.primary_bin())
84    }
85
86    /// The hex string of [Self::variance_bin()]
87    fn variance(&self) -> Option<String> {
88        self.variance_bin().as_ref().map(|b| hex2str(&b[..]))
89    }
90
91    /// The hex string of [Self::combined_bin()]
92    fn combined(&self) -> String {
93        hex2str(&self.combined_bin())
94    }
95}
96
97/// General purpose cache key
98#[derive(Debug, Clone)]
99pub struct CacheKey {
100    // Namespace and primary fields are essentially strings,
101    // except they allow invalid UTF-8 sequences.
102    // These fields should be able to be hashed.
103    namespace: Vec<u8>,
104    primary: Vec<u8>,
105    primary_bin_override: Option<HashBinary>,
106    variance: Option<HashBinary>,
107    /// An extra tag for identifying users
108    ///
109    /// For example, if the storage backend implements per user quota, this tag can be used.
110    pub user_tag: String,
111
112    /// Grab-bag for user-defined extensions. These will not be persisted to disk.
113    pub extensions: Extensions,
114}
115
116impl CacheKey {
117    /// Set the value of the variance hash
118    pub fn set_variance_key(&mut self, key: HashBinary) {
119        self.variance = Some(key)
120    }
121
122    /// Get the value of the variance hash
123    pub fn get_variance_key(&self) -> Option<&HashBinary> {
124        self.variance.as_ref()
125    }
126
127    /// Removes the variance from this cache key
128    pub fn remove_variance_key(&mut self) {
129        self.variance = None
130    }
131
132    /// Override the primary key hash
133    pub fn set_primary_bin_override(&mut self, key: HashBinary) {
134        self.primary_bin_override = Some(key)
135    }
136
137    /// Try to get primary key as UTF-8 str, if valid
138    pub fn primary_key_str(&self) -> Option<&str> {
139        std::str::from_utf8(&self.primary).ok()
140    }
141
142    /// Try to get namespace key as UTF-8 str, if valid
143    pub fn namespace_str(&self) -> Option<&str> {
144        std::str::from_utf8(&self.namespace).ok()
145    }
146}
147
148/// Storage optimized cache key to keep in memory or in storage
149// 16 bytes + 8 bytes (+16 * u8) + user_tag.len() + 16 Bytes (Box<str>)
150#[derive(Debug, Deserialize, Serialize, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
151pub struct CompactCacheKey {
152    pub primary: HashBinary,
153    // save 8 bytes for non-variance but waste 8 bytes for variance vs, store flat 16 bytes
154    pub variance: Option<Box<HashBinary>>,
155    pub user_tag: Box<str>, // the len should be small to keep memory usage bounded
156}
157
158impl Display for CompactCacheKey {
159    fn fmt(&self, f: &mut Formatter) -> FmtResult {
160        write!(f, "{}", hex2str(&self.primary))?;
161        if let Some(var) = &self.variance {
162            write!(f, ", variance: {}", hex2str(var.as_ref()))?;
163        }
164        write!(f, ", user_tag: {}", self.user_tag)
165    }
166}
167
168impl CacheHashKey for CompactCacheKey {
169    fn primary_bin(&self) -> HashBinary {
170        self.primary
171    }
172
173    fn variance_bin(&self) -> Option<HashBinary> {
174        self.variance.as_ref().map(|s| *s.as_ref())
175    }
176
177    fn user_tag(&self) -> &str {
178        &self.user_tag
179    }
180}
181
182/*
183 * We use blake2 hashing, which is faster and more secure, to replace md5.
184 * We have not given too much thought on whether non-crypto hash can be safely
185 * use because hashing performance is not critical.
186 * Note: we should avoid hashes like ahash which does not have consistent output
187 * across machines because it is designed purely for in memory hashtable
188*/
189
190// hash output: we use 128 bits (16 bytes) hash which will map to 32 bytes hex string
191pub(crate) type Blake2b128 = Blake2b<blake2::digest::consts::U16>;
192
193/// helper function: hash str to u8
194pub fn hash_u8(key: &str) -> u8 {
195    let mut hasher = Blake2b128::new();
196    hasher.update(key);
197    let raw = hasher.finalize();
198    raw[0]
199}
200
201/// helper function: hash key (String or Bytes) to [HashBinary]
202pub fn hash_key<K: AsRef<[u8]>>(key: K) -> HashBinary {
203    let mut hasher = Blake2b128::new();
204    hasher.update(key.as_ref());
205    let raw = hasher.finalize();
206    raw.into()
207}
208
209impl CacheKey {
210    fn primary_hasher(&self) -> Blake2b128 {
211        let mut hasher = Blake2b128::new();
212        hasher.update(&self.namespace);
213        hasher.update(&self.primary);
214        hasher
215    }
216
217    /// Create a default [CacheKey] from a request, which just takes its URI as the primary key.
218    pub fn default(req_header: &ReqHeader) -> Self {
219        CacheKey {
220            namespace: Vec::new(),
221            primary: format!("{}", req_header.uri).into_bytes(),
222            primary_bin_override: None,
223            variance: None,
224            user_tag: "".into(),
225            extensions: Extensions::new(),
226        }
227    }
228
229    /// Create a new [CacheKey] from the given namespace, primary, and user_tag input.
230    ///
231    /// Both `namespace` and `primary` will be used for the primary hash
232    pub fn new<B1, B2, S>(namespace: B1, primary: B2, user_tag: S) -> Self
233    where
234        B1: Into<Vec<u8>>,
235        B2: Into<Vec<u8>>,
236        S: Into<String>,
237    {
238        CacheKey {
239            namespace: namespace.into(),
240            primary: primary.into(),
241            primary_bin_override: None,
242            variance: None,
243            user_tag: user_tag.into(),
244            extensions: Extensions::new(),
245        }
246    }
247
248    /// Return the namespace of this key
249    pub fn namespace(&self) -> &[u8] {
250        &self.namespace[..]
251    }
252
253    /// Return the primary key of this key
254    pub fn primary_key(&self) -> &[u8] {
255        &self.primary[..]
256    }
257
258    /// Convert this key to [CompactCacheKey].
259    pub fn to_compact(&self) -> CompactCacheKey {
260        let primary = self.primary_bin();
261        CompactCacheKey {
262            primary,
263            variance: self.variance_bin().map(Box::new),
264            user_tag: self.user_tag.clone().into_boxed_str(),
265        }
266    }
267}
268
269impl CacheHashKey for CacheKey {
270    fn primary_bin(&self) -> HashBinary {
271        if let Some(primary_bin_override) = self.primary_bin_override {
272            primary_bin_override
273        } else {
274            self.primary_hasher().finalize().into()
275        }
276    }
277
278    fn variance_bin(&self) -> Option<HashBinary> {
279        self.variance
280    }
281
282    fn user_tag(&self) -> &str {
283        &self.user_tag
284    }
285}
286
287#[cfg(test)]
288mod tests {
289    use super::*;
290
291    #[test]
292    fn test_cache_key_hash() {
293        let key = CacheKey {
294            namespace: Vec::new(),
295            primary: b"aa".to_vec(),
296            primary_bin_override: None,
297            variance: None,
298            user_tag: "1".into(),
299            extensions: Extensions::new(),
300        };
301        let hash = key.primary();
302        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
303        assert!(key.variance().is_none());
304        assert_eq!(key.combined(), hash);
305        let compact = key.to_compact();
306        assert_eq!(compact.primary(), hash);
307        assert!(compact.variance().is_none());
308        assert_eq!(compact.combined(), hash);
309    }
310
311    #[test]
312    fn test_cache_key_hash_override() {
313        let mut key = CacheKey {
314            namespace: Vec::new(),
315            primary: b"aa".to_vec(),
316            primary_bin_override: str2hex("27c35e6e9373877f29e562464e46497e"),
317            variance: None,
318            user_tag: "1".into(),
319            extensions: Extensions::new(),
320        };
321        let hash = key.primary();
322        assert_eq!(hash, "27c35e6e9373877f29e562464e46497e");
323        assert!(key.variance().is_none());
324        assert_eq!(key.combined(), hash);
325        let compact = key.to_compact();
326        assert_eq!(compact.primary(), hash);
327        assert!(compact.variance().is_none());
328        assert_eq!(compact.combined(), hash);
329
330        // make sure set_primary_bin_override overrides the primary key hash correctly
331        key.set_primary_bin_override(str2hex("004174d3e75a811a5b44c46b3856f3ee").unwrap());
332        let hash = key.primary();
333        assert_eq!(hash, "004174d3e75a811a5b44c46b3856f3ee");
334        assert!(key.variance().is_none());
335        assert_eq!(key.combined(), hash);
336        let compact = key.to_compact();
337        assert_eq!(compact.primary(), hash);
338        assert!(compact.variance().is_none());
339        assert_eq!(compact.combined(), hash);
340    }
341
342    #[test]
343    fn test_cache_key_vary_hash() {
344        let key = CacheKey {
345            namespace: Vec::new(),
346            primary: b"aa".to_vec(),
347            primary_bin_override: None,
348            variance: Some([0u8; 16]),
349            user_tag: "1".into(),
350            extensions: Extensions::new(),
351        };
352        let hash = key.primary();
353        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
354        assert_eq!(key.variance().unwrap(), "00000000000000000000000000000000");
355        assert_eq!(key.combined(), "004174d3e75a811a5b44c46b3856f3ee");
356        let compact = key.to_compact();
357        assert_eq!(compact.primary(), "ac10f2aef117729f8dad056b3059eb7e");
358        assert_eq!(
359            compact.variance().unwrap(),
360            "00000000000000000000000000000000"
361        );
362        assert_eq!(compact.combined(), "004174d3e75a811a5b44c46b3856f3ee");
363    }
364
365    #[test]
366    fn test_cache_key_vary_hash_override() {
367        let key = CacheKey {
368            namespace: Vec::new(),
369            primary: b"saaaad".to_vec(),
370            primary_bin_override: str2hex("ac10f2aef117729f8dad056b3059eb7e"),
371            variance: Some([0u8; 16]),
372            user_tag: "1".into(),
373            extensions: Extensions::new(),
374        };
375        let hash = key.primary();
376        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
377        assert_eq!(key.variance().unwrap(), "00000000000000000000000000000000");
378        assert_eq!(key.combined(), "004174d3e75a811a5b44c46b3856f3ee");
379        let compact = key.to_compact();
380        assert_eq!(compact.primary(), "ac10f2aef117729f8dad056b3059eb7e");
381        assert_eq!(
382            compact.variance().unwrap(),
383            "00000000000000000000000000000000"
384        );
385        assert_eq!(compact.combined(), "004174d3e75a811a5b44c46b3856f3ee");
386    }
387
388    #[test]
389    fn test_hex_str() {
390        let mut key = [0; KEY_SIZE];
391        for (i, v) in key.iter_mut().enumerate() {
392            // key: [0, 1, 2, .., 15]
393            *v = i as u8;
394        }
395        let hex_str = hex2str(&key);
396        let key2 = str2hex(&hex_str).unwrap();
397        for i in 0..KEY_SIZE {
398            assert_eq!(key[i], key2[i]);
399        }
400    }
401    #[test]
402    fn test_primary_key_str_valid_utf8() {
403        let valid_utf8_key = CacheKey {
404            namespace: Vec::new(),
405            primary: b"/valid/path?query=1".to_vec(),
406            primary_bin_override: None,
407            variance: None,
408            user_tag: "1".into(),
409            extensions: Extensions::new(),
410        };
411
412        assert_eq!(
413            valid_utf8_key.primary_key_str(),
414            Some("/valid/path?query=1")
415        )
416    }
417
418    #[test]
419    fn test_primary_key_str_invalid_utf8() {
420        let invalid_utf8_key = CacheKey {
421            namespace: Vec::new(),
422            primary: vec![0x66, 0x6f, 0x6f, 0xff],
423            primary_bin_override: None,
424            variance: None,
425            user_tag: "1".into(),
426            extensions: Extensions::new(),
427        };
428
429        assert!(invalid_utf8_key.primary_key_str().is_none())
430    }
431}