sequoia_openpgp/packet/signature/
cache.rs

1//! A signature verification cache.
2//!
3//! Signature verification is expensive.  To mitigate this, Sequoia
4//! includes a signature verification cache.  This is keyed on the
5//! hash of the signature's context: the signature MPIs, the computed
6//! hash, and the key.  Since this context is needed to use the cache,
7//! it's hard to misuse the cache.
8//!
9//! The signature cache also supports dumping and restoring the cache
10//! from disk (see [`SignatureVerificationCache::restore`] and
11//! [`SignatureVerificationCache::dump`]).  This is particularly
12//! useful for one-shot programs, which don't have enough time to warm
13//! the cache up.
14//!
15//! The cache file needs to be managed carefully.  In particular, you
16//! probably don't want to allow it to grow without bound.  To help
17//! manage the cache, the cache keeps track of whether an entry was
18//! added ([`Entry::inserted`]), and whether it was accessed
19//! ([`Entry::accessed`]).
20use std::cmp;
21use std::collections::BTreeMap;
22use std::collections::btree_map;
23use std::sync::OnceLock;
24use std::sync::RwLock;
25use std::sync::atomic::AtomicBool;
26use std::sync::atomic::AtomicUsize;
27use std::sync::atomic::Ordering;
28
29use crate::HashAlgorithm;
30use crate::Result;
31use crate::packet::Key;
32use crate::packet::Signature;
33use crate::packet::key;
34
35const TRACE: bool = false;
36
37/// The cache singleton.
38static SIGNATURE_VERIFICATION_CACHE: SignatureVerificationCache
39    = SignatureVerificationCache::empty();
40
41/// The hash algorithm that we use.
42///
43/// SHA-512 is faster than SHA-256 on 64-bit hardware.
44const HASH_ALGO: HashAlgorithm = HashAlgorithm::SHA512;
45
46/// We use SHA-512, which has 512 / 8 bytes = 64 bytes.  We truncate
47/// it to the first 256 bits, i.e. we do SHA-512-256.  We're only
48/// worried about second pre-image resistance, so this is enough even
49/// when the signature uses SHA-512.
50const HASH_BYTES_UNTRUNCATED: usize = 512 / 8;
51const HASH_BYTES_TRUNCATED: usize = HASH_BYTES_UNTRUNCATED / 2;
52
53// The value of a cache entry.
54const VALUE_BYTES: usize = HASH_BYTES_TRUNCATED;
55type Value = [u8; VALUE_BYTES];
56const VALUE_NULL: Value = [0u8; VALUE_BYTES];
57
58/// Information about a cache entry.
59#[derive(Debug)]
60pub struct Metadata {
61    /// Whether the entry was inserted.
62    ///
63    /// Entries added by [`SignatureVerificationCache::restore`] have
64    /// this cleared.  Entries added as a side effect of a signature
65    /// verification have this set.
66    inserted: bool,
67
68    /// Whether the entry is accessed.
69    ///
70    /// An entry added by [`SignatureVerificationCache::restore`]
71    /// initially is not considered to have been accessed.  This is
72    /// set when an entry is used by the signature verification code.
73    accessed: AtomicBool,
74}
75
76impl Clone for Metadata {
77    fn clone(&self) -> Metadata {
78        Self {
79            inserted: self.inserted,
80            accessed: AtomicBool::from(self.accessed.load(Ordering::Relaxed)),
81        }
82    }
83}
84
85impl Metadata {
86    /// Instantiate a value.
87    ///
88    /// Entries added by [`SignatureVerificationCache::restore`] have
89    /// this cleared.  Entries added as a side effect of a signature
90    /// verification have this set.
91    fn new(inserted: bool) -> Self {
92        Metadata {
93            inserted,
94            accessed: false.into(),
95        }
96    }
97
98    /// Whether the entry was inserted since the program started.
99    ///
100    /// Entries added by [`SignatureVerificationCache::restore`] have
101    /// this cleared.  Entries added as a side effect of a signature
102    /// verification have this set.
103    pub fn inserted(&self) -> bool {
104        self.inserted
105    }
106
107    /// Whether the entry was accessed.
108    ///
109    /// An entry added by [`SignatureVerificationCache::restore`]
110    /// initially is not considered to have been accessed.  This is
111    /// set when an entry is used by the signature verification code.
112    pub fn accessed(&self) -> bool {
113        self.accessed.load(Ordering::Relaxed)
114    }
115}
116
117/// An entry in the signature verification cache.
118///
119/// You can iterate over the cache using
120/// [`SignatureVerificationCache::dump`].
121///
122/// Two entries are considered equal if their values are identical;
123/// the metadata is ignored.
124#[derive(Clone)]
125pub struct Entry {
126    value: Value,
127    metadata: Metadata,
128}
129
130impl PartialOrd for Entry {
131    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
132        Some(self.cmp(other))
133    }
134}
135
136impl Ord for Entry {
137    fn cmp(&self, other: &Self) -> cmp::Ordering {
138        self.value.cmp(&other.value)
139    }
140}
141
142impl PartialEq for Entry {
143    fn eq(&self, other: &Self) -> bool {
144        self.cmp(other) == cmp::Ordering::Equal
145    }
146}
147
148impl Eq for Entry {}
149
150impl Entry {
151    /// Computes the cache entry from the signature and its context.
152    pub(super) fn new(sig: &Signature,
153                      computed_digest: &[u8],
154                      key: &Key<key::PublicParts, key::UnspecifiedRole>)
155        -> Result<Self>
156    {
157        use crate::serialize::Marshal;
158        use crate::serialize::MarshalInto;
159
160        // Hash(Version || Signature MPIs Len || Signature MPIs || Hash Algorithm || Digest || Key MPIs)
161        //
162        // - Version: one byte, currently 0.
163        // - Signature MPIs: 4 bytes, little endian
164        // - Signature MPIs: variable number of bytes, the signature's MPIs
165        // - Hash algorithm: one byte, the hash algorithm
166        // - Digest: HashAlgorithm::len() bytes, the digest's length
167        // - Key: variable number of bytes, the key's MPIs
168        let mut context = HASH_ALGO.context()?.for_digest();
169
170        // Version.
171        context.update(&[ 0u8 ]);
172
173        // MPIs.
174        let mpis_len = sig.mpis.serialized_len();
175        context.update(&[
176            (mpis_len & 0xFF) as u8,
177            ((mpis_len >> 8) & 0xFF) as u8,
178            ((mpis_len >> 16) & 0xFF) as u8,
179            ((mpis_len >> 24) & 0xFF) as u8,
180        ]);
181        sig.mpis.export(&mut context)?;
182
183        // Hash algorithm.
184        context.update(&[
185            u8::from(sig.hash_algo())
186        ]);
187
188        // Hash.
189        context.update(computed_digest);
190
191        // Keys.
192        key.mpis().export(&mut context)?;
193
194        let context_hash = context.into_digest()?;
195
196        let mut value = VALUE_NULL;
197        value.copy_from_slice(&context_hash[..VALUE_BYTES]);
198
199        Ok(Entry {
200            value,
201            metadata: Metadata::new(true),
202        })
203    }
204
205    /// Returns the cache entry's value.
206    ///
207    /// This value is opaque and must not be interpreted.
208    ///
209    /// You can write this value to disk, and restore it using
210    /// [`SignatureVerificationCache::restore`].
211    pub fn value(&self) -> &[u8] {
212        &self.value
213    }
214
215    /// Returns whether the entry is in the cache.
216    pub(super) fn present(&self) -> bool {
217        SIGNATURE_VERIFICATION_CACHE.present(&self.value)
218    }
219
220    /// Inserts the entry in the cache.
221    ///
222    /// `verified` indicates whether the signature could be verified
223    /// (`true`), or not (`false`).
224    pub(super) fn insert(self, verified: bool) {
225        // We don't cache negative results.
226        if verified {
227            SIGNATURE_VERIFICATION_CACHE.insert(self.value);
228        }
229    }
230
231    /// Whether the entry was inserted since the program started.
232    ///
233    /// Entries added by [`SignatureVerificationCache::restore`] have
234    /// this cleared.  Entries added as a side effect of a signature
235    /// verification have this set.
236    pub fn inserted(&self) -> bool {
237        self.metadata.inserted
238    }
239
240    /// Whether the entry was accessed.
241    ///
242    /// An entry added by [`SignatureVerificationCache::restore`]
243    /// initially is not considered to have been accessed.  This is
244    /// set when an entry is used by the signature verification code.
245    pub fn accessed(&self) -> bool {
246        self.metadata.accessed.load(Ordering::Relaxed)
247    }
248}
249
250/// We split on the `BUCKETS_BITS` most significant bits of the value
251/// to reduce locking contention.
252const BUCKETS_BITS: usize = 4;
253const BUCKETS: usize = 1 << BUCKETS_BITS;
254const BUCKETS_SHIFT: usize = 8 - BUCKETS_BITS;
255
256/// A signature verification cache.
257pub struct SignatureVerificationCache {
258    /// A sorted list of entries.  This is filled by
259    /// `SignatureVerificationCache::restore`, and is much faster than
260    /// filling the btrees.
261    list: OnceLock<Vec<Entry>>,
262
263    /// The buckets.
264    buckets: [
265        RwLock<BTreeMap<Value, Metadata>>;
266        BUCKETS
267    ],
268
269    /// The number of cache hits.
270    hits: AtomicUsize,
271    /// The number of cache misses.
272    misses: AtomicUsize,
273    /// The number of entries that were restored (i.e., not inserted).
274    preloads: AtomicUsize,
275    /// The number of entries that were inserted (i.e., not restored).
276    insertions: AtomicUsize,
277}
278
279impl SignatureVerificationCache {
280    const fn empty() -> Self {
281        SignatureVerificationCache {
282            list: OnceLock::new(),
283            buckets: [
284                // 0
285                RwLock::new(BTreeMap::new()),
286                RwLock::new(BTreeMap::new()),
287                RwLock::new(BTreeMap::new()),
288                RwLock::new(BTreeMap::new()),
289                RwLock::new(BTreeMap::new()),
290                RwLock::new(BTreeMap::new()),
291                RwLock::new(BTreeMap::new()),
292                RwLock::new(BTreeMap::new()),
293                // 8
294                RwLock::new(BTreeMap::new()),
295                RwLock::new(BTreeMap::new()),
296                RwLock::new(BTreeMap::new()),
297                RwLock::new(BTreeMap::new()),
298                RwLock::new(BTreeMap::new()),
299                RwLock::new(BTreeMap::new()),
300                RwLock::new(BTreeMap::new()),
301                RwLock::new(BTreeMap::new()),
302            ],
303            hits: AtomicUsize::new(0),
304            misses: AtomicUsize::new(0),
305            preloads: AtomicUsize::new(0),
306            insertions: AtomicUsize::new(0),
307        }
308    }
309
310    /// Returns the bucket that a cache entry goes into.
311    fn bucket(value: &[u8]) -> usize {
312        (value[0] >> BUCKETS_SHIFT) as usize
313    }
314
315    /// Returns whether the cache contains `value`.
316    fn present(&self, value: &[u8]) -> bool {
317        assert_eq!(value.len(), HASH_BYTES_TRUNCATED);
318
319        // First search in our restored list.  It's sorted so we can
320        // use binary search.
321        if let Some(list) = self.list.get() {
322            if let Ok(i) = list.binary_search_by(|e| e.value[..].cmp(value)) {
323                list[i].metadata.accessed.store(true, Ordering::Relaxed);
324                self.hits.fetch_add(1, Ordering::Relaxed);
325                return true;
326            }
327        }
328
329        // Fallback to searching the buckets.
330        let i = Self::bucket(value);
331        let entries = self.buckets[i].read().unwrap();
332        if let Some(metadata) = entries.get(value) {
333            metadata.accessed.store(true, Ordering::Relaxed);
334            self.hits.fetch_add(1, Ordering::Relaxed);
335            true
336        } else {
337            self.misses.fetch_add(1, Ordering::Relaxed);
338            false
339        }
340    }
341
342    /// Inserts a verified signature into the cache.
343    fn insert(&self, value: [u8; HASH_BYTES_TRUNCATED]) {
344        let i = Self::bucket(&value);
345        let mut entries = self.buckets[i].write().unwrap();
346        match entries.entry(value) {
347            btree_map::Entry::Vacant(e) => {
348                // The entry is new.  Note it.
349                self.insertions.fetch_add(1, Ordering::Relaxed);
350
351                // Add the entry.
352                e.insert(Metadata::new(true));
353            }
354            btree_map::Entry::Occupied(_e) => {
355                // Nothing to do.
356            }
357        }
358    }
359
360    /// Returns the number of cache hits.
361    pub fn cache_hits() -> usize {
362        SIGNATURE_VERIFICATION_CACHE.hits.load(Ordering::Relaxed)
363    }
364
365    /// Returns the number of cache misses.
366    pub fn cache_misses() -> usize {
367        SIGNATURE_VERIFICATION_CACHE.misses.load(Ordering::Relaxed)
368    }
369
370    /// Returns the number of cache insertions.
371    ///
372    /// This returns the number of times an entry was added to the
373    /// cache since the program started or the last time
374    /// [`SignatureVerificationCache::clear_insertions`] was called.
375    ///
376    /// This does not include entries added via
377    /// [`SignatureVerificationCache::restore`].
378    pub fn insertions() -> usize {
379        SIGNATURE_VERIFICATION_CACHE.insertions.load(Ordering::Relaxed)
380    }
381
382    /// Resets the insertions counter.
383    pub fn clear_insertions() {
384        SIGNATURE_VERIFICATION_CACHE.insertions.store(0, Ordering::Relaxed);
385    }
386
387    /// Restores the signature verification cache.
388    ///
389    /// This merges the entries into the existing signature cache.
390    ///
391    /// The values are the values as returned by [`Entry::value`].
392    ///
393    /// The iterator is `Send`, `Sync` and `'static`, because this
394    /// function may spawn a thread to avoid blocking the main thread.
395    ///
396    /// When the restore is complete, `finished` is called.
397    pub fn restore<'a, F>(
398        entries: impl Iterator<Item=Vec<u8>> + Send + Sync + 'static,
399        finished: F)
400        where F: FnOnce() + Send + Sync + 'static
401    {
402        tracer!(TRACE, "SignatureVerificationCache::restore");
403
404        // Sanity check the constants here: this function is run O(1)
405        // times.
406
407        assert_eq!(HASH_ALGO.context().expect("have SHA-512")
408                   .for_digest().digest_size(),
409                   HASH_BYTES_UNTRUNCATED);
410        assert!(HASH_BYTES_TRUNCATED <= HASH_BYTES_UNTRUNCATED);
411
412        // Must fit in a byte.
413        assert!(BUCKETS_BITS <= 8);
414
415        // Consistency check.
416        assert_eq!(BUCKETS, 1 << BUCKETS_BITS);
417
418        std::thread::spawn(move || {
419            let mut items: Vec<Entry> = Vec::with_capacity(32 * 1024);
420
421            let mut bad = 0;
422            let mut count = 0;
423
424            for entry in entries {
425                count += 1;
426                if entry.len() != VALUE_BYTES {
427                    bad += 1;
428                    continue;
429                }
430
431                let mut value = VALUE_NULL;
432                value.copy_from_slice(&entry[..VALUE_BYTES]);
433
434                items.push(Entry {
435                    value,
436                    metadata: Metadata::new(false),
437                });
438            }
439
440            if bad > 0 {
441                t!("Warning: {} of {} cache entries could not be read",
442                   bad, count);
443            }
444            t!("Restored {} entries", count);
445
446            SIGNATURE_VERIFICATION_CACHE.preloads
447                .fetch_add(items.len(), Ordering::Relaxed);
448
449            items.sort();
450
451            // If this is the first restore, then we can store the
452            // signatures in the list.
453            if let Err(items) = SIGNATURE_VERIFICATION_CACHE.list.set(items) {
454                // Hmm, another restore.  This is unusual, but okay.
455                // We add the signatures to the buckets, as we can't
456                // change the list: it is behind a OnceLock.
457                let mut bucket_i = 0;
458                let mut bucket = SIGNATURE_VERIFICATION_CACHE
459                    .buckets[bucket_i].write().unwrap();
460                for item in items.into_iter() {
461                    let i = Self::bucket(&item.value);
462                    if i != bucket_i {
463                        // Items should be sorted so we should move
464                        // from one bucket to the next.
465                        assert!(i > bucket_i);
466                        bucket = SIGNATURE_VERIFICATION_CACHE
467                            .buckets[i].write().unwrap();
468                        bucket_i = i;
469                    }
470
471                    bucket.insert(item.value, item.metadata);
472                }
473            }
474
475            finished();
476          });
477    }
478
479    /// Dumps the contents of the cache.
480    ///
481    /// This clones the cache to avoid holding locks too long.
482    ///
483    /// The values returned by [`Entry::value`] may be written to a
484    /// file, and restored using
485    /// [`SignatureVerificationCache::restore`].
486    ///
487    /// Before saving them, you may want to check if there were any
488    /// insertions using [`SignatureVerificationCache::insertions`].
489    ///
490    /// Also, you may want to prune the entries to avoid having the
491    /// cache grow without bound.
492    pub fn dump<'a>() -> impl IntoIterator<Item=Entry> {
493        tracer!(TRACE, "SignatureVerificationCache::dump");
494
495        if TRACE {
496            let preloads = SIGNATURE_VERIFICATION_CACHE
497                .preloads.load(Ordering::Relaxed);
498            let insertions = SIGNATURE_VERIFICATION_CACHE
499                .insertions.load(Ordering::Relaxed);
500
501            t!("{} entries: {} restored, {} inserted",
502               preloads + insertions,
503               preloads, insertions);
504
505            let hits = SIGNATURE_VERIFICATION_CACHE
506                .hits.load(Ordering::Relaxed);
507            let misses = SIGNATURE_VERIFICATION_CACHE
508                .misses.load(Ordering::Relaxed);
509            let lookups = hits + misses;
510            if lookups > 0 {
511                t!("{} cache lookups, {} hits ({}%), {} misses ({}%)",
512                   lookups,
513                   hits, (100 * hits) / lookups,
514                   misses, (100 * misses) / lookups);
515            } else {
516                t!("0 cache lookups");
517            }
518        }
519
520        DumpIter {
521            bucket: 0,
522            iter: None,
523            list: SIGNATURE_VERIFICATION_CACHE.list.get()
524                .map(|list| list.clone())
525                .unwrap_or(Vec::new()),
526        }
527    }
528}
529
530/// Iterates over all entries in the cache.
531///
532/// Note: to reduce lock contention, this may return individual entries
533/// added after it was instantiated.
534struct DumpIter {
535    iter: Option<std::vec::IntoIter<Entry>>,
536
537    // The next bucket to dump.  Once we get to `BUCKETS`, we dump
538    // `list`.
539    bucket: usize,
540
541    // If we verify an entry before the cache is restored, the entry
542    // could end in both SignatureVerificationCache.list and a bucket.
543    // Avoid dumping an entry twice.
544    list: Vec<Entry>,
545}
546
547impl Iterator for DumpIter {
548    type Item = Entry;
549
550    fn next(&mut self) -> Option<Self::Item> {
551        tracer!(TRACE, "DumpIter::next");
552
553        loop {
554            if let Some(ref mut iter) = self.iter {
555                if let Some(item) = iter.next() {
556                    return Some(item);
557                }
558            }
559
560            if self.bucket == BUCKETS {
561                if self.list.is_empty() {
562                    return None;
563                }
564
565                let list = std::mem::take(&mut self.list);
566                t!("Dumping {} restored entries", list.len());
567                self.iter = Some(list.into_iter());
568            } else {
569                let bucket = &SIGNATURE_VERIFICATION_CACHE.buckets[self.bucket];
570                self.bucket += 1;
571
572                let bucket = bucket.read().unwrap();
573
574                t!("Dumping {} entries from bucket {}",
575                   bucket.len(), self.bucket - 1);
576
577                self.iter = Some(
578                    bucket.iter()
579                        .filter_map(|(v, m)| {
580                            // If the entry is also in list, then we
581                            // don't want to return it twice.
582                            if let Ok(_) = self.list.binary_search_by(|e| {
583                                e.value[..].cmp(v)
584                            })
585                            {
586                                // This is a dupe.  Skip it.
587                                None
588                            } else {
589                                Some(Entry {
590                                    value: v.clone(),
591                                    metadata: m.clone(),
592                                })
593                            }
594                        })
595                        .collect::<Vec<_>>()
596                        .into_iter())
597            }
598        }
599    }
600}
601
602#[cfg(test)]
603mod test {
604    use super::*;
605
606    #[test]
607    fn bucket() {
608        // Assert that all the buckets have the same number of items.
609        let mut bucket = 0;
610        let mut bucket_count = vec![0; BUCKETS];
611
612        for i in 0..=u8::MAX {
613            let mut value = VALUE_NULL;
614            value[0] = i;
615            let b = SignatureVerificationCache::bucket(&value);
616
617            if b != bucket {
618                // Different bucket.  Since we are using the most
619                // significant bits, it must be the next bucket.
620                assert_eq!(b, bucket + 1);
621                bucket = bucket + 1;
622            }
623            bucket_count[b] += 1;
624        }
625
626        for (i, c) in bucket_count.iter().enumerate() {
627            eprintln!("{}: {}", i, c);
628        }
629
630        assert!(bucket_count.iter().all(|c| *c == bucket_count[0]));
631        assert_eq!(bucket_count.iter().map(|c| *c as usize).sum::<usize>(),
632                   u8::MAX as usize + 1);
633    }
634}