Skip to main content

sozu_lib/
tls.rs

1//! A unified certificate resolver for rustls.
2//!
3//! Persists certificates in the Rustls
4//! [`CertifiedKey` format](https://docs.rs/rustls/latest/rustls/sign/struct.CertifiedKey.html),
5//! exposes them to the HTTPS listener for TLS handshakes.
6#[cfg(test)]
7use std::collections::HashSet;
8use std::{
9    collections::HashMap,
10    fmt,
11    str::FromStr,
12    sync::{Arc, LazyLock, Mutex},
13};
14
15use rustls::{
16    pki_types::{CertificateDer, PrivateKeyDer, pem::PemObject},
17    server::{ClientHello, ResolvesServerCert},
18    sign::CertifiedKey,
19};
20
21use crate::crypto::any_supported_type;
22use sha2::{Digest, Sha256};
23use sozu_command::{
24    certificate::{
25        CertificateError, Fingerprint, get_cn_and_san_attributes, parse_pem, parse_x509,
26        split_certificate_chain,
27    },
28    logging::ansi_palette,
29    proto::command::{AddCertificate, CertificateAndKey, ReplaceCertificate, SocketAddress},
30};
31
32use crate::metrics::names;
33use crate::router::pattern_trie::{Key, KeyValue, TrieNode};
34
/// Log prefix shared by every line this module emits.
///
/// Expands to a `String` holding the `TLS-RESOLVER` label wrapped in the
/// first two entries returned by `ansi_palette()` (the opening style and the
/// reset sequence), followed by a tab and `>>>`. The resolver runs at
/// listener scope and keeps no per-session state, so no session-aware
/// variant of the macro is needed here. The label is deliberately different
/// from the `RUSTLS` one used by `lib/src/protocol/rustls.rs`: operators can
/// then separate handshake failures (RUSTLS) from certificate-store
/// management noise (TLS-RESOLVER).
macro_rules! log_module_context {
    () => {{
        // Only the opening style and the reset are needed; the remaining
        // palette entries are ignored.
        let (open, reset, ..) = ansi_palette();
        format!(
            "{open}TLS-RESOLVER{reset}\t >>>",
            open = open,
            reset = reset
        )
    }};
}
53
54// -----------------------------------------------------------------------------
55// Default ParsedCertificateAndKey
56
57static DEFAULT_CERTIFICATE: LazyLock<Option<Arc<CertifiedKey>>> = LazyLock::new(|| {
58    let add = AddCertificate {
59        certificate: CertificateAndKey {
60            certificate: include_str!("../assets/certificate.pem").to_string(),
61            certificate_chain: vec![include_str!("../assets/certificate_chain.pem").to_string()],
62            key: include_str!("../assets/key.pem").to_string(),
63            versions: vec![],
64            names: vec![],
65        },
66        address: SocketAddress::new_v4(0, 0, 0, 0, 8080), // not used anyway
67        expired_at: None,
68    };
69    CertifiedKeyWrapper::try_from(&add).ok().map(|c| c.inner)
70});
71
/// Errors produced while converting a certificate request into a
/// [`CertifiedKeyWrapper`] or while mutating the [`CertificateResolver`].
#[derive(thiserror::Error, Debug)]
pub enum CertificateResolverError {
    /// NOTE(review): not constructed in the part of the file reviewed here;
    /// presumably raised when the CN/SAN names cannot be extracted — confirm.
    #[error("failed to get common name and subject alternate names from pem, {0}")]
    InvalidCommonNameAndSubjectAlternateNames(CertificateError),
    /// The private key parsed, but `any_supported_type` could not build a
    /// signing key from it. Carries the stringified signing error.
    #[error("invalid private key: {0}")]
    InvalidPrivateKey(String),
    /// No private key could be extracted from the supplied PEM blob (any
    /// failure of `PrivateKeyDer::from_pem_slice` is folded into this variant).
    #[error("empty key")]
    EmptyKeys,
    /// `parse_x509` rejected the DER bytes of the leaf certificate.
    #[error("error parsing x509 cert from bytes: {0}")]
    ParseX509(CertificateError),
    /// `parse_pem` rejected the leaf certificate or one of the chain entries.
    #[error("error parsing pem formated certificate from bytes: {0}")]
    ParsePem(CertificateError),
    /// NOTE(review): not constructed in the part of the file reviewed here;
    /// presumably tied to the overriding `names` of a request — confirm.
    #[error("error parsing overriding names in new certificate: {0}")]
    ParseOverridingNames(CertificateError),
}
87
/// A wrapper around the Rustls
/// [`CertifiedKey` type](https://docs.rs/rustls/latest/rustls/sign/struct.CertifiedKey.html),
/// stored and returned by the certificate resolver.
///
/// On top of the rustls key it keeps the metadata the resolver indexes on:
/// the served names, the expiration and the fingerprint.
#[derive(Clone, Debug)]
pub struct CertifiedKeyWrapper {
    /// The certificate chain (leaf first) and signing key, shared with rustls.
    inner: Arc<CertifiedKey>,
    /// domain names, override what can be found in the cert
    ///
    /// Filled from the request's `names` when that list is non-empty,
    /// otherwise from the CN and SAN attributes of the leaf certificate.
    names: Vec<String>,
    /// Expiration timestamp: the request's `expired_at` when provided,
    /// otherwise the `not_after` bound of the leaf certificate.
    expiration: i64,
    /// SHA-256 digest of the leaf certificate's DER bytes; the key under
    /// which the certificate is stored in the resolver.
    fingerprint: Fingerprint,
}
99
100/// Convert an AddCertificate request into the Rustls format.
101/// Support RSA and ECDSA certificates.
102impl TryFrom<&AddCertificate> for CertifiedKeyWrapper {
103    type Error = CertificateResolverError;
104
105    fn try_from(add: &AddCertificate) -> Result<Self, Self::Error> {
106        let cert = add.certificate.clone();
107
108        let pem =
109            parse_pem(cert.certificate.as_bytes()).map_err(CertificateResolverError::ParsePem)?;
110
111        let x509 = parse_x509(&pem.contents).map_err(CertificateResolverError::ParseX509)?;
112
113        let overriding_names = if add.certificate.names.is_empty() {
114            get_cn_and_san_attributes(&x509)
115        } else {
116            add.certificate.names.clone()
117        };
118
119        let expiration = add
120            .expired_at
121            .unwrap_or(x509.validity().not_after.timestamp());
122
123        let fingerprint = Fingerprint(Sha256::digest(&pem.contents).iter().cloned().collect());
124
125        // The leaf is at index 0; chain entries follow. ACME clients
126        // emitting `fullchain.pem` (Certbot default, lego, acme.sh)
127        // place the leaf at the start, which would store
128        // `[leaf, leaf, intermediate, root]` and fail strict
129        // validators (Node.js `UNABLE_TO_VERIFY_LEAF_SIGNATURE`).
130        // Each entry is split through `split_certificate_chain` so a
131        // single multi-PEM string fans out (`parse_pem` would
132        // otherwise stop at the first block); each split entry is
133        // dedup'd against the leaf's DER bytes.
134        let leaf_der = pem.contents;
135        let mut chain = vec![CertificateDer::from(leaf_der.to_owned())];
136        let mut dropped_duplicates = 0usize;
137        for cert in &cert.certificate_chain {
138            for split_pem in split_certificate_chain(cert.to_owned()) {
139                let chain_link = parse_pem(split_pem.as_bytes())
140                    .map_err(CertificateResolverError::ParsePem)?
141                    .contents;
142
143                if chain_link == leaf_der {
144                    dropped_duplicates += 1;
145                    continue;
146                }
147                chain.push(CertificateDer::from(chain_link));
148            }
149        }
150        if dropped_duplicates > 0 {
151            debug!(
152                "{} dropped {} duplicate leaf certificate(s) from the supplied chain",
153                log_module_context!(),
154                dropped_duplicates
155            );
156        }
157
158        // Parse the PEM-encoded private key into a `PrivateKeyDer` via
159        // `rustls-pki-types`'s `PemObject` trait. `from_pem_slice` accepts
160        // PKCS1 / PKCS8 / SEC1 key formats the same way the old
161        // `rustls-pemfile::read_one` + per-variant `From::from` chain did,
162        // and folds the empty-input / no-PEM-object / unsupported-format
163        // cases into a single `Err` we surface as `EmptyKeys` (the
164        // existing variant covers any failure to extract a key from the
165        // supplied PEM blob).
166        let private_key = PrivateKeyDer::from_pem_slice(cert.key.as_bytes())
167            .map_err(|_| CertificateResolverError::EmptyKeys)?;
168
169        // Postconditions of chain assembly: the leaf was pushed first, so the
170        // chain is never empty and its head is exactly the parsed leaf DER.
171        // Dedup only ever *drops* entries it recognises as the leaf, so the
172        // assembled length can never exceed leaf + supplied links.
173        debug_assert!(
174            !chain.is_empty(),
175            "assembled certificate chain must contain at least the leaf"
176        );
177        debug_assert_eq!(
178            chain[0].as_ref(),
179            leaf_der.as_slice(),
180            "the leaf must remain at index 0 of the chain"
181        );
182        // SHA-256 fingerprint is exactly 32 bytes — anything else means the
183        // digest pipeline changed underneath us.
184        debug_assert_eq!(
185            fingerprint.0.len(),
186            32,
187            "a SHA-256 fingerprint must be 32 bytes"
188        );
189
190        match any_supported_type(&private_key) {
191            Ok(signing_key) => {
192                let stored_certificate = CertifiedKeyWrapper {
193                    inner: Arc::new(CertifiedKey::new(chain, signing_key)),
194                    names: overriding_names,
195                    expiration,
196                    fingerprint,
197                };
198                Ok(stored_certificate)
199            }
200            Err(sign_error) => Err(CertificateResolverError::InvalidPrivateKey(
201                sign_error.to_string(),
202            )),
203        }
204    }
205}
206
/// Parses and stores TLS certificates, makes them available to Rustls for TLS handshakes
///
/// the `domains` TrieNode is an addressing system to resolve a certificate
/// for a given domain name.
/// Certificates are stored in a hashmap that may contain unreachable certificates if
/// no domain name points to it.
///
/// Three structures are kept in sync by `add_certificate` and
/// `remove_certificate`: the trie (one fingerprint per name), the
/// certificate store, and the per-name index of every candidate fingerprint.
#[derive(Default, Debug)]
pub struct CertificateResolver {
    /// routing one domain name to one certificate for fast resolving
    ///
    /// For a name served by several certificates, the trie points at the one
    /// that sorts last by expiration in `name_fingerprint_idx`.
    pub domains: TrieNode<Fingerprint>,
    /// a storage map: fingerprint -> stored_certificate
    certificates: HashMap<Fingerprint, CertifiedKeyWrapper>,
    /// maps each domain name to several compatible certificates, sorted by expiration date
    /// map of domain_name -> all fingerprints (and expiration) linked to this domain name
    ///
    /// the vector of (fingerprint, expiration) is sorted by expiration,
    /// ascending, so the longest-lived certificate is the last element
    name_fingerprint_idx: HashMap<String, Vec<(Fingerprint, i64)>>,
}
224
225impl CertificateResolver {
226    /// return the certificate in the Rustls-usable form
227    pub fn get_certificate(&self, fingerprint: &Fingerprint) -> Option<CertifiedKeyWrapper> {
228        self.certificates.get(fingerprint).map(ToOwned::to_owned)
229    }
230
231    /// Recompute the aggregate `tls.cert.min_expires_at_seconds` gauge across
232    /// every certificate currently loaded. Per-SNI granularity would explode
233    /// statsd key cardinality (the resolver can easily hold tens of thousands
234    /// of names on a public endpoint) and the existing `gauge!` macro has no
235    /// label support, so we expose a single absolute unix-seconds reading of
236    /// the soonest-expiring cert. Dashboards alert on this as the "next cert
237    /// to rotate" deadline; operators query per-cert detail through the
238    /// command API. `x509` timestamps are signed but `set_gauge` takes a
239    /// `usize`, so we clamp already-expired certs to 0 (which is still a
240    /// monotonic "panic now" signal to any alerting rule).
241    ///
242    /// Called from `add_certificate` / `remove_certificate` — i.e. only when
243    /// the cert set actually changes, never on the hot TLS handshake path.
244    fn publish_min_expiration_gauge(&self) {
245        let Some(min_expiration) = self.certificates.values().map(|c| c.expiration).min() else {
246            // SECURITY: an empty resolver is not "every cert just
247            // expired"; it is "no cert
248            // has been loaded yet" — typical at process boot before the
249            // first AddCertificate request lands. Writing 0 here pages
250            // SOC tooling on every restart with the same alert as a real
251            // expired-cert event. Skip the emit so the gauge reflects the
252            // last known good state instead of being clobbered to 0.
253            return;
254        };
255        let clamped = min_expiration.max(0) as usize;
256        gauge!(names::tls::CERT_MIN_EXPIRES_AT_SECONDS, clamped);
257    }
258
259    /// persist a certificate, after ensuring validity, and checking if it can replace another certificate.
260    /// return the certificate fingerprint regardless of having inserted it or not
261    pub fn add_certificate(
262        &mut self,
263        add: &AddCertificate,
264    ) -> Result<Fingerprint, CertificateResolverError> {
265        let cert_to_add = CertifiedKeyWrapper::try_from(add)?;
266
267        trace!(
268            "{} adding certificate {:?}",
269            log_module_context!(),
270            cert_to_add
271        );
272
273        if self.certificates.contains_key(&cert_to_add.fingerprint) {
274            return Ok(cert_to_add.fingerprint);
275        }
276
277        // Past the duplicate guard the fingerprint is genuinely new, so the
278        // store will grow by exactly one. Snapshot the count to assert that
279        // delta below (ungated `let` — read only inside the debug_assert, so
280        // the optimizer drops it in release while it still compiles).
281        let certificates_before = self.certificates.len();
282        let new_fingerprint = cert_to_add.fingerprint.clone();
283        debug_assert!(
284            !self.certificates.contains_key(&new_fingerprint),
285            "add_certificate past the dedup guard must be inserting a new fingerprint"
286        );
287
288        for new_name in &cert_to_add.names {
289            let fingerprints_for_this_name = self
290                .name_fingerprint_idx
291                .entry(new_name.to_owned())
292                .or_default();
293
294            fingerprints_for_this_name
295                .push((cert_to_add.fingerprint.clone(), cert_to_add.expiration));
296
297            // sort expiration ascending (longest-lived to the right)
298            fingerprints_for_this_name.sort_by_key(|t| t.1);
299
300            let longest_lived_cert = match fingerprints_for_this_name.last() {
301                Some(cert) => cert,
302                None => {
303                    error!(
304                        "{} no fingerprint for this name, this should not happen",
305                        log_module_context!()
306                    );
307                    continue;
308                }
309            };
310
311            // update the longest lived certificate in the TriNode
312            self.domains.remove(&new_name.to_owned().into_bytes());
313            self.domains.insert(
314                new_name.to_owned().into_bytes(),
315                longest_lived_cert.0.to_owned(),
316            );
317        }
318
319        self.certificates
320            .insert(cert_to_add.fingerprint.to_owned(), cert_to_add.clone());
321        self.publish_min_expiration_gauge();
322
323        // Postconditions: the new fingerprint is now stored, the store grew by
324        // exactly one (the dedup guard above ruled out an overwrite), and
325        // every name the cert advertises now resolves to it through the index.
326        debug_assert!(
327            self.certificates.contains_key(&new_fingerprint),
328            "add_certificate must store the new certificate"
329        );
330        debug_assert_eq!(
331            self.certificates.len(),
332            certificates_before + 1,
333            "add_certificate must grow the store by exactly one"
334        );
335        debug_assert!(
336            cert_to_add.names.iter().all(|name| {
337                self.name_fingerprint_idx
338                    .get(name)
339                    .is_some_and(|fps| fps.iter().any(|(fp, _)| *fp == new_fingerprint))
340            }),
341            "every name of the added cert must be indexed to its fingerprint"
342        );
343
344        trace!("{} {:#?}", log_module_context!(), self);
345
346        Ok(cert_to_add.fingerprint)
347    }
348
349    /// Delete a certificate from the resolver. May fail if there is no alternative for
350    // a domain name
351    pub fn remove_certificate(
352        &mut self,
353        fingerprint: &Fingerprint,
354    ) -> Result<(), CertificateResolverError> {
355        // Snapshot the store size so the postcondition can assert that a
356        // present cert drops the count by exactly one and an absent one is a
357        // no-op. Ungated `let`: read only inside the debug_asserts below, so
358        // it compiles in release and the optimizer drops it.
359        let certificates_before = self.certificates.len();
360        let was_present = self.certificates.contains_key(fingerprint);
361
362        if let Some(certificate_to_remove) = self.get_certificate(fingerprint) {
363            // Names snapshot used only by the index-cleanup postcondition.
364            // Gate BOTH the `let` and its assert with `#[cfg(debug_assertions)]`
365            // so the clone never runs (and never warns) in release.
366            #[cfg(debug_assertions)]
367            let removed_names = certificate_to_remove.names.clone();
368            for name in certificate_to_remove.names {
369                self.domains.domain_remove(&name.as_bytes().to_vec());
370
371                if let std::collections::hash_map::Entry::Occupied(mut entry) =
372                    self.name_fingerprint_idx.entry(name.to_owned())
373                {
374                    // remove fingerprints from the index for this name
375                    entry.get_mut().retain(|t| &t.0 != fingerprint);
376
377                    // reinsert the longest lived certificate in the TrieNode
378                    if let Some(longest_lived_cert) = entry.get().last() {
379                        self.domains
380                            .insert(name.as_bytes().to_vec(), longest_lived_cert.0.to_owned());
381                    }
382
383                    // clean up empty index entries to avoid memory leaks
384                    if entry.get().is_empty() {
385                        entry.remove();
386                    }
387                }
388            }
389
390            self.certificates.remove(fingerprint);
391            self.publish_min_expiration_gauge();
392
393            // Postconditions on the present-cert path: the fingerprint is
394            // truly gone, the store shrank by exactly one, and no name still
395            // points at the removed fingerprint in the index (a leftover would
396            // let `resolve` hand back a fingerprint with no backing cert).
397            debug_assert!(
398                !self.certificates.contains_key(fingerprint),
399                "remove_certificate must evict the fingerprint"
400            );
401            debug_assert_eq!(
402                self.certificates.len(),
403                certificates_before - 1,
404                "removing a present cert must shrink the store by exactly one"
405            );
406            #[cfg(debug_assertions)]
407            debug_assert!(
408                removed_names.iter().all(|name| {
409                    self.name_fingerprint_idx
410                        .get(name)
411                        .is_none_or(|fps| fps.iter().all(|(fp, _)| fp != fingerprint))
412                }),
413                "no name may still index the removed fingerprint"
414            );
415        } else {
416            // Absent-cert path is a pure no-op: the store size is unchanged.
417            debug_assert!(
418                !was_present,
419                "the absent-cert branch must only run when the fingerprint was not stored"
420            );
421            debug_assert_eq!(
422                self.certificates.len(),
423                certificates_before,
424                "removing an absent cert must not change the store"
425            );
426        }
427        trace!("{} {:#?}", log_module_context!(), self);
428
429        Ok(())
430    }
431
432    /// Add the new certificate first, then remove the old one.
433    /// This ordering ensures that the old certificate remains available
434    /// if adding the new one fails.
435    pub fn replace_certificate(
436        &mut self,
437        replace: &ReplaceCertificate,
438    ) -> Result<Fingerprint, CertificateResolverError> {
439        let add = AddCertificate {
440            address: replace.address.to_owned(),
441            certificate: replace.new_certificate.to_owned(),
442            expired_at: replace.new_expired_at.to_owned(),
443        };
444
445        // ── Idempotent-replace short-circuit ──
446        //
447        // Compute the new fingerprint *before* mutating the resolver so we
448        // can compare it with the old one. When `add_certificate` is
449        // called with a fingerprint that already exists it early-returns
450        // (lib/src/tls.rs add_certificate path) without inserting; if we
451        // then unconditionally called `remove_certificate(old)` and old
452        // equalled new, we would delete the entry the caller intended to
453        // *retain*. An idempotent renewal — typical for retry loops, dead
454        // ACME polls, or operator-driven `ReplaceCertificate` requests
455        // that resubmit the same PEM — must therefore short-circuit here.
456        // Any failure to materialise the wrapper (parse, sign-key check)
457        // surfaces as `CertificateResolverError`, identical to the path
458        // through `add_certificate`.
459        let new_cert = CertifiedKeyWrapper::try_from(&add)?;
460        let new_fingerprint = new_cert.fingerprint.to_owned();
461
462        if let Ok(old_fingerprint) = Fingerprint::from_str(&replace.old_fingerprint) {
463            if old_fingerprint == new_fingerprint {
464                // Idempotent replace: the new cert is byte-identical to the
465                // one already serving this name. Removing `old == new` would
466                // delete the entry the caller meant to keep, so we must NOT
467                // touch the store — assert it is untouched on this path.
468                let stored_before = self.certificates.contains_key(&new_fingerprint);
469                // Re-publish the expiration gauge so dashboards observe the
470                // replace request even when the certificate set is unchanged.
471                self.publish_min_expiration_gauge();
472                debug_assert_eq!(
473                    self.certificates.contains_key(&new_fingerprint),
474                    stored_before,
475                    "idempotent replace must not change whether the cert is stored"
476                );
477                return Ok(new_fingerprint);
478            }
479        }
480
481        let new_fingerprint = self.add_certificate(&add)?;
482
483        // After a non-idempotent add the new certificate is in the store,
484        // ready to serve handshakes before the old one is torn down (the
485        // add-before-remove ordering that keeps the name continuously
486        // resolvable).
487        debug_assert!(
488            self.certificates.contains_key(&new_fingerprint),
489            "the replacement certificate must be stored before the old one is removed"
490        );
491
492        match Fingerprint::from_str(&replace.old_fingerprint) {
493            Ok(old_fingerprint) => self.remove_certificate(&old_fingerprint)?,
494            Err(err) => {
495                // The new certificate was already added above. If we can't parse the old
496                // fingerprint, the old certificate remains in the resolver (leaked).
497                // We return Ok to indicate the new certificate is available, but warn
498                // that cleanup of the old one failed.
499                warn!(
500                    "{} new certificate added but could not remove old one: \
501                     failed to parse old fingerprint, {}",
502                    log_module_context!(),
503                    err
504                );
505            }
506        }
507
508        Ok(new_fingerprint)
509    }
510
511    /// return all fingerprints that are available for these domain names,
512    /// provided at least one name is given
513    #[cfg(test)]
514    fn find_certificates_by_names(
515        &self,
516        names: &HashSet<String>,
517    ) -> Result<HashSet<Fingerprint>, CertificateResolverError> {
518        let mut fingerprints = HashSet::new();
519        for name in names {
520            if let Some(fprints) = self.name_fingerprint_idx.get(name) {
521                fprints.iter().for_each(|fingerprint| {
522                    fingerprints.insert(fingerprint.to_owned().0);
523                });
524            }
525        }
526
527        Ok(fingerprints)
528    }
529
530    /// return the hashset of subjects that the certificate is able to handle.
531    /// the certificate must be already persisted for this check
532    #[cfg(test)]
533    fn certificate_names(
534        &self,
535        fingerprint: &Fingerprint,
536    ) -> Result<HashSet<String>, CertificateResolverError> {
537        if let Some(cert) = self.certificates.get(fingerprint) {
538            return Ok(cert.names.iter().cloned().collect());
539        }
540        Ok(HashSet::new())
541    }
542
543    pub fn domain_lookup(
544        &self,
545        domain: &[u8],
546        accept_wildcard: bool,
547    ) -> Option<&KeyValue<Key, Fingerprint>> {
548        self.domains.domain_lookup(domain, accept_wildcard)
549    }
550
551    /// Resolve the SAN set Sōzu would serve for `domain` (the same trie
552    /// lookup rustls uses, wildcard-aware via `domain_lookup`). Returns the
553    /// certificate's `names` exactly as stored — wildcards retain their
554    /// leading `*.` so the caller can apply RFC 6125 §6.4.3 matching. `None`
555    /// when no cert covers `domain` (rustls would fall back to
556    /// `DEFAULT_CERTIFICATE`).
557    ///
558    /// Mirrors `MutexCertificateResolver::resolve` minus the rustls glue, so
559    /// the SAN snapshot taken at handshake matches the certificate the peer
560    /// actually validated (RFC 7540 §9.1.1 / RFC 9113 §9.1.1 connection
561    /// reuse).
562    pub fn names_for_sni(&self, domain: &[u8]) -> Option<Vec<String>> {
563        let (_, fingerprint) = self.domain_lookup(domain, true)?;
564        self.certificates
565            .get(fingerprint)
566            .map(|cert| cert.names.clone())
567    }
568}
569
570// -----------------------------------------------------------------------------
// MutexCertificateResolver struct
572
/// A [`CertificateResolver`] guarded by a `Mutex`.
///
/// This is the type that implements rustls' `ResolvesServerCert`: the
/// handshake path locks the inner resolver to look certificates up, while
/// the public field lets the owner lock it to add, remove or replace them.
#[derive(Default)]
pub struct MutexCertificateResolver(pub Mutex<CertificateResolver>);
575
576impl ResolvesServerCert for MutexCertificateResolver {
577    fn resolve(&self, client_hello: ClientHello) -> Option<Arc<CertifiedKey>> {
578        let server_name = client_hello.server_name();
579        let sigschemes = client_hello.signature_schemes();
580
581        let Some(name) = server_name else {
582            error!(
583                "{} cannot look up certificate: no SNI from session",
584                log_module_context!()
585            );
586            return None;
587        };
588        trace!(
589            "{} trying to resolve name: {:?} for signature scheme: {:?}",
590            log_module_context!(),
591            name,
592            sigschemes
593        );
594        // Every other site uses blocking `lock()`, and silently falling
595        // back to `DEFAULT_CERTIFICATE` on lock
596        // contention is an attacker-detectable mismatch (different
597        // chain → different fingerprint) and a footgun the moment
598        // multi-threading enters the worker. Block here. Lock-poisoning
599        // (panic-while-holding) is mapped to the same default-cert
600        // fallback the previous `try_lock` Err arm produced — preserves
601        // the existing observable behaviour for that one corner case
602        // without inventing a new failure mode.
603        let resolver = match self.0.lock() {
604            Ok(guard) => guard,
605            Err(poisoned) => {
606                error!(
607                    "{} cert resolver mutex poisoned, returning default cert: {:?}",
608                    log_module_context!(),
609                    poisoned
610                );
611                return DEFAULT_CERTIFICATE.clone();
612            }
613        };
614        if let Some((_, fingerprint)) = resolver.domains.domain_lookup(name.as_bytes(), true) {
615            trace!(
616                "{} looking for certificate for {:?} with fingerprint {:?}",
617                log_module_context!(),
618                name,
619                fingerprint
620            );
621
622            // Strict-binding invariant: when the SNI trie resolves a name to a
623            // fingerprint, the served cert is exactly the one stored under
624            // that fingerprint — never a substitute. This guards cert
625            // SELECTION (our own store consistency), not the peer-supplied SNI
626            // itself, so a `debug_assert` is correct here (a violation is a
627            // resolver bug, not hostile traffic). The cert may legitimately be
628            // `None` if the trie still indexes a fingerprint whose cert was
629            // concurrently removed; we only assert the positive case.
630            let cert = resolver
631                .certificates
632                .get(fingerprint)
633                .map(|cert| cert.inner.clone());
634            debug_assert!(
635                cert.is_none()
636                    || resolver
637                        .certificates
638                        .get(fingerprint)
639                        .is_some_and(|stored| Arc::ptr_eq(&stored.inner, cert.as_ref().unwrap())),
640                "resolved certificate must be the one stored under the looked-up fingerprint"
641            );
642
643            trace!(
644                "{} found for fingerprint {}: {}",
645                log_module_context!(),
646                fingerprint,
647                cert.is_some()
648            );
649            return cert;
650        }
651        drop(resolver);
652
653        // error!("could not look up a certificate for server name '{}'", name);
654        // This certificate is used for TLS tunneling with another TLS termination endpoint
655        // Note that this is unsafe and you should provide a valid certificate
656        debug!(
657            "{} default certificate is used for {}",
658            log_module_context!(),
659            name
660        );
661        incr!(names::tls::DEFAULT_CERT_USED);
662        DEFAULT_CERTIFICATE.clone()
663    }
664}
665
666impl MutexCertificateResolver {
667    /// Snapshot of the SAN set Sōzu would serve for `domain`. Acquires the
668    /// resolver lock once. Returns `None` when the underlying mutex is
669    /// poisoned — the caller is expected to treat poison the same as
670    /// "default cert served" (legacy fallback), mirroring `resolve`'s
671    /// own poison handling at the rustls hot path.
672    pub fn names_for_sni(&self, domain: &[u8]) -> Option<Vec<String>> {
673        match self.0.lock() {
674            Ok(guard) => guard.names_for_sni(domain),
675            Err(poisoned) => {
676                error!(
677                    "{} cert resolver mutex poisoned, treating as no SAN match: {:?}",
678                    log_module_context!(),
679                    poisoned
680                );
681                None
682            }
683        }
684    }
685}
686
687impl fmt::Debug for MutexCertificateResolver {
688    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
689        f.write_str("MutexWrappedCertificateResolver")
690    }
691}
692
693// -----------------------------------------------------------------------------
694// Unit tests
695
696#[cfg(test)]
697mod tests {
698    use std::{
699        collections::HashSet,
700        error::Error,
701        time::{Duration, SystemTime},
702    };
703
704    // use rand::{seq::SliceRandom, thread_rng};
705    use sozu_command::proto::command::{
706        AddCertificate, CertificateAndKey, ReplaceCertificate, SocketAddress,
707    };
708
709    use super::CertificateResolver;
710
711    #[test]
712    fn lifecycle() -> Result<(), Box<dyn Error + Send + Sync>> {
713        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
714        let mut resolver = CertificateResolver::default();
715        let certificate_and_key = CertificateAndKey {
716            certificate: String::from(include_str!("../assets/certificate.pem")),
717            key: String::from(include_str!("../assets/key.pem")),
718            ..Default::default()
719        };
720
721        let fingerprint = resolver
722            .add_certificate(&AddCertificate {
723                address,
724                certificate: certificate_and_key,
725                expired_at: None,
726            })
727            .expect("could not add certificate");
728
729        if resolver.get_certificate(&fingerprint).is_none() {
730            return Err("failed to retrieve certificate".into());
731        }
732
733        // get the names to try and retrieve the certificate AFTER it is supposed to be removed
734        let names = resolver.certificate_names(&fingerprint)?;
735
736        if let Err(err) = resolver.remove_certificate(&fingerprint) {
737            return Err(format!("the certificate was not removed, {err}").into());
738        }
739
740        if resolver.get_certificate(&fingerprint).is_some() {
741            return Err("We have retrieved the certificate that should be deleted".into());
742        }
743
744        if !resolver.find_certificates_by_names(&names)?.is_empty() {
745            return Err(
746                "The certificate should be deleted but one of its names is in the index".into(),
747            );
748        }
749
750        Ok(())
751    }
752
753    #[test]
754    fn name_override() -> Result<(), Box<dyn Error + Send + Sync>> {
755        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
756        let mut resolver = CertificateResolver::default();
757        let certificate_and_key = CertificateAndKey {
758            certificate: String::from(include_str!("../assets/certificate.pem")),
759            key: String::from(include_str!("../assets/key.pem")),
760            names: vec!["localhost".into(), "lolcatho.st".into()],
761            ..Default::default()
762        };
763
764        let fingerprint = resolver.add_certificate(&AddCertificate {
765            address,
766            certificate: certificate_and_key,
767            expired_at: None,
768        })?;
769
770        if resolver.get_certificate(&fingerprint).is_none() {
771            return Err("failed to retrieve certificate".into());
772        }
773
774        let mut lolcat = HashSet::new();
775        lolcat.insert(String::from("lolcatho.st"));
776        if resolver.find_certificates_by_names(&lolcat)?.is_empty()
777            || resolver.get_certificate(&fingerprint).is_none()
778        {
779            return Err("failed to retrieve certificate with custom names".into());
780        }
781
782        if let Err(err) = resolver.remove_certificate(&fingerprint) {
783            return Err(format!("the certificate could not be removed, {err}").into());
784        }
785
786        let names = resolver.certificate_names(&fingerprint)?;
787        if !resolver.find_certificates_by_names(&names)?.is_empty()
788            && resolver.get_certificate(&fingerprint).is_some()
789        {
790            return Err("We have retrieved the certificate that should be deleted".into());
791        }
792
793        Ok(())
794    }
795
796    #[test]
797    fn keep_resolving_with_wildcard() -> Result<(), Box<dyn Error + Send + Sync>> {
798        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
799        let mut resolver = CertificateResolver::default();
800
801        // ---------------------------------------------------------------------
802        // load the wildcard certificate,  expiring in 3 years
803        let wildcard_example_org = CertificateAndKey {
804            certificate: String::from(include_str!("../assets/tests/certificate-3.pem")),
805            key: String::from(include_str!("../assets/tests/key.pem")),
806            ..Default::default()
807        };
808
809        let wildcard_example_org_fingerprint = resolver.add_certificate(&AddCertificate {
810            address,
811            certificate: wildcard_example_org,
812            expired_at: Some(
813                (SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?
814                    + Duration::from_secs(365 * 24 * 3600))
815                .as_secs() as i64,
816            ),
817        })?;
818
819        if resolver
820            .get_certificate(&wildcard_example_org_fingerprint)
821            .is_none()
822        {
823            return Err("could not load the 2-year-valid certificate".into());
824        }
825
826        // ---------------------------------------------------------------------
827        // try loading the ordinary certificate, expiring in 2 years
828        // this one has two names: example.org and www.example.org
829        let www_example_org = CertificateAndKey {
830            certificate: String::from(include_str!("../assets/tests/certificate-2.pem")),
831            key: String::from(include_str!("../assets/tests/key.pem")),
832            ..Default::default()
833        };
834
835        let www_example_org_fingerprint = resolver.add_certificate(&AddCertificate {
836            address,
837            certificate: www_example_org,
838            expired_at: Some(
839                (SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?
840                    + Duration::from_secs(2 * 365 * 24 * 3600))
841                .as_secs() as i64,
842            ),
843        })?;
844
845        let www_example_org = resolver
846            .domain_lookup("www.example.org".as_bytes(), true)
847            .expect("there should be a www.example.org cert");
848        assert_eq!(www_example_org.1, www_example_org_fingerprint);
849
850        let test_example_org = resolver
851            .domain_lookup("test.example.org".as_bytes(), true)
852            .expect("there should be a test.example.org cert");
853        assert_eq!(test_example_org.1, wildcard_example_org_fingerprint);
854
855        let example_org = resolver
856            .domain_lookup("example.org".as_bytes(), true)
857            .expect("there should be a example.org cert");
858        assert_eq!(example_org.1, www_example_org_fingerprint);
859
860        // check that when removing the www.example.org certificate
861        // the resolver falls back on the wildcard
862        resolver
863            .remove_certificate(&www_example_org_fingerprint)
864            .expect("should be able to remove the 2-year certificate");
865
866        let should_be_wildcard_fingerprint = resolver
867            .domain_lookup("www.example.org".as_bytes(), true)
868            .expect("there should be a www.example.org cert");
869        assert_eq!(
870            should_be_wildcard_fingerprint.1,
871            wildcard_example_org_fingerprint
872        );
873
874        assert!(
875            resolver
876                .domain_lookup("example.org".as_bytes(), true)
877                .is_none()
878        );
879
880        Ok(())
881    }
882
883    #[test]
884    fn resolve_the_longer_lived_cert() -> Result<(), Box<dyn Error + Send + Sync>> {
885        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
886        let mut resolver = CertificateResolver::default();
887
888        // ---------------------------------------------------------------------
889        // load the 2-year valid certificate
890        let certificate_and_key_2y = CertificateAndKey {
891            certificate: String::from(include_str!("../assets/tests/certificate-2y.pem")),
892            key: String::from(include_str!("../assets/tests/key-2y.pem")),
893            ..Default::default()
894        };
895
896        let fingerprint_2y = resolver.add_certificate(&AddCertificate {
897            address,
898            certificate: certificate_and_key_2y,
899            expired_at: None,
900        })?;
901
902        if resolver.get_certificate(&fingerprint_2y).is_none() {
903            return Err("could not load the 2-year-valid certificate".into());
904        }
905
906        // ---------------------------------------------------------------------
907        // try loading the 1-year valid certificate
908        let certificate_and_key_1y = CertificateAndKey {
909            certificate: String::from(include_str!("../assets/tests/certificate-1y.pem")),
910            key: String::from(include_str!("../assets/tests/key-1y.pem")),
911            ..Default::default()
912        };
913
914        let fingerprint_1y = resolver.add_certificate(&AddCertificate {
915            address,
916            certificate: certificate_and_key_1y,
917            ..Default::default()
918        })?;
919
920        let localhost_cert = resolver
921            .domain_lookup("localhost".as_bytes(), true)
922            .expect("there should be a localhost cert");
923
924        assert_eq!(localhost_cert.1, fingerprint_2y);
925
926        // check that when removing the longer-lived certificate,
927        // the resolver falls back on the shorter-lived one
928
929        resolver
930            .remove_certificate(&fingerprint_2y)
931            .expect("should be able to remove the 2-year certificate");
932
933        let localhost_cert = resolver
934            .domain_lookup("localhost".as_bytes(), true)
935            .expect("there should be a localhost cert");
936
937        assert_eq!(localhost_cert.1, fingerprint_1y);
938
939        Ok(())
940    }
941
942    #[test]
943    fn expiration_override() -> Result<(), Box<dyn Error + Send + Sync>> {
944        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
945        let mut resolver = CertificateResolver::default();
946
947        // ---------------------------------------------------------------------
948        // load first certificate
949        let certificate_and_key_1y = CertificateAndKey {
950            certificate: String::from(include_str!("../assets/tests/certificate-1y.pem")),
951            key: String::from(include_str!("../assets/tests/key-1y.pem")),
952            ..Default::default()
953        };
954
955        let fingerprint_1y_overriden = resolver.add_certificate(&AddCertificate {
956            address,
957            certificate: certificate_and_key_1y,
958            expired_at: Some(
959                (SystemTime::now().duration_since(SystemTime::UNIX_EPOCH)?
960                    + Duration::from_secs(3 * 365 * 24 * 3600))
961                .as_secs() as i64,
962            ),
963        })?;
964
965        if resolver
966            .get_certificate(&fingerprint_1y_overriden)
967            .is_none()
968        {
969            return Err("failed to retrieve certificate".into());
970        }
971
972        // ---------------------------------------------------------------------
973        // load second certificate
974        let certificate_and_key_2y = CertificateAndKey {
975            certificate: String::from(include_str!("../assets/tests/certificate-2y.pem")),
976            key: String::from(include_str!("../assets/tests/key-2y.pem")),
977            ..Default::default()
978        };
979
980        let fingerprint_2y = resolver.add_certificate(&AddCertificate {
981            address,
982            certificate: certificate_and_key_2y,
983            expired_at: None,
984        })?;
985
986        let localhost_cert = resolver
987            .domain_lookup("localhost".as_bytes(), true)
988            .expect("there should be a localhost cert");
989
990        assert_eq!(localhost_cert.1, fingerprint_1y_overriden);
991
992        // check that when removing the overriden certificate,
993        // the resolver falls back on the other one
994
995        resolver
996            .remove_certificate(&fingerprint_1y_overriden)
997            .expect("should be able to remove the 1-year (3-year-overriden) certificate");
998
999        let localhost_cert = resolver
1000            .domain_lookup("localhost".as_bytes(), true)
1001            .expect("there should be a localhost cert");
1002
1003        assert_eq!(localhost_cert.1, fingerprint_2y);
1004
1005        Ok(())
1006    }
1007
1008    /// Verify that `replace_certificate` adds the new cert before removing
1009    /// the old one, so lookup always returns a valid certificate.
1010    #[test]
1011    fn replace_certificate_add_before_remove() -> Result<(), Box<dyn Error + Send + Sync>> {
1012        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
1013        let mut resolver = CertificateResolver::default();
1014
1015        // add the initial (1-year) certificate
1016        let cert_1y = CertificateAndKey {
1017            certificate: String::from(include_str!("../assets/tests/certificate-1y.pem")),
1018            key: String::from(include_str!("../assets/tests/key-1y.pem")),
1019            ..Default::default()
1020        };
1021
1022        let fingerprint_1y = resolver.add_certificate(&AddCertificate {
1023            address,
1024            certificate: cert_1y,
1025            expired_at: None,
1026        })?;
1027
1028        // sanity: the 1y cert is resolvable
1029        assert!(
1030            resolver
1031                .domain_lookup("localhost".as_bytes(), true)
1032                .is_some(),
1033            "initial certificate should be resolvable"
1034        );
1035
1036        // replace with the 2-year certificate
1037        let cert_2y = CertificateAndKey {
1038            certificate: String::from(include_str!("../assets/tests/certificate-2y.pem")),
1039            key: String::from(include_str!("../assets/tests/key-2y.pem")),
1040            ..Default::default()
1041        };
1042
1043        let new_fingerprint = resolver.replace_certificate(&ReplaceCertificate {
1044            address,
1045            new_certificate: cert_2y,
1046            old_fingerprint: fingerprint_1y.to_string(),
1047            new_expired_at: None,
1048        })?;
1049
1050        // the old certificate should be gone
1051        assert!(
1052            resolver.get_certificate(&fingerprint_1y).is_none(),
1053            "old certificate should have been removed"
1054        );
1055
1056        // the new certificate should be present and resolvable
1057        assert!(
1058            resolver.get_certificate(&new_fingerprint).is_some(),
1059            "new certificate should be present"
1060        );
1061        let resolved = resolver
1062            .domain_lookup("localhost".as_bytes(), true)
1063            .expect("a certificate should resolve for localhost");
1064        assert_eq!(
1065            resolved.1, new_fingerprint,
1066            "resolved certificate should be the replacement"
1067        );
1068
1069        Ok(())
1070    }
1071
1072    /// When `ReplaceCertificate` carries the same certificate / fingerprint
1073    /// as the existing entry, the previous implementation called
1074    /// `add_certificate` (which early-returns on duplicate fingerprint
1075    /// without inserting) and then unconditionally removed the old
1076    /// fingerprint — i.e. the *current* entry — leaving the resolver
1077    /// without a certificate for that name. The fix short-circuits the
1078    /// idempotent case and keeps the existing entry in place. An
1079    /// idempotent ACME / operator retry must therefore still resolve.
1080    #[test]
1081    fn replace_certificate_with_same_fingerprint_is_noop()
1082    -> Result<(), Box<dyn Error + Send + Sync>> {
1083        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
1084        let mut resolver = CertificateResolver::default();
1085
1086        let cert = CertificateAndKey {
1087            certificate: String::from(include_str!("../assets/tests/certificate-1y.pem")),
1088            key: String::from(include_str!("../assets/tests/key-1y.pem")),
1089            ..Default::default()
1090        };
1091
1092        let initial_fingerprint = resolver.add_certificate(&AddCertificate {
1093            address,
1094            certificate: cert.clone(),
1095            expired_at: None,
1096        })?;
1097
1098        // Replace with the SAME PEM/key — identical fingerprint expected.
1099        let returned_fingerprint = resolver.replace_certificate(&ReplaceCertificate {
1100            address,
1101            new_certificate: cert,
1102            old_fingerprint: initial_fingerprint.to_string(),
1103            new_expired_at: None,
1104        })?;
1105
1106        assert_eq!(
1107            returned_fingerprint, initial_fingerprint,
1108            "idempotent replace should return the existing fingerprint"
1109        );
1110
1111        assert!(
1112            resolver.get_certificate(&initial_fingerprint).is_some(),
1113            "idempotent replace must NOT delete the existing certificate"
1114        );
1115
1116        let resolved = resolver
1117            .domain_lookup("localhost".as_bytes(), true)
1118            .expect("certificate should still resolve after idempotent replace");
1119        assert_eq!(
1120            resolved.1, initial_fingerprint,
1121            "resolver should still hand back the original fingerprint"
1122        );
1123
1124        Ok(())
1125    }
1126
1127    /// Verify that removing the last certificate for a domain cleans up
1128    /// the empty entry in `name_fingerprint_idx` (no memory leak).
1129    #[test]
1130    fn removal_cleans_up_empty_index_entries() -> Result<(), Box<dyn Error + Send + Sync>> {
1131        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
1132        let mut resolver = CertificateResolver::default();
1133
1134        let cert = CertificateAndKey {
1135            certificate: String::from(include_str!("../assets/tests/certificate-1y.pem")),
1136            key: String::from(include_str!("../assets/tests/key-1y.pem")),
1137            ..Default::default()
1138        };
1139
1140        let fingerprint = resolver.add_certificate(&AddCertificate {
1141            address,
1142            certificate: cert,
1143            expired_at: None,
1144        })?;
1145
1146        // record the names associated with this cert
1147        let names = resolver.certificate_names(&fingerprint)?;
1148        assert!(
1149            !names.is_empty(),
1150            "certificate should have at least one name"
1151        );
1152
1153        // verify index is populated
1154        for name in &names {
1155            assert!(
1156                resolver.name_fingerprint_idx.contains_key(name),
1157                "name_fingerprint_idx should contain '{name}' before removal"
1158            );
1159        }
1160
1161        resolver.remove_certificate(&fingerprint)?;
1162
1163        // after removal, all index entries for these names should be gone
1164        for name in &names {
1165            assert!(
1166                !resolver.name_fingerprint_idx.contains_key(name),
1167                "name_fingerprint_idx should not contain empty entry for '{name}' after removal"
1168            );
1169        }
1170
1171        Ok(())
1172    }
1173
1174    /// Many ACME clients (Certbot's `fullchain.pem`, lego, acme.sh) emit
1175    /// the leaf certificate at the START of the chain file. Without
1176    /// dedup, the resolver previously stored `[leaf, leaf, ...]` and
1177    /// the on-wire TLS handshake replayed the leaf twice — accepted by
1178    /// browsers but rejected by stricter validators (Node.js,
1179    /// `UNABLE_TO_VERIFY_LEAF_SIGNATURE`). The fix in
1180    /// `TryFrom<&AddCertificate> for CertifiedKeyWrapper` drops any
1181    /// chain entry whose DER bytes match the leaf. Closes #1135 / #1148.
1182    ///
1183    /// This test passes the SAME leaf PEM as both `certificate` and the
1184    /// sole `certificate_chain` entry (the `fullchain.pem` shape). The
1185    /// stored chain length must be `1` (leaf only), not `2`
1186    /// (`[leaf, leaf]`).
1187    #[test]
1188    fn certificate_chain_dedup_drops_duplicate_leaf() -> Result<(), Box<dyn Error + Send + Sync>> {
1189        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
1190        let mut resolver = CertificateResolver::default();
1191
1192        let leaf_pem = String::from(include_str!("../assets/certificate.pem"));
1193
1194        let cert_with_duplicated_leaf = CertificateAndKey {
1195            certificate: leaf_pem.clone(),
1196            certificate_chain: vec![leaf_pem],
1197            key: String::from(include_str!("../assets/key.pem")),
1198            ..Default::default()
1199        };
1200
1201        let fingerprint = resolver.add_certificate(&AddCertificate {
1202            address,
1203            certificate: cert_with_duplicated_leaf,
1204            expired_at: None,
1205        })?;
1206
1207        let stored = resolver
1208            .get_certificate(&fingerprint)
1209            .ok_or("resolver lost the certificate after add")?;
1210
1211        assert_eq!(
1212            stored.inner.cert.len(),
1213            1,
1214            "expected dedup to drop the duplicate leaf, got chain of {} cert(s)",
1215            stored.inner.cert.len()
1216        );
1217
1218        Ok(())
1219    }
1220
1221    /// When an operator passes the entire `fullchain.pem` content as a
1222    /// SINGLE chain entry (one string containing multiple
1223    /// `-----BEGIN CERTIFICATE-----` blocks back-to-back), the previous
1224    /// code called `parse_pem` once on the multi-PEM string, which only
1225    /// consumes the first PEM block and silently drops the rest. The
1226    /// fix splits each chain entry through
1227    /// `split_certificate_chain` so multi-PEM strings fan out into one
1228    /// entry per CA before parsing.
1229    ///
1230    /// This test concatenates two PEM blocks into a single chain entry
1231    /// (the leaf duplicated, so dedup also kicks in). The stored chain
1232    /// must end up with length 1 — the leaf — proving (a) the multi-PEM
1233    /// entry was split correctly, (b) the duplicate leaf was dropped.
1234    #[test]
1235    fn certificate_chain_handles_multi_pem_single_entry() -> Result<(), Box<dyn Error + Send + Sync>>
1236    {
1237        let address = SocketAddress::new_v4(127, 0, 0, 1, 8080);
1238        let mut resolver = CertificateResolver::default();
1239
1240        let leaf_pem = String::from(include_str!("../assets/certificate.pem"));
1241        // Two PEM blocks concatenated into one string; both are the
1242        // leaf so dedup brings the result back to length 1.
1243        let multi_pem_chain_entry = format!("{leaf_pem}\n{leaf_pem}");
1244
1245        let cert = CertificateAndKey {
1246            certificate: leaf_pem,
1247            certificate_chain: vec![multi_pem_chain_entry],
1248            key: String::from(include_str!("../assets/key.pem")),
1249            ..Default::default()
1250        };
1251
1252        let fingerprint = resolver.add_certificate(&AddCertificate {
1253            address,
1254            certificate: cert,
1255            expired_at: None,
1256        })?;
1257
1258        let stored = resolver
1259            .get_certificate(&fingerprint)
1260            .ok_or("resolver lost the certificate after add")?;
1261
1262        // Without the split, `parse_pem` would only consume the first
1263        // PEM block and drop the second; with the split + dedup, both
1264        // get fanned out, both get recognised as the leaf, both get
1265        // dropped — leaving the original leaf at index 0 only.
1266        assert_eq!(
1267            stored.inner.cert.len(),
1268            1,
1269            "expected split + dedup to leave only the leaf, got chain of {} cert(s)",
1270            stored.inner.cert.len()
1271        );
1272
1273        Ok(())
1274    }
1275}