Skip to main content

devboy_storage/
router_cache.rs

1//! In-memory cache for the source router per [ADR-021] §7.
2//!
3//! Source latencies vary across orders of magnitude (microseconds
4//! for a keychain read; hundreds of milliseconds for `op read` plus
5//! a possible biometric prompt; seconds for a misconfigured Vault).
6//! Without caching, an agent that resolves a dozen secrets per
7//! minute is unusable. This module is the cache the router (P5.5+,
8//! P6) wraps every `get()` with.
9//!
10//! ## Adaptive TTL
11//!
12//! Per ADR-021 §7:
13//!
14//! - The **base TTL** is per-source (`cache_ttl_seconds` in
15//!   `sources.toml`); the default lives in P6 source impls and is
16//!   typically 900 seconds.
17//! - If [`SecretSource::get`](crate::source::SecretSource::get)
18//!   returns a `lease_duration`, the effective TTL becomes
19//!   `min(base_ttl, lease_duration)`. Vault dynamic-secret leases
20//!   keep the cache from outliving the lease.
21//! - `lease_duration = Some(0)` disables caching for that read
22//!   entirely — the value is returned to the caller but never
23//!   cached.
24//! - The global index may further lower the TTL through
25//!   `cache_ttl_seconds_max` (per-secret cap). The cap can only
26//!   lower the TTL; it cannot raise it above the source default.
27//!
28//! ## Eviction
29//!
30//! Entries leave the cache when:
31//!
32//! 1. Their effective TTL elapses (lazy — checked on the next
33//!    `get`).
34//! 2. The user invokes `devboy secrets refresh <path>` /
35//!    `--all` ([`AdaptiveCache::invalidate`] /
36//!    [`AdaptiveCache::invalidate_all`]).
37//! 3. A source declares out-of-band invalidation (Vault lease
38//!    revoked, 1Password session timed out). The router calls
39//!    [`AdaptiveCache::invalidate`] in response.
40//! 4. The process exits — every [`SecretString`] in the map
41//!    zeroizes on drop. The cache itself is `Drop`-safe; we do not
42//!    keep any extra plaintext copy.
43//!
44//! ## Persistence
45//!
46//! **Never.** Per ADR-021 §7: "the cache is never persisted.
47//! Process exit drops every entry." The same posture as
48//! [`secrecy::SecretString`]'s zeroize-on-drop, extended one level
49//! up.
50//!
51//! ## Testability
52//!
53//! [`CacheClock`] is the abstract time source. Production callers
54//! pass [`SystemClock`]; tests pass [`ManualClock`] so the TTL can
55//! be raced past without `std::thread::sleep`.
56//!
57//! [ADR-021]: https://github.com/meteora-pro/devboy-tools/blob/main/docs/architecture/adr/ADR-021-external-secret-sources.md
58
59use std::collections::HashMap;
60use std::sync::{Arc, Mutex};
61use std::time::{Duration, Instant};
62
63use secrecy::SecretString;
64
65use crate::secret_path::SecretPath;
66
/// Fallback base TTL: 900 seconds (15 minutes), the ADR-021 §7
/// default. It applies when neither the source configuration nor a
/// per-secret cap provides a different value.
pub const DEFAULT_BASE_TTL: Duration = Duration::from_secs(900);
70
71// =============================================================================
72// Clock abstraction
73// =============================================================================
74
/// Time source the cache consults for every expiry decision.
///
/// Putting the clock behind a trait lets tests step past a TTL
/// deterministically instead of sleeping. The `Send + Sync` bound
/// allows a clock to be shared as `Arc<dyn CacheClock>`.
pub trait CacheClock: Send + Sync {
    /// Returns the current instant on a monotonic timeline.
    /// [`SystemClock`] reads [`Instant::now`]; [`ManualClock`]
    /// reports whatever instant the test has arranged.
    fn now(&self) -> Instant;
}
82
83/// Production clock backed by [`Instant::now`].
84#[derive(Debug, Default, Clone, Copy)]
85pub struct SystemClock;
86
87impl CacheClock for SystemClock {
88    fn now(&self) -> Instant {
89        Instant::now()
90    }
91}
92
93/// Test clock whose `now()` value is controlled by
94/// [`ManualClock::advance`].
95#[derive(Debug, Clone)]
96pub struct ManualClock(Arc<Mutex<Instant>>);
97
98impl ManualClock {
99    /// Build a manual clock starting at `initial`.
100    pub fn new(initial: Instant) -> Self {
101        Self(Arc::new(Mutex::new(initial)))
102    }
103
104    /// Advance the clock by `delta`. Subsequent calls to
105    /// [`CacheClock::now`] return the new time.
106    pub fn advance(&self, delta: Duration) {
107        let mut g = self.0.lock().expect("ManualClock mutex poisoned");
108        *g += delta;
109    }
110}
111
112impl CacheClock for ManualClock {
113    fn now(&self) -> Instant {
114        *self.0.lock().expect("ManualClock mutex poisoned")
115    }
116}
117
118// =============================================================================
119// Cache entry (private)
120// =============================================================================
121
122struct CacheEntry {
123    value: SecretString,
124    /// Pre-computed wall-clock instant at which this entry expires.
125    /// `None` would mean "never expires" but we never construct
126    /// that; every entry has an explicit expiry.
127    expires_at: Instant,
128}
129
130// =============================================================================
131// AdaptiveCache
132// =============================================================================
133
134/// Path-keyed in-memory cache with adaptive TTL.
135///
136/// The router wraps every `get()` with the cache: on a hit, return
137/// the cached value (clone of [`SecretString`]); on a miss or
138/// expiry, ask the source and store the result via
139/// [`AdaptiveCache::put`].
140///
141/// Thread-safe — uses a single [`std::sync::Mutex`] internally.
142/// Operations are CPU-bound (hash + mutex grab), so the standard
143/// library mutex is the right primitive (no `tokio::sync::Mutex`
144/// hold across `.await`).
145pub struct AdaptiveCache {
146    /// Source-default TTL. Effective TTL is min(this, lease, cap).
147    base_ttl: Duration,
148    /// Time source. Tests inject [`ManualClock`].
149    clock: Arc<dyn CacheClock>,
150    /// Live entries.
151    entries: Mutex<HashMap<SecretPath, CacheEntry>>,
152}
153
154impl AdaptiveCache {
155    /// Build a cache with the given source-default TTL and the
156    /// production clock.
157    pub fn new(base_ttl: Duration) -> Self {
158        Self::with_clock(base_ttl, Arc::new(SystemClock))
159    }
160
161    /// Build a cache with a caller-supplied clock. Used by tests.
162    pub fn with_clock(base_ttl: Duration, clock: Arc<dyn CacheClock>) -> Self {
163        Self {
164            base_ttl,
165            clock,
166            entries: Mutex::new(HashMap::new()),
167        }
168    }
169
170    /// Source-default TTL the cache was constructed with.
171    pub fn base_ttl(&self) -> Duration {
172        self.base_ttl
173    }
174
175    /// Borrow the clock. Useful for `doctor`-style introspection
176    /// and tests.
177    pub fn clock(&self) -> &Arc<dyn CacheClock> {
178        &self.clock
179    }
180
181    /// Look up `path`. Returns `Some(value.clone())` on hit;
182    /// `None` on miss or after the TTL has elapsed (the expired
183    /// entry is evicted lazily).
184    pub fn get(&self, path: &SecretPath) -> Option<SecretString> {
185        let mut g = self.entries.lock().expect("AdaptiveCache mutex poisoned");
186        let now = self.clock.now();
187        let mut hit = None;
188        if let Some(entry) = g.get(path) {
189            if entry.expires_at > now {
190                hit = Some(entry.value.clone());
191            } else {
192                // Expired — drop the entry so subsequent calls do
193                // not re-evaluate.
194                g.remove(path);
195            }
196        }
197        hit
198    }
199
200    /// Insert a value. Returns `true` if the value was cached;
201    /// `false` if caching was suppressed (by `lease_duration =
202    /// Some(0)` or by an effective TTL of zero).
203    ///
204    /// `lease_duration` is the upstream-reported lease (from
205    /// [`GetOutcome::lease_duration`](crate::source::GetOutcome));
206    /// `max_ttl` is the per-secret cap from the global index
207    /// (`cache_ttl_seconds_max`). Both are optional; both can only
208    /// lower the effective TTL, never raise it.
209    pub fn put(
210        &self,
211        path: &SecretPath,
212        value: SecretString,
213        lease_duration: Option<Duration>,
214        max_ttl: Option<Duration>,
215    ) -> bool {
216        let ttl = match self.effective_ttl(lease_duration, max_ttl) {
217            Some(t) => t,
218            None => return false,
219        };
220        let expires_at = self.clock.now() + ttl;
221        let mut g = self.entries.lock().expect("AdaptiveCache mutex poisoned");
222        g.insert(path.clone(), CacheEntry { value, expires_at });
223        true
224    }
225
226    /// Compute the effective TTL given the source's lease and the
227    /// per-secret cap. Returns `None` to disable caching.
228    fn effective_ttl(
229        &self,
230        lease_duration: Option<Duration>,
231        max_ttl: Option<Duration>,
232    ) -> Option<Duration> {
233        let mut ttl = self.base_ttl;
234        if let Some(lease) = lease_duration {
235            if lease.is_zero() {
236                return None;
237            }
238            if lease < ttl {
239                ttl = lease;
240            }
241        }
242        if let Some(cap) = max_ttl
243            && cap < ttl
244        {
245            ttl = cap;
246        }
247        if ttl.is_zero() { None } else { Some(ttl) }
248    }
249
250    /// Drop the entry for one path. Idempotent — no-op if the
251    /// path is not in the cache.
252    pub fn invalidate(&self, path: &SecretPath) {
253        let mut g = self.entries.lock().expect("AdaptiveCache mutex poisoned");
254        g.remove(path);
255    }
256
257    /// Drop every entry. Used by `devboy secrets refresh --all`.
258    pub fn invalidate_all(&self) {
259        let mut g = self.entries.lock().expect("AdaptiveCache mutex poisoned");
260        g.clear();
261    }
262
263    /// Number of entries currently in the cache, including any
264    /// that may already be expired. (Lazy eviction means a count
265    /// of `n` is the upper bound on live entries; `get()` may
266    /// reduce it on the next call.)
267    pub fn len(&self) -> usize {
268        self.entries
269            .lock()
270            .expect("AdaptiveCache mutex poisoned")
271            .len()
272    }
273
274    /// `true` when the cache has no entries.
275    pub fn is_empty(&self) -> bool {
276        self.len() == 0
277    }
278}
279
280impl std::fmt::Debug for AdaptiveCache {
281    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
282        // Never print values — the cache is full of plaintext
283        // secrets. Surface only the size and the configured TTL so
284        // accidental `dbg!` doesn't leak the credential payloads.
285        let count = self.entries.lock().map(|g| g.len()).unwrap_or(0);
286        f.debug_struct("AdaptiveCache")
287            .field("base_ttl", &self.base_ttl)
288            .field("entries", &format!("<{count} redacted>"))
289            .field("clock", &"<dyn CacheClock>")
290            .finish()
291    }
292}
293
294// =============================================================================
295// Tests
296// =============================================================================
297
#[cfg(test)]
mod tests {
    use super::*;
    use secrecy::ExposeSecret;

    /// Parses a secret path for a test, panicking on malformed input.
    fn p(s: &str) -> SecretPath {
        SecretPath::parse(s).unwrap()
    }

    /// Builds a cache driven by a [`ManualClock`] and returns both,
    /// so the test can move time forward.
    fn manual_cache(base_ttl: Duration) -> (AdaptiveCache, ManualClock) {
        let clock = ManualClock::new(Instant::now());
        let cache = AdaptiveCache::with_clock(base_ttl, Arc::new(clock.clone()));
        (cache, clock)
    }

    /// Wraps a literal in a [`SecretString`].
    fn secret(s: &str) -> SecretString {
        SecretString::from(s.to_owned())
    }

    /// Exposes the plaintext of an optional secret for assertions.
    fn exposed(v: &Option<SecretString>) -> Option<&str> {
        v.as_ref().map(|s| s.expose_secret())
    }

    /// `true` while `path` is served from the cache. Used before
    /// advancing the clock so that an expiry assertion cannot pass
    /// merely because the value was never cached.
    fn is_cached(cache: &AdaptiveCache, path: &str) -> bool {
        cache.get(&p(path)).is_some()
    }

    // -- put/get round-trip ----------------------------------------

    #[test]
    fn put_then_get_returns_value_within_ttl() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        assert!(cache.put(&p("a/b/c"), secret("v"), None, None));
        let got = cache.get(&p("a/b/c"));
        assert_eq!(exposed(&got), Some("v"));
    }

    #[test]
    fn missing_key_returns_none() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        assert!(cache.get(&p("a/b/c")).is_none());
    }

    #[test]
    fn distinct_paths_do_not_collide() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v1"), None, None);
        cache.put(&p("d/e/f"), secret("v2"), None, None);
        assert_eq!(exposed(&cache.get(&p("a/b/c"))), Some("v1"));
        assert_eq!(exposed(&cache.get(&p("d/e/f"))), Some("v2"));
    }

    #[test]
    fn put_overwrites_previous_value() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v1"), None, None);
        cache.put(&p("a/b/c"), secret("v2"), None, None);
        assert_eq!(exposed(&cache.get(&p("a/b/c"))), Some("v2"));
        assert_eq!(cache.len(), 1);
    }

    // -- TTL expiry ------------------------------------------------

    #[test]
    fn expired_entry_returns_none_and_is_evicted() {
        let (cache, clock) = manual_cache(Duration::from_secs(10));
        cache.put(&p("a/b/c"), secret("v"), None, None);
        clock.advance(Duration::from_secs(11));
        // Eviction is lazy: the expired entry is still counted until
        // the next lookup touches it.
        assert_eq!(cache.len(), 1);
        assert!(cache.get(&p("a/b/c")).is_none());
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn entry_at_exact_expiry_is_treated_as_expired() {
        // The liveness check is strict (`expires_at > now`), so the
        // entry is gone at equality but still live just before it.
        let (cache, clock) = manual_cache(Duration::from_secs(10));
        cache.put(&p("a/b/c"), secret("v"), None, None);
        clock.advance(Duration::from_secs(9));
        assert!(is_cached(&cache, "a/b/c"));
        clock.advance(Duration::from_secs(1));
        assert!(cache.get(&p("a/b/c")).is_none());
    }

    // -- Adaptive TTL: lease_duration ------------------------------

    #[test]
    fn lease_duration_zero_disables_caching() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        let cached = cache.put(&p("a/b/c"), secret("v"), Some(Duration::from_secs(0)), None);
        assert!(!cached, "lease_duration=0 must suppress caching");
        assert!(cache.get(&p("a/b/c")).is_none());
        assert!(cache.is_empty());
    }

    #[test]
    fn lease_below_base_lowers_effective_ttl() {
        let (cache, clock) = manual_cache(Duration::from_secs(60));
        cache.put(
            &p("a/b/c"),
            secret("v"),
            Some(Duration::from_secs(10)),
            None,
        );
        clock.advance(Duration::from_secs(9));
        assert!(is_cached(&cache, "a/b/c"), "lease=10s should be live at 9s");
        clock.advance(Duration::from_secs(2));
        assert!(
            cache.get(&p("a/b/c")).is_none(),
            "lease=10s should evict at 11s"
        );
    }

    #[test]
    fn lease_above_base_does_not_extend_ttl() {
        let (cache, clock) = manual_cache(Duration::from_secs(60));
        cache.put(
            &p("a/b/c"),
            secret("v"),
            Some(Duration::from_secs(3600)),
            None,
        );
        clock.advance(Duration::from_secs(59));
        assert!(is_cached(&cache, "a/b/c"), "base=60s should be live at 59s");
        clock.advance(Duration::from_secs(2));
        assert!(
            cache.get(&p("a/b/c")).is_none(),
            "lease=3600s should NOT raise the 60s base TTL"
        );
    }

    // -- Adaptive TTL: max_ttl cap (cache_ttl_seconds_max) ---------

    #[test]
    fn max_ttl_cap_lowers_below_base() {
        let (cache, clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v"), None, Some(Duration::from_secs(5)));
        clock.advance(Duration::from_secs(4));
        assert!(is_cached(&cache, "a/b/c"), "max_ttl=5s should be live at 4s");
        clock.advance(Duration::from_secs(2));
        assert!(
            cache.get(&p("a/b/c")).is_none(),
            "max_ttl=5s should evict at 6s"
        );
    }

    #[test]
    fn max_ttl_cap_does_not_raise_above_base() {
        // ADR-021 §7: "may not raise it above the source default".
        let (cache, clock) = manual_cache(Duration::from_secs(10));
        cache.put(
            &p("a/b/c"),
            secret("v"),
            None,
            Some(Duration::from_secs(3600)),
        );
        clock.advance(Duration::from_secs(9));
        assert!(is_cached(&cache, "a/b/c"), "base=10s should be live at 9s");
        clock.advance(Duration::from_secs(2));
        assert!(
            cache.get(&p("a/b/c")).is_none(),
            "max_ttl=3600s with base=10s should still expire at 10s"
        );
    }

    #[test]
    fn lease_and_max_ttl_both_lower_taken_jointly() {
        // base=60s, lease=30s, cap=10s → effective=10s
        let (cache, clock) = manual_cache(Duration::from_secs(60));
        cache.put(
            &p("a/b/c"),
            secret("v"),
            Some(Duration::from_secs(30)),
            Some(Duration::from_secs(10)),
        );
        clock.advance(Duration::from_secs(9));
        assert!(is_cached(&cache, "a/b/c"));
        clock.advance(Duration::from_secs(2));
        assert!(cache.get(&p("a/b/c")).is_none());
    }

    #[test]
    fn zero_max_ttl_disables_caching() {
        // A zero cap drives the effective TTL to zero.
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        let cached = cache.put(&p("a/b/c"), secret("v"), None, Some(Duration::from_secs(0)));
        assert!(!cached, "max_ttl=0 must suppress caching");
        assert!(cache.is_empty());
    }

    #[test]
    fn zero_base_ttl_disables_caching() {
        let (cache, _clock) = manual_cache(Duration::from_secs(0));
        let cached = cache.put(&p("a/b/c"), secret("v"), None, None);
        assert!(!cached, "base_ttl=0 must suppress caching");
        assert!(cache.is_empty());
    }

    // -- Invalidation ----------------------------------------------

    #[test]
    fn invalidate_drops_one_entry() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v1"), None, None);
        cache.put(&p("d/e/f"), secret("v2"), None, None);
        cache.invalidate(&p("a/b/c"));
        assert!(cache.get(&p("a/b/c")).is_none());
        // The other path is untouched.
        assert_eq!(exposed(&cache.get(&p("d/e/f"))), Some("v2"));
    }

    #[test]
    fn invalidate_unknown_path_is_a_noop() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.invalidate(&p("a/b/c")); // does not panic
        assert_eq!(cache.len(), 0);
    }

    #[test]
    fn invalidate_all_drops_everything() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v1"), None, None);
        cache.put(&p("d/e/f"), secret("v2"), None, None);
        cache.put(&p("g/h/i"), secret("v3"), None, None);
        assert_eq!(cache.len(), 3);
        cache.invalidate_all();
        assert!(cache.is_empty());
    }

    // -- Process-exit semantics ------------------------------------

    #[test]
    fn dropping_a_populated_cache_does_not_panic() {
        // Zeroize-on-drop of the stored `SecretString` cannot be
        // observed from safe code, so this only checks that a cache
        // holding entries can be dropped cleanly. The wiping itself
        // is the `secrecy::SecretString` contract.
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("v"), None, None);
        assert_eq!(cache.len(), 1);
        drop(cache);
    }

    // -- Debug / redaction -----------------------------------------

    #[test]
    fn debug_does_not_leak_plaintext() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        cache.put(&p("a/b/c"), secret("super-secret-value"), None, None);
        let dbg = format!("{cache:?}");
        assert!(!dbg.contains("super-secret-value"));
        assert!(dbg.contains("AdaptiveCache"));
        assert!(dbg.contains("redacted"));
    }

    // -- DEFAULT_BASE_TTL ------------------------------------------

    #[test]
    fn default_base_ttl_matches_adr_021_900_seconds() {
        assert_eq!(DEFAULT_BASE_TTL, Duration::from_secs(15 * 60));
        assert_eq!(DEFAULT_BASE_TTL.as_secs(), 900);
    }

    #[test]
    fn base_ttl_accessor_returns_constructor_value() {
        let cache = AdaptiveCache::new(Duration::from_secs(123));
        assert_eq!(cache.base_ttl(), Duration::from_secs(123));
    }

    // -- Length / emptiness ----------------------------------------

    #[test]
    fn len_counts_inserted_entries() {
        let (cache, _clock) = manual_cache(Duration::from_secs(60));
        assert!(cache.is_empty());
        assert_eq!(cache.len(), 0);
        cache.put(&p("a/b/c"), secret("v"), None, None);
        assert_eq!(cache.len(), 1);
        cache.put(&p("d/e/f"), secret("v2"), None, None);
        assert_eq!(cache.len(), 2);
        assert!(!cache.is_empty());
    }
}