Skip to main content

reddb_server/runtime/ai/
answer_cache_key.rs

1//! `AnswerCacheKey` — pure key derivation and TTL policy for the ASK
2//! answer cache.
3//!
4//! Issue #403 (PRD #391): an opt-in answer cache lets ASK skip the LLM
5//! when the same question lands against the same data under the same
6//! determinism knobs. The cache is keyed by
7//! `hash(tenant, user_scope, question, provider, model, temperature,
8//! seed, sources_fingerprint)` and gated by per-query `CACHE TTL '5m'`
9//! / `NOCACHE` clauses on top of deployment defaults.
10//!
11//! Deep module: no I/O, no clock, no storage. The caller hands in the
12//! identity scope, the determinism-resolved request shape (`Applied`
13//! from #400 in real wiring, plain fields here so the module stays
14//! decoupled), and the source fingerprint that retrieval (#398) already
15//! computes. We return a stable lowercase-hex SHA-256 key and, given
16//! `Mode` + `Settings`, an effective TTL.
17//!
18//! ## Why the module owns these decisions
19//!
20//! The cache key is a security boundary: cross-tenant key collisions
21//! leak answers. Pinning the canonical form here — with tests around
22//! the per-tenant scope, around `Some(0)` vs `None` seed, around
23//! `temperature` float canonicalisation — keeps the key derivation in
24//! one place a reviewer can audit. The wiring slice that follows can
25//! treat the key as an opaque string.
26//!
27//! ## Key canonical form
28//!
29//! Fields are concatenated in fixed order with the ASCII Unit Separator
30//! (0x1f) as delimiter:
31//!
32//! ```text
33//! tenant | 0x1f | user | 0x1f | question | 0x1f | provider | 0x1f
34//!     | model | 0x1f | temperature | 0x1f | seed | 0x1f | fingerprint
35//! ```
36//!
37//! - `temperature` serializes as `"none"` when absent, otherwise as the
38//!   shortest round-tripping IEEE-754 representation produced by Rust's
39//!   `{}` formatter (`0`, `0.5`, etc.). `0` and `none` are distinct.
40//! - `seed` serializes as `"none"` when absent, otherwise as the decimal
41//!   `u64`. `0` and `none` are distinct (guards against the same kind
42//!   of `unwrap_or(0)` regression `DeterminismDecider` already pins).
43//! - `0x1f` cannot appear in any of the inputs (SQL parser rejects it
44//!   in strings; the fingerprint, provider, model, decimals, and hex
45//!   are all ASCII printable), so the concatenation is injective without
46//!   escaping. Same trick as [`super::determinism_decider::derive_seed`].
47
48use std::time::Duration;
49
50use sha2::{Digest, Sha256};
51
/// Identity scope. `tenant` is mandatory; `user` is empty when the
/// cache should be tenant-wide. Anonymous / embedded callers with no
/// auth context pass empty strings for both.
///
/// [`derive_key`] hashes both fields ahead of every request-shape
/// field, so scopes that differ in either field feed different bytes
/// into the digest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Scope<'a> {
    /// Tenant identifier; the first field fed into the key digest.
    pub tenant: &'a str,
    /// User identifier; the second field fed into the key digest.
    /// Empty string for a tenant-wide entry.
    pub user: &'a str,
}
60
/// All inputs that determine which answer a given call would receive.
/// Re-evaluating against a changed `temperature`, `seed`, `model`, or
/// `sources_fingerprint` must miss the cache, so each appears verbatim
/// in the key.
#[derive(Debug, Clone, Copy)]
pub struct Inputs<'a> {
    /// The question text. [`derive_key`] hashes its bytes as-is; no
    /// trimming or case folding is applied there.
    pub question: &'a str,
    /// Provider name, hashed as-is (the tests use values such as
    /// `"openai"`).
    pub provider: &'a str,
    /// Model name, hashed as-is (the tests use values such as
    /// `"gpt-4o-mini"`).
    pub model: &'a str,
    /// The temperature actually sent to the provider — i.e. what
    /// `DeterminismDecider::decide` returned, not what the user asked
    /// for.
    pub temperature: Option<f32>,
    /// The seed actually sent — same caveat as `temperature`.
    pub seed: Option<u64>,
    /// Opaque stable fingerprint over the retrieved sources (URNs +
    /// content versions). The retrieval layer (#398) owns the format.
    pub sources_fingerprint: &'a str,
}
80
/// The caller's per-query caching preference, as parsed from the
/// `CACHE TTL '...'` / `NOCACHE` clauses of an ASK statement.
///
/// `Mode::default()` is [`Mode::Default`]: the query expressed no
/// preference and [`decide`] falls back to the deployment settings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Mode {
    /// The query carried neither clause. Behaviour is taken from
    /// [`Settings::enabled`] and [`Settings::default_ttl`].
    Default,
    /// `ASK '...' CACHE TTL '5m'` — read and populate the cache with
    /// the given TTL, whatever the deployment default says.
    Cache(Duration),
    /// `ASK '...' NOCACHE` — do not touch the cache for this call.
    NoCache,
}

impl Default for Mode {
    /// A query with no cache clause inherits the deployment settings.
    fn default() -> Self {
        Self::Default
    }
}
101
/// Deployment-level cache settings, surfaced via `ask.cache.*`.
///
/// `Default` is derived and yields the fully-off configuration:
/// `enabled = false`, `default_ttl = None`, `max_entries = 0`. Those
/// are exactly the per-field `Default` values, so no hand-written impl
/// is needed.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Settings {
    /// `ask.cache.enabled` (default `false`).
    pub enabled: bool,
    /// `ask.cache.default_ttl` (default `None`). `None` means "no
    /// default TTL"; queries must opt in with `CACHE TTL '...'` to
    /// populate the cache.
    pub default_ttl: Option<Duration>,
    /// `ask.cache.max_entries` (default `0`). Not consulted here — the
    /// eviction policy lives in the cache store. Exposed for
    /// completeness.
    pub max_entries: usize,
}
124
/// What the cache wrapper should do for a single ASK call.
///
/// Produced by [`decide`] from the per-query [`Mode`] and the
/// deployment [`Settings`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Decision {
    /// Skip the cache entirely (do not read, do not write).
    Bypass,
    /// Consult the cache; on miss, populate with `ttl`.
    Use { ttl: Duration },
}
133
134/// Combine the per-query [`Mode`] with deployment [`Settings`] to get
135/// the effective behaviour for this call.
136///
137/// Rules:
138/// - `NOCACHE` always wins (explicit user opt-out).
139/// - `CACHE TTL t` always wins when present (explicit user opt-in;
140///   the deployment toggle does NOT gate per-query opt-in, only the
141///   silent default).
142/// - `Default` + `enabled=true` + `default_ttl=Some(t)` → use, ttl=t.
143/// - `Default` + anything else → bypass.
144pub fn decide(mode: Mode, settings: Settings) -> Decision {
145    match mode {
146        Mode::NoCache => Decision::Bypass,
147        Mode::Cache(ttl) => Decision::Use { ttl },
148        Mode::Default => match (settings.enabled, settings.default_ttl) {
149            (true, Some(ttl)) => Decision::Use { ttl },
150            _ => Decision::Bypass,
151        },
152    }
153}
154
155/// Derive the lowercase-hex SHA-256 cache key for one ASK call.
156///
157/// The key is a function of identity scope + request-shape inputs. It
158/// does NOT include the TTL — two calls with the same identity and
159/// shape collide on the same entry regardless of how long that entry
160/// will live, which is the correct hit/miss semantic.
161pub fn derive_key(scope: Scope<'_>, inputs: Inputs<'_>) -> String {
162    const SEP: u8 = 0x1f;
163    let mut hasher = Sha256::new();
164    hasher.update(scope.tenant.as_bytes());
165    hasher.update([SEP]);
166    hasher.update(scope.user.as_bytes());
167    hasher.update([SEP]);
168    hasher.update(inputs.question.as_bytes());
169    hasher.update([SEP]);
170    hasher.update(inputs.provider.as_bytes());
171    hasher.update([SEP]);
172    hasher.update(inputs.model.as_bytes());
173    hasher.update([SEP]);
174    hasher.update(format_temperature(inputs.temperature).as_bytes());
175    hasher.update([SEP]);
176    hasher.update(format_seed(inputs.seed).as_bytes());
177    hasher.update([SEP]);
178    hasher.update(inputs.sources_fingerprint.as_bytes());
179    let digest = hasher.finalize();
180    let mut out = String::with_capacity(digest.len() * 2);
181    for b in digest {
182        out.push_str(&format!("{b:02x}"));
183    }
184    out
185}
186
/// Canonical text form of the temperature field of the cache key.
///
/// - `None` renders as `"none"`.
/// - Zero of either sign renders as `"0"`.
/// - Any other value renders through `f32`'s `Display` impl (`0.5`,
///   `0.7`, `1`, ...).
///
/// `None` and `Some(0.0)` stay distinct. Negative zero is folded into
/// positive zero because `Display` would otherwise print `-0`, giving
/// two numerically equal temperatures different cache keys.
fn format_temperature(t: Option<f32>) -> String {
    match t {
        None => "none".to_string(),
        // `-0.0 == 0.0` holds under IEEE-754, so this arm catches both
        // zeros and emits the single canonical spelling.
        Some(v) if v == 0.0 => "0".to_string(),
        Some(v) => v.to_string(),
    }
}
193
/// Canonical text form of the seed field of the cache key: `"none"`
/// when no seed was sent, otherwise the seed in plain decimal. `None`
/// and `Some(0)` therefore render differently.
fn format_seed(s: Option<u64>) -> String {
    s.map_or_else(|| String::from("none"), |seed| seed.to_string())
}
200
201/// Parse a TTL literal from `CACHE TTL '<lit>'`.
202///
203/// Accepts `<integer><unit>` with units `s` (seconds), `m` (minutes),
204/// `h` (hours), `d` (days). Whitespace is not allowed. The integer
205/// must be > 0; a zero TTL would mean "expire immediately" which is a
206/// foot-gun the parser refuses on the user's behalf.
207pub fn parse_ttl(literal: &str) -> Result<Duration, TtlParseError> {
208    if literal.is_empty() {
209        return Err(TtlParseError::Empty);
210    }
211    let bytes = literal.as_bytes();
212    let unit_idx = bytes
213        .iter()
214        .position(|b| !b.is_ascii_digit())
215        .ok_or(TtlParseError::MissingUnit)?;
216    if unit_idx == 0 {
217        return Err(TtlParseError::MissingNumber);
218    }
219    let (num_part, unit_part) = literal.split_at(unit_idx);
220    let n: u64 = num_part.parse().map_err(|_| TtlParseError::InvalidNumber)?;
221    if n == 0 {
222        return Err(TtlParseError::ZeroTtl);
223    }
224    let secs = match unit_part {
225        "s" => n,
226        "m" => n.checked_mul(60).ok_or(TtlParseError::Overflow)?,
227        "h" => n.checked_mul(3600).ok_or(TtlParseError::Overflow)?,
228        "d" => n.checked_mul(86_400).ok_or(TtlParseError::Overflow)?,
229        _ => return Err(TtlParseError::UnknownUnit),
230    };
231    Ok(Duration::from_secs(secs))
232}
233
/// Why [`parse_ttl`] rejected a literal. Named variants so the runtime
/// can map each to a deterministic error message without a stringly
/// typed switch.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TtlParseError {
    /// The literal was the empty string.
    Empty,
    /// The literal does not start with an ASCII digit (e.g. `m`,
    /// `-5m`, ` 5m`).
    MissingNumber,
    /// The literal consists only of ASCII digits (e.g. `30`).
    MissingUnit,
    /// The leading digits could not be parsed as a `u64` (e.g. the
    /// value is too large).
    InvalidNumber,
    /// The text after the digits is not exactly `s`, `m`, `h` or `d`
    /// (e.g. `5x`, `5ms`, `5 m`).
    UnknownUnit,
    /// The integer part is `0`.
    ZeroTtl,
    /// Multiplying the integer by the unit's seconds overflowed `u64`.
    Overflow,
}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Baseline identity shared by the key tests.
    fn base_scope() -> Scope<'static> {
        Scope {
            tenant: "acme",
            user: "alice",
        }
    }

    /// Baseline request shape; individual tests override one field.
    fn base_inputs() -> Inputs<'static> {
        Inputs {
            question: "what is the capital of france?",
            provider: "openai",
            model: "gpt-4o-mini",
            temperature: Some(0.0),
            seed: Some(42),
            sources_fingerprint: "abc123",
        }
    }

    /// Key for the baseline scope combined with `inputs`.
    fn key_for(inputs: Inputs<'static>) -> String {
        derive_key(base_scope(), inputs)
    }

    /// Shorthand for a whole-second duration.
    fn secs(n: u64) -> Duration {
        Duration::from_secs(n)
    }

    // ---- key: determinism & scope separation -------------------------

    #[test]
    fn key_is_deterministic_across_calls() {
        let first = derive_key(base_scope(), base_inputs());
        let second = derive_key(base_scope(), base_inputs());
        assert_eq!(first, second);
        // A SHA-256 digest in hex is 64 lowercase hex characters.
        assert_eq!(first.len(), 64);
        assert!(first
            .chars()
            .all(|c| matches!(c, '0'..='9' | 'a'..='f')));
    }

    #[test]
    fn key_changes_with_tenant() {
        let acme = derive_key(base_scope(), base_inputs());
        let globex = derive_key(
            Scope {
                tenant: "globex",
                ..base_scope()
            },
            base_inputs(),
        );
        assert_ne!(acme, globex, "per-tenant scope must isolate cache keys");
    }

    #[test]
    fn key_changes_with_user() {
        let alice = derive_key(base_scope(), base_inputs());
        let bob = derive_key(
            Scope {
                user: "bob",
                ..base_scope()
            },
            base_inputs(),
        );
        assert_ne!(alice, bob);
    }

    #[test]
    fn empty_user_is_distinct_from_named_user() {
        let tenant_wide = derive_key(
            Scope {
                user: "",
                ..base_scope()
            },
            base_inputs(),
        );
        let named = derive_key(base_scope(), base_inputs());
        assert_ne!(tenant_wide, named);
    }

    // ---- key: every input field actually feeds the digest ------------

    #[test]
    fn key_changes_with_question() {
        let changed = key_for(Inputs {
            question: "different question",
            ..base_inputs()
        });
        assert_ne!(key_for(base_inputs()), changed);
    }

    #[test]
    fn key_changes_with_provider() {
        let changed = key_for(Inputs {
            provider: "anthropic",
            ..base_inputs()
        });
        assert_ne!(key_for(base_inputs()), changed);
    }

    #[test]
    fn key_changes_with_model() {
        let changed = key_for(Inputs {
            model: "gpt-4o",
            ..base_inputs()
        });
        assert_ne!(key_for(base_inputs()), changed);
    }

    #[test]
    fn key_changes_with_temperature() {
        let changed = key_for(Inputs {
            temperature: Some(0.7),
            ..base_inputs()
        });
        assert_ne!(key_for(base_inputs()), changed);
    }

    #[test]
    fn key_changes_with_seed() {
        let changed = key_for(Inputs {
            seed: Some(43),
            ..base_inputs()
        });
        assert_ne!(key_for(base_inputs()), changed);
    }

    #[test]
    fn key_changes_with_fingerprint() {
        let changed = key_for(Inputs {
            sources_fingerprint: "def456",
            ..base_inputs()
        });
        assert_ne!(
            key_for(base_inputs()),
            changed,
            "different sources must miss cache even for identical question"
        );
    }

    // ---- key: None vs Some(0) for optional knobs ---------------------

    #[test]
    fn temperature_none_distinct_from_zero() {
        let absent = key_for(Inputs {
            temperature: None,
            ..base_inputs()
        });
        let zero = key_for(Inputs {
            temperature: Some(0.0),
            ..base_inputs()
        });
        assert_ne!(
            absent, zero,
            "None and Some(0.0) must not collide — a provider that ignores temperature is not the same as one that received zero"
        );
    }

    #[test]
    fn seed_none_distinct_from_zero() {
        let absent = key_for(Inputs {
            seed: None,
            ..base_inputs()
        });
        let zero = key_for(Inputs {
            seed: Some(0),
            ..base_inputs()
        });
        assert_ne!(absent, zero);
    }

    // ---- key: pin the canonical form against accidental change ------

    #[test]
    fn key_pinned_against_known_value() {
        // Any change to the canonical form (delimiter, field order,
        // float/seed serialization) changes this digest and fails the
        // test loudly. Update the literal only as part of a deliberate
        // key-format change.
        let minimal_scope = Scope {
            tenant: "t",
            user: "u",
        };
        let minimal_inputs = Inputs {
            question: "q",
            provider: "p",
            model: "m",
            temperature: Some(0.0),
            seed: Some(1),
            sources_fingerprint: "f",
        };
        // Computed by `printf 't\x1fu\x1fq\x1fp\x1fm\x1f0\x1f1\x1ff' | sha256sum`.
        let expected = "ca47974209a1e07b9890aa73b5bdbcc2fda1bae0ba1d77f186c9dc168b54f903";
        assert_eq!(derive_key(minimal_scope, minimal_inputs), expected);
    }

    // ---- decide(): TTL policy ---------------------------------------

    #[test]
    fn decide_nocache_always_bypasses() {
        let settings = Settings {
            enabled: true,
            default_ttl: Some(secs(60)),
            max_entries: 100,
        };
        assert_eq!(decide(Mode::NoCache, settings), Decision::Bypass);
    }

    #[test]
    fn decide_per_query_cache_wins_over_disabled_setting() {
        let decision = decide(Mode::Cache(secs(300)), Settings::default());
        assert_eq!(decision, Decision::Use { ttl: secs(300) });
    }

    #[test]
    fn decide_default_bypass_when_disabled() {
        let settings = Settings {
            enabled: false,
            default_ttl: Some(secs(60)),
            max_entries: 100,
        };
        assert_eq!(decide(Mode::Default, settings), Decision::Bypass);
    }

    #[test]
    fn decide_default_bypass_when_no_default_ttl() {
        let settings = Settings {
            enabled: true,
            default_ttl: None,
            max_entries: 100,
        };
        assert_eq!(decide(Mode::Default, settings), Decision::Bypass);
    }

    #[test]
    fn decide_default_uses_setting_ttl_when_enabled_and_ttl_set() {
        let settings = Settings {
            enabled: true,
            default_ttl: Some(secs(120)),
            max_entries: 100,
        };
        assert_eq!(
            decide(Mode::Default, settings),
            Decision::Use { ttl: secs(120) }
        );
    }

    #[test]
    fn decide_per_query_cache_overrides_setting_default() {
        let settings = Settings {
            enabled: true,
            default_ttl: Some(secs(60)),
            max_entries: 100,
        };
        assert_eq!(
            decide(Mode::Cache(secs(900)), settings),
            Decision::Use { ttl: secs(900) }
        );
    }

    // ---- parse_ttl() ------------------------------------------------

    #[test]
    fn parse_ttl_seconds() {
        assert_eq!(parse_ttl("30s"), Ok(secs(30)));
    }

    #[test]
    fn parse_ttl_minutes() {
        assert_eq!(parse_ttl("5m"), Ok(secs(300)));
    }

    #[test]
    fn parse_ttl_hours() {
        assert_eq!(parse_ttl("2h"), Ok(secs(7200)));
    }

    #[test]
    fn parse_ttl_days() {
        assert_eq!(parse_ttl("1d"), Ok(secs(86_400)));
    }

    #[test]
    fn parse_ttl_empty_rejected() {
        assert_eq!(parse_ttl(""), Err(TtlParseError::Empty));
    }

    #[test]
    fn parse_ttl_zero_rejected() {
        // A zero TTL would expire the entry the instant it is written;
        // rejecting it surfaces the misconfiguration at parse time.
        assert_eq!(parse_ttl("0s"), Err(TtlParseError::ZeroTtl));
    }

    #[test]
    fn parse_ttl_missing_unit_rejected() {
        assert_eq!(parse_ttl("30"), Err(TtlParseError::MissingUnit));
    }

    #[test]
    fn parse_ttl_missing_number_rejected() {
        assert_eq!(parse_ttl("m"), Err(TtlParseError::MissingNumber));
    }

    #[test]
    fn parse_ttl_unknown_unit_rejected() {
        for literal in ["5x", "5ms"] {
            assert_eq!(parse_ttl(literal), Err(TtlParseError::UnknownUnit));
        }
    }

    #[test]
    fn parse_ttl_whitespace_rejected() {
        // The SQL surface already strips the quotes; whitespace inside
        // the literal is not tolerated.
        assert_eq!(parse_ttl("5 m"), Err(TtlParseError::UnknownUnit));
        assert_eq!(parse_ttl(" 5m"), Err(TtlParseError::MissingNumber));
    }

    #[test]
    fn parse_ttl_negative_rejected() {
        // '-' is not a digit, so the digit run is empty and the
        // literal is reported as MissingNumber. Pinned for clarity.
        assert_eq!(parse_ttl("-5m"), Err(TtlParseError::MissingNumber));
    }

    #[test]
    fn parse_ttl_invalid_number_rejected() {
        // Twenty nines do not fit in a u64, so the integer parse fails.
        assert_eq!(
            parse_ttl("99999999999999999999s"),
            Err(TtlParseError::InvalidNumber)
        );
    }

    #[test]
    fn parse_ttl_overflow_on_unit_multiplication() {
        // Smallest day count that fits in a u64 on its own but
        // overflows once multiplied by 86_400.
        let too_many_days = u64::MAX / 86_400 + 1;
        let literal = format!("{too_many_days}d");
        assert_eq!(parse_ttl(&literal), Err(TtlParseError::Overflow));
    }

    // ---- mode default ----------------------------------------------

    #[test]
    fn mode_default_is_inherit() {
        assert_eq!(Mode::default(), Mode::Default);
    }

    // ---- determinism across modes ----------------------------------

    #[test]
    fn decide_is_deterministic_across_calls() {
        let settings = Settings {
            enabled: true,
            default_ttl: Some(secs(60)),
            max_entries: 10,
        };
        let modes = [Mode::Default, Mode::NoCache, Mode::Cache(secs(120))];
        for mode in modes {
            assert_eq!(decide(mode, settings), decide(mode, settings));
        }
    }
}