Skip to main content

reddb_server/runtime/ai/
cost_guard.rs

1//! `CostGuardEvaluator` — pure ASK resource-cap policy.
2//!
3//! Issue #401 (PRD #391): every ASK call must respect hard limits on
4//! prompt size, completion size, source payload size, wall-clock
5//! timeout, and a per-tenant daily USD cap. This module is the pure
6//! kernel that decides whether a given call (or step within a call)
7//! is allowed to proceed.
8//!
9//! Deep module: no I/O, no clock reads, no transport. The caller
10//! threads in the current usage snapshot, the tenant's running daily
11//! spend, the deployment settings, and the current `now`. The
12//! evaluator returns either [`Decision::Allow`] or [`Decision::Reject`]
13//! carrying the offending limit name and the HTTP status the API
14//! layer should surface (413 for over-budget, 504 for timeout).
15//!
16//! ## Why a single evaluator
17//!
18//! The ASK pipeline has three natural checkpoints where caps matter:
19//!
20//! 1. **Pre-call** — once the prompt has been assembled and sources
21//!    fetched, before sending to the LLM. Catches `max_prompt_tokens`,
22//!    `max_sources_bytes`, and the daily cost cap (using an estimated
23//!    cost for the planned call).
24//! 2. **In-flight** — when streaming tokens back, the running
25//!    `completion_tokens` count must not exceed `max_completion_tokens`,
26//!    and the elapsed time must not exceed `timeout_ms`.
27//! 3. **Post-call** — once the call returns, the daily cost counter
28//!    is incremented; the next call sees the updated state.
29//!
30//! All three boil down to the same shape: "given this usage and
31//! these limits, is the call still allowed?". Hence one function.
32//!
33//! ## Daily cap reset
34//!
35//! The daily cap resets at UTC midnight. The evaluator does not read
36//! the wall clock; the caller passes [`Now::epoch_secs`], and the
37//! evaluator checks whether the supplied [`DailyState::day_epoch_secs`]
38//! is still the same UTC day. If a fresh day has started, the running
39//! spend is treated as zero — the caller is responsible for actually
40//! resetting the state afterwards (the evaluator is read-only).
41//!
42//! ## Multi-tenant isolation
43//!
44//! There is no tenant id in this module. Callers must keep a separate
45//! [`DailyState`] per tenant and pass the right one. The evaluator
46//! never mixes state across tenants because it never holds state at
47//! all.
48
/// Hard resource ceilings applied to every ASK call in a deployment.
///
/// Units: durations are milliseconds, payload sizes are bytes, and
/// token counts are raw tokens rather than characters.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Settings {
    /// Largest assembled prompt, in tokens, that may be sent to the
    /// LLM (default 8192). Going over is rejected with 413
    /// `max_prompt_tokens`.
    pub max_prompt_tokens: u32,
    /// Largest streamed completion, in tokens (default 1024). Going
    /// over is rejected with 413 `max_completion_tokens`.
    pub max_completion_tokens: u32,
    /// Largest total size, in bytes, of the concatenated
    /// `sources_flat` payload (default 262_144). Going over is
    /// rejected with 413 `max_sources_bytes`.
    pub max_sources_bytes: u32,
    /// Wall-clock budget for one ASK call, in milliseconds (default
    /// 30_000). Going over is rejected with 504 `timeout_ms`.
    pub timeout_ms: u32,
    /// Per-tenant daily spend ceiling in USD; `None` disables the
    /// check entirely. Going over is rejected with 413
    /// `daily_cost_cap_usd`.
    pub daily_cost_cap_usd: Option<f64>,
}

impl Default for Settings {
    /// Spec defaults: 8192 prompt tokens, 1024 completion tokens,
    /// 256 KiB of sources, a 30 second timeout, and no daily cost cap.
    fn default() -> Self {
        Settings {
            max_prompt_tokens: 8_192,
            max_completion_tokens: 1_024,
            max_sources_bytes: 262_144,
            timeout_ms: 30_000,
            daily_cost_cap_usd: None,
        }
    }
}
82
/// What the current call has consumed so far.
///
/// Before the LLM is contacted `completion_tokens` is 0; during
/// streaming the caller fills in the running totals.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct Usage {
    /// Token count of the assembled prompt (system + sources +
    /// question).
    pub prompt_tokens: u32,
    /// Completion tokens the LLM has produced up to this point.
    pub completion_tokens: u32,
    /// Size in bytes of the whole assembled `sources_flat` payload.
    pub sources_bytes: u32,
    /// USD amount this call is expected to add to the daily counter
    /// when it finishes. An estimate before the call; afterwards it
    /// should equal the actual provider charge.
    pub estimated_cost_usd: f64,
    /// Milliseconds of wall-clock time since the call began.
    pub elapsed_ms: u32,
}
101
/// Running daily spend for a single tenant.
///
/// `day_epoch_secs` records the epoch-second at which the UTC day of
/// the accrued spend *began*. The evaluator floors `now.epoch_secs`
/// to its UTC day and compares; when the days differ the day has
/// rolled over and `spent_usd` counts as 0.
#[derive(Debug, Clone, Copy, PartialEq, Default)]
pub struct DailyState {
    /// USD accrued during the day identified by `day_epoch_secs`.
    pub spent_usd: f64,
    /// Epoch-second marking the start of the UTC day the spend
    /// belongs to.
    pub day_epoch_secs: i64,
}
113
/// Caller-supplied current time. The evaluator never reads the system
/// clock itself, so the time has to be passed in.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Now {
    /// Current time as seconds since the Unix epoch.
    pub epoch_secs: i64,
}
119
/// Identifies the cap that caused a rejection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LimitKind {
    /// The prompt exceeded `max_prompt_tokens`.
    PromptTokens,
    /// The completion exceeded `max_completion_tokens`.
    CompletionTokens,
    /// The sources payload exceeded `max_sources_bytes`.
    SourcesBytes,
    /// The call ran longer than `timeout_ms`.
    Timeout,
    /// The projected spend exceeded `daily_cost_cap_usd`.
    DailyCostCap,
}

impl LimitKind {
    /// Name of the settings field behind this limit. It is surfaced in
    /// the API error body so operators can grep their config for the
    /// offending knob.
    pub fn field_name(self) -> &'static str {
        match self {
            LimitKind::PromptTokens => "max_prompt_tokens",
            LimitKind::CompletionTokens => "max_completion_tokens",
            LimitKind::SourcesBytes => "max_sources_bytes",
            LimitKind::Timeout => "timeout_ms",
            LimitKind::DailyCostCap => "daily_cost_cap_usd",
        }
    }

    /// HTTP status the API layer should return for this breach:
    /// 504 for a timeout, 413 for every size or budget cap.
    pub fn http_status(self) -> u16 {
        // Deliberately exhaustive (no `_` arm): adding a variant must
        // force an explicit status choice instead of silently
        // inheriting 413.
        match self {
            LimitKind::Timeout => 504,
            LimitKind::PromptTokens
            | LimitKind::CompletionTokens
            | LimitKind::SourcesBytes
            | LimitKind::DailyCostCap => 413,
        }
    }
}
152
153#[derive(Debug, Clone, PartialEq)]
154pub enum Decision {
155    Allow,
156    Reject {
157        limit: LimitKind,
158        http_status: u16,
159        detail: String,
160    },
161}
162
163/// Pure cap evaluation.
164///
165/// Check order is fixed and tested: prompt → sources → completion →
166/// timeout → daily cap. The first breach wins — the evaluator does
167/// not aggregate. This means if two limits are both tripped, the
168/// caller sees the structurally-cheapest one first (cheaper to fix:
169/// prompt assembly happens before the LLM call).
170pub fn evaluate(usage: &Usage, daily: &DailyState, settings: &Settings, now: Now) -> Decision {
171    if usage.prompt_tokens > settings.max_prompt_tokens {
172        return reject(
173            LimitKind::PromptTokens,
174            format!(
175                "prompt {} tokens exceeds max_prompt_tokens={}",
176                usage.prompt_tokens, settings.max_prompt_tokens
177            ),
178        );
179    }
180
181    if usage.sources_bytes > settings.max_sources_bytes {
182        return reject(
183            LimitKind::SourcesBytes,
184            format!(
185                "sources payload {} bytes exceeds max_sources_bytes={}",
186                usage.sources_bytes, settings.max_sources_bytes
187            ),
188        );
189    }
190
191    if usage.completion_tokens > settings.max_completion_tokens {
192        return reject(
193            LimitKind::CompletionTokens,
194            format!(
195                "completion {} tokens exceeds max_completion_tokens={}",
196                usage.completion_tokens, settings.max_completion_tokens
197            ),
198        );
199    }
200
201    if usage.elapsed_ms > settings.timeout_ms {
202        return reject(
203            LimitKind::Timeout,
204            format!(
205                "elapsed {}ms exceeds timeout_ms={}",
206                usage.elapsed_ms, settings.timeout_ms
207            ),
208        );
209    }
210
211    if let Some(cap) = settings.daily_cost_cap_usd {
212        let effective_spent = if same_utc_day(daily.day_epoch_secs, now.epoch_secs) {
213            daily.spent_usd
214        } else {
215            0.0
216        };
217        let projected = effective_spent + usage.estimated_cost_usd;
218        if projected > cap {
219            return reject(
220                LimitKind::DailyCostCap,
221                format!("projected spend ${projected:.6} exceeds daily_cost_cap_usd=${cap:.6}"),
222            );
223        }
224    }
225
226    Decision::Allow
227}
228
229fn reject(limit: LimitKind, detail: String) -> Decision {
230    Decision::Reject {
231        limit,
232        http_status: limit.http_status(),
233        detail,
234    }
235}
236
/// Number of seconds in one UTC day.
const SECS_PER_DAY: i64 = 86_400;

/// Index of the UTC day containing `epoch_secs`; day 0 starts at
/// epoch-second 0.
///
/// Euclidean division floors toward negative infinity, so pre-1970
/// timestamps land on the right day (`-1` is day `-1`, not day `0`).
/// The divisor is a positive constant, so this cannot overflow.
fn utc_day_index(epoch_secs: i64) -> i64 {
    epoch_secs.div_euclid(SECS_PER_DAY)
}

/// Epoch-second at which the UTC day containing `epoch_secs` begins.
///
/// Negative (pre-1970) inputs are floored correctly: `-1` maps to
/// `-SECS_PER_DAY`.
///
/// For inputs within one day of `i64::MIN` the day start is not
/// representable and the multiplication overflows.
pub fn utc_day_start_epoch_secs(epoch_secs: i64) -> i64 {
    utc_day_index(epoch_secs) * SECS_PER_DAY
}

/// Whether two epoch-second timestamps fall on the same UTC calendar
/// day.
///
/// This is the boundary for the daily reset: a `now` one second past
/// UTC midnight is a different day from the one before it, so the
/// previous day's spend no longer counts. Day indices are compared
/// directly, which stays overflow-free for every `i64` input.
fn same_utc_day(a: i64, b: i64) -> bool {
    utc_day_index(a) == utc_day_index(b)
}
253
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Fixtures ---------------------------------------------------------

    fn settings() -> Settings {
        Settings::default()
    }

    /// Default settings with a daily cost cap of `cap_usd`.
    fn capped(cap_usd: f64) -> Settings {
        Settings {
            daily_cost_cap_usd: Some(cap_usd),
            ..settings()
        }
    }

    fn now_at(epoch_secs: i64) -> Now {
        Now { epoch_secs }
    }

    fn fresh_state() -> DailyState {
        DailyState::default()
    }

    /// Daily state with `spent_usd` accrued on UTC day 0.
    fn spent_on_day_zero(spent_usd: f64) -> DailyState {
        DailyState {
            spent_usd,
            day_epoch_secs: 0,
        }
    }

    fn ok_usage() -> Usage {
        Usage::default()
    }

    /// Otherwise-empty usage whose estimated cost is `estimated_cost_usd`.
    fn costing(estimated_cost_usd: f64) -> Usage {
        Usage {
            estimated_cost_usd,
            ..ok_usage()
        }
    }

    /// Unpack a rejection into `(limit, http_status, detail)`; panics
    /// when the decision is not a `Reject`.
    fn rejection(decision: Decision) -> (LimitKind, u16, String) {
        match decision {
            Decision::Reject {
                limit,
                http_status,
                detail,
            } => (limit, http_status, detail),
            other => panic!("expected Reject, got {other:?}"),
        }
    }

    // ---- Limit boundaries -------------------------------------------------

    #[test]
    fn at_limit_is_allowed() {
        let caps = settings();
        let exactly_at_caps = Usage {
            prompt_tokens: caps.max_prompt_tokens,
            completion_tokens: caps.max_completion_tokens,
            sources_bytes: caps.max_sources_bytes,
            elapsed_ms: caps.timeout_ms,
            ..ok_usage()
        };
        let outcome = evaluate(&exactly_at_caps, &fresh_state(), &caps, now_at(0));
        assert_eq!(outcome, Decision::Allow);
    }

    #[test]
    fn one_over_prompt_tokens_rejects_413() {
        let caps = settings();
        let over = Usage {
            prompt_tokens: caps.max_prompt_tokens + 1,
            ..ok_usage()
        };
        let (limit, http_status, detail) =
            rejection(evaluate(&over, &fresh_state(), &caps, now_at(0)));
        assert_eq!(limit, LimitKind::PromptTokens);
        assert_eq!(http_status, 413);
        assert!(detail.contains("max_prompt_tokens"));
    }

    #[test]
    fn over_sources_bytes_rejects_413() {
        let caps = settings();
        let over = Usage {
            sources_bytes: caps.max_sources_bytes + 1,
            ..ok_usage()
        };
        let (limit, http_status, _) = rejection(evaluate(&over, &fresh_state(), &caps, now_at(0)));
        assert_eq!(limit, LimitKind::SourcesBytes);
        assert_eq!(http_status, 413);
    }

    #[test]
    fn over_completion_tokens_rejects_413() {
        let caps = settings();
        let over = Usage {
            completion_tokens: caps.max_completion_tokens + 1,
            ..ok_usage()
        };
        let (limit, http_status, _) = rejection(evaluate(&over, &fresh_state(), &caps, now_at(0)));
        assert_eq!(limit, LimitKind::CompletionTokens);
        assert_eq!(http_status, 413);
    }

    #[test]
    fn over_timeout_rejects_504() {
        let caps = settings();
        let over = Usage {
            elapsed_ms: caps.timeout_ms + 1,
            ..ok_usage()
        };
        let (limit, http_status, _) = rejection(evaluate(&over, &fresh_state(), &caps, now_at(0)));
        assert_eq!(limit, LimitKind::Timeout);
        assert_eq!(http_status, 504);
    }

    // ---- Daily cap --------------------------------------------------------

    #[test]
    fn daily_cap_none_means_unlimited() {
        let uncapped = Settings {
            daily_cost_cap_usd: None,
            ..settings()
        };
        let outcome = evaluate(
            &costing(9_999.0),
            &spent_on_day_zero(1_000_000.0),
            &uncapped,
            now_at(0),
        );
        assert_eq!(outcome, Decision::Allow);
    }

    #[test]
    fn daily_cap_blocks_when_projected_exceeds() {
        // 8.0 already spent + 2.5 planned = 10.5 > 10.0.
        let (limit, http_status, _) = rejection(evaluate(
            &costing(2.5),
            &spent_on_day_zero(8.0),
            &capped(10.0),
            now_at(0),
        ));
        assert_eq!(limit, LimitKind::DailyCostCap);
        assert_eq!(http_status, 413);
    }

    #[test]
    fn daily_cap_allows_at_exact_cap() {
        // Boundary: projected == cap is NOT a breach (strict >).
        let outcome = evaluate(
            &costing(2.0),
            &spent_on_day_zero(8.0),
            &capped(10.0),
            now_at(0),
        );
        assert_eq!(outcome, Decision::Allow);
    }

    #[test]
    fn daily_cap_resets_at_utc_midnight() {
        // Spend belongs to day 0 while `now` sits one second into
        // day 1, so yesterday's total must be ignored.
        let one_second_into_day_one = SECS_PER_DAY + 1;
        let yesterday = DailyState {
            spent_usd: 100.0,
            day_epoch_secs: 0,
        };
        assert_eq!(
            evaluate(
                &costing(9.0),
                &yesterday,
                &capped(10.0),
                now_at(one_second_into_day_one)
            ),
            Decision::Allow,
            "stale spend from yesterday must not count against today",
        );
    }

    #[test]
    fn daily_cap_same_day_other_seconds_does_not_reset() {
        // 12:34 UTC on day 0 — still the same UTC day as the recorded
        // spend, so the old total keeps counting.
        let later_same_day = 45_240;
        let outcome = evaluate(
            &costing(5.0),
            &spent_on_day_zero(9.0),
            &capped(10.0),
            now_at(later_same_day),
        );
        assert!(matches!(
            outcome,
            Decision::Reject {
                limit: LimitKind::DailyCostCap,
                ..
            }
        ));
    }

    // ---- Check order ------------------------------------------------------

    #[test]
    fn prompt_check_fires_before_completion_check() {
        // Both tripped — caller sees PromptTokens (cheaper to act on).
        let caps = settings();
        let over_both = Usage {
            prompt_tokens: caps.max_prompt_tokens + 1,
            completion_tokens: caps.max_completion_tokens + 1,
            ..ok_usage()
        };
        let (limit, _, _) = rejection(evaluate(&over_both, &fresh_state(), &caps, now_at(0)));
        assert_eq!(limit, LimitKind::PromptTokens);
    }

    #[test]
    fn timeout_check_fires_before_daily_cap() {
        let caps = capped(0.0);
        let late_and_costly = Usage {
            estimated_cost_usd: 1.0,
            elapsed_ms: caps.timeout_ms + 1,
            ..ok_usage()
        };
        let (limit, _, _) = rejection(evaluate(
            &late_and_costly,
            &fresh_state(),
            &caps,
            now_at(0),
        ));
        assert_eq!(limit, LimitKind::Timeout);
    }

    // ---- Multi-tenant isolation ------------------------------------------

    #[test]
    fn separate_daily_states_do_not_interact() {
        // Tenant A would go over the cap, tenant B is fresh. Same settings.
        let caps = capped(5.0);
        let call = costing(1.0);
        let tenant_a = spent_on_day_zero(4.5);
        let tenant_b = spent_on_day_zero(0.0);

        assert!(matches!(
            evaluate(&call, &tenant_a, &caps, now_at(0)),
            Decision::Reject {
                limit: LimitKind::DailyCostCap,
                ..
            }
        ));
        assert_eq!(evaluate(&call, &tenant_b, &caps, now_at(0)), Decision::Allow);
    }

    // ---- Field/status surface --------------------------------------------

    #[test]
    fn field_names_match_settings_keys() {
        // Operators grep the error for the config key to bump, so each
        // field name must be the exact settings key.
        assert_eq!(LimitKind::PromptTokens.field_name(), "max_prompt_tokens");
        assert_eq!(
            LimitKind::CompletionTokens.field_name(),
            "max_completion_tokens"
        );
        assert_eq!(LimitKind::SourcesBytes.field_name(), "max_sources_bytes");
        assert_eq!(LimitKind::Timeout.field_name(), "timeout_ms");
        assert_eq!(LimitKind::DailyCostCap.field_name(), "daily_cost_cap_usd");
    }

    #[test]
    fn http_status_mapping() {
        assert_eq!(LimitKind::PromptTokens.http_status(), 413);
        assert_eq!(LimitKind::CompletionTokens.http_status(), 413);
        assert_eq!(LimitKind::SourcesBytes.http_status(), 413);
        assert_eq!(LimitKind::DailyCostCap.http_status(), 413);
        assert_eq!(LimitKind::Timeout.http_status(), 504);
    }

    // ---- Default settings pinned -----------------------------------------

    #[test]
    fn defaults_match_spec() {
        let defaults = Settings::default();
        assert_eq!(defaults.max_prompt_tokens, 8192);
        assert_eq!(defaults.max_completion_tokens, 1024);
        assert_eq!(defaults.max_sources_bytes, 262_144);
        assert_eq!(defaults.timeout_ms, 30_000);
        assert_eq!(defaults.daily_cost_cap_usd, None);
    }

    // ---- Determinism / purity --------------------------------------------

    #[test]
    fn evaluation_is_deterministic() {
        let caps = capped(10.0);
        let call = Usage {
            prompt_tokens: 100,
            completion_tokens: 50,
            sources_bytes: 1000,
            estimated_cost_usd: 0.5,
            elapsed_ms: 1234,
        };
        let daily = spent_on_day_zero(1.0);
        let first = evaluate(&call, &daily, &caps, now_at(500));
        let second = evaluate(&call, &daily, &caps, now_at(500));
        assert_eq!(first, second);
    }

    #[test]
    fn same_utc_day_negative_epoch() {
        // Pre-1970 timestamps must round correctly (div_euclid).
        // -1 second is still UTC day -1, not day 0.
        assert!(same_utc_day(-1, -1));
        assert!(!same_utc_day(-1, 0));
        assert!(same_utc_day(0, SECS_PER_DAY - 1));
        assert!(!same_utc_day(0, SECS_PER_DAY));
    }

    #[test]
    fn utc_day_start_handles_negative_epoch() {
        assert_eq!(utc_day_start_epoch_secs(0), 0);
        assert_eq!(utc_day_start_epoch_secs(SECS_PER_DAY + 123), SECS_PER_DAY);
        assert_eq!(utc_day_start_epoch_secs(-1), -SECS_PER_DAY);
    }
}