entelix_policy/cost.rs
1//! `CostMeter` + [`PricingTable`] / [`ModelPricing`] — `rust_decimal`-
2//! backed transactional charge accumulator. F4 mitigation: a charge
3//! is recorded **only after** the response decoder succeeds — there is
4//! no API path that lets an in-flight failure produce a partial
5//! charge.
6//!
7//! Pricing is per-model, per-1000-tokens. Vendors publish
8//! cents-per-1k figures; using `rust_decimal::Decimal` keeps the
9//! per-call cost an exact rational with no float-rounding drift
10//! across millions of charges.
11
12// Read-lock guards on `pricing` are scoped inside non-async blocks and
13// dropped before the ledger update / tracing call. clippy's
14// `significant_drop_tightening` flags the binding pattern even when
15// the block scope already drops correctly.
16#![allow(clippy::significant_drop_tightening)]
17
18use std::collections::HashMap;
19use std::sync::Arc;
20
21use dashmap::DashMap;
22use parking_lot::RwLock;
23use rust_decimal::Decimal;
24use serde::{Deserialize, Serialize};
25
26use entelix_core::ir::Usage;
27
28use crate::error::{PolicyError, PolicyResult};
29
30/// Per-model pricing, in cost units per 1000 tokens. The unit is
31/// caller-defined (USD cents, GBP pence, internal credits) — the
32/// meter is unit-blind and just sums `Decimal`s.
33///
34/// Every rate is mandatory (invariant #15 — no silent fallback).
35/// Vendors that don't charge for a tier (e.g. Bedrock has no cache
36/// surface today) pass [`Decimal::ZERO`] explicitly so the operator
37/// declares their pricing posture rather than inheriting whatever
38/// fallback the SDK happens to ship.
39#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
40pub struct ModelPricing {
41 /// Cost per 1000 prompt tokens.
42 pub input_per_1k: Decimal,
43 /// Cost per 1000 completion tokens.
44 pub output_per_1k: Decimal,
45 /// Cost per 1000 cache-read tokens. Vendor-published rate —
46 /// e.g. Anthropic Sonnet 4.6 = 10% of input, `OpenAI` gpt-4o =
47 /// 50% of input, Bedrock = 0 today.
48 pub cache_read_per_1k: Decimal,
49 /// Cost per 1000 cache-write tokens. Anthropic charges a
50 /// premium (~25% above input) for cache creation; many
51 /// vendors charge zero.
52 pub cache_write_per_1k: Decimal,
53}
54
55impl ModelPricing {
56 /// Build a pricing row. All four rates are required — the SDK
57 /// never invents a cache rate from the input rate.
58 #[must_use]
59 pub const fn new(
60 input_per_1k: Decimal,
61 output_per_1k: Decimal,
62 cache_read_per_1k: Decimal,
63 cache_write_per_1k: Decimal,
64 ) -> Self {
65 Self {
66 input_per_1k,
67 output_per_1k,
68 cache_read_per_1k,
69 cache_write_per_1k,
70 }
71 }
72
73 /// Compute the exact cost for one [`Usage`] sample. All
74 /// arithmetic is integer-on-`Decimal`; no floats.
75 #[must_use]
76 pub fn cost_for(&self, usage: &Usage) -> Decimal {
77 let input = self.input_per_1k * Decimal::from(usage.input_tokens) / Decimal::from(1000);
78 let output = self.output_per_1k * Decimal::from(usage.output_tokens) / Decimal::from(1000);
79 let cache_write = self.cache_write_per_1k
80 * Decimal::from(usage.cache_creation_input_tokens)
81 / Decimal::from(1000);
82 let cache_read =
83 self.cache_read_per_1k * Decimal::from(usage.cached_input_tokens) / Decimal::from(1000);
84 input + output + cache_write + cache_read
85 }
86}
87
88/// Lookup of model name → [`ModelPricing`]. Keys are the same model
89/// strings the codecs send to the wire (e.g. `"claude-opus-4-7"`,
90/// `"gpt-4.1"`). Lookup is exact; aliases are the caller's
91/// responsibility.
92#[derive(Clone, Debug, Default)]
93pub struct PricingTable {
94 by_model: HashMap<String, ModelPricing>,
95}
96
97impl PricingTable {
98 /// Empty table.
99 #[must_use]
100 pub fn new() -> Self {
101 Self::default()
102 }
103
104 /// Insert (or overwrite) one model's pricing.
105 pub fn set(&mut self, model: impl Into<String>, pricing: ModelPricing) {
106 self.by_model.insert(model.into(), pricing);
107 }
108
109 /// Builder-style insert.
110 #[must_use]
111 pub fn add_model_pricing(mut self, model: impl Into<String>, pricing: ModelPricing) -> Self {
112 self.set(model, pricing);
113 self
114 }
115
116 /// Look up a model's pricing.
117 #[must_use]
118 pub fn get(&self, model: &str) -> Option<&ModelPricing> {
119 self.by_model.get(model)
120 }
121
122 /// Number of configured models.
123 #[must_use]
124 pub fn len(&self) -> usize {
125 self.by_model.len()
126 }
127
128 /// True when the table has no entries.
129 #[must_use]
130 pub fn is_empty(&self) -> bool {
131 self.by_model.is_empty()
132 }
133}
134
/// What [`CostMeter::charge`] does when asked to price a `model`
/// that is missing from the [`PricingTable`].
///
/// The default, [`Reject`], is the safe production-billing choice: a
/// missing row is treated as a configuration bug. [`WarnOnce`] is the
/// softer alternative for staging environments and incremental
/// vendor rollouts, where a new model name can reach traffic before
/// the pricing table has caught up.
///
/// [`Reject`]: UnknownModelPolicy::Reject
/// [`WarnOnce`]: UnknownModelPolicy::WarnOnce
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum UnknownModelPolicy {
    /// Surface [`PolicyError::UnknownModel`] and let the caller choose
    /// between failing the request and swallowing the error. This is
    /// the default variant.
    #[default]
    Reject,
    /// Record a zero charge and emit a `tracing::warn` at most once
    /// per distinct model name. The meter remembers which names it
    /// has already warned about, so one missing entry cannot flood
    /// telemetry.
    WarnOnce,
}
158
159/// Observer notified on every unknown-model charge attempt.
160///
161/// Independent of [`UnknownModelPolicy`] — the policy decides what the
162/// `charge()` *return value* is (error vs. zero charge), the sink
163/// decides *what side-effect* runs when an unknown model is dispatched
164/// (metric counter increment, breadcrumb, alert). Operators wire both:
165/// `WarnOnce` keeps log noise bounded for human readers, the sink
166/// emits a counter that production dashboards consume without dedup.
167///
168/// **Contract** (mirrors [`entelix_core::AuditSink`], invariant 18):
169///
170/// - Sync `&self` — the call site is the cost-meter hot path; the
171/// sink must not block. Async work is the sink impl's responsibility
172/// (spawn an internal task, push onto a channel).
173/// - Fires on every attempt — NOT deduped. The `WarnOnce` log gate
174/// dedupes the human-facing message; this sink sees raw counts so
175/// Prometheus / Datadog scrapes see request-rate, not unique-model-
176/// count.
177/// - Fires under every [`UnknownModelPolicy`] — including `Reject`,
178/// where the call ultimately errors. The sink runs *before* the
179/// policy decision so dashboards reflect "operator hit an unknown
180/// model" regardless of whether the call surfaced as an error.
181/// - Failures stay inside — sink impls must not panic; if they do,
182/// the panic is the impl's bug. The cost-meter does not catch.
183///
184/// Example: a metrics-counter sink.
185///
186/// ```ignore
187/// struct MetricsUnknownModelSink;
188///
189/// impl entelix_policy::UnknownModelSink for MetricsUnknownModelSink {
190/// fn record_unknown_model(&self, tenant: &entelix_core::TenantId, model: &str) {
191/// metrics::counter!(
192/// "entelix_policy.unknown_model_charge",
193/// "tenant" => tenant.as_str().to_owned(),
194/// "model" => model.to_owned(),
195/// ).increment(1);
196/// }
197/// }
198/// ```
199pub trait UnknownModelSink: Send + Sync + 'static {
200 /// Record one unknown-model dispatch attempt. See trait doc for
201 /// the firing contract (every attempt, no dedup, runs ahead of
202 /// the policy decision).
203 fn record_unknown_model(&self, tenant: &entelix_core::TenantId, model: &str);
204}
205
/// Cap on distinct model names tracked under `WarnOnce`.
///
/// Every tracked name is kept as an owned `String`, so the warned-set
/// is bounded at roughly `MAX_WARNED_MODELS * (String header +
/// avg_name_len)` bytes — on the order of tens of KiB at this limit
/// for typical model names, plus map overhead. Past the cap,
/// additional distinct unknowns produce a single saturation warn and
/// stop accumulating, so an adversarial caller spamming distinct
/// names cannot drive the process out of memory.
pub const MAX_WARNED_MODELS: usize = 1024;
214
/// Tenant-ledger size limit a [`CostMeter`] starts with.
///
/// This is the same defense-in-depth idea as [`MAX_WARNED_MODELS`].
/// Without a limit, a hostile caller sending requests under
/// attacker-chosen `tenant_id` strings could grow the in-memory
/// ledger until the process runs out of memory. With the limit in
/// place, once `DEFAULT_MAX_TENANTS` distinct tenants are recorded
/// the meter logs one saturation warn and from then on quietly
/// reports `Decimal::ZERO` for charges against tenants it is not
/// already tracking. The limit is adjustable through
/// [`CostMeter::with_max_tenants`]: deployments with genuinely large
/// tenant populations raise it, while deployments that drain idle
/// tenants on a schedule can keep the default.
///
/// `10_000` is a pragmatic ceiling. One [`CostMeter`] keeps a tenant
/// key plus a `Decimal` per tenant (~64 bytes amortised, per the
/// sizing estimate this constant was chosen with), which bounds the
/// ledger at roughly 640 KiB.
pub const DEFAULT_MAX_TENANTS: usize = 10_000;
232
233/// Per-tenant cost ledger. Records the cumulative spend for every
234/// tenant that has ever been charged.
235///
236/// Cloning is cheap (`Arc` over the underlying maps) — share one
237/// meter across the whole process.
238#[derive(Clone)]
239pub struct CostMeter {
240 pricing: Arc<RwLock<PricingTable>>,
241 ledger: Arc<DashMap<entelix_core::TenantId, Decimal>>,
242 unknown_policy: UnknownModelPolicy,
243 /// Bounded set of model names already warned about under
244 /// [`UnknownModelPolicy::WarnOnce`]. Capped at
245 /// [`MAX_WARNED_MODELS`] entries to bound memory under
246 /// adversarial-input spam.
247 warned_models: Arc<DashMap<String, ()>>,
248 /// `true` once `warned_models` reached
249 /// [`MAX_WARNED_MODELS`] and the saturation warn has been
250 /// emitted. Subsequent unknown-model calls return zero charge
251 /// silently.
252 warned_saturated: Arc<std::sync::atomic::AtomicBool>,
253 /// Maximum distinct tenant ledger entries before
254 /// [`Self::charge`] starts dropping new tenants on the floor.
255 /// See [`DEFAULT_MAX_TENANTS`] for the rationale.
256 max_tenants: usize,
257 /// `true` once `ledger` reached `max_tenants` and the
258 /// saturation warn has been emitted. Subsequent unknown-tenant
259 /// calls return `Ok(Decimal::ZERO)` silently.
260 tenants_saturated: Arc<std::sync::atomic::AtomicBool>,
261 /// Optional observer fired on every unknown-model dispatch — see
262 /// [`UnknownModelSink`]'s trait doc. `None` makes every unknown-
263 /// model path a silent no-op on the sink channel; the
264 /// [`UnknownModelPolicy`] decision (`Reject` / `WarnOnce`) is
265 /// independent.
266 unknown_model_sink: Option<Arc<dyn UnknownModelSink>>,
267}
268
269impl CostMeter {
270 /// Build with the supplied pricing table, the default
271 /// `UnknownModelPolicy::Reject`, and [`DEFAULT_MAX_TENANTS`].
272 #[must_use]
273 pub fn new(pricing: PricingTable) -> Self {
274 Self {
275 pricing: Arc::new(RwLock::new(pricing)),
276 ledger: Arc::new(DashMap::new()),
277 unknown_policy: UnknownModelPolicy::default(),
278 warned_models: Arc::new(DashMap::new()),
279 warned_saturated: Arc::new(std::sync::atomic::AtomicBool::new(false)),
280 max_tenants: DEFAULT_MAX_TENANTS,
281 tenants_saturated: Arc::new(std::sync::atomic::AtomicBool::new(false)),
282 unknown_model_sink: None,
283 }
284 }
285
286 /// Builder-style override of the unknown-model policy.
287 #[must_use]
288 pub const fn with_unknown_model_policy(mut self, policy: UnknownModelPolicy) -> Self {
289 self.unknown_policy = policy;
290 self
291 }
292
293 /// Wire an [`UnknownModelSink`] observer. The sink fires on every
294 /// unknown-model dispatch attempt — distinct from
295 /// [`UnknownModelPolicy::WarnOnce`]'s log-dedup gate (the policy
296 /// suppresses repeat log lines; the sink sees raw per-attempt
297 /// counts). The [`UnknownModelPolicy`] decision (`Reject` /
298 /// `WarnOnce`) is independent. Pairs with the trait doc for the
299 /// contract.
300 #[must_use]
301 pub fn with_unknown_model_sink(mut self, sink: Arc<dyn UnknownModelSink>) -> Self {
302 self.unknown_model_sink = Some(sink);
303 self
304 }
305
306 /// Override the maximum distinct tenant entries the ledger
307 /// retains. Past this cap, [`Self::charge`] records
308 /// `Decimal::ZERO` for new tenants and emits a single saturation
309 /// warn. Operators draining idle tenants on a schedule
310 /// (`drain(tenant)` on a periodic job) should leave the
311 /// default; deployments with truly large tenant counts size
312 /// up. Setting `0` disables charging entirely (every call
313 /// returns zero) which is mostly useful for tests.
314 #[must_use]
315 pub const fn with_max_tenants(mut self, cap: usize) -> Self {
316 self.max_tenants = cap;
317 self
318 }
319
320 /// Effective tenant cap.
321 #[must_use]
322 pub const fn max_tenants(&self) -> usize {
323 self.max_tenants
324 }
325
326 /// Number of tenants currently in the ledger.
327 #[must_use]
328 pub fn tracked_tenant_count(&self) -> usize {
329 self.ledger.len()
330 }
331
332 /// Hot-swap the pricing table. Used by operators rolling out
333 /// new vendor rates without a process restart. The `&self`
334 /// receiver is intentional — every clone of the `Arc<CostMeter>`
335 /// shares the same pricing slot, so a config-reload thread can
336 /// replace rates without coordinating with charge sites.
337 pub fn replace_pricing(&self, pricing: PricingTable) {
338 *self.pricing.write() = pricing;
339 }
340
341 /// Atomically replace `model`'s pricing row without rebuilding
342 /// the rest of the table. Inserts the row when the model is not
343 /// yet present, so admin write paths revising a single vendor
344 /// tariff need not re-author the whole catalogue. Pairs with
345 /// [`Self::pricing_snapshot`] for read-modify-write cycles that
346 /// touch only one model.
347 pub fn replace_model_pricing(&self, model: impl Into<String>, pricing: ModelPricing) {
348 self.pricing.write().set(model, pricing);
349 }
350
351 /// Owned point-in-time clone of the current pricing table.
352 /// `O(models)` allocation; intended for admin diff / inspection
353 /// / external-store reconciliation flows rather than per-charge
354 /// hot paths. Mutations on the returned value do not affect the
355 /// meter — use [`Self::replace_model_pricing`] or
356 /// [`Self::replace_pricing`] to persist changes.
357 #[must_use]
358 pub fn pricing_snapshot(&self) -> PricingTable {
359 self.pricing.read().clone()
360 }
361
362 /// Internal: emit a one-shot saturation warn and flip the
363 /// `tenants_saturated` flag. Race-tolerant via
364 /// `compare_exchange` on the flag — only the first thread
365 /// past the cap logs.
366 fn warn_tenants_saturated(&self) {
367 use std::sync::atomic::Ordering;
368 if self
369 .tenants_saturated
370 .compare_exchange(false, true, Ordering::SeqCst, Ordering::Relaxed)
371 .is_ok()
372 {
373 tracing::warn!(
374 target: "entelix_policy::cost",
375 cap = self.max_tenants,
376 "cost meter tenant ledger cap reached — further unknown tenants charged as zero"
377 );
378 }
379 }
380
381 /// Internal: log a `tracing::warn` at most once per distinct
382 /// `model` name and bound the warned-set at
383 /// [`MAX_WARNED_MODELS`] — past that, emit one saturation warn
384 /// and stop accumulating so an adversarial caller cannot drive
385 /// memory unbounded with distinct unknown names.
386 fn warn_once_for_unknown(&self, model: &str) {
387 use std::sync::atomic::Ordering;
388
389 // Fast path: already saturated — silent zero charge.
390 if self.warned_saturated.load(Ordering::Relaxed) {
391 return;
392 }
393 // De-dupe gate. DashMap::insert returns the previous value;
394 // `None` means this is a fresh model.
395 if self.warned_models.contains_key(model) {
396 return;
397 }
398 // Try to claim a slot. Race-tolerant: even if multiple
399 // threads pass the contains_key check, the size check after
400 // insert handles it.
401 if self.warned_models.len() >= MAX_WARNED_MODELS {
402 // Saturate exactly once.
403 if !self.warned_saturated.swap(true, Ordering::SeqCst) {
404 tracing::warn!(
405 target: "entelix_policy::cost",
406 cap = MAX_WARNED_MODELS,
407 "cost meter warned_models cap reached — further unknown models suppressed"
408 );
409 }
410 return;
411 }
412 if self.warned_models.insert(model.to_owned(), ()).is_none() {
413 tracing::warn!(
414 target: "entelix_policy::cost",
415 model,
416 "cost meter has no pricing row for model — recording zero charge"
417 );
418 }
419 }
420
421 /// Record a charge for `tenant` against `model` for `usage`.
422 /// Returns the exact charge amount.
423 ///
424 /// When `model` has no row in the pricing table the behavior
425 /// follows [`Self::with_unknown_model_policy`] — by default a
426 /// [`PolicyError::UnknownModel`] is returned; under
427 /// [`UnknownModelPolicy::WarnOnce`] the meter logs a single
428 /// `tracing::warn` per distinct model and returns
429 /// `Decimal::ZERO`.
430 ///
431 /// **Transactional (F4)**: this method is invoked from the
432 /// `post_response` hook, which only runs after the codec has
433 /// successfully decoded the response. A network failure / parse
434 /// error short-circuits before this point and the ledger stays
435 /// untouched.
436 pub fn charge(
437 &self,
438 tenant_id: &entelix_core::TenantId,
439 model: &str,
440 usage: &Usage,
441 ) -> PolicyResult<Decimal> {
442 // Cost lookup is the only step that needs the pricing read
443 // guard. Drop it before running the unknown-model side-effect
444 // chain (operator sink + warn-once + policy decision) so a
445 // sink impl that internally acquires its own lock cannot
446 // deadlock against a concurrent `replace_pricing` writer
447 // (lock-ordering, root CLAUDE.md).
448 let lookup = self.pricing.read().get(model).map(|p| p.cost_for(usage));
449 let Some(cost) = lookup else {
450 // Sink fires ahead of the policy split — operators routing
451 // dashboards off this signal see every attempt regardless
452 // of whether the call surfaces as an error (`Reject`) or
453 // zero charge (`WarnOnce`). The log-dedup gate below
454 // belongs to the human-facing channel; the sink is the
455 // machine channel.
456 if let Some(sink) = &self.unknown_model_sink {
457 sink.record_unknown_model(tenant_id, model);
458 }
459 return match self.unknown_policy {
460 UnknownModelPolicy::Reject => Err(PolicyError::UnknownModel(model.to_owned())),
461 UnknownModelPolicy::WarnOnce => {
462 self.warn_once_for_unknown(model);
463 Ok(Decimal::ZERO)
464 }
465 };
466 };
467 if cost.is_zero() {
468 return Ok(cost);
469 }
470 // Saturation check: only NEW tenants count against the cap;
471 // already-tracked tenants accumulate into their existing
472 // entry without growing the map. This keeps the cap a
473 // memory-bound, not a charging-rate bound. `TenantId`
474 // implements `Borrow<str>`, so the lookup uses the existing
475 // `Arc<str>` without an extra allocation.
476 let already_tracked = self.ledger.contains_key(tenant_id.as_str());
477 if !already_tracked && self.ledger.len() >= self.max_tenants {
478 self.warn_tenants_saturated();
479 return Ok(Decimal::ZERO);
480 }
481 self.ledger
482 .entry(tenant_id.clone())
483 .and_modify(|v| *v += cost)
484 .or_insert(cost);
485 tracing::debug!(
486 target: "entelix_policy::cost",
487 tenant_id = tenant_id.as_str(),
488 model,
489 charge = %cost,
490 "cost meter charged"
491 );
492 Ok(cost)
493 }
494
495 /// Cumulative spend for `tenant_id`. Returns `Decimal::ZERO` for
496 /// an unseen tenant.
497 #[must_use]
498 pub fn spent_by(&self, tenant_id: &entelix_core::TenantId) -> Decimal {
499 self.ledger
500 .get(tenant_id.as_str())
501 .map_or(Decimal::ZERO, |v| *v)
502 }
503
504 /// Reset (and return) the recorded spend for `tenant_id`. Used by
505 /// nightly billing to drain the in-memory ledger after
506 /// persisting it.
507 pub fn drain(&self, tenant_id: &entelix_core::TenantId) -> Decimal {
508 self.ledger
509 .remove(tenant_id.as_str())
510 .map_or(Decimal::ZERO, |(_, v)| v)
511 }
512}
513
514#[async_trait::async_trait]
515impl entelix_core::CostCalculator for CostMeter {
516 /// Side-effect-free cost computation for telemetry. Looks up
517 /// the pricing row for `model` and returns the computed
518 /// per-call cost as `f64` for emission into observability
519 /// fields like `gen_ai.usage.cost`.
520 ///
521 /// `ctx` is accepted for the trait contract — `CostMeter` uses
522 /// a global pricing table shared across tenants. Multi-tenant
523 /// calculators that need per-tenant pricing tiers wrap a
524 /// `CostMeter` per tenant or implement `CostCalculator`
525 /// directly with a `(tenant_id, model) → ModelPricing` lookup.
526 ///
527 /// Returns `None` when the model is not in the pricing table —
528 /// telemetry consumers omit the cost attribute rather than
529 /// emitting a misleading zero. The calculator path does NOT
530 /// mutate the per-tenant ledger; ledger updates flow through
531 /// [`Self::charge`] which is invoked by the `PolicyLayer`
532 /// service after a successful response.
533 async fn compute_cost(
534 &self,
535 model: &str,
536 usage: &Usage,
537 _ctx: &entelix_core::ExecutionContext,
538 ) -> Option<f64> {
539 use rust_decimal::prelude::ToPrimitive;
540 let pricing = self.pricing.read();
541 let model_pricing = pricing.get(model)?;
542 // `Decimal::to_f64` is None only on overflow — at production
543 // pricing rates the per-call cost stays well within f64 range.
544 model_pricing.cost_for(usage).to_f64()
545 }
546}
547
/// Worst-case output-token budget the pre-call estimator assumes when
/// [`entelix_core::ir::ModelRequest::max_tokens`] is not set. Vendor
/// defaults differ (Anthropic = `max_tokens` required by API
/// contract; `OpenAI` = vendor-default ~4096; Gemini = up to 8192),
/// so the value deliberately leans toward overestimation: a
/// `RunBudget` pre-call gate should fail closed, because a
/// false-positive rejection is recoverable while a silent overrun is
/// not.
const PRE_CALL_UNBOUNDED_OUTPUT_TOKENS: u32 = 8_192;
556
557#[async_trait::async_trait]
558impl entelix_core::BudgetCostEstimator for CostMeter {
559 /// Pre-call worst-case estimate in `Decimal` precision. Looks up
560 /// the pricing row for `request.model`; if absent, returns
561 /// `None` so the pre-call gate skips rather than synthesising a
562 /// zero (matches `compute_cost`).
563 ///
564 /// Prompt-token estimation uses [`entelix_core::ByteCountTokenCounter`]
565 /// for a conservative count without coupling to a vendor-accurate
566 /// tokenizer. Operators with vendor-accurate token counters wired
567 /// via [`entelix_core::TokenCounterRegistry`] implement a custom
568 /// [`entelix_core::BudgetCostEstimator`] that consults the
569 /// registry directly — the trait surface stays vendor-agnostic.
570 ///
571 /// Output-token estimate is `request.max_tokens` when set, or
572 /// `PRE_CALL_UNBOUNDED_OUTPUT_TOKENS` as the worst-case bound.
573 /// Cache rates are treated as zero (no cache hit on a yet-to-fire
574 /// call), which biases the estimate upward.
575 async fn estimate_pre_call(
576 &self,
577 request: &entelix_core::ir::ModelRequest,
578 _ctx: &entelix_core::ExecutionContext,
579 ) -> Option<Decimal> {
580 use entelix_core::TokenCounter;
581 let pricing = self.pricing.read();
582 let model_pricing = pricing.get(&request.model)?;
583 let counter = entelix_core::ByteCountTokenCounter::new();
584 let raw_tokens = counter.count_messages(&request.messages);
585 let input_tokens = u32::try_from(raw_tokens).unwrap_or(u32::MAX); // silent-fallback-ok: saturate at u32::MAX so a pathologically long prompt over-estimates rather than wraps; biases the pre-call gate conservatively.
586 let output_tokens = request
587 .max_tokens
588 .unwrap_or(PRE_CALL_UNBOUNDED_OUTPUT_TOKENS); // silent-fallback-ok: PRE_CALL_UNBOUNDED_OUTPUT_TOKENS is the documented worst-case bound for vendors that allow unset max_tokens.
589 let projected = Usage::new(input_tokens, output_tokens);
590 Some(model_pricing.cost_for(&projected))
591 }
592
593 /// Post-call actual charge in `Decimal` precision. Read
594 /// directly from the response's [`Usage`]; this is the same
595 /// arithmetic [`Self::charge`] feeds into the per-tenant ledger,
596 /// surfaced separately so [`entelix_core::RunBudget::observe_cost`]
597 /// receives the precision-preserving value before any
598 /// `f64`-lossy telemetry conversion.
599 async fn calculate_actual(
600 &self,
601 request: &entelix_core::ir::ModelRequest,
602 usage: &Usage,
603 _ctx: &entelix_core::ExecutionContext,
604 ) -> Option<Decimal> {
605 let pricing = self.pricing.read();
606 let model_pricing = pricing.get(&request.model)?;
607 Some(model_pricing.cost_for(usage))
608 }
609}
610
611impl std::fmt::Debug for CostMeter {
612 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
613 f.debug_struct("CostMeter")
614 .field("models", &self.pricing.read().len())
615 .field("tenants", &self.ledger.len())
616 .field("unknown_policy", &self.unknown_policy)
617 .field("warned_models", &self.warned_models.len())
618 .field(
619 "warned_saturated",
620 &self
621 .warned_saturated
622 .load(std::sync::atomic::Ordering::Relaxed),
623 )
624 .field("max_tenants", &self.max_tenants)
625 .field(
626 "tenants_saturated",
627 &self
628 .tenants_saturated
629 .load(std::sync::atomic::Ordering::Relaxed),
630 )
631 .field("unknown_model_sink", &self.unknown_model_sink.is_some())
632 .finish()
633 }
634}
635
636#[cfg(test)]
637#[allow(clippy::unwrap_used)]
638mod tests {
639 use entelix_core::TenantId;
640 use std::str::FromStr;
641
642 use super::*;
643
644 fn d(s: &str) -> Decimal {
645 Decimal::from_str(s).unwrap()
646 }
647
648 fn pricing() -> PricingTable {
649 PricingTable::new()
650 .add_model_pricing(
651 "claude-opus-4-7",
652 ModelPricing::new(d("15"), d("75"), d("1.5"), d("18.75")),
653 )
654 .add_model_pricing(
655 "gpt-4.1",
656 // gpt-4.1 cache-read is 25% of input (vendor-published).
657 ModelPricing::new(d("2"), d("8"), d("0.5"), Decimal::ZERO), // magic-ok: test fixture rate
658 )
659 }
660
661 fn usage(input: u32, output: u32) -> Usage {
662 Usage::new(input, output)
663 }
664
665 #[test]
666 fn cost_for_simple_usage_is_exact() {
667 let p = pricing();
668 let claude = p.get("claude-opus-4-7").unwrap();
669 let cost = claude.cost_for(&usage(1000, 1000));
670 // 1000 input * 15/1000 + 1000 output * 75/1000 = 15 + 75 = 90
671 assert_eq!(cost, d("90"));
672 }
673
674 #[test]
675 fn cost_with_cache_writes_and_reads() {
676 let p = pricing();
677 let claude = p.get("claude-opus-4-7").unwrap();
678 let cost = claude.cost_for(
679 &Usage::new(500, 200)
680 .with_cached_input_tokens(2000)
681 .with_cache_creation_input_tokens(800),
682 );
683 // 500*15/1000 + 200*75/1000 + 800*18.75/1000 + 2000*1.5/1000
684 // = 7.5 + 15 + 15 + 3 = 40.5
685 assert_eq!(cost, d("40.5"));
686 }
687
688 #[test]
689 fn cache_read_uses_explicit_rate_no_fallback() {
690 // gpt-4.1 has cache_read_per_1k = 0.5 (25% of input). The SDK
691 // does not invent a fallback from input_per_1k — the rate is
692 // exactly what the operator declared (invariant #15).
693 let p = pricing();
694 let gpt = p.get("gpt-4.1").unwrap();
695 let cost = gpt.cost_for(&Usage::default().with_cached_input_tokens(1000));
696 // 1000 cache_read * 0.5 / 1000 = 0.5
697 assert_eq!(cost, d("0.5")); // magic-ok: arithmetic check value
698 }
699
700 #[test]
701 fn cache_write_zero_rate_means_zero_charge() {
702 // gpt-4.1 has cache_write_per_1k = ZERO (vendor doesn't
703 // charge for cache writes). Cache-write tokens accrue no
704 // cost — a regression-test for the "no silent fallback"
705 // contract: the SDK does not invent a positive rate from
706 // input_per_1k.
707 let p = pricing();
708 let gpt = p.get("gpt-4.1").unwrap();
709 let cost = gpt.cost_for(&Usage::default().with_cache_creation_input_tokens(1_000_000));
710 assert_eq!(cost, Decimal::ZERO);
711 }
712
713 #[test]
714 fn charge_sums_per_tenant_atomically() {
715 let meter = CostMeter::new(pricing());
716 let u = usage(1000, 1000);
717 meter
718 .charge(&TenantId::new("alpha"), "claude-opus-4-7", &u)
719 .unwrap();
720 meter
721 .charge(&TenantId::new("alpha"), "claude-opus-4-7", &u)
722 .unwrap();
723 meter
724 .charge(&TenantId::new("bravo"), "claude-opus-4-7", &u)
725 .unwrap();
726 assert_eq!(meter.spent_by(&TenantId::new("alpha")), d("180"));
727 assert_eq!(meter.spent_by(&TenantId::new("bravo")), d("90"));
728 assert_eq!(meter.spent_by(&TenantId::new("never-seen")), Decimal::ZERO);
729 }
730
731 #[test]
732 fn unknown_model_does_not_charge() {
733 let meter = CostMeter::new(pricing());
734 let err = meter
735 .charge(&TenantId::new("alpha"), "unknown-model", &usage(1000, 1000))
736 .unwrap_err();
737 assert!(matches!(err, PolicyError::UnknownModel(_)));
738 assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
739 }
740
741 #[test]
742 fn zero_usage_is_a_zero_charge_no_ledger_entry() {
743 let meter = CostMeter::new(pricing());
744 let cost = meter
745 .charge(
746 &TenantId::new("alpha"),
747 "claude-opus-4-7",
748 &Usage::default(),
749 )
750 .unwrap();
751 assert_eq!(cost, Decimal::ZERO);
752 assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
753 }
754
755 #[test]
756 fn drain_resets_tenant_ledger() {
757 let meter = CostMeter::new(pricing());
758 meter
759 .charge(
760 &TenantId::new("alpha"),
761 "claude-opus-4-7",
762 &usage(1000, 1000),
763 )
764 .unwrap();
765 assert_eq!(meter.drain(&TenantId::new("alpha")), d("90"));
766 assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
767 }
768
769 #[test]
770 fn warn_once_unknown_model_returns_zero_and_does_not_charge() {
771 let meter =
772 CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);
773 let cost = meter
774 .charge(
775 &TenantId::new("alpha"),
776 "vendor-preview-x",
777 &usage(1000, 1000),
778 )
779 .unwrap();
780 assert_eq!(cost, Decimal::ZERO);
781 assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
782 // Same model again — must not re-warn (state inspected via len).
783 meter
784 .charge(
785 &TenantId::new("alpha"),
786 "vendor-preview-x",
787 &usage(2000, 2000),
788 )
789 .unwrap();
790 assert_eq!(meter.warned_models.len(), 1);
791 // Distinct unknown model — separate warn entry.
792 meter
793 .charge(&TenantId::new("alpha"), "vendor-preview-y", &usage(1000, 0))
794 .unwrap();
795 assert_eq!(meter.warned_models.len(), 2);
796 }
797
#[test]
fn ledger_caps_at_max_tenants_under_adversarial_spam() {
    // A cap of 8 keeps the run short; the 192 extra tenants below are
    // the "spam" that must not be able to grow the ledger.
    let meter = CostMeter::new(pricing()).with_max_tenants(8);

    // Bills one fixed-size call against `tenant` and hands back the
    // amount the meter reports for it.
    let bill = |tenant: &TenantId| {
        meter
            .charge(tenant, "claude-opus-4-7", &usage(100, 100))
            .unwrap()
    };

    // Phase 1 — the first eight distinct tenants are admitted and billed.
    for i in 0..8 {
        let charged = bill(&TenantId::new(format!("tenant-{i}")));
        assert!(!charged.is_zero(), "tenant {i} should be charged");
    }
    assert_eq!(meter.tracked_tenant_count(), 8);

    // Phase 2 — every tenant arriving after the cap is reported as a
    // zero charge and is never added to the ledger.
    for i in 8..200 {
        let charged = bill(&TenantId::new(format!("tenant-{i}")));
        assert_eq!(
            charged,
            Decimal::ZERO,
            "tenant {i} past cap should be charged zero (silently dropped)"
        );
    }
    assert_eq!(
        meter.tracked_tenant_count(),
        8,
        "ledger size must not grow past max_tenants"
    );

    // Phase 3 — a tenant admitted before saturation keeps accumulating:
    // the cap limits distinct entries, not charging on existing ones.
    let tenant_zero = TenantId::new("tenant-0");
    let before = meter.spent_by(&tenant_zero);
    let _ = bill(&tenant_zero);
    let after = meter.spent_by(&tenant_zero);
    assert!(after > before);
}
848
#[test]
fn warned_models_caps_at_max_under_adversarial_spam() {
    let alpha = TenantId::new("alpha");
    let meter = CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);

    // Feed twice as many distinct unknown model names as the warn set
    // is allowed to hold. Results are ignored on purpose: only the
    // bookkeeping afterwards is under test.
    let attempts = MAX_WARNED_MODELS * 2;
    for name in (0..attempts).map(|i| format!("model-{i}")) {
        let _ = meter.charge(&alpha, &name, &usage(1, 1));
    }

    let within_cap = meter.warned_models.len() <= MAX_WARNED_MODELS;
    assert!(
        within_cap,
        "warned_models exceeded cap: {} > {MAX_WARNED_MODELS}",
        meter.warned_models.len()
    );

    // None of those attempts may have left a balance on the tenant.
    assert_eq!(meter.spent_by(&alpha), Decimal::ZERO);
}
865
#[test]
fn known_model_still_charges_under_warn_once() {
    // Switching the unknown-model policy to WarnOnce must leave the
    // priced path alone: a model present in the table is billed in
    // full and nothing is added to the warn set.
    let alpha = TenantId::new("alpha");
    let expected = d("90");
    let meter = CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);

    let cost = meter
        .charge(&alpha, "claude-opus-4-7", &usage(1000, 1000))
        .unwrap();

    assert_eq!(cost, expected);
    assert_eq!(meter.spent_by(&alpha), expected);
    assert_eq!(meter.warned_models.len(), 0);
}
881
#[test]
fn pricing_can_be_hot_swapped() {
    let meter = CostMeter::new(pricing());

    // Build a replacement table whose gpt-4.1 row carries a 20 / 80
    // tariff, then install it wholesale on the live meter.
    let mut replacement = pricing();
    let gpt_row = ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO);
    replacement.set("gpt-4.1", gpt_row);
    meter.replace_pricing(replacement);

    // 1000 input tokens and no output tokens → exactly the input rate.
    let alpha = TenantId::new("alpha");
    let cost = meter.charge(&alpha, "gpt-4.1", &usage(1000, 0)).unwrap();
    assert_eq!(cost, d("20"));
}
896
#[test]
fn replace_model_pricing_updates_existing_row_atomically() {
    // Revising a single model's tariff must change that row only;
    // every other row keeps charging at its previous rate.
    let alpha = TenantId::new("alpha");
    let meter = CostMeter::new(pricing());

    let revised = ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO);
    meter.replace_model_pricing("gpt-4.1", revised);

    // The replaced row bills at the new input rate.
    let gpt_charge = meter.charge(&alpha, "gpt-4.1", &usage(1000, 0)).unwrap();
    assert_eq!(gpt_charge, d("20"));

    // A row that was not passed to the call still bills as before.
    let claude_usage = usage(1000, 1000);
    let claude_charge = meter
        .charge(&alpha, "claude-opus-4-7", &claude_usage)
        .unwrap();
    assert_eq!(claude_charge, d("90"));
}
923
#[test]
fn replace_model_pricing_inserts_when_model_absent() {
    // `replace_model_pricing` doubles as an insert: a model that is not
    // in the table yet becomes chargeable after the call.
    let alpha = TenantId::new("alpha");
    let model = "new-vendor-x";
    let meter = CostMeter::new(PricingTable::new());

    // Before: the table is empty, so the charge is refused with
    // `UnknownModel` (no policy override is configured here).
    let rejected = meter.charge(&alpha, model, &usage(100, 0)).unwrap_err();
    assert!(matches!(rejected, PolicyError::UnknownModel(_)));

    let row = ModelPricing::new(d("5"), d("15"), Decimal::ZERO, Decimal::ZERO);
    meter.replace_model_pricing(model, row);

    // After: 1000 input + 1000 output tokens at 5 / 15 per 1k → 20.
    let cost = meter.charge(&alpha, model, &usage(1000, 1000)).unwrap();
    assert_eq!(cost, d("20"));
}
948
#[test]
fn pricing_snapshot_returns_owned_copy_isolated_from_subsequent_mutations() {
    // A snapshot is an independent copy of the table. This test checks
    // the isolation in both directions: edits to the copy never reach
    // the meter, and later edits to the meter never reach the copy.
    let model = "claude-opus-4-7";
    let alpha = TenantId::new("alpha");
    let meter = CostMeter::new(pricing());

    let mut snap = meter.pricing_snapshot();
    assert!(snap.get(model).is_some());

    // Direction 1: overwrite the row in the snapshot, then charge
    // through the meter — the meter must still bill its own rate.
    let inflated = ModelPricing::new(d("999"), d("999"), Decimal::ZERO, Decimal::ZERO);
    snap.set(model, inflated);
    let meter_charge = meter.charge(&alpha, model, &usage(1000, 0)).unwrap();
    assert_eq!(
        meter_charge,
        d("15"),
        "meter must ignore snapshot mutations"
    );

    // Direction 2: overwrite the row on the meter, then read the held
    // snapshot — it must still show the value written to it above.
    let deflated = ModelPricing::new(d("1"), d("1"), Decimal::ZERO, Decimal::ZERO);
    meter.replace_model_pricing(model, deflated);
    let held_input = snap.get(model).unwrap().input_per_1k;
    assert_eq!(
        held_input,
        d("999"),
        "snapshot must not see meter mutations after capture"
    );
}
984
#[test]
fn pricing_mutations_serialise_under_concurrent_clones() {
    // Two threads hammer the same meter through `Arc` clones with
    // disjoint mutation patterns: one installs a whole new table on
    // every iteration, the other rewrites a single row. The test pins
    // (a) neither thread panics and (b) the table observed after both
    // threads are joined is exactly one of the two legal outcomes —
    // not merely "a claude row exists".
    //
    // The value assertions at the end assume each mutator commits its
    // change as one step under the shared pricing lock, so a write is
    // never interleaved with another.
    const ROUNDS: i32 = 500;
    const LAST: i32 = ROUNDS - 1;

    let meter = Arc::new(CostMeter::new(pricing()));
    let barrier = Arc::new(std::sync::Barrier::new(2));

    // Baseline: the claude row is present before either thread runs.
    assert!(meter.pricing_snapshot().get("claude-opus-4-7").is_some());

    // Full-table writer: every iteration installs a table that holds
    // ONLY the claude row, wiping anything the other thread inserted.
    let m_replace = Arc::clone(&meter);
    let b_replace = Arc::clone(&barrier);
    let t_replace = std::thread::spawn(move || {
        // Rendezvous so both loops start together and actually overlap.
        b_replace.wait();
        for i in 0..ROUNDS {
            let mut next = PricingTable::new();
            next.set(
                "claude-opus-4-7",
                ModelPricing::new(
                    Decimal::from(i % 3),
                    Decimal::from((i % 3) * 2),
                    Decimal::ZERO,
                    Decimal::ZERO,
                ),
            );
            m_replace.replace_pricing(next);
        }
    });

    // Single-row writer: only ever touches the gpt-4.1 row.
    let m_partial = Arc::clone(&meter);
    let b_partial = Arc::clone(&barrier);
    let t_partial = std::thread::spawn(move || {
        b_partial.wait();
        for i in 0..ROUNDS {
            m_partial.replace_model_pricing(
                "gpt-4.1",
                ModelPricing::new(
                    Decimal::from(i % 5),
                    Decimal::from((i % 5) * 4),
                    Decimal::ZERO,
                    Decimal::ZERO,
                ),
            );
        }
    });

    // A panic inside either worker surfaces here as a failed join.
    t_replace.join().unwrap();
    t_partial.join().unwrap();

    let final_snap = meter.pricing_snapshot();

    // Only the full-table writer ever sets the claude row, and its
    // writes happen in program order, so the surviving claude row must
    // be the one from its final iteration.
    let claude = final_snap
        .get("claude-opus-4-7")
        .expect("claude row must survive — every t_replace write installs it");
    assert_eq!(
        claude.input_per_1k,
        Decimal::from(LAST % 3),
        "claude input rate must come from the final replace_pricing call"
    );
    assert_eq!(
        claude.output_per_1k,
        Decimal::from((LAST % 3) * 2),
        "claude output rate must come from the final replace_pricing call"
    );

    // The gpt-4.1 row is legitimately either absent (the last table
    // swap landed after the last single-row write) or present. When it
    // is present it was inserted after the final swap, which means the
    // single-row writer's last iteration also ran after that swap — so
    // the row must carry that last iteration's values.
    if let Some(gpt) = final_snap.get("gpt-4.1") {
        assert_eq!(
            gpt.input_per_1k,
            Decimal::from(LAST % 5),
            "a surviving gpt-4.1 row must come from the final replace_model_pricing call"
        );
        assert_eq!(
            gpt.output_per_1k,
            Decimal::from((LAST % 5) * 4),
            "a surviving gpt-4.1 row must come from the final replace_model_pricing call"
        );
    }
}
1051
#[test]
fn replace_model_pricing_is_observed_by_cloned_meters() {
    // A single-row swap performed through a clone of the meter must be
    // visible to the meter it was cloned from — the same expectation
    // the full-table `replace_pricing` path is held to.
    let original = CostMeter::new(pricing());

    let handle = original.clone();
    let row = ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO);
    handle.replace_model_pricing("gpt-4.1", row);

    // Charge through the ORIGINAL, not the clone that did the write.
    let alpha = TenantId::new("alpha");
    let cost = original.charge(&alpha, "gpt-4.1", &usage(1000, 0)).unwrap();
    assert_eq!(
        cost,
        d("20"),
        "original meter must observe a per-row swap installed via a clone"
    );
}
1074
1075 /// Test sink that records every `record_unknown_model` call.
1076 /// Captures `(tenant, model)` pairs so tests can assert order +
1077 /// count, exercises the production sink-impl shape (Arc-shareable,
1078 /// sync, internally synchronised via `Mutex`).
1079 #[derive(Default)]
1080 struct CapturingSink {
1081 calls: std::sync::Mutex<Vec<(String, String)>>,
1082 }
1083
1084 impl CapturingSink {
1085 fn snapshot(&self) -> Vec<(String, String)> {
1086 self.calls.lock().unwrap().clone()
1087 }
1088 }
1089
1090 impl UnknownModelSink for CapturingSink {
1091 fn record_unknown_model(&self, tenant: &TenantId, model: &str) {
1092 self.calls
1093 .lock()
1094 .unwrap()
1095 .push((tenant.as_str().to_owned(), model.to_owned()));
1096 }
1097 }
1098
#[test]
fn unknown_model_sink_fires_under_warn_once_without_dedup() {
    // WarnOnce collapses repeated warnings for the same model into one
    // `warned_models` entry. The sink is a separate channel and must
    // receive one call per attempt, with no collapsing.
    let alpha = TenantId::new("alpha");
    let sink = Arc::new(CapturingSink::default());
    let meter = CostMeter::new(pricing())
        .with_unknown_model_policy(UnknownModelPolicy::WarnOnce)
        .with_unknown_model_sink(sink.clone());

    // Five identical attempts against the same unknown model.
    for _attempt in 0..5 {
        let _ = meter
            .charge(&alpha, "vendor-preview-x", &usage(1, 1))
            .unwrap();
    }

    let calls = sink.snapshot();
    assert_eq!(
        calls.len(),
        5,
        "sink must observe every attempt, not the warn-once-deduped subset"
    );
    calls.iter().for_each(|(tenant, model)| {
        assert_eq!(tenant, "alpha");
        assert_eq!(model, "vendor-preview-x");
    });

    // The warn set holds a single entry despite five sink calls: the
    // two channels are tracked independently.
    assert_eq!(meter.warned_models.len(), 1);
}
1129
#[test]
fn unknown_model_sink_fires_under_reject_before_err_returns() {
    // With no policy override the charge for an unpriced model comes
    // back as `Err(UnknownModel)`. The sink must have been told about
    // the attempt anyway, so the failure is still observable.
    let bravo = TenantId::new("bravo");
    let sink = Arc::new(CapturingSink::default());
    let meter = CostMeter::new(pricing()).with_unknown_model_sink(sink.clone());

    let outcome = meter.charge(&bravo, "mystery-model", &usage(10, 10));
    let err = outcome.unwrap_err();
    assert!(matches!(err, PolicyError::UnknownModel(_)));

    // Exactly one recorded call, carrying the tenant and model used above.
    let expected = vec![(String::from("bravo"), String::from("mystery-model"))];
    assert_eq!(sink.snapshot(), expected);
}
1150
#[test]
fn absent_unknown_model_sink_is_a_silent_no_op() {
    // A meter built without a sink must still handle an unknown model
    // cleanly: the charge is refused and the tenant's balance is left
    // at zero.
    let alpha = TenantId::new("alpha");
    let meter = CostMeter::new(pricing());

    let _ = meter.charge(&alpha, "unknown", &usage(1, 1)).unwrap_err();

    assert_eq!(meter.spent_by(&alpha), Decimal::ZERO);
}
1161
#[test]
fn known_model_charge_does_not_fire_unknown_sink() {
    // The sink exists to report unknown models only. A successful
    // charge against a priced model must leave its call log empty.
    let alpha = TenantId::new("alpha");
    let sink = Arc::new(CapturingSink::default());
    let meter = CostMeter::new(pricing()).with_unknown_model_sink(sink.clone());

    let _ = meter
        .charge(&alpha, "claude-opus-4-7", &usage(1000, 1000))
        .unwrap();

    let recorded = sink.snapshot();
    assert!(
        recorded.is_empty(),
        "known-model dispatch must not invoke the unknown-model sink"
    );
}
1180
#[test]
fn unknown_model_sink_may_replace_pricing_without_deadlock() {
    // Lock-ordering regression test. The sink below reacts to an
    // unknown model by calling `replace_pricing` on the very meter that
    // invoked it. That only completes if `charge` is no longer holding
    // its pricing read guard when it calls the sink; if the guard were
    // still held, the write inside `replace_pricing` would block and
    // this test would hang instead of failing.

    /// Sink that installs a 1 / 1 tariff for whichever model it is told
    /// about, on the meter stored in its shared slot (if any).
    struct HotSwapSink {
        meter: Arc<RwLock<Option<CostMeter>>>,
    }

    impl UnknownModelSink for HotSwapSink {
        fn record_unknown_model(&self, _tenant: &TenantId, model: &str) {
            let slot = self.meter.read();
            if let Some(target) = slot.as_ref() {
                let row = ModelPricing::new(d("1"), d("1"), Decimal::ZERO, Decimal::ZERO);
                let mut table = pricing();
                table.set(model, row);
                target.replace_pricing(table);
            }
        }
    }

    // The sink needs a handle to the meter, and the meter needs the
    // sink at construction time. The cycle is broken with a slot that
    // starts empty and is filled once the meter exists.
    let slot: Arc<RwLock<Option<CostMeter>>> = Arc::new(RwLock::new(None));
    let sink = Arc::new(HotSwapSink {
        meter: Arc::clone(&slot),
    });
    let meter = CostMeter::new(pricing())
        .with_unknown_model_policy(UnknownModelPolicy::WarnOnce)
        .with_unknown_model_sink(sink);
    *slot.write() = Some(meter.clone());

    let alpha = TenantId::new("alpha");
    let model = "freshly-launched";

    // Attempt 1: the lookup misses, so the sink runs and installs the
    // row. The charge itself was already decided as unknown → zero.
    let first = meter.charge(&alpha, model, &usage(1000, 0)).unwrap();
    assert_eq!(
        first,
        Decimal::ZERO,
        "first call returns zero (the model was unknown when looked up)"
    );

    // Attempt 2: the row installed by the sink is now found and billed.
    let second = meter.charge(&alpha, model, &usage(1000, 0)).unwrap();
    assert_eq!(second, d("1"));
}
1229
#[test]
fn pricing_replacement_is_observed_by_cloned_meters() {
    // A full-table replacement performed through a clone must be
    // visible to the meter the clone was taken from.
    let original = CostMeter::new(pricing());
    let handle = original.clone();

    // Install a table whose gpt-4.1 row is 20 / 80 via the clone.
    let gpt_row = ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO);
    let mut replacement = pricing();
    replacement.set("gpt-4.1", gpt_row);
    handle.replace_pricing(replacement);

    // Charge through the ORIGINAL and expect the new input rate.
    let alpha = TenantId::new("alpha");
    let cost = original.charge(&alpha, "gpt-4.1", &usage(1000, 0)).unwrap();
    assert_eq!(
        cost,
        d("20"),
        "the original meter must charge against a pricing table installed via a clone"
    );
}
1251}