Skip to main content

entelix_policy/
cost.rs

//! [`CostMeter`] + [`PricingTable`] / [`ModelPricing`] — a
//! `rust_decimal`-backed transactional charge accumulator. F4
//! mitigation: a charge is recorded **only after** the response
//! decoder succeeds — there is no API path that lets an in-flight
//! failure produce a partial charge.
//!
//! Pricing is per-model, per-1000-tokens. Vendors publish
//! cents-per-1k figures; using `rust_decimal::Decimal` keeps the
//! per-call cost an exact decimal value with no float-rounding drift
//! across millions of charges.
11
12// Read-lock guards on `pricing` are scoped inside non-async blocks and
13// dropped before the ledger update / tracing call. clippy's
14// `significant_drop_tightening` flags the binding pattern even when
15// the block scope already drops correctly.
16#![allow(clippy::significant_drop_tightening)]
17
18use std::collections::HashMap;
19use std::sync::Arc;
20
21use dashmap::DashMap;
22use parking_lot::RwLock;
23use rust_decimal::Decimal;
24use serde::{Deserialize, Serialize};
25
26use entelix_core::ir::Usage;
27
28use crate::error::{PolicyError, PolicyResult};
29
30/// Per-model pricing, in cost units per 1000 tokens. The unit is
31/// caller-defined (USD cents, GBP pence, internal credits) — the
32/// meter is unit-blind and just sums `Decimal`s.
33///
34/// Every rate is mandatory (invariant #15 — no silent fallback).
35/// Vendors that don't charge for a tier (e.g. Bedrock has no cache
36/// surface today) pass [`Decimal::ZERO`] explicitly so the operator
37/// declares their pricing posture rather than inheriting whatever
38/// fallback the SDK happens to ship.
39#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
40pub struct ModelPricing {
41    /// Cost per 1000 prompt tokens.
42    pub input_per_1k: Decimal,
43    /// Cost per 1000 completion tokens.
44    pub output_per_1k: Decimal,
45    /// Cost per 1000 cache-read tokens. Vendor-published rate —
46    /// e.g. Anthropic Sonnet 4.6 = 10% of input, `OpenAI` gpt-4o =
47    /// 50% of input, Bedrock = 0 today.
48    pub cache_read_per_1k: Decimal,
49    /// Cost per 1000 cache-write tokens. Anthropic charges a
50    /// premium (~25% above input) for cache creation; many
51    /// vendors charge zero.
52    pub cache_write_per_1k: Decimal,
53}
54
55impl ModelPricing {
56    /// Build a pricing row. All four rates are required — the SDK
57    /// never invents a cache rate from the input rate.
58    #[must_use]
59    pub const fn new(
60        input_per_1k: Decimal,
61        output_per_1k: Decimal,
62        cache_read_per_1k: Decimal,
63        cache_write_per_1k: Decimal,
64    ) -> Self {
65        Self {
66            input_per_1k,
67            output_per_1k,
68            cache_read_per_1k,
69            cache_write_per_1k,
70        }
71    }
72
73    /// Compute the exact cost for one [`Usage`] sample. All
74    /// arithmetic is integer-on-`Decimal`; no floats.
75    #[must_use]
76    pub fn cost_for(&self, usage: &Usage) -> Decimal {
77        let input = self.input_per_1k * Decimal::from(usage.input_tokens) / Decimal::from(1000);
78        let output = self.output_per_1k * Decimal::from(usage.output_tokens) / Decimal::from(1000);
79        let cache_write = self.cache_write_per_1k
80            * Decimal::from(usage.cache_creation_input_tokens)
81            / Decimal::from(1000);
82        let cache_read =
83            self.cache_read_per_1k * Decimal::from(usage.cached_input_tokens) / Decimal::from(1000);
84        input + output + cache_write + cache_read
85    }
86}
87
88/// Lookup of model name → [`ModelPricing`]. Keys are the same model
89/// strings the codecs send to the wire (e.g. `"claude-opus-4-7"`,
90/// `"gpt-4.1"`). Lookup is exact; aliases are the caller's
91/// responsibility.
92#[derive(Clone, Debug, Default)]
93pub struct PricingTable {
94    by_model: HashMap<String, ModelPricing>,
95}
96
97impl PricingTable {
98    /// Empty table.
99    #[must_use]
100    pub fn new() -> Self {
101        Self::default()
102    }
103
104    /// Insert (or overwrite) one model's pricing.
105    pub fn set(&mut self, model: impl Into<String>, pricing: ModelPricing) {
106        self.by_model.insert(model.into(), pricing);
107    }
108
109    /// Builder-style insert.
110    #[must_use]
111    pub fn add_model_pricing(mut self, model: impl Into<String>, pricing: ModelPricing) -> Self {
112        self.set(model, pricing);
113        self
114    }
115
116    /// Look up a model's pricing.
117    #[must_use]
118    pub fn get(&self, model: &str) -> Option<&ModelPricing> {
119        self.by_model.get(model)
120    }
121
122    /// Number of configured models.
123    #[must_use]
124    pub fn len(&self) -> usize {
125        self.by_model.len()
126    }
127
128    /// True when the table has no entries.
129    #[must_use]
130    pub fn is_empty(&self) -> bool {
131        self.by_model.is_empty()
132    }
133}
134
/// What [`CostMeter::charge`] does when `model` has no row in the
/// [`PricingTable`].
///
/// The default, [`Reject`], suits production billing, where a
/// missing row is a configuration bug. [`WarnOnce`] is the softer
/// choice for staging environments and incremental vendor rollouts,
/// where a new model name can reach traffic before the pricing
/// table has been updated.
///
/// [`Reject`]: UnknownModelPolicy::Reject
/// [`WarnOnce`]: UnknownModelPolicy::WarnOnce
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[non_exhaustive]
pub enum UnknownModelPolicy {
    /// Surface [`PolicyError::UnknownModel`] and let the caller
    /// decide whether to fail the request or ignore it. This is the
    /// default variant.
    #[default]
    Reject,
    /// Record a zero charge and emit a `tracing::warn` at most once
    /// per distinct model name. The set of already-warned names
    /// lives in the meter, so one missing entry cannot flood
    /// telemetry.
    WarnOnce,
}
158
159/// Observer notified on every unknown-model charge attempt.
160///
161/// Independent of [`UnknownModelPolicy`] — the policy decides what the
162/// `charge()` *return value* is (error vs. zero charge), the sink
163/// decides *what side-effect* runs when an unknown model is dispatched
164/// (metric counter increment, breadcrumb, alert). Operators wire both:
165/// `WarnOnce` keeps log noise bounded for human readers, the sink
166/// emits a counter that production dashboards consume without dedup.
167///
168/// **Contract** (mirrors [`entelix_core::AuditSink`], invariant 18):
169///
170/// - Sync `&self` — the call site is the cost-meter hot path; the
171///   sink must not block. Async work is the sink impl's responsibility
172///   (spawn an internal task, push onto a channel).
173/// - Fires on every attempt — NOT deduped. The `WarnOnce` log gate
174///   dedupes the human-facing message; this sink sees raw counts so
175///   Prometheus / Datadog scrapes see request-rate, not unique-model-
176///   count.
177/// - Fires under every [`UnknownModelPolicy`] — including `Reject`,
178///   where the call ultimately errors. The sink runs *before* the
179///   policy decision so dashboards reflect "operator hit an unknown
180///   model" regardless of whether the call surfaced as an error.
181/// - Failures stay inside — sink impls must not panic; if they do,
182///   the panic is the impl's bug. The cost-meter does not catch.
183///
184/// Example: a metrics-counter sink.
185///
186/// ```ignore
187/// struct MetricsUnknownModelSink;
188///
189/// impl entelix_policy::UnknownModelSink for MetricsUnknownModelSink {
190///     fn record_unknown_model(&self, tenant: &entelix_core::TenantId, model: &str) {
191///         metrics::counter!(
192///             "entelix_policy.unknown_model_charge",
193///             "tenant" => tenant.as_str().to_owned(),
194///             "model" => model.to_owned(),
195///         ).increment(1);
196///     }
197/// }
198/// ```
199pub trait UnknownModelSink: Send + Sync + 'static {
200    /// Record one unknown-model dispatch attempt. See trait doc for
201    /// the firing contract (every attempt, no dedup, runs ahead of
202    /// the policy decision).
203    fn record_unknown_model(&self, tenant: &entelix_core::TenantId, model: &str);
204}
205
/// Upper bound on the number of distinct model names remembered
/// under `WarnOnce`.
///
/// Keeps the key data held by `warned_models` at roughly
/// `MAX_WARNED_MODELS * avg_name_len` bytes (plus map overhead) —
/// NOTE(review): for typical model-name lengths that is on the order
/// of tens of KiB; confirm if a tighter figure matters. Once the cap
/// is hit, further distinct unknown names trigger one saturation
/// warn and are no longer stored, so an adversarial caller cycling
/// through names cannot exhaust process memory.
pub const MAX_WARNED_MODELS: usize = 1024;
214
/// Default ceiling on the number of distinct tenants in the ledger.
///
/// Same defense-in-depth idea as [`MAX_WARNED_MODELS`]: without a
/// cap, a caller supplying attacker-chosen `tenant_id` values could
/// grow the in-memory ledger until the process runs out of memory.
/// With the cap, once `DEFAULT_MAX_TENANTS` distinct tenants are
/// tracked the meter logs one saturation warn and then silently
/// returns `Decimal::ZERO` for charges from tenants it is not yet
/// tracking. Use [`CostMeter::with_max_tenants`] to change the
/// limit: deployments with genuinely large tenant populations raise
/// it, while deployments that drain idle tenants on a schedule can
/// keep the default.
///
/// `10_000` is a pragmatic ceiling. Each ledger entry is a tenant
/// key plus a `Decimal`; at the original sizing estimate of ~64
/// bytes amortised per entry, the ledger tops out at roughly
/// 640 KiB (NOTE(review): estimate, not measured here).
pub const DEFAULT_MAX_TENANTS: usize = 10_000;
232
233/// Per-tenant cost ledger. Records the cumulative spend for every
234/// tenant that has ever been charged.
235///
236/// Cloning is cheap (`Arc` over the underlying maps) — share one
237/// meter across the whole process.
238#[derive(Clone)]
239pub struct CostMeter {
240    pricing: Arc<RwLock<PricingTable>>,
241    ledger: Arc<DashMap<entelix_core::TenantId, Decimal>>,
242    unknown_policy: UnknownModelPolicy,
243    /// Bounded set of model names already warned about under
244    /// [`UnknownModelPolicy::WarnOnce`]. Capped at
245    /// [`MAX_WARNED_MODELS`] entries to bound memory under
246    /// adversarial-input spam.
247    warned_models: Arc<DashMap<String, ()>>,
248    /// `true` once `warned_models` reached
249    /// [`MAX_WARNED_MODELS`] and the saturation warn has been
250    /// emitted. Subsequent unknown-model calls return zero charge
251    /// silently.
252    warned_saturated: Arc<std::sync::atomic::AtomicBool>,
253    /// Maximum distinct tenant ledger entries before
254    /// [`Self::charge`] starts dropping new tenants on the floor.
255    /// See [`DEFAULT_MAX_TENANTS`] for the rationale.
256    max_tenants: usize,
257    /// `true` once `ledger` reached `max_tenants` and the
258    /// saturation warn has been emitted. Subsequent unknown-tenant
259    /// calls return `Ok(Decimal::ZERO)` silently.
260    tenants_saturated: Arc<std::sync::atomic::AtomicBool>,
261    /// Optional observer fired on every unknown-model dispatch — see
262    /// [`UnknownModelSink`]'s trait doc. `None` makes every unknown-
263    /// model path a silent no-op on the sink channel; the
264    /// [`UnknownModelPolicy`] decision (`Reject` / `WarnOnce`) is
265    /// independent.
266    unknown_model_sink: Option<Arc<dyn UnknownModelSink>>,
267}
268
269impl CostMeter {
270    /// Build with the supplied pricing table, the default
271    /// `UnknownModelPolicy::Reject`, and [`DEFAULT_MAX_TENANTS`].
272    #[must_use]
273    pub fn new(pricing: PricingTable) -> Self {
274        Self {
275            pricing: Arc::new(RwLock::new(pricing)),
276            ledger: Arc::new(DashMap::new()),
277            unknown_policy: UnknownModelPolicy::default(),
278            warned_models: Arc::new(DashMap::new()),
279            warned_saturated: Arc::new(std::sync::atomic::AtomicBool::new(false)),
280            max_tenants: DEFAULT_MAX_TENANTS,
281            tenants_saturated: Arc::new(std::sync::atomic::AtomicBool::new(false)),
282            unknown_model_sink: None,
283        }
284    }
285
286    /// Builder-style override of the unknown-model policy.
287    #[must_use]
288    pub const fn with_unknown_model_policy(mut self, policy: UnknownModelPolicy) -> Self {
289        self.unknown_policy = policy;
290        self
291    }
292
293    /// Wire an [`UnknownModelSink`] observer. The sink fires on every
294    /// unknown-model dispatch attempt — distinct from
295    /// [`UnknownModelPolicy::WarnOnce`]'s log-dedup gate (the policy
296    /// suppresses repeat log lines; the sink sees raw per-attempt
297    /// counts). The [`UnknownModelPolicy`] decision (`Reject` /
298    /// `WarnOnce`) is independent. Pairs with the trait doc for the
299    /// contract.
300    #[must_use]
301    pub fn with_unknown_model_sink(mut self, sink: Arc<dyn UnknownModelSink>) -> Self {
302        self.unknown_model_sink = Some(sink);
303        self
304    }
305
306    /// Override the maximum distinct tenant entries the ledger
307    /// retains. Past this cap, [`Self::charge`] records
308    /// `Decimal::ZERO` for new tenants and emits a single saturation
309    /// warn. Operators draining idle tenants on a schedule
310    /// (`drain(tenant)` on a periodic job) should leave the
311    /// default; deployments with truly large tenant counts size
312    /// up. Setting `0` disables charging entirely (every call
313    /// returns zero) which is mostly useful for tests.
314    #[must_use]
315    pub const fn with_max_tenants(mut self, cap: usize) -> Self {
316        self.max_tenants = cap;
317        self
318    }
319
320    /// Effective tenant cap.
321    #[must_use]
322    pub const fn max_tenants(&self) -> usize {
323        self.max_tenants
324    }
325
326    /// Number of tenants currently in the ledger.
327    #[must_use]
328    pub fn tracked_tenant_count(&self) -> usize {
329        self.ledger.len()
330    }
331
332    /// Hot-swap the pricing table. Used by operators rolling out
333    /// new vendor rates without a process restart. The `&self`
334    /// receiver is intentional — every clone of the `Arc<CostMeter>`
335    /// shares the same pricing slot, so a config-reload thread can
336    /// replace rates without coordinating with charge sites.
337    pub fn replace_pricing(&self, pricing: PricingTable) {
338        *self.pricing.write() = pricing;
339    }
340
341    /// Atomically replace `model`'s pricing row without rebuilding
342    /// the rest of the table. Inserts the row when the model is not
343    /// yet present, so admin write paths revising a single vendor
344    /// tariff need not re-author the whole catalogue. Pairs with
345    /// [`Self::pricing_snapshot`] for read-modify-write cycles that
346    /// touch only one model.
347    pub fn replace_model_pricing(&self, model: impl Into<String>, pricing: ModelPricing) {
348        self.pricing.write().set(model, pricing);
349    }
350
351    /// Owned point-in-time clone of the current pricing table.
352    /// `O(models)` allocation; intended for admin diff / inspection
353    /// / external-store reconciliation flows rather than per-charge
354    /// hot paths. Mutations on the returned value do not affect the
355    /// meter — use [`Self::replace_model_pricing`] or
356    /// [`Self::replace_pricing`] to persist changes.
357    #[must_use]
358    pub fn pricing_snapshot(&self) -> PricingTable {
359        self.pricing.read().clone()
360    }
361
362    /// Internal: emit a one-shot saturation warn and flip the
363    /// `tenants_saturated` flag. Race-tolerant via
364    /// `compare_exchange` on the flag — only the first thread
365    /// past the cap logs.
366    fn warn_tenants_saturated(&self) {
367        use std::sync::atomic::Ordering;
368        if self
369            .tenants_saturated
370            .compare_exchange(false, true, Ordering::SeqCst, Ordering::Relaxed)
371            .is_ok()
372        {
373            tracing::warn!(
374                target: "entelix_policy::cost",
375                cap = self.max_tenants,
376                "cost meter tenant ledger cap reached — further unknown tenants charged as zero"
377            );
378        }
379    }
380
381    /// Internal: log a `tracing::warn` at most once per distinct
382    /// `model` name and bound the warned-set at
383    /// [`MAX_WARNED_MODELS`] — past that, emit one saturation warn
384    /// and stop accumulating so an adversarial caller cannot drive
385    /// memory unbounded with distinct unknown names.
386    fn warn_once_for_unknown(&self, model: &str) {
387        use std::sync::atomic::Ordering;
388
389        // Fast path: already saturated — silent zero charge.
390        if self.warned_saturated.load(Ordering::Relaxed) {
391            return;
392        }
393        // De-dupe gate. DashMap::insert returns the previous value;
394        // `None` means this is a fresh model.
395        if self.warned_models.contains_key(model) {
396            return;
397        }
398        // Try to claim a slot. Race-tolerant: even if multiple
399        // threads pass the contains_key check, the size check after
400        // insert handles it.
401        if self.warned_models.len() >= MAX_WARNED_MODELS {
402            // Saturate exactly once.
403            if !self.warned_saturated.swap(true, Ordering::SeqCst) {
404                tracing::warn!(
405                    target: "entelix_policy::cost",
406                    cap = MAX_WARNED_MODELS,
407                    "cost meter warned_models cap reached — further unknown models suppressed"
408                );
409            }
410            return;
411        }
412        if self.warned_models.insert(model.to_owned(), ()).is_none() {
413            tracing::warn!(
414                target: "entelix_policy::cost",
415                model,
416                "cost meter has no pricing row for model — recording zero charge"
417            );
418        }
419    }
420
421    /// Record a charge for `tenant` against `model` for `usage`.
422    /// Returns the exact charge amount.
423    ///
424    /// When `model` has no row in the pricing table the behavior
425    /// follows [`Self::with_unknown_model_policy`] — by default a
426    /// [`PolicyError::UnknownModel`] is returned; under
427    /// [`UnknownModelPolicy::WarnOnce`] the meter logs a single
428    /// `tracing::warn` per distinct model and returns
429    /// `Decimal::ZERO`.
430    ///
431    /// **Transactional (F4)**: this method is invoked from the
432    /// `post_response` hook, which only runs after the codec has
433    /// successfully decoded the response. A network failure / parse
434    /// error short-circuits before this point and the ledger stays
435    /// untouched.
436    pub fn charge(
437        &self,
438        tenant_id: &entelix_core::TenantId,
439        model: &str,
440        usage: &Usage,
441    ) -> PolicyResult<Decimal> {
442        // Cost lookup is the only step that needs the pricing read
443        // guard. Drop it before running the unknown-model side-effect
444        // chain (operator sink + warn-once + policy decision) so a
445        // sink impl that internally acquires its own lock cannot
446        // deadlock against a concurrent `replace_pricing` writer
447        // (lock-ordering, root CLAUDE.md).
448        let lookup = self.pricing.read().get(model).map(|p| p.cost_for(usage));
449        let Some(cost) = lookup else {
450            // Sink fires ahead of the policy split — operators routing
451            // dashboards off this signal see every attempt regardless
452            // of whether the call surfaces as an error (`Reject`) or
453            // zero charge (`WarnOnce`). The log-dedup gate below
454            // belongs to the human-facing channel; the sink is the
455            // machine channel.
456            if let Some(sink) = &self.unknown_model_sink {
457                sink.record_unknown_model(tenant_id, model);
458            }
459            return match self.unknown_policy {
460                UnknownModelPolicy::Reject => Err(PolicyError::UnknownModel(model.to_owned())),
461                UnknownModelPolicy::WarnOnce => {
462                    self.warn_once_for_unknown(model);
463                    Ok(Decimal::ZERO)
464                }
465            };
466        };
467        if cost.is_zero() {
468            return Ok(cost);
469        }
470        // Saturation check: only NEW tenants count against the cap;
471        // already-tracked tenants accumulate into their existing
472        // entry without growing the map. This keeps the cap a
473        // memory-bound, not a charging-rate bound. `TenantId`
474        // implements `Borrow<str>`, so the lookup uses the existing
475        // `Arc<str>` without an extra allocation.
476        let already_tracked = self.ledger.contains_key(tenant_id.as_str());
477        if !already_tracked && self.ledger.len() >= self.max_tenants {
478            self.warn_tenants_saturated();
479            return Ok(Decimal::ZERO);
480        }
481        self.ledger
482            .entry(tenant_id.clone())
483            .and_modify(|v| *v += cost)
484            .or_insert(cost);
485        tracing::debug!(
486            target: "entelix_policy::cost",
487            tenant_id = tenant_id.as_str(),
488            model,
489            charge = %cost,
490            "cost meter charged"
491        );
492        Ok(cost)
493    }
494
495    /// Cumulative spend for `tenant_id`. Returns `Decimal::ZERO` for
496    /// an unseen tenant.
497    #[must_use]
498    pub fn spent_by(&self, tenant_id: &entelix_core::TenantId) -> Decimal {
499        self.ledger
500            .get(tenant_id.as_str())
501            .map_or(Decimal::ZERO, |v| *v)
502    }
503
504    /// Reset (and return) the recorded spend for `tenant_id`. Used by
505    /// nightly billing to drain the in-memory ledger after
506    /// persisting it.
507    pub fn drain(&self, tenant_id: &entelix_core::TenantId) -> Decimal {
508        self.ledger
509            .remove(tenant_id.as_str())
510            .map_or(Decimal::ZERO, |(_, v)| v)
511    }
512}
513
514#[async_trait::async_trait]
515impl entelix_core::CostCalculator for CostMeter {
516    /// Side-effect-free cost computation for telemetry. Looks up
517    /// the pricing row for `model` and returns the computed
518    /// per-call cost as `f64` for emission into observability
519    /// fields like `gen_ai.usage.cost`.
520    ///
521    /// `ctx` is accepted for the trait contract — `CostMeter` uses
522    /// a global pricing table shared across tenants. Multi-tenant
523    /// calculators that need per-tenant pricing tiers wrap a
524    /// `CostMeter` per tenant or implement `CostCalculator`
525    /// directly with a `(tenant_id, model) → ModelPricing` lookup.
526    ///
527    /// Returns `None` when the model is not in the pricing table —
528    /// telemetry consumers omit the cost attribute rather than
529    /// emitting a misleading zero. The calculator path does NOT
530    /// mutate the per-tenant ledger; ledger updates flow through
531    /// [`Self::charge`] which is invoked by the `PolicyLayer`
532    /// service after a successful response.
533    async fn compute_cost(
534        &self,
535        model: &str,
536        usage: &Usage,
537        _ctx: &entelix_core::ExecutionContext,
538    ) -> Option<f64> {
539        use rust_decimal::prelude::ToPrimitive;
540        let pricing = self.pricing.read();
541        let model_pricing = pricing.get(model)?;
542        // `Decimal::to_f64` is None only on overflow — at production
543        // pricing rates the per-call cost stays well within f64 range.
544        model_pricing.cost_for(usage).to_f64()
545    }
546}
547
/// Worst-case output-token budget assumed by the pre-call estimator
/// when [`entelix_core::ir::ModelRequest::max_tokens`] is not set.
///
/// Vendor defaults differ (Anthropic requires `max_tokens` in its
/// API contract; `OpenAI` defaults to roughly 4096; Gemini allows up
/// to 8192). The value deliberately leans toward overestimation so
/// that a `RunBudget` pre-call gate fails closed: a false-positive
/// rejection can be recovered from, a silent overrun cannot.
const PRE_CALL_UNBOUNDED_OUTPUT_TOKENS: u32 = 8_192;
556
557#[async_trait::async_trait]
558impl entelix_core::BudgetCostEstimator for CostMeter {
559    /// Pre-call worst-case estimate in `Decimal` precision. Looks up
560    /// the pricing row for `request.model`; if absent, returns
561    /// `None` so the pre-call gate skips rather than synthesising a
562    /// zero (matches `compute_cost`).
563    ///
564    /// Prompt-token estimation uses [`entelix_core::ByteCountTokenCounter`]
565    /// for a conservative count without coupling to a vendor-accurate
566    /// tokenizer. Operators with vendor-accurate token counters wired
567    /// via [`entelix_core::TokenCounterRegistry`] implement a custom
568    /// [`entelix_core::BudgetCostEstimator`] that consults the
569    /// registry directly — the trait surface stays vendor-agnostic.
570    ///
571    /// Output-token estimate is `request.max_tokens` when set, or
572    /// `PRE_CALL_UNBOUNDED_OUTPUT_TOKENS` as the worst-case bound.
573    /// Cache rates are treated as zero (no cache hit on a yet-to-fire
574    /// call), which biases the estimate upward.
575    async fn estimate_pre_call(
576        &self,
577        request: &entelix_core::ir::ModelRequest,
578        _ctx: &entelix_core::ExecutionContext,
579    ) -> Option<Decimal> {
580        use entelix_core::TokenCounter;
581        let pricing = self.pricing.read();
582        let model_pricing = pricing.get(&request.model)?;
583        let counter = entelix_core::ByteCountTokenCounter::new();
584        let raw_tokens = counter.count_messages(&request.messages);
585        let input_tokens = u32::try_from(raw_tokens).unwrap_or(u32::MAX); // silent-fallback-ok: saturate at u32::MAX so a pathologically long prompt over-estimates rather than wraps; biases the pre-call gate conservatively.
586        let output_tokens = request
587            .max_tokens
588            .unwrap_or(PRE_CALL_UNBOUNDED_OUTPUT_TOKENS); // silent-fallback-ok: PRE_CALL_UNBOUNDED_OUTPUT_TOKENS is the documented worst-case bound for vendors that allow unset max_tokens.
589        let projected = Usage::new(input_tokens, output_tokens);
590        Some(model_pricing.cost_for(&projected))
591    }
592
593    /// Post-call actual charge in `Decimal` precision. Read
594    /// directly from the response's [`Usage`]; this is the same
595    /// arithmetic [`Self::charge`] feeds into the per-tenant ledger,
596    /// surfaced separately so [`entelix_core::RunBudget::observe_cost`]
597    /// receives the precision-preserving value before any
598    /// `f64`-lossy telemetry conversion.
599    async fn calculate_actual(
600        &self,
601        request: &entelix_core::ir::ModelRequest,
602        usage: &Usage,
603        _ctx: &entelix_core::ExecutionContext,
604    ) -> Option<Decimal> {
605        let pricing = self.pricing.read();
606        let model_pricing = pricing.get(&request.model)?;
607        Some(model_pricing.cost_for(usage))
608    }
609}
610
611impl std::fmt::Debug for CostMeter {
612    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
613        f.debug_struct("CostMeter")
614            .field("models", &self.pricing.read().len())
615            .field("tenants", &self.ledger.len())
616            .field("unknown_policy", &self.unknown_policy)
617            .field("warned_models", &self.warned_models.len())
618            .field(
619                "warned_saturated",
620                &self
621                    .warned_saturated
622                    .load(std::sync::atomic::Ordering::Relaxed),
623            )
624            .field("max_tenants", &self.max_tenants)
625            .field(
626                "tenants_saturated",
627                &self
628                    .tenants_saturated
629                    .load(std::sync::atomic::Ordering::Relaxed),
630            )
631            .field("unknown_model_sink", &self.unknown_model_sink.is_some())
632            .finish()
633    }
634}
635
636#[cfg(test)]
637#[allow(clippy::unwrap_used)]
638mod tests {
639    use entelix_core::TenantId;
640    use std::str::FromStr;
641
642    use super::*;
643
644    fn d(s: &str) -> Decimal {
645        Decimal::from_str(s).unwrap()
646    }
647
648    fn pricing() -> PricingTable {
649        PricingTable::new()
650            .add_model_pricing(
651                "claude-opus-4-7",
652                ModelPricing::new(d("15"), d("75"), d("1.5"), d("18.75")),
653            )
654            .add_model_pricing(
655                "gpt-4.1",
656                // gpt-4.1 cache-read is 25% of input (vendor-published).
657                ModelPricing::new(d("2"), d("8"), d("0.5"), Decimal::ZERO), // magic-ok: test fixture rate
658            )
659    }
660
661    fn usage(input: u32, output: u32) -> Usage {
662        Usage::new(input, output)
663    }
664
665    #[test]
666    fn cost_for_simple_usage_is_exact() {
667        let p = pricing();
668        let claude = p.get("claude-opus-4-7").unwrap();
669        let cost = claude.cost_for(&usage(1000, 1000));
670        // 1000 input * 15/1000 + 1000 output * 75/1000 = 15 + 75 = 90
671        assert_eq!(cost, d("90"));
672    }
673
674    #[test]
675    fn cost_with_cache_writes_and_reads() {
676        let p = pricing();
677        let claude = p.get("claude-opus-4-7").unwrap();
678        let cost = claude.cost_for(
679            &Usage::new(500, 200)
680                .with_cached_input_tokens(2000)
681                .with_cache_creation_input_tokens(800),
682        );
683        // 500*15/1000 + 200*75/1000 + 800*18.75/1000 + 2000*1.5/1000
684        // = 7.5 + 15 + 15 + 3 = 40.5
685        assert_eq!(cost, d("40.5"));
686    }
687
688    #[test]
689    fn cache_read_uses_explicit_rate_no_fallback() {
690        // gpt-4.1 has cache_read_per_1k = 0.5 (25% of input). The SDK
691        // does not invent a fallback from input_per_1k — the rate is
692        // exactly what the operator declared (invariant #15).
693        let p = pricing();
694        let gpt = p.get("gpt-4.1").unwrap();
695        let cost = gpt.cost_for(&Usage::default().with_cached_input_tokens(1000));
696        // 1000 cache_read * 0.5 / 1000 = 0.5
697        assert_eq!(cost, d("0.5")); // magic-ok: arithmetic check value
698    }
699
700    #[test]
701    fn cache_write_zero_rate_means_zero_charge() {
702        // gpt-4.1 has cache_write_per_1k = ZERO (vendor doesn't
703        // charge for cache writes). Cache-write tokens accrue no
704        // cost — a regression-test for the "no silent fallback"
705        // contract: the SDK does not invent a positive rate from
706        // input_per_1k.
707        let p = pricing();
708        let gpt = p.get("gpt-4.1").unwrap();
709        let cost = gpt.cost_for(&Usage::default().with_cache_creation_input_tokens(1_000_000));
710        assert_eq!(cost, Decimal::ZERO);
711    }
712
713    #[test]
714    fn charge_sums_per_tenant_atomically() {
715        let meter = CostMeter::new(pricing());
716        let u = usage(1000, 1000);
717        meter
718            .charge(&TenantId::new("alpha"), "claude-opus-4-7", &u)
719            .unwrap();
720        meter
721            .charge(&TenantId::new("alpha"), "claude-opus-4-7", &u)
722            .unwrap();
723        meter
724            .charge(&TenantId::new("bravo"), "claude-opus-4-7", &u)
725            .unwrap();
726        assert_eq!(meter.spent_by(&TenantId::new("alpha")), d("180"));
727        assert_eq!(meter.spent_by(&TenantId::new("bravo")), d("90"));
728        assert_eq!(meter.spent_by(&TenantId::new("never-seen")), Decimal::ZERO);
729    }
730
731    #[test]
732    fn unknown_model_does_not_charge() {
733        let meter = CostMeter::new(pricing());
734        let err = meter
735            .charge(&TenantId::new("alpha"), "unknown-model", &usage(1000, 1000))
736            .unwrap_err();
737        assert!(matches!(err, PolicyError::UnknownModel(_)));
738        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
739    }
740
741    #[test]
742    fn zero_usage_is_a_zero_charge_no_ledger_entry() {
743        let meter = CostMeter::new(pricing());
744        let cost = meter
745            .charge(
746                &TenantId::new("alpha"),
747                "claude-opus-4-7",
748                &Usage::default(),
749            )
750            .unwrap();
751        assert_eq!(cost, Decimal::ZERO);
752        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
753    }
754
755    #[test]
756    fn drain_resets_tenant_ledger() {
757        let meter = CostMeter::new(pricing());
758        meter
759            .charge(
760                &TenantId::new("alpha"),
761                "claude-opus-4-7",
762                &usage(1000, 1000),
763            )
764            .unwrap();
765        assert_eq!(meter.drain(&TenantId::new("alpha")), d("90"));
766        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
767    }
768
769    #[test]
770    fn warn_once_unknown_model_returns_zero_and_does_not_charge() {
771        let meter =
772            CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);
773        let cost = meter
774            .charge(
775                &TenantId::new("alpha"),
776                "vendor-preview-x",
777                &usage(1000, 1000),
778            )
779            .unwrap();
780        assert_eq!(cost, Decimal::ZERO);
781        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
782        // Same model again — must not re-warn (state inspected via len).
783        meter
784            .charge(
785                &TenantId::new("alpha"),
786                "vendor-preview-x",
787                &usage(2000, 2000),
788            )
789            .unwrap();
790        assert_eq!(meter.warned_models.len(), 1);
791        // Distinct unknown model — separate warn entry.
792        meter
793            .charge(&TenantId::new("alpha"), "vendor-preview-y", &usage(1000, 0))
794            .unwrap();
795        assert_eq!(meter.warned_models.len(), 2);
796    }
797
798    #[test]
799    fn ledger_caps_at_max_tenants_under_adversarial_spam() {
800        // Tiny cap so the test runs fast. Real deployments use
801        // DEFAULT_MAX_TENANTS (10000) or override via with_max_tenants.
802        let meter = CostMeter::new(pricing()).with_max_tenants(8);
803        // First 8 distinct tenants land in the ledger and accumulate.
804        for i in 0..8 {
805            let charge = meter
806                .charge(
807                    &TenantId::new(format!("tenant-{i}")),
808                    "claude-opus-4-7",
809                    &usage(100, 100),
810                )
811                .unwrap();
812            assert!(!charge.is_zero(), "tenant {i} should be charged");
813        }
814        assert_eq!(meter.tracked_tenant_count(), 8);
815        // Past the cap: NEW tenants record Decimal::ZERO and never
816        // join the ledger — saturation flag fires once.
817        for i in 8..200 {
818            let charge = meter
819                .charge(
820                    &TenantId::new(format!("tenant-{i}")),
821                    "claude-opus-4-7",
822                    &usage(100, 100),
823                )
824                .unwrap();
825            assert_eq!(
826                charge,
827                Decimal::ZERO,
828                "tenant {i} past cap should be charged zero (silently dropped)"
829            );
830        }
831        assert_eq!(
832            meter.tracked_tenant_count(),
833            8,
834            "ledger size must not grow past max_tenants"
835        );
836        // Already-tracked tenants continue to accumulate normally —
837        // the cap is on distinct entries, not on charging rate.
838        let prior = meter.spent_by(&TenantId::new("tenant-0"));
839        let _ = meter
840            .charge(
841                &TenantId::new("tenant-0"),
842                "claude-opus-4-7",
843                &usage(100, 100),
844            )
845            .unwrap();
846        assert!(meter.spent_by(&TenantId::new("tenant-0")) > prior);
847    }
848
849    #[test]
850    fn warned_models_caps_at_max_under_adversarial_spam() {
851        let meter =
852            CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);
853        // Spam well past the cap with distinct names.
854        for i in 0..(MAX_WARNED_MODELS * 2) {
855            let _ = meter.charge(&TenantId::new("alpha"), &format!("model-{i}"), &usage(1, 1));
856        }
857        assert!(
858            meter.warned_models.len() <= MAX_WARNED_MODELS,
859            "warned_models exceeded cap: {} > {MAX_WARNED_MODELS}",
860            meter.warned_models.len()
861        );
862        // Ledger remains untouched (zero charges).
863        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
864    }
865
866    #[test]
867    fn known_model_still_charges_under_warn_once() {
868        let meter =
869            CostMeter::new(pricing()).with_unknown_model_policy(UnknownModelPolicy::WarnOnce);
870        let cost = meter
871            .charge(
872                &TenantId::new("alpha"),
873                "claude-opus-4-7",
874                &usage(1000, 1000),
875            )
876            .unwrap();
877        assert_eq!(cost, d("90"));
878        assert_eq!(meter.spent_by(&TenantId::new("alpha")), d("90"));
879        assert_eq!(meter.warned_models.len(), 0);
880    }
881
882    #[test]
883    fn pricing_can_be_hot_swapped() {
884        let meter = CostMeter::new(pricing());
885        let mut new_pricing = pricing();
886        new_pricing.set(
887            "gpt-4.1",
888            ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO),
889        );
890        meter.replace_pricing(new_pricing);
891        let cost = meter
892            .charge(&TenantId::new("alpha"), "gpt-4.1", &usage(1000, 0))
893            .unwrap();
894        assert_eq!(cost, d("20"));
895    }
896
897    #[test]
898    fn replace_model_pricing_updates_existing_row_atomically() {
899        // Single-row replace leaves untouched rows alone — operators
900        // revising one vendor's tariff don't disturb the rest.
901        let meter = CostMeter::new(pricing());
902        meter.replace_model_pricing(
903            "gpt-4.1",
904            ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO),
905        );
906
907        // Touched row reflects the new tariff.
908        let gpt_charge = meter
909            .charge(&TenantId::new("alpha"), "gpt-4.1", &usage(1000, 0))
910            .unwrap();
911        assert_eq!(gpt_charge, d("20"));
912
913        // Untouched row keeps its original tariff.
914        let claude_charge = meter
915            .charge(
916                &TenantId::new("alpha"),
917                "claude-opus-4-7",
918                &usage(1000, 1000),
919            )
920            .unwrap();
921        assert_eq!(claude_charge, d("90"));
922    }
923
924    #[test]
925    fn replace_model_pricing_inserts_when_model_absent() {
926        // Insert-or-replace semantic — new vendor models can join
927        // the table without rebuilding it from scratch.
928        let meter = CostMeter::new(PricingTable::new());
929
930        // Pre-condition: unknown model rejects under the default
931        // `Reject` policy.
932        let err = meter
933            .charge(&TenantId::new("alpha"), "new-vendor-x", &usage(100, 0))
934            .unwrap_err();
935        assert!(matches!(err, PolicyError::UnknownModel(_)));
936
937        meter.replace_model_pricing(
938            "new-vendor-x",
939            ModelPricing::new(d("5"), d("15"), Decimal::ZERO, Decimal::ZERO),
940        );
941
942        // Post-condition: the inserted row charges normally.
943        let cost = meter
944            .charge(&TenantId::new("alpha"), "new-vendor-x", &usage(1000, 1000))
945            .unwrap();
946        assert_eq!(cost, d("20"));
947    }
948
949    #[test]
950    fn pricing_snapshot_returns_owned_copy_isolated_from_subsequent_mutations() {
951        // The snapshot is a point-in-time clone. Mutations on the
952        // returned value do NOT propagate back, and mutations on the
953        // meter after the snapshot do not appear in the held copy.
954        let meter = CostMeter::new(pricing());
955        let mut snap = meter.pricing_snapshot();
956        assert!(snap.get("claude-opus-4-7").is_some());
957
958        // Mutate the snapshot — meter must stay intact.
959        snap.set(
960            "claude-opus-4-7",
961            ModelPricing::new(d("999"), d("999"), Decimal::ZERO, Decimal::ZERO),
962        );
963        let meter_charge = meter
964            .charge(&TenantId::new("alpha"), "claude-opus-4-7", &usage(1000, 0))
965            .unwrap();
966        assert_eq!(
967            meter_charge,
968            d("15"),
969            "meter must ignore snapshot mutations"
970        );
971
972        // Mutate the meter — held snapshot must stay intact.
973        meter.replace_model_pricing(
974            "claude-opus-4-7",
975            ModelPricing::new(d("1"), d("1"), Decimal::ZERO, Decimal::ZERO),
976        );
977        let snap_pricing = snap.get("claude-opus-4-7").unwrap();
978        assert_eq!(
979            snap_pricing.input_per_1k,
980            d("999"),
981            "snapshot must not see meter mutations after capture"
982        );
983    }
984
985    #[test]
986    fn pricing_mutations_serialise_under_concurrent_clones() {
987        // Two threads pound the same `Arc<CostMeter>` clones with
988        // disjoint mutation patterns — one rebuilds the full table,
989        // the other partials a single row. Both go through the same
990        // `RwLock<PricingTable>` slot so writes serialise; the test
991        // pins (a) no panics / no torn state and (b) the post-join
992        // table reflects every committed mutation deterministically.
993        let meter = Arc::new(CostMeter::new(pricing()));
994        let barrier = Arc::new(std::sync::Barrier::new(2));
995
996        // Snapshot baseline: claude row exists, gpt-4.1 row exists.
997        assert!(meter.pricing_snapshot().get("claude-opus-4-7").is_some());
998
999        let m_replace = Arc::clone(&meter);
1000        let b_replace = Arc::clone(&barrier);
1001        let t_replace = std::thread::spawn(move || {
1002            b_replace.wait();
1003            for i in 0..500 {
1004                let mut next = PricingTable::new();
1005                next.set(
1006                    "claude-opus-4-7",
1007                    ModelPricing::new(
1008                        Decimal::from(i % 3),
1009                        Decimal::from((i % 3) * 2),
1010                        Decimal::ZERO,
1011                        Decimal::ZERO,
1012                    ),
1013                );
1014                m_replace.replace_pricing(next);
1015            }
1016        });
1017
1018        let m_partial = Arc::clone(&meter);
1019        let b_partial = Arc::clone(&barrier);
1020        let t_partial = std::thread::spawn(move || {
1021            b_partial.wait();
1022            for i in 0..500 {
1023                m_partial.replace_model_pricing(
1024                    "gpt-4.1",
1025                    ModelPricing::new(
1026                        Decimal::from(i % 5),
1027                        Decimal::from((i % 5) * 4),
1028                        Decimal::ZERO,
1029                        Decimal::ZERO,
1030                    ),
1031                );
1032            }
1033        });
1034
1035        t_replace.join().unwrap();
1036        t_partial.join().unwrap();
1037
1038        // Final state: t_replace's last write installed a table
1039        // containing only `claude-opus-4-7`. t_partial's last write
1040        // may have installed `gpt-4.1` AFTER that swap (insert
1041        // semantic), or BEFORE it (lost to the table replace). Both
1042        // orderings are valid; what must hold is that the meter is
1043        // in ONE of these two states — no torn row, no panic, no
1044        // missing claude.
1045        let final_snap = meter.pricing_snapshot();
1046        assert!(
1047            final_snap.get("claude-opus-4-7").is_some(),
1048            "claude row must survive — every t_replace write installs it"
1049        );
1050    }
1051
1052    #[test]
1053    fn replace_model_pricing_is_observed_by_cloned_meters() {
1054        // The single-row swap rides through `Arc<CostMeter>` clones
1055        // just like the full-table `replace_pricing`. Config-reload
1056        // threads holding their own clone do not coordinate with
1057        // charge sites.
1058        let meter = CostMeter::new(pricing());
1059        let cloned = meter.clone();
1060        cloned.replace_model_pricing(
1061            "gpt-4.1",
1062            ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO),
1063        );
1064
1065        let cost = meter
1066            .charge(&TenantId::new("alpha"), "gpt-4.1", &usage(1000, 0))
1067            .unwrap();
1068        assert_eq!(
1069            cost,
1070            d("20"),
1071            "original meter must observe a per-row swap installed via a clone"
1072        );
1073    }
1074
1075    /// Test sink that records every `record_unknown_model` call.
1076    /// Captures `(tenant, model)` pairs so tests can assert order +
1077    /// count, exercises the production sink-impl shape (Arc-shareable,
1078    /// sync, internally synchronised via `Mutex`).
1079    #[derive(Default)]
1080    struct CapturingSink {
1081        calls: std::sync::Mutex<Vec<(String, String)>>,
1082    }
1083
1084    impl CapturingSink {
1085        fn snapshot(&self) -> Vec<(String, String)> {
1086            self.calls.lock().unwrap().clone()
1087        }
1088    }
1089
1090    impl UnknownModelSink for CapturingSink {
1091        fn record_unknown_model(&self, tenant: &TenantId, model: &str) {
1092            self.calls
1093                .lock()
1094                .unwrap()
1095                .push((tenant.as_str().to_owned(), model.to_owned()));
1096        }
1097    }
1098
1099    #[test]
1100    fn unknown_model_sink_fires_under_warn_once_without_dedup() {
1101        // WarnOnce dedupes the tracing log channel; the sink must NOT
1102        // dedupe — production dashboards consume raw per-attempt counts
1103        // (rate, p50, percentiles).
1104        let sink = Arc::new(CapturingSink::default());
1105        let meter = CostMeter::new(pricing())
1106            .with_unknown_model_policy(UnknownModelPolicy::WarnOnce)
1107            .with_unknown_model_sink(sink.clone());
1108
1109        for _ in 0..5 {
1110            let _ = meter
1111                .charge(&TenantId::new("alpha"), "vendor-preview-x", &usage(1, 1))
1112                .unwrap();
1113        }
1114
1115        let calls = sink.snapshot();
1116        assert_eq!(
1117            calls.len(),
1118            5,
1119            "sink must observe every attempt, not the warn-once-deduped subset"
1120        );
1121        for (tenant, model) in &calls {
1122            assert_eq!(tenant, "alpha");
1123            assert_eq!(model, "vendor-preview-x");
1124        }
1125        // Log gate still dedupes — the sink + log channels are
1126        // independent.
1127        assert_eq!(meter.warned_models.len(), 1);
1128    }
1129
1130    #[test]
1131    fn unknown_model_sink_fires_under_reject_before_err_returns() {
1132        // Reject surfaces the call as an Err — but the sink must still
1133        // see the attempt, so dashboards reflect "operator hit an
1134        // unknown model" regardless of whether the call ultimately
1135        // succeeds or errors.
1136        let sink = Arc::new(CapturingSink::default());
1137        let meter = CostMeter::new(pricing()).with_unknown_model_sink(sink.clone());
1138
1139        let err = meter
1140            .charge(&TenantId::new("bravo"), "mystery-model", &usage(10, 10))
1141            .unwrap_err();
1142        assert!(matches!(err, PolicyError::UnknownModel(_)));
1143
1144        let calls = sink.snapshot();
1145        assert_eq!(
1146            calls,
1147            vec![("bravo".to_owned(), "mystery-model".to_owned())]
1148        );
1149    }
1150
1151    #[test]
1152    fn absent_unknown_model_sink_is_a_silent_no_op() {
1153        // The default constructor wires no sink — neither the charge
1154        // path nor the ledger should observe any sink-related work.
1155        let meter = CostMeter::new(pricing());
1156        let _ = meter
1157            .charge(&TenantId::new("alpha"), "unknown", &usage(1, 1))
1158            .unwrap_err();
1159        assert_eq!(meter.spent_by(&TenantId::new("alpha")), Decimal::ZERO);
1160    }
1161
1162    #[test]
1163    fn known_model_charge_does_not_fire_unknown_sink() {
1164        // Sink is scoped to the unknown branch — a healthy known-model
1165        // charge must NOT trip the dashboard counter.
1166        let sink = Arc::new(CapturingSink::default());
1167        let meter = CostMeter::new(pricing()).with_unknown_model_sink(sink.clone());
1168        let _ = meter
1169            .charge(
1170                &TenantId::new("alpha"),
1171                "claude-opus-4-7",
1172                &usage(1000, 1000),
1173            )
1174            .unwrap();
1175        assert!(
1176            sink.snapshot().is_empty(),
1177            "known-model dispatch must not invoke the unknown-model sink"
1178        );
1179    }
1180
1181    #[test]
1182    fn unknown_model_sink_may_replace_pricing_without_deadlock() {
1183        // Regression-pin for the lock-ordering reshape: the sink runs
1184        // outside the `pricing.read()` scope, so a sink impl that
1185        // calls `replace_pricing` (an admin write path acting on the
1186        // observed dispatch) must not deadlock. If the read guard
1187        // leaked into the sink call, this test would hang on the
1188        // `pricing.write()` inside `replace_pricing`.
1189        struct HotSwapSink {
1190            meter: Arc<RwLock<Option<CostMeter>>>,
1191        }
1192        impl UnknownModelSink for HotSwapSink {
1193            fn record_unknown_model(&self, _tenant: &TenantId, model: &str) {
1194                if let Some(m) = self.meter.read().as_ref() {
1195                    let mut p = pricing();
1196                    p.set(
1197                        model,
1198                        ModelPricing::new(d("1"), d("1"), Decimal::ZERO, Decimal::ZERO),
1199                    );
1200                    m.replace_pricing(p);
1201                }
1202            }
1203        }
1204
1205        let slot: Arc<RwLock<Option<CostMeter>>> = Arc::new(RwLock::new(None));
1206        let meter = CostMeter::new(pricing())
1207            .with_unknown_model_policy(UnknownModelPolicy::WarnOnce)
1208            .with_unknown_model_sink(Arc::new(HotSwapSink {
1209                meter: slot.clone(),
1210            }));
1211        *slot.write() = Some(meter.clone());
1212
1213        // First call: model is unknown → sink fires → installs pricing.
1214        let first = meter
1215            .charge(&TenantId::new("alpha"), "freshly-launched", &usage(1000, 0))
1216            .unwrap();
1217        assert_eq!(
1218            first,
1219            Decimal::ZERO,
1220            "first call returns zero (the model was unknown when looked up)"
1221        );
1222
1223        // Second call: pricing is now installed → charges normally.
1224        let second = meter
1225            .charge(&TenantId::new("alpha"), "freshly-launched", &usage(1000, 0))
1226            .unwrap();
1227        assert_eq!(second, d("1"));
1228    }
1229
1230    #[test]
1231    fn pricing_replacement_is_observed_by_cloned_meters() {
1232        let meter = CostMeter::new(pricing());
1233        let cloned = meter.clone();
1234
1235        let mut new_pricing = pricing();
1236        new_pricing.set(
1237            "gpt-4.1",
1238            ModelPricing::new(d("20"), d("80"), Decimal::ZERO, Decimal::ZERO),
1239        );
1240        cloned.replace_pricing(new_pricing);
1241
1242        let cost = meter
1243            .charge(&TenantId::new("alpha"), "gpt-4.1", &usage(1000, 0))
1244            .unwrap();
1245        assert_eq!(
1246            cost,
1247            d("20"),
1248            "the original meter must charge against a pricing table installed via a clone"
1249        );
1250    }
1251}