Skip to main content

cc_lb_plugin_api/
types.rs

1//! Shared public data types for plugin boundaries.
2
3use std::collections::BTreeMap;
4use std::sync::Arc;
5use std::time::Duration;
6
7use bytes::Bytes;
8use http::{HeaderMap, Method, StatusCode};
9use serde::{Deserialize, Serialize};
10use url::Url;
11use uuid::Uuid;
12
13use crate::errors::{DialectError, SignerError};
14use crate::traits::{Signer, UpstreamDialect};
15
16/// Authenticated caller identity used for quota, audit, and routing decisions.
17#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
18pub struct Principal {
19    /// Stable principal identifier, unique within the proxy deployment.
20    pub id: String,
21    /// Principal category inferred by the authentication plugin.
22    pub kind: PrincipalKind,
23    /// Plugin-provided claims available to router and observability layers.
24    pub claims: serde_json::Map<String, serde_json::Value>,
25}
26
27/// Principal categories supported by first-party and custom auth plugins.
28#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
29#[serde(rename_all = "snake_case")]
30pub enum PrincipalKind {
31    /// Principal authenticated by an Anthropic-compatible API key.
32    ApiKey,
33    /// Principal authenticated as an OAuth subject.
34    OAuthSubject,
35    /// Principal authenticated by an internal key managed by cc-lb.
36    InternalKey,
37    /// Principal authenticated through a workload identity mechanism.
38    WorkloadIdentity,
39    /// Principal authenticated by a Claude subscription bearer token.
40    SubscriptionBearer,
41}
42
43/// Runtime chain slot a plugin can provide.
44#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
45#[serde(rename_all = "snake_case")]
46pub enum PluginSlot {
47    /// Router filter slot for selecting or filtering upstream candidates.
48    Router,
49    /// Observability hook slot for receiving request lifecycle events.
50    ObservabilityHook,
51    /// Request shaping slot for producing upstream-specific requests.
52    Shape,
53}
54
55impl PluginSlot {
56    /// Returns the stable snake_case wire name for this slot.
57    pub fn as_str(self) -> &'static str {
58        match self {
59            Self::Router => "router",
60            Self::ObservabilityHook => "observability_hook",
61            Self::Shape => "shape",
62        }
63    }
64
65    /// Parses a stable snake_case wire name into a plugin slot.
66    pub fn parse(value: &str) -> Option<Self> {
67        match value {
68            "router" => Some(Self::Router),
69            "observability_hook" => Some(Self::ObservabilityHook),
70            "shape" => Some(Self::Shape),
71            _ => None,
72        }
73    }
74}
75
/// Reserved principal id that [`SlotKey`] uses to mark a proxy-wide (global) plugin slot.
pub const GLOBAL_PRINCIPAL: &str = "__global__";
78
79/// Composite key identifying a plugin slot per principal × plugin name.
80///
81/// Used as the trait-level identity for [`crate::FilterPlugin`] via
82/// `FilterPlugin::slot_key` and as the runtime-side cache lookup key
83/// for the wasmtime per-worker `WorkerInstance` map.
84#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
85pub struct SlotKey {
86    /// Principal id this slot is bound to, or [`GLOBAL_PRINCIPAL`] for
87    /// proxy-wide globals.
88    pub principal: String,
89    /// Stable plugin name.
90    pub plugin: String,
91}
92
93impl SlotKey {
94    /// Build a per-principal slot key.
95    pub fn new(principal: impl Into<String>, plugin: impl Into<String>) -> Self {
96        Self {
97            principal: principal.into(),
98            plugin: plugin.into(),
99        }
100    }
101
102    /// Build a proxy-wide global slot key.
103    pub fn global(plugin: impl Into<String>) -> Self {
104        Self::new(GLOBAL_PRINCIPAL, plugin)
105    }
106}
107
108/// Upstream backends supported by the proxy routing contract.
109#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
110#[serde(rename_all = "snake_case", tag = "kind")]
111pub enum Upstream {
112    /// Direct Anthropic API endpoint.
113    AnthropicDirect {
114        /// Operator-configured base URL override for this upstream, resolved
115        /// from the upstream record at routing time. `None` indicates the
116        /// canonical Anthropic endpoint should be used.
117        #[serde(default, skip_serializing_if = "Option::is_none")]
118        base_url: Option<Url>,
119    },
120}
121
122/// Upstream record kind exposed to router plugins for candidate selection.
123#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
124#[serde(rename_all = "snake_case")]
125pub enum UpstreamKind {
126    /// Anthropic API-key upstream.
127    AnthropicApiKey,
128    /// Anthropic OAuth upstream.
129    AnthropicOauth,
130}
131
132impl UpstreamKind {
133    /// Returns the stable snake_case wire name.
134    pub fn as_str(self) -> &'static str {
135        match self {
136            Self::AnthropicApiKey => "anthropic_api_key",
137            Self::AnthropicOauth => "anthropic_oauth",
138        }
139    }
140}
141
142/// Upstream rate-limit metric kind observed from upstream responses.
143#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
144#[serde(rename_all = "snake_case")]
145pub enum RateLimitKind {
146    /// Request-count rate limit.
147    Requests,
148    /// Aggregate token rate limit.
149    Tokens,
150    /// Input-token rate limit.
151    InputTokens,
152    /// Output-token rate limit.
153    OutputTokens,
154}
155
156impl RateLimitKind {
157    /// Returns the stable snake_case wire name.
158    pub fn as_str(self) -> &'static str {
159        match self {
160            Self::Requests => "requests",
161            Self::Tokens => "tokens",
162            Self::InputTokens => "input_tokens",
163            Self::OutputTokens => "output_tokens",
164        }
165    }
166}
167
168/// Latest upstream rate-limit observation exposed to router plugins.
169#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
170pub struct RateLimitObservation {
171    /// Observed rate-limit kind.
172    pub kind: RateLimitKind,
173    /// Provider-defined rate-limit window label.
174    pub window: String,
175    /// Optional maximum quota for the window.
176    pub limit: Option<u64>,
177    /// Optional remaining quota for the window.
178    pub remaining: Option<u64>,
179    /// Optional provider reset timestamp or duration string.
180    pub reset: Option<String>,
181}
182
183/// Freshness state for subscription quota data exposed to router plugins.
184#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
185#[serde(rename_all = "snake_case")]
186pub enum SubscriptionQuotaDataState {
187    /// Data is inside the configured routing freshness window.
188    Fresh,
189    /// Data exists but is older than the configured routing freshness window.
190    Stale,
191    /// No usable subscription quota data exists for the candidate/window.
192    Missing,
193}
194
195/// Latest subscription quota snapshot for one candidate/window.
196#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
197pub struct SubscriptionQuotaCandidateSnapshot {
198    /// Subscription quota window label.
199    pub window: String,
200    /// Freshness state for this window snapshot.
201    pub state: SubscriptionQuotaDataState,
202    /// Data source label, such as header, api, or merged.
203    pub source: Option<String>,
204    /// Provider-reported utilization fraction.
205    pub utilization: Option<f64>,
206    /// Provider-reported quota status.
207    pub status: Option<String>,
208    /// Provider reset timestamp in Unix seconds.
209    pub resets_at_unix_secs: Option<u64>,
210    /// Per-window threshold fraction that was crossed (0.0..=1.0). Matches the
211    /// numeric `anthropic-ratelimit-unified-{5h,7d,overage}-surpassed-threshold`
212    /// header rather than a boolean approximation.
213    pub surpassed_threshold: Option<f64>,
214    /// Representative claim used for provenance/debugging.
215    pub representative_claim: Option<String>,
216    /// Provider reason the quota window is disabled.
217    pub disabled_reason: Option<String>,
218    /// Whether provider extra usage is enabled.
219    pub extra_usage_enabled: Option<bool>,
220    /// Provider extra-usage monthly credit limit.
221    pub extra_usage_monthly_limit: Option<f64>,
222    /// Provider extra-usage consumed credits.
223    pub extra_usage_used_credits: Option<f64>,
224    /// Observation timestamp in Unix milliseconds.
225    pub observed_at_unix_millis: Option<u64>,
226    /// Configured maximum age before this snapshot becomes stale.
227    pub max_staleness_secs: u64,
228    /// `anthropic-ratelimit-unified-fallback` == "available". Top-level signal
229    /// surfaced only on the unified window.
230    #[serde(default, skip_serializing_if = "Option::is_none")]
231    pub fallback_available: Option<bool>,
232    /// `anthropic-ratelimit-unified-overage-in-use` == "true". Top-level signal
233    /// surfaced only on the unified window.
234    #[serde(default, skip_serializing_if = "Option::is_none")]
235    pub overage_in_use: Option<bool>,
236    /// Monthly overage utilization fraction (0.0..=1.0). Top-level signal
237    /// distinct from per-window overage utilization.
238    #[serde(default, skip_serializing_if = "Option::is_none")]
239    pub overage_period_monthly_utilization: Option<f64>,
240    /// Suggested upgrade paths (csv -> normalized list).
241    #[serde(default, skip_serializing_if = "Option::is_none")]
242    pub upgrade_paths: Option<Vec<String>>,
243}
244
245/// Prompt cache TTL class: immutable after entry creation.
246#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
247#[serde(rename_all = "snake_case")]
248#[allow(dead_code)]
249pub enum TtlClass {
250    /// 5-minute TTL cache entry.
251    #[default]
252    Ephemeral5m,
253    /// 1-hour TTL cache entry.
254    Ephemeral1h,
255}
256
257/// Origin of a cache breakpoint: whether explicitly requested or auto-inferred.
258#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
259#[serde(rename_all = "snake_case")]
260#[allow(dead_code)]
261pub enum BreakpointOrigin {
262    /// Explicit cache breakpoint requested by the user or application.
263    Explicit,
264    /// Auto-inferred cache breakpoint from proxy analysis.
265    AutoCacheInferred,
266}
267
268/// Source of a cache breakpoint within the request.
269#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
270#[serde(rename_all = "snake_case")]
271#[allow(dead_code)]
272pub enum CacheBreakpointSource {
273    /// Breakpoint from tools in the request.
274    Tools,
275    /// Breakpoint from system content.
276    System,
277    /// Breakpoint from message content.
278    Message,
279}
280
281/// Cache breakpoint position in the request, for prompt cache optimization.
282#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
283#[allow(dead_code)]
284pub struct CacheBreakpoint {
285    /// Block index of the breakpoint.
286    pub block_index: u32,
287    /// Source of the breakpoint.
288    pub source: CacheBreakpointSource,
289    /// Dot-separated path (e.g., "messages.0.content.1") within request.
290    pub path: String,
291    /// Message index if this breakpoint is within a message, None for system content.
292    pub message_index: Option<u32>,
293    /// Content hash of the prefix up to this breakpoint.
294    pub prefix_hash: String,
295    /// Token count of the prefix up to this breakpoint.
296    pub prefix_token_count: u64,
297    /// Requested TTL class for this breakpoint.
298    pub requested_ttl: TtlClass,
299    /// Origin of this breakpoint.
300    pub origin: BreakpointOrigin,
301}
302
303/// Warm cache entry eligible for reuse in upstream requests.
304#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
305#[allow(dead_code)]
306pub struct WarmCacheEntry {
307    /// Content hash of the cached prefix.
308    pub prefix_hash: String,
309    /// Unix timestamp in seconds when this entry expires.
310    pub expires_at_unix_secs: u64,
311    /// TTL class of this cache entry.
312    pub ttl_class: TtlClass,
313    /// Last observed usage time in Unix seconds.
314    pub last_observed_at_unix_secs: u64,
315}
316
317/// Cache utility prediction for routing decisions.
318#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
319#[allow(dead_code)]
320pub struct CacheScore {
321    /// Predicted input tokens that can be read from cache.
322    pub predicted_cache_read_tokens: u32,
323    /// Predicted input tokens written to 5-minute cache.
324    pub predicted_cache_creation_tokens_5m: u32,
325    /// Predicted input tokens written to 1-hour cache.
326    pub predicted_cache_creation_tokens_1h: u32,
327    /// Predicted input tokens not read from cache.
328    pub predicted_uncached_input_tokens: u32,
329    /// Predicted Unix timestamp when the cache entry will expire, None if permanent.
330    pub predicted_expires_at_unix_secs: Option<u64>,
331    /// Index of the matched cache breakpoint if one was selected, None otherwise.
332    pub matched_breakpoint_index: Option<u32>,
333    /// Confidence score for this prediction (0.0 to 1.0).
334    pub confidence: f32,
335    /// Optional explanation for ambiguous or low-confidence predictions.
336    pub ambiguity_reason: Option<String>,
337}
338
339/// Available upstream candidate for routing decisions.
340///
341/// The router receives a list of available upstream candidates sorted by
342/// `upstream_id` in ascending order (Uuid byte order). This stable ordering
343/// allows plugins to implement deterministic routing algorithms.
344#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
345pub struct UpstreamCandidate {
346    /// Stable upstream identifier.
347    pub upstream_id: Uuid,
348    /// Operator-facing upstream name.
349    pub name: String,
350    /// Upstream kind used to select compatible routing strategies.
351    pub kind: UpstreamKind,
352    /// Latest rate-limit observations for this candidate.
353    pub observed_rate_limits: Vec<RateLimitObservation>,
354    /// Latest subscription quota snapshots for this candidate.
355    #[serde(default)]
356    pub subscription_quotas: Vec<SubscriptionQuotaCandidateSnapshot>,
357    /// Unix timestamp in seconds for the candidate observation snapshot.
358    pub observed_at_unix_secs: u64,
359    /// Predicted cache utility for this candidate, if available.
360    #[serde(default, skip_serializing_if = "Option::is_none")]
361    pub cache_score: Option<CacheScore>,
362    /// Resolved upstream base URL. Populated by the host so that v2 plugins
363    /// that self-reference for shape can construct the dispatch URL against
364    /// the configured upstream instead of hardcoding api.anthropic.com.
365    #[serde(default, skip_serializing_if = "Option::is_none")]
366    pub base_url: Option<String>,
367}
368
369/// Credential strategy expected by a selected upstream.
370#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
371#[serde(rename_all = "snake_case")]
372pub enum CredentialStrategy {
373    /// Anthropic-style `x-api-key` signing.
374    ApiKey,
375    /// Anthropic-style OAuth bearer signing.
376    OAuth,
377    /// Forward an internal credential supplied by upstream configuration.
378    InternalForwarded,
379}
380
381/// Parsed downstream request data passed into plugins.
382#[derive(Clone, Debug)]
383pub struct RequestContext {
384    /// Stable request identifier used for logs, audit rows, and upstream traceability.
385    pub request_id: String,
386    /// Downstream request headers after hop-by-hop stripping.
387    pub downstream_headers: HeaderMap,
388    /// Downstream HTTP method.
389    pub method: Method,
390    /// Downstream request path, such as `/v1/messages`.
391    pub path: String,
392    /// Raw downstream query string without the leading `?`.
393    pub query: Option<String>,
394    /// Buffered request body bytes, required by SigV4 and other signing schemes.
395    pub body_bytes: Bytes,
396    /// Cache breakpoints extracted from the request for prompt cache optimization.
397    pub cache_breakpoints: Vec<CacheBreakpoint>,
398    /// Canonical model identifier resolved from the request.
399    pub canonical_model_id: String,
400}
401
402/// Request produced by an upstream dialect before credentials are applied.
403#[derive(Clone, Debug)]
404pub struct ShapedRequest {
405    url: Url,
406    method: Method,
407    headers: HeaderMap,
408    body: Bytes,
409    _seal: crate::private::Seal,
410}
411
412impl ShapedRequest {
413    /// Returns the destination URL.
414    pub fn url(&self) -> &Url {
415        &self.url
416    }
417
418    /// Replaces the destination URL.
419    pub fn set_url(&mut self, url: Url) {
420        self.url = url;
421    }
422
423    /// Returns the HTTP method.
424    pub fn method(&self) -> &Method {
425        &self.method
426    }
427
428    /// Replaces the HTTP method.
429    pub fn set_method(&mut self, method: Method) {
430        self.method = method;
431    }
432
433    /// Returns the request headers.
434    pub fn headers(&self) -> &HeaderMap {
435        &self.headers
436    }
437
438    /// Returns mutable request headers for signer-owned changes.
439    pub fn headers_mut(&mut self) -> &mut HeaderMap {
440        &mut self.headers
441    }
442
443    /// Returns the request body.
444    pub fn body(&self) -> &Bytes {
445        &self.body
446    }
447
448    /// Replaces the request body.
449    pub fn set_body(&mut self, body: Bytes) {
450        self.body = body;
451    }
452}
453
454/// Dialect-facing capability used to construct shaped requests.
455///
456/// Values of this type are created only by [`shape_request`]. Dialect
457/// implementations receive a mutable reference while their
458/// [`crate::UpstreamDialect::shape`] method is executing, which lets them return
459/// a shaped request without exposing an unrestricted public constructor.
460#[derive(Debug)]
461pub struct ShapedRequestBuilder {
462    _seal: crate::private::Seal,
463}
464
465impl ShapedRequestBuilder {
466    /// Creates a shaped request from dialect-owned parts.
467    pub fn shaped_request(
468        &mut self,
469        url: Url,
470        method: Method,
471        headers: HeaderMap,
472        body: Bytes,
473    ) -> ShapedRequest {
474        ShapedRequest {
475            url,
476            method,
477            headers,
478            body,
479            _seal: crate::private::Seal,
480        }
481    }
482}
483
484/// Invokes an upstream dialect with a temporary shaped-request capability.
485pub fn shape_request(
486    dialect: &dyn UpstreamDialect,
487    ctx: &RequestContext,
488    upstream: &Upstream,
489    principal: &Principal,
490) -> Result<ShapedRequest, DialectError> {
491    let mut builder = ShapedRequestBuilder {
492        _seal: crate::private::Seal,
493    };
494    dialect.shape(ctx, upstream, principal, &mut builder)
495}
496
497/// Request after a signer has consumed and sealed a shaped request.
498#[derive(Clone, Debug)]
499pub struct SignedRequest {
500    url: Url,
501    method: Method,
502    headers: HeaderMap,
503    body: Bytes,
504    _seal: crate::private::Seal,
505}
506
507impl SignedRequest {
508    /// Seals an owned shaped request after the signer has applied credentials.
509    pub fn from_shaped(shaped: ShapedRequest, _capability: &mut SigningCapability) -> Self {
510        Self {
511            url: shaped.url,
512            method: shaped.method,
513            headers: shaped.headers,
514            body: shaped.body,
515            _seal: crate::private::Seal,
516        }
517    }
518
519    /// Returns the destination URL.
520    pub fn url(&self) -> &Url {
521        &self.url
522    }
523
524    /// Returns the HTTP method.
525    pub fn method(&self) -> &Method {
526        &self.method
527    }
528
529    /// Returns the signed request headers.
530    pub fn headers(&self) -> &HeaderMap {
531        &self.headers
532    }
533
534    /// Returns the signed request body.
535    pub fn body(&self) -> &Bytes {
536        &self.body
537    }
538
539    /// Consumes the signed request into relay-ready parts.
540    pub fn into_parts(self) -> (Url, Method, HeaderMap, Bytes) {
541        (self.url, self.method, self.headers, self.body)
542    }
543}
544
545/// Signer-facing capability used to seal shaped requests.
546///
547/// Values of this type are created only by [`sign_request`]. Signer
548/// implementations receive a mutable reference while their [`crate::Signer::sign`]
549/// method is executing, which keeps arbitrary crates from sealing shaped
550/// requests outside the signer boundary.
551#[derive(Debug)]
552pub struct SigningCapability {
553    _seal: crate::private::Seal,
554}
555
556/// Invokes a signer with a temporary signing capability.
557pub async fn sign_request(
558    signer: &dyn Signer,
559    shaped: ShapedRequest,
560) -> Result<SignedRequest, SignerError> {
561    let mut capability = SigningCapability {
562        _seal: crate::private::Seal,
563    };
564    signer.sign(shaped, &mut capability).await
565}
566
567/// Router output selecting both an upstream and its dialect boundary object.
568pub struct RouteDecision {
569    /// Stable upstream identifier selected by the router, when provided by the plugin.
570    pub upstream_id: Option<Uuid>,
571    /// Upstream selected for the request.
572    pub upstream: Upstream,
573    /// Dialect plugin that shapes the request for the selected upstream.
574    pub dialect: Arc<dyn UpstreamDialect>,
575}
576
/// Quota window and model allow-list that apply to one principal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrincipalQuotas {
    /// Upper bound on the number of requests within `window`.
    pub requests_per_window: u64,
    /// Upper bound on the number of input tokens within `window`.
    pub input_tokens_per_window: u64,
    /// Upper bound on the number of output tokens within `window`.
    pub output_tokens_per_window: u64,
    /// Length of the quota window.
    pub window: Duration,
    /// Model names, in glob style, that this principal may use.
    pub allowed_models: Vec<String>,
}
591
592/// Observability events emitted by the lifecycle.
593#[derive(Clone, Debug, Eq, PartialEq)]
594pub enum ObserveEvent {
595    /// Downstream request has entered the proxy.
596    RequestStarted {
597        /// Request identifier.
598        request_id: String,
599        /// Downstream user-agent header, when present.
600        downstream_user_agent: Option<String>,
601    },
602    /// Authentication completed successfully.
603    AuthnComplete {
604        /// Authenticated principal identifier.
605        principal_id: String,
606        /// Authenticated principal kind.
607        kind: PrincipalKind,
608    },
609    /// Router selected an upstream.
610    UpstreamChosen {
611        /// Selected upstream.
612        upstream: Upstream,
613    },
614    /// A batch of streamed events passed through the relay.
615    Chunk {
616        /// Monotonic batch index within the response stream.
617        batch_index: u64,
618        /// Number of SSE events in the batch.
619        event_count: usize,
620        /// Total bytes in the batch.
621        total_bytes: usize,
622    },
623    /// Request finished successfully or with an upstream HTTP error.
624    RequestFinished {
625        /// Final HTTP status code.
626        status: StatusCode,
627        /// Input token count reported by the upstream, when known.
628        input_tokens: Option<u64>,
629        /// Output token count reported by the upstream, when known.
630        output_tokens: Option<u64>,
631        /// Cache write token count reported by the upstream, when known.
632        cache_creation_input_tokens: Option<u64>,
633        /// Cache read token count reported by the upstream, when known.
634        cache_read_input_tokens: Option<u64>,
635        /// End-to-end request duration in milliseconds.
636        duration_ms: u64,
637    },
638    /// Lifecycle or plugin error was observed.
639    Error {
640        /// Stable error code.
641        code: String,
642        /// Redacted human-readable message.
643        message: String,
644        /// Error source component.
645        source: String,
646    },
647}
648
649/// Decision returned by a signer after receiving an unauthorized upstream error.
650#[derive(Clone)]
651pub enum RetryDecision {
652    /// Retry with a refreshed signer.
653    Refresh {
654        /// Signer containing refreshed credentials.
655        new_signer: Arc<dyn Signer>,
656    },
657    /// Do not retry the request.
658    Fail,
659}
660
661/// Plugin manifest passed to runtime adapters when instantiating plugins.
662#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
663pub struct PluginManifest {
664    /// Plugin name from configuration.
665    pub name: String,
666    /// Filesystem path or runtime-specific locator for the plugin artifact.
667    pub artifact: String,
668    /// Preferred plugin wire envelope version. Omitted manifests default to v1.
669    #[serde(default, skip_serializing_if = "Option::is_none")]
670    pub wire_version: Option<u8>,
671    /// Runtime configuration provided to the plugin.
672    pub config: serde_json::Value,
673    /// Runtime-specific metadata not interpreted by the core API contract.
674    #[serde(default)]
675    pub metadata: BTreeMap<String, serde_json::Value>,
676    /// Pure-mode dispatch: every hook call builds a fresh wasm `Store`
677    /// (no thread_local cache, no version-compare). Default `true` —
678    /// stateless plugins (the common case) benefit from full isolation
679    /// per call. Opt out only for plugins that genuinely need to keep
680    /// mutable state across calls in the same worker.
681    #[serde(default = "default_pure")]
682    pub pure: bool,
683}
684
/// Default value that `serde` uses for [`PluginManifest::pure`]: always `true`.
///
/// A manifest that omits the field is therefore treated as pure, matching
/// the default expected on the cc-lb-server side.
pub fn default_pure() -> bool {
    true
}
690
// Caps applied to routing traces.
/// Upper bound on how many stages a routing trace may hold.
pub const MAX_ROUTING_TRACE_STAGES: usize = 100;
/// Upper bound on the length of a stage name.
pub const MAX_STAGE_NAME_LEN: usize = 256;
/// Upper bound on the length of the error message carried by an internal error.
pub const MAX_ERROR_MESSAGE_LEN: usize = 1_024;
698
699/// Reason for a passthrough routing decision.
700#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
701#[serde(rename_all = "snake_case")]
702pub enum PassthroughCause {
703    /// Upstream is healthy and available.
704    HealthyUpstream,
705    /// No alternative upstream available.
706    NoAlternative,
707    /// Plugin returned passthrough decision.
708    PluginDecision,
709}
710
711/// Per-candidate evaluation reason.
712#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
713#[serde(rename_all = "snake_case")]
714pub enum PerCandidateReason {
715    /// Candidate hit rate limit.
716    RateLimited,
717    /// Candidate has insufficient quota.
718    InsufficientQuota,
719    /// Candidate is unhealthy.
720    Unhealthy,
721    /// Candidate rejected by plugin.
722    RejectedByPlugin,
723}
724
725/// Strategy for selecting a terminal upstream.
726#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
727#[serde(rename_all = "kebab-case")]
728pub enum TerminalStrategy {
729    /// Select first available upstream.
730    #[default]
731    FirstPick,
732    /// Select a router plugin at random.
733    Random,
734    /// Round-robin selection.
735    RoundRobin,
736    /// Least connections strategy.
737    LeastConnections,
738}
739
740/// Decision made at a single routing stage.
741#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
742pub struct StageDecision {
743    /// Name of the routing stage.
744    pub stage_name: String,
745    /// Upstream candidate identifier if applicable.
746    #[serde(default, skip_serializing_if = "Option::is_none")]
747    pub upstream_id: Option<Uuid>,
748    /// Reason for this stage's decision.
749    #[serde(default, skip_serializing_if = "Option::is_none")]
750    pub reason: Option<String>,
751    /// Time spent executing this routing stage, in microseconds.
752    #[serde(default, skip_serializing_if = "is_zero_u64")]
753    pub duration_us: u64,
754}
755
/// Reports whether `value` is zero; used as a serde `skip_serializing_if` predicate.
fn is_zero_u64(value: &u64) -> bool {
    matches!(*value, 0)
}
759
760/// Terminal routing decision selecting an upstream.
761#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
762pub struct TerminalDecision {
763    /// Selected upstream identifier.
764    #[serde(default, skip_serializing_if = "Option::is_none")]
765    pub upstream_id: Option<Uuid>,
766    /// Strategy used for selection.
767    pub strategy: TerminalStrategy,
768}
769
770/// Complete routing trace for a request through all decision stages.
771#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
772pub struct RoutingTrace {
773    /// Sequence of stage decisions made during routing.
774    #[serde(default)]
775    pub stages: Vec<StageDecision>,
776    /// Final terminal routing decision.
777    #[serde(default, skip_serializing_if = "Option::is_none")]
778    pub terminal_decision: Option<TerminalDecision>,
779}
780
781/// Stage where an internal error occurred.
782#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
783#[serde(rename_all = "snake_case")]
784pub enum InternalErrorStage {
785    /// Authentication stage.
786    Authn,
787    /// Routing stage.
788    #[default]
789    Router,
790    /// Router filter stage.
791    RouterFilter,
792    /// Request shaping stage.
793    Shape,
794    /// Request signing stage.
795    Signer,
796    /// Request relay stage.
797    Relay,
798}
799
800/// Kind of internal error that occurred.
801#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
802#[serde(rename_all = "snake_case")]
803pub enum InternalErrorKind {
804    /// Plugin crashed or returned an error.
805    #[default]
806    PluginError,
807    /// Plugin returned invalid output.
808    InvalidOutput,
809    /// Plugin trapped during execution.
810    Trap,
811    /// Configuration error.
812    ConfigError,
813    /// Timeout error.
814    Timeout,
815    /// Resource unavailable.
816    Unavailable,
817}
818
819/// Internal error information with stage and kind details.
820#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
821pub struct InternalError {
822    /// Stage where the error occurred.
823    pub stage: InternalErrorStage,
824    /// Kind of error.
825    pub kind: InternalErrorKind,
826    /// Optional error message.
827    #[serde(default, skip_serializing_if = "Option::is_none")]
828    pub message: Option<String>,
829}
830
831#[cfg(test)]
832mod tests {
833    use super::*;
834
835    #[test]
836    fn shaped_request_accessors_and_mutators_preserve_parts() {
837        let mut builder = ShapedRequestBuilder {
838            _seal: crate::private::Seal,
839        };
840        let mut headers = HeaderMap::new();
841        headers.insert("x-test", "one".parse().unwrap());
842        let mut shaped = builder.shaped_request(
843            "https://example.test/v1/messages".parse().unwrap(),
844            Method::POST,
845            headers,
846            Bytes::from_static(b"first"),
847        );
848
849        assert_eq!(shaped.url().as_str(), "https://example.test/v1/messages");
850        assert_eq!(shaped.method(), Method::POST);
851        assert_eq!(shaped.headers()["x-test"], "one");
852        assert_eq!(shaped.body(), &Bytes::from_static(b"first"));
853
854        shaped.set_url("https://example.test/v1/complete".parse().unwrap());
855        shaped.set_method(Method::PUT);
856        shaped
857            .headers_mut()
858            .insert("x-test", "two".parse().unwrap());
859        shaped.set_body(Bytes::from_static(b"second"));
860
861        assert_eq!(shaped.url().path(), "/v1/complete");
862        assert_eq!(shaped.method(), Method::PUT);
863        assert_eq!(shaped.headers()["x-test"], "two");
864        assert_eq!(shaped.body(), &Bytes::from_static(b"second"));
865    }
866
867    #[test]
868    fn signed_request_exposes_and_consumes_signed_parts() {
869        let mut builder = ShapedRequestBuilder {
870            _seal: crate::private::Seal,
871        };
872        let mut headers = HeaderMap::new();
873        headers.insert("authorization", "Bearer token".parse().unwrap());
874        let shaped = builder.shaped_request(
875            "https://api.example.test/v1/messages".parse().unwrap(),
876            Method::POST,
877            headers,
878            Bytes::from_static(b"{}"),
879        );
880        let mut capability = SigningCapability {
881            _seal: crate::private::Seal,
882        };
883
884        let signed = SignedRequest::from_shaped(shaped, &mut capability);
885        assert_eq!(signed.url().host_str(), Some("api.example.test"));
886        assert_eq!(signed.method(), Method::POST);
887        assert_eq!(signed.headers()["authorization"], "Bearer token");
888        assert_eq!(signed.body(), &Bytes::from_static(b"{}"));
889
890        let (url, method, headers, body) = signed.into_parts();
891        assert_eq!(url.as_str(), "https://api.example.test/v1/messages");
892        assert_eq!(method, Method::POST);
893        assert_eq!(headers["authorization"], "Bearer token");
894        assert_eq!(body, Bytes::from_static(b"{}"));
895    }
896
897    #[test]
898    fn upstream_and_manifest_serde_round_trip() {
899        let upstreams = vec![Upstream::AnthropicDirect { base_url: None }];
900
901        for upstream in upstreams {
902            let json = serde_json::to_string(&upstream).unwrap();
903            let decoded: Upstream = serde_json::from_str(&json).unwrap();
904            assert_eq!(decoded, upstream);
905        }
906
907        let manifest: PluginManifest = serde_json::from_value(serde_json::json!({
908            "name": "authn",
909            "artifact": "plugin.wasm",
910            "config": {"enabled": true}
911        }))
912        .unwrap();
913        assert_eq!(manifest.name, "authn");
914        assert_eq!(manifest.wire_version, None);
915        assert!(manifest.metadata.is_empty());
916
917        let manifest: PluginManifest = serde_json::from_value(serde_json::json!({
918            "name": "cache-aware",
919            "artifact": "plugin.wasm",
920            "wire_version": 2,
921            "config": {}
922        }))
923        .unwrap();
924        assert_eq!(manifest.wire_version, Some(2));
925    }
926
927    #[test]
928    fn public_enums_cover_all_current_variants() {
929        let principal_kinds = [
930            PrincipalKind::ApiKey,
931            PrincipalKind::OAuthSubject,
932            PrincipalKind::InternalKey,
933            PrincipalKind::WorkloadIdentity,
934            PrincipalKind::SubscriptionBearer,
935        ];
936        assert_eq!(principal_kinds.len(), 5);
937
938        let strategies = [
939            CredentialStrategy::ApiKey,
940            CredentialStrategy::OAuth,
941            CredentialStrategy::InternalForwarded,
942        ];
943        assert_eq!(strategies.len(), 3);
944    }
945
946    #[test]
947    fn observe_event_variants_are_equatable() {
948        let events = vec![
949            ObserveEvent::RequestStarted {
950                request_id: "req".to_owned(),
951                downstream_user_agent: Some("ua".to_owned()),
952            },
953            ObserveEvent::AuthnComplete {
954                principal_id: "principal".to_owned(),
955                kind: PrincipalKind::InternalKey,
956            },
957            ObserveEvent::UpstreamChosen {
958                upstream: Upstream::AnthropicDirect { base_url: None },
959            },
960            ObserveEvent::Chunk {
961                batch_index: 1,
962                event_count: 2,
963                total_bytes: 3,
964            },
965            ObserveEvent::RequestFinished {
966                status: StatusCode::OK,
967                input_tokens: Some(4),
968                output_tokens: Some(5),
969                cache_creation_input_tokens: Some(6),
970                cache_read_input_tokens: Some(7),
971                duration_ms: 8,
972            },
973            ObserveEvent::Error {
974                code: "E".to_owned(),
975                message: "redacted".to_owned(),
976                source: "plugin".to_owned(),
977            },
978        ];
979
980        assert_eq!(events, events.clone());
981    }
982
983    #[test]
984    fn cache_types_roundtrip() {
985        let ttl_class = TtlClass::Ephemeral1h;
986        let json = serde_json::to_string(&ttl_class).unwrap();
987        let decoded: TtlClass = serde_json::from_str(&json).unwrap();
988        assert_eq!(decoded, ttl_class);
989
990        let origin = BreakpointOrigin::AutoCacheInferred;
991        let json = serde_json::to_string(&origin).unwrap();
992        let decoded: BreakpointOrigin = serde_json::from_str(&json).unwrap();
993        assert_eq!(decoded, origin);
994
995        let source = CacheBreakpointSource::System;
996        let json = serde_json::to_string(&source).unwrap();
997        let decoded: CacheBreakpointSource = serde_json::from_str(&json).unwrap();
998        assert_eq!(decoded, source);
999
1000        let breakpoint = CacheBreakpoint {
1001            block_index: 0,
1002            source: CacheBreakpointSource::Message,
1003            path: "messages.0.content.1".to_owned(),
1004            message_index: Some(0),
1005            prefix_hash: "abc123".to_owned(),
1006            prefix_token_count: 100,
1007            requested_ttl: TtlClass::Ephemeral5m,
1008            origin: BreakpointOrigin::Explicit,
1009        };
1010        let json = serde_json::to_string(&breakpoint).unwrap();
1011        let decoded: CacheBreakpoint = serde_json::from_str(&json).unwrap();
1012        assert_eq!(decoded, breakpoint);
1013
1014        let warm_entry = WarmCacheEntry {
1015            prefix_hash: "def456".to_owned(),
1016            expires_at_unix_secs: 1700000000,
1017            ttl_class: TtlClass::Ephemeral1h,
1018            last_observed_at_unix_secs: 1699999000,
1019        };
1020        let json = serde_json::to_string(&warm_entry).unwrap();
1021        let decoded: WarmCacheEntry = serde_json::from_str(&json).unwrap();
1022        assert_eq!(decoded, warm_entry);
1023
1024        let cache_score = CacheScore {
1025            predicted_cache_read_tokens: 50,
1026            predicted_cache_creation_tokens_5m: 100,
1027            predicted_cache_creation_tokens_1h: 200,
1028            predicted_uncached_input_tokens: 25,
1029            predicted_expires_at_unix_secs: Some(1700000000),
1030            matched_breakpoint_index: Some(0),
1031            confidence: 0.95,
1032            ambiguity_reason: None,
1033        };
1034        let json = serde_json::to_string(&cache_score).unwrap();
1035        let decoded: CacheScore = serde_json::from_str(&json).unwrap();
1036        assert_eq!(
1037            decoded.predicted_cache_read_tokens,
1038            cache_score.predicted_cache_read_tokens
1039        );
1040        assert_eq!(
1041            decoded.predicted_cache_creation_tokens_5m,
1042            cache_score.predicted_cache_creation_tokens_5m
1043        );
1044        assert_eq!(
1045            decoded.predicted_cache_creation_tokens_1h,
1046            cache_score.predicted_cache_creation_tokens_1h
1047        );
1048        assert_eq!(
1049            decoded.predicted_uncached_input_tokens,
1050            cache_score.predicted_uncached_input_tokens
1051        );
1052        assert_eq!(
1053            decoded.predicted_expires_at_unix_secs,
1054            cache_score.predicted_expires_at_unix_secs
1055        );
1056        assert_eq!(
1057            decoded.matched_breakpoint_index,
1058            cache_score.matched_breakpoint_index
1059        );
1060        assert!((decoded.confidence - cache_score.confidence).abs() < 0.0001);
1061        assert_eq!(decoded.ambiguity_reason, cache_score.ambiguity_reason);
1062    }
1063
1064    #[test]
1065    fn upstream_candidate_cache_score_roundtrip() {
1066        let candidate_no_cache = UpstreamCandidate {
1067            upstream_id: Uuid::new_v4(),
1068            name: "test-upstream".to_owned(),
1069            kind: UpstreamKind::AnthropicApiKey,
1070            observed_rate_limits: vec![],
1071            subscription_quotas: vec![],
1072            observed_at_unix_secs: 1700000000,
1073            cache_score: None,
1074            base_url: None,
1075        };
1076        let json = serde_json::to_string(&candidate_no_cache).unwrap();
1077        let decoded: UpstreamCandidate = serde_json::from_str(&json).unwrap();
1078        assert_eq!(decoded.upstream_id, candidate_no_cache.upstream_id);
1079        assert_eq!(decoded.name, candidate_no_cache.name);
1080        assert_eq!(decoded.cache_score, None);
1081
1082        let cache_score = CacheScore {
1083            predicted_cache_read_tokens: 50,
1084            predicted_cache_creation_tokens_5m: 100,
1085            predicted_cache_creation_tokens_1h: 200,
1086            predicted_uncached_input_tokens: 25,
1087            predicted_expires_at_unix_secs: Some(1700000000),
1088            matched_breakpoint_index: Some(0),
1089            confidence: 0.95,
1090            ambiguity_reason: None,
1091        };
1092        let candidate_with_cache = UpstreamCandidate {
1093            upstream_id: Uuid::new_v4(),
1094            name: "test-upstream-cached".to_owned(),
1095            kind: UpstreamKind::AnthropicApiKey,
1096            observed_rate_limits: vec![],
1097            subscription_quotas: vec![],
1098            observed_at_unix_secs: 1700000000,
1099            cache_score: Some(cache_score),
1100            base_url: None,
1101        };
1102        let json = serde_json::to_string(&candidate_with_cache).unwrap();
1103        let decoded: UpstreamCandidate = serde_json::from_str(&json).unwrap();
1104        assert_eq!(decoded.upstream_id, candidate_with_cache.upstream_id);
1105        assert_eq!(decoded.name, candidate_with_cache.name);
1106        assert!(decoded.cache_score.is_some());
1107        assert_eq!(decoded.cache_score.unwrap().predicted_cache_read_tokens, 50);
1108    }
1109
1110    #[test]
1111    fn request_context_cache_fields_roundtrip() {
1112        let ctx_empty = RequestContext {
1113            request_id: "req-1".to_owned(),
1114            downstream_headers: HeaderMap::new(),
1115            method: Method::POST,
1116            path: "/v1/messages".to_owned(),
1117            query: None,
1118            body_bytes: Bytes::new(),
1119            cache_breakpoints: Vec::new(),
1120            canonical_model_id: String::new(),
1121        };
1122        assert_eq!(ctx_empty.cache_breakpoints.len(), 0);
1123        assert_eq!(ctx_empty.canonical_model_id, "");
1124
1125        let breakpoint = CacheBreakpoint {
1126            block_index: 1,
1127            source: CacheBreakpointSource::Message,
1128            path: "messages.0.content.0".to_owned(),
1129            message_index: Some(0),
1130            prefix_hash: "hash123".to_owned(),
1131            prefix_token_count: 150,
1132            requested_ttl: TtlClass::Ephemeral1h,
1133            origin: BreakpointOrigin::Explicit,
1134        };
1135        let ctx_populated = RequestContext {
1136            request_id: "req-2".to_owned(),
1137            downstream_headers: HeaderMap::new(),
1138            method: Method::POST,
1139            path: "/v1/messages".to_owned(),
1140            query: Some("param=value".to_owned()),
1141            body_bytes: Bytes::from_static(b"test"),
1142            cache_breakpoints: vec![breakpoint],
1143            canonical_model_id: "claude-sonnet-4-5-20250929".to_owned(),
1144        };
1145        assert_eq!(ctx_populated.cache_breakpoints.len(), 1);
1146        assert_eq!(
1147            ctx_populated.canonical_model_id,
1148            "claude-sonnet-4-5-20250929"
1149        );
1150    }
1151
1152    #[test]
1153    fn upstream_candidate_deserialize_without_cache_score_field() {
1154        let json = r#"{
1155            "upstream_id": "00000000-0000-0000-0000-000000000001",
1156            "name": "legacy-upstream",
1157            "kind": "anthropic_api_key",
1158            "observed_rate_limits": [],
1159            "subscription_quotas": [],
1160            "observed_at_unix_secs": 1700000000
1161        }"#;
1162        let candidate: UpstreamCandidate = serde_json::from_str(json).unwrap();
1163        assert!(candidate.cache_score.is_none());
1164        assert_eq!(candidate.name, "legacy-upstream");
1165    }
1166
1167    #[test]
1168    fn request_context_cache_breakpoints_default_on_missing_fields() {
1169        let ctx = RequestContext {
1170            request_id: "test".to_owned(),
1171            downstream_headers: HeaderMap::new(),
1172            method: Method::GET,
1173            path: "/test".to_owned(),
1174            query: None,
1175            body_bytes: Bytes::new(),
1176            cache_breakpoints: Vec::new(),
1177            canonical_model_id: String::new(),
1178        };
1179        assert!(ctx.cache_breakpoints.is_empty());
1180        assert!(ctx.canonical_model_id.is_empty());
1181    }
1182}