Skip to main content

cc_lb_plugin_api/
types.rs

1//! Shared public data types for plugin boundaries.
2
3use std::collections::BTreeMap;
4use std::sync::Arc;
5use std::time::Duration;
6
7use bytes::Bytes;
8use http::{HeaderMap, Method, StatusCode};
9use serde::{Deserialize, Serialize};
10use url::Url;
11use uuid::Uuid;
12
13use crate::errors::{DialectError, SignerError};
14use crate::traits::{Signer, UpstreamDialect};
15
/// Authenticated caller identity used for quota, audit, and routing decisions.
///
/// Serializes as a plain object with `id`, `kind`, and `claims` keys.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Principal {
    /// Stable principal identifier, unique within the proxy deployment.
    pub id: String,
    /// Principal category inferred by the authentication plugin.
    pub kind: PrincipalKind,
    /// Plugin-provided claims available to router and observability layers.
    ///
    /// Held as an arbitrary JSON object; this type places no schema on the keys.
    pub claims: serde_json::Map<String, serde_json::Value>,
}
26
/// Principal categories supported by first-party and custom auth plugins.
///
/// Variants are serialized by name using serde's `snake_case` rule, which
/// inserts an underscore before every uppercase letter after the first. As a
/// result `OAuthSubject` is written as `o_auth_subject` (not `oauth_subject`).
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PrincipalKind {
    /// Principal authenticated by an Anthropic-compatible API key.
    ApiKey,
    /// Principal authenticated as an OAuth subject.
    OAuthSubject,
    /// Principal authenticated by an internal key managed by cc-lb.
    InternalKey,
    /// Principal authenticated through a workload identity mechanism.
    WorkloadIdentity,
    /// Principal authenticated by a Claude subscription bearer token.
    SubscriptionBearer,
}
42
/// Runtime chain slot a plugin can provide.
///
/// The serde wire names (`router`, `observability_hook`, `shape`) are the same
/// strings returned by [`PluginSlot::as_str`]. The derived ordering follows
/// declaration order: `Router < ObservabilityHook < Shape`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PluginSlot {
    /// Router filter slot for selecting or filtering upstream candidates.
    Router,
    /// Observability hook slot for receiving request lifecycle events.
    ObservabilityHook,
    /// Request shaping slot for producing upstream-specific requests.
    Shape,
}
54
55impl PluginSlot {
56    /// Returns the stable snake_case wire name for this slot.
57    pub fn as_str(self) -> &'static str {
58        match self {
59            Self::Router => "router",
60            Self::ObservabilityHook => "observability_hook",
61            Self::Shape => "shape",
62        }
63    }
64
65    /// Parses a stable snake_case wire name into a plugin slot.
66    pub fn parse(value: &str) -> Option<Self> {
67        match value {
68            "router" => Some(Self::Router),
69            "observability_hook" => Some(Self::ObservabilityHook),
70            "shape" => Some(Self::Shape),
71            _ => None,
72        }
73    }
74}
75
/// Upstream backends supported by the proxy routing contract.
///
/// Serialized as an internally tagged object: the snake_case variant name is
/// stored under the `kind` key, so `AnthropicDirect` without an override is
/// `{"kind":"anthropic_direct"}`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "kind")]
pub enum Upstream {
    /// Direct Anthropic API endpoint.
    AnthropicDirect {
        /// Operator-configured base URL override for this upstream, resolved
        /// from the upstream record at routing time. `None` indicates the
        /// canonical Anthropic endpoint should be used.
        ///
        /// Omitted from the serialized form when `None`, and defaults to
        /// `None` when absent on input.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        base_url: Option<Url>,
    },
}
89
/// Upstream record kind exposed to router plugins for candidate selection.
///
/// The serde wire names (`anthropic_api_key`, `anthropic_oauth`) are the same
/// strings returned by [`UpstreamKind::as_str`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UpstreamKind {
    /// Anthropic API-key upstream.
    AnthropicApiKey,
    /// Anthropic OAuth upstream.
    AnthropicOauth,
}
99
100impl UpstreamKind {
101    /// Returns the stable snake_case wire name.
102    pub fn as_str(self) -> &'static str {
103        match self {
104            Self::AnthropicApiKey => "anthropic_api_key",
105            Self::AnthropicOauth => "anthropic_oauth",
106        }
107    }
108}
109
/// Upstream rate-limit metric kind observed from upstream responses.
///
/// The serde wire names (`requests`, `tokens`, `input_tokens`,
/// `output_tokens`) are the same strings returned by
/// [`RateLimitKind::as_str`].
#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum RateLimitKind {
    /// Request-count rate limit.
    Requests,
    /// Aggregate token rate limit.
    Tokens,
    /// Input-token rate limit.
    InputTokens,
    /// Output-token rate limit.
    OutputTokens,
}
123
124impl RateLimitKind {
125    /// Returns the stable snake_case wire name.
126    pub fn as_str(self) -> &'static str {
127        match self {
128            Self::Requests => "requests",
129            Self::Tokens => "tokens",
130            Self::InputTokens => "input_tokens",
131            Self::OutputTokens => "output_tokens",
132        }
133    }
134}
135
/// Latest upstream rate-limit observation exposed to router plugins.
///
/// Every numeric field is optional, so consumers must handle missing data
/// rather than assume a complete observation.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub struct RateLimitObservation {
    /// Observed rate-limit kind.
    pub kind: RateLimitKind,
    /// Provider-defined rate-limit window label.
    pub window: String,
    /// Optional maximum quota for the window.
    pub limit: Option<u64>,
    /// Optional remaining quota for the window.
    pub remaining: Option<u64>,
    /// Optional provider reset timestamp or duration string.
    ///
    /// Kept as the raw string; this type does not parse it.
    pub reset: Option<String>,
}
150
/// Freshness state for subscription quota data exposed to router plugins.
///
/// Serialized in snake_case as `fresh`, `stale`, or `missing`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SubscriptionQuotaDataState {
    /// Data is inside the configured routing freshness window.
    Fresh,
    /// Data exists but is older than the configured routing freshness window.
    Stale,
    /// No usable subscription quota data exists for the candidate/window.
    Missing,
}
162
/// Latest subscription quota snapshot for one candidate/window.
///
/// Note the mixed time units: `resets_at_unix_secs` and `max_staleness_secs`
/// are in seconds, while `observed_at_unix_millis` is in milliseconds.
///
/// The four trailing fields (`fallback_available`, `overage_in_use`,
/// `overage_period_monthly_utilization`, `upgrade_paths`) are omitted from the
/// serialized form when `None`; the other optional fields carry no
/// `skip_serializing_if` attribute.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SubscriptionQuotaCandidateSnapshot {
    /// Subscription quota window label.
    pub window: String,
    /// Freshness state for this window snapshot.
    pub state: SubscriptionQuotaDataState,
    /// Data source label, such as header, api, or merged.
    pub source: Option<String>,
    /// Provider-reported utilization fraction.
    pub utilization: Option<f64>,
    /// Provider-reported quota status.
    pub status: Option<String>,
    /// Provider reset timestamp in Unix seconds.
    pub resets_at_unix_secs: Option<u64>,
    /// Per-window threshold fraction that was crossed (0.0..=1.0). Matches the
    /// numeric `anthropic-ratelimit-unified-{5h,7d,overage}-surpassed-threshold`
    /// header rather than a boolean approximation.
    pub surpassed_threshold: Option<f64>,
    /// Representative claim used for provenance/debugging.
    pub representative_claim: Option<String>,
    /// Provider reason the quota window is disabled.
    pub disabled_reason: Option<String>,
    /// Whether provider extra usage is enabled.
    pub extra_usage_enabled: Option<bool>,
    /// Provider extra-usage monthly credit limit.
    pub extra_usage_monthly_limit: Option<f64>,
    /// Provider extra-usage consumed credits.
    pub extra_usage_used_credits: Option<f64>,
    /// Observation timestamp in Unix milliseconds.
    pub observed_at_unix_millis: Option<u64>,
    /// Configured maximum age, in seconds, before this snapshot becomes stale.
    pub max_staleness_secs: u64,
    /// `anthropic-ratelimit-unified-fallback` == "available". Top-level signal
    /// surfaced only on the unified window.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_available: Option<bool>,
    /// `anthropic-ratelimit-unified-overage-in-use` == "true". Top-level signal
    /// surfaced only on the unified window.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub overage_in_use: Option<bool>,
    /// Monthly overage utilization fraction (0.0..=1.0). Top-level signal
    /// distinct from per-window overage utilization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub overage_period_monthly_utilization: Option<f64>,
    /// Suggested upgrade paths (csv -> normalized list).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub upgrade_paths: Option<Vec<String>>,
}
212
/// Prompt cache TTL class: immutable after entry creation.
///
/// Serialized in snake_case; because the variant names contain no interior
/// uppercase letters the wire names are `ephemeral5m` and `ephemeral1h`.
/// `Default` yields [`TtlClass::Ephemeral5m`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub enum TtlClass {
    /// 5-minute TTL cache entry.
    #[default]
    Ephemeral5m,
    /// 1-hour TTL cache entry.
    Ephemeral1h,
}
224
/// Origin of a cache breakpoint: whether explicitly requested or auto-inferred.
///
/// Serialized in snake_case as `explicit` or `auto_cache_inferred`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub enum BreakpointOrigin {
    /// Explicit cache breakpoint requested by the user or application.
    Explicit,
    /// Auto-inferred cache breakpoint from proxy analysis.
    AutoCacheInferred,
}
235
/// Source of a cache breakpoint within the request.
///
/// Serialized in snake_case as `tools`, `system`, or `message`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub enum CacheBreakpointSource {
    /// Breakpoint from tools in the request.
    Tools,
    /// Breakpoint from system content.
    System,
    /// Breakpoint from message content.
    Message,
}
248
/// Cache breakpoint position in the request, for prompt cache optimization.
///
/// Pairs a location (`block_index`, `source`, `path`, `message_index`) with
/// the hash and token count of everything preceding it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub struct CacheBreakpoint {
    /// Block index of the breakpoint.
    pub block_index: u32,
    /// Source of the breakpoint.
    pub source: CacheBreakpointSource,
    /// Dot-separated path (e.g., "messages.0.content.1") within request.
    pub path: String,
    /// Message index if this breakpoint is within a message, None for system content.
    // NOTE(review): presumably also `None` for `Tools` breakpoints; confirm with the producer.
    pub message_index: Option<u32>,
    /// Content hash of the prefix up to this breakpoint.
    pub prefix_hash: String,
    /// Token count of the prefix up to this breakpoint.
    pub prefix_token_count: u64,
    /// Requested TTL class for this breakpoint.
    pub requested_ttl: TtlClass,
    /// Origin of this breakpoint.
    pub origin: BreakpointOrigin,
}
270
/// Warm cache entry eligible for reuse in upstream requests.
///
/// Both timestamps are Unix seconds.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub struct WarmCacheEntry {
    /// Content hash of the cached prefix.
    pub prefix_hash: String,
    /// Unix timestamp in seconds when this entry expires.
    pub expires_at_unix_secs: u64,
    /// TTL class of this cache entry.
    pub ttl_class: TtlClass,
    /// Last observed usage time in Unix seconds.
    pub last_observed_at_unix_secs: u64,
}
284
/// Cache utility prediction for routing decisions.
///
/// Token predictions are `u32`; `confidence` is an `f32`, which is why only
/// `PartialEq` (not `Eq`) is derived.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
// NOTE(review): the reason for this allow is not visible here; confirm it is still needed.
#[allow(dead_code)]
pub struct CacheScore {
    /// Predicted input tokens that can be read from cache.
    pub predicted_cache_read_tokens: u32,
    /// Predicted input tokens written to 5-minute cache.
    pub predicted_cache_creation_tokens_5m: u32,
    /// Predicted input tokens written to 1-hour cache.
    pub predicted_cache_creation_tokens_1h: u32,
    /// Predicted input tokens not read from cache.
    pub predicted_uncached_input_tokens: u32,
    /// Predicted Unix timestamp when the cache entry will expire, None if permanent.
    pub predicted_expires_at_unix_secs: Option<u64>,
    /// Index of the matched cache breakpoint if one was selected, None otherwise.
    // NOTE(review): presumably an index into `RequestContext::cache_breakpoints`; confirm.
    pub matched_breakpoint_index: Option<u32>,
    /// Confidence score for this prediction (0.0 to 1.0).
    pub confidence: f32,
    /// Optional explanation for ambiguous or low-confidence predictions.
    pub ambiguity_reason: Option<String>,
}
306
/// Available upstream candidate for routing decisions.
///
/// The router receives a list of available upstream candidates sorted by
/// `upstream_id` in ascending order (Uuid byte order). This stable ordering
/// allows plugins to implement deterministic routing algorithms.
///
/// On input, `subscription_quotas` defaults to an empty list and `cache_score`
/// / `base_url` default to `None` when absent.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct UpstreamCandidate {
    /// Stable upstream identifier.
    pub upstream_id: Uuid,
    /// Operator-facing upstream name.
    pub name: String,
    /// Upstream kind used to select compatible routing strategies.
    pub kind: UpstreamKind,
    /// Latest rate-limit observations for this candidate.
    pub observed_rate_limits: Vec<RateLimitObservation>,
    /// Latest subscription quota snapshots for this candidate.
    #[serde(default)]
    pub subscription_quotas: Vec<SubscriptionQuotaCandidateSnapshot>,
    /// Unix timestamp in seconds for the candidate observation snapshot.
    pub observed_at_unix_secs: u64,
    /// Predicted cache utility for this candidate, if available.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_score: Option<CacheScore>,
    /// Resolved upstream base URL. Populated by the host so that v2 plugins
    /// that self-reference for shape can construct the dispatch URL against
    /// the configured upstream instead of hardcoding api.anthropic.com.
    ///
    /// Carried as a plain `String` here (unlike the `Url` used by
    /// [`Upstream::AnthropicDirect`]), so this type does not validate it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub base_url: Option<String>,
}
336
/// Credential strategy expected by a selected upstream.
///
/// Variants are serialized with serde's `snake_case` rule, which inserts an
/// underscore before every uppercase letter after the first: the wire names
/// are `api_key`, `o_auth` (not `oauth`), and `internal_forwarded`.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CredentialStrategy {
    /// Anthropic-style `x-api-key` signing.
    ApiKey,
    /// Anthropic-style OAuth bearer signing.
    OAuth,
    /// Forward an internal credential supplied by upstream configuration.
    InternalForwarded,
}
348
/// Parsed downstream request data passed into plugins.
///
/// Only `Clone` and `Debug` are derived; this type has no serde
/// representation of its own.
#[derive(Clone, Debug)]
pub struct RequestContext {
    /// Stable request identifier used for logs, audit rows, and upstream traceability.
    pub request_id: String,
    /// Downstream request headers after hop-by-hop stripping.
    pub downstream_headers: HeaderMap,
    /// Downstream HTTP method.
    pub method: Method,
    /// Downstream request path, such as `/v1/messages`.
    pub path: String,
    /// Raw downstream query string without the leading `?`.
    pub query: Option<String>,
    /// Buffered request body bytes, required by SigV4 and other signing schemes.
    pub body_bytes: Bytes,
    /// Cache breakpoints extracted from the request for prompt cache optimization.
    pub cache_breakpoints: Vec<CacheBreakpoint>,
    /// Canonical model identifier resolved from the request.
    pub canonical_model_id: String,
}
369
/// Request produced by an upstream dialect before credentials are applied.
///
/// All fields are private. Instances are built through
/// [`ShapedRequestBuilder::shaped_request`] and then read or adjusted through
/// the accessor and setter methods on this type.
#[derive(Clone, Debug)]
pub struct ShapedRequest {
    // Destination URL of the upstream call.
    url: Url,
    // HTTP method of the upstream call.
    method: Method,
    // Headers to send; mutable through `headers_mut`.
    headers: HeaderMap,
    // Request body bytes.
    body: Bytes,
    // Crate-private marker field.
    _seal: crate::private::Seal,
}
379
380impl ShapedRequest {
381    /// Returns the destination URL.
382    pub fn url(&self) -> &Url {
383        &self.url
384    }
385
386    /// Replaces the destination URL.
387    pub fn set_url(&mut self, url: Url) {
388        self.url = url;
389    }
390
391    /// Returns the HTTP method.
392    pub fn method(&self) -> &Method {
393        &self.method
394    }
395
396    /// Replaces the HTTP method.
397    pub fn set_method(&mut self, method: Method) {
398        self.method = method;
399    }
400
401    /// Returns the request headers.
402    pub fn headers(&self) -> &HeaderMap {
403        &self.headers
404    }
405
406    /// Returns mutable request headers for signer-owned changes.
407    pub fn headers_mut(&mut self) -> &mut HeaderMap {
408        &mut self.headers
409    }
410
411    /// Returns the request body.
412    pub fn body(&self) -> &Bytes {
413        &self.body
414    }
415
416    /// Replaces the request body.
417    pub fn set_body(&mut self, body: Bytes) {
418        self.body = body;
419    }
420}
421
/// Dialect-facing capability used to construct shaped requests.
///
/// Values of this type are created only by [`shape_request`]. Dialect
/// implementations receive a mutable reference while their
/// [`crate::UpstreamDialect::shape`] method is executing, which lets them return
/// a shaped request without exposing an unrestricted public constructor.
///
/// The type carries no data besides its private marker field and derives
/// neither `Clone` nor `Copy`.
#[derive(Debug)]
pub struct ShapedRequestBuilder {
    // Crate-private marker field; being private, it blocks struct-literal
    // construction from outside this module.
    _seal: crate::private::Seal,
}
432
433impl ShapedRequestBuilder {
434    /// Creates a shaped request from dialect-owned parts.
435    pub fn shaped_request(
436        &mut self,
437        url: Url,
438        method: Method,
439        headers: HeaderMap,
440        body: Bytes,
441    ) -> ShapedRequest {
442        ShapedRequest {
443            url,
444            method,
445            headers,
446            body,
447            _seal: crate::private::Seal,
448        }
449    }
450}
451
452/// Invokes an upstream dialect with a temporary shaped-request capability.
453pub fn shape_request(
454    dialect: &dyn UpstreamDialect,
455    ctx: &RequestContext,
456    upstream: &Upstream,
457    principal: &Principal,
458) -> Result<ShapedRequest, DialectError> {
459    let mut builder = ShapedRequestBuilder {
460        _seal: crate::private::Seal,
461    };
462    dialect.shape(ctx, upstream, principal, &mut builder)
463}
464
/// Request after a signer has consumed and sealed a shaped request.
///
/// All fields are private and the type exposes only read accessors plus
/// [`SignedRequest::into_parts`]; there are no setters, so a signed request
/// cannot be modified through this API.
#[derive(Clone, Debug)]
pub struct SignedRequest {
    // Destination URL, taken from the shaped request.
    url: Url,
    // HTTP method, taken from the shaped request.
    method: Method,
    // Headers as left by the signer.
    headers: HeaderMap,
    // Body bytes as left by the signer.
    body: Bytes,
    // Crate-private marker field.
    _seal: crate::private::Seal,
}
474
475impl SignedRequest {
476    /// Seals an owned shaped request after the signer has applied credentials.
477    pub fn from_shaped(shaped: ShapedRequest, _capability: &mut SigningCapability) -> Self {
478        Self {
479            url: shaped.url,
480            method: shaped.method,
481            headers: shaped.headers,
482            body: shaped.body,
483            _seal: crate::private::Seal,
484        }
485    }
486
487    /// Returns the destination URL.
488    pub fn url(&self) -> &Url {
489        &self.url
490    }
491
492    /// Returns the HTTP method.
493    pub fn method(&self) -> &Method {
494        &self.method
495    }
496
497    /// Returns the signed request headers.
498    pub fn headers(&self) -> &HeaderMap {
499        &self.headers
500    }
501
502    /// Returns the signed request body.
503    pub fn body(&self) -> &Bytes {
504        &self.body
505    }
506
507    /// Consumes the signed request into relay-ready parts.
508    pub fn into_parts(self) -> (Url, Method, HeaderMap, Bytes) {
509        (self.url, self.method, self.headers, self.body)
510    }
511}
512
/// Signer-facing capability used to seal shaped requests.
///
/// Values of this type are created only by [`sign_request`]. Signer
/// implementations receive a mutable reference while their [`crate::Signer::sign`]
/// method is executing, which keeps arbitrary crates from sealing shaped
/// requests outside the signer boundary.
///
/// The type carries no data besides its private marker field and derives
/// neither `Clone` nor `Copy`.
#[derive(Debug)]
pub struct SigningCapability {
    // Crate-private marker field; being private, it blocks struct-literal
    // construction from outside this module.
    _seal: crate::private::Seal,
}
523
524/// Invokes a signer with a temporary signing capability.
525pub async fn sign_request(
526    signer: &dyn Signer,
527    shaped: ShapedRequest,
528) -> Result<SignedRequest, SignerError> {
529    let mut capability = SigningCapability {
530        _seal: crate::private::Seal,
531    };
532    signer.sign(shaped, &mut capability).await
533}
534
/// Router output selecting both an upstream and its dialect boundary object.
///
/// No traits are derived for this type, so it is not `Clone` or `Debug`.
pub struct RouteDecision {
    /// Stable upstream identifier selected by the router, when provided by the plugin.
    pub upstream_id: Option<Uuid>,
    /// Upstream selected for the request.
    pub upstream: Upstream,
    /// Dialect plugin that shapes the request for the selected upstream.
    ///
    /// Held behind an `Arc`, so the same dialect object can be shared.
    pub dialect: Arc<dyn UpstreamDialect>,
}
544
/// Per-principal quota window and model allow-list.
///
/// All three limits are measured over the same `window`. This type derives no
/// serde traits.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PrincipalQuotas {
    /// Maximum request count allowed within `window`.
    pub requests_per_window: u64,
    /// Maximum input token count allowed within `window`.
    pub input_tokens_per_window: u64,
    /// Maximum output token count allowed within `window`.
    pub output_tokens_per_window: u64,
    /// Quota window duration.
    pub window: Duration,
    /// Glob-style model names allowed for this principal.
    // NOTE(review): how an empty list is interpreted is decided by the
    // enforcing code, which is not visible here.
    pub allowed_models: Vec<String>,
}
559
/// Observability events emitted by the lifecycle.
///
/// Each variant carries only the data for its own lifecycle point. Only
/// `RequestStarted` includes the request identifier, so correlating later
/// events with a request is left to the receiver. This type derives no serde
/// traits.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ObserveEvent {
    /// Downstream request has entered the proxy.
    RequestStarted {
        /// Request identifier.
        request_id: String,
        /// Downstream user-agent header, when present.
        downstream_user_agent: Option<String>,
    },
    /// Authentication completed successfully.
    AuthnComplete {
        /// Authenticated principal identifier.
        principal_id: String,
        /// Authenticated principal kind.
        kind: PrincipalKind,
    },
    /// Router selected an upstream.
    UpstreamChosen {
        /// Selected upstream.
        upstream: Upstream,
    },
    /// A batch of streamed events passed through the relay.
    Chunk {
        /// Monotonic batch index within the response stream.
        batch_index: u64,
        /// Number of SSE events in the batch.
        event_count: usize,
        /// Total bytes in the batch.
        total_bytes: usize,
    },
    /// Request finished successfully or with an upstream HTTP error.
    RequestFinished {
        /// Final HTTP status code.
        status: StatusCode,
        /// Input token count reported by the upstream, when known.
        input_tokens: Option<u64>,
        /// Output token count reported by the upstream, when known.
        output_tokens: Option<u64>,
        /// Cache write token count reported by the upstream, when known.
        cache_creation_input_tokens: Option<u64>,
        /// Cache read token count reported by the upstream, when known.
        cache_read_input_tokens: Option<u64>,
        /// End-to-end request duration in milliseconds.
        duration_ms: u64,
    },
    /// Lifecycle or plugin error was observed.
    Error {
        /// Stable error code.
        code: String,
        /// Redacted human-readable message.
        message: String,
        /// Error source component.
        source: String,
    },
}
616
/// Decision returned by a signer after receiving an unauthorized upstream error.
///
/// Only `Clone` is derived; cloning `Refresh` clones the `Arc`, not the signer
/// behind it.
#[derive(Clone)]
pub enum RetryDecision {
    /// Retry with a refreshed signer.
    Refresh {
        /// Signer containing refreshed credentials.
        new_signer: Arc<dyn Signer>,
    },
    /// Do not retry the request.
    Fail,
}
628
/// Plugin manifest passed to runtime adapters when instantiating plugins.
///
/// On input, `wire_version` defaults to `None` and `metadata` to an empty map
/// when absent; `name`, `artifact`, and `config` carry no `#[serde(default)]`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct PluginManifest {
    /// Plugin name from configuration.
    pub name: String,
    /// Filesystem path or runtime-specific locator for the plugin artifact.
    pub artifact: String,
    /// Preferred plugin wire envelope version. Omitted manifests default to v1.
    ///
    /// This field itself is `None` when omitted; mapping `None` to v1 is left
    /// to whoever consumes the manifest.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub wire_version: Option<u8>,
    /// Runtime configuration provided to the plugin.
    pub config: serde_json::Value,
    /// Runtime-specific metadata not interpreted by the core API contract.
    ///
    /// A `BTreeMap`, so keys are kept and serialized in sorted order.
    #[serde(default)]
    pub metadata: BTreeMap<String, serde_json::Value>,
}
645
// Routing trace cap constants.
//
// NOTE(review): nothing in the type definitions of this file enforces these
// caps; they are presumably applied where traces and errors are built.

/// Maximum number of stages in a routing trace.
pub const MAX_ROUTING_TRACE_STAGES: usize = 100;
/// Maximum length of a stage name.
// NOTE(review): unit (bytes vs. characters) is not established here; confirm at the use site.
pub const MAX_STAGE_NAME_LEN: usize = 256;
/// Maximum length of an error message in internal errors.
// NOTE(review): unit (bytes vs. characters) is not established here; confirm at the use site.
pub const MAX_ERROR_MESSAGE_LEN: usize = 1024;
653
/// Reason for a passthrough routing decision.
///
/// Serialized in snake_case as `healthy_upstream`, `no_alternative`, or
/// `plugin_decision`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PassthroughCause {
    /// Upstream is healthy and available.
    HealthyUpstream,
    /// No alternative upstream available.
    NoAlternative,
    /// Plugin returned passthrough decision.
    PluginDecision,
}
665
/// Per-candidate evaluation reason.
///
/// Serialized in snake_case as `rate_limited`, `insufficient_quota`,
/// `unhealthy`, or `rejected_by_plugin`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PerCandidateReason {
    /// Candidate hit rate limit.
    RateLimited,
    /// Candidate has insufficient quota.
    InsufficientQuota,
    /// Candidate is unhealthy.
    Unhealthy,
    /// Candidate rejected by plugin.
    RejectedByPlugin,
}
679
/// Strategy for selecting a terminal upstream.
///
/// Unlike most enums in this module, this one is serialized in kebab-case:
/// `first-pick`, `random`, `round-robin`, `least-connections`. `Default`
/// yields [`TerminalStrategy::FirstPick`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case")]
pub enum TerminalStrategy {
    /// Select first available upstream.
    #[default]
    FirstPick,
    /// Select an upstream at random.
    Random,
    /// Round-robin selection.
    RoundRobin,
    /// Least connections strategy.
    LeastConnections,
}
694
/// Decision made at a single routing stage.
///
/// Optional fields are omitted from the serialized form when `None`, and
/// `duration_us` is omitted when it is zero; all three default when absent on
/// input.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct StageDecision {
    /// Name of the routing stage.
    pub stage_name: String,
    /// Upstream candidate identifier if applicable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub upstream_id: Option<Uuid>,
    /// Reason for this stage's decision.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
    /// Time spent executing this routing stage, in microseconds.
    ///
    /// A zero duration and an absent duration are indistinguishable on the
    /// wire, because zero is skipped on output and is the default on input.
    #[serde(default, skip_serializing_if = "is_zero_u64")]
    pub duration_us: u64,
}
710
/// Serde `skip_serializing_if` predicate: reports whether `value` is zero.
///
/// Takes a reference because serde calls the predicate with `&T`.
fn is_zero_u64(value: &u64) -> bool {
    matches!(*value, 0)
}
714
/// Terminal routing decision selecting an upstream.
///
/// `Default` yields no upstream (`None`) with the default strategy,
/// [`TerminalStrategy::FirstPick`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct TerminalDecision {
    /// Selected upstream identifier.
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub upstream_id: Option<Uuid>,
    /// Strategy used for selection.
    pub strategy: TerminalStrategy,
}
724
/// Complete routing trace for a request through all decision stages.
///
/// `Default` yields an empty trace with no terminal decision. Both fields
/// default when absent on input.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct RoutingTrace {
    /// Sequence of stage decisions made during routing.
    // NOTE(review): `MAX_ROUTING_TRACE_STAGES` is not enforced by this type;
    // presumably the producer truncates. Confirm.
    #[serde(default)]
    pub stages: Vec<StageDecision>,
    /// Final terminal routing decision.
    ///
    /// Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub terminal_decision: Option<TerminalDecision>,
}
735
/// Stage where an internal error occurred.
///
/// Serialized in snake_case as `authn`, `router`, `router_filter`, `shape`,
/// `signer`, or `relay`. `Default` yields [`InternalErrorStage::Router`],
/// which is not the first declared variant.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum InternalErrorStage {
    /// Authentication stage.
    Authn,
    /// Routing stage.
    #[default]
    Router,
    /// Router filter stage.
    RouterFilter,
    /// Request shaping stage.
    Shape,
    /// Request signing stage.
    Signer,
    /// Request relay stage.
    Relay,
}
754
/// Kind of internal error that occurred.
///
/// Serialized in snake_case as `plugin_error`, `invalid_output`, `trap`,
/// `config_error`, `timeout`, or `unavailable`. `Default` yields
/// [`InternalErrorKind::PluginError`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum InternalErrorKind {
    /// Plugin crashed or returned an error.
    #[default]
    PluginError,
    /// Plugin returned invalid output.
    InvalidOutput,
    /// Plugin trapped during execution.
    Trap,
    /// Configuration error.
    ConfigError,
    /// Timeout error.
    Timeout,
    /// Resource unavailable.
    Unavailable,
}
773
/// Internal error information with stage and kind details.
///
/// `Default` yields stage `Router`, kind `PluginError`, and no message.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct InternalError {
    /// Stage where the error occurred.
    pub stage: InternalErrorStage,
    /// Kind of error.
    pub kind: InternalErrorKind,
    /// Optional error message.
    ///
    /// Omitted from the serialized form when `None`.
    // NOTE(review): `MAX_ERROR_MESSAGE_LEN` is not enforced by this type;
    // presumably the producer truncates. Confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
785
786#[cfg(test)]
787mod tests {
788    use super::*;
789
790    #[test]
791    fn shaped_request_accessors_and_mutators_preserve_parts() {
792        let mut builder = ShapedRequestBuilder {
793            _seal: crate::private::Seal,
794        };
795        let mut headers = HeaderMap::new();
796        headers.insert("x-test", "one".parse().unwrap());
797        let mut shaped = builder.shaped_request(
798            "https://example.test/v1/messages".parse().unwrap(),
799            Method::POST,
800            headers,
801            Bytes::from_static(b"first"),
802        );
803
804        assert_eq!(shaped.url().as_str(), "https://example.test/v1/messages");
805        assert_eq!(shaped.method(), Method::POST);
806        assert_eq!(shaped.headers()["x-test"], "one");
807        assert_eq!(shaped.body(), &Bytes::from_static(b"first"));
808
809        shaped.set_url("https://example.test/v1/complete".parse().unwrap());
810        shaped.set_method(Method::PUT);
811        shaped
812            .headers_mut()
813            .insert("x-test", "two".parse().unwrap());
814        shaped.set_body(Bytes::from_static(b"second"));
815
816        assert_eq!(shaped.url().path(), "/v1/complete");
817        assert_eq!(shaped.method(), Method::PUT);
818        assert_eq!(shaped.headers()["x-test"], "two");
819        assert_eq!(shaped.body(), &Bytes::from_static(b"second"));
820    }
821
822    #[test]
823    fn signed_request_exposes_and_consumes_signed_parts() {
824        let mut builder = ShapedRequestBuilder {
825            _seal: crate::private::Seal,
826        };
827        let mut headers = HeaderMap::new();
828        headers.insert("authorization", "Bearer token".parse().unwrap());
829        let shaped = builder.shaped_request(
830            "https://api.example.test/v1/messages".parse().unwrap(),
831            Method::POST,
832            headers,
833            Bytes::from_static(b"{}"),
834        );
835        let mut capability = SigningCapability {
836            _seal: crate::private::Seal,
837        };
838
839        let signed = SignedRequest::from_shaped(shaped, &mut capability);
840        assert_eq!(signed.url().host_str(), Some("api.example.test"));
841        assert_eq!(signed.method(), Method::POST);
842        assert_eq!(signed.headers()["authorization"], "Bearer token");
843        assert_eq!(signed.body(), &Bytes::from_static(b"{}"));
844
845        let (url, method, headers, body) = signed.into_parts();
846        assert_eq!(url.as_str(), "https://api.example.test/v1/messages");
847        assert_eq!(method, Method::POST);
848        assert_eq!(headers["authorization"], "Bearer token");
849        assert_eq!(body, Bytes::from_static(b"{}"));
850    }
851
852    #[test]
853    fn upstream_and_manifest_serde_round_trip() {
854        let upstreams = vec![Upstream::AnthropicDirect { base_url: None }];
855
856        for upstream in upstreams {
857            let json = serde_json::to_string(&upstream).unwrap();
858            let decoded: Upstream = serde_json::from_str(&json).unwrap();
859            assert_eq!(decoded, upstream);
860        }
861
862        let manifest: PluginManifest = serde_json::from_value(serde_json::json!({
863            "name": "authn",
864            "artifact": "plugin.wasm",
865            "config": {"enabled": true}
866        }))
867        .unwrap();
868        assert_eq!(manifest.name, "authn");
869        assert_eq!(manifest.wire_version, None);
870        assert!(manifest.metadata.is_empty());
871
872        let manifest: PluginManifest = serde_json::from_value(serde_json::json!({
873            "name": "cache-aware",
874            "artifact": "plugin.wasm",
875            "wire_version": 2,
876            "config": {}
877        }))
878        .unwrap();
879        assert_eq!(manifest.wire_version, Some(2));
880    }
881
882    #[test]
883    fn public_enums_cover_all_current_variants() {
884        let principal_kinds = [
885            PrincipalKind::ApiKey,
886            PrincipalKind::OAuthSubject,
887            PrincipalKind::InternalKey,
888            PrincipalKind::WorkloadIdentity,
889            PrincipalKind::SubscriptionBearer,
890        ];
891        assert_eq!(principal_kinds.len(), 5);
892
893        let strategies = [
894            CredentialStrategy::ApiKey,
895            CredentialStrategy::OAuth,
896            CredentialStrategy::InternalForwarded,
897        ];
898        assert_eq!(strategies.len(), 3);
899    }
900
901    #[test]
902    fn observe_event_variants_are_equatable() {
903        let events = vec![
904            ObserveEvent::RequestStarted {
905                request_id: "req".to_owned(),
906                downstream_user_agent: Some("ua".to_owned()),
907            },
908            ObserveEvent::AuthnComplete {
909                principal_id: "principal".to_owned(),
910                kind: PrincipalKind::InternalKey,
911            },
912            ObserveEvent::UpstreamChosen {
913                upstream: Upstream::AnthropicDirect { base_url: None },
914            },
915            ObserveEvent::Chunk {
916                batch_index: 1,
917                event_count: 2,
918                total_bytes: 3,
919            },
920            ObserveEvent::RequestFinished {
921                status: StatusCode::OK,
922                input_tokens: Some(4),
923                output_tokens: Some(5),
924                cache_creation_input_tokens: Some(6),
925                cache_read_input_tokens: Some(7),
926                duration_ms: 8,
927            },
928            ObserveEvent::Error {
929                code: "E".to_owned(),
930                message: "redacted".to_owned(),
931                source: "plugin".to_owned(),
932            },
933        ];
934
935        assert_eq!(events, events.clone());
936    }
937
938    #[test]
939    fn cache_types_roundtrip() {
940        let ttl_class = TtlClass::Ephemeral1h;
941        let json = serde_json::to_string(&ttl_class).unwrap();
942        let decoded: TtlClass = serde_json::from_str(&json).unwrap();
943        assert_eq!(decoded, ttl_class);
944
945        let origin = BreakpointOrigin::AutoCacheInferred;
946        let json = serde_json::to_string(&origin).unwrap();
947        let decoded: BreakpointOrigin = serde_json::from_str(&json).unwrap();
948        assert_eq!(decoded, origin);
949
950        let source = CacheBreakpointSource::System;
951        let json = serde_json::to_string(&source).unwrap();
952        let decoded: CacheBreakpointSource = serde_json::from_str(&json).unwrap();
953        assert_eq!(decoded, source);
954
955        let breakpoint = CacheBreakpoint {
956            block_index: 0,
957            source: CacheBreakpointSource::Message,
958            path: "messages.0.content.1".to_owned(),
959            message_index: Some(0),
960            prefix_hash: "abc123".to_owned(),
961            prefix_token_count: 100,
962            requested_ttl: TtlClass::Ephemeral5m,
963            origin: BreakpointOrigin::Explicit,
964        };
965        let json = serde_json::to_string(&breakpoint).unwrap();
966        let decoded: CacheBreakpoint = serde_json::from_str(&json).unwrap();
967        assert_eq!(decoded, breakpoint);
968
969        let warm_entry = WarmCacheEntry {
970            prefix_hash: "def456".to_owned(),
971            expires_at_unix_secs: 1700000000,
972            ttl_class: TtlClass::Ephemeral1h,
973            last_observed_at_unix_secs: 1699999000,
974        };
975        let json = serde_json::to_string(&warm_entry).unwrap();
976        let decoded: WarmCacheEntry = serde_json::from_str(&json).unwrap();
977        assert_eq!(decoded, warm_entry);
978
979        let cache_score = CacheScore {
980            predicted_cache_read_tokens: 50,
981            predicted_cache_creation_tokens_5m: 100,
982            predicted_cache_creation_tokens_1h: 200,
983            predicted_uncached_input_tokens: 25,
984            predicted_expires_at_unix_secs: Some(1700000000),
985            matched_breakpoint_index: Some(0),
986            confidence: 0.95,
987            ambiguity_reason: None,
988        };
989        let json = serde_json::to_string(&cache_score).unwrap();
990        let decoded: CacheScore = serde_json::from_str(&json).unwrap();
991        assert_eq!(
992            decoded.predicted_cache_read_tokens,
993            cache_score.predicted_cache_read_tokens
994        );
995        assert_eq!(
996            decoded.predicted_cache_creation_tokens_5m,
997            cache_score.predicted_cache_creation_tokens_5m
998        );
999        assert_eq!(
1000            decoded.predicted_cache_creation_tokens_1h,
1001            cache_score.predicted_cache_creation_tokens_1h
1002        );
1003        assert_eq!(
1004            decoded.predicted_uncached_input_tokens,
1005            cache_score.predicted_uncached_input_tokens
1006        );
1007        assert_eq!(
1008            decoded.predicted_expires_at_unix_secs,
1009            cache_score.predicted_expires_at_unix_secs
1010        );
1011        assert_eq!(
1012            decoded.matched_breakpoint_index,
1013            cache_score.matched_breakpoint_index
1014        );
1015        assert!((decoded.confidence - cache_score.confidence).abs() < 0.0001);
1016        assert_eq!(decoded.ambiguity_reason, cache_score.ambiguity_reason);
1017    }
1018
1019    #[test]
1020    fn upstream_candidate_cache_score_roundtrip() {
1021        let candidate_no_cache = UpstreamCandidate {
1022            upstream_id: Uuid::new_v4(),
1023            name: "test-upstream".to_owned(),
1024            kind: UpstreamKind::AnthropicApiKey,
1025            observed_rate_limits: vec![],
1026            subscription_quotas: vec![],
1027            observed_at_unix_secs: 1700000000,
1028            cache_score: None,
1029            base_url: None,
1030        };
1031        let json = serde_json::to_string(&candidate_no_cache).unwrap();
1032        let decoded: UpstreamCandidate = serde_json::from_str(&json).unwrap();
1033        assert_eq!(decoded.upstream_id, candidate_no_cache.upstream_id);
1034        assert_eq!(decoded.name, candidate_no_cache.name);
1035        assert_eq!(decoded.cache_score, None);
1036
1037        let cache_score = CacheScore {
1038            predicted_cache_read_tokens: 50,
1039            predicted_cache_creation_tokens_5m: 100,
1040            predicted_cache_creation_tokens_1h: 200,
1041            predicted_uncached_input_tokens: 25,
1042            predicted_expires_at_unix_secs: Some(1700000000),
1043            matched_breakpoint_index: Some(0),
1044            confidence: 0.95,
1045            ambiguity_reason: None,
1046        };
1047        let candidate_with_cache = UpstreamCandidate {
1048            upstream_id: Uuid::new_v4(),
1049            name: "test-upstream-cached".to_owned(),
1050            kind: UpstreamKind::AnthropicApiKey,
1051            observed_rate_limits: vec![],
1052            subscription_quotas: vec![],
1053            observed_at_unix_secs: 1700000000,
1054            cache_score: Some(cache_score),
1055            base_url: None,
1056        };
1057        let json = serde_json::to_string(&candidate_with_cache).unwrap();
1058        let decoded: UpstreamCandidate = serde_json::from_str(&json).unwrap();
1059        assert_eq!(decoded.upstream_id, candidate_with_cache.upstream_id);
1060        assert_eq!(decoded.name, candidate_with_cache.name);
1061        assert!(decoded.cache_score.is_some());
1062        assert_eq!(decoded.cache_score.unwrap().predicted_cache_read_tokens, 50);
1063    }
1064
1065    #[test]
1066    fn request_context_cache_fields_roundtrip() {
1067        let ctx_empty = RequestContext {
1068            request_id: "req-1".to_owned(),
1069            downstream_headers: HeaderMap::new(),
1070            method: Method::POST,
1071            path: "/v1/messages".to_owned(),
1072            query: None,
1073            body_bytes: Bytes::new(),
1074            cache_breakpoints: Vec::new(),
1075            canonical_model_id: String::new(),
1076        };
1077        assert_eq!(ctx_empty.cache_breakpoints.len(), 0);
1078        assert_eq!(ctx_empty.canonical_model_id, "");
1079
1080        let breakpoint = CacheBreakpoint {
1081            block_index: 1,
1082            source: CacheBreakpointSource::Message,
1083            path: "messages.0.content.0".to_owned(),
1084            message_index: Some(0),
1085            prefix_hash: "hash123".to_owned(),
1086            prefix_token_count: 150,
1087            requested_ttl: TtlClass::Ephemeral1h,
1088            origin: BreakpointOrigin::Explicit,
1089        };
1090        let ctx_populated = RequestContext {
1091            request_id: "req-2".to_owned(),
1092            downstream_headers: HeaderMap::new(),
1093            method: Method::POST,
1094            path: "/v1/messages".to_owned(),
1095            query: Some("param=value".to_owned()),
1096            body_bytes: Bytes::from_static(b"test"),
1097            cache_breakpoints: vec![breakpoint],
1098            canonical_model_id: "claude-sonnet-4-5-20250929".to_owned(),
1099        };
1100        assert_eq!(ctx_populated.cache_breakpoints.len(), 1);
1101        assert_eq!(
1102            ctx_populated.canonical_model_id,
1103            "claude-sonnet-4-5-20250929"
1104        );
1105    }
1106
1107    #[test]
1108    fn upstream_candidate_deserialize_without_cache_score_field() {
1109        let json = r#"{
1110            "upstream_id": "00000000-0000-0000-0000-000000000001",
1111            "name": "legacy-upstream",
1112            "kind": "anthropic_api_key",
1113            "observed_rate_limits": [],
1114            "subscription_quotas": [],
1115            "observed_at_unix_secs": 1700000000
1116        }"#;
1117        let candidate: UpstreamCandidate = serde_json::from_str(json).unwrap();
1118        assert!(candidate.cache_score.is_none());
1119        assert_eq!(candidate.name, "legacy-upstream");
1120    }
1121
1122    #[test]
1123    fn request_context_cache_breakpoints_default_on_missing_fields() {
1124        let ctx = RequestContext {
1125            request_id: "test".to_owned(),
1126            downstream_headers: HeaderMap::new(),
1127            method: Method::GET,
1128            path: "/test".to_owned(),
1129            query: None,
1130            body_bytes: Bytes::new(),
1131            cache_breakpoints: Vec::new(),
1132            canonical_model_id: String::new(),
1133        };
1134        assert!(ctx.cache_breakpoints.is_empty());
1135        assert!(ctx.canonical_model_id.is_empty());
1136    }
1137}