grapsus_config/routes.rs
1//! Route configuration types
2//!
3//! This module contains configuration types for routing requests
4//! to upstreams or static file handlers.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::PathBuf;
9use validator::Validate;
10
11use grapsus_common::budget::{CostAttributionConfig, TokenBudgetConfig};
12use grapsus_common::types::{ByteSize, CircuitBreakerConfig, Priority, RetryPolicy};
13
14use crate::filters::RateLimitKey;
15
16// ============================================================================
17// Route Configuration
18// ============================================================================
19
20/// Route configuration
21#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
22pub struct RouteConfig {
23 /// Unique route identifier
24 pub id: String,
25
26 /// Route priority (higher = evaluated first)
27 #[serde(default)]
28 pub priority: Priority,
29
30 /// Match conditions
31 pub matches: Vec<MatchCondition>,
32
33 /// Target upstream (optional for static file serving)
34 pub upstream: Option<String>,
35
36 /// Service type for this route
37 #[serde(default)]
38 pub service_type: ServiceType,
39
40 /// Route-specific policies
41 #[serde(default)]
42 pub policies: RoutePolicies,
43
44 /// Filter chain for this route - list of filter IDs (executed in order)
45 /// References filters defined in the top-level `filters` block
46 #[serde(default)]
47 pub filters: Vec<String>,
48
49 /// Built-in handler (for service_type = Builtin)
50 #[serde(default, rename = "builtin-handler")]
51 pub builtin_handler: Option<BuiltinHandler>,
52
53 /// WAF enabled for this route (shorthand for adding WAF agent filter)
54 #[serde(default)]
55 pub waf_enabled: bool,
56
57 /// Circuit breaker configuration
58 #[serde(default)]
59 pub circuit_breaker: Option<CircuitBreakerConfig>,
60
61 /// Retry policy
62 #[serde(default)]
63 pub retry_policy: Option<RetryPolicy>,
64
65 /// Static file serving configuration (for service_type = Static)
66 #[serde(default)]
67 pub static_files: Option<StaticFileConfig>,
68
69 /// API schema validation configuration (for service_type = Api)
70 #[serde(default)]
71 pub api_schema: Option<ApiSchemaConfig>,
72
73 /// Inference configuration (for service_type = Inference)
74 #[serde(default)]
75 pub inference: Option<InferenceConfig>,
76
77 /// Error page configuration
78 #[serde(default)]
79 pub error_pages: Option<ErrorPageConfig>,
80
81 /// Enable WebSocket upgrade support for this route (default: false)
82 /// When enabled, HTTP Upgrade requests with "websocket" protocol are allowed.
83 /// Pingora handles the actual WebSocket tunneling transparently.
84 #[serde(default)]
85 pub websocket: bool,
86
87 /// Enable WebSocket frame inspection (default: false)
88 /// When enabled, individual WebSocket frames are sent to agents for inspection.
89 /// Agents can allow, drop, or close the connection based on frame content.
90 /// Requires `websocket: true` to have any effect.
91 /// Note: If `permessage-deflate` compression is negotiated, inspection is skipped.
92 #[serde(default)]
93 pub websocket_inspection: bool,
94
95 /// Traffic mirroring / shadowing configuration
96 /// Mirrors requests to a shadow upstream for safe canary testing
97 #[serde(default)]
98 pub shadow: Option<ShadowConfig>,
99
100 /// Fallback routing configuration
101 /// Enables automatic failover to alternative upstreams on failure
102 #[serde(default)]
103 pub fallback: Option<FallbackConfig>,
104}
105
106// ============================================================================
107// Match Conditions
108// ============================================================================
109
110/// Match condition for route selection
111#[derive(Debug, Clone, Serialize, Deserialize)]
112#[serde(rename_all = "snake_case")]
113pub enum MatchCondition {
114 /// Match by path prefix
115 PathPrefix(String),
116
117 /// Match by exact path
118 Path(String),
119
120 /// Match by regex pattern
121 PathRegex(String),
122
123 /// Match by host header
124 Host(String),
125
126 /// Match by header presence
127 Header { name: String, value: Option<String> },
128
129 /// Match by method
130 Method(Vec<String>),
131
132 /// Match by query parameter
133 QueryParam { name: String, value: Option<String> },
134}
135
136// ============================================================================
137// Service Types
138// ============================================================================
139
140/// Service type for route handling
141#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
142#[serde(rename_all = "snake_case")]
143#[derive(Default)]
144pub enum ServiceType {
145 /// Traditional web service (default)
146 #[default]
147 Web,
148 /// REST API service with JSON responses
149 Api,
150 /// Static file hosting
151 Static,
152 /// Built-in handler (status page, health check, etc.)
153 Builtin,
154 /// LLM/AI inference endpoint with token-based rate limiting
155 Inference,
156}
157
158/// Built-in handler types for ServiceType::Builtin routes
159#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
160#[serde(rename_all = "snake_case")]
161pub enum BuiltinHandler {
162 /// JSON status page with version and uptime
163 Status,
164 /// Health check endpoint (returns 200 OK if healthy)
165 Health,
166 /// Prometheus metrics endpoint
167 Metrics,
168 /// 404 Not Found handler
169 NotFound,
170 /// Configuration dump endpoint (admin only)
171 Config,
172 /// Upstream health status endpoint (admin only)
173 Upstreams,
174 /// Cache purge endpoint (admin only, accepts PURGE method)
175 CachePurge,
176 /// Cache statistics endpoint (admin only)
177 CacheStats,
178}
179
180// ============================================================================
181// Route Policies
182// ============================================================================
183
184/// Route-specific policies
185#[derive(Debug, Clone, Serialize, Deserialize, Default)]
186pub struct RoutePolicies {
187 /// Request header modifications
188 #[serde(default)]
189 pub request_headers: HeaderModifications,
190
191 /// Response header modifications
192 #[serde(default)]
193 pub response_headers: HeaderModifications,
194
195 /// Request timeout override
196 pub timeout_secs: Option<u64>,
197
198 /// Body size limit override
199 pub max_body_size: Option<ByteSize>,
200
201 /// Rate limit override
202 pub rate_limit: Option<RateLimitPolicy>,
203
204 /// Failure mode (fail-open or fail-closed)
205 #[serde(default = "default_failure_mode")]
206 pub failure_mode: FailureMode,
207
208 /// Enable request buffering
209 #[serde(default)]
210 pub buffer_requests: bool,
211
212 /// Enable response buffering
213 #[serde(default)]
214 pub buffer_responses: bool,
215
216 /// HTTP caching configuration
217 #[serde(default)]
218 pub cache: Option<RouteCacheConfig>,
219}
220
221// ============================================================================
222// Cache Configuration
223// ============================================================================
224
225/// Route-level HTTP caching configuration
226#[derive(Debug, Clone, Serialize, Deserialize)]
227pub struct RouteCacheConfig {
228 /// Enable caching for this route
229 #[serde(default)]
230 pub enabled: bool,
231
232 /// Default TTL in seconds if no Cache-Control header
233 #[serde(default = "default_cache_ttl")]
234 pub default_ttl_secs: u64,
235
236 /// Maximum cacheable response size in bytes
237 #[serde(default = "default_max_cache_size")]
238 pub max_size_bytes: usize,
239
240 /// Whether to cache private responses
241 #[serde(default)]
242 pub cache_private: bool,
243
244 /// Stale-while-revalidate grace period in seconds
245 #[serde(default = "default_stale_while_revalidate")]
246 pub stale_while_revalidate_secs: u64,
247
248 /// Stale-if-error grace period in seconds
249 #[serde(default = "default_stale_if_error")]
250 pub stale_if_error_secs: u64,
251
252 /// HTTP methods that are cacheable
253 #[serde(default = "default_cacheable_methods")]
254 pub cacheable_methods: Vec<String>,
255
256 /// Status codes that are cacheable
257 #[serde(default = "default_cacheable_status_codes")]
258 pub cacheable_status_codes: Vec<u16>,
259
260 /// Vary headers to include in cache key
261 #[serde(default)]
262 pub vary_headers: Vec<String>,
263
264 /// Query parameters to exclude from cache key
265 #[serde(default)]
266 pub ignore_query_params: Vec<String>,
267
268 /// File extensions to exclude from caching (without dot, e.g., "php", "html")
269 #[serde(default)]
270 pub exclude_extensions: Vec<String>,
271
272 /// Path patterns to exclude from caching (glob: *, **, ?)
273 #[serde(default)]
274 pub exclude_paths: Vec<String>,
275}
276
277impl Default for RouteCacheConfig {
278 fn default() -> Self {
279 Self {
280 enabled: false,
281 default_ttl_secs: default_cache_ttl(),
282 max_size_bytes: default_max_cache_size(),
283 cache_private: false,
284 stale_while_revalidate_secs: default_stale_while_revalidate(),
285 stale_if_error_secs: default_stale_if_error(),
286 cacheable_methods: default_cacheable_methods(),
287 cacheable_status_codes: default_cacheable_status_codes(),
288 vary_headers: Vec::new(),
289 ignore_query_params: Vec::new(),
290 exclude_extensions: Vec::new(),
291 exclude_paths: Vec::new(),
292 }
293 }
294}
295
/// Default cache TTL in seconds (one hour).
fn default_cache_ttl() -> u64 {
    60 * 60
}
299
/// Default maximum cacheable response size in bytes (10MB).
fn default_max_cache_size() -> usize {
    const MIB: usize = 1024 * 1024;
    10 * MIB
}
303
/// Default stale-while-revalidate grace period in seconds (one minute).
fn default_stale_while_revalidate() -> u64 {
    const ONE_MINUTE_SECS: u64 = 60;
    ONE_MINUTE_SECS
}
307
/// Default stale-if-error grace period in seconds (five minutes).
fn default_stale_if_error() -> u64 {
    5 * 60
}
311
/// Default list of cacheable HTTP methods: `GET` and `HEAD`.
fn default_cacheable_methods() -> Vec<String> {
    ["GET", "HEAD"].iter().map(|method| method.to_string()).collect()
}
315
/// Default list of cacheable HTTP status codes.
fn default_cacheable_status_codes() -> Vec<u16> {
    const CODES: [u16; 9] = [200, 203, 204, 206, 300, 301, 308, 404, 410];
    CODES.to_vec()
}
319
320// ============================================================================
321// Global Cache Storage Configuration
322// ============================================================================
323
324/// Global cache storage configuration
325///
326/// Controls the underlying storage backend for HTTP caching.
327/// This is separate from per-route cache policies which control
328/// what gets cached and for how long.
329#[derive(Debug, Clone, Serialize, Deserialize)]
330pub struct CacheStorageConfig {
331 /// Enable HTTP caching globally (default: true when cache block is present)
332 #[serde(default = "default_cache_enabled")]
333 pub enabled: bool,
334
335 /// Storage backend type
336 #[serde(default)]
337 pub backend: CacheBackend,
338
339 /// Maximum cache size in bytes (default: 100MB)
340 #[serde(default = "default_cache_storage_size")]
341 pub max_size_bytes: usize,
342
343 /// Eviction limit in bytes (when to start evicting, default: same as max_size)
344 #[serde(default)]
345 pub eviction_limit_bytes: Option<usize>,
346
347 /// Cache lock timeout in seconds (prevents thundering herd)
348 #[serde(default = "default_cache_lock_timeout")]
349 pub lock_timeout_secs: u64,
350
351 /// Path for disk-based cache (only used with Disk backend)
352 #[serde(default)]
353 pub disk_path: Option<PathBuf>,
354
355 /// Number of shards for disk cache (improves concurrent access)
356 #[serde(default = "default_disk_shards")]
357 pub disk_shards: u32,
358
359 /// Maximum size for the disk tier in hybrid mode (defaults to max_size_bytes)
360 #[serde(default)]
361 pub disk_max_size_bytes: Option<usize>,
362
363 /// Add Cache-Status response header (RFC 9211) for cache observability
364 #[serde(default)]
365 pub status_header: bool,
366}
367
368impl Default for CacheStorageConfig {
369 fn default() -> Self {
370 Self {
371 enabled: true,
372 backend: CacheBackend::Memory,
373 max_size_bytes: default_cache_storage_size(),
374 eviction_limit_bytes: None,
375 lock_timeout_secs: default_cache_lock_timeout(),
376 disk_path: None,
377 disk_shards: default_disk_shards(),
378 disk_max_size_bytes: None,
379 status_header: false,
380 }
381 }
382}
383
384/// Cache storage backend type
385#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
386#[serde(rename_all = "snake_case")]
387pub enum CacheBackend {
388 /// In-memory cache (fast, but lost on restart)
389 #[default]
390 Memory,
391 /// Disk-based cache (persistent, larger capacity)
392 Disk,
393 /// Hybrid: memory for hot entries, disk for cold
394 Hybrid,
395}
396
/// Serde default for `CacheStorageConfig::enabled`: caching is on.
fn default_cache_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
400
/// Default cache storage size in bytes (100MB).
fn default_cache_storage_size() -> usize {
    const MIB: usize = 1024 * 1024;
    100 * MIB
}
404
/// Default cache lock timeout in seconds (10 seconds).
fn default_cache_lock_timeout() -> u64 {
    const LOCK_TIMEOUT_SECS: u64 = 10;
    LOCK_TIMEOUT_SECS
}
408
/// Default number of shards for the disk cache.
fn default_disk_shards() -> u32 {
    const SHARDS: u32 = 16;
    SHARDS
}
412
413/// Header modification rules
414#[derive(Debug, Clone, Serialize, Deserialize, Default)]
415pub struct HeaderModifications {
416 /// Headers to rename (old_name -> new_name, applied before set/add/remove)
417 #[serde(default)]
418 pub rename: HashMap<String, String>,
419
420 /// Headers to add/set
421 #[serde(default)]
422 pub set: HashMap<String, String>,
423
424 /// Headers to append
425 #[serde(default)]
426 pub add: HashMap<String, String>,
427
428 /// Headers to remove
429 #[serde(default)]
430 pub remove: Vec<String>,
431}
432
433/// Rate limit policy (legacy - prefer using rate-limit filter)
434#[derive(Debug, Clone, Serialize, Deserialize)]
435pub struct RateLimitPolicy {
436 /// Requests per second
437 pub requests_per_second: u32,
438
439 /// Burst size
440 pub burst: u32,
441
442 /// Key to rate limit by
443 pub key: RateLimitKey,
444}
445
446/// Failure mode for degraded operation
447#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
448#[serde(rename_all = "snake_case")]
449pub enum FailureMode {
450 Open, // Allow traffic through on failure
451 #[default]
452 Closed, // Block traffic on failure (default for security)
453}
454
455pub(crate) fn default_failure_mode() -> FailureMode {
456 FailureMode::Closed
457}
458
459// ============================================================================
460// Static File Configuration
461// ============================================================================
462
463/// Static file serving configuration
464#[derive(Debug, Clone, Serialize, Deserialize)]
465pub struct StaticFileConfig {
466 /// Root directory for static files
467 pub root: PathBuf,
468
469 /// Index file name (default: index.html)
470 #[serde(default = "default_index_file")]
471 pub index: String,
472
473 /// Enable directory listing
474 #[serde(default)]
475 pub directory_listing: bool,
476
477 /// Cache control header value
478 #[serde(default = "default_cache_control")]
479 pub cache_control: String,
480
481 /// Compress responses
482 #[serde(default = "default_true")]
483 pub compress: bool,
484
485 /// Additional MIME type mappings
486 #[serde(default)]
487 pub mime_types: HashMap<String, String>,
488
489 /// Fallback file for SPA routing (e.g., index.html)
490 pub fallback: Option<String>,
491}
492
/// Default index file name for static file serving.
fn default_index_file() -> String {
    String::from("index.html")
}
496
/// Default cache control header value for static files.
fn default_cache_control() -> String {
    String::from("public, max-age=3600")
}
500
/// Serde default helper for boolean fields that are on unless disabled.
fn default_true() -> bool {
    const ON: bool = true;
    ON
}
504
505// ============================================================================
506// API Schema Configuration
507// ============================================================================
508
509/// API schema validation configuration
510#[derive(Debug, Clone, Serialize, Deserialize)]
511pub struct ApiSchemaConfig {
512 /// OpenAPI/Swagger schema file path (mutually exclusive with schema_content)
513 pub schema_file: Option<PathBuf>,
514
515 /// Inline OpenAPI/Swagger schema content (YAML or JSON string)
516 /// Mutually exclusive with schema_file
517 pub schema_content: Option<String>,
518
519 /// JSON Schema for request validation
520 pub request_schema: Option<serde_json::Value>,
521
522 /// JSON Schema for response validation
523 pub response_schema: Option<serde_json::Value>,
524
525 /// Validate requests against schema
526 #[serde(default = "default_true")]
527 pub validate_requests: bool,
528
529 /// Validate responses against schema
530 #[serde(default)]
531 pub validate_responses: bool,
532
533 /// Strict validation mode (fail on additional properties)
534 #[serde(default)]
535 pub strict_mode: bool,
536}
537
538// ============================================================================
539// Error Page Configuration
540// ============================================================================
541
542/// Error page configuration
543#[derive(Debug, Clone, Serialize, Deserialize)]
544pub struct ErrorPageConfig {
545 /// Custom error pages by status code
546 #[serde(default)]
547 pub pages: HashMap<u16, ErrorPage>,
548
549 /// Default error page format
550 #[serde(default)]
551 pub default_format: ErrorFormat,
552
553 /// Include stack traces in errors (development only)
554 #[serde(default)]
555 pub include_stack_trace: bool,
556
557 /// Custom error template directory
558 pub template_dir: Option<PathBuf>,
559}
560
561/// Individual error page configuration
562#[derive(Debug, Clone, Serialize, Deserialize)]
563pub struct ErrorPage {
564 /// Error page format
565 pub format: ErrorFormat,
566
567 /// Custom template or static file path
568 pub template: Option<PathBuf>,
569
570 /// Custom error message
571 pub message: Option<String>,
572
573 /// Additional headers to include
574 #[serde(default)]
575 pub headers: HashMap<String, String>,
576}
577
578/// Error response format
579#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
580#[serde(rename_all = "snake_case")]
581#[derive(Default)]
582pub enum ErrorFormat {
583 /// HTML error page
584 #[default]
585 Html,
586 /// JSON error response
587 Json,
588 /// Plain text error
589 Text,
590 /// XML error response
591 Xml,
592}
593
594// ============================================================================
595// Shadow / Traffic Mirroring Configuration
596// ============================================================================
597
598/// Traffic mirroring (shadow) configuration
599///
600/// Enables fire-and-forget request duplication to a shadow upstream
601/// for safe canary deployments and testing.
602#[derive(Debug, Clone, Serialize, Deserialize)]
603pub struct ShadowConfig {
604 /// Shadow target upstream ID
605 pub upstream: String,
606
607 /// Sampling percentage (0.0-100.0)
608 /// Only this percentage of requests will be mirrored
609 #[serde(default = "default_shadow_percentage")]
610 pub percentage: f64,
611
612 /// Only shadow requests with this header match
613 /// Format: (header_name, header_value)
614 pub sample_header: Option<(String, String)>,
615
616 /// Shadow request timeout in milliseconds
617 #[serde(default = "default_shadow_timeout_ms")]
618 pub timeout_ms: u64,
619
620 /// Whether to buffer request bodies for mirroring
621 /// Required for POST/PUT/PATCH requests with bodies
622 #[serde(default)]
623 pub buffer_body: bool,
624
625 /// Maximum body size to mirror (bytes)
626 #[serde(default = "default_shadow_max_body_bytes")]
627 pub max_body_bytes: usize,
628}
629
/// Default shadow sampling percentage: mirror all requests.
fn default_shadow_percentage() -> f64 {
    const ALL_REQUESTS: f64 = 100.0;
    ALL_REQUESTS
}
633
/// Default shadow request timeout in milliseconds (5 seconds).
fn default_shadow_timeout_ms() -> u64 {
    5 * 1_000
}
637
/// Default maximum body size to mirror, in bytes (1 MB).
fn default_shadow_max_body_bytes() -> usize {
    1024 * 1024
}
641
642// ============================================================================
643// Inference Configuration (for ServiceType::Inference)
644// ============================================================================
645
646/// Inference routing configuration for LLM/AI endpoints
647///
648/// Provides token-based rate limiting, model-aware load balancing,
649/// and multi-provider support for inference traffic.
650#[derive(Debug, Clone, Serialize, Deserialize, Default)]
651pub struct InferenceConfig {
652 /// Inference provider (determines token extraction strategy)
653 #[serde(default)]
654 pub provider: InferenceProvider,
655
656 /// Header containing model name (optional, provider-specific default)
657 pub model_header: Option<String>,
658
659 /// Token-based rate limiting configuration (per-minute)
660 pub rate_limit: Option<TokenRateLimit>,
661
662 /// Token budget configuration (per-period cumulative tracking)
663 pub budget: Option<TokenBudgetConfig>,
664
665 /// Cost attribution configuration (per-model pricing)
666 pub cost_attribution: Option<CostAttributionConfig>,
667
668 /// Inference-aware routing configuration
669 pub routing: Option<InferenceRouting>,
670
671 /// Model-based upstream routing configuration
672 pub model_routing: Option<ModelRoutingConfig>,
673
674 /// Semantic guardrails configuration (prompt injection, PII detection)
675 pub guardrails: Option<GuardrailsConfig>,
676}
677
678/// Inference provider type (determines token counting strategy)
679#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
680#[serde(rename_all = "snake_case")]
681pub enum InferenceProvider {
682 /// Generic provider (uses x-tokens-used header or estimation)
683 #[default]
684 Generic,
685 /// OpenAI API (uses x-ratelimit-remaining-tokens header)
686 OpenAi,
687 /// Anthropic API (uses anthropic-ratelimit-tokens-remaining header)
688 Anthropic,
689}
690
691impl InferenceProvider {
692 /// Returns the string label for this provider (for metrics and logging).
693 pub fn as_str(&self) -> &'static str {
694 match self {
695 Self::Generic => "generic",
696 Self::OpenAi => "openai",
697 Self::Anthropic => "anthropic",
698 }
699 }
700}
701
702/// Token-based rate limiting configuration
703#[derive(Debug, Clone, Serialize, Deserialize)]
704pub struct TokenRateLimit {
705 /// Maximum tokens per minute
706 pub tokens_per_minute: u64,
707
708 /// Maximum requests per minute (optional, dual tracking)
709 pub requests_per_minute: Option<u64>,
710
711 /// Burst tokens allowed above rate
712 #[serde(default = "default_burst_tokens")]
713 pub burst_tokens: u64,
714
715 /// Token estimation method (fallback when headers unavailable)
716 #[serde(default)]
717 pub estimation_method: TokenEstimation,
718}
719
/// Default number of burst tokens allowed above the rate.
fn default_burst_tokens() -> u64 {
    10 * 1_000
}
723
724/// Token estimation method for request sizing
725#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
726#[serde(rename_all = "snake_case")]
727pub enum TokenEstimation {
728 /// Character count / 4 (fast, rough estimate)
729 #[default]
730 Chars,
731 /// Word count * 1.3 (slightly more accurate)
732 Words,
733 /// Actual tiktoken encoding (accurate but slower, feature-gated)
734 Tiktoken,
735}
736
737/// Inference-aware routing configuration
738#[derive(Debug, Clone, Serialize, Deserialize)]
739pub struct InferenceRouting {
740 /// Load balancing strategy for inference traffic
741 #[serde(default)]
742 pub strategy: InferenceRoutingStrategy,
743
744 /// Header to read queue depth from upstream (optional)
745 pub queue_depth_header: Option<String>,
746}
747
748/// Inference-specific load balancing strategies
749#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
750#[serde(rename_all = "snake_case")]
751pub enum InferenceRoutingStrategy {
752 /// Route to upstream with least tokens queued (default)
753 #[default]
754 LeastTokensQueued,
755 /// Standard round-robin
756 RoundRobin,
757 /// Route to upstream with lowest observed latency
758 LeastLatency,
759}
760
761// ============================================================================
762// Model-Based Routing Configuration
763// ============================================================================
764
765/// Model-based routing configuration for inference requests.
766///
767/// Routes requests to different upstreams based on the model name in the request.
768/// Supports glob patterns for flexible model matching (e.g., `gpt-4*`, `claude-*`).
769///
770/// # Example KDL Configuration
771/// ```kdl
772/// model-routing {
773/// model "gpt-4" upstream="openai-primary"
774/// model "gpt-4*" upstream="openai-primary"
775/// model "claude-*" upstream="anthropic-backend" provider="anthropic"
776/// default-upstream "openai-primary"
777/// }
778/// ```
779#[derive(Debug, Clone, Serialize, Deserialize, Default)]
780pub struct ModelRoutingConfig {
781 /// Ordered list of model-to-upstream mappings (first match wins).
782 /// Supports exact matches and glob patterns with `*` wildcard.
783 #[serde(default)]
784 pub mappings: Vec<ModelUpstreamMapping>,
785
786 /// Default upstream when no mapping matches (overrides route's upstream).
787 /// If not set, falls back to the route's configured upstream.
788 pub default_upstream: Option<String>,
789}
790
791/// A single model-to-upstream mapping.
792///
793/// Maps a model name (or pattern) to a specific upstream pool.
794/// Optionally overrides the inference provider for cross-provider routing.
795#[derive(Debug, Clone, Serialize, Deserialize)]
796pub struct ModelUpstreamMapping {
797 /// Model name pattern. Can be:
798 /// - Exact match: `"gpt-4"`, `"claude-3-opus"`
799 /// - Glob pattern: `"gpt-4*"`, `"claude-*"`, `"*-turbo"`
800 pub model_pattern: String,
801
802 /// Target upstream pool for requests matching this model.
803 pub upstream: String,
804
805 /// Optional provider override for cross-provider routing.
806 /// When set, the inference provider will be switched for token
807 /// extraction and rate limiting purposes.
808 pub provider: Option<InferenceProvider>,
809}
810
811// ============================================================================
812// Fallback Routing Configuration
813// ============================================================================
814
815/// Fallback routing configuration for automatic failover
816///
817/// Enables requests to automatically fail over to alternative upstreams
818/// when the primary upstream is unhealthy, exhausted, or returns errors.
819/// Supports cross-provider failback with model mapping.
820#[derive(Debug, Clone, Serialize, Deserialize, Default)]
821pub struct FallbackConfig {
822 /// Ordered list of fallback upstreams (tried in order)
823 #[serde(default)]
824 pub upstreams: Vec<FallbackUpstream>,
825
826 /// Triggers that activate fallback behavior
827 #[serde(default)]
828 pub triggers: FallbackTriggers,
829
830 /// Maximum number of fallback attempts before giving up
831 #[serde(default = "default_max_fallback_attempts")]
832 pub max_attempts: u32,
833}
834
835/// A single fallback upstream with optional model mapping
836#[derive(Debug, Clone, Serialize, Deserialize)]
837pub struct FallbackUpstream {
838 /// Upstream pool ID to fallback to
839 pub upstream: String,
840
841 /// Provider type for this upstream (for correct token extraction)
842 #[serde(default)]
843 pub provider: InferenceProvider,
844
845 /// Model mapping from primary model to this provider's equivalent
846 /// Key: original model name (or pattern with * wildcard), Value: replacement model name
847 #[serde(default)]
848 pub model_mapping: HashMap<String, String>,
849
850 /// Skip this fallback if its health check reports unhealthy
851 #[serde(default)]
852 pub skip_if_unhealthy: bool,
853}
854
855/// Triggers that activate fallback routing
856#[derive(Debug, Clone, Serialize, Deserialize)]
857pub struct FallbackTriggers {
858 /// Trigger on health check failure of primary upstream
859 #[serde(default = "default_true")]
860 pub on_health_failure: bool,
861
862 /// Trigger when token budget is exhausted
863 #[serde(default)]
864 pub on_budget_exhausted: bool,
865
866 /// Trigger when latency exceeds threshold (milliseconds)
867 #[serde(default)]
868 pub on_latency_threshold_ms: Option<u64>,
869
870 /// Trigger on specific HTTP error codes from upstream
871 #[serde(default)]
872 pub on_error_codes: Vec<u16>,
873
874 /// Trigger on connection errors (refused, timeout, etc.)
875 #[serde(default = "default_true")]
876 pub on_connection_error: bool,
877}
878
879impl Default for FallbackTriggers {
880 fn default() -> Self {
881 Self {
882 on_health_failure: true,
883 on_budget_exhausted: false,
884 on_latency_threshold_ms: None,
885 on_error_codes: Vec::new(),
886 on_connection_error: true,
887 }
888 }
889}
890
/// Default maximum number of fallback attempts.
fn default_max_fallback_attempts() -> u32 {
    const MAX_ATTEMPTS: u32 = 3;
    MAX_ATTEMPTS
}
894
895// ============================================================================
896// Semantic Guardrails Configuration
897// ============================================================================
898
899/// Semantic guardrails configuration for inference routes.
900///
901/// Enables content inspection via external agents for security:
902/// - Prompt injection detection on requests
903/// - PII detection on responses
904#[derive(Debug, Clone, Serialize, Deserialize, Default)]
905pub struct GuardrailsConfig {
906 /// Prompt injection detection configuration
907 pub prompt_injection: Option<PromptInjectionConfig>,
908
909 /// PII detection configuration
910 pub pii_detection: Option<PiiDetectionConfig>,
911}
912
913/// Prompt injection detection configuration.
914///
915/// Detects and optionally blocks requests containing prompt injection attempts.
916/// Uses an external agent for content analysis.
917#[derive(Debug, Clone, Serialize, Deserialize)]
918pub struct PromptInjectionConfig {
919 /// Enable prompt injection detection
920 #[serde(default)]
921 pub enabled: bool,
922
923 /// Name of the agent to use for inspection
924 pub agent: String,
925
926 /// Action to take when injection is detected
927 #[serde(default)]
928 pub action: GuardrailAction,
929
930 /// HTTP status code when blocking (default: 400)
931 #[serde(default = "default_guardrail_block_status")]
932 pub block_status: u16,
933
934 /// Custom message when blocking
935 pub block_message: Option<String>,
936
937 /// Agent timeout in milliseconds (default: 500)
938 #[serde(default = "default_prompt_injection_timeout_ms")]
939 pub timeout_ms: u64,
940
941 /// Behavior when agent times out or fails
942 #[serde(default)]
943 pub failure_mode: GuardrailFailureMode,
944}
945
946/// PII detection configuration.
947///
948/// Detects sensitive data (SSN, credit cards, emails, etc.) in responses.
949/// Uses an external agent for content analysis.
950#[derive(Debug, Clone, Serialize, Deserialize)]
951pub struct PiiDetectionConfig {
952 /// Enable PII detection
953 #[serde(default)]
954 pub enabled: bool,
955
956 /// Name of the agent to use for inspection
957 pub agent: String,
958
959 /// Action to take when PII is detected
960 #[serde(default)]
961 pub action: PiiAction,
962
963 /// PII categories to detect (e.g., "ssn", "credit-card", "email", "phone")
964 #[serde(default)]
965 pub categories: Vec<String>,
966
967 /// Agent timeout in milliseconds (default: 1000)
968 #[serde(default = "default_pii_detection_timeout_ms")]
969 pub timeout_ms: u64,
970
971 /// Behavior when agent times out or fails
972 #[serde(default)]
973 pub failure_mode: GuardrailFailureMode,
974}
975
976/// Action to take when a guardrail detects an issue
977#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
978#[serde(rename_all = "lowercase")]
979pub enum GuardrailAction {
980 /// Block the request and return an error
981 Block,
982 /// Log the detection but allow the request (default)
983 #[default]
984 Log,
985 /// Allow request but add warning header to response
986 Warn,
987}
988
989/// Action to take when PII is detected in responses
990#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
991#[serde(rename_all = "lowercase")]
992pub enum PiiAction {
993 /// Log the detection only (default)
994 #[default]
995 Log,
996 /// Redact PII in response (non-streaming only)
997 Redact,
998 /// Block response (non-streaming only)
999 Block,
1000}
1001
1002/// Failure mode for guardrail agents (when agent times out or errors)
1003#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
1004#[serde(rename_all = "lowercase")]
1005pub enum GuardrailFailureMode {
1006 /// Allow request to proceed on agent failure (fail-open, default)
1007 #[default]
1008 Open,
1009 /// Block request on agent failure (fail-closed)
1010 Closed,
1011}
1012
/// Default HTTP status code used when a guardrail blocks a request.
fn default_guardrail_block_status() -> u16 {
    const BLOCK_STATUS: u16 = 400;
    BLOCK_STATUS
}
1016
/// Default prompt injection agent timeout in milliseconds.
fn default_prompt_injection_timeout_ms() -> u64 {
    const TIMEOUT_MS: u64 = 500;
    TIMEOUT_MS
}
1020
/// Default PII detection agent timeout in milliseconds.
fn default_pii_detection_timeout_ms() -> u64 {
    const TIMEOUT_MS: u64 = 1_000;
    TIMEOUT_MS
}