grapsus_config/
routes.rs

1//! Route configuration types
2//!
3//! This module contains configuration types for routing requests
4//! to upstreams or static file handlers.
5
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::PathBuf;
9use validator::Validate;
10
11use grapsus_common::budget::{CostAttributionConfig, TokenBudgetConfig};
12use grapsus_common::types::{ByteSize, CircuitBreakerConfig, Priority, RetryPolicy};
13
14use crate::filters::RateLimitKey;
15
16// ============================================================================
17// Route Configuration
18// ============================================================================
19
/// Route configuration
///
/// Describes a single route: how requests are matched (`matches`), which
/// kind of handler serves them (`service_type`), and the optional per-route
/// features layered on top (policies, filters, caching, shadowing,
/// fallback, ...).
///
/// Only `id` and `matches` are mandatory when deserializing. `upstream` is an
/// `Option` (missing means `None`) and every other field carries
/// `#[serde(default)]`.
///
/// NOTE(review): `Validate` is derived, but no field here carries a
/// `#[validate(...)]` attribute, so the derived check presumably accepts any
/// value — confirm where cross-field rules (e.g. `service_type` vs.
/// `static_files`) are enforced.
#[derive(Debug, Clone, Serialize, Deserialize, Validate)]
pub struct RouteConfig {
    /// Unique route identifier
    pub id: String,

    /// Route priority (higher = evaluated first)
    ///
    /// Falls back to `Priority::default()` when omitted.
    #[serde(default)]
    pub priority: Priority,

    /// Match conditions
    ///
    /// NOTE(review): whether multiple conditions are AND-ed or OR-ed is
    /// decided by the matcher, not by this type — confirm there.
    pub matches: Vec<MatchCondition>,

    /// Target upstream (optional for static file serving)
    pub upstream: Option<String>,

    /// Service type for this route
    ///
    /// Defaults to `ServiceType::Web` when omitted.
    #[serde(default)]
    pub service_type: ServiceType,

    /// Route-specific policies
    #[serde(default)]
    pub policies: RoutePolicies,

    /// Filter chain for this route - list of filter IDs (executed in order)
    /// References filters defined in the top-level `filters` block
    #[serde(default)]
    pub filters: Vec<String>,

    /// Built-in handler (for service_type = Builtin)
    ///
    /// Note: the serde key is `builtin-handler` (kebab-case), unlike the
    /// other keys of this struct, which keep their snake_case field names.
    #[serde(default, rename = "builtin-handler")]
    pub builtin_handler: Option<BuiltinHandler>,

    /// WAF enabled for this route (shorthand for adding WAF agent filter)
    #[serde(default)]
    pub waf_enabled: bool,

    /// Circuit breaker configuration
    #[serde(default)]
    pub circuit_breaker: Option<CircuitBreakerConfig>,

    /// Retry policy
    #[serde(default)]
    pub retry_policy: Option<RetryPolicy>,

    /// Static file serving configuration (for service_type = Static)
    #[serde(default)]
    pub static_files: Option<StaticFileConfig>,

    /// API schema validation configuration (for service_type = Api)
    #[serde(default)]
    pub api_schema: Option<ApiSchemaConfig>,

    /// Inference configuration (for service_type = Inference)
    #[serde(default)]
    pub inference: Option<InferenceConfig>,

    /// Error page configuration
    #[serde(default)]
    pub error_pages: Option<ErrorPageConfig>,

    /// Enable WebSocket upgrade support for this route (default: false)
    /// When enabled, HTTP Upgrade requests with "websocket" protocol are allowed.
    /// Pingora handles the actual WebSocket tunneling transparently.
    #[serde(default)]
    pub websocket: bool,

    /// Enable WebSocket frame inspection (default: false)
    /// When enabled, individual WebSocket frames are sent to agents for inspection.
    /// Agents can allow, drop, or close the connection based on frame content.
    /// Requires `websocket: true` to have any effect.
    /// Note: If `permessage-deflate` compression is negotiated, inspection is skipped.
    #[serde(default)]
    pub websocket_inspection: bool,

    /// Traffic mirroring / shadowing configuration
    /// Mirrors requests to a shadow upstream for safe canary testing
    #[serde(default)]
    pub shadow: Option<ShadowConfig>,

    /// Fallback routing configuration
    /// Enables automatic failover to alternative upstreams on failure
    #[serde(default)]
    pub fallback: Option<FallbackConfig>,
}
105
106// ============================================================================
107// Match Conditions
108// ============================================================================
109
/// Match condition for route selection
///
/// Uses serde's default (externally tagged) enum representation with
/// snake_case variant names: `path_prefix`, `path`, `path_regex`, `host`,
/// `header`, `method`, `query_param`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum MatchCondition {
    /// Match by path prefix
    PathPrefix(String),

    /// Match by exact path
    Path(String),

    /// Match by regex pattern
    ///
    /// The pattern is stored as a plain string; it is not compiled or
    /// validated by this type.
    PathRegex(String),

    /// Match by host header
    Host(String),

    /// Match by header
    ///
    /// `name` is required; `value` is optional.
    /// NOTE(review): presumably `value: None` means "header is present" and
    /// `Some(v)` means "header equals `v`" — confirm in the matcher.
    Header { name: String, value: Option<String> },

    /// Match by method
    ///
    /// Holds a list of method names as strings.
    Method(Vec<String>),

    /// Match by query parameter
    ///
    /// `name` is required; `value` is optional (same shape as `Header`).
    QueryParam { name: String, value: Option<String> },
}
135
136// ============================================================================
137// Service Types
138// ============================================================================
139
140/// Service type for route handling
141#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
142#[serde(rename_all = "snake_case")]
143#[derive(Default)]
144pub enum ServiceType {
145    /// Traditional web service (default)
146    #[default]
147    Web,
148    /// REST API service with JSON responses
149    Api,
150    /// Static file hosting
151    Static,
152    /// Built-in handler (status page, health check, etc.)
153    Builtin,
154    /// LLM/AI inference endpoint with token-based rate limiting
155    Inference,
156}
157
/// Built-in handler types for ServiceType::Builtin routes
///
/// Serialized in snake_case (`status`, `health`, `metrics`, `not_found`,
/// `config`, `upstreams`, `cache_purge`, `cache_stats`).
///
/// NOTE(review): the "admin only" remarks below describe intended usage;
/// this type performs no access control itself — confirm where it is enforced.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BuiltinHandler {
    /// JSON status page with version and uptime
    Status,
    /// Health check endpoint (returns 200 OK if healthy)
    Health,
    /// Prometheus metrics endpoint
    Metrics,
    /// 404 Not Found handler
    NotFound,
    /// Configuration dump endpoint (admin only)
    Config,
    /// Upstream health status endpoint (admin only)
    Upstreams,
    /// Cache purge endpoint (admin only, accepts PURGE method)
    CachePurge,
    /// Cache statistics endpoint (admin only)
    CacheStats,
}
179
180// ============================================================================
181// Route Policies
182// ============================================================================
183
/// Route-specific policies
///
/// Every field is optional when deserializing. The derived `Default` and the
/// serde defaults agree: `failure_mode` is `FailureMode::Closed` either way
/// (`default_failure_mode()` returns the same variant that is marked
/// `#[default]` on `FailureMode`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RoutePolicies {
    /// Request header modifications
    #[serde(default)]
    pub request_headers: HeaderModifications,

    /// Response header modifications
    #[serde(default)]
    pub response_headers: HeaderModifications,

    /// Request timeout override
    ///
    /// In seconds; `None` means no override is configured.
    pub timeout_secs: Option<u64>,

    /// Body size limit override
    pub max_body_size: Option<ByteSize>,

    /// Rate limit override
    ///
    /// `RateLimitPolicy` is documented as legacy in favour of the rate-limit
    /// filter.
    pub rate_limit: Option<RateLimitPolicy>,

    /// Failure mode (fail-open or fail-closed)
    ///
    /// Defaults to `FailureMode::Closed`.
    #[serde(default = "default_failure_mode")]
    pub failure_mode: FailureMode,

    /// Enable request buffering
    #[serde(default)]
    pub buffer_requests: bool,

    /// Enable response buffering
    #[serde(default)]
    pub buffer_responses: bool,

    /// HTTP caching configuration
    #[serde(default)]
    pub cache: Option<RouteCacheConfig>,
}
220
221// ============================================================================
222// Cache Configuration
223// ============================================================================
224
/// Route-level HTTP caching configuration
///
/// All fields are optional when deserializing. The hand-written `Default`
/// impl for this type uses the same helper functions as the serde defaults,
/// so `RouteCacheConfig::default()` and an empty deserialized block agree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RouteCacheConfig {
    /// Enable caching for this route
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Default TTL in seconds if no Cache-Control header
    ///
    /// Defaults to 3600 (`default_cache_ttl`).
    #[serde(default = "default_cache_ttl")]
    pub default_ttl_secs: u64,

    /// Maximum cacheable response size in bytes
    ///
    /// Defaults to 10 MiB (`default_max_cache_size`).
    #[serde(default = "default_max_cache_size")]
    pub max_size_bytes: usize,

    /// Whether to cache private responses
    #[serde(default)]
    pub cache_private: bool,

    /// Stale-while-revalidate grace period in seconds
    ///
    /// Defaults to 60 (`default_stale_while_revalidate`).
    #[serde(default = "default_stale_while_revalidate")]
    pub stale_while_revalidate_secs: u64,

    /// Stale-if-error grace period in seconds
    ///
    /// Defaults to 300 (`default_stale_if_error`).
    #[serde(default = "default_stale_if_error")]
    pub stale_if_error_secs: u64,

    /// HTTP methods that are cacheable
    ///
    /// Defaults to `["GET", "HEAD"]` (`default_cacheable_methods`).
    #[serde(default = "default_cacheable_methods")]
    pub cacheable_methods: Vec<String>,

    /// Status codes that are cacheable
    ///
    /// Defaults to the list returned by `default_cacheable_status_codes`.
    #[serde(default = "default_cacheable_status_codes")]
    pub cacheable_status_codes: Vec<u16>,

    /// Vary headers to include in cache key
    #[serde(default)]
    pub vary_headers: Vec<String>,

    /// Query parameters to exclude from cache key
    #[serde(default)]
    pub ignore_query_params: Vec<String>,

    /// File extensions to exclude from caching (without dot, e.g., "php", "html")
    #[serde(default)]
    pub exclude_extensions: Vec<String>,

    /// Path patterns to exclude from caching (glob: *, **, ?)
    #[serde(default)]
    pub exclude_paths: Vec<String>,
}
276
277impl Default for RouteCacheConfig {
278    fn default() -> Self {
279        Self {
280            enabled: false,
281            default_ttl_secs: default_cache_ttl(),
282            max_size_bytes: default_max_cache_size(),
283            cache_private: false,
284            stale_while_revalidate_secs: default_stale_while_revalidate(),
285            stale_if_error_secs: default_stale_if_error(),
286            cacheable_methods: default_cacheable_methods(),
287            cacheable_status_codes: default_cacheable_status_codes(),
288            vary_headers: Vec::new(),
289            ignore_query_params: Vec::new(),
290            exclude_extensions: Vec::new(),
291            exclude_paths: Vec::new(),
292        }
293    }
294}
295
/// Serde default for `RouteCacheConfig::default_ttl_secs`: one hour, in seconds.
fn default_cache_ttl() -> u64 {
    const SECONDS_PER_HOUR: u64 = 60 * 60;
    SECONDS_PER_HOUR
}
299
/// Serde default for `RouteCacheConfig::max_size_bytes`: 10 MiB, in bytes.
fn default_max_cache_size() -> usize {
    const MIB: usize = 1 << 20;
    10 * MIB
}
303
/// Serde default for `RouteCacheConfig::stale_while_revalidate_secs`:
/// one minute, in seconds.
fn default_stale_while_revalidate() -> u64 {
    const ONE_MINUTE_SECS: u64 = 60;
    ONE_MINUTE_SECS
}
307
/// Serde default for `RouteCacheConfig::stale_if_error_secs`:
/// five minutes, in seconds.
fn default_stale_if_error() -> u64 {
    const MINUTES: u64 = 5;
    MINUTES * 60
}
311
/// Serde default for `RouteCacheConfig::cacheable_methods`: `GET` and `HEAD`.
fn default_cacheable_methods() -> Vec<String> {
    const METHODS: [&str; 2] = ["GET", "HEAD"];
    METHODS.iter().map(|method| String::from(*method)).collect()
}
315
/// Serde default for `RouteCacheConfig::cacheable_status_codes`.
///
/// Returns a fresh `Vec` on every call, in the order listed below.
fn default_cacheable_status_codes() -> Vec<u16> {
    const CODES: [u16; 9] = [200, 203, 204, 206, 300, 301, 308, 404, 410];
    CODES.to_vec()
}
319
320// ============================================================================
321// Global Cache Storage Configuration
322// ============================================================================
323
/// Global cache storage configuration
///
/// Controls the underlying storage backend for HTTP caching.
/// This is separate from per-route cache policies which control
/// what gets cached and for how long.
///
/// All fields are optional when deserializing; the hand-written `Default`
/// impl yields the same values as the per-field serde defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheStorageConfig {
    /// Enable HTTP caching globally (default: true when cache block is present)
    #[serde(default = "default_cache_enabled")]
    pub enabled: bool,

    /// Storage backend type
    ///
    /// Defaults to `CacheBackend::Memory`.
    #[serde(default)]
    pub backend: CacheBackend,

    /// Maximum cache size in bytes (default: 100MB)
    #[serde(default = "default_cache_storage_size")]
    pub max_size_bytes: usize,

    /// Eviction limit in bytes (when to start evicting, default: same as max_size)
    ///
    /// Stored as `None` when omitted; the "same as max_size" fallback is
    /// applied by the consumer, not by this type.
    #[serde(default)]
    pub eviction_limit_bytes: Option<usize>,

    /// Cache lock timeout in seconds (prevents thundering herd)
    ///
    /// Defaults to 10 (`default_cache_lock_timeout`).
    #[serde(default = "default_cache_lock_timeout")]
    pub lock_timeout_secs: u64,

    /// Path for disk-based cache (only used with Disk backend)
    ///
    /// NOTE(review): the `Hybrid` backend also has a disk tier — confirm
    /// whether it reads this path as well.
    #[serde(default)]
    pub disk_path: Option<PathBuf>,

    /// Number of shards for disk cache (improves concurrent access)
    ///
    /// Defaults to 16 (`default_disk_shards`).
    #[serde(default = "default_disk_shards")]
    pub disk_shards: u32,

    /// Maximum size for the disk tier in hybrid mode (defaults to max_size_bytes)
    ///
    /// Stored as `None` when omitted; the fallback to `max_size_bytes` is
    /// applied by the consumer, not by this type.
    #[serde(default)]
    pub disk_max_size_bytes: Option<usize>,

    /// Add Cache-Status response header (RFC 9211) for cache observability
    #[serde(default)]
    pub status_header: bool,
}
367
368impl Default for CacheStorageConfig {
369    fn default() -> Self {
370        Self {
371            enabled: true,
372            backend: CacheBackend::Memory,
373            max_size_bytes: default_cache_storage_size(),
374            eviction_limit_bytes: None,
375            lock_timeout_secs: default_cache_lock_timeout(),
376            disk_path: None,
377            disk_shards: default_disk_shards(),
378            disk_max_size_bytes: None,
379            status_header: false,
380        }
381    }
382}
383
/// Cache storage backend type
///
/// Serialized in snake_case (`memory`, `disk`, `hybrid`). `Memory` is the
/// `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum CacheBackend {
    /// In-memory cache (fast, but lost on restart)
    #[default]
    Memory,
    /// Disk-based cache (persistent, larger capacity)
    Disk,
    /// Hybrid: memory for hot entries, disk for cold
    Hybrid,
}
396
/// Serde default for `CacheStorageConfig::enabled`: caching is on when the
/// field is omitted.
fn default_cache_enabled() -> bool {
    const ENABLED_WHEN_OMITTED: bool = true;
    ENABLED_WHEN_OMITTED
}
400
/// Serde default for `CacheStorageConfig::max_size_bytes`: 100 MiB, in bytes.
fn default_cache_storage_size() -> usize {
    const MIB: usize = 1 << 20;
    100 * MIB
}
404
/// Serde default for `CacheStorageConfig::lock_timeout_secs`: ten seconds.
fn default_cache_lock_timeout() -> u64 {
    const LOCK_TIMEOUT_SECS: u64 = 10;
    LOCK_TIMEOUT_SECS
}
408
/// Serde default for `CacheStorageConfig::disk_shards`.
fn default_disk_shards() -> u32 {
    const SHARD_COUNT: u32 = 16;
    SHARD_COUNT
}
412
/// Header modification rules
///
/// All four collections default to empty, so an absent block means
/// "no modifications". Used for both request and response headers in
/// `RoutePolicies`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct HeaderModifications {
    /// Headers to rename (old_name -> new_name, applied before set/add/remove)
    #[serde(default)]
    pub rename: HashMap<String, String>,

    /// Headers to add/set
    ///
    /// Map of header name to value.
    #[serde(default)]
    pub set: HashMap<String, String>,

    /// Headers to append
    ///
    /// Map of header name to value; being a map, it holds at most one value
    /// per header name.
    #[serde(default)]
    pub add: HashMap<String, String>,

    /// Headers to remove
    #[serde(default)]
    pub remove: Vec<String>,
}
432
/// Rate limit policy (legacy - prefer using rate-limit filter)
///
/// None of the fields has a serde default, so all three must be present
/// when this policy is deserialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RateLimitPolicy {
    /// Requests per second
    pub requests_per_second: u32,

    /// Burst size
    pub burst: u32,

    /// Key to rate limit by
    pub key: RateLimitKey,
}
445
/// Failure mode for degraded operation
///
/// Serialized in snake_case (`open`, `closed`). `Closed` is the `Default`
/// variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum FailureMode {
    /// Allow traffic through on failure
    Open,
    /// Block traffic on failure (default for security)
    #[default]
    Closed,
}
454
455pub(crate) fn default_failure_mode() -> FailureMode {
456    FailureMode::Closed
457}
458
459// ============================================================================
460// Static File Configuration
461// ============================================================================
462
/// Static file serving configuration
///
/// `root` is the only mandatory field when deserializing; `fallback` is an
/// `Option` and the rest carry serde defaults.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StaticFileConfig {
    /// Root directory for static files
    pub root: PathBuf,

    /// Index file name (default: index.html)
    #[serde(default = "default_index_file")]
    pub index: String,

    /// Enable directory listing
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub directory_listing: bool,

    /// Cache control header value
    ///
    /// Defaults to `public, max-age=3600` (`default_cache_control`).
    #[serde(default = "default_cache_control")]
    pub cache_control: String,

    /// Compress responses
    ///
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub compress: bool,

    /// Additional MIME type mappings
    ///
    /// NOTE(review): presumably keyed by file extension with the MIME type as
    /// value — confirm against the static file handler.
    #[serde(default)]
    pub mime_types: HashMap<String, String>,

    /// Fallback file for SPA routing (e.g., index.html)
    pub fallback: Option<String>,
}
492
/// Serde default for `StaticFileConfig::index`.
fn default_index_file() -> String {
    String::from("index.html")
}
496
/// Serde default for `StaticFileConfig::cache_control`: publicly cacheable
/// for one hour.
fn default_cache_control() -> String {
    let header_value: &str = "public, max-age=3600";
    header_value.to_owned()
}
500
/// Shared serde default for boolean fields that are on unless disabled
/// (e.g. `StaticFileConfig::compress`, `ApiSchemaConfig::validate_requests`,
/// `FallbackTriggers::on_health_failure`).
fn default_true() -> bool {
    const ON: bool = true;
    ON
}
504
505// ============================================================================
506// API Schema Configuration
507// ============================================================================
508
/// API schema validation configuration
///
/// Every field is optional when deserializing.
///
/// NOTE(review): `schema_file` and `schema_content` are documented as
/// mutually exclusive, but this type does not enforce it (both are plain
/// `Option`s) — confirm where the check happens.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ApiSchemaConfig {
    /// OpenAPI/Swagger schema file path (mutually exclusive with schema_content)
    pub schema_file: Option<PathBuf>,

    /// Inline OpenAPI/Swagger schema content (YAML or JSON string)
    /// Mutually exclusive with schema_file
    pub schema_content: Option<String>,

    /// JSON Schema for request validation
    pub request_schema: Option<serde_json::Value>,

    /// JSON Schema for response validation
    pub response_schema: Option<serde_json::Value>,

    /// Validate requests against schema
    ///
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub validate_requests: bool,

    /// Validate responses against schema
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub validate_responses: bool,

    /// Strict validation mode (fail on additional properties)
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub strict_mode: bool,
}
537
538// ============================================================================
539// Error Page Configuration
540// ============================================================================
541
/// Error page configuration
///
/// Every field is optional when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorPageConfig {
    /// Custom error pages by status code
    ///
    /// Keyed by the numeric HTTP status (`u16`); empty by default.
    #[serde(default)]
    pub pages: HashMap<u16, ErrorPage>,

    /// Default error page format
    ///
    /// Defaults to `ErrorFormat::Html`.
    #[serde(default)]
    pub default_format: ErrorFormat,

    /// Include stack traces in errors (development only)
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub include_stack_trace: bool,

    /// Custom error template directory
    pub template_dir: Option<PathBuf>,
}
560
/// Individual error page configuration
///
/// Note that `format` has no serde default here, so it must be given
/// explicitly for every page even though `ErrorFormat` implements `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ErrorPage {
    /// Error page format
    pub format: ErrorFormat,

    /// Custom template or static file path
    pub template: Option<PathBuf>,

    /// Custom error message
    pub message: Option<String>,

    /// Additional headers to include
    ///
    /// Map of header name to value; empty by default.
    #[serde(default)]
    pub headers: HashMap<String, String>,
}
577
578/// Error response format
579#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
580#[serde(rename_all = "snake_case")]
581#[derive(Default)]
582pub enum ErrorFormat {
583    /// HTML error page
584    #[default]
585    Html,
586    /// JSON error response
587    Json,
588    /// Plain text error
589    Text,
590    /// XML error response
591    Xml,
592}
593
594// ============================================================================
595// Shadow / Traffic Mirroring Configuration
596// ============================================================================
597
/// Traffic mirroring (shadow) configuration
///
/// Enables fire-and-forget request duplication to a shadow upstream
/// for safe canary deployments and testing.
///
/// `upstream` is the only mandatory field when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ShadowConfig {
    /// Shadow target upstream ID
    pub upstream: String,

    /// Sampling percentage (0.0-100.0)
    /// Only this percentage of requests will be mirrored
    ///
    /// Defaults to 100.0. The documented range is not enforced by this type
    /// (the field is a plain `f64`).
    #[serde(default = "default_shadow_percentage")]
    pub percentage: f64,

    /// Only shadow requests with this header match
    /// Format: (header_name, header_value)
    ///
    /// Being a tuple, serde represents it as a two-element sequence.
    pub sample_header: Option<(String, String)>,

    /// Shadow request timeout in milliseconds
    ///
    /// Defaults to 5000 (`default_shadow_timeout_ms`).
    #[serde(default = "default_shadow_timeout_ms")]
    pub timeout_ms: u64,

    /// Whether to buffer request bodies for mirroring
    /// Required for POST/PUT/PATCH requests with bodies
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub buffer_body: bool,

    /// Maximum body size to mirror (bytes)
    ///
    /// Defaults to 1 MiB (`default_shadow_max_body_bytes`).
    #[serde(default = "default_shadow_max_body_bytes")]
    pub max_body_bytes: usize,
}
629
/// Serde default for `ShadowConfig::percentage`: mirror every request.
fn default_shadow_percentage() -> f64 {
    const MIRROR_EVERYTHING: f64 = 100.0;
    MIRROR_EVERYTHING
}
633
/// Serde default for `ShadowConfig::timeout_ms`: five seconds, in milliseconds.
fn default_shadow_timeout_ms() -> u64 {
    const MILLIS_PER_SEC: u64 = 1_000;
    5 * MILLIS_PER_SEC
}
637
/// Serde default for `ShadowConfig::max_body_bytes`: 1 MiB, in bytes.
fn default_shadow_max_body_bytes() -> usize {
    const KIB: usize = 1024;
    KIB * KIB
}
641
642// ============================================================================
643// Inference Configuration (for ServiceType::Inference)
644// ============================================================================
645
/// Inference routing configuration for LLM/AI endpoints
///
/// Provides token-based rate limiting, model-aware load balancing,
/// and multi-provider support for inference traffic.
///
/// Every field is optional: `provider` defaults to
/// `InferenceProvider::Generic` and all other fields are `Option`s, so the
/// derived `Default` matches an empty deserialized block.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct InferenceConfig {
    /// Inference provider (determines token extraction strategy)
    #[serde(default)]
    pub provider: InferenceProvider,

    /// Header containing model name (optional, provider-specific default)
    pub model_header: Option<String>,

    /// Token-based rate limiting configuration (per-minute)
    pub rate_limit: Option<TokenRateLimit>,

    /// Token budget configuration (per-period cumulative tracking)
    pub budget: Option<TokenBudgetConfig>,

    /// Cost attribution configuration (per-model pricing)
    pub cost_attribution: Option<CostAttributionConfig>,

    /// Inference-aware routing configuration
    pub routing: Option<InferenceRouting>,

    /// Model-based upstream routing configuration
    pub model_routing: Option<ModelRoutingConfig>,

    /// Semantic guardrails configuration (prompt injection, PII detection)
    pub guardrails: Option<GuardrailsConfig>,
}
677
/// Inference provider type (determines token counting strategy)
///
/// `Generic` is the `Default` variant.
///
/// Serialized in snake_case, which yields `generic`, `open_ai` and
/// `anthropic`. Note that the serde name of `OpenAi` (`open_ai`) differs from
/// the label returned by `as_str()` (`openai`); the two are not
/// interchangeable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum InferenceProvider {
    /// Generic provider (uses x-tokens-used header or estimation)
    #[default]
    Generic,
    /// OpenAI API (uses x-ratelimit-remaining-tokens header)
    OpenAi,
    /// Anthropic API (uses anthropic-ratelimit-tokens-remaining header)
    Anthropic,
}
690
691impl InferenceProvider {
692    /// Returns the string label for this provider (for metrics and logging).
693    pub fn as_str(&self) -> &'static str {
694        match self {
695            Self::Generic => "generic",
696            Self::OpenAi => "openai",
697            Self::Anthropic => "anthropic",
698        }
699    }
700}
701
/// Token-based rate limiting configuration
///
/// `tokens_per_minute` is the only mandatory field when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenRateLimit {
    /// Maximum tokens per minute
    pub tokens_per_minute: u64,

    /// Maximum requests per minute (optional, dual tracking)
    pub requests_per_minute: Option<u64>,

    /// Burst tokens allowed above rate
    ///
    /// Defaults to 10000 (`default_burst_tokens`).
    #[serde(default = "default_burst_tokens")]
    pub burst_tokens: u64,

    /// Token estimation method (fallback when headers unavailable)
    ///
    /// Defaults to `TokenEstimation::Chars`.
    #[serde(default)]
    pub estimation_method: TokenEstimation,
}
719
/// Serde default for `TokenRateLimit::burst_tokens`.
fn default_burst_tokens() -> u64 {
    const BURST_TOKENS: u64 = 10_000;
    BURST_TOKENS
}
723
/// Token estimation method for request sizing
///
/// Serialized in snake_case (`chars`, `words`, `tiktoken`). `Chars` is the
/// `Default` variant. The formulas below describe the intended estimators;
/// they are implemented elsewhere, not in this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum TokenEstimation {
    /// Character count / 4 (fast, rough estimate)
    #[default]
    Chars,
    /// Word count * 1.3 (slightly more accurate)
    Words,
    /// Actual tiktoken encoding (accurate but slower, feature-gated)
    Tiktoken,
}
736
/// Inference-aware routing configuration
///
/// Both fields are optional when deserializing. This struct does not
/// implement `Default`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InferenceRouting {
    /// Load balancing strategy for inference traffic
    ///
    /// Defaults to `InferenceRoutingStrategy::LeastTokensQueued`.
    #[serde(default)]
    pub strategy: InferenceRoutingStrategy,

    /// Header to read queue depth from upstream (optional)
    pub queue_depth_header: Option<String>,
}
747
/// Inference-specific load balancing strategies
///
/// Serialized in snake_case (`least_tokens_queued`, `round_robin`,
/// `least_latency`). `LeastTokensQueued` is the `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "snake_case")]
pub enum InferenceRoutingStrategy {
    /// Route to upstream with least tokens queued (default)
    #[default]
    LeastTokensQueued,
    /// Standard round-robin
    RoundRobin,
    /// Route to upstream with lowest observed latency
    LeastLatency,
}
760
761// ============================================================================
762// Model-Based Routing Configuration
763// ============================================================================
764
/// Model-based routing configuration for inference requests.
///
/// Routes requests to different upstreams based on the model name in the request.
/// Supports glob patterns for flexible model matching (e.g., `gpt-4*`, `claude-*`).
///
/// Both fields are optional when deserializing; the derived `Default` is an
/// empty mapping list with no default upstream.
///
/// # Example KDL Configuration
/// ```kdl
/// model-routing {
///     model "gpt-4" upstream="openai-primary"
///     model "gpt-4*" upstream="openai-primary"
///     model "claude-*" upstream="anthropic-backend" provider="anthropic"
///     default-upstream "openai-primary"
/// }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ModelRoutingConfig {
    /// Ordered list of model-to-upstream mappings (first match wins).
    /// Supports exact matches and glob patterns with `*` wildcard.
    ///
    /// Because the first match wins, more specific patterns should be listed
    /// before broader ones.
    #[serde(default)]
    pub mappings: Vec<ModelUpstreamMapping>,

    /// Default upstream when no mapping matches (overrides route's upstream).
    /// If not set, falls back to the route's configured upstream.
    pub default_upstream: Option<String>,
}
790
/// A single model-to-upstream mapping.
///
/// Maps a model name (or pattern) to a specific upstream pool.
/// Optionally overrides the inference provider for cross-provider routing.
///
/// `model_pattern` and `upstream` are mandatory when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModelUpstreamMapping {
    /// Model name pattern. Can be:
    /// - Exact match: `"gpt-4"`, `"claude-3-opus"`
    /// - Glob pattern: `"gpt-4*"`, `"claude-*"`, `"*-turbo"`
    ///
    /// Stored as a plain string; the pattern is interpreted by the matcher,
    /// not by this type.
    pub model_pattern: String,

    /// Target upstream pool for requests matching this model.
    pub upstream: String,

    /// Optional provider override for cross-provider routing.
    /// When set, the inference provider will be switched for token
    /// extraction and rate limiting purposes.
    pub provider: Option<InferenceProvider>,
}
810
811// ============================================================================
812// Fallback Routing Configuration
813// ============================================================================
814
815/// Fallback routing configuration for automatic failover
816///
817/// Enables requests to automatically fail over to alternative upstreams
818/// when the primary upstream is unhealthy, exhausted, or returns errors.
819/// Supports cross-provider failback with model mapping.
820#[derive(Debug, Clone, Serialize, Deserialize, Default)]
821pub struct FallbackConfig {
822    /// Ordered list of fallback upstreams (tried in order)
823    #[serde(default)]
824    pub upstreams: Vec<FallbackUpstream>,
825
826    /// Triggers that activate fallback behavior
827    #[serde(default)]
828    pub triggers: FallbackTriggers,
829
830    /// Maximum number of fallback attempts before giving up
831    #[serde(default = "default_max_fallback_attempts")]
832    pub max_attempts: u32,
833}
834
/// A single fallback upstream with optional model mapping
///
/// `upstream` is the only mandatory field when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackUpstream {
    /// Upstream pool ID to fallback to
    pub upstream: String,

    /// Provider type for this upstream (for correct token extraction)
    ///
    /// Defaults to `InferenceProvider::Generic`.
    #[serde(default)]
    pub provider: InferenceProvider,

    /// Model mapping from primary model to this provider's equivalent
    /// Key: original model name (or pattern with * wildcard), Value: replacement model name
    ///
    /// NOTE(review): this is a `HashMap`, so iteration order is unspecified;
    /// if several wildcard keys can match the same model, confirm how the
    /// consumer picks one.
    #[serde(default)]
    pub model_mapping: HashMap<String, String>,

    /// Skip this fallback if its health check reports unhealthy
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub skip_if_unhealthy: bool,
}
854
/// Triggers that activate fallback routing
///
/// All fields are optional when deserializing. The hand-written `Default`
/// impl for this type uses the same values as the serde defaults: health
/// failures and connection errors trigger fallback, everything else is off.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FallbackTriggers {
    /// Trigger on health check failure of primary upstream
    ///
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub on_health_failure: bool,

    /// Trigger when token budget is exhausted
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub on_budget_exhausted: bool,

    /// Trigger when latency exceeds threshold (milliseconds)
    ///
    /// `None` (the default) means no latency trigger is configured.
    #[serde(default)]
    pub on_latency_threshold_ms: Option<u64>,

    /// Trigger on specific HTTP error codes from upstream
    ///
    /// Empty by default.
    #[serde(default)]
    pub on_error_codes: Vec<u16>,

    /// Trigger on connection errors (refused, timeout, etc.)
    ///
    /// Defaults to `true`.
    #[serde(default = "default_true")]
    pub on_connection_error: bool,
}
878
879impl Default for FallbackTriggers {
880    fn default() -> Self {
881        Self {
882            on_health_failure: true,
883            on_budget_exhausted: false,
884            on_latency_threshold_ms: None,
885            on_error_codes: Vec::new(),
886            on_connection_error: true,
887        }
888    }
889}
890
/// Serde default for `FallbackConfig::max_attempts`.
fn default_max_fallback_attempts() -> u32 {
    const MAX_ATTEMPTS: u32 = 3;
    MAX_ATTEMPTS
}
894
895// ============================================================================
896// Semantic Guardrails Configuration
897// ============================================================================
898
/// Semantic guardrails configuration for inference routes.
///
/// Enables content inspection via external agents for security:
/// - Prompt injection detection on requests
/// - PII detection on responses
///
/// Both guardrails are optional; the derived `Default` leaves both as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct GuardrailsConfig {
    /// Prompt injection detection configuration
    pub prompt_injection: Option<PromptInjectionConfig>,

    /// PII detection configuration
    pub pii_detection: Option<PiiDetectionConfig>,
}
912
/// Prompt injection detection configuration.
///
/// Detects and optionally blocks requests containing prompt injection attempts.
/// Uses an external agent for content analysis.
///
/// `agent` is the only mandatory field when deserializing. Note that
/// `enabled` defaults to `false`, so a block that omits it is inactive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PromptInjectionConfig {
    /// Enable prompt injection detection
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Name of the agent to use for inspection
    pub agent: String,

    /// Action to take when injection is detected
    ///
    /// Defaults to `GuardrailAction::Log`.
    #[serde(default)]
    pub action: GuardrailAction,

    /// HTTP status code when blocking (default: 400)
    #[serde(default = "default_guardrail_block_status")]
    pub block_status: u16,

    /// Custom message when blocking
    pub block_message: Option<String>,

    /// Agent timeout in milliseconds (default: 500)
    #[serde(default = "default_prompt_injection_timeout_ms")]
    pub timeout_ms: u64,

    /// Behavior when agent times out or fails
    ///
    /// Defaults to `GuardrailFailureMode::Open`.
    #[serde(default)]
    pub failure_mode: GuardrailFailureMode,
}
945
/// PII detection configuration.
///
/// Detects sensitive data (SSN, credit cards, emails, etc.) in responses.
/// Uses an external agent for content analysis.
///
/// `agent` is the only mandatory field when deserializing. Note that
/// `enabled` defaults to `false`, so a block that omits it is inactive.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PiiDetectionConfig {
    /// Enable PII detection
    ///
    /// Defaults to `false`.
    #[serde(default)]
    pub enabled: bool,

    /// Name of the agent to use for inspection
    pub agent: String,

    /// Action to take when PII is detected
    ///
    /// Defaults to `PiiAction::Log`.
    #[serde(default)]
    pub action: PiiAction,

    /// PII categories to detect (e.g., "ssn", "credit-card", "email", "phone")
    ///
    /// Empty by default.
    /// NOTE(review): presumably an empty list means "all categories the agent
    /// supports" — confirm against the agent integration.
    #[serde(default)]
    pub categories: Vec<String>,

    /// Agent timeout in milliseconds (default: 1000)
    #[serde(default = "default_pii_detection_timeout_ms")]
    pub timeout_ms: u64,

    /// Behavior when agent times out or fails
    ///
    /// Defaults to `GuardrailFailureMode::Open`.
    #[serde(default)]
    pub failure_mode: GuardrailFailureMode,
}
975
/// Action to take when a guardrail detects an issue
///
/// Serialized in lowercase (`block`, `log`, `warn`). `Log` is the `Default`
/// variant, so detections are non-blocking unless configured otherwise.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum GuardrailAction {
    /// Block the request and return an error
    Block,
    /// Log the detection but allow the request (default)
    #[default]
    Log,
    /// Allow request but add warning header to response
    Warn,
}
988
/// Action to take when PII is detected in responses
///
/// Serialized in lowercase (`log`, `redact`, `block`). `Log` is the
/// `Default` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum PiiAction {
    /// Log the detection only (default)
    #[default]
    Log,
    /// Redact PII in response (non-streaming only)
    Redact,
    /// Block response (non-streaming only)
    Block,
}
1001
/// Failure mode for guardrail agents (when agent times out or errors)
///
/// Serialized in lowercase (`open`, `closed`). `Open` is the `Default`
/// variant — the opposite of the route-level `FailureMode`, whose default is
/// `Closed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum GuardrailFailureMode {
    /// Allow request to proceed on agent failure (fail-open, default)
    #[default]
    Open,
    /// Block request on agent failure (fail-closed)
    Closed,
}
1012
/// Serde default for `PromptInjectionConfig::block_status`: HTTP 400.
fn default_guardrail_block_status() -> u16 {
    const HTTP_BAD_REQUEST: u16 = 400;
    HTTP_BAD_REQUEST
}
1016
/// Serde default for `PromptInjectionConfig::timeout_ms`: half a second,
/// in milliseconds.
fn default_prompt_injection_timeout_ms() -> u64 {
    const TIMEOUT_MS: u64 = 500;
    TIMEOUT_MS
}
1020
/// Serde default for `PiiDetectionConfig::timeout_ms`: one second,
/// in milliseconds.
fn default_pii_detection_timeout_ms() -> u64 {
    const TIMEOUT_MS: u64 = 1_000;
    TIMEOUT_MS
}
grapsus_config/routes.rs

grapsus_config/
routes.rs