dwctl 8.40.0

The Doubleword Control Layer - A self-hostable observability and analytics platform for LLM applications

//! API request/response models for model deployments.

pub mod enrichment;

use super::pagination::Pagination;
use crate::api::models::groups::GroupResponse;
use crate::db::models::api_keys::ApiKeyPurpose;
use crate::db::models::deployments::{
    DeploymentDBResponse, FallbackConfig, LoadBalancingStrategy, ModelCatalogMetadata, ModelType, ProviderPricing, ProviderPricingUpdate,
    TrafficRuleDBRow,
};
use crate::types::{DeploymentId, InferenceEndpointId, UserId};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_with::rust::double_option;
use utoipa::{IntoParams, ToSchema};
use uuid::Uuid;

/// Sort field for model listing
#[derive(Debug, Clone, Copy, Deserialize, ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum ModelSortField {
    /// Sort by creation date (default)
    CreatedAt,
    /// Sort alphabetically by alias
    Alias,
    /// Sort by intelligence index (from metadata)
    IntelligenceIndex,
    /// Sort by release date (from metadata)
    ReleasedAt,
    /// Sort by context window size (from metadata)
    ContextWindow,
    /// Sort alphabetically by provider name (from metadata)
    Provider,
    /// Sort by cheapest active tariff price (min input+output per token)
    PriceFrom,
}

/// Sort direction
#[derive(Debug, Clone, Copy, Deserialize, ToSchema)]
#[serde(rename_all = "snake_case")]
pub enum SortDirection {
    Asc,
    Desc,
}

/// Facets: distinct values for filter dropdowns
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct ModelFacets {
    /// Distinct provider names across all active models
    pub providers: Vec<String>,
    /// Distinct capabilities across all active models
    pub capabilities: Vec<String>,
    /// Distinct model types across all active models
    pub model_types: Vec<String>,
}

/// Response for model listing, extending PaginatedResponse with optional facets
#[derive(Debug, Clone, Serialize, ToSchema)]
pub struct ModelListResponse {
    /// The models for the current page
    pub data: Vec<DeployedModelResponse>,
    /// Total number of models matching the query (before pagination)
    pub total_count: i64,
    /// Number of items skipped
    pub skip: i64,
    /// Maximum items returned per page
    pub limit: i64,
    /// Filter facets (only included when include=facets)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub facets: Option<ModelFacets>,
}

/// Query parameters for listing deployed models
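///
/// An illustrative query string (the route path is omitted; the flattened pagination fields are
/// assumed to be `skip` and `limit`, mirroring `ModelListResponse`):
///
/// ```text
/// ?limit=20&include=groups,metrics,facets&provider=openai&sort=price_from&sort_direction=asc
/// ```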
#[derive(Debug, Deserialize, IntoParams, ToSchema)]
pub struct ListModelsQuery {
    /// Pagination parameters
    #[serde(flatten)]
    #[param(inline)]
    pub pagination: Pagination,
    /// Filter by inference endpoint ID
    #[param(value_type = Option<String>, format = "uuid")]
    #[schema(value_type = Option<String>, format = "uuid")]
    pub endpoint: Option<InferenceEndpointId>,
    /// Filter by group IDs (comma-separated UUIDs)
    pub group: Option<String>,
    /// Include related data (comma-separated: "groups", "metrics", "status", "pricing", "endpoints", "facets")
    pub include: Option<String>,
    /// Show deleted models when true, non-deleted when false, all when not specified (admin only for deleted=true)
    pub deleted: Option<bool>,
    /// Show inactive models when true, active when false, all when not specified (admin only for inactive=true)
    pub inactive: Option<bool>,
    /// Filter to only models the current user can access (defaults to false for admins, true for users)
    pub accessible: Option<bool>,
    /// Search query to filter models by alias or model_name (case-insensitive substring match)
    pub search: Option<String>,
    /// Filter by composite/virtual model status (true = composite only, false = non-composite only)
    pub is_composite: Option<bool>,
    /// Filter by provider name (case-insensitive exact match against metadata.provider)
    pub provider: Option<String>,
    /// Filter by model type (CHAT, EMBEDDINGS, RERANKER)
    pub model_type: Option<ModelType>,
    /// Filter by capability (returns models that have this capability)
    pub capability: Option<String>,
    /// Sort field (default: created_at)
    pub sort: Option<ModelSortField>,
    /// Sort direction (default depends on sort field)
    pub sort_direction: Option<SortDirection>,
}

/// Query parameters for getting a single deployed model
#[derive(Debug, Deserialize, IntoParams, ToSchema)]
pub struct GetModelQuery {
    /// Return the model even if it is deleted when true; when false or unspecified, a deleted model yields 404
    pub deleted: Option<bool>,
    /// Return the model even if it is inactive when true; when false or unspecified, an inactive model yields 404
    pub inactive: Option<bool>,
    /// Include related data (comma-separated: "groups", "metrics", "status", "pricing", "endpoints")
    pub include: Option<String>,
}

/// Time series point for model activity sparklines
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelTimeSeriesPoint {
    pub timestamp: DateTime<Utc>,
    pub requests: i64,
}

/// Model metrics for display on model cards
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelMetrics {
    /// Average latency across all requests in milliseconds
    pub avg_latency_ms: Option<f64>,
    /// Total number of requests made to this model
    pub total_requests: i64,
    /// Total input tokens processed by this model
    pub total_input_tokens: i64,
    /// Total output tokens generated by this model
    pub total_output_tokens: i64,
    /// When the model was last active (last request timestamp)
    pub last_active_at: Option<DateTime<Utc>>,
    /// Recent activity for sparklines (last 24 hours, hourly buckets)
    pub time_series: Option<Vec<ModelTimeSeriesPoint>>,
}

/// Tariff definition for model creation/update
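///
/// An illustrative JSON payload (all values are hypothetical; prices travel as strings so
/// decimal precision is preserved):
///
/// ```json
/// {
///   "name": "Standard Pricing",
///   "input_price_per_token": "0.0000020",
///   "output_price_per_token": "0.0000060",
///   "api_key_purpose": "batch",
///   "completion_window": "24h"
/// }
/// ```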
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TariffDefinition {
    /// Tariff name (e.g., "Standard Pricing", "Premium Tier")
    pub name: String,
    /// Input price per token (sent/returned as string to preserve precision)
    #[schema(value_type = String)]
    pub input_price_per_token: rust_decimal::Decimal,
    /// Output price per token (sent/returned as string to preserve precision)
    #[schema(value_type = String)]
    pub output_price_per_token: rust_decimal::Decimal,
    /// Optional API key purpose this tariff applies to (realtime, batch, playground)
    pub api_key_purpose: Option<ApiKeyPurpose>,
    /// Optional completion window (priority) for batch tariffs (e.g., "24h", "1h")
    /// Required when api_key_purpose is Batch to support multiple pricing tiers per priority
    #[serde(skip_serializing_if = "Option::is_none")]
    pub completion_window: Option<String>,
}

/// A traffic routing rule that controls access by API key purpose.
/// Rules are evaluated in order; first match wins; no match = allow.
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct TrafficRoutingRule {
    /// The API key purpose this rule applies to (realtime, batch, or playground).
    pub api_key_purpose: ApiKeyPurpose,
    /// Action to take when matched
    pub action: TrafficRoutingAction,
}

/// Action taken when a traffic routing rule matches
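///
/// With the internally tagged, snake_case serde attributes below, the two actions look like this
/// on the wire (the target alias is illustrative):
///
/// ```json
/// {"type": "deny"}
/// ```
///
/// ```json
/// {"type": "redirect", "target": "gpt-4-mini"}
/// ```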
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum TrafficRoutingAction {
    /// Return 403 Forbidden - deny access for this traffic kind
    Deny,
    /// Redirect to another model alias transparently
    Redirect {
        /// The model alias to redirect traffic to
        target: String,
    },
}

/// The data required to create a new model (standard or composite).
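///
/// The enum is internally tagged, so a create payload carries a `"type"` discriminator next to
/// the variant's own fields. A minimal standard-model sketch (identifiers are hypothetical):
///
/// ```json
/// {
///   "type": "standard",
///   "model_name": "gpt-4",
///   "alias": "GPT-4 Turbo",
///   "hosted_on": "7f6b2c9e-1234-4c2a-9e1f-000000000000"
/// }
/// ```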
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum DeployedModelCreate {
    /// Create a standard model backed by a single inference endpoint
    Standard(StandardModelCreate),
    /// Create a composite model that routes across multiple providers
    Composite(CompositeModelCreate),
}

/// Data for creating a standard model (backed by a single endpoint)
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct StandardModelCreate {
    /// The actual model identifier (e.g., "gpt-4", "claude-3-sonnet")
    pub model_name: String,
    /// User-friendly alias (e.g., "GPT-4 Turbo", "Claude Sonnet") - defaults to model_name if not provided
    pub alias: Option<String>,
    /// Human-readable display name for the model catalog (e.g., "Qwen 3.5 397B")
    pub display_name: Option<String>,
    /// Inference endpoint ID where the model is hosted
    #[schema(value_type = String, format = "uuid")]
    pub hosted_on: InferenceEndpointId,
    /// Optional description of the model
    pub description: Option<String>,
    /// Optional model type (Chat or Embeddings)
    pub model_type: Option<ModelType>,
    /// Optional array of model capabilities
    pub capabilities: Option<Vec<String>>,
    /// Global per-model rate limit: requests per second (null = no limit)
    pub requests_per_second: Option<f32>,
    /// Global per-model rate limit: maximum burst size (null = no limit)
    pub burst_size: Option<i32>,
    /// Maximum number of concurrent requests allowed for this model (null = no limit)
    pub capacity: Option<i32>,
    /// Maximum number of concurrent batch requests allowed for this model (null = defaults to capacity or no limit)
    pub batch_capacity: Option<i32>,
    /// Throughput in requests/second for batch capacity calculations (null = use config default)
    pub throughput: Option<f32>,
    /// Provider/downstream pricing details (admin only)
    pub provider_pricing: Option<ProviderPricing>,
    /// Tariffs for this model - if provided, these will be created as active tariffs
    pub tariffs: Option<Vec<TariffDefinition>>,
    /// Whether to sanitize/filter sensitive data from model responses (defaults to false, used when strict_mode=false)
    #[serde(default)]
    pub sanitize_responses: Option<bool>,
    /// Whether to mark provider as trusted in strict mode (defaults to false, used when strict_mode=true)
    #[serde(default)]
    pub trusted: Option<bool>,
    /// Whether to enable the open_responses adapter that converts /v1/responses to /v1/chat/completions (defaults to true)
    #[serde(default)]
    pub open_responses_adapter: Option<bool>,
    /// Traffic routing rules evaluated against API key labels.
    /// Each rule matches on key labels (e.g., purpose) and either denies or redirects traffic.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub traffic_routing_rules: Option<Vec<TrafficRoutingRule>>,
    /// Per-model allowed batch completion windows (overrides global config).
    /// Example: ["24h"] to only allow 24-hour batches on an expensive model.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub allowed_batch_completion_windows: Option<Vec<String>>,
    /// Catalog metadata for display purposes
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<ModelCatalogMetadata>,
}

/// Data for creating a composite model (routes across multiple providers)
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct CompositeModelCreate {
    /// The model identifier for the composite model (e.g., "gpt-4-multi")
    pub model_name: String,
    /// User-friendly alias - defaults to model_name if not provided
    pub alias: Option<String>,
    /// Human-readable display name for the model catalog (e.g., "Qwen 3.5 397B")
    pub display_name: Option<String>,
    /// Optional description of the composite model
    pub description: Option<String>,
    /// Optional model type (Chat or Embeddings)
    pub model_type: Option<ModelType>,
    /// Optional array of model capabilities
    pub capabilities: Option<Vec<String>>,
    /// Global per-model rate limit: requests per second (null = no limit)
    pub requests_per_second: Option<f32>,
    /// Global per-model rate limit: maximum burst size (null = no limit)
    pub burst_size: Option<i32>,
    /// Maximum number of concurrent requests allowed for this model (null = no limit)
    pub capacity: Option<i32>,
    /// Maximum number of concurrent batch requests allowed for this model (null = defaults to capacity or no limit)
    pub batch_capacity: Option<i32>,
    /// Throughput in requests/second for batch capacity calculations (null = use config default)
    pub throughput: Option<f32>,
    /// Tariffs for this model - if provided, these will be created as active tariffs
    pub tariffs: Option<Vec<TariffDefinition>>,
    /// Load balancing strategy (defaults to weighted_random)
    #[serde(default)]
    pub lb_strategy: LoadBalancingStrategy,
    /// Whether to enable automatic fallback on failure (defaults to true)
    #[serde(default = "default_true")]
    pub fallback_enabled: bool,
    /// Whether to trigger fallback on rate limit responses (defaults to true)
    #[serde(default = "default_true")]
    pub fallback_on_rate_limit: bool,
    /// HTTP status codes that trigger fallback (defaults to [500, 502, 503, 504])
    #[serde(default = "default_fallback_statuses")]
    pub fallback_on_status: Vec<i32>,
    /// Sample with replacement during weighted random failover (defaults to false)
    #[serde(default)]
    pub fallback_with_replacement: bool,
    /// Maximum number of failover attempts (defaults to provider count)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_max_attempts: Option<i32>,
    /// Whether to sanitize/filter sensitive data from model responses (defaults to false, used when strict_mode=false)
    #[serde(default)]
    pub sanitize_responses: bool,
    /// Whether to mark provider as trusted in strict mode (defaults to false, used when strict_mode=true)
    #[serde(default)]
    pub trusted: Option<bool>,
    /// Whether to enable the open_responses adapter that converts /v1/responses to /v1/chat/completions (defaults to true)
    #[serde(default)]
    pub open_responses_adapter: Option<bool>,
    /// Traffic routing rules evaluated against API key labels.
    /// Each rule matches on key labels (e.g., purpose) and either denies or redirects traffic.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub traffic_routing_rules: Option<Vec<TrafficRoutingRule>>,
    /// Per-model allowed batch completion windows (overrides global config).
    /// Example: ["24h"] to only allow 24-hour batches on an expensive model.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub allowed_batch_completion_windows: Option<Vec<String>>,
    /// Catalog metadata for display purposes
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<ModelCatalogMetadata>,
}

fn default_true() -> bool {
    true
}

fn default_fallback_statuses() -> Vec<i32> {
    vec![500, 502, 503, 504]
}

/// The data required to update a specific model.
///
/// Fields wrapped in a double `Option` (via `serde_with::rust::double_option`) distinguish three
/// states on the wire: omitting the field leaves the value unchanged, sending `null` clears or
/// resets it, and sending a value sets it.
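///
/// An illustrative partial-update payload (the alias and numbers are hypothetical):
///
/// ```json
/// {
///   "alias": "GPT-4 Turbo",
///   "capacity": null,
///   "requests_per_second": 50.0
/// }
/// ```
///
/// Here `capacity: null` removes the concurrency limit, `requests_per_second` sets a new rate
/// limit, and every omitted field is left unchanged.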
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct DeployedModelUpdate {
    pub alias: Option<String>,
    /// Human-readable display name for the model catalog
    pub display_name: Option<String>,
    pub description: Option<Option<String>>,
    pub model_type: Option<Option<ModelType>>,
    pub capabilities: Option<Option<Vec<String>>>,
    /// Global per-model rate limit: requests per second (omit = no change, null = remove limit, value = set limit)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub requests_per_second: Option<Option<f32>>,
    /// Global per-model rate limit: maximum burst size (omit = no change, null = remove limit, value = set limit)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub burst_size: Option<Option<i32>>,
    /// Maximum concurrent requests (omit = no change, null = remove limit, value = set limit)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub capacity: Option<Option<i32>>,
    /// Maximum concurrent batch requests (omit = no change, null = remove limit, value = set limit)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub batch_capacity: Option<Option<i32>>,
    /// Throughput in requests/second for batch SLA capacity (omit = no change, null = revert to config default, value = set)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub throughput: Option<Option<f32>>,
    /// Provider/downstream pricing details partial updates (null = no change, Some(pricing_update) = partial update)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider_pricing: Option<ProviderPricingUpdate>,
    /// Tariffs for this model - if provided, closes all existing active tariffs and creates these as new active tariffs
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tariffs: Option<Vec<TariffDefinition>>,
    // Composite model fields
    /// Load balancing strategy for composite models (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lb_strategy: Option<LoadBalancingStrategy>,
    /// Whether to enable automatic fallback on failure (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_enabled: Option<bool>,
    /// Whether to trigger fallback on upstream rate limit (429) responses (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_on_rate_limit: Option<bool>,
    /// HTTP status codes that trigger fallback (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_on_status: Option<Vec<i32>>,
    /// Sample with replacement during weighted random failover (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fallback_with_replacement: Option<bool>,
    /// Maximum number of failover attempts (omit = no change, null = reset to default, value = set)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub fallback_max_attempts: Option<Option<i32>>,
    /// Whether to sanitize/filter sensitive data from model responses (null = no change, used when strict_mode=false)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sanitize_responses: Option<bool>,
    /// Whether to mark provider as trusted in strict mode (null = no change, used when strict_mode=true)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub trusted: Option<bool>,
    /// Whether to enable the open_responses adapter (null = no change)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub open_responses_adapter: Option<bool>,
    /// Traffic routing rules (omit = no change, null = clear, array = set)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub traffic_routing_rules: Option<Option<Vec<TrafficRoutingRule>>>,
    /// Per-model allowed batch completion windows (omit = no change, null = clear, array = set)
    #[serde(default, skip_serializing_if = "Option::is_none", with = "double_option")]
    pub allowed_batch_completion_windows: Option<Option<Vec<String>>>,
    /// Catalog metadata (null = no change, Some(metadata) = replace)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<ModelCatalogMetadata>,
}

/// A request to update a specific model (i.e. bundle a `DeployedModelUpdate` with a model id).
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct DeployedModelUpdateRequest {
    #[schema(value_type = String, format = "uuid")]
    pub id: DeploymentId,
    pub deployed_model: DeployedModelUpdate,
}

/// Probe status information for a model (only included when include=status)
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelProbeStatus {
    #[schema(value_type = Option<String>, format = "uuid")]
    pub probe_id: Option<Uuid>,
    pub active: bool,
    pub interval_seconds: Option<i32>,
    pub last_check: Option<DateTime<Utc>>,
    pub last_success: Option<bool>,
    pub uptime_percentage: Option<f64>,
}

/// API response for a deployed model
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct DeployedModelResponse {
    #[schema(value_type = String, format = "uuid")]
    pub id: DeploymentId,
    pub model_name: String,
    pub alias: String,
    /// Human-readable display name for the model catalog
    #[serde(skip_serializing_if = "Option::is_none")]
    pub display_name: Option<String>,
    pub description: Option<String>,
    pub model_type: Option<ModelType>,
    pub capabilities: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schema(value_type = Option<String>, format = "uuid")]
    pub created_by: Option<UserId>,
    /// Inference endpoint where the model is hosted (null for composite models)
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schema(value_type = Option<String>, format = "uuid")]
    pub hosted_on: Option<InferenceEndpointId>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
    /// Global per-model rate limit: requests per second (null = no limit)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub requests_per_second: Option<f32>,
    /// Global per-model rate limit: maximum burst size (null = no limit)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub burst_size: Option<i32>,
    /// Maximum number of concurrent requests allowed for this model (null = no limit)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub capacity: Option<i32>,
    /// Maximum number of concurrent batch requests allowed for this model (null = defaults to capacity or no limit)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub batch_capacity: Option<i32>,
    /// Throughput in requests/second for batch capacity calculations (null = use config default)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub throughput: Option<f32>,
    /// Groups that have access to this model (only included if requested)
    /// Note: `no_recursion` is important! Without it, utoipa overflows the stack at runtime
    /// while trying to follow the recursive relationship.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[schema(no_recursion)]
    pub groups: Option<Vec<GroupResponse>>,
    /// Model usage metrics (only included if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metrics: Option<ModelMetrics>,
    /// Probe status (only included if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status: Option<ModelProbeStatus>,
    /// Provider/downstream pricing details (only included if requested and user has Pricing::ReadAll)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider_pricing: Option<ProviderPricing>,
    /// Inference endpoint information (only included if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub endpoint: Option<super::inference_endpoints::InferenceEndpointResponse>,
    /// Tariffs for this model (only included if requested)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tariffs: Option<Vec<super::tariffs::TariffResponse>>,
    // Composite model fields
    /// Whether this is a composite model (virtual model routing to multiple providers)
    /// Only included for users with permission to view composite model information
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_composite: Option<bool>,
    /// Load balancing strategy for composite models
    #[serde(skip_serializing_if = "Option::is_none")]
    pub lb_strategy: Option<LoadBalancingStrategy>,
    /// Fallback configuration for composite models
    #[serde(skip_serializing_if = "Option::is_none")]
    pub fallback: Option<FallbackConfig>,
    /// Components of this composite model (only included if requested for composite models)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub components: Option<Vec<ModelComponentResponse>>,
    /// Whether to sanitize/filter sensitive data from model responses (used when strict_mode=false)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sanitize_responses: Option<bool>,
    /// Whether to mark provider as trusted in strict mode (used when strict_mode=true)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trusted: Option<bool>,
    /// Whether the open_responses adapter is enabled (converts /v1/responses to /v1/chat/completions)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub open_responses_adapter: Option<bool>,
    /// Traffic routing rules evaluated against API key labels
    #[serde(skip_serializing_if = "Option::is_none")]
    pub traffic_routing_rules: Option<Vec<TrafficRoutingRule>>,
    /// Per-model allowed batch completion windows (overrides global config)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub allowed_batch_completion_windows: Option<Vec<String>>,
    /// Catalog metadata for display purposes
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<ModelCatalogMetadata>,
}

impl From<DeploymentDBResponse> for DeployedModelResponse {
    fn from(db: DeploymentDBResponse) -> Self {
        // Build fallback config for composite models
        let fallback = if db.is_composite {
            Some(FallbackConfig {
                enabled: db.fallback_enabled,
                on_rate_limit: db.fallback_on_rate_limit,
                on_status: db.fallback_on_status,
                with_replacement: db.fallback_with_replacement,
                max_attempts: db.fallback_max_attempts,
            })
        } else {
            None
        };

        Self {
            id: db.id,
            model_name: db.model_name,
            alias: db.alias,
            display_name: db.display_name,
            description: db.description,
            model_type: db.model_type,
            capabilities: db.capabilities,
            created_by: Some(db.created_by),
            hosted_on: db.hosted_on,
            created_at: db.created_at,
            updated_at: db.updated_at,
            requests_per_second: db.requests_per_second,
            burst_size: db.burst_size,
            capacity: db.capacity,
            batch_capacity: db.batch_capacity,
            throughput: db.throughput,
            groups: None,           // By default, relationships are not included
            metrics: None,          // By default, metrics are not included
            status: None,           // By default, probe status is not included
            provider_pricing: None, // By default, provider pricing is not included
            endpoint: None,         // By default, endpoint is not included
            tariffs: None,          // By default, tariffs are not included
            // Composite model fields
            is_composite: Some(db.is_composite),
            lb_strategy: if db.is_composite { Some(db.lb_strategy) } else { None },
            fallback,
            components: None, // By default, components are not included
            sanitize_responses: Some(db.sanitize_responses),
            trusted: Some(db.trusted),
            open_responses_adapter: Some(db.open_responses_adapter),
            traffic_routing_rules: None, // Populated via enrichment (with_traffic_rules)
            allowed_batch_completion_windows: db.allowed_batch_completion_windows,
            metadata: serde_json::from_value::<ModelCatalogMetadata>(db.metadata)
                .inspect_err(|e| tracing::warn!(error = %e, "failed to deserialize model metadata"))
                .ok()
                .filter(|m| *m != ModelCatalogMetadata::default()),
        }
    }
}

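// The helpers below follow a builder-style pattern: `with_*` methods attach optional related
// data, while `mask_*` methods strip fields the caller is not permitted to see. A typical,
// purely illustrative chain in a handler (bindings are hypothetical) might be:
//
//     DeployedModelResponse::from(db_row)
//         .with_tariffs(tariffs)
//         .filter_disabled_batch_tariffs()
//         .mask_capacity()
//
// Which enrichments and masks apply is decided per request from the `include` parameter and the
// caller's permissions.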
impl DeployedModelResponse {
    /// Create a response with groups included
    pub fn with_groups(mut self, groups: Vec<GroupResponse>) -> Self {
        self.groups = Some(groups);
        self
    }

    /// Create a response with metrics included
    pub fn with_metrics(mut self, metrics: ModelMetrics) -> Self {
        self.metrics = Some(metrics);
        self
    }

    /// Create a response with probe status included
    pub fn with_status(mut self, status: ModelProbeStatus) -> Self {
        self.status = Some(status);
        self
    }

    /// Create a response with provider pricing included (admin only)
    pub fn with_provider_pricing(mut self, provider_pricing: Option<ProviderPricing>) -> Self {
        self.provider_pricing = provider_pricing;
        self
    }

    /// Mask rate limiting information (sets to None for users without permission)
    pub fn mask_rate_limiting(mut self) -> Self {
        self.requests_per_second = None;
        self.burst_size = None;
        self
    }

    /// Mask capacity information (sets to None for users without permission)
    pub fn mask_capacity(mut self) -> Self {
        self.capacity = None;
        self.batch_capacity = None;
        self.throughput = None;
        self
    }

    /// Mask created_by field (sets to None for users without system access)
    pub fn mask_created_by(mut self) -> Self {
        self.created_by = None;
        self
    }

    /// Mask composite model fields (for users without permission to see composite info)
    pub fn mask_composite_fields(mut self) -> Self {
        self.is_composite = None;
        self.lb_strategy = None;
        self.fallback = None;
        self.components = None;
        self
    }

    /// Mask response configuration fields (sets to None for users without permission)
    pub fn mask_response_config(mut self) -> Self {
        self.sanitize_responses = None;
        self.trusted = None;
        self.open_responses_adapter = None;
        self
    }

    /// Filter out batch tariffs for completion windows that aren't in allowed_batch_completion_windows.
    /// - `None` → all tariffs pass through (global defaults apply).
    /// - `Some([])` → no batch windows allowed, all batch tariffs removed.
    /// - `Some(["24h", ...])` → keep batch tariffs whose window is in the list, plus generic
    ///   batch tariffs (no window) since they serve as billing fallbacks for allowed windows.
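    ///
    /// For example, with `allowed_batch_completion_windows = Some(["24h"])`, a batch tariff with
    /// `completion_window = "1h"` is dropped, while the `"24h"` tariff and any window-less batch
    /// tariff are kept.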
    pub fn filter_disabled_batch_tariffs(mut self) -> Self {
        if let Some(ref allowed) = self.allowed_batch_completion_windows
            && let Some(ref mut tariffs) = self.tariffs
        {
            tariffs.retain(|t| match (&t.api_key_purpose, &t.completion_window) {
                (Some(ApiKeyPurpose::Batch), Some(window)) => allowed.contains(window),
                (Some(ApiKeyPurpose::Batch), None) => !allowed.is_empty(), // Generic fallback kept when batch is allowed
                _ => true,                                                 // Non-batch tariffs always pass through
            });
        }
        self
    }

    /// Create a response with endpoint information included
    pub fn with_endpoint(mut self, endpoint: super::inference_endpoints::InferenceEndpointResponse) -> Self {
        self.endpoint = Some(endpoint);
        self
    }

    /// Create a response with tariffs included
    pub fn with_tariffs(mut self, tariffs: Vec<super::tariffs::TariffResponse>) -> Self {
        self.tariffs = Some(tariffs);
        self
    }

    /// Create a response with components included (for composite models)
    pub fn with_components(mut self, components: Vec<ModelComponentResponse>) -> Self {
        self.components = Some(components);
        self
    }

    /// Create a response with traffic routing rules included
    pub fn with_traffic_rules(mut self, rules: Vec<TrafficRuleDBRow>) -> Self {
        self.traffic_routing_rules = if rules.is_empty() {
            None
        } else {
            Some(
                rules
                    .into_iter()
                    .filter_map(|r| {
                        let purpose: ApiKeyPurpose =
                            serde_json::from_value(serde_json::Value::String(r.api_key_purpose)).ok()?;
                        let action = match r.action.as_str() {
                            "deny" => TrafficRoutingAction::Deny,
                            "redirect" => TrafficRoutingAction::Redirect {
                                target: r.redirect_target_alias.unwrap_or_default(),
                            },
                            _ => return None,
                        };
                        Some(TrafficRoutingRule {
                            api_key_purpose: purpose,
                            action,
                        })
                    })
                    .collect(),
            )
        };
        self
    }
}

// ===== Composite Model Component Types =====

/// Request to add a component to a composite model
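///
/// An illustrative payload; every field falls back to its serde default when omitted
/// (`weight` = 1, `enabled` = true, `sort_order` = 0):
///
/// ```json
/// {"weight": 10, "enabled": true, "sort_order": 0}
/// ```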
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelComponentCreate {
    /// Weight for load balancing (1-100, higher = more traffic)
    #[serde(default = "default_weight")]
    pub weight: i32,
    /// Whether this component is enabled
    #[serde(default = "default_enabled")]
    pub enabled: bool,
    /// Sort order for priority-based routing (lower = higher priority)
    #[serde(default)]
    pub sort_order: i32,
}

fn default_weight() -> i32 {
    1
}

fn default_enabled() -> bool {
    true
}

/// Request to update a component's configuration
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelComponentUpdate {
    /// New weight for load balancing (1-100)
    pub weight: Option<i32>,
    /// Whether this component is enabled
    pub enabled: Option<bool>,
    /// Sort order for priority-based routing (lower = higher priority)
    pub sort_order: Option<i32>,
}

/// Summary of a model used as a component in a composite model
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ComponentModelSummary {
    /// The model ID
    #[schema(value_type = String, format = "uuid")]
    pub id: DeploymentId,
    /// The model alias (user-facing name)
    pub alias: String,
    /// The underlying model name
    pub model_name: String,
    /// Optional description
    pub description: Option<String>,
    /// Model type (CHAT, EMBEDDINGS, RERANKER)
    pub model_type: Option<ModelType>,
    /// The endpoint hosting this model (if any)
    pub endpoint: Option<ComponentEndpointSummary>,
    /// Whether to mark provider as trusted in strict mode
    pub trusted: bool,
    /// Whether the open_responses adapter is enabled
    pub open_responses_adapter: bool,
}

/// Summary of an endpoint hosting a component model
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ComponentEndpointSummary {
    /// The endpoint ID
    #[schema(value_type = String, format = "uuid")]
    pub id: InferenceEndpointId,
    /// The endpoint name
    pub name: String,
}

/// Response for a composite model component
#[derive(Debug, Clone, Serialize, Deserialize, ToSchema)]
pub struct ModelComponentResponse {
    /// Weight for load balancing (1-100)
    pub weight: i32,
    /// Whether this component is enabled
    pub enabled: bool,
    /// Sort order for priority-based routing (lower = higher priority)
    pub sort_order: i32,
    /// When this component was added
    pub created_at: DateTime<Utc>,
    /// The underlying model details
    pub model: ComponentModelSummary,
}