use super::{HealthStatus, Metadata};
use crate::config::ProviderConfig;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
use uuid::Uuid;
/// One routable upstream deployment: its provider configuration plus
/// shared, concurrently-updated health and metric state.
#[derive(Debug, Clone)]
pub struct Deployment {
    /// Identity metadata; the id is exposed via [`Deployment::id`].
    pub metadata: Metadata,
    /// Provider configuration this deployment was built from.
    pub config: ProviderConfig,
    /// Health tracker, shared across clones via `Arc`.
    pub health: Arc<DeploymentHealth>,
    /// Request metrics, shared across clones via `Arc`.
    pub metrics: Arc<DeploymentMetrics>,
    /// Lifecycle state; only `Active`/`Degraded` are routable
    /// (see [`Deployment::is_available`]).
    pub state: DeploymentState,
    /// Tags copied from the config at construction.
    pub tags: Vec<String>,
    /// Load-balancing weight copied from the config.
    pub weight: f32,
    /// Optional per-deployment rate limits; `None` at construction.
    pub rate_limits: Option<DeploymentRateLimits>,
    /// Optional cost/billing configuration; `None` at construction.
    pub cost_config: Option<DeploymentCostConfig>,
}
/// Lifecycle state of a deployment.
///
/// Fieldless, so it additionally derives `Copy`, `PartialEq`, and `Eq`
/// (backward-compatible) for cheap copies and direct comparison alongside
/// the existing `matches!` checks.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DeploymentState {
    /// Serving traffic normally.
    Active,
    /// Impaired but still routable (see `Deployment::is_available`).
    Degraded,
    /// Administratively turned off.
    Disabled,
    /// Winding down; finishing in-flight work.
    Draining,
    /// Temporarily offline for maintenance.
    Maintenance,
    /// Failed; not routable.
    Failed,
}
/// Concurrently-updated health state for one deployment.
///
/// Atomics are accessed with `Ordering::Relaxed` at every call site in this
/// file; the two lock-protected fields hold non-atomic-friendly types.
#[derive(Debug)]
pub struct DeploymentHealth {
    /// Latest health status; starts as `Unknown`.
    pub status: parking_lot::RwLock<HealthStatus>,
    /// Unix timestamp (seconds) of the last health check; 0 = never checked.
    pub last_check: AtomicU64,
    /// Failure counter: reset to 0 on `Healthy`, incremented on `Unhealthy`
    /// (see `Deployment::update_health`).
    pub failure_count: AtomicU32,
    /// Unix timestamp (seconds) of the last `Unhealthy` result; 0 = never.
    pub last_failure: AtomicU64,
    /// Response time in ms. NOTE(review): `update_health` stores the latest
    /// sample, not a running average, despite the name.
    pub avg_response_time: AtomicU64,
    /// Success rate; initialized to 10_000, presumably 100.00% in basis
    /// points — TODO confirm (no writer is visible in this file).
    pub success_rate: AtomicU32,
    /// Circuit-breaker state; `Open` makes the deployment unavailable.
    pub circuit_breaker: parking_lot::RwLock<CircuitBreakerState>,
}
/// Circuit-breaker state for a deployment.
///
/// Fieldless, so it additionally derives `Copy`, `PartialEq`, and `Eq`
/// (backward-compatible). State transitions are not visible in this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CircuitBreakerState {
    /// Requests flow normally.
    Closed,
    /// Requests are blocked; `Deployment::is_available` returns false.
    Open,
    /// Presumably probing with limited traffic after a cool-down — TODO
    /// confirm against the transition logic elsewhere.
    HalfOpen,
}
/// Concurrently-updated request metrics for one deployment.
///
/// All atomics are read/written with `Ordering::Relaxed` in this file, so
/// cross-field consistency is not guaranteed under concurrency.
#[derive(Debug)]
pub struct DeploymentMetrics {
    /// Total requests recorded via `Deployment::record_request`.
    pub total_requests: AtomicU64,
    /// Requests recorded with `success == true`.
    pub successful_requests: AtomicU64,
    /// Requests recorded with `success == false`.
    pub failed_requests: AtomicU64,
    /// Sum of token counts across recorded requests.
    pub total_tokens: AtomicU64,
    /// Accumulated cost; `f64` has no std atomic, hence the lock.
    pub total_cost: parking_lot::RwLock<f64>,
    /// Active connection count — no writer is visible in this file.
    pub active_connections: AtomicU32,
    /// Queued request count — no writer is visible in this file.
    pub queue_size: AtomicU32,
    /// Unix timestamp (seconds) of the most recent recorded request.
    pub last_request: AtomicU64,
    /// Request rate — units and writer not visible in this file.
    pub request_rate: AtomicU32,
    /// Token rate — units and writer not visible in this file.
    pub token_rate: AtomicU32,
    /// Response time in ms. NOTE(review): `record_request` stores the latest
    /// sample, not an average, despite the name.
    pub avg_response_time: AtomicU64,
    /// p95 response time in ms — writer not visible in this file.
    pub p95_response_time: AtomicU64,
    /// p99 response time in ms — writer not visible in this file.
    pub p99_response_time: AtomicU64,
}
/// Optional per-deployment rate limits; a `None` field means "no limit".
///
/// Field meanings are presumed from standard rate-limit shorthand
/// (r = requests, t = tokens, pm = per minute, pd = per day); no enforcement
/// logic is visible in this file — confirm against the limiter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeploymentRateLimits {
    /// Requests per minute (presumed).
    pub rpm: Option<u32>,
    /// Tokens per minute (presumed).
    pub tpm: Option<u32>,
    /// Requests per day (presumed).
    pub rpd: Option<u32>,
    /// Tokens per day (presumed).
    pub tpd: Option<u32>,
    /// Maximum concurrent requests.
    pub concurrent: Option<u32>,
    /// Burst allowance above the steady-state limits.
    pub burst: Option<u32>,
}
/// Optional cost/billing configuration for a deployment.
///
/// Per-unit costs are optional so providers can price only the dimensions
/// they bill on; no cost computation is visible in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeploymentCostConfig {
    /// Cost per input (prompt) token.
    pub input_cost_per_token: Option<f64>,
    /// Cost per output (completion) token.
    pub output_cost_per_token: Option<f64>,
    /// Flat cost per request.
    pub cost_per_request: Option<f64>,
    /// Cost per generated image.
    pub cost_per_image: Option<f64>,
    /// Cost per second of audio.
    pub cost_per_audio_second: Option<f64>,
    /// Currency for the costs above (e.g. "USD" — format not enforced here).
    pub currency: String,
    /// How usage is billed.
    pub billing_model: BillingModel,
}
/// Billing model for a deployment's cost configuration.
///
/// Fieldless, so it additionally derives `Copy`, `PartialEq`, and `Eq`
/// (backward-compatible) for cheap copies and direct comparison.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BillingModel {
    /// Billed per unit of usage.
    PayPerUse,
    /// Flat subscription billing.
    Subscription,
    /// Usage drawn down from a prepaid balance.
    Prepaid,
    /// No billing.
    Free,
}
/// Serializable point-in-time view of a deployment, produced by
/// `Deployment::snapshot`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeploymentSnapshot {
    /// Deployment id (from its metadata).
    pub id: Uuid,
    /// Provider name from the config.
    pub name: String,
    /// Provider type from the config.
    pub provider_type: String,
    /// Model identifier for this deployment.
    pub model: String,
    /// Lifecycle state at snapshot time.
    pub state: DeploymentState,
    /// Health status at snapshot time.
    pub health_status: HealthStatus,
    /// Load-balancing weight.
    pub weight: f32,
    /// Deployment tags.
    pub tags: Vec<String>,
    /// Metrics captured at snapshot time.
    pub metrics: DeploymentMetricsSnapshot,
    /// When the snapshot was taken.
    pub updated_at: chrono::DateTime<chrono::Utc>,
}
/// Plain-value copy of [`DeploymentMetrics`], produced by
/// `Deployment::metrics_snapshot`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeploymentMetricsSnapshot {
    /// Total recorded requests.
    pub total_requests: u64,
    /// Recorded successful requests.
    pub successful_requests: u64,
    /// Recorded failed requests.
    pub failed_requests: u64,
    /// Success percentage (0.0–100.0); 0.0 when no requests were recorded.
    pub success_rate: f64,
    /// Total tokens across recorded requests.
    pub total_tokens: u64,
    /// Accumulated cost.
    pub total_cost: f64,
    /// Active connection count at snapshot time.
    pub active_connections: u32,
    /// Latest response-time sample in ms (see note on `DeploymentMetrics`).
    pub avg_response_time: u64,
    /// p95 response time in ms.
    pub p95_response_time: u64,
    /// p99 response time in ms.
    pub p99_response_time: u64,
    /// Request rate (units not visible in this file).
    pub request_rate: u32,
    /// Token rate (units not visible in this file).
    pub token_rate: u32,
}
impl Deployment {
    /// Creates a deployment from a provider configuration.
    ///
    /// Starts in [`DeploymentState::Active`] with fresh health and metric
    /// trackers; rate limits and cost configuration are left unset.
    pub fn new(config: ProviderConfig) -> Self {
        // Pull these out before `config` is moved into the struct.
        let weight = config.weight;
        let tags = config.tags.clone();
        Self {
            metadata: Metadata::new(),
            config,
            health: Arc::new(DeploymentHealth::new()),
            metrics: Arc::new(DeploymentMetrics::new()),
            state: DeploymentState::Active,
            tags,
            weight,
            rate_limits: None,
            cost_config: None,
        }
    }

    /// Unique deployment id, assigned via `Metadata::new`.
    pub fn id(&self) -> Uuid {
        self.metadata.id
    }

    /// Configured provider name.
    pub fn name(&self) -> &str {
        &self.config.name
    }

    /// Configured provider type (e.g. "openai").
    pub fn provider_type(&self) -> &str {
        &self.config.provider_type
    }

    /// Whether the deployment may receive traffic: state must be `Active`
    /// or `Degraded`, and the circuit breaker must not be `Open`.
    pub fn is_available(&self) -> bool {
        matches!(
            self.state,
            DeploymentState::Active | DeploymentState::Degraded
        ) && !matches!(
            *self.health.circuit_breaker.read(),
            CircuitBreakerState::Open
        )
    }

    /// Current health status (copied out from under the lock).
    pub fn health_status(&self) -> HealthStatus {
        *self.health.status.read()
    }

    /// Records the outcome of a health check.
    ///
    /// Updates the status and last-check timestamp, optionally stores the
    /// observed response time, resets the failure counter on `Healthy`, and
    /// increments it (plus the last-failure timestamp) on `Unhealthy`.
    pub fn update_health(&self, status: HealthStatus, response_time_ms: Option<u64>) {
        *self.health.status.write() = status;
        self.health
            .last_check
            .store(chrono::Utc::now().timestamp() as u64, Ordering::Relaxed);
        if let Some(response_time) = response_time_ms {
            // NOTE(review): stores the latest sample, not a running average,
            // despite the field name.
            self.health
                .avg_response_time
                .store(response_time, Ordering::Relaxed);
        }
        match status {
            HealthStatus::Healthy => {
                self.health.failure_count.store(0, Ordering::Relaxed);
            }
            HealthStatus::Unhealthy => {
                self.health.failure_count.fetch_add(1, Ordering::Relaxed);
                self.health
                    .last_failure
                    .store(chrono::Utc::now().timestamp() as u64, Ordering::Relaxed);
            }
            // Other statuses (e.g. Unknown) leave the failure counters alone.
            _ => {}
        }
    }

    /// Records a completed request: bumps request/success/failure counters,
    /// token and cost totals, the last-request timestamp, and the latency.
    pub fn record_request(&self, success: bool, tokens: u32, cost: f64, response_time_ms: u64) {
        self.metrics.total_requests.fetch_add(1, Ordering::Relaxed);
        if success {
            self.metrics
                .successful_requests
                .fetch_add(1, Ordering::Relaxed);
        } else {
            self.metrics.failed_requests.fetch_add(1, Ordering::Relaxed);
        }
        self.metrics
            .total_tokens
            .fetch_add(tokens as u64, Ordering::Relaxed);
        {
            // f64 has no std atomic, so cost accumulates under a lock; keep
            // the critical section to the single addition.
            let mut total_cost = self.metrics.total_cost.write();
            *total_cost += cost;
        }
        self.metrics
            .last_request
            .store(chrono::Utc::now().timestamp() as u64, Ordering::Relaxed);
        // NOTE(review): stores the latest latency sample, not an average,
        // despite the field name.
        self.metrics
            .avg_response_time
            .store(response_time_ms, Ordering::Relaxed);
    }

    /// Produces a point-in-time copy of the metrics.
    ///
    /// Fields are loaded individually with relaxed ordering, so the snapshot
    /// is not guaranteed to be internally consistent under concurrency.
    pub fn metrics_snapshot(&self) -> DeploymentMetricsSnapshot {
        let total_requests = self.metrics.total_requests.load(Ordering::Relaxed);
        let successful_requests = self.metrics.successful_requests.load(Ordering::Relaxed);
        let failed_requests = self.metrics.failed_requests.load(Ordering::Relaxed);
        // Success rate as a percentage; 0.0 when nothing has been recorded
        // (avoids 0/0).
        let success_rate = if total_requests > 0 {
            (successful_requests as f64 / total_requests as f64) * 100.0
        } else {
            0.0
        };
        DeploymentMetricsSnapshot {
            total_requests,
            successful_requests,
            failed_requests,
            success_rate,
            total_tokens: self.metrics.total_tokens.load(Ordering::Relaxed),
            total_cost: *self.metrics.total_cost.read(),
            active_connections: self.metrics.active_connections.load(Ordering::Relaxed),
            avg_response_time: self.metrics.avg_response_time.load(Ordering::Relaxed),
            p95_response_time: self.metrics.p95_response_time.load(Ordering::Relaxed),
            p99_response_time: self.metrics.p99_response_time.load(Ordering::Relaxed),
            request_rate: self.metrics.request_rate.load(Ordering::Relaxed),
            token_rate: self.metrics.token_rate.load(Ordering::Relaxed),
        }
    }

    /// Produces a serializable snapshot of the deployment's identity, state,
    /// and metrics, stamped with the current time.
    pub fn snapshot(&self) -> DeploymentSnapshot {
        DeploymentSnapshot {
            id: self.id(),
            name: self.name().to_string(),
            provider_type: self.provider_type().to_string(),
            // BUG FIX: this previously copied `config.api_key` into `model`,
            // leaking the secret into a Serialize-able snapshot. Report the
            // first configured model instead (empty string when none).
            model: self.config.models.first().cloned().unwrap_or_default(),
            state: self.state.clone(),
            health_status: self.health_status(),
            weight: self.weight,
            tags: self.tags.clone(),
            metrics: self.metrics_snapshot(),
            updated_at: chrono::Utc::now(),
        }
    }
}
impl Default for DeploymentHealth {
fn default() -> Self {
Self::new()
}
}
impl DeploymentHealth {
    /// Creates a health tracker with no recorded checks.
    ///
    /// Status starts as `Unknown` until the first health check runs, all
    /// timestamps/counters start at 0, and the circuit breaker starts
    /// `Closed` (requests allowed).
    ///
    /// Fix: the `success_rate` and `circuit_breaker` initializers were
    /// jammed onto one line; split for readability.
    pub fn new() -> Self {
        Self {
            status: parking_lot::RwLock::new(HealthStatus::Unknown),
            last_check: AtomicU64::new(0),
            failure_count: AtomicU32::new(0),
            last_failure: AtomicU64::new(0),
            avg_response_time: AtomicU64::new(0),
            // 10_000 presumably encodes 100.00% in basis points — TODO
            // confirm; no reader/writer of this field is visible here.
            success_rate: AtomicU32::new(10000),
            circuit_breaker: parking_lot::RwLock::new(CircuitBreakerState::Closed),
        }
    }
}
impl Default for DeploymentMetrics {
fn default() -> Self {
Self::new()
}
}
impl DeploymentMetrics {
    /// Creates a metrics tracker with every counter, timestamp, and total
    /// zeroed.
    pub fn new() -> Self {
        Self {
            total_requests: AtomicU64::new(0),
            successful_requests: AtomicU64::new(0),
            failed_requests: AtomicU64::new(0),
            total_tokens: AtomicU64::new(0),
            total_cost: parking_lot::RwLock::new(0.0),
            active_connections: AtomicU32::new(0),
            queue_size: AtomicU32::new(0),
            last_request: AtomicU64::new(0),
            request_rate: AtomicU32::new(0),
            token_rate: AtomicU32::new(0),
            avg_response_time: AtomicU64::new(0),
            p95_response_time: AtomicU64::new(0),
            p99_response_time: AtomicU64::new(0),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::ProviderConfig;
    use std::collections::HashMap;

    /// Builds a minimal `ProviderConfig` for tests, varying only the fields
    /// the individual tests care about. (Previously the full 20-field
    /// literal was duplicated in both tests with shuffled field order.)
    fn test_config(models: Vec<String>, tags: Vec<String>) -> ProviderConfig {
        ProviderConfig {
            name: "test-provider".to_string(),
            provider_type: "openai".to_string(),
            api_key: "test-key".to_string(),
            base_url: None,
            models,
            timeout: 30,
            max_retries: 3,
            organization: None,
            api_version: None,
            project: None,
            weight: 1.0,
            rpm: 1000,
            tpm: 10000,
            enabled: true,
            max_concurrent_requests: 10,
            retry: crate::config::RetryConfig::default(),
            health_check: crate::config::HealthCheckConfig::default(),
            settings: HashMap::new(),
            tags,
        }
    }

    /// A fresh deployment exposes its config and starts routable.
    #[test]
    fn test_deployment_creation() {
        let deployment = Deployment::new(test_config(
            vec!["gpt-4".to_string()],
            vec!["test".to_string()],
        ));
        assert_eq!(deployment.name(), "test-provider");
        assert_eq!(deployment.provider_type(), "openai");
        assert!(deployment.is_available());
    }

    /// Recording one successful request is reflected in the snapshot.
    #[test]
    fn test_metrics_recording() {
        let deployment = Deployment::new(test_config(vec![], vec![]));
        deployment.record_request(true, 100, 0.01, 250);
        let snapshot = deployment.metrics_snapshot();
        assert_eq!(snapshot.total_requests, 1);
        assert_eq!(snapshot.successful_requests, 1);
        assert_eq!(snapshot.total_tokens, 100);
        assert_eq!(snapshot.total_cost, 0.01);
        assert_eq!(snapshot.success_rate, 100.0);
    }
}