use async_trait::async_trait;
use serde::{Deserialize, Serialize};
/// Names the strategy used to choose among configured model providers.
///
/// Only the strategy names are declared here; the logic that interprets them
/// is not part of this definition.
///
/// Variant names are serialized in kebab-case (for example `"cost-optimized"`).
/// `rename_all` on an enum renames the variants only, so the `primary_weight`
/// field of [`RoutingStrategy::AbSplit`] keeps its snake_case name on the wire.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
#[serde(rename_all = "kebab-case")]
pub enum RoutingStrategy {
    /// Serialized as `"fallback"`.
    Fallback,
    /// Serialized as `"cost-optimized"`.
    CostOptimized,
    /// Serialized as `"latency-optimized"`.
    LatencyOptimized,
    /// Serialized as `"round-robin"`.
    RoundRobin,
    /// Serialized under the `"ab-split"` tag, carrying its field as a nested map.
    AbSplit {
        /// Weight assigned to the primary side of the split.
        ///
        /// NOTE(review): presumably a fraction in `0.0..=1.0`; no range is
        /// enforced by this type — confirm against the routing code.
        primary_weight: f32,
    },
}
/// Configuration entry describing one model offered by one provider.
///
/// `provider`, `model` and `endpoint` are required when deserializing; every
/// other field falls back to a default when it is absent from the input.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ModelProvider {
    /// Which kind of backend serves this model.
    pub provider: ProviderKind,

    /// Model identifier, stored as a free-form string.
    pub model: String,

    /// Address of the backend.
    ///
    /// NOTE(review): presumably a URL; the format is not validated here.
    pub endpoint: String,

    /// Priority of this entry; `0` when omitted from the input.
    ///
    /// NOTE(review): whether lower or higher values win is decided by the
    /// consumer of this struct — confirm before relying on the ordering.
    #[serde(default)]
    pub priority: u32,

    /// Optional cost figure for input; left out of serialized output when `None`.
    ///
    /// NOTE(review): the name suggests a price per 1,000 input tokens; the
    /// currency is not stated in this definition.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost_per_1k_input: Option<f64>,

    /// Optional cost figure for output; left out of serialized output when `None`.
    ///
    /// NOTE(review): the name suggests a price per 1,000 output tokens; the
    /// currency is not stated in this definition.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost_per_1k_output: Option<f64>,

    /// Optional token limit; left out of serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,

    /// Whether this entry is enabled; `true` when omitted from the input.
    #[serde(default = "default_true")]
    pub enabled: bool,
}
/// Serde default hook that yields `true`.
///
/// Referenced by `#[serde(default = "default_true")]` on
/// `ModelProvider::enabled`, so entries without an explicit `enabled` key
/// deserialize as enabled.
fn default_true() -> bool {
    true
}
/// Identifies the kind of backend that serves a model.
///
/// Variant names are serialized in kebab-case; the exact wire name is listed
/// on each variant.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so that a
/// `ProviderKind` can be used directly as a `HashMap`/`HashSet` key (for
/// example, to aggregate usage per provider). All variants are fieldless, so
/// equality is total and the derives are always valid.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "kebab-case")]
pub enum ProviderKind {
    /// Serialized as `"azure-openai"`.
    AzureOpenai,
    /// Serialized as `"open-ai"`.
    ///
    /// NOTE(review): the hyphen comes from the capital `A` in `OpenAi`, while
    /// `AzureOpenai` yields `"azure-openai"`; confirm the asymmetry is intended
    /// before changing either name, since doing so alters the wire format.
    OpenAi,
    /// Serialized as `"anthropic"`.
    Anthropic,
    /// Serialized as `"bedrock"`.
    Bedrock,
    /// Serialized as `"databricks-foundation"`.
    DatabricksFoundation,
    /// Serialized as `"google-vertex"`.
    GoogleVertex,
    /// Serialized as `"ollama"`.
    Ollama,
    /// Serialized as `"custom"`.
    Custom,
}
/// Token counts and estimated cost reported for a model call, together with
/// the provider and model the figures belong to.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TokenUsage {
    /// Number of input tokens.
    pub input_tokens: u32,

    /// Number of output tokens.
    pub output_tokens: u32,

    /// Total number of tokens.
    ///
    /// NOTE(review): presumably `input_tokens + output_tokens`, but the field
    /// is stored independently and nothing in this type enforces that.
    pub total_tokens: u32,

    /// Estimated cost in US dollars, as indicated by the field name.
    pub estimated_cost_usd: f64,

    /// Kind of provider these figures are attributed to.
    pub provider: ProviderKind,

    /// Model identifier these figures are attributed to.
    pub model: String,
}
/// Asynchronous client interface for a model backend.
///
/// Implementors must be `Send + Sync`. The async methods are enabled by the
/// `#[async_trait]` attribute.
#[async_trait]
pub trait ModelProviderClient: Send + Sync {
    /// Requests a chat completion.
    ///
    /// `messages` is an owned list of JSON values and `config` is a borrowed
    /// JSON value; their expected schema is defined by each implementation.
    ///
    /// On success returns a `String` together with the [`TokenUsage`] for the
    /// call.
    /// NOTE(review): the `String` is presumably the generated completion
    /// text — confirm against the implementations.
    ///
    /// # Errors
    ///
    /// Returns an `anyhow::Error` when the implementation fails to produce a
    /// completion.
    async fn chat_completion(
        &self,
        messages: Vec<serde_json::Value>,
        config: &serde_json::Value,
    ) -> anyhow::Result<(String, TokenUsage)>;

    /// Reports the health of the backend as a boolean.
    ///
    /// NOTE(review): `true` presumably means the backend is reachable and
    /// usable; the exact criteria are left to each implementation.
    async fn health_check(&self) -> bool;
}