1use async_trait::async_trait;
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "kebab-case")]
12pub enum RoutingStrategy {
13 Fallback,
15 CostOptimized,
17 LatencyOptimized,
19 RoundRobin,
21 AbSplit { primary_weight: f32 },
23}
24
25#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ModelProvider {
28 pub provider: ProviderKind,
30
31 pub model: String,
33
34 pub endpoint: String,
36
37 #[serde(default)]
39 pub priority: u32,
40
41 #[serde(skip_serializing_if = "Option::is_none")]
43 pub cost_per_1k_input: Option<f64>,
44
45 #[serde(skip_serializing_if = "Option::is_none")]
47 pub cost_per_1k_output: Option<f64>,
48
49 #[serde(skip_serializing_if = "Option::is_none")]
51 pub max_tokens: Option<u32>,
52
53 #[serde(default = "default_true")]
55 pub enabled: bool,
56}
57
/// Serde default helper: returns `true`.
///
/// Referenced by path from `#[serde(default = "default_true")]`, which needs
/// a function rather than a literal.
fn default_true() -> bool {
    true
}
61
62#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
64#[serde(rename_all = "kebab-case")]
65pub enum ProviderKind {
66 AzureOpenai,
67 OpenAi,
68 Anthropic,
69 Bedrock,
70 DatabricksFoundation,
71 GoogleVertex,
72 Ollama,
73 Custom,
74}
75
76#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct TokenUsage {
79 pub input_tokens: u32,
81 pub output_tokens: u32,
83 pub total_tokens: u32,
85 pub estimated_cost_usd: f64,
87 pub provider: ProviderKind,
89 pub model: String,
90}
91
92#[async_trait]
94pub trait ModelProviderClient: Send + Sync {
95 async fn chat_completion(
97 &self,
98 messages: Vec<serde_json::Value>,
99 config: &serde_json::Value,
100 ) -> anyhow::Result<(String, TokenUsage)>;
101
102 async fn health_check(&self) -> bool;
104}