// agentoven_core/router.rs

//! Model Router — intelligent routing across LLM providers.
//!
//! The router selects which model provider to use based on routing strategy,
//! tracks costs per request, and handles fallbacks.
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
9/// Routing strategy for model selection.
10#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
11#[serde(rename_all = "kebab-case")]
12pub enum RoutingStrategy {
13    /// Use the primary model; fall back to alternatives on failure.
14    Fallback,
15    /// Route to the lowest-cost provider.
16    CostOptimized,
17    /// Route to the lowest-latency provider.
18    LatencyOptimized,
19    /// Round-robin across providers.
20    RoundRobin,
21    /// A/B split by percentage.
22    AbSplit { primary_weight: f32 },
23}
24
25/// A model provider configuration.
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ModelProvider {
28    /// Provider identifier (e.g., "azure-openai", "anthropic", "ollama").
29    pub provider: ProviderKind,
30
31    /// Model name at the provider (e.g., "gpt-4o", "claude-sonnet-4-20250514").
32    pub model: String,
33
34    /// API endpoint URL.
35    pub endpoint: String,
36
37    /// Priority (lower = higher priority in fallback strategy).
38    #[serde(default)]
39    pub priority: u32,
40
41    /// Cost per 1K input tokens (USD).
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub cost_per_1k_input: Option<f64>,
44
45    /// Cost per 1K output tokens (USD).
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub cost_per_1k_output: Option<f64>,
48
49    /// Maximum tokens per request.
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub max_tokens: Option<u32>,
52
53    /// Whether this provider is currently enabled.
54    #[serde(default = "default_true")]
55    pub enabled: bool,
56}
57
/// Serde default for boolean fields that should be `true` when the key is
/// absent from the serialized form (see `ModelProvider::enabled`).
fn default_true() -> bool {
    true
}
62/// Supported model providers.
63#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
64#[serde(rename_all = "kebab-case")]
65pub enum ProviderKind {
66    AzureOpenai,
67    OpenAi,
68    Anthropic,
69    Bedrock,
70    DatabricksFoundation,
71    GoogleVertex,
72    Ollama,
73    Custom,
74}
75
76/// Token usage from a model invocation.
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct TokenUsage {
79    /// Number of input (prompt) tokens.
80    pub input_tokens: u32,
81    /// Number of output (completion) tokens.
82    pub output_tokens: u32,
83    /// Total tokens.
84    pub total_tokens: u32,
85    /// Estimated cost in USD.
86    pub estimated_cost_usd: f64,
87    /// The provider and model used.
88    pub provider: ProviderKind,
89    pub model: String,
90}
91
92/// Trait for model provider implementations.
93#[async_trait]
94pub trait ModelProviderClient: Send + Sync {
95    /// Send a chat completion request.
96    async fn chat_completion(
97        &self,
98        messages: Vec<serde_json::Value>,
99        config: &serde_json::Value,
100    ) -> anyhow::Result<(String, TokenUsage)>;
101
102    /// Check if the provider is healthy.
103    async fn health_check(&self) -> bool;
104}