inference_runtime_litellm/
lib.rs1#![forbid(unsafe_code)]
16#![deny(rust_2018_idioms)]
17
18use std::sync::Arc;
19
20use arc_swap::ArcSwap;
21use async_trait::async_trait;
22use serde::{Deserialize, Serialize};
23use url::Url;
24
25use inference_core::batch::ExecuteBatch;
26use inference_core::deployment::{RateLimits, RetryPolicy, Timeouts};
27use inference_core::error::InferenceResult;
28use inference_core::runner::{ModelRunner, RunHandle, SessionRebuildCause};
29use inference_core::runtime::{CircuitBreakerConfig, ProviderKind, RuntimeKind, TransportKind};
30
31use inference_remote_core::session::SessionSnapshot;
32use inference_runtime_openai::{OpenAiConfig, OpenAiRunner, OpenAiVariant};
33
/// Configuration for a LiteLLM proxy deployment.
///
/// LiteLLM exposes an OpenAI-compatible API, so this config is ultimately
/// converted into an [`OpenAiConfig`] via [`LiteLlmConfig::into_openai`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LiteLlmConfig {
    /// Base URL of the LiteLLM proxy endpoint.
    pub endpoint: Url,
    /// Where to load the API key from (env var, file, or inline value).
    pub api_key: SecretRef,
    /// Client-side rate limits; `RateLimits::default()` when omitted.
    #[serde(default)]
    pub rate_limits: RateLimits,
    /// Retry behavior; defaults to a single retry via `default_retry`.
    #[serde(default = "default_retry")]
    pub retry: RetryPolicy,
    /// Circuit-breaker settings; default config when omitted.
    #[serde(default)]
    pub circuit_breaker: CircuitBreakerConfig,
    /// Request/connect timeouts; default config when omitted.
    #[serde(default)]
    pub timeouts: Timeouts,
}
47
/// Reference to a secret value, tagged by its source.
///
/// Serialized with an internal `from` tag, e.g.
/// `{ "from": "env", "name": "LITELLM_API_KEY" }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "from", rename_all = "snake_case")]
pub enum SecretRef {
    /// Read the secret from the named environment variable.
    Env { name: String },
    /// Read the secret from the file at `path`.
    File { path: std::path::PathBuf },
    /// Secret embedded directly in configuration (avoid in checked-in files).
    Inline { value: String },
}
55
56fn default_retry() -> RetryPolicy {
57 RetryPolicy {
60 max_retries: 1,
61 ..RetryPolicy::default()
62 }
63}
64
65impl LiteLlmConfig {
66 pub fn into_openai(self, openai_secret: inference_runtime_openai::config::SecretRef) -> OpenAiConfig {
67 OpenAiConfig {
68 variant: OpenAiVariant::Direct {
69 endpoint: self.endpoint,
70 },
71 api_key: openai_secret,
72 organization: None,
73 project: None,
74 rate_limits: self.rate_limits,
75 retry: self.retry,
76 circuit_breaker: self.circuit_breaker,
77 timeouts: self.timeouts,
78 }
79 }
80}
81
/// Model runner that talks to a LiteLLM proxy.
///
/// LiteLLM speaks the OpenAI wire protocol, so all transport work is
/// delegated to an inner [`OpenAiRunner`]; this wrapper exists to report
/// LiteLLM-specific runtime/transport identity.
pub struct LiteLlmRunner {
    // The OpenAI-protocol runner that performs the actual requests.
    inner: OpenAiRunner,
}
88
89impl LiteLlmRunner {
90 pub fn new(config: OpenAiConfig, session: Arc<ArcSwap<SessionSnapshot>>) -> InferenceResult<Self> {
91 Ok(Self {
92 inner: OpenAiRunner::new(config, session)?,
93 })
94 }
95}
96
97#[async_trait]
98impl ModelRunner for LiteLlmRunner {
99 async fn execute(&mut self, batch: ExecuteBatch) -> InferenceResult<RunHandle> {
100 self.inner.execute(batch).await
101 }
102
103 async fn rebuild_session(&mut self, cause: SessionRebuildCause) -> InferenceResult<()> {
104 self.inner.rebuild_session(cause).await
105 }
106
107 fn runtime_kind(&self) -> RuntimeKind {
108 RuntimeKind::LiteLlm
109 }
110 fn transport_kind(&self) -> TransportKind {
111 TransportKind::RemoteNetwork {
112 provider: ProviderKind::LiteLlm,
113 }
114 }
115 fn rate_limits(&self) -> Option<&RateLimits> {
116 self.inner.rate_limits()
117 }
118 fn estimate_cost_usd(&self, batch: &ExecuteBatch) -> f64 {
119 self.inner.estimate_cost_usd(batch)
123 }
124}