llmg_providers/docker_runner.rs

//! Docker Model Runner provider for LLMG
//!
//! Implements the `Provider` trait for Docker-based LLM model runners,
//! which expose a simple OpenAI-compatible HTTP API.
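//!
//! A minimal usage sketch (the crate/module path and the model name here are
//! illustrative, not guaranteed):
//!
//! ```no_run
//! # async fn demo() -> Result<(), llmg_core::provider::LlmError> {
//! use llmg_core::provider::Provider;
//! use llmg_providers::docker_runner::DockerRunnerClient;
//!
//! let client = DockerRunnerClient::from_env();
//! let request = llmg_core::types::ChatCompletionRequest {
//!     model: "docker-llama-3-8b".to_string(),
//!     messages: vec![llmg_core::types::Message::User {
//!         content: "Hello!".to_string(),
//!         name: None,
//!     }],
//!     temperature: Some(0.7),
//!     max_tokens: Some(100),
//!     stream: None,
//!     top_p: None,
//!     frequency_penalty: None,
//!     presence_penalty: None,
//!     stop: None,
//!     user: None,
//!     tools: None,
//!     tool_choice: None,
//!     response_format: None,
//! };
//! let response = client.chat_completion(request).await?;
//! println!("{}", response.id);
//! # Ok(())
//! # }
//! ```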

use llmg_core::{
    provider::{LlmError, Provider},
    types::{ChatCompletionRequest, ChatCompletionResponse, EmbeddingRequest, EmbeddingResponse},
};

/// Docker Model Runner API client
#[derive(Debug)]
pub struct DockerRunnerClient {
    http_client: reqwest::Client,
    base_url: String,
    api_key: Option<String>,
}

/// Docker Runner chat request format (OpenAI-compatible)
#[derive(Debug, serde::Serialize)]
struct DockerRunnerChatRequest {
    model: String,
    messages: Vec<serde_json::Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    frequency_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    presence_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    stop: Option<Vec<String>>,
}

/// Docker Runner chat response format (OpenAI-compatible)
#[derive(Debug, serde::Deserialize)]
struct DockerRunnerChatResponse {
    id: String,
    object: String,
    created: i64,
    model: String,
    choices: Vec<DockerRunnerChoice>,
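    // Some runners omit `usage` from the response, so it is optional here.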
    #[serde(default)]
    usage: Option<DockerRunnerUsage>,
}

#[derive(Debug, serde::Deserialize)]
struct DockerRunnerChoice {
    index: u32,
    message: DockerRunnerMessage,
    finish_reason: Option<String>,
}

#[derive(Debug, serde::Deserialize)]
struct DockerRunnerMessage {
    role: String,
    content: String,
}

#[derive(Debug, serde::Deserialize)]
struct DockerRunnerUsage {
    prompt_tokens: u32,
    completion_tokens: u32,
    total_tokens: u32,
}

impl DockerRunnerClient {
    /// Create a new Docker Runner client with the default localhost URL
    pub fn new() -> Self {
        Self {
            http_client: reqwest::Client::new(),
            base_url: "http://localhost:5000/v1".to_string(),
            api_key: None,
        }
    }

    /// Create a new DockerRunnerClient from environment variables.
    ///
    /// Reads `DOCKER_RUNNER_BASE_URL` (default "http://localhost:5000/v1") and
    /// `DOCKER_RUNNER_API_KEY` (optional).
    pub fn from_env() -> Self {
        let mut client = Self::new();
        if let Ok(base_url) = std::env::var("DOCKER_RUNNER_BASE_URL") {
            client = client.with_base_url(base_url);
        }
        if let Ok(api_key) = std::env::var("DOCKER_RUNNER_API_KEY") {
            client = client.with_api_key(api_key);
        }
        client
    }

    /// Set the API key sent as a `Bearer` token (builder-style)
    pub fn with_api_key(mut self, api_key: impl Into<String>) -> Self {
        self.api_key = Some(api_key.into());
        self
    }

    /// Set a custom base URL (builder-style)
    pub fn with_base_url(mut self, url: impl Into<String>) -> Self {
        self.base_url = url.into();
        self
    }

    /// Convert OpenAI format to Docker Runner format
    fn convert_request(&self, request: ChatCompletionRequest) -> DockerRunnerChatRequest {
        let messages: Vec<serde_json::Value> = request
            .messages
            .into_iter()
            .filter_map(|msg| {
                let json_msg = match msg {
                    llmg_core::types::Message::System { content, .. } => {
                        serde_json::json!({ "role": "system", "content": content })
                    }
                    llmg_core::types::Message::User { content, .. } => {
                        serde_json::json!({ "role": "user", "content": content })
                    }
                    llmg_core::types::Message::Assistant { content, .. } => {
                        serde_json::json!({
                            "role": "assistant",
                            "content": content.unwrap_or_default()
                        })
                    }
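                    // Other message variants are not supported by this
                    // provider and are silently dropped from the request.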
                    _ => return None,
                };
                Some(json_msg)
            })
            .collect();

        DockerRunnerChatRequest {
            model: request.model,
            messages,
            temperature: request.temperature,
            max_tokens: request.max_tokens,
            stream: request.stream,
            top_p: request.top_p,
            frequency_penalty: request.frequency_penalty,
            presence_penalty: request.presence_penalty,
            stop: request.stop,
        }
    }

    /// Convert Docker Runner response to OpenAI format
    fn convert_response(&self, response: DockerRunnerChatResponse) -> ChatCompletionResponse {
        ChatCompletionResponse {
            id: response.id,
            object: response.object,
            created: response.created,
            model: response.model,
            choices: response
                .choices
                .into_iter()
                .map(|choice| llmg_core::types::Choice {
                    index: choice.index,
                    message: llmg_core::types::Message::Assistant {
                        content: Some(choice.message.content),
                        refusal: None,
                        tool_calls: None,
                    },
                    finish_reason: choice.finish_reason,
                })
                .collect(),
            usage: response.usage.map(|u| llmg_core::types::Usage {
                prompt_tokens: u.prompt_tokens,
                completion_tokens: u.completion_tokens,
                total_tokens: u.total_tokens,
            }),
        }
    }

    async fn make_request(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<ChatCompletionResponse, LlmError> {
        let docker_runner_req = self.convert_request(request);
        let url = format!("{}/chat/completions", self.base_url);

        let mut req_builder = self.http_client.post(&url).json(&docker_runner_req);

        if let Some(ref key) = self.api_key {
            req_builder = req_builder.header("Authorization", format!("Bearer {}", key));
        }

        let response = req_builder
            .send()
            .await
            .map_err(|e| LlmError::HttpError(e.to_string()))?;

        if !response.status().is_success() {
            let status = response.status().as_u16();
            let text = response.text().await.unwrap_or_default();
            return Err(LlmError::ApiError {
                status,
                message: text,
            });
        }

        let docker_runner_resp: DockerRunnerChatResponse = response
            .json()
            .await
            .map_err(|e| LlmError::HttpError(e.to_string()))?;

        Ok(self.convert_response(docker_runner_resp))
    }
}

impl Default for DockerRunnerClient {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait::async_trait]
impl Provider for DockerRunnerClient {
    async fn chat_completion(
        &self,
        request: ChatCompletionRequest,
    ) -> Result<ChatCompletionResponse, LlmError> {
        self.make_request(request).await
    }

    async fn embeddings(&self, request: EmbeddingRequest) -> Result<EmbeddingResponse, LlmError> {
        let url = format!("{}/embeddings", self.base_url);

        let mut req_builder = self.http_client.post(&url).json(&request);

        if let Some(ref key) = self.api_key {
            req_builder = req_builder.header("Authorization", format!("Bearer {}", key));
        }

        let response = req_builder
            .send()
            .await
            .map_err(|e| LlmError::HttpError(e.to_string()))?;

        if !response.status().is_success() {
            let status = response.status().as_u16();
            let text = response.text().await.unwrap_or_default();
            return Err(LlmError::ApiError {
                status,
                message: text,
            });
        }

        response
            .json::<EmbeddingResponse>()
            .await
            .map_err(|e| LlmError::HttpError(e.to_string()))
    }

    fn provider_name(&self) -> &'static str {
        "docker_runner"
    }
}
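
// A minimal sketch of driving the client through a `Provider` trait object
// (illustrative; see `from_env` above and the request shape in the tests below):
//
//     let provider: Box<dyn Provider> = Box::new(DockerRunnerClient::from_env());
//     let response = provider.chat_completion(request).await?;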

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_docker_runner_client_creation() {
        let client = DockerRunnerClient::new();
        assert_eq!(client.provider_name(), "docker_runner");
        assert_eq!(client.base_url, "http://localhost:5000/v1");
    }

    #[test]
    fn test_docker_runner_custom_url() {
        let client = DockerRunnerClient::new().with_base_url("http://custom-host:8080/v1");
        assert_eq!(client.base_url, "http://custom-host:8080/v1");
    }

    #[test]
    fn test_docker_runner_with_api_key() {
        let client = DockerRunnerClient::new().with_api_key("test-key");
        assert_eq!(client.api_key, Some("test-key".to_string()));
    }

    #[test]
    fn test_request_conversion() {
        let client = DockerRunnerClient::new();

        let request = ChatCompletionRequest {
            model: "docker-llama-3-8b".to_string(),
            messages: vec![llmg_core::types::Message::User {
                content: "Hello!".to_string(),
                name: None,
            }],
            temperature: Some(0.7),
            max_tokens: Some(100),
            stream: None,
            top_p: None,
            frequency_penalty: None,
            presence_penalty: None,
            stop: None,
            user: None,
            tools: None,
            tool_choice: None,
            response_format: None,
        };

        let docker_runner_req = client.convert_request(request);

        assert_eq!(docker_runner_req.model, "docker-llama-3-8b");
        assert_eq!(docker_runner_req.messages.len(), 1);
        assert_eq!(docker_runner_req.temperature, Some(0.7));
        assert_eq!(docker_runner_req.max_tokens, Some(100));
    }
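
    // Companion to test_request_conversion: exercises convert_response with a
    // hand-built response (values are illustrative).
    #[test]
    fn test_response_conversion() {
        let client = DockerRunnerClient::new();

        let response = DockerRunnerChatResponse {
            id: "chatcmpl-123".to_string(),
            object: "chat.completion".to_string(),
            created: 1_700_000_000,
            model: "docker-llama-3-8b".to_string(),
            choices: vec![DockerRunnerChoice {
                index: 0,
                message: DockerRunnerMessage {
                    role: "assistant".to_string(),
                    content: "Hi there!".to_string(),
                },
                finish_reason: Some("stop".to_string()),
            }],
            usage: Some(DockerRunnerUsage {
                prompt_tokens: 5,
                completion_tokens: 3,
                total_tokens: 8,
            }),
        };

        let converted = client.convert_response(response);

        assert_eq!(converted.id, "chatcmpl-123");
        assert_eq!(converted.model, "docker-llama-3-8b");
        assert_eq!(converted.choices.len(), 1);
        assert_eq!(converted.choices[0].finish_reason.as_deref(), Some("stop"));
        assert_eq!(converted.usage.map(|u| u.total_tokens), Some(8));
    }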
}