provider_agent/backend/
llamacpp.rs1use async_trait::async_trait;
9
10use super::http::{
11 build_client, get_json, parse_openai_models, probe, stream_chat_completions, trim_url,
12};
13use super::{Backend, BackendHealth, BackendModel, BackendResult, Job, JobResult, JobSink};
14
15pub struct LlamaCppBackend {
16 id: String,
17 base_url: String,
18 client: reqwest::Client,
19}
20
21impl LlamaCppBackend {
22 pub fn new(url: &str) -> Self {
23 let base_url = trim_url(url).to_string();
24 Self {
25 id: format!("llamacpp:{base_url}"),
26 base_url,
27 client: build_client(),
28 }
29 }
30}
31
32#[async_trait]
33impl Backend for LlamaCppBackend {
34 fn kind(&self) -> &'static str {
35 "llamacpp"
36 }
37
38 fn id(&self) -> &str {
39 &self.id
40 }
41
42 async fn list_models(&self) -> BackendResult<Vec<BackendModel>> {
43 let url = format!("{}/v1/models", self.base_url);
44 let v = get_json(&self.client, &url, None).await?;
45 let mut models = parse_openai_models(&v, true);
48 if models.is_empty() {
49 if let Some(id) = v.get("id").and_then(|s| s.as_str()) {
50 models.push(BackendModel {
51 model_id: id.to_string(),
52 context_window: None,
53 native: true,
54 });
55 }
56 }
57 Ok(models)
58 }
59
60 async fn health(&self) -> BackendResult<BackendHealth> {
61 let url = format!("{}/health", self.base_url);
62 match probe(&self.client, &url, None).await {
63 Ok(latency_ms) => Ok(BackendHealth {
64 reachable: true,
65 latency_ms: Some(latency_ms),
66 last_error: None,
67 }),
68 Err(e) => Ok(BackendHealth {
69 reachable: false,
70 latency_ms: None,
71 last_error: Some(e.to_string()),
72 }),
73 }
74 }
75
76 async fn execute(&self, job: &Job, sink: &mut dyn JobSink) -> BackendResult<JobResult> {
77 let endpoint = format!("{}/v1/chat/completions", self.base_url);
78 stream_chat_completions(&self.client, &endpoint, None, job, sink).await
79 }
80}