// offline_intelligence/model_runtime/ggml_runtime.rs

use async_trait::async_trait;

use super::gguf_runtime::GGUFRuntime;
use super::runtime_trait::*;

/// Runtime for GGML-format models.
///
/// This type does no inference work itself: it wraps a [`GGUFRuntime`] and
/// forwards every operation to it (llama.cpp's `llama-server` backs both
/// formats — see [`ModelRuntime::metadata`] below). Only the advertised
/// [`ModelFormat`] differs from the delegate.
pub struct GGMLRuntime {
    // Delegate that performs all actual work.
    inner: GGUFRuntime,
}
12
13impl GGMLRuntime {
14 pub fn new() -> Self {
15 Self {
16 inner: GGUFRuntime::new(),
17 }
18 }
19}
20
21impl Default for GGMLRuntime {
22 fn default() -> Self {
23 Self::new()
24 }
25}
26
27#[async_trait]
28impl ModelRuntime for GGMLRuntime {
29 fn supported_format(&self) -> ModelFormat {
30 ModelFormat::GGML
31 }
32
33 async fn initialize(&mut self, mut config: RuntimeConfig) -> anyhow::Result<()> {
34 config.format = ModelFormat::GGUF; self.inner.initialize(config).await
37 }
38
39 async fn is_ready(&self) -> bool {
40 self.inner.is_ready().await
41 }
42
43 async fn health_check(&self) -> anyhow::Result<String> {
44 self.inner.health_check().await
45 }
46
47 fn base_url(&self) -> String {
48 self.inner.base_url()
49 }
50
51 async fn generate(&self, request: InferenceRequest) -> anyhow::Result<InferenceResponse> {
52 self.inner.generate(request).await
53 }
54
55 async fn generate_stream(
56 &self,
57 request: InferenceRequest,
58 ) -> anyhow::Result<Box<dyn futures_util::Stream<Item = Result<String, anyhow::Error>> + Send + Unpin>> {
59 self.inner.generate_stream(request).await
60 }
61
62 async fn shutdown(&mut self) -> anyhow::Result<()> {
63 self.inner.shutdown().await
64 }
65
66 fn metadata(&self) -> RuntimeMetadata {
67 RuntimeMetadata {
68 format: ModelFormat::GGML,
69 runtime_name: "llama.cpp (llama-server)".to_string(),
70 version: "latest".to_string(),
71 supports_gpu: true,
72 supports_streaming: true,
73 }
74 }
75}