1pub mod backend;
3
4mod artifact;
5mod build;
6mod cache;
7mod error;
8mod http;
9
10pub use artifact::Artifact;
11pub use backend::Backend;
12pub use build::Build;
13pub use error::Error;
14pub use http::Progress;
15
16use crate::cache::Cache;
17
18use sipper::{Sipper, Straw, sipper};
19use tokio::process;
20use tokio::time::{self, Duration};
21
22use std::io;
23use std::path::{Path, PathBuf};
24use std::process::Stdio;
25
26#[derive(Debug, Clone, PartialEq, Eq)]
28pub struct Server {
29 pub build: Build,
31 pub backends: backend::Set,
33 pub executable: PathBuf,
35}
36
37impl Server {
38 pub async fn list() -> Result<Vec<Build>, Error> {
40 let mut builds: Vec<_> = Cache::list().await?.iter().map(Cache::build).collect();
41
42 builds.sort();
43
44 Ok(builds)
45 }
46
47 pub fn download(build: Build, backends: backend::Set) -> impl Straw<Self, Download, Error> {
49 sipper(async move |sender| {
50 let cache = Cache::new(build);
51
52 let artifacts = [Artifact::Server]
53 .into_iter()
54 .chain(backends.available().map(Artifact::Backend));
55
56 let mut components = Vec::new();
57
58 for artifact in artifacts {
59 let component = cache
60 .download(artifact)
61 .with(|progress| Download { artifact, progress })
62 .run(sender.clone())
63 .await?;
64
65 components.push(component);
66 }
67
68 let executable = cache.link(components).await?;
69
70 Ok(Self {
71 build,
72 backends: backends.normalize(),
73 executable,
74 })
75 })
76 }
77
78 pub async fn boot(
80 &self,
81 model: impl AsRef<Path>,
82 settings: Settings,
83 ) -> Result<Instance, Error> {
84 let process = process::Command::new(&self.executable)
85 .args(
86 format!(
87 "--model {model} --host {host} --port {port} --gpu-layers {gpu_layers} --jinja",
88 model = model.as_ref().display(),
89 host = settings.host,
90 port = settings.port,
91 gpu_layers = settings.gpu_layers,
92 )
93 .split_whitespace(),
94 )
95 .stdin(settings.stdin)
96 .stdout(settings.stdout)
97 .stderr(settings.stderr)
98 .kill_on_drop(true)
99 .spawn()?;
100
101 Ok(Instance {
102 host: settings.host,
103 port: settings.port,
104 process,
105 })
106 }
107
108 pub async fn delete(build: Build) -> Result<(), Error> {
110 Cache::new(build).delete().await
111 }
112}
113
/// Launch options consumed by [`Server::boot`].
///
/// [`Settings::default`] provides a baseline that can be combined with struct
/// update syntax to override individual fields.
#[derive(Debug)]
pub struct Settings {
    /// Value passed to the server's `--host` flag.
    pub host: String,
    /// Value passed to the server's `--port` flag.
    pub port: u32,
    /// Value passed to the server's `--gpu-layers` flag.
    pub gpu_layers: u32,
    /// Standard input configuration of the spawned process.
    pub stdin: Stdio,
    /// Standard output configuration of the spawned process.
    pub stdout: Stdio,
    /// Standard error configuration of the spawned process.
    pub stderr: Stdio,
}
130
131impl Default for Settings {
132 fn default() -> Self {
133 Self {
134 host: "127.0.0.1".to_owned(),
135 port: 8080,
136 gpu_layers: 80,
137 stdin: Stdio::null(),
138 stdout: Stdio::null(),
139 stderr: Stdio::null(),
140 }
141 }
142}
143
144#[derive(Debug)]
146pub struct Instance {
147 pub host: String,
149 pub port: u32,
151 pub process: process::Child,
153}
154
155impl Instance {
156 pub fn url(&self) -> String {
158 format!("http://{}:{}", self.host, self.port)
159 }
160
161 pub async fn wait_until_ready(&mut self) -> Result<(), Error> {
163 loop {
164 if let Some(status) = self.process.try_wait()? {
165 return Err(io::Error::other(format!(
166 "llama-server exited unexpectedly: {status}"
167 )))?;
168 }
169
170 if let Ok(response) = http::client()
171 .get(format!("{}/health", self.url()))
172 .send()
173 .await
174 && response.error_for_status().is_ok()
175 {
176 break;
177 }
178
179 time::sleep(Duration::from_secs(1)).await;
180 }
181
182 Ok(())
183 }
184}
185
186#[derive(Debug, Clone, Copy, PartialEq, Eq)]
188pub struct Download {
189 pub artifact: Artifact,
191 pub progress: Progress,
193}
194
#[cfg(test)]
mod tests {
    use super::*;

    use tokio::fs;
    use tokio::io::{self, AsyncWriteExt};

    /// End-to-end smoke test: downloads a server build and a small model,
    /// boots the server and waits for it to report healthy.
    ///
    /// Ignored by default because it needs network access and downloads large
    /// files. When the `CI` environment variable is `true`, it additionally
    /// checks that the cache starts empty, lists exactly the downloaded build
    /// afterwards, and is empty again after deleting it.
    #[tokio::test]
    #[ignore]
    async fn it_works() -> Result<(), Error> {
        const MODEL_URL: &str = "https://huggingface.co/unsloth/Qwen3-1.7B-GGUF/resolve/main/Qwen3-1.7B-UD-Q2_K_XL.gguf?download=true";
        const MODEL_FILE: &str = "Qwen3.gguf";

        let is_ci = std::env::var("CI").as_deref() == Ok("true");

        if is_ci {
            let installed = Server::list().await?;
            assert!(installed.is_empty());
        }

        let build = Build::latest().await.unwrap_or(Build::locked(6730));
        let server = Server::download(build, backend::Set::all()).await?;

        assert_eq!(server.build, build);
        assert_eq!(
            server.backends,
            if cfg!(target_os = "macos") {
                backend::Set::empty()
            } else {
                backend::Set::all()
            }
        );

        if !fs::try_exists(MODEL_FILE).await? {
            // Download into a temporary file and rename it once complete, so an
            // interrupted download is never mistaken for a usable model on the
            // next run.
            let partial = format!("{MODEL_FILE}.part");
            let mut writer = io::BufWriter::new(fs::File::create(&partial).await?);

            http::download(MODEL_URL, &mut writer).await?;

            // tokio's `BufWriter` discards buffered bytes on drop; flush
            // explicitly so the tail of the file actually reaches disk.
            writer.flush().await?;
            drop(writer);

            fs::rename(&partial, MODEL_FILE).await?;
        }

        let mut instance = server
            .boot(
                MODEL_FILE,
                Settings {
                    stdout: Stdio::inherit(),
                    stderr: Stdio::inherit(),
                    ..Settings::default()
                },
            )
            .await?;
        instance.wait_until_ready().await?;
        assert_eq!(instance.url(), "http://127.0.0.1:8080");

        if is_ci {
            // Dropping the instance drops the child handle, which was spawned
            // with `kill_on_drop(true)`.
            drop(instance);

            let installed = Server::list().await?;
            assert_eq!(installed.len(), 1);
            assert_eq!(installed.first(), Some(&server.build));

            Server::delete(server.build).await?;

            let installed = Server::list().await?;
            assert!(installed.is_empty());
        }

        Ok(())
    }
}