use std::process::Stdio;
use std::time::{Duration, Instant};
use anyhow::{Context, Result, bail};
use hrdr_llm::Client;
use tokio::process::{Child, Command};
/// Handle on the inference server that [`Backend::ensure`] settled on.
pub enum Backend {
/// A server process that `ensure` started itself. The child handle is kept
/// alive rather than read (hence `allow(dead_code)`): the process is spawned
/// with `kill_on_drop(true)`, so its lifetime is tied to this handle.
Spawned(#[allow(dead_code)] Box<Child>),
/// A server that was already answering at the base URL and was reused as-is;
/// no process handle is held for it.
External,
}
/// Which local server binary `Backend::ensure` decided to launch.
#[derive(Clone, Copy, PartialEq, Eq)]
enum BackendKind {
/// The `infr` binary, started as `infr serve`; picked first when it is on PATH.
Infr,
/// The llama.cpp server binary named by `BackendConfig::bin`; used when `infr`
/// is not on PATH.
Llama,
}
/// Settings used when a local backend has to be spawned.
///
/// Only `model` is forwarded to both backends; `bin`, `ctx` and `extra_args`
/// are consulted solely when the llama.cpp server is the one being launched.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct BackendConfig {
    /// Model identifier: the positional argument of `infr serve`, or the value
    /// passed after `-hf` to the llama.cpp server.
    pub model: String,
    /// Name (or path) of the llama.cpp server executable to look up and run.
    pub bin: String,
    /// Value passed to the llama.cpp server via `-c`.
    pub ctx: u32,
    /// Extra arguments appended verbatim to the llama.cpp server command line.
    pub extra_args: Vec<String>,
}
impl Default for BackendConfig {
    /// Out-of-the-box settings: a Q4_K_M Qwen3-8B GGUF model served by
    /// `llama-server` with a 16384 context value and no extra arguments.
    fn default() -> Self {
        let model = String::from("unsloth/Qwen3-8B-GGUF:Q4_K_M");
        let bin = String::from("llama-server");
        Self {
            model,
            bin,
            ctx: 16_384,
            extra_args: vec![],
        }
    }
}
impl Backend {
pub async fn ensure(cfg: &BackendConfig, base_url: &str) -> Result<Self> {
let probe = Client::new(base_url, None, "default");
if probe.list_models().await.is_ok() {
eprintln!("hrdr: reusing existing backend at {base_url}");
return Ok(Backend::External);
}
let (host, port) = parse_host_port(base_url)?;
let kind = if which::which("infr").is_ok() {
BackendKind::Infr
} else if which::which(&cfg.bin).is_ok() {
BackendKind::Llama
} else {
bail!(
"no local backend found on PATH — install `infr` (preferred, native tool \
calling) or `llama-server` (llama.cpp), or run your own OpenAI-compatible \
server and start hrdr with `--no-backend --base-url <url>`"
);
};
let log_path = log_file(kind);
let log = std::fs::File::create(&log_path)
.with_context(|| format!("creating {}", log_path.display()))?;
let log_err = log.try_clone()?;
let (label, mut command) = match kind {
BackendKind::Infr => {
let mut c = Command::new("infr");
c.arg("serve")
.arg(&cfg.model)
.arg("--addr")
.arg(format!("{host}:{port}"));
("infr serve", c)
}
BackendKind::Llama => {
let mut c = Command::new(&cfg.bin);
c.arg("-hf")
.arg(&cfg.model)
.arg("--jinja")
.arg("-c")
.arg(cfg.ctx.to_string())
.arg("--host")
.arg(&host)
.arg("--port")
.arg(port.to_string())
.args(&cfg.extra_args);
("llama-server", c)
}
};
eprintln!(
"hrdr: starting {label} ({}) on {host}:{port} — loading model, this can take a \
minute…\n logs: {}",
cfg.model,
log_path.display(),
);
let child = command
.stdout(Stdio::from(log))
.stderr(Stdio::from(log_err))
.kill_on_drop(true)
.spawn()
.with_context(|| format!("spawning `{label}` — see {}", log_path.display()))?;
if !wait_ready(&probe, Duration::from_secs(300)).await {
bail!(
"{label} did not become ready within 5 min — see {}",
log_path.display()
);
}
eprintln!("hrdr: backend ready.");
Ok(Backend::Spawned(Box::new(child)))
}
}
/// Polls `client` with model-list requests until one succeeds or `timeout`
/// has elapsed, pausing one second after every failed attempt.
///
/// Returns `true` as soon as a request succeeds and `false` once the elapsed
/// time reaches `timeout`. The deadline is checked before each attempt, so a
/// zero timeout returns `false` without probing.
async fn wait_ready(client: &Client, timeout: Duration) -> bool {
    let began = Instant::now();
    loop {
        if began.elapsed() >= timeout {
            return false;
        }
        if client.list_models().await.is_ok() {
            return true;
        }
        tokio::time::sleep(Duration::from_secs(1)).await;
    }
}
/// Extracts the `(host, port)` pair a spawned backend should listen on.
///
/// Accepts input with or without a `scheme://` prefix. Anything from the first
/// `/`, `?` or `#` onwards is ignored, as is a leading `userinfo@` part. The
/// host and port are separated at the *last* colon, so a bracketed IPv6
/// literal such as `[::1]:8080` is handled (the brackets are kept in the
/// returned host). The host `localhost` is rewritten to `127.0.0.1`.
///
/// # Errors
///
/// Fails when no `:port` is present or when the port is not a valid `u16`.
fn parse_host_port(base_url: &str) -> Result<(String, u16)> {
    // Drop the scheme, if there is one.
    let rest = base_url
        .split_once("://")
        .map_or(base_url, |(_scheme, rest)| rest);
    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or(rest);
    // Credentials are irrelevant for choosing a listen address.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_userinfo, host_port)| host_port);
    // Split at the last colon: colons inside `[...]` belong to the IPv6 host.
    let (host, port) = host_port
        .rsplit_once(':')
        .context("base_url must include host:port to spawn a backend")?;
    let host = match host {
        "localhost" => "127.0.0.1",
        other => other,
    };
    let port: u16 = port.parse().context("invalid port in base_url")?;
    Ok((host.to_string(), port))
}
/// Returns the path of the log file for the given backend kind, inside the
/// `hrdr` cache directory.
///
/// The cache root is `$XDG_CACHE_HOME` when that variable holds an absolute
/// path. An unset, empty or relative value is ignored, as the XDG Base
/// Directory specification requires, and `$HOME/.cache` is used instead
/// (`./.cache` when `HOME` is unset as well). Variables are read with `var_os`
/// so that non-UTF-8 paths are honoured rather than discarded.
///
/// The directory is created on a best-effort basis; a failure here is ignored
/// and surfaces when the caller tries to create the log file itself.
fn log_file(kind: BackendKind) -> std::path::PathBuf {
    use std::path::PathBuf;

    let cache_root = std::env::var_os("XDG_CACHE_HOME")
        .map(PathBuf::from)
        // An empty path is not absolute, so this also covers the "set but
        // empty" case.
        .filter(|path| path.is_absolute())
        .unwrap_or_else(|| {
            std::env::var_os("HOME")
                .map_or_else(|| PathBuf::from("."), PathBuf::from)
                .join(".cache")
        });
    let dir = cache_root.join("hrdr");
    let _ = std::fs::create_dir_all(&dir);

    let name = match kind {
        BackendKind::Infr => "infr-serve.log",
        BackendKind::Llama => "llama-server.log",
    };
    dir.join(name)
}