// forge_guardrails/server/manager.rs

use std::path::{Path, PathBuf};
use std::process::Child;
use std::sync::Mutex;

5/// Tracked configuration for reuse decisions during start().
6#[derive(Debug, Clone, PartialEq)]
7pub(super) struct RunConfig {
8    pub(super) model: String,
9    pub(super) mode: String,
10    pub(super) ctx_override: Option<i64>,
11    pub(super) extra_flags: Vec<String>,
12    pub(super) cache_type_k: Option<String>,
13    pub(super) cache_type_v: Option<String>,
14    pub(super) n_slots: Option<i64>,
15    pub(super) kv_unified: bool,
16}
17
18/// Backend process lifecycle manager.
19///
20/// Handles subprocess spawning for llamaserver/llamafile, budget resolution,
21/// VRAM tier detection, and health polling.
22pub struct ServerManager {
23    pub(super) backend: String,
24    pub(super) port: i64,
25    pub(super) _models_dir: Option<PathBuf>,
26    pub(super) llamafile_runtime: Option<PathBuf>,
27    pub(super) process: Mutex<Option<Child>>,
28    pub(super) current_config: Mutex<Option<RunConfig>>,
29    pub(super) last_context: Mutex<Option<i64>>,
30}
31
32impl ServerManager {
33    /// Creates a new `ServerManager` tracking the specified backend and port.
34    pub fn new(backend: &str, port: i64, models_dir: Option<&Path>) -> Self {
35        Self {
36            backend: backend.to_string(),
37            port,
38            _models_dir: models_dir.map(|p| p.to_path_buf()),
39            llamafile_runtime: None,
40            process: Mutex::new(None),
41            current_config: Mutex::new(None),
42            last_context: Mutex::new(None),
43        }
44    }
45
46    /// Sets the executable path for the llamafile runtime.
47    pub fn with_llamafile_runtime(mut self, path: impl AsRef<Path>) -> Self {
48        self.llamafile_runtime = Some(path.as_ref().to_path_buf());
49        self
50    }
51}