use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Mutex;
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};
use crate::backend::buildah_sidecar::discover::{self, Discovery};
use crate::backend::buildah_sidecar::proto::build_service_client::BuildServiceClient;
use crate::backend::buildah_sidecar::tls::{ensure_tls_material, set_dir_mode_0700, TlsMaterial};
use crate::error::{BuildError, Result};
/// Prefix of the first stdout line the spawned sidecar is expected to print;
/// the remainder of that line is used as the address to dial.
const HANDSHAKE_PREFIX: &str = "LISTENING ";
/// Maximum time to wait for the spawned sidecar to print its handshake line.
const SPAWN_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(15);
/// How long `ChildHolder::drop` waits after SIGTERM before force-killing.
const SIGTERM_GRACE: Duration = Duration::from_secs(5);
/// Owns a spawned child process and terminates it when dropped.
///
/// The child sits behind a `std::sync::Mutex<Option<_>>` so it can be taken
/// out exactly once; once the slot is `None` the `Drop` impl does nothing.
#[derive(Debug)]
struct ChildHolder {
// `None` once the child has been taken out for termination.
child: std::sync::Mutex<Option<Child>>,
}
impl ChildHolder {
    /// Takes ownership of `child`; it is terminated when the holder is dropped.
    fn new(child: Child) -> Self {
        let slot = std::sync::Mutex::new(Some(child));
        Self { child: slot }
    }
}
impl Drop for ChildHolder {
    /// Terminates and reaps the held child, if it has not been taken already.
    ///
    /// On Unix the child is first sent SIGTERM and polled every 50 ms for up
    /// to [`SIGTERM_GRACE`]. If it has not exited by then — or if no SIGTERM
    /// could be delivered at all (non-Unix targets, pid conversion failure,
    /// `kill` error) — it is force-killed and reaped immediately.
    ///
    /// This blocks the dropping thread for at most roughly [`SIGTERM_GRACE`].
    fn drop(&mut self) {
        // A poisoned mutex still holds a usable child; recover it.
        let mut slot = self
            .child
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        let Some(mut child) = slot.take() else {
            return;
        };

        #[cfg(unix)]
        let term_sent = {
            use nix::sys::signal::{kill, Signal};
            use nix::unistd::Pid;
            match i32::try_from(child.id()) {
                Ok(raw) => kill(Pid::from_raw(raw), Signal::SIGTERM).is_ok(),
                Err(_) => false,
            }
        };
        // Without a graceful-stop request there is nothing to wait for.
        #[cfg(not(unix))]
        let term_sent = false;

        if term_sent {
            let deadline = Instant::now() + SIGTERM_GRACE;
            loop {
                match child.try_wait() {
                    // Exited (and reaped) within the grace period.
                    Ok(Some(_)) => return,
                    Ok(None) if Instant::now() < deadline => {
                        std::thread::sleep(Duration::from_millis(50));
                    }
                    // Grace period elapsed or the status could not be read.
                    Ok(None) | Err(_) => break,
                }
            }
        }

        // Best effort: errors here mean the process is already gone.
        let _ = child.kill();
        let _ = child.wait();
    }
}
/// Handle to a connected sidecar: where it listens, the TLS material used to
/// reach it, and an established gRPC channel.
///
/// Clones share the same child-process holder through an `Arc`.
#[derive(Debug, Clone)]
pub struct LiveSidecar {
/// Address the channel was dialed at (configured, or reported by the child).
pub addr: String,
/// TLS material used for the mutual-TLS connection.
pub tls: TlsMaterial,
/// Path of the spawned binary; empty when an external address was dialed.
pub binary: PathBuf,
/// Established channel to the sidecar.
pub channel: Channel,
// `Some` only for a sidecar spawned by this process; `None` for an external one.
_child: Option<Arc<ChildHolder>>,
}
impl LiveSidecar {
#[must_use]
pub fn client(&self) -> BuildServiceClient<Channel> {
BuildServiceClient::new(self.channel.clone())
}
}
/// Lazily creates and caches a single [`LiveSidecar`] for a given configuration.
#[derive(Debug)]
pub struct SidecarLifecycle {
// Configuration consulted when the sidecar is dialed or spawned.
config: Arc<zlayer_types::builder::SidecarConfig>,
// Cached sidecar; `None` until `ensure` succeeds and again after `drop_connection`.
state: Mutex<Option<LiveSidecar>>,
}
impl SidecarLifecycle {
#[must_use]
pub fn new(config: Arc<zlayer_types::builder::SidecarConfig>) -> Self {
Self {
config,
state: Mutex::new(None),
}
}
pub async fn ensure(&self) -> Result<LiveSidecar> {
let mut guard = self.state.lock().await;
if let Some(existing) = guard.as_ref() {
return Ok(existing.clone());
}
let live = self.spawn_and_dial().await?;
*guard = Some(live.clone());
Ok(live)
}
pub async fn drop_connection(&self) {
let mut guard = self.state.lock().await;
*guard = None;
}
async fn spawn_and_dial(&self) -> Result<LiveSidecar> {
let tls_dir = self.config.tls_dir.clone().unwrap_or_else(default_tls_dir);
let tls = ensure_tls_material(&tls_dir)?;
let storage = prepare_storage_spec(&self.config)?;
if let Some(addr) = self.config.addr.clone() {
let channel = dial_mtls(&addr, &tls).await?;
return Ok(LiveSidecar {
addr,
tls,
binary: PathBuf::new(),
channel,
_child: None,
});
}
let Discovery { binary, tried } = discover::discover_default()?;
tracing::info!(?binary, ?tried, "spawning zlayer-buildd");
let mut cmd = Command::new(&binary);
cmd.arg("--bind").arg("127.0.0.1:0");
cmd.arg("--tls-ca").arg(&tls.ca_pem);
cmd.arg("--tls-cert").arg(&tls.cert_pem);
cmd.arg("--tls-key").arg(&tls.key_pem);
cmd.arg("--idle-secs")
.arg(self.config.idle_secs.to_string());
cmd.arg("--storage-root").arg(&storage.graph_root);
cmd.arg("--storage-runroot").arg(&storage.run_root);
cmd.arg("--storage-driver").arg(&storage.driver);
cmd.stdin(Stdio::null());
cmd.stdout(Stdio::piped());
cmd.stderr(Stdio::piped());
let mut child = cmd.spawn().map_err(|e| BuildError::NotSupported {
operation: format!("spawning zlayer-buildd at {}: {e}", binary.display()),
})?;
let stdout = child
.stdout
.take()
.ok_or_else(|| BuildError::NotSupported {
operation: "zlayer-buildd: missing stdout pipe".into(),
})?;
let (tx, rx) = std::sync::mpsc::channel::<Result<String>>();
std::thread::spawn(move || {
let mut reader = BufReader::new(stdout);
let mut line = String::new();
let send_result = match reader.read_line(&mut line) {
Ok(0) => tx.send(Err(BuildError::NotSupported {
operation: "zlayer-buildd exited before printing LISTENING".into(),
})),
Ok(_) => {
let trimmed = line
.trim_end_matches('\n')
.trim_end_matches('\r')
.to_string();
if let Some(addr) = trimmed.strip_prefix(HANDSHAKE_PREFIX) {
tx.send(Ok(addr.to_string()))
} else {
tx.send(Err(BuildError::NotSupported {
operation: format!("zlayer-buildd handshake malformed: {trimmed:?}"),
}))
}
}
Err(e) => tx.send(Err(BuildError::NotSupported {
operation: format!("reading zlayer-buildd stdout: {e}"),
})),
};
let _ = send_result;
});
let addr_string = match rx.recv_timeout(SPAWN_HANDSHAKE_TIMEOUT) {
Ok(Ok(addr)) => addr,
Ok(Err(e)) => {
let _ = child.kill();
let _ = child.wait();
return Err(e);
}
Err(_) => {
let _ = child.kill();
let _ = child.wait();
return Err(BuildError::NotSupported {
operation: format!(
"zlayer-buildd did not print LISTENING within {SPAWN_HANDSHAKE_TIMEOUT:?}"
),
});
}
};
let channel = match dial_mtls(&addr_string, &tls).await {
Ok(ch) => ch,
Err(e) => {
let _ = child.kill();
let _ = child.wait();
return Err(e);
}
};
Ok(LiveSidecar {
addr: addr_string,
tls,
binary,
channel,
_child: Some(Arc::new(ChildHolder::new(child))),
})
}
}
/// Returns the `buildd` directory of the system-default ZLayer directory layout.
fn default_buildd_dir() -> PathBuf {
    let dirs = zlayer_paths::ZLayerDirs::system_default();
    dirs.buildd()
}
/// Directory used for TLS material when the config sets no `tls_dir`;
/// currently the same directory as `default_buildd_dir()`.
fn default_tls_dir() -> PathBuf {
default_buildd_dir()
}
/// Resolved storage settings handed to the spawned sidecar on its command line.
#[derive(Debug, Clone, PartialEq, Eq)]
struct StorageSpec {
// Passed as `--storage-root`.
graph_root: PathBuf,
// Passed as `--storage-runroot`.
run_root: PathBuf,
// Passed as `--storage-driver`.
driver: String,
}
/// Resolves the storage spec for `config` and makes sure both of its
/// directories exist with mode `0700`.
///
/// Both directories are created first; permissions are applied afterwards.
///
/// # Errors
///
/// Returns an error if a directory cannot be created or its mode cannot be set.
fn prepare_storage_spec(config: &zlayer_types::builder::SidecarConfig) -> Result<StorageSpec> {
    let spec = resolve_storage_spec(config);
    let roots = [&spec.graph_root, &spec.run_root];
    for root in roots {
        std::fs::create_dir_all(root).map_err(BuildError::from)?;
    }
    for root in roots {
        set_dir_mode_0700(root)?;
    }
    Ok(spec)
}
/// Computes the storage spec for `config` without touching the filesystem.
///
/// Unset roots fall back to `<buildd dir>/storage/graph` and
/// `<buildd dir>/storage/run`; an unset driver falls back to `"vfs"`.
fn resolve_storage_spec(config: &zlayer_types::builder::SidecarConfig) -> StorageSpec {
    let storage_base = default_buildd_dir().join("storage");

    let graph_root = match &config.storage_graph_root {
        Some(path) => path.clone(),
        None => storage_base.join("graph"),
    };
    let run_root = match &config.storage_run_root {
        Some(path) => path.clone(),
        None => storage_base.join("run"),
    };
    let driver = match &config.storage_driver {
        Some(name) => name.clone(),
        None => "vfs".to_string(),
    };

    StorageSpec {
        graph_root,
        run_root,
        driver,
    }
}
/// Opens a mutual-TLS gRPC channel to `addr` using the PEM files in `tls`.
///
/// The server certificate is verified against the CA in `tls` under the
/// domain name `zlayer-buildd`. Connecting times out after 10 seconds and
/// each request after 600 seconds.
///
/// # Errors
///
/// Fails if a PEM file cannot be read, `addr` does not form a valid URI, the
/// TLS configuration is rejected, or the connection cannot be established.
async fn dial_mtls(addr: &str, tls: &TlsMaterial) -> Result<Channel> {
    let ca_bytes = std::fs::read(&tls.ca_pem).map_err(BuildError::from)?;
    let cert_bytes = std::fs::read(&tls.cert_pem).map_err(BuildError::from)?;
    let key_bytes = std::fs::read(&tls.key_pem).map_err(BuildError::from)?;

    let client_identity = Identity::from_pem(&cert_bytes, &key_bytes);
    let trust_root = Certificate::from_pem(&ca_bytes);
    let client_tls = ClientTlsConfig::new()
        .ca_certificate(trust_root)
        .identity(client_identity)
        .domain_name("zlayer-buildd");

    let target = match format!("https://{addr}").parse::<tonic::transport::Uri>() {
        Ok(uri) => uri,
        Err(e) => {
            return Err(BuildError::NotSupported {
                operation: format!("invalid sidecar address {addr:?}: {e}"),
            });
        }
    };

    let secured = match Endpoint::from(target).tls_config(client_tls) {
        Ok(ep) => ep,
        Err(e) => {
            return Err(BuildError::NotSupported {
                operation: format!("sidecar TLS config: {e}"),
            });
        }
    };
    let endpoint = secured
        .connect_timeout(Duration::from_secs(10))
        .timeout(Duration::from_secs(600));

    match endpoint.connect().await {
        Ok(channel) => Ok(channel),
        Err(e) => Err(BuildError::NotSupported {
            operation: format!("dialing sidecar at {addr}: {e}"),
        }),
    }
}
// Unit tests for sidecar discovery failure, the spawn handshake (ignored by
// default) and storage-spec resolution.
#[cfg(test)]
#[allow(unsafe_code)]
mod tests {
use super::*;
use std::sync::Arc;
// With no discoverable binary, `ensure()` must return an error rather than
// hang or panic.
#[tokio::test]
#[allow(clippy::await_holding_lock)]
async fn ensure_fails_cleanly_when_binary_missing() {
// Hold the crate-wide env lock for the whole test; a poisoned lock is reused.
let _g = crate::TEST_ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
// Remember the current values so they can be restored afterwards.
let prev_path = std::env::var_os("PATH");
let prev_buildd_bin = std::env::var_os("ZLAYER_BUILDD_BIN");
let prev_data_dir = std::env::var_os("ZLAYER_DATA_DIR");
let tmp = tempfile::tempdir().unwrap();
// SAFETY: `_g` (TEST_ENV_LOCK) is held; presumably every env-touching test
// in this crate takes the same lock — TODO confirm.
unsafe {
std::env::remove_var("ZLAYER_BUILDD_BIN");
std::env::set_var("PATH", "/nonexistent-zlayer-test-dir");
std::env::set_var("ZLAYER_DATA_DIR", tmp.path());
}
let cfg = Arc::new(zlayer_types::builder::SidecarConfig {
addr: None,
tls_dir: Some(tmp.path().to_path_buf()),
idle_secs: 30,
..Default::default()
});
let lifecycle = SidecarLifecycle::new(cfg);
let result = lifecycle.ensure().await;
// Restore the environment before asserting, so a failed assertion does
// not leave the modified values behind.
// SAFETY: same reasoning as above; `_g` is still held.
unsafe {
match prev_path {
Some(v) => std::env::set_var("PATH", v),
None => std::env::remove_var("PATH"),
}
match prev_buildd_bin {
Some(v) => std::env::set_var("ZLAYER_BUILDD_BIN", v),
None => std::env::remove_var("ZLAYER_BUILDD_BIN"),
}
match prev_data_dir {
Some(v) => std::env::set_var("ZLAYER_DATA_DIR", v),
None => std::env::remove_var("ZLAYER_DATA_DIR"),
}
}
let err = result.expect_err("ensure() should fail when binary cannot be discovered");
let msg = err.to_string();
assert!(
msg.contains("zlayer-buildd") || msg.contains("not found"),
"error should mention the missing binary: {msg}"
);
}
// Spawns a real sidecar and checks that it reports a loopback address.
#[tokio::test]
#[ignore = "requires zlayer-buildd binary; gate with ZLAYER_BUILDD_BIN"]
#[allow(clippy::await_holding_lock)]
async fn spawn_smoke() {
let _g = crate::TEST_ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let tmp = tempfile::tempdir().unwrap();
// TLS material and storage all live under the temp dir.
let cfg = Arc::new(zlayer_types::builder::SidecarConfig {
addr: None,
tls_dir: Some(tmp.path().to_path_buf()),
idle_secs: 30,
storage_graph_root: Some(tmp.path().join("storage").join("graph")),
storage_run_root: Some(tmp.path().join("storage").join("run")),
storage_driver: Some("vfs".into()),
context_mount: None,
});
let lifecycle = SidecarLifecycle::new(cfg);
let live = lifecycle
.ensure()
.await
.expect("sidecar should spawn and handshake");
assert!(
live.addr.starts_with("127.0.0.1:"),
"expected loopback addr, got {}",
live.addr
);
let _client = live.client();
}
// A default config resolves to `.../storage/graph`, `.../storage/run` and "vfs".
#[test]
fn storage_spec_defaults_to_buildd_storage() {
let cfg = zlayer_types::builder::SidecarConfig::default();
let spec = resolve_storage_spec(&cfg);
assert_eq!(spec.driver, "vfs");
assert!(
spec.graph_root.ends_with("graph"),
"expected graph leaf, got {:?}",
spec.graph_root
);
assert!(
spec.run_root.ends_with("run"),
"expected run leaf, got {:?}",
spec.run_root
);
assert!(
spec.graph_root.parent().unwrap().ends_with("storage"),
"expected storage parent, got {:?}",
spec.graph_root.parent()
);
assert!(
spec.run_root.parent().unwrap().ends_with("storage"),
"expected storage parent, got {:?}",
spec.run_root.parent()
);
}
// Explicit config values are used verbatim.
#[test]
fn storage_spec_honors_override() {
let cfg = zlayer_types::builder::SidecarConfig {
storage_graph_root: Some(PathBuf::from("/tmp/test-graph")),
storage_run_root: Some(PathBuf::from("/tmp/test-run")),
storage_driver: Some("overlay".into()),
..Default::default()
};
let spec = resolve_storage_spec(&cfg);
assert_eq!(spec.graph_root, PathBuf::from("/tmp/test-graph"));
assert_eq!(spec.run_root, PathBuf::from("/tmp/test-run"));
assert_eq!(spec.driver, "overlay");
}
}