zlayer-builder 0.12.2

Dockerfile parsing and buildah-based container image building
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
//! Sidecar process lifecycle: spawn, handshake, mTLS dialing, teardown.
//!
//! The lifecycle manager keeps at most one `zlayer-buildd` instance
//! alive per `BuildahSidecarBackend`. On the first call to
//! [`SidecarLifecycle::ensure`] we either:
//!
//! * dial a pre-existing remote sidecar named by `SidecarConfig::addr`,
//!   or
//! * discover the local `zlayer-buildd` binary, spawn it bound to
//!   `127.0.0.1:0`, read its `LISTENING host:port` handshake from
//!   stdout, and dial the reported address.
//!
//! The connected [`Channel`] is stored alongside the spawned `Child`
//! (when applicable). The `Child` is held in an `Arc<ChildHolder>` so
//! its `Drop` impl is what actually tears the process down (SIGTERM,
//! 5s grace, then SIGKILL). Cloning a [`LiveSidecar`] therefore shares
//! the same process — teardown happens when the last clone is dropped.

use std::io::{BufRead, BufReader};
use std::path::PathBuf;
use std::process::{Child, Command, Stdio};
use std::sync::Arc;
use std::time::{Duration, Instant};

use tokio::sync::Mutex;
use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity};

use crate::backend::buildah_sidecar::discover::{self, Discovery};
use crate::backend::buildah_sidecar::proto::build_service_client::BuildServiceClient;
use crate::backend::buildah_sidecar::tls::{ensure_tls_material, set_dir_mode_0700, TlsMaterial};
use crate::error::{BuildError, Result};

/// Prefix of the single handshake line (`LISTENING host:port`) the
/// sidecar prints to stdout on startup. The trailing space is part of
/// the prefix, so stripping it leaves just `host:port`.
const HANDSHAKE_PREFIX: &str = "LISTENING ";

/// How long to wait for `LISTENING host:port` after spawning the sidecar
/// before giving up and killing the child.
const SPAWN_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(15);

/// How long to wait for SIGTERM to take effect before escalating to SIGKILL.
const SIGTERM_GRACE: Duration = Duration::from_secs(5);

/// Holds the `Child` so its `Drop` impl performs graceful teardown.
///
/// Kept private to this module — outside callers interact with
/// [`LiveSidecar`], which wraps this holder in an `Arc` so multiple
/// clones share one process and only the last drop terminates it.
#[derive(Debug)]
struct ChildHolder {
    /// The owned process handle. It sits behind a mutex and an `Option`
    /// so it can be taken out exactly once; `None` means the child has
    /// already been taken and there is nothing left to tear down.
    child: std::sync::Mutex<Option<Child>>,
}

impl ChildHolder {
    /// Take ownership of a spawned `child`; from this point on, dropping
    /// the holder is what terminates the process.
    fn new(child: Child) -> Self {
        let slot = std::sync::Mutex::new(Some(child));
        Self { child: slot }
    }
}

impl Drop for ChildHolder {
    /// Tear the child process down: SIGTERM (Unix only), wait up to
    /// [`SIGTERM_GRACE`] for it to exit, then force-kill and reap it.
    ///
    /// The grace period is only observed when a SIGTERM was actually
    /// delivered. On platforms without SIGTERM, or when sending the
    /// signal failed, waiting would just block the dropping thread for
    /// the full grace period with no chance of a graceful exit, so we
    /// go straight to `kill`.
    ///
    /// Note that this blocks the calling thread (polling every 50ms)
    /// until the child has exited or been killed.
    fn drop(&mut self) {
        // `&mut self` guarantees exclusive access, so no locking is
        // needed; a poisoned mutex still yields the inner value. If the
        // child was already taken by manual shutdown, nothing to do.
        let slot = self
            .child
            .get_mut()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        let Some(mut child) = slot.take() else {
            return;
        };

        // Try graceful shutdown via SIGTERM on Unix and remember whether
        // the signal was actually delivered.
        #[cfg(unix)]
        let term_sent = {
            use nix::sys::signal::{kill, Signal};
            use nix::unistd::Pid;

            // Child::id() returns u32 (the PID). Real PIDs always fit
            // in a non-negative i32 (PID_MAX is well below i32::MAX),
            // so an overflow here would indicate either kernel
            // misbehavior or a wrap that already invalidates the
            // signal target — treat it as "no signal sent" instead of
            // panicking.
            i32::try_from(child.id())
                .map(|raw| kill(Pid::from_raw(raw), Signal::SIGTERM).is_ok())
                .unwrap_or(false)
        };
        // There is no SIGTERM equivalent elsewhere.
        #[cfg(not(unix))]
        let term_sent = false;

        let grace = if term_sent {
            SIGTERM_GRACE
        } else {
            Duration::ZERO
        };
        let deadline = Instant::now() + grace;

        loop {
            match child.try_wait() {
                // Exited (and reaped) on its own.
                Ok(Some(_)) => return,
                // Still running and still within the grace period.
                Ok(None) if Instant::now() < deadline => {
                    std::thread::sleep(Duration::from_millis(50));
                }
                // Grace period over, or we cannot even query the child:
                // force-kill and reap so no zombie is left behind.
                Ok(None) | Err(_) => {
                    let _ = child.kill();
                    let _ = child.wait();
                    return;
                }
            }
        }
    }
}

/// A live `zlayer-buildd` plus the gRPC channel pointing at it.
///
/// The held `Arc<ChildHolder>` is the lifetime anchor for the spawned
/// process; when all `LiveSidecar` clones are dropped the underlying
/// `Child` is SIGTERM'd (5s grace) and then SIGKILL'd. For
/// remote-sidecar mode (`SidecarConfig::addr = Some(_)`) the child
/// field is `None` because the operator owns lifecycle.
///
/// Cloning shares the same process handle and the same channel; it
/// never spawns a second sidecar.
#[derive(Debug, Clone)]
pub struct LiveSidecar {
    /// The `host:port` we connected to. For locally-spawned sidecars
    /// this is whatever the sidecar reported in its handshake after
    /// being asked to bind `127.0.0.1:0`.
    pub addr: String,
    /// Paths to the mTLS material in use.
    pub tls: TlsMaterial,
    /// The binary we spawned (empty `PathBuf` when `addr` was supplied
    /// and we did not spawn anything).
    pub binary: PathBuf,
    /// Connected channel ready to issue RPCs.
    pub channel: Channel,
    /// Holds the spawned child (when present); `Drop` performs
    /// teardown. `None` when we dialed a remote sidecar. The field is
    /// never read — it exists only to keep the process alive.
    _child: Option<Arc<ChildHolder>>,
}

impl LiveSidecar {
    /// Create a new `BuildServiceClient` backed by a clone of this
    /// sidecar's channel, so every client talks to the same connection.
    #[must_use]
    pub fn client(&self) -> BuildServiceClient<Channel> {
        let shared = self.channel.clone();
        BuildServiceClient::new(shared)
    }
}

/// Lifecycle manager owned by `BuildahSidecarBackend`.
///
/// Holds at most one `LiveSidecar` and lazily produces it on the first
/// call to [`Self::ensure`]. The manager itself is not `Clone`; share
/// it by reference (or behind an `Arc`). Every method takes `&self`
/// and goes through the async `state` mutex, so the backend can pass
/// `&self` references into async build pipelines without ceremony.
#[derive(Debug)]
pub struct SidecarLifecycle {
    /// Immutable sidecar configuration (remote address, TLS directory,
    /// idle timeout, storage overrides).
    config: Arc<zlayer_types::builder::SidecarConfig>,
    /// The cached sidecar, if one has been spawned or dialed. `None`
    /// until the first successful `ensure` and again after
    /// `drop_connection`.
    state: Mutex<Option<LiveSidecar>>,
}

impl SidecarLifecycle {
    /// Construct a manager bound to `config`. No filesystem or network
    /// I/O happens here.
    #[must_use]
    pub fn new(config: Arc<zlayer_types::builder::SidecarConfig>) -> Self {
        Self {
            config,
            state: Mutex::new(None),
        }
    }

    /// Return the cached [`LiveSidecar`], spawning + dialing on first
    /// call.
    ///
    /// # Errors
    ///
    /// Returns whatever the underlying spawn / handshake / dial flow
    /// produced — typically [`BuildError::NotSupported`] when the
    /// binary is missing or the handshake times out.
    pub async fn ensure(&self) -> Result<LiveSidecar> {
        let mut guard = self.state.lock().await;

        if let Some(existing) = guard.as_ref() {
            return Ok(existing.clone());
        }

        let live = self.spawn_and_dial().await?;
        *guard = Some(live.clone());
        Ok(live)
    }

    /// Drop the cached [`LiveSidecar`] so the next call to
    /// [`Self::ensure`] performs a fresh spawn + dial. Tears down the
    /// previous child via its `Drop` impl once the last outstanding
    /// clone is released.
    pub async fn drop_connection(&self) {
        let mut guard = self.state.lock().await;
        *guard = None;
    }

    async fn spawn_and_dial(&self) -> Result<LiveSidecar> {
        let tls_dir = self.config.tls_dir.clone().unwrap_or_else(default_tls_dir);

        let tls = ensure_tls_material(&tls_dir)?;

        let storage = prepare_storage_spec(&self.config)?;

        // Remote-sidecar branch: caller pre-configured a reachable
        // address, so we never spawn.
        if let Some(addr) = self.config.addr.clone() {
            let channel = dial_mtls(&addr, &tls).await?;
            return Ok(LiveSidecar {
                addr,
                tls,
                binary: PathBuf::new(),
                channel,
                _child: None,
            });
        }

        // Local spawn branch.
        let Discovery { binary, tried } = discover::discover_default()?;
        tracing::info!(?binary, ?tried, "spawning zlayer-buildd");

        let mut cmd = Command::new(&binary);
        cmd.arg("--bind").arg("127.0.0.1:0");
        cmd.arg("--tls-ca").arg(&tls.ca_pem);
        cmd.arg("--tls-cert").arg(&tls.cert_pem);
        cmd.arg("--tls-key").arg(&tls.key_pem);
        cmd.arg("--idle-secs")
            .arg(self.config.idle_secs.to_string());
        cmd.arg("--storage-root").arg(&storage.graph_root);
        cmd.arg("--storage-runroot").arg(&storage.run_root);
        cmd.arg("--storage-driver").arg(&storage.driver);
        cmd.stdin(Stdio::null());
        cmd.stdout(Stdio::piped());
        cmd.stderr(Stdio::piped());

        let mut child = cmd.spawn().map_err(|e| BuildError::NotSupported {
            operation: format!("spawning zlayer-buildd at {}: {e}", binary.display()),
        })?;

        let stdout = child
            .stdout
            .take()
            .ok_or_else(|| BuildError::NotSupported {
                operation: "zlayer-buildd: missing stdout pipe".into(),
            })?;

        // Read the handshake line on a dedicated blocking thread. We
        // can't park the async runtime on a synchronous
        // `read_line` against a `std::process::Child`'s stdout, and
        // the runtime-free thread keeps the code portable across
        // tokio current_thread / multi_thread schedulers.
        let (tx, rx) = std::sync::mpsc::channel::<Result<String>>();
        std::thread::spawn(move || {
            let mut reader = BufReader::new(stdout);
            let mut line = String::new();
            let send_result = match reader.read_line(&mut line) {
                Ok(0) => tx.send(Err(BuildError::NotSupported {
                    operation: "zlayer-buildd exited before printing LISTENING".into(),
                })),
                Ok(_) => {
                    let trimmed = line
                        .trim_end_matches('\n')
                        .trim_end_matches('\r')
                        .to_string();
                    if let Some(addr) = trimmed.strip_prefix(HANDSHAKE_PREFIX) {
                        tx.send(Ok(addr.to_string()))
                    } else {
                        tx.send(Err(BuildError::NotSupported {
                            operation: format!("zlayer-buildd handshake malformed: {trimmed:?}"),
                        }))
                    }
                }
                Err(e) => tx.send(Err(BuildError::NotSupported {
                    operation: format!("reading zlayer-buildd stdout: {e}"),
                })),
            };
            // If the receiver has already gone away (e.g. timeout
            // killed the child), there's nothing we can do.
            let _ = send_result;
        });

        let addr_string = match rx.recv_timeout(SPAWN_HANDSHAKE_TIMEOUT) {
            Ok(Ok(addr)) => addr,
            Ok(Err(e)) => {
                let _ = child.kill();
                let _ = child.wait();
                return Err(e);
            }
            Err(_) => {
                let _ = child.kill();
                let _ = child.wait();
                return Err(BuildError::NotSupported {
                    operation: format!(
                        "zlayer-buildd did not print LISTENING within {SPAWN_HANDSHAKE_TIMEOUT:?}"
                    ),
                });
            }
        };

        let channel = match dial_mtls(&addr_string, &tls).await {
            Ok(ch) => ch,
            Err(e) => {
                // Dial failed — make sure we don't leak the orphaned
                // child since we haven't yet wrapped it in the
                // `ChildHolder` Drop guard.
                let _ = child.kill();
                let _ = child.wait();
                return Err(e);
            }
        };

        Ok(LiveSidecar {
            addr: addr_string,
            tls,
            binary,
            channel,
            _child: Some(Arc::new(ChildHolder::new(child))),
        })
    }
}

/// Root directory for everything the sidecar keeps on the local host.
///
/// The default TLS directory and the default containers/storage tree
/// are both derived from this one path, so they cannot drift apart.
/// The path itself comes from `ZLayerDirs::system_default().buildd()`
/// (documented upstream as honoring `ZLAYER_DATA_DIR` overrides).
fn default_buildd_dir() -> PathBuf {
    let dirs = zlayer_paths::ZLayerDirs::system_default();
    dirs.buildd()
}

/// Directory used for mTLS material when `SidecarConfig::tls_dir` is
/// not set: the sidecar data root itself, as returned by
/// `default_buildd_dir`.
fn default_tls_dir() -> PathBuf {
    default_buildd_dir()
}

/// Per-user containers/storage paths handed to `zlayer-buildd`.
///
/// Storing graph + run under `${ZLAYER_DATA_DIR}/buildd/storage/` keeps
/// the layout rootless-safe: nothing in `/var/lib/containers` or
/// `/run/containers` is touched, so non-root users on a host with a
/// root-only `/etc/containers/storage.conf` still build cleanly.
#[derive(Debug, Clone, PartialEq, Eq)]
struct StorageSpec {
    /// Passed to the sidecar as `--storage-root`.
    graph_root: PathBuf,
    /// Passed to the sidecar as `--storage-runroot`.
    run_root: PathBuf,
    /// Passed to the sidecar as `--storage-driver` (defaults to `vfs`).
    driver: String,
}

/// Resolve the [`StorageSpec`] for `config` and make sure its graph and
/// run directories exist with mode 0700.
///
/// Both directories (including any missing parents) are created first,
/// then both are tightened, so the sidecar never has to create parent
/// paths itself and other local users cannot look into the per-user
/// image store.
///
/// # Errors
///
/// Returns an error if either directory cannot be created or its
/// permissions cannot be set.
fn prepare_storage_spec(config: &zlayer_types::builder::SidecarConfig) -> Result<StorageSpec> {
    let spec = resolve_storage_spec(config);
    let dirs = [&spec.graph_root, &spec.run_root];

    for dir in dirs {
        std::fs::create_dir_all(dir)?;
    }
    for dir in dirs {
        set_dir_mode_0700(dir)?;
    }

    Ok(spec)
}

/// Compute the storage layout for `config` without touching the
/// filesystem.
///
/// Each field uses the explicit override from `config` when present;
/// otherwise graph and run roots default to `<buildd dir>/storage/graph`
/// and `<buildd dir>/storage/run`, and the driver defaults to `vfs`.
fn resolve_storage_spec(config: &zlayer_types::builder::SidecarConfig) -> StorageSpec {
    let storage_base = default_buildd_dir().join("storage");

    let graph_root = match &config.storage_graph_root {
        Some(path) => path.clone(),
        None => storage_base.join("graph"),
    };
    let run_root = match &config.storage_run_root {
        Some(path) => path.clone(),
        None => storage_base.join("run"),
    };
    let driver = match &config.storage_driver {
        Some(name) => name.clone(),
        None => "vfs".to_string(),
    };

    StorageSpec {
        graph_root,
        run_root,
        driver,
    }
}

/// Open an mTLS gRPC channel to the sidecar at `addr` (`host:port`).
///
/// The CA, client certificate and client key are read from the paths
/// in `tls`. The server certificate is verified against the name
/// `zlayer-buildd`, independent of the host part of `addr`.
///
/// # Errors
///
/// Returns an I/O-derived error when any PEM file cannot be read, and
/// [`BuildError::NotSupported`] when `addr` does not form a valid URI,
/// the TLS configuration is rejected, or the connection attempt fails.
async fn dial_mtls(addr: &str, tls: &TlsMaterial) -> Result<Channel> {
    const SERVER_NAME: &str = "zlayer-buildd";
    const CONNECT_TIMEOUT: Duration = Duration::from_secs(10);
    const REQUEST_TIMEOUT: Duration = Duration::from_secs(600);

    // Read order (CA, cert, key) determines which missing file is
    // reported first.
    let ca_bytes = std::fs::read(&tls.ca_pem)?;
    let cert_bytes = std::fs::read(&tls.cert_pem)?;
    let key_bytes = std::fs::read(&tls.key_pem)?;

    let tls_config = ClientTlsConfig::new()
        .ca_certificate(Certificate::from_pem(&ca_bytes))
        .identity(Identity::from_pem(&cert_bytes, &key_bytes))
        .domain_name(SERVER_NAME);

    let target = format!("https://{addr}");
    let uri: tonic::transport::Uri = match target.parse() {
        Ok(parsed) => parsed,
        Err(e) => {
            return Err(BuildError::NotSupported {
                operation: format!("invalid sidecar address {addr:?}: {e}"),
            });
        }
    };

    let endpoint = match Endpoint::from(uri).tls_config(tls_config) {
        Ok(configured) => configured
            .connect_timeout(CONNECT_TIMEOUT)
            .timeout(REQUEST_TIMEOUT),
        Err(e) => {
            return Err(BuildError::NotSupported {
                operation: format!("sidecar TLS config: {e}"),
            });
        }
    };

    match endpoint.connect().await {
        Ok(channel) => Ok(channel),
        Err(e) => Err(BuildError::NotSupported {
            operation: format!("dialing sidecar at {addr}: {e}"),
        }),
    }
}

// Unit tests for the lifecycle manager and storage resolution.
// `unsafe_code` is allowed because the env-mutating tests wrap
// `std::env::set_var` / `remove_var` in `unsafe` blocks.
#[cfg(test)]
#[allow(unsafe_code)]
mod tests {
    use super::*;
    use std::sync::Arc;

    // The env-lock guard *must* be held across the `.await` below so
    // that no other test races us on `PATH`/`ZLAYER_BUILDD_BIN`/
    // `ZLAYER_DATA_DIR` while we have them clobbered. The blocking
    // mutex is the right tool — switching to an async mutex would
    // make every other env-mutating sync test (in discover.rs etc.)
    // unusable.
    #[tokio::test]
    #[allow(clippy::await_holding_lock)]
    async fn ensure_fails_cleanly_when_binary_missing() {
        // No env override, restricted PATH → discover_default must
        // fail, and ensure() must surface that as an error rather
        // than hanging.
        let _g = crate::TEST_ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        // Snapshot everything we are about to clobber so it can be
        // restored before any assertion runs.
        let prev_path = std::env::var_os("PATH");
        let prev_buildd_bin = std::env::var_os("ZLAYER_BUILDD_BIN");
        let prev_data_dir = std::env::var_os("ZLAYER_DATA_DIR");

        let tmp = tempfile::tempdir().unwrap();
        // SAFETY: env mutation serialized by `TEST_ENV_LOCK`.
        unsafe {
            std::env::remove_var("ZLAYER_BUILDD_BIN");
            std::env::set_var("PATH", "/nonexistent-zlayer-test-dir");
            std::env::set_var("ZLAYER_DATA_DIR", tmp.path());
        }

        let cfg = Arc::new(zlayer_types::builder::SidecarConfig {
            addr: None,
            tls_dir: Some(tmp.path().to_path_buf()),
            idle_secs: 30,
            ..Default::default()
        });
        let lifecycle = SidecarLifecycle::new(cfg);
        let result = lifecycle.ensure().await;

        // Restore the environment *before* asserting, so a failing
        // assertion cannot leave the process env clobbered for other
        // tests.
        // SAFETY: env mutation serialized by `TEST_ENV_LOCK`.
        unsafe {
            match prev_path {
                Some(v) => std::env::set_var("PATH", v),
                None => std::env::remove_var("PATH"),
            }
            match prev_buildd_bin {
                Some(v) => std::env::set_var("ZLAYER_BUILDD_BIN", v),
                None => std::env::remove_var("ZLAYER_BUILDD_BIN"),
            }
            match prev_data_dir {
                Some(v) => std::env::set_var("ZLAYER_DATA_DIR", v),
                None => std::env::remove_var("ZLAYER_DATA_DIR"),
            }
        }

        let err = result.expect_err("ensure() should fail when binary cannot be discovered");
        let msg = err.to_string();
        assert!(
            msg.contains("zlayer-buildd") || msg.contains("not found"),
            "error should mention the missing binary: {msg}"
        );
    }

    /// End-to-end spawn smoke test. To run it, build the Go sidecar
    /// with `make build` in `bin/zlayer-buildd`, point
    /// `ZLAYER_BUILDD_BIN` at the result, and run with `--ignored`.
    #[tokio::test]
    #[ignore = "requires zlayer-buildd binary; gate with ZLAYER_BUILDD_BIN"]
    #[allow(clippy::await_holding_lock)]
    async fn spawn_smoke() {
        let _g = crate::TEST_ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        let tmp = tempfile::tempdir().unwrap();
        // Scope storage to the tempdir too so the smoke test doesn't
        // pollute the real per-user `${data_dir}/buildd/storage` tree.
        let cfg = Arc::new(zlayer_types::builder::SidecarConfig {
            addr: None,
            tls_dir: Some(tmp.path().to_path_buf()),
            idle_secs: 30,
            storage_graph_root: Some(tmp.path().join("storage").join("graph")),
            storage_run_root: Some(tmp.path().join("storage").join("run")),
            storage_driver: Some("vfs".into()),
            context_mount: None,
        });
        let lifecycle = SidecarLifecycle::new(cfg);
        let live = lifecycle
            .ensure()
            .await
            .expect("sidecar should spawn and handshake");
        assert!(
            live.addr.starts_with("127.0.0.1:"),
            "expected loopback addr, got {}",
            live.addr
        );
        // Build a client; we don't issue an RPC because the proto
        // server-side handlers may not exist yet — the dial alone
        // proves we got past the handshake and TLS negotiation.
        let _client = live.client();
    }

    // With no overrides, the spec must default to the `vfs` driver and
    // to `.../storage/graph` and `.../storage/run` leaves.
    #[test]
    fn storage_spec_defaults_to_buildd_storage() {
        let cfg = zlayer_types::builder::SidecarConfig::default();
        let spec = resolve_storage_spec(&cfg);
        assert_eq!(spec.driver, "vfs");
        assert!(
            spec.graph_root.ends_with("graph"),
            "expected graph leaf, got {:?}",
            spec.graph_root
        );
        assert!(
            spec.run_root.ends_with("run"),
            "expected run leaf, got {:?}",
            spec.run_root
        );
        assert!(
            spec.graph_root.parent().unwrap().ends_with("storage"),
            "expected storage parent, got {:?}",
            spec.graph_root.parent()
        );
        assert!(
            spec.run_root.parent().unwrap().ends_with("storage"),
            "expected storage parent, got {:?}",
            spec.run_root.parent()
        );
    }

    // Explicit overrides in the config must be passed through verbatim.
    #[test]
    fn storage_spec_honors_override() {
        let cfg = zlayer_types::builder::SidecarConfig {
            storage_graph_root: Some(PathBuf::from("/tmp/test-graph")),
            storage_run_root: Some(PathBuf::from("/tmp/test-run")),
            storage_driver: Some("overlay".into()),
            ..Default::default()
        };
        let spec = resolve_storage_spec(&cfg);
        assert_eq!(spec.graph_root, PathBuf::from("/tmp/test-graph"));
        assert_eq!(spec.run_root, PathBuf::from("/tmp/test-run"));
        assert_eq!(spec.driver, "overlay");
    }
}