use std::path::Path;
use anyhow::{Context, Result};
use super::super::kernel_cmd::{
DIRTY_TREE_CACHE_SKIP_HINT, EMBEDDED_KCONFIG, NON_GIT_TREE_CACHE_SKIP_HINT,
embedded_kconfig_hash,
};
use super::super::util::{Spinner, success, warn};
use super::kconfig::{
configure_kernel, has_sched_ext, validate_kernel_config, warn_dropped_extra_kconfig_lines,
warn_extra_kconfig_overrides_baked_in,
};
use super::make::{make_kernel_with_output, run_make, run_make_with_output};
/// Outcome of a kernel build: the built image plus, when the source tree's
/// identity was stable across the build, the cache entry it was stored under.
#[non_exhaustive]
pub struct KernelBuildResult {
    /// Cache entry for the stored artifacts; `None` when the build was not
    /// cached (dirty tree at acquire time, identity changed during the
    /// build, or the cache store itself failed).
    pub entry: Option<crate::cache::CacheEntry>,
    /// Path to the kernel image inside the build tree.
    pub image_path: std::path::PathBuf,
    /// True when the tree was dirty at acquire time or changed during the
    /// build — the reason the cache store was skipped.
    pub post_build_is_dirty: bool,
}
/// CPU/memory reservation held for the duration of a kernel build.
///
/// All fields are `None` when the reservation is bypassed
/// (`KTSTR_BYPASS_LLC_LOCKS` set non-empty) or the host LLC topology is
/// unreadable without a `--cpu-cap` contract.
#[derive(Debug)]
pub(crate) struct BuildReservation {
    // Cgroup sandbox pinning the build to the reserved cpus/mems.
    // INVARIANT: declared before `plan` on purpose — struct fields drop in
    // declaration order, so the sandbox teardown runs before the LLC plan
    // (and its lock) is released. Pinned by
    // `build_reservation_field_order_pins_drop_invariant` in this file's
    // tests.
    pub(crate) _sandbox: Option<crate::vmm::cgroup_sandbox::BuildSandbox>,
    // LLC reservation plan acquired from host topology.
    pub(crate) plan: Option<crate::vmm::host_topology::LlcPlan>,
    // `-j` value derived from the plan; `None` tells the caller to fall
    // back to its default (nproc).
    pub(crate) make_jobs: Option<usize>,
}
/// Reserve host resources (an LLC plan plus a cgroup sandbox) for a kernel
/// build, deriving the `-j` value from the plan.
///
/// Setting `KTSTR_BYPASS_LLC_LOCKS` to a non-empty value skips the
/// reservation entirely; combining the bypass with `--cpu-cap` is rejected,
/// because the cap cannot be enforced without the reservation machinery.
/// When the host LLC topology is unreadable from sysfs, the reservation is
/// skipped with a warning — unless `cpu_cap` is set, which is then an error.
///
/// # Errors
/// Fails on the bypass/cap conflict, on an unreadable topology while
/// `cpu_cap` is set, or when topology/plan/sandbox acquisition fails.
pub(crate) fn acquire_build_reservation(
    cli_label: &str,
    cpu_cap: Option<crate::vmm::host_topology::CpuCap>,
) -> Result<BuildReservation> {
    // The bypass only counts when the variable is set to a non-empty value.
    let bypass = matches!(
        std::env::var("KTSTR_BYPASS_LLC_LOCKS"),
        Ok(v) if !v.is_empty()
    );
    let plan: Option<crate::vmm::host_topology::LlcPlan> = if bypass {
        anyhow::ensure!(
            cpu_cap.is_none(),
            "{cli_label}: --cpu-cap conflicts with KTSTR_BYPASS_LLC_LOCKS=1; \
             unset one of them. --cpu-cap is a resource contract; bypass \
             disables the contract entirely."
        );
        None
    } else {
        match crate::vmm::host_topology::HostTopology::from_sysfs() {
            Ok(host_topo) => {
                let test_topo = crate::topology::TestTopology::from_system()?;
                let acquired_plan = crate::vmm::host_topology::acquire_llc_plan(
                    &host_topo, &test_topo, cpu_cap,
                )?;
                crate::vmm::host_topology::warn_if_cross_node_spill(&acquired_plan, &host_topo);
                Some(acquired_plan)
            }
            Err(_) => {
                // Topology unreadable: a hard error if a resource budget was
                // requested, otherwise proceed unreserved with a warning.
                anyhow::ensure!(
                    cpu_cap.is_none(),
                    "{cli_label}: --cpu-cap set but host LLC topology unreadable \
                     from sysfs — cannot enforce the resource budget. Run on a \
                     host with /sys/devices/system/cpu populated, or drop \
                     --cpu-cap to build without enforcement."
                );
                tracing::warn!(
                    "{cli_label}: could not read host LLC topology from sysfs; \
                     skipping kernel-build LLC reservation. Concurrent perf-mode \
                     runs on this host will NOT be serialized against this build"
                );
                None
            }
        }
    };
    // A sandbox exists exactly when a plan was reserved.
    let sandbox = if let Some(p) = plan.as_ref() {
        Some(crate::vmm::cgroup_sandbox::BuildSandbox::try_create(
            &p.cpus,
            &p.mems,
            cpu_cap.is_some(),
        )?)
    } else {
        None
    };
    let make_jobs = plan
        .as_ref()
        .map(|p| crate::vmm::host_topology::make_jobs_for_plan(p));
    Ok(BuildReservation {
        _sandbox: sandbox,
        plan,
        make_jobs,
    })
}
/// Take the per-source-tree exclusive build lock, blocking (with a
/// user-facing notice naming the holder when identifiable) if another
/// ktstr build already holds it.
///
/// The lockfile lives under the cache's `.locks/` directory and is named
/// from a hash of the canonical source path, so builds of distinct trees
/// never serialize against each other.
///
/// # Errors
/// Fails if the cache root or `.locks/` subdir cannot be prepared, or if
/// the flock syscalls themselves fail. Lock *contention* is not an error —
/// it is waited out via a blocking flock.
pub(crate) fn acquire_source_tree_lock(
    canonical: &Path,
    cli_label: &str,
) -> Result<std::os::fd::OwnedFd> {
    // `Context` is already in scope from the file-level `use anyhow::{...}`;
    // the previous function-local re-import was redundant and is removed.
    let path_hash = crate::fetch::canonical_path_hash(canonical);
    let cache = crate::cache::CacheDir::new()
        .with_context(|| "open cache root for source-tree lockfile placement")?;
    cache
        .ensure_lock_dir()
        .with_context(|| "create cache `.locks/` subdir for source-tree lock")?;
    let lock_path = cache.lock_path(&format!("source-{path_hash}"));
    // Fast path: non-blocking exclusive flock, so the common uncontended
    // case needs no messaging.
    match crate::flock::try_flock(&lock_path, crate::flock::FlockMode::Exclusive)
        .with_context(|| format!("acquire source-tree flock {}", lock_path.display()))?
    {
        Some(fd) => Ok(fd),
        None => {
            // Contended: report who holds the lock (best-effort via
            // /proc/locks), then block until it is released.
            let holders = crate::flock::read_holders(&lock_path).unwrap_or_default();
            let holder_text = if holders.is_empty() {
                String::from("(holder not identified via /proc/locks)")
            } else {
                crate::flock::format_holder_list(&holders)
            };
            eprintln!(
                "{cli_label}: source tree {} is locked by a concurrent ktstr \
                 build — waiting for it to finish.\n{holder_text}",
                canonical.display(),
            );
            crate::flock::block_flock(&lock_path, crate::flock::FlockMode::Exclusive).with_context(
                || format!("blocking wait on source-tree flock {}", lock_path.display()),
            )
        }
    }
}
/// Full kernel build pipeline: reserve build resources, lock the source
/// tree, (re)configure, build, validate, and — when the tree's identity is
/// stable — store the resulting artifacts in the cache (and optionally the
/// remote cache).
///
/// Returns the built image path plus the cache entry (if stored) and a
/// flag recording whether the tree was dirty (which suppresses caching).
///
/// # Errors
/// Fails on reservation or source-lock acquisition errors, make/configure
/// failures, post-build config validation failures, a missing kernel
/// image, or an unreadable `.config`. A *cache store* failure is not an
/// error — it is reported and the result carries `entry: None`.
pub fn kernel_build_pipeline(
    acquired: &crate::fetch::AcquiredSource,
    cache: &crate::cache::CacheDir,
    cli_label: &str,
    clean: bool,
    is_local_source: bool,
    cpu_cap: Option<crate::vmm::host_topology::CpuCap>,
    extra_kconfig: Option<&str>,
) -> Result<KernelBuildResult> {
    let source_dir = &acquired.source_dir;
    let (arch, image_name) = crate::fetch::arch_info();
    // Reserve CPUs/mems (and derive the -j value) before any make runs.
    // `_plan` and `_sandbox` are held for their Drop side effects.
    let BuildReservation {
        plan: _plan,
        _sandbox,
        make_jobs,
    } = acquire_build_reservation(cli_label, cpu_cap)?;
    // Serialize concurrent builds of the same local tree. The same
    // KTSTR_BYPASS_LLC_LOCKS env var (set non-empty) that bypasses the LLC
    // reservation also disables this lock.
    let _source_lock = if is_local_source
        && std::env::var("KTSTR_BYPASS_LLC_LOCKS")
            .ok()
            .is_none_or(|v| v.is_empty())
    {
        Some(acquire_source_tree_lock(source_dir, cli_label)?)
    } else {
        None
    };
    // `make mrproper` only makes sense for a pre-existing local tree;
    // downloaded sources are already pristine.
    if clean {
        if !is_local_source {
            eprintln!(
                "{cli_label}: --clean is only meaningful with --source (downloaded sources start clean)"
            );
        } else {
            eprintln!("{cli_label}: make mrproper");
            run_make(source_dir, &["mrproper"])?;
        }
    }
    // Merge the user's --extra-kconfig fragment (if any) on top of the
    // baked-in ktstr kconfig, warning when the fragment shadows baked-in
    // symbols.
    let merged_fragment = crate::merge_kconfig_fragments(EMBEDDED_KCONFIG, extra_kconfig);
    if let Some(extra) = extra_kconfig {
        warn_extra_kconfig_overrides_baked_in(extra, cli_label);
    }
    // Reconfigure when a fragment was supplied, or when the current config
    // lacks sched_ext support.
    let needs_configure = extra_kconfig.is_some() || !has_sched_ext(source_dir);
    if needs_configure {
        let configure_result =
            Spinner::with_progress("Configuring kernel...", "Kernel configured", |_| {
                configure_kernel(source_dir, &merged_fragment)
            });
        configure_result.with_context(|| {
            if extra_kconfig.is_some() {
                "kernel configure failed (with --extra-kconfig fragment merged on top of \
                 baked-in ktstr.kconfig); check the fragment for syntax errors or \
                 conflicting symbol declarations"
                    .to_string()
            } else {
                "kernel configure failed".to_string()
            }
        })?;
        // Report fragment lines that the kconfig machinery dropped.
        if let Some(extra) = extra_kconfig {
            warn_dropped_extra_kconfig_lines(source_dir, extra, cli_label);
        }
    }
    // The build proper, parallelized by the reservation-derived job count
    // (None lets the make layer pick its default).
    Spinner::with_progress("Building kernel...", "Kernel built", |sp| {
        make_kernel_with_output(source_dir, Some(sp), make_jobs)
    })?;
    // Re-validate the post-build .config: the build itself can silently
    // drop symbols the fragment disabled dependencies of.
    validate_kernel_config(source_dir).with_context(|| {
        if extra_kconfig.is_some() {
            "post-build kernel config validation failed; check that your \
             --extra-kconfig fragment does not disable a CONFIG_X required by \
             ktstr (e.g. CONFIG_BPF, CONFIG_DEBUG_INFO_BTF, CONFIG_FTRACE, \
             CONFIG_SCHED_CLASS_EXT)"
                .to_string()
        } else {
            "post-build kernel config validation failed".to_string()
        }
    })?;
    // compile_commands.json is only useful for trees the user keeps around.
    if !acquired.is_temp {
        Spinner::with_progress(
            "Generating compile_commands.json...",
            "compile_commands.json generated",
            |sp| run_make_with_output(source_dir, &["compile_commands.json"], Some(sp)),
        )?;
    }
    let image_path = crate::kernel_path::find_image_in_dir(source_dir)
        .ok_or_else(|| anyhow::anyhow!("no kernel image found in {}", source_dir.display()))?;
    // vmlinux is cached (stripped) when present; without it BTF data is
    // unavailable in the cache.
    let vmlinux_path = source_dir.join("vmlinux");
    let vmlinux_ref = if vmlinux_path.exists() {
        let orig_mb = std::fs::metadata(&vmlinux_path)
            .map(|m| m.len() as f64 / (1024.0 * 1024.0))
            .unwrap_or(0.0);
        eprintln!("{cli_label}: caching vmlinux ({orig_mb:.0} MB, will be stripped)");
        Some(vmlinux_path.as_path())
    } else {
        eprintln!("{cli_label}: warning: vmlinux not found, BTF will not be cached");
        None
    };
    // A tree that was dirty at acquire time is never cached: its identity
    // cannot be recorded reliably.
    if acquired.is_dirty {
        eprintln!("{cli_label}: kernel built at {}", image_path.display());
        let hint = if acquired.is_git {
            DIRTY_TREE_CACHE_SKIP_HINT
        } else {
            NON_GIT_TREE_CACHE_SKIP_HINT
        };
        eprintln!("{cli_label}: {hint}");
        return Ok(KernelBuildResult {
            entry: None,
            image_path,
            post_build_is_dirty: true,
        });
    }
    // Re-inspect a local tree after the build: if it became dirty or its
    // hash moved mid-build, skip the cache store rather than record a
    // stale identity. A failed re-check is best-effort: warn and proceed.
    if is_local_source {
        match crate::fetch::inspect_local_source_state(source_dir) {
            Ok(post) => {
                let hash_changed = post.short_hash
                    != acquired
                        .kernel_source
                        .as_local_git_hash()
                        .map(str::to_string);
                if post.is_dirty || hash_changed {
                    eprintln!(
                        "{cli_label}: source tree changed during build \
                         (acquire-time dirty={}, post-build dirty={}; \
                         hash_changed={hash_changed}); skipping cache store \
                         to avoid recording a stale identity. Re-run after \
                         the working tree settles to populate the cache.",
                        acquired.is_dirty, post.is_dirty,
                    );
                    return Ok(KernelBuildResult {
                        entry: None,
                        image_path,
                        post_build_is_dirty: true,
                    });
                }
            }
            Err(e) => {
                tracing::warn!(
                    cli_label = cli_label,
                    err = %format!("{e:#}"),
                    "post-build dirty re-check failed; proceeding to cache store",
                );
            }
        }
    }
    // Identity inputs for the cache entry: CRC32 of the final .config plus
    // hashes of the baked-in and extra kconfig fragments.
    let config_path = source_dir.join(".config");
    let config_hash = if config_path.exists() {
        let data = std::fs::read(&config_path)?;
        Some(format!("{:08x}", crc32fast::hash(&data)))
    } else {
        None
    };
    let kconfig_hash = embedded_kconfig_hash();
    let extra_kconfig_hash_value = extra_kconfig.map(crate::extra_kconfig_hash);
    // (size, mtime) of the source vmlinux; mtimes before the Unix epoch
    // are represented as negative seconds.
    let source_vmlinux_stat = vmlinux_ref.and_then(|v| {
        let stat = std::fs::metadata(v).ok()?;
        let mtime_secs = stat.modified().ok().and_then(|t| {
            t.duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_secs() as i64)
                .ok()
                .or_else(|| {
                    std::time::UNIX_EPOCH
                        .duration_since(t)
                        .ok()
                        .map(|d| -(d.as_secs() as i64))
                })
        })?;
        Some((stat.len(), mtime_secs))
    });
    let mut metadata = crate::cache::KernelMetadata::new(
        acquired.kernel_source.clone(),
        arch.to_string(),
        image_name.to_string(),
        crate::test_support::now_iso8601(),
    )
    .with_version(acquired.version.clone())
    .with_config_hash(config_hash)
    .with_ktstr_kconfig_hash(Some(kconfig_hash))
    .with_extra_kconfig_hash(extra_kconfig_hash_value);
    if is_local_source && let Some((size, mtime_secs)) = source_vmlinux_stat {
        metadata = metadata.with_source_vmlinux_stat(size, mtime_secs);
    }
    let mut artifacts = crate::cache::CacheArtifacts::new(&image_path);
    if let Some(v) = vmlinux_ref {
        artifacts = artifacts.with_vmlinux(v);
    }
    // Cache store is best-effort: a failure is reported but does not fail
    // the build. Remote store only runs after a successful local store.
    let entry = match cache.store(&acquired.cache_key, &artifacts, &metadata) {
        Ok(entry) => {
            success(&format!("\u{2713} Kernel cached: {}", acquired.cache_key));
            eprintln!("{cli_label}: image: {}", entry.image_path().display());
            if crate::remote_cache::is_enabled() {
                crate::remote_cache::remote_store(&entry, cli_label);
            }
            Some(entry)
        }
        Err(e) => {
            warn(&format!("{cli_label}: cache store failed: {e:#}"));
            None
        }
    };
    Ok(KernelBuildResult {
        entry,
        image_path,
        post_build_is_dirty: false,
    })
}
#[cfg(test)]
mod tests {
    use super::super::super::kernel_cmd::KernelCommand;
    use super::*;

    /// `--cpu-cap N` must parse alongside a positional version argument.
    #[test]
    fn kernel_build_parses_cpu_cap_without_extra_flags() {
        use clap::Parser as _;
        #[derive(clap::Parser, Debug)]
        struct TestCli {
            #[command(subcommand)]
            cmd: KernelCommand,
        }
        let parsed = TestCli::try_parse_from(["prog", "build", "6.14.2", "--cpu-cap", "4"])
            .expect("kernel build --cpu-cap N must parse");
        match parsed.cmd {
            KernelCommand::Build {
                cpu_cap, version, ..
            } => {
                assert_eq!(cpu_cap, Some(4));
                assert_eq!(version.as_deref(), Some("6.14.2"));
            }
            other => panic!("expected KernelCommand::Build, got {other:?}"),
        }
    }

    /// Omitting `--cpu-cap` must yield `None`, never a zero sentinel.
    #[test]
    fn kernel_build_without_cpu_cap_defaults_to_none() {
        use clap::Parser as _;
        #[derive(clap::Parser, Debug)]
        struct TestCli {
            #[command(subcommand)]
            cmd: KernelCommand,
        }
        let parsed = TestCli::try_parse_from(["prog", "build", "6.14.2"])
            .expect("kernel build without --cpu-cap must parse");
        match parsed.cmd {
            KernelCommand::Build { cpu_cap, .. } => {
                assert_eq!(cpu_cap, None, "no --cpu-cap must produce None, not Some(0)",);
            }
            other => panic!("expected KernelCommand::Build, got {other:?}"),
        }
    }

    /// clap accepts `--cpu-cap 0` verbatim; rejecting 0 is runtime
    /// validation's job, not the parser's.
    #[test]
    fn kernel_build_cpu_cap_zero_passes_clap() {
        use clap::Parser as _;
        #[derive(clap::Parser, Debug)]
        struct TestCli {
            #[command(subcommand)]
            cmd: KernelCommand,
        }
        let parsed = TestCli::try_parse_from(["prog", "build", "6.14.2", "--cpu-cap", "0"])
            .expect("clap-level parse must accept 0; runtime validation rejects");
        match parsed.cmd {
            KernelCommand::Build { cpu_cap, .. } => {
                assert_eq!(
                    cpu_cap,
                    Some(0),
                    "clap parses 0 verbatim; validation is downstream",
                );
            }
            other => panic!("expected KernelCommand::Build, got {other:?}"),
        }
    }

    /// Serializes tests that mutate the process-wide KTSTR_BYPASS_LLC_LOCKS
    /// env var. A poisoned mutex is recovered deliberately: a panicking
    /// sibling test must not cascade failures here.
    fn bypass_env_lock() -> std::sync::MutexGuard<'static, ()> {
        use std::sync::{Mutex, OnceLock};
        static ENV_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        ENV_LOCK
            .get_or_init(|| Mutex::new(()))
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// RAII guard for KTSTR_BYPASS_LLC_LOCKS: sets or removes it on
    /// construction and always removes it on drop. Only used while
    /// `bypass_env_lock()` is held, since env mutation is process-global.
    struct BypassGuard;
    impl BypassGuard {
        fn set(value: &str) -> Self {
            // SAFETY: callers hold bypass_env_lock(), serializing all env
            // mutation of this variable within the test binary.
            unsafe {
                std::env::set_var("KTSTR_BYPASS_LLC_LOCKS", value);
            }
            BypassGuard
        }
        fn remove() -> Self {
            // SAFETY: same serialization argument as `set`.
            unsafe {
                std::env::remove_var("KTSTR_BYPASS_LLC_LOCKS");
            }
            BypassGuard
        }
    }
    impl Drop for BypassGuard {
        fn drop(&mut self) {
            // SAFETY: still under bypass_env_lock() for the guard's lifetime.
            unsafe {
                std::env::remove_var("KTSTR_BYPASS_LLC_LOCKS");
            }
        }
    }

    /// Bypass mode must produce an entirely empty reservation.
    #[test]
    fn acquire_build_reservation_bypass_returns_no_reservation() {
        let _lock = bypass_env_lock();
        let _env = BypassGuard::set("1");
        let r = acquire_build_reservation("test", None).expect("bypass + no cap must succeed");
        assert!(r.plan.is_none(), "bypass must produce no LLC plan");
        assert!(
            r._sandbox.is_none(),
            "bypass must produce no cgroup sandbox",
        );
        assert!(
            r.make_jobs.is_none(),
            "bypass must fall back to nproc (None signals to caller)",
        );
    }

    /// Bypass plus --cpu-cap is a contradiction and must error.
    #[test]
    fn acquire_build_reservation_bypass_with_cap_errors() {
        let _lock = bypass_env_lock();
        let _env = BypassGuard::set("1");
        let cap = crate::vmm::host_topology::CpuCap::new(2).expect("cap=2 valid");
        let err =
            acquire_build_reservation("test", Some(cap)).expect_err("bypass + cap must error");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("resource contract"),
            "err must name the resource contract: {msg}",
        );
    }

    /// plan, sandbox, and make_jobs must all agree on whether a reservation
    /// exists, and make_jobs must be derived from the plan. Tolerates hosts
    /// where reservation is simply unavailable.
    #[test]
    fn acquire_build_reservation_plan_and_make_jobs_consistent() {
        let _lock = bypass_env_lock();
        let _env = BypassGuard::remove();
        match acquire_build_reservation("test", None) {
            Ok(r) => {
                assert_eq!(
                    r.plan.is_some(),
                    r.make_jobs.is_some(),
                    "plan and make_jobs must agree on reservation presence",
                );
                if let (Some(p), Some(jobs)) = (r.plan.as_ref(), r.make_jobs) {
                    assert_eq!(
                        jobs,
                        crate::vmm::host_topology::make_jobs_for_plan(p),
                        "make_jobs must equal make_jobs_for_plan(&plan)",
                    );
                }
                assert_eq!(
                    r.plan.is_some(),
                    r._sandbox.is_some(),
                    "sandbox and plan must agree on reservation presence",
                );
            }
            Err(e) => {
                // Not a failure: some hosts legitimately cannot reserve.
                eprintln!("acquire_build_reservation unavailable on this host: {e:#}");
            }
        }
    }

    /// Bypass must succeed even where sysfs topology is unreadable.
    #[test]
    fn acquire_build_reservation_bypass_does_not_touch_sysfs() {
        let _lock = bypass_env_lock();
        let _env = BypassGuard::set("1");
        let r = acquire_build_reservation("test", None)
            .expect("bypass must succeed regardless of sysfs availability");
        assert!(r.plan.is_none());
        assert!(r._sandbox.is_none());
        assert!(r.make_jobs.is_none());
    }

    /// Uncontended acquire must succeed and create the expected lockfile
    /// (named from the canonical-path hash) under the cache `.locks/` dir.
    #[test]
    fn acquire_source_tree_lock_succeeds_on_fresh_path() {
        use crate::test_support::test_helpers::{isolated_cache_dir, lock_env};
        let _env_lock = lock_env();
        let cache = isolated_cache_dir();
        let canonical = std::path::PathBuf::from("/tmp/fake-source-tree-for-test");
        let fd = acquire_source_tree_lock(&canonical, "test")
            .expect("fresh-path acquire must succeed under isolated cache");
        let path_hash = crate::fetch::canonical_path_hash(&canonical);
        let expected = cache
            .path()
            .join(crate::flock::LOCK_DIR_NAME)
            .join(format!("source-{path_hash}.lock"));
        assert!(
            expected.exists(),
            "lockfile must exist at {} after acquire",
            expected.display(),
        );
        drop(fd);
    }

    /// Different source paths must map to different lockfiles so unrelated
    /// builds can run concurrently.
    #[test]
    fn acquire_source_tree_lock_distinct_paths_yield_distinct_lockfiles() {
        use crate::test_support::test_helpers::{isolated_cache_dir, lock_env};
        let _env_lock = lock_env();
        let cache = isolated_cache_dir();
        let path_a = std::path::PathBuf::from("/tmp/fake-source-a");
        let path_b = std::path::PathBuf::from("/tmp/fake-source-b");
        let fd_a = acquire_source_tree_lock(&path_a, "test")
            .expect("path A acquire must succeed under isolated cache");
        let fd_b = acquire_source_tree_lock(&path_b, "test").expect(
            "path B acquire must succeed concurrently with A — \
             distinct canonical paths must hash to distinct \
             lockfiles so unrelated builds don't serialize",
        );
        let hash_a = crate::fetch::canonical_path_hash(&path_a);
        let hash_b = crate::fetch::canonical_path_hash(&path_b);
        assert_ne!(
            hash_a, hash_b,
            "distinct canonical paths must produce distinct CRC32 hashes",
        );
        let lock_a = cache
            .path()
            .join(crate::flock::LOCK_DIR_NAME)
            .join(format!("source-{hash_a}.lock"));
        let lock_b = cache
            .path()
            .join(crate::flock::LOCK_DIR_NAME)
            .join(format!("source-{hash_b}.lock"));
        assert!(lock_a.exists());
        assert!(lock_b.exists());
        assert_ne!(lock_a, lock_b);
        drop(fd_a);
        drop(fd_b);
    }

    /// End-to-end contention: a second acquirer must enter the kernel's
    /// blocked-flock state (observed as a `->` waiter line in /proc/locks),
    /// stay blocked while the holder retains the lock, and succeed promptly
    /// once the holder drops it.
    #[test]
    fn acquire_source_tree_lock_blocks_on_contention_then_succeeds() {
        use crate::test_support::test_helpers::{isolated_cache_dir, lock_env};
        let _env_lock = lock_env();
        let cache = isolated_cache_dir();
        let canonical = std::path::PathBuf::from("/tmp/fake-source-contention");
        let holder = acquire_source_tree_lock(&canonical, "test")
            .expect("first acquire must succeed under isolated cache");
        let path_hash = crate::fetch::canonical_path_hash(&canonical);
        let lock_path = cache
            .path()
            .join(crate::flock::LOCK_DIR_NAME)
            .join(format!("source-{path_hash}.lock"));
        // Needle identifying this lockfile's inode in /proc/locks lines.
        let needle = crate::flock::needle_from_path(&lock_path)
            .expect("needle_from_path must resolve the lockfile inode");
        let worker_canonical = canonical.clone();
        // Worker thread attempts the same acquire and reports (result,
        // elapsed) back over a channel once it gets through.
        let (tx, rx) = std::sync::mpsc::sync_channel::<(
            std::result::Result<std::os::fd::OwnedFd, anyhow::Error>,
            std::time::Duration,
        )>(1);
        let _worker = std::thread::spawn(move || {
            let started = std::time::Instant::now();
            let result = acquire_source_tree_lock(&worker_canonical, "test");
            let elapsed = started.elapsed();
            let _ = tx.send((result, elapsed));
        });
        // Poll /proc/locks until the worker shows up as a blocked waiter
        // (`->` prefix) on our lockfile, or give up after the deadline.
        const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(10);
        const POLL_DEADLINE: std::time::Duration = std::time::Duration::from_secs(5);
        let poll_start = std::time::Instant::now();
        let mut waiter_observed = false;
        while poll_start.elapsed() < POLL_DEADLINE {
            let contents = std::fs::read_to_string("/proc/locks")
                .expect("/proc/locks must be readable on a Linux host");
            if contents
                .lines()
                .any(|line| line.contains("->") && line.contains(&needle))
            {
                waiter_observed = true;
                break;
            }
            std::thread::sleep(POLL_INTERVAL);
        }
        assert!(
            waiter_observed,
            "no `-> FLOCK ... {needle}` waiter line appeared in \
             /proc/locks within {POLL_DEADLINE:?} — worker did not \
             enter the kernel's blocked-flock state, which means \
             `acquire_source_tree_lock` regressed off the blocking path",
        );
        // Keep holding briefly so the worker's elapsed time provably
        // includes a blocked interval, then release.
        const HOLD_WINDOW: std::time::Duration = std::time::Duration::from_millis(200);
        std::thread::sleep(HOLD_WINDOW);
        drop(holder);
        let (worker_result, worker_elapsed) =
            rx.recv_timeout(std::time::Duration::from_secs(5)).expect(
                "worker must deliver its acquire result within 5s of \
                 holder release — a regression that caused the worker \
                 to hang forever lands here",
            );
        let acquired = worker_result.expect("worker acquire must succeed once the holder releases");
        assert!(
            worker_elapsed >= HOLD_WINDOW,
            "worker's acquire returned in {worker_elapsed:?}, less than \
             the {HOLD_WINDOW:?} holder-retention window — worker did \
             not actually block on the held flock",
        );
        drop(acquired);
    }

    /// Pins the field-declaration order of BuildReservation via Debug
    /// output: Rust drops fields in declaration order, and `_sandbox`
    /// (cgroup teardown) must drop before `plan` (LLC flock release).
    #[test]
    fn build_reservation_field_order_pins_drop_invariant() {
        let r = BuildReservation {
            _sandbox: None,
            plan: None,
            make_jobs: None,
        };
        let dbg = format!("{r:?}");
        let sandbox_pos = dbg
            .find("_sandbox")
            .expect("Debug output must mention _sandbox field");
        let plan_pos = dbg
            .find("plan")
            .expect("Debug output must mention plan field");
        assert!(
            sandbox_pos < plan_pos,
            "_sandbox MUST be declared before plan so cgroup rmdir \
             runs BEFORE LLC flock release on Drop. Debug: {dbg}",
        );
    }
}