1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
//! zccache daemon process.
//!
//! The daemon maintains in-memory caches, manages the artifact store,
//! runs the file watcher, and handles IPC requests from CLI/wrappers.
//!
//! On the long-lived foreground path, the daemon releases its launch
//! handles (exe file lock on Windows, implicit cwd handle on all OSes)
//! via [`zccache::daemon::trampoline`] before entering [`run_server`].
// Process-wide allocator selection, decided per target at compile time.
// No override is declared here for targets that are neither `unix` nor
// `windows`.
//
// Unix targets: install jemalloc (via `tikv_jemallocator`) as the global
// allocator.
#[cfg(unix)]
#[global_allocator]
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
// Windows targets: install mimalloc as the global allocator.
#[cfg(windows)]
#[global_allocator]
static GLOBAL_WIN: mimalloc::MiMalloc = mimalloc::MiMalloc;
use clap::Parser;
use zccache::core::NormalizedPath;
// Cap handed to `max_blocking_threads` when `run_server` builds the
// multi-threaded tokio runtime.
const DAEMON_MAX_BLOCKING_THREADS: usize = 16;
// NOTE: clap's derive macro turns the `///` comments on this struct and on
// each field into the `--help` text, so rewording them changes CLI output.
// Maintainer-only remarks below therefore use plain `//` comments.
/// zccache daemon -- local compiler cache service.
#[derive(Debug, Parser)]
#[command(name = "zccache-daemon", version, about)]
struct Args {
// NOTE(review): no code visible in this file reads `config` — confirm where
// (or whether) it is consumed.
/// Path to configuration file.
#[arg(long)]
config: Option<NormalizedPath>,
// Handed to `init_tracing`, which uses it only when a filter cannot be
// built from the environment.
/// Log level (trace, debug, info, warn, error).
#[arg(long, default_value = "info")]
log_level: String,
// `main` runs the server only when this is set; otherwise it prints a
// status report via `print_status` and returns.
/// Run in foreground (don't daemonize).
#[arg(long)]
foreground: bool,
// Both `print_status` and `run_server` fall back to
// `zccache::ipc::default_endpoint` when this is absent.
/// IPC endpoint (default: platform-specific).
#[arg(long)]
endpoint: Option<String>,
// Forwarded unchanged by `run_server` to the server's `run` call.
/// Idle timeout in seconds (0 = no timeout).
///
/// Default comes from `zccache::core::config::DEFAULT_IDLE_TIMEOUT_SECS`
/// (60 minutes), kept as the single source of truth so `Config::default`
/// and this flag never drift apart.
///
/// Reads `ZCCACHE_IDLE_TIMEOUT_SECS` from the environment when the
/// flag is not given. Setting the env var on `zccache-cli` propagates
/// to the daemon via `spawn_daemon`'s inherited environment, so a
/// caller can ask for a shorter idle window without touching the
/// command line. `0` disables the timeout (daemon runs forever).
#[arg(
long,
default_value_t = zccache::core::config::DEFAULT_IDLE_TIMEOUT_SECS,
env = "ZCCACHE_IDLE_TIMEOUT_SECS"
)]
idle_timeout: u64,
// When set, `run_server` also makes a best-effort attempt to delete the
// persisted depgraph file before starting with an empty graph.
/// Disable loading/saving the dependency graph from/to disk.
#[arg(long)]
no_depgraph_cache: bool,
// Consumed in `main` on the foreground path: present selects
// `redirect_stdio_to_log`, absent selects `detach_stdio`.
/// File path to redirect the daemon's own stdout + stderr onto.
///
/// Set by `zccache-cli`'s `spawn_daemon` so that errors which fire
/// before the lifecycle log / panic hook can attach (dyld failures on
/// macOS, Gatekeeper kills, early-init panics) leave evidence on
/// disk instead of disappearing into `/dev/null`. When unset the
/// daemon falls back to the legacy detach-stdio behavior.
#[arg(long)]
log_file: Option<NormalizedPath>,
}
/// Process entry point.
///
/// Parses the command line and then takes one of two paths:
/// * without `--foreground`: print a one-shot status report and return;
/// * with `--foreground`: prepare the process for a long life (stdio,
///   tracing, launch handles) and hand control to [`run_server`].
fn main() {
    let args = Args::parse();

    // Short-lived path: this invocation is only a status probe.
    if !args.foreground {
        print_status(&args);
        return;
    }

    // Long-lived path. The very first step is to let go of the stdio
    // handles inherited from whoever spawned us: an orphaned daemon that
    // keeps its grandparent's pipe write ends open prevents the
    // grandparent's reader (e.g. `subprocess.Popen(stdout=PIPE)`) from ever
    // seeing EOF once the parent exits. See issue #276.
    //
    // With `--log-file`, stdout + stderr are pointed at that file rather
    // than `/dev/null` (stdin stays nulled), so failures that happen before
    // the lifecycle log / panic hook are installed still leave a trace on
    // disk. This has to precede `init_tracing` so that the subscriber's
    // output ends up in the log file as well.
    if let Some(path) = args.log_file.as_deref() {
        zccache::daemon::trampoline::redirect_stdio_to_log(path);
    } else {
        zccache::daemon::trampoline::detach_stdio();
    }

    init_tracing(&args.log_level);

    // Give up the exe-file lock and the cwd handle so that
    // `pip install --upgrade zccache` and `rm -rf <project>` keep working
    // while the daemon is alive. See issue #134.
    zccache::daemon::trampoline::unlock_exe();
    zccache::daemon::trampoline::release_cwd();

    run_server(args);
}
/// Prints a human-readable status report to stdout.
///
/// The static header (version, endpoint, namespace, lock file) is always
/// printed. The function then probes the endpoint with
/// [`query_daemon_status`]: on success the live counters are listed, on any
/// failure the daemon is reported as not running together with a hint on
/// how to start it.
///
/// # Panics
///
/// Panics if the single-threaded tokio runtime used for the probe cannot be
/// created.
fn print_status(args: &Args) {
    // Explicit `--endpoint` wins; otherwise use the platform default.
    let endpoint = match &args.endpoint {
        Some(explicit) => explicit.clone(),
        None => zccache::ipc::default_endpoint(),
    };
    let namespace = zccache::core::config::daemon_namespace_label();
    let lock_file = zccache::ipc::lock_file_path();

    println!("zccache-daemon v{}", env!("CARGO_PKG_VERSION"));
    println!();
    println!(" endpoint: {endpoint}");
    println!(" namespace: {}", namespace);
    println!(" lock file: {}", lock_file.display());
    println!();

    // A current-thread runtime is enough for one request/response round trip.
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .expect("failed to create tokio runtime");

    // Every probe failure is reported the same way: not running.
    let status = match runtime.block_on(query_daemon_status(&endpoint)) {
        Ok(status) => status,
        Err(_) => {
            println!(" status: not running");
            println!();
            println!("Start with: zccache-daemon --foreground");
            return;
        }
    };

    let private = if status.private_daemon.enabled {
        "yes"
    } else {
        "no"
    };

    println!(" status: running");
    println!(" daemon ns: {}", status.daemon_namespace);
    println!(" daemon ep: {}", status.endpoint);
    println!(" private: {}", private);
    println!(" uptime: {}s", status.uptime_secs);
    println!(" artifacts: {}", status.artifact_count);
    println!(" cache size: {} bytes", status.cache_size_bytes);
    println!(" metadata: {} entries", status.metadata_entries);
    println!(
        " hits/miss: {} / {}",
        status.cache_hits, status.cache_misses
    );
}
/// Asks the daemon listening on `endpoint` for its status.
///
/// Connects, sends a single `Request::Status`, and waits for one reply.
///
/// # Errors
///
/// Returns an error when connecting, sending or receiving fails, when the
/// peer closes the connection without replying (`"connection closed"`), or
/// when the reply is anything other than `Response::Status`.
async fn query_daemon_status(
    endpoint: &str,
) -> Result<zccache::protocol::DaemonStatus, Box<dyn std::error::Error>> {
    use zccache::protocol::{Request, Response};

    let mut client = zccache::ipc::connect(endpoint).await?;
    // Acting as a client here: apply the default client receive timeout so
    // that a hung daemon surfaces as an error instead of blocking forever.
    client.set_recv_timeout(zccache::ipc::DEFAULT_CLIENT_RECV_TIMEOUT);
    client.send(&Request::Status).await?;

    let reply: Option<Response> = client.recv().await?;
    match reply.ok_or("connection closed")? {
        Response::Status(s) => Ok(s),
        other => Err(format!("unexpected response: {other:?}").into()),
    }
}
fn run_server(args: Args) {
let endpoint = args.endpoint.unwrap_or_else(zccache::ipc::default_endpoint);
let idle_timeout = args.idle_timeout;
// The returned guard MUST stay alive — drop unregisters the
// OS-level signal/exception handlers. Bind it for the whole
// `run_server` lifetime by storing it in this stack frame.
let _crash_guard = zccache::core::crash::install("zccache-daemon");
zccache::core::crash::check_previous_crashes();
tracing::info!(%endpoint, idle_timeout, "zccache-daemon starting");
// Issue #273: on Windows, warn once on stderr if the cache dir is
// not on Defender's exclusion list. Non-fatal; no-ops off Windows
// and when `ZCCACHE_QUIET` is set.
let cache_root = zccache::core::config::default_cache_dir();
zccache::core::defender::maybe_emit_first_run_banner(cache_root.as_path());
// Persist a "spawn" lifecycle event to disk. tracing logs go to
// stderr which is detached to NUL, so this file-based sink is the
// only way an operator (or CI) can correlate daemon lifetime with
// surrounding events after the fact.
zccache::daemon::lifecycle::write_event(
zccache::daemon::lifecycle::EVENT_SPAWN,
serde_json::json!({
"endpoint": &endpoint,
"daemon_namespace": zccache::core::config::daemon_namespace_label(),
"idle_timeout": idle_timeout,
"version": env!("CARGO_PKG_VERSION"),
}),
);
// Write lock file so CLI can detect us
let pid = std::process::id();
if let Err(e) = zccache::ipc::write_lock_file(pid) {
tracing::warn!("failed to write lock file: {e}");
}
let rt = tokio::runtime::Builder::new_multi_thread()
.enable_all()
.max_blocking_threads(DAEMON_MAX_BLOCKING_THREADS)
.build()
.expect("failed to create tokio runtime");
// Load dep graph from disk (before entering async block).
//
// Issue #320: a fresh daemon pointed at a populated cache dir is auto-
// classified as warm by loading the persisted graph here. On version
// mismatch or corruption the outcome carries a warning that we surface
// both on stderr (for operators) and via the daemon server (which forwards
// it into per-session logs) so the cold fallback is never silent.
let path = zccache::depgraph::depgraph_file_path();
let (dep_graph, depgraph_load_warning) = if args.no_depgraph_cache {
let _ = std::fs::remove_file(&path);
tracing::info!("depgraph cache disabled — starting with empty graph");
(None, None)
} else {
let start = std::time::Instant::now();
let outcome = zccache::depgraph::classify_load(&path);
let warning = outcome.warning(&path);
match outcome {
zccache::depgraph::DepGraphLoadOutcome::Loaded { graph } => {
let stats = graph.stats();
let (cold_ctxs, warm_ctxs, stale_ctxs) = graph.state_breakdown();
let ctxs_with_key = graph.contexts_with_artifact_key();
// State breakdown explains why cold_skip fires after load:
// an `is_cold` check only returns false for Warm contexts,
// so cold/stale contexts will take the cold_skip branch on
// the first warm-side compile and miss regardless of what
// the artifact_store knows.
tracing::info!(
contexts = stats.context_count,
files = stats.file_count,
cold = cold_ctxs,
warm = warm_ctxs,
stale = stale_ctxs,
with_artifact_key = ctxs_with_key,
elapsed_ms = start.elapsed().as_millis() as u64,
"loaded depgraph from disk"
);
(Some(graph), None)
}
zccache::depgraph::DepGraphLoadOutcome::Missing => (None, None),
zccache::depgraph::DepGraphLoadOutcome::VersionMismatch {
file_version,
expected_version,
} => {
tracing::warn!(
file_version,
expected_version,
"depgraph version mismatch — starting with empty graph"
);
if let Some(ref w) = warning {
eprintln!("{w}");
}
(None, warning)
}
zccache::depgraph::DepGraphLoadOutcome::Corrupt { ref message }
| zccache::depgraph::DepGraphLoadOutcome::IoError { ref message } => {
tracing::warn!("depgraph load failed: {message} — starting with empty graph");
if let Some(ref w) = warning {
eprintln!("{w}");
}
(None, warning)
}
}
};
rt.block_on(async {
let mut server = match zccache::daemon::DaemonServer::bind(&endpoint) {
Ok(s) => s,
Err(e) => {
tracing::error!("failed to bind {endpoint}: {e}");
zccache::ipc::remove_lock_file();
std::process::exit(1);
}
};
// Inject pre-loaded dep graph if we have one.
if let Some(graph) = dep_graph {
server.set_dep_graph(graph);
}
// Forward any depgraph load warning so SessionStart can mirror it
// into the per-session log (`last-session.log`). Without this the
// cold fallback after a version-mismatch / corrupt file would be
// invisible to operators looking at per-build logs.
if let Some(warning) = depgraph_load_warning {
server.set_depgraph_load_warning(warning);
}
// Wire up Ctrl+C to trigger graceful shutdown
let shutdown = server.shutdown_handle();
tokio::spawn(async move {
if let Ok(()) = tokio::signal::ctrl_c().await {
tracing::info!("received Ctrl+C — shutting down");
shutdown.notify_one();
}
});
tracing::info!(%endpoint, "listening for connections");
if let Err(e) = server.run(idle_timeout).await {
tracing::error!("server error: {e}");
zccache::ipc::remove_lock_file();
std::process::exit(1);
}
tracing::info!("daemon exiting cleanly");
zccache::ipc::remove_lock_file();
});
}
/// Installs the global `tracing` subscriber.
///
/// The filter comes from the `RUST_LOG` environment variable when it is set
/// and parses; otherwise from `level` (the `--log-level` argument).
///
/// On top of that, every target in `FLOOR_TARGETS` gets an explicit
/// directive so that its logs are enabled at INFO or above no matter how
/// narrowly the filter was specified. The floor is never *lower* than what
/// the user asked for: if the effective request for a target is `debug` or
/// `trace` (e.g. `--log-level debug`, or `RUST_LOG=zccache_ipc=trace`), the
/// added directive uses that level instead of `info`. A per-target
/// directive outranks a global one and replaces an existing directive for
/// the same target, so a fixed `<crate>=info` would silently cap such
/// requests at INFO.
///
/// NOTE(review): this file reaches the sibling code through `zccache::…`
/// paths; confirm that the log targets emitted at runtime still use the
/// `zccache_<name>` form listed below.
fn init_tracing(level: &str) {
    use tracing_subscriber::EnvFilter;

    /// Targets whose logs must survive the filter at INFO or above.
    const FLOOR_TARGETS: [&str; 13] = [
        "zccache_artifact",
        "zccache_compiler",
        "zccache_core",
        "zccache_depgraph",
        "zccache_download",
        "zccache_fingerprint",
        "zccache_fscache",
        "zccache_gha",
        "zccache_hash",
        "zccache_ipc",
        "zccache_protocol",
        "zccache_symbols",
        "zccache_watcher",
    ];
    /// Verbosity ranks: off=0, error=1, warn=2, info=3, debug=4, trace=5.
    const INFO_RANK: u8 = 3;
    const TRACE_RANK: u8 = 5;

    /// Parses a level name (case-insensitive) or digit `0..=5` into a rank.
    fn level_rank(text: &str) -> Option<u8> {
        let text = text.trim();
        ["off", "error", "warn", "info", "debug", "trace"]
            .iter()
            .position(|name| text.eq_ignore_ascii_case(name))
            .or_else(|| text.parse::<usize>().ok().filter(|n| *n <= 5))
            .and_then(|rank| u8::try_from(rank).ok())
    }

    /// Directive spelling for a floor rank (never below `info`).
    fn floor_name(rank: u8) -> &'static str {
        match rank {
            4 => "debug",
            r if r >= TRACE_RANK => "trace",
            _ => "info",
        }
    }

    /// Level that the directive string `spec` requests for `target`, using
    /// the filter's own precedence: the directive with the longest matching
    /// target prefix wins, and the later one wins a tie. Directives with
    /// span/field filters (`[...]`) are ignored.
    fn requested_rank(spec: &str, target: &str) -> Option<u8> {
        let mut best: Option<(usize, u8)> = None;
        for directive in spec.split(',').map(str::trim).filter(|d| !d.is_empty()) {
            if directive.contains('[') {
                continue;
            }
            let (scope, rank) = match directive.split_once('=') {
                Some((scope, lvl)) => match level_rank(lvl) {
                    Some(rank) => (scope.trim(), rank),
                    None => continue,
                },
                None => match level_rank(directive) {
                    // A bare level is the global default (empty scope).
                    Some(rank) => ("", rank),
                    // A bare target enables everything under it.
                    None => (directive, TRACE_RANK),
                },
            };
            if !target.starts_with(scope) {
                continue;
            }
            if best.map_or(true, |(len, _)| scope.len() >= len) {
                best = Some((scope.len(), rank));
            }
        }
        best.map(|(_, rank)| rank)
    }

    // Keep the directive text next to the filter that was built from it so
    // the floor can be computed against what the user actually requested.
    let env_spec = std::env::var(EnvFilter::DEFAULT_ENV).ok();
    let (mut filter, spec) = match EnvFilter::try_from_default_env() {
        Ok(filter) => (filter, env_spec.unwrap_or_default()),
        Err(_) => (EnvFilter::new(level), level.to_owned()),
    };

    // When a parent process (notably soldr) launches us with a narrowed
    // `RUST_LOG=zccache_daemon=info`, the directive *only* matches the
    // `zccache_daemon` target — INFO logs emitted from sibling crates
    // (`zccache_artifact`, `zccache_fscache`, `zccache_hash`, ...) are
    // silently dropped, which has blocked perf-cluster diagnostics
    // (runs 26255457227 / 26258412256 / 26260816043 — see PERF.md).
    // Add explicit `<crate>=<floor>` directives so the cross-crate logs
    // always survive the filter regardless of how the env was set.
    for target in FLOOR_TARGETS {
        let floor = requested_rank(&spec, target).map_or(INFO_RANK, |rank| rank.max(INFO_RANK));
        if let Ok(d) = format!("{target}={}", floor_name(floor)).parse() {
            filter = filter.add_directive(d);
        }
    }

    tracing_subscriber::fmt()
        .with_env_filter(filter)
        .with_target(true)
        .with_thread_ids(true)
        .init();
}