zccache 1.11.22

Local-first compiler cache for C/C++/Rust/Emscripten
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
//! Broker-mediated connect lane for the daemon client path.
//!
//! Wires `running_process::broker::client::connect_to_backend` in front of
//! zccache's direct IPC connect (upstream tracking:
//! zackees/running-process#383). Lane selection precedence:
//!
//! 1. `RUNNING_PROCESS_DISABLE=1` — the canonical upstream escape hatch.
//!    The broker lane (including the fake-backend test seam) is bypassed
//!    entirely and the pre-adoption direct connect is used.
//! 2. `RUNNING_PROCESS_FAKE_BACKEND=<endpoint>` — upstream TEST-ONLY seam:
//!    `connect_to_backend` dials the endpoint directly, skipping broker
//!    discovery and Hello negotiation. Never set in production.
//! 3. `ZCCACHE_BROKER_CONNECT=1` — opt-in production lane: a broker Hello
//!    resolves the backend endpoint. Any broker failure (broker absent,
//!    negotiation refused, resolved endpoint unreachable) falls back
//!    silently to the direct connect — the broker lane must never make a
//!    previously-working build fail.
//! 4. Default — direct connect, byte-for-byte the pre-adoption behavior.
//!
//! The negotiated (or seam) connection is consumed for **endpoint
//! resolution only**; the data connection is then opened with zccache's
//! own tokio transport so recv timeouts, the Windows named-pipe client
//! backoff, and the v15/v16/FrameV1 wire lanes keep working unchanged.
//! Adopting the negotiated `interprocess` stream as the data connection is
//! deferred until the broker lane becomes the default.

use running_process::broker::client::{
    connect_local_socket, connect_to_backend, BackendConnectionRoute, ConnectBackendRequest,
};

use super::error::IpcError;
use super::{connect, running_process_disabled, ClientConnection};

/// Upstream TEST-ONLY seam: a non-empty value short-circuits the broker
/// negotiation and dials the given running-process endpoint directly.
///
/// An empty value is treated the same as an unset variable by every reader
/// in this module.
///
/// Mirrors the `running_process::broker::client` seam contract (the
/// constant ships upstream after 4.1.0; replace this local copy with the
/// upstream re-export on the next running-process bump). The canonical
/// `RUNNING_PROCESS_DISABLE=1` hatch takes precedence: a disabled broker
/// ignores the fake seam too. Never set this in production.
pub const RUNNING_PROCESS_FAKE_BACKEND_ENV: &str = "RUNNING_PROCESS_FAKE_BACKEND";

/// Opt-in switch for the production broker-negotiation lane.
///
/// Only the exact value `1` requests the lane; any other value, or an unset
/// variable, does not.
///
/// The lane stays opt-in until the upstream perf gate (running-process
/// #383 item 2) is satisfied and a shared broker actually manages zccache
/// daemons; today zccache self-spawns its daemon, so the default path
/// keeps the direct connect.
pub const ZCCACHE_BROKER_CONNECT_ENV: &str = "ZCCACHE_BROKER_CONNECT";

/// How [`connect_daemon`] reached the daemon endpoint.
///
/// Reported next to the connection by [`connect_daemon_with_route`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DaemonConnectRoute {
    /// Existing direct connect — the default and every fallback/escape-hatch
    /// path.
    Direct,
    /// Endpoint resolved through the broker lane: either
    /// `connect_to_backend` negotiation or the fake-backend seam's direct
    /// dial.
    Broker {
        /// Route reported by the running-process broker client (the seam
        /// path reports `HelloSkip`).
        route: BackendConnectionRoute,
        /// Resolved endpoint (zccache connect form) the data connection used.
        endpoint: String,
    },
}

/// Open a daemon connection, applying the lane selection rules from the
/// module docs.
///
/// This is a drop-in substitute for [`super::connect`] on the daemon client
/// path: the connection type is the same, and the broker lane never
/// introduces a failure the direct connect would not also produce.
///
/// # Errors
///
/// Propagates the [`IpcError`] from [`connect_daemon_with_route`].
pub async fn connect_daemon(endpoint: &str) -> Result<ClientConnection, IpcError> {
    // The route is only interesting to diagnostics and tests; discard it.
    let (conn, _route) = connect_daemon_with_route(endpoint).await?;
    Ok(conn)
}

/// Same as [`connect_daemon`], but also reports the route that produced the
/// connection.
///
/// The broker lane is attempted only when the disable hatch is off and the
/// lane is requested. If broker resolution yields nothing, or the resolved
/// endpoint cannot be connected to, the function falls through to the
/// direct connect against `endpoint`.
///
/// # Errors
///
/// Returns the [`IpcError`] of the direct connect; broker-lane failures are
/// never surfaced to the caller.
pub async fn connect_daemon_with_route(
    endpoint: &str,
) -> Result<(ClientConnection, DaemonConnectRoute), IpcError> {
    // `&&` short-circuits, so a disabled broker never consults the lane
    // request variables at all.
    let resolved_backend = if !running_process_disabled() && broker_lane_requested() {
        resolve_backend_endpoint().await
    } else {
        None
    };

    if let Some((resolved, route)) = resolved_backend {
        match connect(&resolved).await {
            Ok(conn) => {
                let via_broker = DaemonConnectRoute::Broker {
                    route,
                    endpoint: resolved,
                };
                return Ok((conn, via_broker));
            }
            Err(err) => {
                tracing::debug!(
                    resolved_endpoint = %resolved,
                    error = %err,
                    "broker-resolved endpoint unreachable; falling back to direct connect"
                );
            }
        }
    }

    // Default lane, escape hatch, and every broker-side fallback end here.
    let conn = connect(endpoint).await?;
    Ok((conn, DaemonConnectRoute::Direct))
}

/// Report whether this process asked for the broker lane.
///
/// The lane is requested when the fake-backend test seam holds a non-empty
/// value, or when the production opt-in variable equals exactly `1`. The
/// `RUNNING_PROCESS_DISABLE=1` precedence is not checked here;
/// [`connect_daemon_with_route`] handles it before calling this function.
fn broker_lane_requested() -> bool {
    let seam_set = std::env::var_os(RUNNING_PROCESS_FAKE_BACKEND_ENV)
        .is_some_and(|value| !value.is_empty());
    // `||` short-circuits: the opt-in variable is read only when the seam
    // is not set.
    seam_set || matches!(std::env::var(ZCCACHE_BROKER_CONNECT_ENV).as_deref(), Ok("1"))
}

/// Resolve the backend endpoint on a blocking worker thread.
///
/// Yields the resolved endpoint in zccache connect form together with the
/// broker route. Any broker-side failure — including a failed join of the
/// worker task — yields `None`, and the caller then uses the direct
/// connect. The negotiated stream is dropped inside the blocking helper;
/// the module docs explain why endpoint resolution and the data connection
/// are kept separate.
async fn resolve_backend_endpoint() -> Option<(String, BackendConnectionRoute)> {
    let joined = tokio::task::spawn_blocking(resolve_backend_endpoint_blocking).await;
    joined.unwrap_or_else(|err| {
        tracing::debug!(error = %err, "broker negotiation task failed; using direct connect");
        None
    })
}

/// Blocking half of [`resolve_backend_endpoint`].
///
/// Tries the fake-backend seam first; otherwise negotiates with the
/// per-user shared broker. Returns the endpoint in zccache connect form and
/// the route taken, or `None` when resolution fails.
fn resolve_backend_endpoint_blocking() -> Option<(String, BackendConnectionRoute)> {
    if let Some(seam_endpoint) = fake_backend_endpoint_from_env() {
        // The fake-backend seam dials the endpoint directly, skipping broker
        // discovery, Hello negotiation, and version checks entirely —
        // matching the upstream connect_to_backend seam semantics. The
        // dialed stream is only a reachability probe and is dropped at once.
        if let Err(err) = connect_local_socket(&seam_endpoint).map(drop) {
            tracing::warn!(
                endpoint = %seam_endpoint,
                error = %err,
                "RUNNING_PROCESS_FAKE_BACKEND endpoint unreachable; using direct connect"
            );
            return None;
        }
        return Some((
            to_zccache_endpoint(&seam_endpoint),
            BackendConnectionRoute::HelloSkip,
        ));
    }

    let broker_endpoint = default_broker_endpoint()?;
    let request = ConnectBackendRequest::new(
        &broker_endpoint,
        "zccache",
        crate::core::VERSION,
        crate::core::VERSION,
    );
    let connection = match connect_to_backend(request) {
        Ok(connection) => connection,
        Err(err) => {
            tracing::debug!(
                error = %err,
                "running-process broker negotiation failed; using direct connect"
            );
            return None;
        }
    };
    Some((to_zccache_endpoint(&connection.endpoint), connection.route))
}

/// Read the fake-backend seam endpoint from the environment.
///
/// Returns `None` when the variable is unset or empty; otherwise the value
/// is converted lossily to a `String`. This helper does not consult the
/// disable hatch itself — that precedence is enforced in
/// [`connect_daemon_with_route`] before resolution starts.
fn fake_backend_endpoint_from_env() -> Option<String> {
    std::env::var_os(RUNNING_PROCESS_FAKE_BACKEND_ENV)
        .filter(|raw| !raw.is_empty())
        .map(|raw| raw.to_string_lossy().into_owned())
}

/// Work out the per-user shared-broker endpoint for this host.
///
/// Returns `None` when the user SID hash or the shared pipe name cannot be
/// derived, or when the platform-specific endpoint is absent.
fn default_broker_endpoint() -> Option<String> {
    let Ok(sid_hash) = running_process::broker::lifecycle::user_sid_hash() else {
        return None;
    };
    let Ok(pipe) = running_process::broker::lifecycle::names::shared_broker_pipe(&sid_hash) else {
        return None;
    };
    #[cfg(windows)]
    {
        pipe.windows
    }
    #[cfg(unix)]
    {
        pipe.unix.map(|path| path.to_string_lossy().into_owned())
    }
}

/// Convert a running-process backend endpoint into zccache connect form.
///
/// On Windows, running-process hands out bare pipe names (`interprocess`
/// namespaced names) whereas zccache's transport wants the full
/// `\\.\pipe\` form, so the prefix is added unless it is already present.
/// On Unix the socket path is returned unchanged.
fn to_zccache_endpoint(endpoint: &str) -> String {
    #[cfg(windows)]
    {
        const PIPE_PREFIX: &str = r"\\.\pipe\";
        let already_qualified = endpoint.starts_with(PIPE_PREFIX);
        if already_qualified {
            String::from(endpoint)
        } else {
            [PIPE_PREFIX, endpoint].concat()
        }
    }
    #[cfg(unix)]
    {
        String::from(endpoint)
    }
}

/// Reduce a zccache endpoint to the running-process local-socket form.
///
/// This is the inverse of the private `to_zccache_endpoint`. Tests and
/// tooling use it when handing a zccache endpoint to the upstream
/// fake-backend seam. On Windows the leading `\\.\pipe\` is removed when
/// present; on Unix the path is returned unchanged.
#[must_use]
pub fn to_running_process_endpoint(endpoint: &str) -> String {
    #[cfg(windows)]
    {
        let bare = endpoint.strip_prefix(r"\\.\pipe\").unwrap_or(endpoint);
        bare.to_owned()
    }
    #[cfg(unix)]
    {
        endpoint.to_owned()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ipc::test_env::EnvVarGuard;
    use crate::ipc::{unique_test_endpoint, IpcListener, RUNNING_PROCESS_DISABLE_ENV};
    use crate::protocol::{Request, Response};

    /// Spawn a ping server that accepts connections until it has answered
    /// one Ping. The accept loop is unbounded on purpose: the broker
    /// lane's resolution dial closes immediately, and on loaded Linux
    /// runners it can surface as extra reset connections, so budgeting a
    /// fixed number of accepts is racy — the listener must stay alive
    /// until the data connection's Ping is answered (seen as ECONNRESET
    /// in CI Integration runs otherwise).
    ///
    /// The returned handle resolves to the number of Pings answered.
    fn spawn_ping_server(listener: IpcListener) -> tokio::task::JoinHandle<usize> {
        // Single-Ping specialization of the counting server.
        spawn_counting_ping_server(listener, 1)
    }

    /// Send one `Ping` over `conn` and assert that the reply is `Pong`.
    ///
    /// Panics (failing the test) on any send/recv error or unexpected reply.
    async fn ping_roundtrip(conn: &mut super::ClientConnection) {
        conn.send(&Request::Ping).await.unwrap();
        let resp: Option<Response> = conn.recv().await.unwrap();
        assert_eq!(resp, Some(Response::Pong));
    }

    /// With none of the lane-selection variables set, the connection must
    /// take the direct route.
    #[tokio::test]
    async fn default_route_is_direct() {
        // Keep the guard alive for the whole test so the environment stays
        // cleared while the connection is made.
        let _env = EnvVarGuard::unset_all(&[
            RUNNING_PROCESS_DISABLE_ENV,
            RUNNING_PROCESS_FAKE_BACKEND_ENV,
            ZCCACHE_BROKER_CONNECT_ENV,
        ]);

        let endpoint = unique_test_endpoint();
        let listener = IpcListener::bind(&endpoint).unwrap();
        let server = spawn_ping_server(listener);

        let (mut conn, route) = connect_daemon_with_route(&endpoint).await.unwrap();
        assert_eq!(route, DaemonConnectRoute::Direct);
        ping_roundtrip(&mut conn).await;

        // The server task finishes once it has answered exactly one Ping.
        assert_eq!(server.await.unwrap(), 1);
    }

    /// With the fake-backend seam pointing at a live listener, the
    /// connection must report the broker route with `HelloSkip` and the
    /// original endpoint.
    #[tokio::test]
    async fn fake_backend_seam_routes_through_connect_to_backend() {
        let endpoint = unique_test_endpoint();
        // NOTE(review): a `None` value presumably unsets the variable for the
        // guard's lifetime — confirm against `EnvVarGuard::set_all`.
        let _env = EnvVarGuard::set_all(&[
            (RUNNING_PROCESS_DISABLE_ENV, None),
            (
                RUNNING_PROCESS_FAKE_BACKEND_ENV,
                Some(to_running_process_endpoint(&endpoint)),
            ),
            (ZCCACHE_BROKER_CONNECT_ENV, None),
        ]);

        let listener = IpcListener::bind(&endpoint).unwrap();
        // The server sees the seam's resolution dial (made with
        // `connect_local_socket` and dropped immediately) before the zccache
        // data connection.
        let server = spawn_ping_server(listener);

        let (mut conn, route) = connect_daemon_with_route(&endpoint).await.unwrap();
        match route {
            DaemonConnectRoute::Broker {
                route: BackendConnectionRoute::HelloSkip,
                endpoint: resolved,
            } => assert_eq!(resolved, endpoint),
            other => panic!("expected broker HelloSkip route, got {other:?}"),
        }
        ping_roundtrip(&mut conn).await;

        assert_eq!(server.await.unwrap(), 1);
    }

    /// `RUNNING_PROCESS_DISABLE=1` must win over both the fake-backend seam
    /// and the production opt-in.
    #[tokio::test]
    async fn disable_env_bypasses_broker_lane_entirely() {
        // The seam points at a guaranteed-dead endpoint (a fresh unique
        // endpoint that nothing binds). If the disable hatch failed to take
        // precedence, the broker lane would dial it; with the hatch honored,
        // the direct connect must succeed.
        let endpoint = unique_test_endpoint();
        let _env = EnvVarGuard::set_all(&[
            (RUNNING_PROCESS_DISABLE_ENV, Some("1".to_string())),
            (
                RUNNING_PROCESS_FAKE_BACKEND_ENV,
                Some(to_running_process_endpoint(&unique_test_endpoint())),
            ),
            (ZCCACHE_BROKER_CONNECT_ENV, Some("1".to_string())),
        ]);

        let listener = IpcListener::bind(&endpoint).unwrap();
        let server = spawn_ping_server(listener);

        let (mut conn, route) = connect_daemon_with_route(&endpoint).await.unwrap();
        assert_eq!(route, DaemonConnectRoute::Direct);
        ping_roundtrip(&mut conn).await;

        assert_eq!(server.await.unwrap(), 1);
    }

    /// A failing broker lane must fall back to the direct connect instead
    /// of failing the caller.
    #[tokio::test]
    async fn broker_failure_falls_back_to_direct_connect() {
        // Seam points at a dead endpoint (a fresh unique endpoint that
        // nothing binds): the seam's resolution dial fails, resolution
        // yields nothing, and the wrapper must fall back to the direct
        // connect.
        let endpoint = unique_test_endpoint();
        let _env = EnvVarGuard::set_all(&[
            (RUNNING_PROCESS_DISABLE_ENV, None),
            (
                RUNNING_PROCESS_FAKE_BACKEND_ENV,
                Some(to_running_process_endpoint(&unique_test_endpoint())),
            ),
            (ZCCACHE_BROKER_CONNECT_ENV, None),
        ]);

        let listener = IpcListener::bind(&endpoint).unwrap();
        let server = spawn_ping_server(listener);

        let (mut conn, route) = connect_daemon_with_route(&endpoint).await.unwrap();
        assert_eq!(route, DaemonConnectRoute::Direct);
        ping_roundtrip(&mut conn).await;

        assert_eq!(server.await.unwrap(), 1);
    }

    /// Sorted-percentile helper matching the convention in
    /// `tests/daemon_perf_test.rs`.
    ///
    /// `sorted` must be in ascending order and `pct` is a fraction (for
    /// example `0.99` for p99). The rank is `floor(len * pct)`, clamped to
    /// the last element.
    ///
    /// # Panics
    ///
    /// Panics with an explicit message when `sorted` is empty. Previously
    /// the `len() - 1` subtraction underflowed, giving an overflow panic in
    /// debug builds and an opaque index panic in release builds.
    fn percentile_ms(sorted: &[f64], pct: f64) -> f64 {
        assert!(
            !sorted.is_empty(),
            "percentile_ms requires at least one sample"
        );
        let last = sorted.len() - 1;
        // The float-to-usize cast truncates toward zero, which yields the
        // floor for the non-negative products used here.
        let idx = ((sorted.len() as f64 * pct) as usize).min(last);
        sorted[idx]
    }

    /// Ping server shared by the route tests (via `spawn_ping_server`) and
    /// the latency evidence test: keeps accepting until it has answered
    /// `pings` Ping requests, tolerating the broker lane's dropped
    /// resolution dials in between.
    ///
    /// The returned handle resolves to the number of Pings answered, which
    /// is lower than `pings` only if `accept` fails and ends the loop early.
    /// Any request other than `Ping` panics the server task.
    fn spawn_counting_ping_server(
        mut listener: IpcListener,
        pings: usize,
    ) -> tokio::task::JoinHandle<usize> {
        tokio::spawn(async move {
            let mut answered = 0;
            while answered < pings {
                let Ok(mut conn) = listener.accept().await else {
                    break;
                };
                match conn.recv::<Request>().await {
                    Ok(Some(Request::Ping)) => {
                        conn.send(&Response::Pong).await.unwrap();
                        answered += 1;
                    }
                    // A connection that closes or errors before sending a
                    // request is not counted; just accept the next one.
                    Ok(None) | Err(_) => continue,
                    Ok(Some(other)) => panic!("unexpected request: {other:?}"),
                }
            }
            answered
        })
    }

    /// Measure connect + Ping/Pong round-trip latency for `samples`
    /// iterations against a fresh listener, returning per-iteration
    /// milliseconds. `expect_broker` asserts the route per iteration.
    ///
    /// When `expect_broker` is true this sets the fake-backend seam variable
    /// and does not restore it; the caller is responsible for cleanup.
    async fn measure_connect_roundtrip_ms(samples: usize, expect_broker: bool) -> Vec<f64> {
        let endpoint = unique_test_endpoint();
        if expect_broker {
            // Re-point the seam at this run's endpoint (the caller holds
            // the env lock for the whole measurement).
            std::env::set_var(
                RUNNING_PROCESS_FAKE_BACKEND_ENV,
                to_running_process_endpoint(&endpoint),
            );
        }
        let listener = IpcListener::bind(&endpoint).unwrap();
        let server = spawn_counting_ping_server(listener, samples);

        let mut samples_ms = Vec::with_capacity(samples);
        for _ in 0..samples {
            let start = std::time::Instant::now();
            let (mut conn, route) = connect_daemon_with_route(&endpoint).await.unwrap();
            ping_roundtrip(&mut conn).await;
            // The sample is recorded before the connection is dropped and
            // the route is checked, so neither is part of the timing.
            samples_ms.push(start.elapsed().as_secs_f64() * 1000.0);
            drop(conn);
            match (&route, expect_broker) {
                (DaemonConnectRoute::Broker { .. }, true) => {}
                (DaemonConnectRoute::Direct, false) => {}
                (other, _) => panic!("unexpected route {other:?} (expect_broker={expect_broker})"),
            }
        }
        // Every iteration must have been answered by the server.
        assert_eq!(server.await.unwrap(), samples);
        samples_ms
    }

    /// Hot-path latency evidence for running-process#383 item 2: p50/p99 of
    /// connect + Ping round-trip over the direct lane vs the broker lane
    /// (fake-backend seam, which exercises the full lane wiring: env
    /// dispatch, spawn_blocking resolution, resolution dial, endpoint
    /// translation, re-dial).
    ///
    /// Sanctioned perf shape per PERF.md: a `#[test]` with a generous
    /// absolute Duration budget; the printed numbers are the evidence
    /// recorded in the adoption PR.
    #[tokio::test]
    async fn broker_lane_connect_latency_p50_p99() {
        const WARMUP: usize = 5;
        const SAMPLES: usize = 100;

        // NOTE(review): `None` presumably unsets each variable for the
        // guard's lifetime, so the first measurements take the direct lane —
        // confirm against `EnvVarGuard::set_all`.
        let _env = EnvVarGuard::set_all(&[
            (RUNNING_PROCESS_DISABLE_ENV, None),
            (RUNNING_PROCESS_FAKE_BACKEND_ENV, None),
            (ZCCACHE_BROKER_CONNECT_ENV, None),
        ]);

        // Warmup both lanes (first-connect costs: pipe namespace setup,
        // thread-pool spinup for spawn_blocking).
        measure_connect_roundtrip_ms(WARMUP, false).await;
        let mut direct = measure_connect_roundtrip_ms(SAMPLES, false).await;

        measure_connect_roundtrip_ms(WARMUP, true).await;
        let mut broker = measure_connect_roundtrip_ms(SAMPLES, true).await;
        // The broker-lane measurements set the seam variable; clear it
        // before reporting.
        std::env::remove_var(RUNNING_PROCESS_FAKE_BACKEND_ENV);

        // `percentile_ms` expects ascending order.
        direct.sort_by(|a, b| a.partial_cmp(b).unwrap());
        broker.sort_by(|a, b| a.partial_cmp(b).unwrap());
        // Prints one summary line per lane and returns its (p50, p99).
        let report = |label: &str, sorted: &[f64]| {
            let p50 = percentile_ms(sorted, 0.50);
            let p99 = percentile_ms(sorted, 0.99);
            println!(
                "  {label:<28} p50={p50:>8.3}ms  p99={p99:>8.3}ms  min={:>8.3}ms  max={:>8.3}ms  (n={})",
                sorted[0],
                sorted[sorted.len() - 1],
                sorted.len()
            );
            (p50, p99)
        };
        println!("broker lane connect+ping latency (running-process#383 evidence):");
        let (_direct_p50, direct_p99) = report("direct lane", &direct);
        let (_broker_p50, broker_p99) = report("broker lane (seam)", &broker);

        // Generous absolute budgets: local IPC connect + one round-trip
        // must stay well under a second even on loaded CI runners. These
        // exist to catch order-of-magnitude regressions, not to be tight.
        assert!(
            direct_p99 < 1000.0,
            "direct lane p99 {direct_p99:.3}ms exceeded 1000ms budget"
        );
        assert!(
            broker_p99 < 1000.0,
            "broker lane p99 {broker_p99:.3}ms exceeded 1000ms budget"
        );
    }

    /// Converting a zccache endpoint to running-process form and back must
    /// reproduce the original endpoint.
    #[test]
    fn endpoint_translation_round_trips() {
        let endpoint = unique_test_endpoint();
        let bare = to_running_process_endpoint(&endpoint);
        let restored = to_zccache_endpoint(&bare);
        assert_eq!(restored, endpoint);

        // On Windows, also pin the exact prefix handling in each direction.
        #[cfg(windows)]
        {
            assert_eq!(to_zccache_endpoint("name"), r"\\.\pipe\name");
            assert_eq!(to_running_process_endpoint(r"\\.\pipe\name"), "name");
        }
    }
}