1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
// SPDX-License-Identifier: Apache-2.0
//! Local daemon UDS cache mechanism tests.
//!
//! The caches in `crates/cli/src/client/local_daemon.rs` exist to make
//! a hot agent loop cheap: the first daemon detection / channel build
//! pays the filesystem + connect cost, every subsequent call in the
//! process is an O(1) cache hit.
//!
//! These tests assert that property by the *mechanism*, not the clock.
//! Earlier revisions compared wall-clock time (warm ≥5× faster than
//! cold), which flaked on shared CI runners — worse under `llvm-cov` —
//! and became merge-blocking once `Check & test` was made a required
//! check (see issue #722). Wall-clock ratios are nondeterministic; the
//! cache hit/miss is not. So instead we count the underlying expensive
//! operations via process-wide instrumentation
//! (`probe_run_count` / `channel_build_count`) and assert the warm path
//! skips them entirely. This proves the exact same O(1)/warm-skip
//! behavior, deterministically.
//!
//! Both tests run against a daemon started in-process via
//! `daemon::local_daemon::serve`.
#![cfg(unix)]
use std::{
io::ErrorKind,
path::PathBuf,
time::{Duration, Instant},
};
use cli::client::local_daemon::{
LocalDaemonStatus, channel_build_count, connect_local_daemon_channel, detect_local_daemon,
probe, probe_run_count,
};
use daemon::local_daemon::{LocalDaemonConfig, serve};
use objects::error::HeddleError;
use repo::Repository;
use tempfile::TempDir;
use tokio::sync::oneshot;
/// Start an in-process local daemon for the duration of one test.
///
/// A fresh repository is initialised in a temp dir, `serve` is spawned on a
/// tokio task, and this function then polls until the daemon looks ready:
/// the `sockets/grpc.sock` file exists under the heddle dir and `probe`
/// reports `LocalDaemonStatus::Running`.
///
/// Returns `Some((temp, heddle_dir, shutdown, task))` once the daemon is up:
/// - `temp`: the `TempDir` backing the repo; keep it bound for the whole
///   test so the directory outlives the daemon,
/// - `heddle_dir`: the path reported by `Repository::heddle_dir`,
/// - `shutdown`: sending `()` on it (or dropping it) completes the shutdown
///   future handed to `serve`,
/// - `task`: the join handle of the daemon task.
///
/// Returns `None` when `serve` fails with a permission-denied I/O error, so
/// the caller can skip the test instead of failing it.
///
/// # Panics
///
/// Panics if the temp dir or repo cannot be created, if the daemon task ends
/// before readiness for any other reason, or if readiness is not observed
/// within 5 seconds.
async fn spawn_local_daemon() -> Option<(
    TempDir,
    PathBuf,
    oneshot::Sender<()>,
    tokio::task::JoinHandle<()>,
)> {
    let temp = TempDir::new().expect("temp repo dir");
    let repo = Repository::init_default(temp.path()).expect("init heddle repo");
    let heddle_dir = repo.heddle_dir().to_path_buf();
    let config = LocalDaemonConfig::from_repo(&repo);

    let (shutdown_tx, shutdown_rx) = oneshot::channel::<()>();
    // Carries the error out of the daemon task if `serve` fails, so the
    // polling loop below can tell "still starting" from "already dead".
    let (failure_tx, mut failure_rx) = oneshot::channel::<HeddleError>();

    let daemon_task = tokio::spawn(async move {
        // `serve` takes `repo` by value, so the spawned task owns it.
        let outcome = serve(repo, config, async move {
            let _ = shutdown_rx.await;
        })
        .await;
        if let Err(err) = outcome {
            eprintln!("serve_local test daemon exited: {err}");
            let _ = failure_tx.send(err);
        }
    });

    // Poll until the listener has bound: the socket file must exist and the
    // probe must report `Running`. The socket check comes first so `probe`
    // is only invoked once there is a socket to look at.
    let socket_path = heddle_dir.join("sockets").join("grpc.sock");
    let give_up_at = Instant::now() + Duration::from_secs(5);
    let daemon_ready = || {
        socket_path.exists()
            && matches!(probe(&heddle_dir).status, LocalDaemonStatus::Running { .. })
    };

    while !daemon_ready() {
        assert!(
            Instant::now() <= give_up_at,
            "local daemon did not come up within 5s; socket exists={}",
            socket_path.exists()
        );

        // Either the daemon task reports that it died, or we nap briefly and
        // poll again.
        tokio::select! {
            outcome = &mut failure_rx => {
                let Ok(err) = outcome else {
                    panic!("local daemon exited before startup without an error");
                };
                if !is_permission_denied(&err) {
                    panic!("local daemon exited before startup: {err}");
                }
                eprintln!(
                    "skipping serve_local daemon cache test: local UDS daemon startup denied: {err}"
                );
                // Give the already-failed task a moment to finish unwinding.
                let _ = tokio::time::timeout(Duration::from_secs(1), daemon_task).await;
                return None;
            }
            _ = tokio::time::sleep(Duration::from_millis(20)) => {}
        }
    }

    Some((temp, heddle_dir, shutdown_tx, daemon_task))
}
/// Reports whether `err` is the `HeddleError::Io` variant wrapping an I/O
/// error whose kind is `ErrorKind::PermissionDenied`.
///
/// Every other variant, and every other I/O error kind, yields `false`.
fn is_permission_denied(err: &HeddleError) -> bool {
    match err {
        HeddleError::Io(io) => io.kind() == ErrorKind::PermissionDenied,
        _ => false,
    }
}
/// Warm `detect_local_daemon` calls must be cache hits that never run
/// `probe`.
///
/// The check is made on the probe counter (`probe_run_count`), not on
/// wall-clock time: the counter must stay put across the warm calls and must
/// advance by exactly one per direct `probe` call.
#[tokio::test(flavor = "multi_thread")]
#[serial_test::serial(process_global)]
async fn detect_local_daemon_warm_path_skips_the_cold_probe() {
    let Some((_repo_dir, heddle_dir, stop_daemon, daemon_task)) = spawn_local_daemon().await
    else {
        // Daemon startup was denied in this environment; nothing to test.
        return;
    };

    // Fill the process-wide `DETECT_CACHE` entry for `heddle_dir` up front.
    // This first, cache-filling call is deliberately outside the warm
    // sample; everything after it should be a pure cache hit.
    let primed = detect_local_daemon(&heddle_dir).expect("daemon present");

    // --- Warm path -------------------------------------------------------
    // A warm hit is expected to return the cached `Option<UdsTarget>`
    // without running `probe` (the cold setup step: two file stats,
    // `kill(pid, 0)` and the executable-identity check). Snapshot the probe
    // counter on both sides of the calls and require a zero delta.
    let warm_iters = 100u64;
    let probes_at_warm_start = probe_run_count(&heddle_dir);
    let warm_targets =
        (0..warm_iters).map(|_| detect_local_daemon(&heddle_dir).expect("daemon present"));
    for target in warm_targets {
        // Each hit must be the very target that was cached, not the result
        // of a fresh (and possibly different) probe.
        assert_eq!(target, primed, "warm detect must return the cached target");
        std::hint::black_box(target);
    }
    let probes_at_warm_end = probe_run_count(&heddle_dir);
    let warm_probe_runs = probes_at_warm_end - probes_at_warm_start;
    assert_eq!(
        warm_probe_runs, 0,
        "warm detect_local_daemon must skip the cold probe entirely \
         (cache hit), but probe ran {warm_probe_runs} time(s) over \
         {warm_iters} warm calls"
    );

    // --- Cold path, for contrast -----------------------------------------
    // Calling `probe` directly is exactly the work the warm path skipped, so
    // the counter has to advance once per call. Without this check a counter
    // that never moves would make the warm assertion vacuous.
    let cold_iters = 100u64;
    let probes_at_cold_start = probe_run_count(&heddle_dir);
    for result in (0..cold_iters).map(|_| probe(&heddle_dir)) {
        assert!(matches!(result.status, LocalDaemonStatus::Running { .. }));
        std::hint::black_box(result);
    }
    let probes_at_cold_end = probe_run_count(&heddle_dir);
    let cold_probe_runs = probes_at_cold_end - probes_at_cold_start;
    assert_eq!(
        cold_probe_runs, cold_iters,
        "each cold probe() must run the setup step exactly once; \
         expected {cold_iters} runs, saw {cold_probe_runs}"
    );

    // --- Channel round-trip ----------------------------------------------
    // Go beyond the cheap file-stat cache: build the tonic UDS channel and
    // run the Health.Check handshake against the live `serve()` daemon. That
    // daemon installs no health reporter, so the expected status code is
    // Unimplemented, which the client treats as "channel is alive".
    let channel = connect_local_daemon_channel(&heddle_dir, Duration::from_millis(500)).await;
    assert!(
        channel.is_some(),
        "connect_local_daemon_channel must succeed against a live daemon"
    );

    // Best-effort teardown: request shutdown, then wait a bounded time for
    // the daemon task to finish.
    let _ = stop_daemon.send(());
    let _ = tokio::time::timeout(Duration::from_secs(5), daemon_task).await;
}
/// A second `connect_local_daemon_channel` call must be served from the
/// channel cache rather than building a new channel.
///
/// Per the module's design notes, the first call does the real work inside
/// `build_channel` (open the UDS, build the tonic channel, run the
/// `Health.Check` handshake), and later calls should return a clone of the
/// entry in `CHANNEL_CACHE`. `build_channel` bumps a process-wide counter,
/// so the test checks the mechanism via `channel_build_count` instead of
/// timing: +1 on the first connect, +0 on the second. (An earlier revision
/// compared wall-clock ratios and flaked under CI load; see issue #722.)
#[tokio::test(flavor = "multi_thread")]
#[serial_test::serial(process_global)]
async fn channel_cache_returns_clones_without_rebuilding() {
    let Some((_repo_dir, heddle_dir, stop_daemon, daemon_task)) = spawn_local_daemon().await
    else {
        // Daemon startup was denied in this environment; nothing to test.
        return;
    };

    let connect_timeout = Duration::from_millis(500);

    // Cold connect: exactly one channel build is expected.
    let builds_at_start = channel_build_count(&heddle_dir);
    let first = connect_local_daemon_channel(&heddle_dir, connect_timeout)
        .await
        .expect("first connect");
    let builds_after_cold = channel_build_count(&heddle_dir);
    let cold_builds = builds_after_cold - builds_at_start;
    assert_eq!(
        cold_builds, 1,
        "first connect_local_daemon_channel must build the channel exactly once"
    );

    // Warm connect: the build counter must not move at all.
    let second = connect_local_daemon_channel(&heddle_dir, connect_timeout)
        .await
        .expect("cached connect");
    let builds_after_warm = channel_build_count(&heddle_dir);
    let warm_builds = builds_after_warm - builds_after_cold;
    assert_eq!(
        warm_builds, 0,
        "second connect_local_daemon_channel must be an O(1) cache hit \
         that returns a clone WITHOUT rebuilding the channel; \
         build ran {} extra time(s)",
        warm_builds
    );

    // Both handles must point at the same target: the warm one is a clone
    // of the cached channel, not the product of a separate re-detection.
    assert_eq!(
        first.target, second.target,
        "cached channel must carry the same UdsTarget as the first build"
    );

    // Best-effort teardown: request shutdown, then wait a bounded time for
    // the daemon task to finish.
    let _ = stop_daemon.send(());
    let _ = tokio::time::timeout(Duration::from_secs(5), daemon_task).await;
}