Skip to main content

zccache_cli/
lib.rs

1#![allow(clippy::missing_errors_doc)]
2
3use std::path::Path;
4use zccache_core::NormalizedPath;
5
6#[cfg(feature = "python")]
7mod python;
8
9pub mod symbols;
10
11pub use zccache_download_client::{
12    ArchiveFormat, DownloadSource, FetchRequest, FetchResult, FetchState, FetchStateKind,
13    FetchStatus, WaitMode,
14};
15
/// Options for converting an Arduino `.ino` sketch into a C++ source file.
///
/// Both fields are forwarded to the Arduino conversion step and also take
/// part in the cache key computed by [`run_ino_convert_cached`].
#[derive(Clone, Debug)]
pub struct InoConvertOptions {
    /// Extra arguments handed to the conversion step.
    pub clang_args: Vec<String>,
    /// Whether the conversion should inject the Arduino include.
    pub inject_arduino_include: bool,
}

impl Default for InoConvertOptions {
    /// No extra clang arguments; Arduino include injection switched on.
    fn default() -> Self {
        InoConvertOptions {
            inject_arduino_include: true,
            clang_args: Vec::new(),
        }
    }
}
30
/// Outcome of a cached `.ino` conversion.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct InoConvertResult {
    /// `true` when the generated C++ came from the cache instead of being
    /// regenerated.
    pub cache_hit: bool,
    /// `true` when the output file already matched the cached content and
    /// was therefore left untouched.
    pub skipped_write: bool,
}
36
37#[derive(Debug, Clone)]
38pub struct DownloadParams {
39    pub source: DownloadSource,
40    pub archive_path: Option<std::path::PathBuf>,
41    pub unarchive_path: Option<std::path::PathBuf>,
42    pub expected_sha256: Option<String>,
43    pub archive_format: ArchiveFormat,
44    pub max_connections: Option<usize>,
45    pub min_segment_size: Option<u64>,
46    pub wait_mode: WaitMode,
47    pub dry_run: bool,
48    pub force: bool,
49}
50
51impl DownloadParams {
52    #[must_use]
53    pub fn new(source: impl Into<DownloadSource>) -> Self {
54        Self {
55            source: source.into(),
56            archive_path: None,
57            unarchive_path: None,
58            expected_sha256: None,
59            archive_format: ArchiveFormat::Auto,
60            max_connections: None,
61            min_segment_size: None,
62            wait_mode: WaitMode::Block,
63            dry_run: false,
64            force: false,
65        }
66    }
67}
68
69pub fn run_ino_convert_cached(
70    input: &Path,
71    output: &Path,
72    options: &InoConvertOptions,
73) -> Result<InoConvertResult, Box<dyn std::error::Error>> {
74    let input_hash = zccache_hash::hash_file(input)?;
75    let mut hasher = zccache_hash::StreamHasher::new();
76    hasher.update(b"zccache-ino-convert-v1");
77    hasher.update(input_hash.as_bytes());
78    hasher.update(input.as_os_str().to_string_lossy().as_bytes());
79    hasher.update(if options.inject_arduino_include {
80        b"include-arduino-h"
81    } else {
82        b"no-arduino-h"
83    });
84    if let Some(libclang_hash) = zccache_compiler::arduino::libclang_hash() {
85        hasher.update(libclang_hash.as_bytes());
86    }
87    for arg in &options.clang_args {
88        hasher.update(arg.as_bytes());
89        hasher.update(b"\0");
90    }
91    let cache_key = hasher.finalize().to_hex();
92
93    let cache_dir = zccache_core::config::default_cache_dir().join("ino");
94    std::fs::create_dir_all(&cache_dir)?;
95    let cached_cpp = cache_dir.join(format!("{cache_key}.ino.cpp"));
96
97    if cached_cpp.exists() {
98        return restore_cached_ino_output(&cached_cpp, output);
99    }
100
101    let generated = zccache_compiler::arduino::generate_ino_cpp(
102        input,
103        &zccache_compiler::arduino::ArduinoConversionOptions {
104            clang_args: options.clang_args.clone(),
105            inject_arduino_include: options.inject_arduino_include,
106        },
107    )?;
108
109    write_file_atomically(&cached_cpp, generated.cpp.as_bytes())?;
110    restore_cached_ino_output(&cached_cpp, output).map(|_| InoConvertResult {
111        cache_hit: false,
112        skipped_write: false,
113    })
114}
115
116fn restore_cached_ino_output(
117    cached_cpp: &Path,
118    output: &Path,
119) -> Result<InoConvertResult, Box<dyn std::error::Error>> {
120    if output.exists() {
121        let output_hash = zccache_hash::hash_file(output)?;
122        let cached_hash = zccache_hash::hash_file(cached_cpp)?;
123        if output_hash == cached_hash {
124            return Ok(InoConvertResult {
125                cache_hit: true,
126                skipped_write: true,
127            });
128        }
129    }
130
131    if let Some(parent) = output.parent() {
132        std::fs::create_dir_all(parent)?;
133    }
134    std::fs::copy(cached_cpp, output)?;
135    Ok(InoConvertResult {
136        cache_hit: true,
137        skipped_write: false,
138    })
139}
140
141fn write_file_atomically(path: &Path, data: &[u8]) -> Result<(), std::io::Error> {
142    let parent = path.parent().unwrap_or_else(|| Path::new("."));
143    std::fs::create_dir_all(parent)?;
144
145    let tmp = tempfile::NamedTempFile::new_in(parent)?;
146    std::fs::write(tmp.path(), data)?;
147    match tmp.persist(path) {
148        Ok(_) => Ok(()),
149        Err(err) => Err(err.error),
150    }
151}
152
153fn resolve_endpoint(explicit: Option<&str>) -> String {
154    if let Some(ep) = explicit {
155        return ep.to_string();
156    }
157    if let Ok(ep) = std::env::var("ZCCACHE_ENDPOINT") {
158        return ep;
159    }
160    zccache_ipc::default_endpoint()
161}
162
163pub fn infer_download_archive_path(
164    source: &DownloadSource,
165    archive_format: ArchiveFormat,
166) -> std::path::PathBuf {
167    let file_name = infer_download_file_name(source, archive_format);
168    zccache_core::config::default_cache_dir()
169        .join("downloads")
170        .join("artifacts")
171        .join(file_name)
172        .into_path_buf()
173}
174
175#[must_use]
176pub fn build_download_request(params: DownloadParams) -> FetchRequest {
177    let archive_path = params
178        .archive_path
179        .unwrap_or_else(|| infer_download_archive_path(&params.source, params.archive_format));
180    let mut request = FetchRequest::new(params.source, archive_path);
181    request.destination_path_expanded = params.unarchive_path;
182    request.expected_sha256 = params.expected_sha256;
183    request.archive_format = params.archive_format;
184    request.wait_mode = params.wait_mode;
185    request.dry_run = params.dry_run;
186    request.force = params.force;
187    request.download_options.force = params.force;
188    request.download_options.max_connections = params.max_connections;
189    request.download_options.min_segment_size = params.min_segment_size;
190    request
191}
192
193pub fn client_download(
194    endpoint: Option<&str>,
195    params: DownloadParams,
196) -> Result<FetchResult, String> {
197    let request = build_download_request(params);
198    let client = zccache_download_client::DownloadClient::new(endpoint.map(ToOwned::to_owned));
199    client.fetch(request)
200}
201
202pub fn client_download_exists(
203    endpoint: Option<&str>,
204    params: DownloadParams,
205) -> Result<FetchState, String> {
206    let request = build_download_request(params);
207    let client = zccache_download_client::DownloadClient::new(endpoint.map(ToOwned::to_owned));
208    client.exists(&request)
209}
210
211fn infer_download_file_name(source: &DownloadSource, archive_format: ArchiveFormat) -> String {
212    let base = infer_source_file_name(source);
213    let hash = blake3::hash(download_source_key(source).as_bytes())
214        .to_hex()
215        .to_string();
216    let suffix = archive_suffix(archive_format);
217
218    if base.contains('.') || suffix.is_empty() {
219        format!("{hash}-{base}")
220    } else {
221        format!("{hash}-{base}{suffix}")
222    }
223}
224
225fn infer_source_file_name(source: &DownloadSource) -> String {
226    match source {
227        DownloadSource::Url(url) => {
228            infer_url_file_name(url).unwrap_or_else(|| "download".to_string())
229        }
230        DownloadSource::MultipartUrls(urls) => infer_multipart_file_name(urls),
231    }
232}
233
234fn infer_url_file_name(url: &str) -> Option<String> {
235    url.split(['?', '#'])
236        .next()
237        .and_then(|value| value.rsplit('/').next())
238        .filter(|value| !value.is_empty())
239        .map(sanitize_download_file_name)
240        .filter(|value| !value.is_empty())
241}
242
243fn infer_multipart_file_name(urls: &[String]) -> String {
244    let base = urls
245        .first()
246        .and_then(|url| infer_url_file_name(url))
247        .map(|name| strip_part_suffix(&name).to_string())
248        .filter(|name| !name.is_empty())
249        .unwrap_or_else(|| "multipart-download".to_string());
250    if base.contains('.') {
251        base
252    } else {
253        "multipart-download".to_string()
254    }
255}
256
/// Removes a trailing multipart marker from `value`.
///
/// Recognized forms, tried in order:
/// 1. `<base>.part-<suffix>` and `<base>.part_<suffix>` (split at the last
///    marker; both sides must be non-empty),
/// 2. `<base>.part<suffix>` where the suffix is non-empty and made only of
///    ASCII alphanumerics, `-` or `_`.
///
/// Anything else is returned unchanged.
fn strip_part_suffix(value: &str) -> &str {
    const MARKER: &str = ".part";

    for separated in [".part-", ".part_"] {
        match value.rsplit_once(separated) {
            Some((stem, tail)) if !(stem.is_empty() || tail.is_empty()) => return stem,
            _ => {}
        }
    }

    let Some(start) = value.rfind(MARKER) else {
        return value;
    };
    let tail = &value[start + MARKER.len()..];
    let tail_is_part_id = !tail.is_empty()
        && tail
            .chars()
            .all(|ch| matches!(ch, '-' | '_') || ch.is_ascii_alphanumeric());

    if tail_is_part_id {
        &value[..start]
    } else {
        value
    }
}
280
281fn download_source_key(source: &DownloadSource) -> String {
282    match source {
283        DownloadSource::Url(url) => url.clone(),
284        DownloadSource::MultipartUrls(urls) => urls.join("\n"),
285    }
286}
287
/// Replaces characters that are unsafe in file names — `< > : " / \ | ? *`
/// and any control character — with `_`, leaving everything else as is.
fn sanitize_download_file_name(value: &str) -> String {
    const RESERVED: [char; 9] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*'];

    let mut cleaned = String::with_capacity(value.len());
    for ch in value.chars() {
        if RESERVED.contains(&ch) || ch.is_control() {
            cleaned.push('_');
        } else {
            cleaned.push(ch);
        }
    }
    cleaned
}
298
299fn archive_suffix(format: ArchiveFormat) -> &'static str {
300    match format {
301        ArchiveFormat::Auto | ArchiveFormat::None => "",
302        ArchiveFormat::Zst => ".zst",
303        ArchiveFormat::Zip => ".zip",
304        ArchiveFormat::Xz => ".xz",
305        ArchiveFormat::TarGz => ".tar.gz",
306        ArchiveFormat::TarXz => ".tar.xz",
307        ArchiveFormat::TarZst => ".tar.zst",
308        ArchiveFormat::SevenZip => ".7z",
309    }
310}
311
312fn run_async<T>(future: impl std::future::Future<Output = Result<T, String>>) -> Result<T, String> {
313    tokio::runtime::Builder::new_current_thread()
314        .enable_all()
315        .build()
316        .map_err(|e| format!("failed to create tokio runtime: {e}"))?
317        .block_on(future)
318}
319
/// Result of probing the daemon at an endpoint and comparing its version
/// with this client's.
#[derive(Debug)]
enum VersionCheck {
    /// Daemon answered and its version matches the client's.
    Ok,
    /// No connection could be established.
    Unreachable,
    /// Daemon reported a version older than the client's, or one that
    /// could not be parsed.
    DaemonOlder { daemon_ver: String },
    /// Daemon reported a version newer than the client's.
    DaemonNewer,
    /// Connected, but the status exchange failed or returned something
    /// other than a status response.
    CommError,
}
328
329#[cfg(unix)]
330async fn connect_client(
331    endpoint: &str,
332) -> Result<zccache_ipc::IpcConnection, zccache_ipc::IpcError> {
333    let mut conn = zccache_ipc::connect(endpoint).await?;
334    conn.set_recv_timeout(zccache_ipc::DEFAULT_CLIENT_RECV_TIMEOUT);
335    Ok(conn)
336}
337
/// Connects to the daemon at `endpoint` and applies the default client
/// receive timeout to the new connection (Windows flavour).
#[cfg(windows)]
async fn connect_client(
    endpoint: &str,
) -> Result<zccache_ipc::IpcClientConnection, zccache_ipc::IpcError> {
    zccache_ipc::connect(endpoint).await.map(|mut conn| {
        conn.set_recv_timeout(zccache_ipc::DEFAULT_CLIENT_RECV_TIMEOUT);
        conn
    })
}
346
347async fn check_daemon_version(endpoint: &str) -> VersionCheck {
348    let mut conn = match connect_client(endpoint).await {
349        Ok(c) => c,
350        Err(_) => return VersionCheck::Unreachable,
351    };
352    if conn.send(&zccache_protocol::Request::Status).await.is_err() {
353        return VersionCheck::CommError;
354    }
355    match conn.recv::<zccache_protocol::Response>().await {
356        Ok(Some(zccache_protocol::Response::Status(s))) => {
357            if s.version == zccache_core::VERSION {
358                return VersionCheck::Ok;
359            }
360            let client_ver = zccache_core::version::current();
361            match zccache_core::version::Version::parse(&s.version) {
362                Some(daemon_ver) => match daemon_ver.cmp(&client_ver) {
363                    std::cmp::Ordering::Equal => VersionCheck::Ok,
364                    std::cmp::Ordering::Greater => VersionCheck::DaemonNewer,
365                    std::cmp::Ordering::Less => VersionCheck::DaemonOlder {
366                        daemon_ver: s.version,
367                    },
368                },
369                None => VersionCheck::DaemonOlder {
370                    daemon_ver: s.version,
371                },
372            }
373        }
374        _ => VersionCheck::CommError,
375    }
376}
377
378async fn spawn_and_wait(endpoint: &str, reason: &str) -> Result<(), String> {
379    let daemon_bin = find_daemon_binary().ok_or("cannot find zccache-daemon binary")?;
380    // Record *why* the CLI is about to spawn a daemon. Pairs with the
381    // daemon-side "spawn" event so an operator can correlate each CLI
382    // decision with the resulting daemon PID by parsing the single
383    // `daemon-lifecycle.log`. Reasons: initial-start vs. one of the
384    // replaced-* variants. This is the diagnostic gap zccache#323
385    // identified — knowing 5 daemons spawned without knowing why
386    // makes the root cause undebuggable.
387    zccache_core::lifecycle::write_event(
388        zccache_core::lifecycle::EVENT_SPAWN_ATTEMPT,
389        serde_json::json!({
390            "reason": reason,
391            "endpoint": endpoint,
392            "client_pid": std::process::id(),
393        }),
394    );
395    spawn_daemon(&daemon_bin, endpoint)?;
396
397    for _ in 0..100 {
398        tokio::time::sleep(std::time::Duration::from_millis(100)).await;
399        if connect_client(endpoint).await.is_ok() {
400            return Ok(());
401        }
402    }
403    Err("daemon started but not accepting connections after 10s".to_string())
404}
405
406/// Stop a stale daemon that is unreachable or version-incompatible.
407async fn stop_stale_daemon(endpoint: &str) {
408    if let Ok(mut conn) = connect_client(endpoint).await {
409        let _ = conn.send(&zccache_protocol::Request::Shutdown).await;
410        tokio::time::sleep(std::time::Duration::from_millis(200)).await;
411    }
412
413    if let Some(pid) = zccache_ipc::check_running_daemon() {
414        if zccache_ipc::force_kill_process(pid).is_ok() {
415            for _ in 0..50 {
416                if !zccache_ipc::is_process_alive(pid) {
417                    break;
418                }
419                tokio::time::sleep(std::time::Duration::from_millis(100)).await;
420            }
421        }
422        zccache_ipc::remove_lock_file();
423    }
424
425    tokio::time::sleep(std::time::Duration::from_millis(200)).await;
426}
427
428async fn ensure_daemon(endpoint: &str) -> Result<(), String> {
429    match check_daemon_version(endpoint).await {
430        VersionCheck::Ok | VersionCheck::DaemonNewer => return Ok(()),
431        VersionCheck::DaemonOlder { daemon_ver } => {
432            tracing::info!(
433                daemon_ver,
434                client_ver = zccache_core::VERSION,
435                "daemon is older than client, auto-recovering"
436            );
437            stop_stale_daemon(endpoint).await;
438            return spawn_and_wait(
439                endpoint,
440                zccache_core::lifecycle::REASON_REPLACED_STALE_VERSION,
441            )
442            .await;
443        }
444        VersionCheck::CommError => {
445            tracing::info!("cannot communicate with daemon, auto-recovering");
446            stop_stale_daemon(endpoint).await;
447            return spawn_and_wait(
448                endpoint,
449                zccache_core::lifecycle::REASON_REPLACED_COMM_ERROR,
450            )
451            .await;
452        }
453        VersionCheck::Unreachable => {}
454    }
455
456    if let Some(pid) = zccache_ipc::check_running_daemon() {
457        let mut backoff = std::time::Duration::from_millis(100);
458        for _ in 0..20 {
459            tokio::time::sleep(backoff).await;
460            backoff = (backoff * 2).min(std::time::Duration::from_millis(500));
461            match check_daemon_version(endpoint).await {
462                VersionCheck::Ok | VersionCheck::DaemonNewer => return Ok(()),
463                VersionCheck::DaemonOlder { daemon_ver } => {
464                    tracing::info!(
465                        daemon_ver,
466                        client_ver = zccache_core::VERSION,
467                        "daemon is older than client during startup, auto-recovering"
468                    );
469                    stop_stale_daemon(endpoint).await;
470                    return spawn_and_wait(
471                        endpoint,
472                        zccache_core::lifecycle::REASON_REPLACED_STALE_VERSION,
473                    )
474                    .await;
475                }
476                VersionCheck::CommError => {
477                    stop_stale_daemon(endpoint).await;
478                    return spawn_and_wait(
479                        endpoint,
480                        zccache_core::lifecycle::REASON_REPLACED_COMM_ERROR,
481                    )
482                    .await;
483                }
484                VersionCheck::Unreachable => continue,
485            }
486        }
487        return Err(format!(
488            "daemon process {pid} exists but not accepting connections after retrying"
489        ));
490    }
491
492    spawn_and_wait(endpoint, zccache_core::lifecycle::REASON_INITIAL_START).await
493}
494
495fn find_daemon_binary() -> Option<NormalizedPath> {
496    let name = if cfg!(windows) {
497        "zccache-daemon.exe"
498    } else {
499        "zccache-daemon"
500    };
501
502    if let Ok(exe) = std::env::current_exe() {
503        if let Some(dir) = exe.parent() {
504            let candidate = dir.join(name);
505            if candidate.exists() {
506                return Some(candidate.into());
507            }
508        }
509    }
510
511    which_on_path(name)
512}
513
514fn which_on_path(name: &str) -> Option<NormalizedPath> {
515    let path_var = std::env::var_os("PATH")?;
516    for dir in std::env::split_paths(&path_var) {
517        let candidate = dir.join(name);
518        if candidate.is_file() {
519            return Some(candidate.into());
520        }
521        #[cfg(windows)]
522        if Path::new(name).extension().is_none() {
523            let with_exe = dir.join(format!("{name}.exe"));
524            if with_exe.is_file() {
525                return Some(with_exe.into());
526            }
527        }
528    }
529    None
530}
531
532/// Initialize spawn-lineage env vars on a command the CLI is about to spawn.
533///
534/// Mirrors the daemon-side propagation in `zccache_daemon::lineage` so that
535/// any process attribution (orphan tracking, running-process scanners) sees
536/// a consistent chain across CLI -> daemon -> compiler hops. The chain is
537/// initialized with the CLI's PID, and the originator marker (used by
538/// running-process for crash-resilient orphan discovery) is set to
539/// `zccache-cli:<pid>` unless an outer tool has already claimed it.
540#[cfg(not(windows))]
541fn apply_cli_spawn_lineage(cmd: &mut std::process::Command) {
542    for (k, v) in cli_spawn_lineage_env() {
543        cmd.env(k, v);
544    }
545}
546
/// Compute the lineage env-var pairs the CLI sets on the daemon it
/// spawns. Returns the same overrides `apply_cli_spawn_lineage` writes
/// onto a `Command`, in a form usable by the Windows raw-spawn path
/// (which needs to build its own merged environment block).
///
/// The returned pairs are, in order:
/// * `RUNNING_PROCESS_ORIGINATOR` = `zccache-cli:<pid>` — only when the
///   variable is not already readable from the environment;
/// * `ZCCACHE_LINEAGE` — the inherited chain extended with `><pid>`, the
///   inherited chain unchanged if it already ends in this PID, or just
///   `<pid>` when the variable is unset, unreadable, or empty;
/// * `ZCCACHE_PARENT_PID` and `ZCCACHE_CLIENT_PID` = `<pid>`.
fn cli_spawn_lineage_env() -> Vec<(String, String)> {
    const ENV_ORIGINATOR: &str = "RUNNING_PROCESS_ORIGINATOR";
    const ENV_LINEAGE: &str = "ZCCACHE_LINEAGE";
    const ENV_PARENT_PID: &str = "ZCCACHE_PARENT_PID";
    const ENV_CLIENT_PID: &str = "ZCCACHE_CLIENT_PID";

    let cli_pid = std::process::id().to_string();
    let mut out: Vec<(String, String)> = Vec::with_capacity(4);

    // Preserve any outer originator (e.g. the build tool was already wrapped
    // by running-process). Otherwise, claim the originator slot ourselves.
    if std::env::var(ENV_ORIGINATOR).is_err() {
        out.push((ENV_ORIGINATOR.to_string(), format!("zccache-cli:{cli_pid}")));
    }

    // Extend or initialize the chain with our PID.
    let chain = match std::env::var(ENV_LINEAGE) {
        // An empty inherited value carries no lineage; appending to it
        // would yield a malformed chain with a leading '>', so start fresh.
        Ok(existing) if existing.is_empty() => cli_pid.clone(),
        Ok(existing) => {
            let already_last = existing
                .rsplit_once('>')
                .map_or(existing.as_str(), |(_, last)| last)
                == cli_pid.as_str();
            if already_last {
                existing
            } else {
                format!("{existing}>{cli_pid}")
            }
        }
        Err(_) => cli_pid.clone(),
    };
    out.push((ENV_LINEAGE.to_string(), chain));
    out.push((ENV_PARENT_PID.to_string(), cli_pid.clone()));
    out.push((ENV_CLIENT_PID.to_string(), cli_pid));
    out
}
584
/// Name of the directory, inside the zccache global cache directory, that
/// holds the per-launch copies of the daemon binary made by the CLI.
///
/// The daemon always runs from one of these copies rather than from its
/// install path (e.g. `Scripts/zccache-daemon.exe`). That keeps the install
/// path overwritable by `pip install --upgrade zccache` whether or not a
/// daemon is alive. See issue #134.
const RUNTIME_BINARIES_SUBDIR: &str = "runtime-binaries";
591
592/// Returns `<global_cache_dir>/runtime-binaries`.
593#[must_use]
594pub fn runtime_binaries_dir() -> NormalizedPath {
595    zccache_core::config::default_cache_dir().join(RUNTIME_BINARIES_SUBDIR)
596}
597
598/// Copy `canonical` (the daemon binary at its install location) to a unique
599/// path inside [`runtime_binaries_dir`] and return the new path. The caller
600/// then spawns from the returned path so the install location is never
601/// file-locked by a running daemon.
602///
603/// On copy failure the caller should fall back to spawning `canonical`
604/// directly; the in-place `unlock_exe()` in the daemon then handles the
605/// lock removal as a fallback.
606pub fn prepare_daemon_exe(canonical: &Path) -> Result<std::path::PathBuf, std::io::Error> {
607    prepare_daemon_exe_in(canonical, runtime_binaries_dir().as_path())
608}
609
/// Test seam for [`prepare_daemon_exe`]: creates `dir` if needed, copies
/// `canonical` into it under a per-launch name, and returns the
/// destination path.
///
/// The copy is named `zccache-daemon.<id>` plus `canonical`'s extension
/// when it has a UTF-8 one.
pub fn prepare_daemon_exe_in(
    canonical: &Path,
    dir: &Path,
) -> Result<std::path::PathBuf, std::io::Error> {
    std::fs::create_dir_all(dir)?;

    // A PID on its own repeats across reboots, so it is XOR-ed with the
    // sub-second nanoseconds of the current time; collisions stay rare even
    // when several CLI processes spawn back-to-back.
    let nanos = std::time::UNIX_EPOCH
        .elapsed()
        .map(|since_epoch| since_epoch.subsec_nanos())
        .unwrap_or_default();
    let rand_id: u32 = std::process::id() ^ nanos;

    let mut file_name = format!("zccache-daemon.{rand_id}");
    let extension = canonical
        .extension()
        .and_then(|ext| ext.to_str())
        .filter(|ext| !ext.is_empty());
    if let Some(ext) = extension {
        file_name.push('.');
        file_name.push_str(ext);
    }

    let dest = dir.join(file_name);
    std::fs::copy(canonical, &dest)?;
    Ok(dest)
}
636
637/// Best-effort delete every entry in [`runtime_binaries_dir`]. On Windows
638/// the kernel refuses to delete a file with an open handle, so files
639/// belonging to a *currently running* daemon are silently skipped — no PID
640/// tracking, no sidecar files. Cheap enough to call before every spawn.
641pub fn gc_runtime_binaries() {
642    gc_runtime_binaries_in(runtime_binaries_dir().as_path());
643}
644
/// Test seam for [`gc_runtime_binaries`]: attempts to remove every entry
/// of `dir` as a file, ignoring all errors (including an unreadable or
/// missing `dir`).
pub fn gc_runtime_binaries_in(dir: &Path) {
    if let Ok(entries) = std::fs::read_dir(dir) {
        entries.flatten().for_each(|entry| {
            let _ = std::fs::remove_file(entry.path());
        });
    }
}
655
/// Name of the directory, inside the global cache directory, that receives
/// the daemon's own stdout + stderr on every spawn.
///
/// Each spawn writes to a fresh `daemon-spawn-{pid}-{nanos}.log`, so
/// concurrent CLI invocations do not overwrite one another's output.
/// Errors that strike the daemon before its panic hook or lifecycle log is
/// up end up here. Previously they went to `/dev/null` on Unix and caused
/// silent failures (notably the macOS regression that motivated this
/// change).
const DAEMON_SPAWN_LOGS_SUBDIR: &str = "logs";
664
665/// Allocate a unique per-spawn log path under `{cache_dir}/logs/`.
666/// The directory is created lazily; if creation fails we still hand back a
667/// path — the daemon's own opener will see the error and fall back to
668/// `Stdio::null` after warning.
669fn allocate_daemon_spawn_log_path() -> std::path::PathBuf {
670    let dir = zccache_core::config::default_cache_dir().join(DAEMON_SPAWN_LOGS_SUBDIR);
671    let _ = std::fs::create_dir_all(dir.as_path());
672    let nanos = std::time::SystemTime::now()
673        .duration_since(std::time::UNIX_EPOCH)
674        .map(|d| d.as_nanos() as u64)
675        .unwrap_or(0);
676    dir.as_path()
677        .join(format!("daemon-spawn-{}-{nanos}.log", std::process::id()))
678}
679
/// Age threshold (24 hours) used by [`gc_log_directory`]: files older than
/// this are deleted. Subdirectories are left alone (the daemon doesn't
/// create any under `logs/` today).
const LOG_GC_CUTOFF: std::time::Duration = std::time::Duration::from_secs(24 * 60 * 60);
684
685/// Best-effort sweep of stale files in `{cache_dir}/logs/`.
686///
687/// Catches every log type that lands in this directory — not just
688/// `daemon-spawn-*.log`. As of the issue-#323 fix this includes:
689///   * `daemon-spawn-{pid}-{nanos}.log` (per-spawn daemon stdio
690///     capture; CLI-owned)
691///   * `daemon-lifecycle.log.1` (rotated lifecycle archive; the daemon
692///     handles its own 1 MiB soft-cap but never garbage-collects the
693///     archive, so it can sit on disk forever after the daemon exits)
694///   * `daemon.log.*` (rotated event-log archives; the EventLogger
695///     keeps N by count, this adds a time-based safety net for archives
696///     left behind by daemons that exited before the next rotation)
697///   * `compile_journal.jsonl.*` (rotated compile-journal archives;
698///     same rationale)
699///   * Anything else that may have accumulated here from past versions
700///     or external tooling
701///
702/// The active `daemon-lifecycle.log` is intentionally *preserved* — a
703/// long-idle daemon may go 24h between writes (spawn → next event),
704/// and deleting it mid-life would erase the very history that #323
705/// needed to diagnose the multi-spawn bug.
706pub fn gc_log_directory() {
707    let dir = zccache_core::config::default_cache_dir().join(DAEMON_SPAWN_LOGS_SUBDIR);
708    gc_log_directory_in(dir.as_path(), LOG_GC_CUTOFF);
709}
710
711/// Test seam for [`gc_log_directory`]. Sweeps stale files in `dir`
712/// older than `cutoff`, preserving the active
713/// `daemon-lifecycle.log` regardless of age.
714pub fn gc_log_directory_in(dir: &Path, cutoff: std::time::Duration) {
715    let entries = match std::fs::read_dir(dir) {
716        Ok(e) => e,
717        Err(_) => return,
718    };
719    let now = std::time::SystemTime::now();
720    for entry in entries.flatten() {
721        let Some(name) = entry.file_name().to_str().map(str::to_owned) else {
722            continue;
723        };
724        // Skip the live lifecycle log: it's the one file that may sit
725        // untouched between a daemon's `spawn` and `died-*` events.
726        // Every other file in `logs/` either rotates often or is a
727        // historical artifact safe to discard once old.
728        if name == zccache_core::lifecycle::LIVE_LOG_FILENAME {
729            continue;
730        }
731        let file_type = entry.file_type();
732        if file_type.map(|t| !t.is_file()).unwrap_or(true) {
733            continue;
734        }
735        let modified = entry
736            .metadata()
737            .and_then(|m| m.modified())
738            .ok()
739            .and_then(|t| now.duration_since(t).ok());
740        if let Some(age) = modified {
741            if age > cutoff {
742                let _ = std::fs::remove_file(entry.path());
743            }
744        }
745    }
746}
747
748/// Back-compat alias for the broadened sweep. Earlier callers used
749/// the spawn-log-only name; new code should use [`gc_log_directory`].
750#[deprecated(note = "use gc_log_directory instead — sweeps the full logs/ directory")]
751pub fn gc_daemon_spawn_logs() {
752    gc_log_directory();
753}
754
755pub fn spawn_daemon(bin: &Path, endpoint: &str) -> Result<(), String> {
756    // GC before the new spawn so neither dir grows unbounded across
757    // crash-loop scenarios. Live daemons keep their open log file FDs;
758    // GC only touches files older than the 24h cutoff and preserves
759    // the active `daemon-lifecycle.log` regardless of age.
760    gc_runtime_binaries();
761    gc_log_directory();
762
763    // Prefer to spawn from a relocated copy in the zccache global dir.
764    // Fall back to the canonical install path if the copy fails — the
765    // daemon's own `unlock_exe()` then handles the in-place rename.
766    let bin_owned: std::path::PathBuf;
767    let spawn_bin: &Path = match prepare_daemon_exe(bin) {
768        Ok(p) => {
769            bin_owned = p;
770            &bin_owned
771        }
772        Err(_) => bin,
773    };
774
775    // Allocate a per-spawn log file path. Passed to the daemon via
776    // `--log-file`; the daemon reopens its own stdout + stderr onto that
777    // path early in startup. This replaces the previous Unix
778    // `Stdio::null()` daemon spawn which made macOS dyld/gatekeeper
779    // failures invisible (see PR #312 for full diagnosis).
780    let log_path = allocate_daemon_spawn_log_path();
781    let log_arg = log_path.to_string_lossy().into_owned();
782
783    // Delegate the actual spawn to `running_process_core::spawn_daemon`
784    // (renamed from `sanitized::spawn` in the 3.2 → 3.3 reshape — same
785    // semantics, lives in the `spawn` module now and is re-exported at
786    // the crate root). That helper handles both platform-specific quirks
787    // the daemon hits:
788    //  • Windows: STARTUPINFOEX + PROC_THREAD_ATTRIBUTE_HANDLE_LIST so
789    //    grandparent pipe handles (e.g. Python's
790    //    `subprocess.Popen(stdout=PIPE)` further up the chain) don't
791    //    leak into the daemon and prevent EOF on the parent's read.
792    //  • Unix: `setsid()` to detach from the controlling tty + close every
793    //    fd > 2 between fork and exec so the same orphan-handle issue
794    //    doesn't bite on macOS in particular.
795    //
796    // `DaemonChild` always opens NUL for its stdio at the spawn site;
797    // the daemon then redirects its own stdout + stderr to `--log-file`
798    // once it's running.
799    let mut cmd = std::process::Command::new(spawn_bin);
800    cmd.args([
801        "--foreground",
802        "--endpoint",
803        endpoint,
804        "--log-file",
805        &log_arg,
806    ]);
807    #[cfg(not(windows))]
808    apply_cli_spawn_lineage(&mut cmd);
809    #[cfg(windows)]
810    {
811        // On Windows the sanitized spawn rebuilds the environment block
812        // itself; pass our lineage overrides via `cmd.env(...)` so they
813        // land in the merged block.
814        for (k, v) in cli_spawn_lineage_env() {
815            cmd.env(k, v);
816        }
817    }
818    running_process_core::spawn_daemon(&mut cmd)
819        .map(|_child| ())
820        .map_err(|e| format!("failed to spawn daemon (sanitized): {e}"))
821}
822
/// Result of a successful `SessionStart` exchange; produced by
/// [`client_session_start`].
#[derive(Clone, Debug)]
pub struct SessionStartResponse {
    /// Session identifier copied from the daemon's `SessionStarted` reply.
    pub session_id: String,
    /// Journal path from the same reply, converted to a `String` via
    /// `display()`; `None` when the reply carried no journal path.
    pub journal_path: Option<String>,
}
828
829pub fn client_start(endpoint: Option<&str>) -> Result<(), String> {
830    let endpoint = resolve_endpoint(endpoint);
831    run_async(async move { ensure_daemon(&endpoint).await })
832}
833
834pub fn client_stop(endpoint: Option<&str>) -> Result<bool, String> {
835    let endpoint = resolve_endpoint(endpoint);
836    run_async(async move {
837        let mut conn = match connect_client(&endpoint).await {
838            Ok(c) => c,
839            Err(_) => return Ok(false),
840        };
841        conn.send(&zccache_protocol::Request::Shutdown)
842            .await
843            .map_err(|e| format!("failed to send to daemon: {e}"))?;
844        match conn.recv::<zccache_protocol::Response>().await {
845            Ok(Some(zccache_protocol::Response::ShuttingDown)) => Ok(true),
846            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
847            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
848            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
849            Err(e) => Err(format!("broken connection to daemon: {e}")),
850        }
851    })
852}
853
854pub fn client_status(endpoint: Option<&str>) -> Result<zccache_protocol::DaemonStatus, String> {
855    let endpoint = resolve_endpoint(endpoint);
856    run_async(async move {
857        let mut conn = connect_client(&endpoint)
858            .await
859            .map_err(|e| format!("daemon not running at {endpoint}: {e}"))?;
860        conn.send(&zccache_protocol::Request::Status)
861            .await
862            .map_err(|e| format!("failed to send to daemon: {e}"))?;
863        match conn.recv::<zccache_protocol::Response>().await {
864            Ok(Some(zccache_protocol::Response::Status(status))) => Ok(status),
865            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
866            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
867            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
868            Err(e) => Err(format!("broken connection to daemon: {e}")),
869        }
870    })
871}
872
873pub fn client_session_start(
874    endpoint: Option<&str>,
875    cwd: &Path,
876    log_file: Option<&Path>,
877    track_stats: bool,
878    journal_path: Option<&Path>,
879) -> Result<SessionStartResponse, String> {
880    let endpoint = resolve_endpoint(endpoint);
881    let cwd = cwd.to_path_buf();
882    let log_file = log_file.map(NormalizedPath::from);
883    let journal_path = journal_path.map(NormalizedPath::from);
884
885    run_async(async move {
886        ensure_daemon(&endpoint).await?;
887        let mut conn = connect_client(&endpoint)
888            .await
889            .map_err(|e| format!("cannot connect to daemon at {endpoint}: {e}"))?;
890        conn.send(&zccache_protocol::Request::SessionStart {
891            client_pid: std::process::id(),
892            working_dir: cwd.into(),
893            log_file,
894            track_stats,
895            journal_path,
896            profile: false,
897        })
898        .await
899        .map_err(|e| format!("failed to send to daemon: {e}"))?;
900
901        match conn.recv::<zccache_protocol::Response>().await {
902            Ok(Some(zccache_protocol::Response::SessionStarted {
903                session_id,
904                journal_path,
905            })) => Ok(SessionStartResponse {
906                session_id,
907                journal_path: journal_path.map(|p| p.display().to_string()),
908            }),
909            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
910            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
911            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
912            Err(e) => Err(format!("broken connection to daemon: {e}")),
913        }
914    })
915}
916
917/// End a session — daemon-unreachable is treated as a successful no-op.
918///
919/// Thin `String`-error wrapper around [`session_end_idempotent`]. All in-process
920/// callers (Python bindings, soldr, future tools) route through here, so the
921/// idempotency contract that #151 / #159 established for the CLI subprocess
922/// path applies equally to library users. Without this, soldr's at-exit
923/// `zccache session-end` from `rust-plan save` fails Windows CI with
924/// "cannot connect to daemon at \\.\pipe\zccache-…" when the daemon already
925/// exited — every workspace test passed but teardown failed.
926pub fn client_session_end(
927    endpoint: Option<&str>,
928    session_id: &str,
929) -> Result<Option<zccache_protocol::SessionStats>, String> {
930    let endpoint = resolve_endpoint(endpoint);
931    session_end_idempotent(&endpoint, session_id).map_err(|e| e.to_string())
932}
933
934/// Is this connect-time error a "daemon process is gone entirely" error?
935///
936/// The conservative set: `NotFound` (Unix socket missing, Windows pipe
937/// missing), `ConnectionRefused` (Unix socket exists but no listener;
938/// Windows backoff helper synthesizes this when all pipe instances are
939/// permanently busy), and `BrokenPipe` (race: pipe vanished between
940/// open and use). Other errors (`TimedOut`, protocol mismatches, etc.)
941/// are NOT daemon-gone — they should still fail loudly.
942///
943/// `IpcError::Timeout` is explicitly **NOT** in the unreachable set. A
944/// timed-out recv means we connected successfully but the peer did not
945/// respond in the configured window — that's either a hung daemon (a
946/// real fault) or a per-call budget that was too tight (caller error).
947/// Either way: propagate, don't silently swallow.
948///
949/// Used by `session_end_idempotent` (issue #159) and the CLI's
950/// `cmd_session_end` (issue #150 / #151) to map "the daemon already
951/// died" connect-time failures onto a success no-op. Other request
952/// types keep their existing strict error semantics.
953#[must_use]
954pub fn is_daemon_unreachable_err(err: &zccache_ipc::IpcError) -> bool {
955    use std::io::ErrorKind;
956    match err {
957        zccache_ipc::IpcError::Io(io) => matches!(
958            io.kind(),
959            ErrorKind::NotFound | ErrorKind::ConnectionRefused | ErrorKind::BrokenPipe
960        ),
961        _ => false,
962    }
963}
964
965/// End a session, treating a vanished daemon as success.
966///
967/// This is the shared library entry point for ending a session. It is
968/// the contract used by the CLI's `zccache session-end <uuid>`
969/// subcommand AND by any in-process caller (e.g. soldr's at-exit
970/// `rust-plan save`) — both must agree on what "the daemon already
971/// died" means.
972///
973/// # Return shape
974///
975/// - `Ok(Some(stats))` — daemon was reached and returned stats for the
976///   session.
977/// - `Ok(None)` — daemon was reached but returned no stats (session
978///   was tracked without stats), OR the daemon was unreachable at
979///   connect time. Both are no-ops from the caller's perspective:
980///   the session is implicitly ended when the daemon dies (see #137
981///   for the daemon-side mirror), and a caller that just wants to
982///   "end the session, don't care if the daemon is still alive"
983///   should treat both as success.
984/// - `Err(IpcError)` — anything else: timeouts, protocol mismatches,
985///   send/recv mid-conversation failures, daemon error responses.
986///   These are real faults and must be surfaced.
987///
988/// # Why a separate function
989///
990/// Issue #159: soldr was failing Windows CI on every main commit
991/// because its in-process session-end (called from `rust-plan save`)
992/// did not share code with `cmd_session_end`, so #151's
993/// connect-failure idempotency only applied to the CLI subprocess
994/// path. Promoting this contract to the library lets all callers —
995/// current and future — share the same behavior.
996pub fn session_end_idempotent(
997    endpoint: &str,
998    session_id: &str,
999) -> Result<Option<zccache_protocol::SessionStats>, zccache_ipc::IpcError> {
1000    let endpoint = endpoint.to_string();
1001    let session_id = session_id.to_string();
1002
1003    // Build a dedicated current-thread runtime. Can't use the existing
1004    // `run_async` helper because its `Output = Result<T, String>` shape
1005    // doesn't compose with our `Result<_, IpcError>` return type.
1006    let runtime = tokio::runtime::Builder::new_current_thread()
1007        .enable_all()
1008        .build()
1009        .map_err(|e| {
1010            zccache_ipc::IpcError::Endpoint(format!("failed to create tokio runtime: {e}"))
1011        })?;
1012
1013    runtime.block_on(async move {
1014        let mut conn = match connect_client(&endpoint).await {
1015            Ok(c) => c,
1016            Err(e) => {
1017                if is_daemon_unreachable_err(&e) {
1018                    eprintln!(
1019                        "session-end: daemon unreachable at {endpoint}, treating session {session_id} as ended"
1020                    );
1021                    return Ok(None);
1022                }
1023                return Err(e);
1024            }
1025        };
1026
1027        conn.send(&zccache_protocol::Request::SessionEnd {
1028            session_id: session_id.clone(),
1029        })
1030        .await?;
1031
1032        match conn.recv::<zccache_protocol::Response>().await? {
1033            Some(zccache_protocol::Response::SessionEnded { stats }) => Ok(stats),
1034            Some(zccache_protocol::Response::Error { message }) => Err(
1035                zccache_ipc::IpcError::Endpoint(format!("session-end failed: {message}")),
1036            ),
1037            None => Err(zccache_ipc::IpcError::ConnectionClosed),
1038            Some(other) => Err(zccache_ipc::IpcError::Endpoint(format!(
1039                "unexpected response from daemon: {other:?}"
1040            ))),
1041        }
1042    })
1043}
1044
1045pub fn client_session_stats(
1046    endpoint: Option<&str>,
1047    session_id: &str,
1048) -> Result<Option<zccache_protocol::SessionStats>, String> {
1049    let endpoint = resolve_endpoint(endpoint);
1050    let session_id = session_id.to_string();
1051    run_async(async move {
1052        let mut conn = connect_client(&endpoint)
1053            .await
1054            .map_err(|e| format!("cannot connect to daemon at {endpoint}: {e}"))?;
1055        conn.send(&zccache_protocol::Request::SessionStats {
1056            session_id: session_id.clone(),
1057        })
1058        .await
1059        .map_err(|e| format!("failed to send to daemon: {e}"))?;
1060
1061        match conn.recv::<zccache_protocol::Response>().await {
1062            Ok(Some(zccache_protocol::Response::SessionStatsResult { stats })) => Ok(stats),
1063            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
1064            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
1065            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
1066            Err(e) => Err(format!("broken connection to daemon: {e}")),
1067        }
1068    })
1069}
1070
/// Payload of the daemon's `FingerprintCheckResult` reply, as handed back by
/// [`fingerprint_check`].
#[derive(Clone, Debug)]
pub struct FingerprintCheckResponse {
    /// Decision string exactly as reported by the daemon.
    pub decision: String,
    /// Optional reason accompanying the decision.
    pub reason: Option<String>,
    /// Changed files listed in the daemon's reply.
    pub changed_files: Vec<String>,
}
1077
1078pub fn fingerprint_check(
1079    endpoint: Option<&str>,
1080    cache_file: &Path,
1081    cache_type: &str,
1082    root: &Path,
1083    extensions: &[String],
1084    include_globs: &[String],
1085    exclude: &[String],
1086) -> Result<FingerprintCheckResponse, String> {
1087    let endpoint = resolve_endpoint(endpoint);
1088    let cache_file = cache_file.to_path_buf();
1089    let cache_type = cache_type.to_string();
1090    let root = root.to_path_buf();
1091    let extensions = extensions.to_vec();
1092    let include_globs = include_globs.to_vec();
1093    let exclude = exclude.to_vec();
1094
1095    run_async(async move {
1096        ensure_daemon(&endpoint).await?;
1097        let mut conn = connect_client(&endpoint)
1098            .await
1099            .map_err(|e| format!("cannot connect to daemon at {endpoint}: {e}"))?;
1100
1101        conn.send(&zccache_protocol::Request::FingerprintCheck {
1102            cache_file: cache_file.into(),
1103            cache_type,
1104            root: root.into(),
1105            extensions,
1106            include_globs,
1107            exclude,
1108        })
1109        .await
1110        .map_err(|e| format!("failed to send to daemon: {e}"))?;
1111
1112        match conn.recv::<zccache_protocol::Response>().await {
1113            Ok(Some(zccache_protocol::Response::FingerprintCheckResult {
1114                decision,
1115                reason,
1116                changed_files,
1117            })) => Ok(FingerprintCheckResponse {
1118                decision,
1119                reason,
1120                changed_files,
1121            }),
1122            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
1123            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
1124            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
1125            Err(e) => Err(format!("broken connection to daemon: {e}")),
1126        }
1127    })
1128}
1129
1130pub fn fingerprint_mark_success(endpoint: Option<&str>, cache_file: &Path) -> Result<(), String> {
1131    fingerprint_mark(endpoint, cache_file, true)
1132}
1133
1134pub fn fingerprint_mark_failure(endpoint: Option<&str>, cache_file: &Path) -> Result<(), String> {
1135    fingerprint_mark(endpoint, cache_file, false)
1136}
1137
1138fn fingerprint_mark(
1139    endpoint: Option<&str>,
1140    cache_file: &Path,
1141    success: bool,
1142) -> Result<(), String> {
1143    let endpoint = resolve_endpoint(endpoint);
1144    let cache_file = cache_file.to_path_buf();
1145    run_async(async move {
1146        ensure_daemon(&endpoint).await?;
1147        let mut conn = connect_client(&endpoint)
1148            .await
1149            .map_err(|e| format!("cannot connect to daemon at {endpoint}: {e}"))?;
1150        let request = if success {
1151            zccache_protocol::Request::FingerprintMarkSuccess {
1152                cache_file: cache_file.into(),
1153            }
1154        } else {
1155            zccache_protocol::Request::FingerprintMarkFailure {
1156                cache_file: cache_file.into(),
1157            }
1158        };
1159        conn.send(&request)
1160            .await
1161            .map_err(|e| format!("failed to send to daemon: {e}"))?;
1162        match conn.recv::<zccache_protocol::Response>().await {
1163            Ok(Some(zccache_protocol::Response::FingerprintAck)) => Ok(()),
1164            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
1165            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
1166            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
1167            Err(e) => Err(format!("broken connection to daemon: {e}")),
1168        }
1169    })
1170}
1171
1172pub fn fingerprint_invalidate(endpoint: Option<&str>, cache_file: &Path) -> Result<(), String> {
1173    let endpoint = resolve_endpoint(endpoint);
1174    let cache_file = cache_file.to_path_buf();
1175    run_async(async move {
1176        ensure_daemon(&endpoint).await?;
1177        let mut conn = connect_client(&endpoint)
1178            .await
1179            .map_err(|e| format!("cannot connect to daemon at {endpoint}: {e}"))?;
1180        conn.send(&zccache_protocol::Request::FingerprintInvalidate {
1181            cache_file: cache_file.into(),
1182        })
1183        .await
1184        .map_err(|e| format!("failed to send to daemon: {e}"))?;
1185        match conn.recv::<zccache_protocol::Response>().await {
1186            Ok(Some(zccache_protocol::Response::FingerprintAck)) => Ok(()),
1187            Ok(Some(zccache_protocol::Response::Error { message })) => Err(message),
1188            Ok(None) => Err("lost connection to daemon (no response received)".to_string()),
1189            Ok(Some(other)) => Err(format!("unexpected response from daemon: {other:?}")),
1190            Err(e) => Err(format!("broken connection to daemon: {e}")),
1191        }
1192    })
1193}
1194
1195#[cfg(test)]
1196mod tests {
1197    use super::*;
1198
1199    #[test]
1200    fn infer_download_path_keeps_url_filename() {
1201        let path = infer_download_archive_path(
1202            &DownloadSource::Url("https://example.com/releases/toolchain.tar.gz?download=1".into()),
1203            ArchiveFormat::Auto,
1204        );
1205        let file_name = path.file_name().unwrap().to_string_lossy();
1206        assert!(file_name.ends_with("-toolchain.tar.gz"));
1207    }
1208
1209    #[test]
1210    fn infer_download_path_uses_archive_format_suffix_when_needed() {
1211        let path = infer_download_archive_path(
1212            &DownloadSource::Url("https://example.com/download".into()),
1213            ArchiveFormat::Zip,
1214        );
1215        let file_name = path.file_name().unwrap().to_string_lossy();
1216        assert!(file_name.ends_with(".zip"));
1217    }
1218
1219    #[test]
1220    fn build_download_request_derives_archive_path_when_missing() {
1221        let request = build_download_request(DownloadParams::new("https://example.com/file.zip"));
1222        let file_name = request
1223            .destination_path
1224            .file_name()
1225            .unwrap()
1226            .to_string_lossy();
1227        assert!(file_name.ends_with("-file.zip"));
1228    }
1229
1230    #[test]
1231    fn infer_download_path_strips_multipart_suffix_from_first_part() {
1232        let path = infer_download_archive_path(
1233            &DownloadSource::MultipartUrls(vec![
1234                "https://example.com/toolchain.tar.zst.part-aa".into(),
1235                "https://example.com/toolchain.tar.zst.part-ab".into(),
1236            ]),
1237            ArchiveFormat::Auto,
1238        );
1239        let file_name = path.file_name().unwrap().to_string_lossy();
1240        assert!(file_name.ends_with("-toolchain.tar.zst"));
1241    }
1242
1243    #[test]
1244    fn prepare_daemon_exe_in_copies_to_target_dir() {
1245        let tmp = tempfile::tempdir().expect("create tempdir");
1246        let src = tmp.path().join("zccache-daemon.exe");
1247        std::fs::write(&src, b"fake-daemon-bytes").expect("write source");
1248
1249        let dest_dir = tmp.path().join("runtime-binaries");
1250        let copied =
1251            prepare_daemon_exe_in(&src, &dest_dir).expect("prepare_daemon_exe_in succeeds");
1252
1253        assert!(
1254            copied.is_file(),
1255            "copy at {} should exist",
1256            copied.display()
1257        );
1258        assert_eq!(
1259            copied.parent().unwrap(),
1260            dest_dir,
1261            "copy should land inside dest_dir"
1262        );
1263        assert!(
1264            copied
1265                .file_name()
1266                .unwrap()
1267                .to_string_lossy()
1268                .starts_with("zccache-daemon."),
1269            "filename should start with zccache-daemon., got {}",
1270            copied.display()
1271        );
1272        assert!(
1273            copied.extension().and_then(|s| s.to_str()) == Some("exe"),
1274            "extension should be preserved"
1275        );
1276        assert_eq!(
1277            std::fs::read(&copied).unwrap(),
1278            b"fake-daemon-bytes",
1279            "copy contents should match source"
1280        );
1281    }
1282
1283    #[test]
1284    fn prepare_daemon_exe_in_creates_missing_dest_dir() {
1285        let tmp = tempfile::tempdir().expect("create tempdir");
1286        let src = tmp.path().join("zccache-daemon");
1287        std::fs::write(&src, b"x").expect("write source");
1288
1289        let dest_dir = tmp.path().join("nested").join("runtime-binaries");
1290        assert!(!dest_dir.exists(), "precondition: dest_dir does not exist");
1291
1292        let copied = prepare_daemon_exe_in(&src, &dest_dir).expect("create + copy");
1293        assert!(dest_dir.is_dir(), "dest_dir should now exist");
1294        assert!(copied.is_file());
1295    }
1296
1297    #[test]
1298    fn gc_runtime_binaries_in_removes_unlocked_entries() {
1299        let tmp = tempfile::tempdir().expect("create tempdir");
1300        let dir = tmp.path().join("runtime-binaries");
1301        std::fs::create_dir_all(&dir).expect("create dir");
1302
1303        let a = dir.join("zccache-daemon.111.exe");
1304        let b = dir.join("zccache-daemon.222.exe");
1305        std::fs::write(&a, b"a").unwrap();
1306        std::fs::write(&b, b"b").unwrap();
1307
1308        gc_runtime_binaries_in(&dir);
1309
1310        assert!(!a.exists(), "{} should be GC'd", a.display());
1311        assert!(!b.exists(), "{} should be GC'd", b.display());
1312        assert!(dir.is_dir(), "directory itself remains");
1313    }
1314
1315    #[test]
1316    fn gc_runtime_binaries_in_is_noop_for_missing_dir() {
1317        let tmp = tempfile::tempdir().expect("create tempdir");
1318        let dir = tmp.path().join("does-not-exist");
1319        gc_runtime_binaries_in(&dir);
1320    }
1321
1322    /// Issue #159: `session_end_idempotent` is the shared library entry
1323    /// point for ending a session — used by the CLI `session-end` command
1324    /// AND by tools like soldr that call into the library directly. When
1325    /// the daemon process is gone (pipe / socket missing), this function
1326    /// must return `Ok(None)` rather than propagating the connect-time
1327    /// I/O error. Soldr's at-exit `rust-plan save` previously failed
1328    /// Windows CI because its in-process session-end did NOT go through
1329    /// `cmd_session_end` (which is gated to the CLI subprocess path) and
1330    /// so the #151 idempotency fix didn't apply.
1331    #[test]
1332    fn session_end_idempotent_swallows_vanished_daemon() {
1333        // Construct an endpoint that is guaranteed to have no listener —
1334        // a unique pipe / socket name with no server bound to it.
1335        let endpoint = zccache_ipc::unique_test_endpoint();
1336        let session_id = "00000000-0000-0000-0000-000000000000";
1337
1338        let result = session_end_idempotent(&endpoint, session_id);
1339
1340        assert!(
1341            matches!(result, Ok(None)),
1342            "vanished daemon must produce Ok(None) (success no-op), got {result:?}"
1343        );
1344    }
1345
1346    /// Control: non-unreachable errors (the function shouldn't be a
1347    /// blanket "ignore everything"). We can't easily synthesize a live
1348    /// daemon error here, but we can at least assert the routing via the
1349    /// helper used inside the function: connect-time `TimedOut` must NOT
1350    /// be classified as unreachable, so the function would propagate it
1351    /// (rather than silently return Ok(None)). This guards against a
1352    /// regression where someone widens the unreachable set to "any I/O
1353    /// error".
1354    #[test]
1355    fn session_end_idempotent_treats_timeout_as_real_error() {
1356        let err = zccache_ipc::IpcError::Io(std::io::Error::from(std::io::ErrorKind::TimedOut));
1357        assert!(
1358            !is_daemon_unreachable_err(&err),
1359            "TimedOut must NOT be classified as daemon-unreachable; session_end_idempotent \
1360             would otherwise silently swallow real timeouts"
1361        );
1362    }
1363
1364    /// Control: protocol-layer errors (malformed framing, closed
1365    /// connection mid-response) must NOT be classified as unreachable.
1366    #[test]
1367    fn session_end_idempotent_treats_protocol_errors_as_real() {
1368        let err = zccache_ipc::IpcError::ConnectionClosed;
1369        assert!(!is_daemon_unreachable_err(&err));
1370        let err = zccache_ipc::IpcError::Endpoint("bogus".into());
1371        assert!(!is_daemon_unreachable_err(&err));
1372    }
1373
1374    /// Issue #150: connect-time errors that mean "daemon process is gone
1375    /// entirely" must be classified as unreachable so the idempotent
1376    /// session-end paths (`session_end_idempotent` + the CLI's
1377    /// `cmd_session_end` wrapper) can fall through to the success path.
1378    /// The set covers every shape `connect()` actually returns when the
1379    /// pipe / socket is missing or has no listener.
1380    #[test]
1381    fn is_daemon_unreachable_recognizes_not_found() {
1382        let err = zccache_ipc::IpcError::Io(std::io::Error::from(std::io::ErrorKind::NotFound));
1383        assert!(is_daemon_unreachable_err(&err));
1384    }
1385
1386    #[test]
1387    fn is_daemon_unreachable_recognizes_connection_refused() {
1388        let err =
1389            zccache_ipc::IpcError::Io(std::io::Error::from(std::io::ErrorKind::ConnectionRefused));
1390        assert!(is_daemon_unreachable_err(&err));
1391    }
1392
1393    #[test]
1394    fn is_daemon_unreachable_recognizes_broken_pipe() {
1395        let err = zccache_ipc::IpcError::Io(std::io::Error::from(std::io::ErrorKind::BrokenPipe));
1396        assert!(is_daemon_unreachable_err(&err));
1397    }
1398
1399    /// `IpcError::Timeout` is explicitly NOT daemon-unreachable. A
1400    /// timed-out recv means we connected successfully but the peer did
1401    /// not respond — that's a hung-daemon fault, not a vanished daemon.
1402    /// Soldr's at-exit `session_end` path classifies vanished-daemon as
1403    /// a no-op; if `Timeout` were misclassified here, a stuck daemon
1404    /// would be silently swallowed and the user would never see it.
1405    #[test]
1406    fn is_daemon_unreachable_timeout_is_not_unreachable() {
1407        let err = zccache_ipc::IpcError::Timeout(std::time::Duration::from_secs(5));
1408        assert!(
1409            !is_daemon_unreachable_err(&err),
1410            "Timeout must propagate as a real fault, not be swallowed as daemon-unreachable"
1411        );
1412    }
1413
1414    /// Mapping ENOENT through `from_raw_os_error` must yield the same
1415    /// classification as constructing from `ErrorKind::NotFound`. This
1416    /// guards against platform variance (macOS / Linux / Windows could
1417    /// in principle synthesize a different kind for the same errno).
1418    #[test]
1419    fn is_daemon_unreachable_recognizes_raw_enoent() {
1420        // ENOENT == 2 on every Unix; on Windows ERROR_FILE_NOT_FOUND == 2 too.
1421        let err = zccache_ipc::IpcError::Io(std::io::Error::from_raw_os_error(2));
1422        assert!(
1423            is_daemon_unreachable_err(&err),
1424            "errno 2 must map to a kind in the unreachable set; got kind={:?}",
1425            match &err {
1426                zccache_ipc::IpcError::Io(io) => io.kind(),
1427                _ => unreachable!(),
1428            }
1429        );
1430    }
1431
1432    /// Regression: `client_session_end` is the in-process library entry point
1433    /// used by Python bindings and external tools (soldr's `rust-plan save`).
1434    /// It must mirror `session_end_idempotent` — a vanished daemon is a no-op
1435    /// success, not a hard error. Before this fix, soldr called
1436    /// `client_session_end`, got `Err("cannot connect to daemon at …")`,
1437    /// surfaced it as "soldr: zccache session-end … failed: …", and Windows
1438    /// Test failed teardown even after every workspace test passed.
1439    #[test]
1440    fn client_session_end_swallows_vanished_daemon() {
1441        let endpoint = zccache_ipc::unique_test_endpoint();
1442        let session_id = "00000000-0000-0000-0000-000000000000";
1443
1444        let result = client_session_end(Some(&endpoint), session_id);
1445
1446        assert!(
1447            matches!(result, Ok(None)),
1448            "vanished daemon must produce Ok(None) (success no-op), got {result:?}"
1449        );
1450    }
1451
1452    /// `gc_log_directory_in` must:
1453    /// 1. delete every stale file regardless of name (not just
1454    ///    `daemon-spawn-*.log`), so leftover `daemon-lifecycle.log.1`,
1455    ///    `daemon.log.<ts>`, `compile_journal.jsonl.<ts>`, and stray
1456    ///    files from previous versions all get reaped;
1457    /// 2. preserve the live `daemon-lifecycle.log` even when it's
1458    ///    older than the cutoff — a long-idle daemon may only touch
1459    ///    it twice (at `spawn` and `died-*`).
1460    #[test]
1461    fn gc_log_directory_sweeps_stale_files_and_preserves_lifecycle_log() {
1462        let tmp = tempfile::tempdir().expect("tempdir");
1463        let logs = tmp.path();
1464
1465        // Fresh files (mtime = now). Must all survive a sweep with a
1466        // 60-second cutoff regardless of name.
1467        for name in [
1468            "daemon-lifecycle.log",
1469            "daemon-lifecycle.log.1",
1470            "daemon-spawn-1234-9999.log",
1471            "daemon.log",
1472            "daemon.log.2026-01-01T00-00-00Z",
1473            "compile_journal.jsonl",
1474            "compile_journal.jsonl.2026-01-01T00-00-00Z",
1475            "last-session.log",
1476            "stray-from-external-tool.txt",
1477        ] {
1478            std::fs::write(logs.join(name), b"x").unwrap();
1479        }
1480
1481        gc_log_directory_in(logs, std::time::Duration::from_secs(60));
1482
1483        for name in [
1484            "daemon-lifecycle.log",
1485            "daemon-lifecycle.log.1",
1486            "daemon-spawn-1234-9999.log",
1487            "daemon.log",
1488            "daemon.log.2026-01-01T00-00-00Z",
1489            "compile_journal.jsonl",
1490            "compile_journal.jsonl.2026-01-01T00-00-00Z",
1491            "last-session.log",
1492            "stray-from-external-tool.txt",
1493        ] {
1494            assert!(
1495                logs.join(name).exists(),
1496                "{name} must survive when mtime is fresh"
1497            );
1498        }
1499
1500        // Now age every file by overwriting mtime to two days ago.
1501        // Then sweep with a 24h cutoff. Only `daemon-lifecycle.log`
1502        // should survive — it's the live writer and may sit idle for
1503        // an arbitrarily long time between events.
1504        let two_days_ago =
1505            std::time::SystemTime::now() - std::time::Duration::from_secs(60 * 60 * 48);
1506        for name in [
1507            "daemon-lifecycle.log",
1508            "daemon-lifecycle.log.1",
1509            "daemon-spawn-1234-9999.log",
1510            "daemon.log",
1511            "daemon.log.2026-01-01T00-00-00Z",
1512            "compile_journal.jsonl",
1513            "compile_journal.jsonl.2026-01-01T00-00-00Z",
1514            "last-session.log",
1515            "stray-from-external-tool.txt",
1516        ] {
1517            let path = logs.join(name);
1518            let f = std::fs::File::options().write(true).open(&path).unwrap();
1519            f.set_modified(two_days_ago).unwrap();
1520        }
1521
1522        gc_log_directory_in(logs, std::time::Duration::from_secs(60 * 60 * 24));
1523
1524        assert!(
1525            logs.join("daemon-lifecycle.log").exists(),
1526            "active lifecycle log must be preserved even when stale"
1527        );
1528        for name in [
1529            "daemon-lifecycle.log.1",
1530            "daemon-spawn-1234-9999.log",
1531            "daemon.log",
1532            "daemon.log.2026-01-01T00-00-00Z",
1533            "compile_journal.jsonl",
1534            "compile_journal.jsonl.2026-01-01T00-00-00Z",
1535            "last-session.log",
1536            "stray-from-external-tool.txt",
1537        ] {
1538            assert!(
1539                !logs.join(name).exists(),
1540                "{name} should have been swept (older than 24h cutoff)"
1541            );
1542        }
1543    }
1544
1545    /// Sweeping a nonexistent directory is a silent no-op (called
1546    /// before every spawn — must never fail on a fresh install).
1547    #[test]
1548    fn gc_log_directory_silently_handles_missing_dir() {
1549        let tmp = tempfile::tempdir().unwrap();
1550        let missing = tmp.path().join("does-not-exist");
1551        gc_log_directory_in(&missing, std::time::Duration::from_secs(60));
1552        assert!(!missing.exists());
1553    }
1554}