Skip to main content

remote/
deploy.rs

1//! Binary deployment for rcpd
2//!
3//! This module handles automatic deployment of rcpd binaries to remote hosts.
4//! It transfers static rcpd binaries via SSH using base64 encoding, verifies integrity with SHA-256 checksums, and manages cached versions.
5//!
6//! ## Atomicity and Concurrent Deployment Safety
7//!
8//! The deployment mechanism is designed to handle concurrent deployments from multiple rcp instances safely:
9//!
10//! ### Atomic Operations
11//!
12//! 1. **Unique Temporary Files**: Each deployment uses a shell-PID-unique temp file
13//!    (`.rcpd-{version}.tmp.$$`) which ensures concurrent deployments don't
14//!    interfere with each other. The `$$` expands to the shell process PID,
15//!    guaranteeing uniqueness even when multiple deployments run simultaneously.
16//!
17//! 2. **Atomic Rename**: The final deployment step uses `mv -f` which is atomic
18//!    on POSIX-compliant filesystems. This means:
19//!    - The binary is either fully present at the final location or not present at all
20//!    - No partial writes are visible to readers
21//!    - Concurrent renames of the same file complete in a well-defined order
22//!
23//! 3. **Write-Then-Verify**: The deployment sequence ensures the binary is:
24//!    - Fully written to the temp file
25//!    - Marked executable (chmod 700)
26//!    - Moved atomically to the final location
27//!    - Checksummed after the move completes
28//!
29//! ### Race Condition Scenarios
30//!
31//! **Scenario 1: Multiple rcp instances deploying the same version concurrently**
32//!
33//! - Each uses a unique temp file (`.rcpd-0.22.0.tmp.1234`, `.rcpd-0.22.0.tmp.5678`)
//! - Both successfully write their temp files
//! - Both attempt `mv -f .rcpd-0.22.0.tmp.$$ rcpd-0.22.0`
//! - The filesystem ensures one wins atomically, the other overwrites atomically
//! - Result: Final binary is valid (both uploads are identical, and each deployer
//!   verifies the checksum of the final file after its own move completes)
38//!
39//! **Scenario 2: One deployment while another is reading**
40//!
41//! - Reader opens `rcpd-0.22.0` and gets a valid file descriptor
42//! - Writer completes deployment and `mv -f` replaces the inode
43//! - Reader continues reading from the original inode (POSIX semantics)
44//! - Result: Reader gets the old version (but it's still valid)
45//!
46//! **Scenario 3: Deployment interrupted (network failure, SIGKILL)**
47//!
48//! - Temp file may be left in `.cache/rcp/bin/.rcpd-{version}.tmp.*`
49//! - Final file is either:
50//!   - Not present (deployment never completed)
51//!   - Present and valid (mv completed before interruption)
52//! - Temp files are hidden (dotfiles) and don't interfere with discovery
53//! - Result: Safe to retry; old temp files are harmless
54//!
55//! ### Assumptions
56//!
57//! 1. **POSIX Filesystem Semantics**: The deployment assumes the remote filesystem
58//!    supports atomic `mv` (rename) operations. This is true for all POSIX-compliant
59//!    filesystems (ext4, xfs, btrfs, etc.) but may not hold for network filesystems
60//!    with relaxed consistency (NFSv3 without proper locking).
61//!
62//! 2. **Unique Shell PIDs**: The `$$` shell variable expands to the process ID,
63//!    which is assumed to be unique during the lifetime of the deployment. This is
64//!    guaranteed by the OS but requires PIDs not to wrap around extremely rapidly.
65//!
66//! 3. **Checksum Integrity**: SHA-256 checksums are assumed to be collision-resistant.
67//!    If two different binaries produce the same checksum (astronomically unlikely),
68//!    the deployment would consider them identical.
69//!
70//! 4. **No Malicious Interference**: The deployment assumes the remote host is not
71//!    actively malicious (no adversary replacing files during deployment). Protection
72//!    against malicious hosts is provided by SSH authentication, not by this module.
73//!
74//! ### Non-Atomic Operations
75//!
76//! The following operations are **not** atomic and may observe intermediate states:
77//!
78//! - **Cleanup of old versions**: Uses `ls -t | tail | xargs rm` which may race with
79//!   concurrent deployments. This is acceptable because cleanup only removes old
80//!   versions, never the current version being deployed. Worst case: a version is
81//!   not cleaned up and remains on disk.
82//!
83//! - **Directory creation**: `mkdir -p` may race with concurrent deployments creating
84//!   the same directory. This is safe because `mkdir -p` is idempotent and succeeds
85//!   if the directory already exists.
86
87use anyhow::Context;
88use std::path::PathBuf;
89use std::sync::Arc;
90
/// Troubleshooting hints appended to every binary-transfer failure message.
const TRANSFER_HINTS: &str = concat!(
    "This may indicate:\n",
    "- Insufficient disk space on remote host\n",
    "- Permission denied creating $HOME/.cache/rcp/bin\n",
    "- base64 command not available on remote host",
);

/// Render the error shown when streaming base64 data to the remote stdin fails.
///
/// Such a failure is typically a broken pipe caused by the remote command
/// exiting early, so the write error alone rarely explains the problem. The
/// message therefore also carries the remote stderr when it has any
/// non-whitespace content; otherwise it reports the exit status and notes that
/// stderr was empty. The troubleshooting hints are always appended.
fn format_write_error(
    write_err: &std::io::Error,
    stderr_data: &[u8],
    status: &dyn std::fmt::Display,
) -> String {
    let remote_stderr = String::from_utf8_lossy(stderr_data);
    // whitespace-only stderr carries no information, so it counts as empty
    let detail = match remote_stderr.trim() {
        "" => format!("remote command exited with status: {status}\nremote stderr was empty"),
        text => format!("remote stderr: {text}"),
    };
    format!(
        "failed to write base64 data to remote stdin: {write_err}\n\n{detail}\n\n{TRANSFER_HINTS}"
    )
}
128
129/// Find local static rcpd binary suitable for deployment
130///
131/// Searches in the following order:
132/// 1. Same directory as the current rcp executable
133/// 2. PATH via `which rcpd`
134///
135/// This covers:
136/// - Development builds (cargo run/test): rcpd is in same directory as rcp in target/
137/// - cargo install: rcpd is in ~/.cargo/bin (which should be in PATH)
138/// - nixpkgs: rcpd is available via nix profile (which adds to PATH)
139/// - Production deployments: rcp and rcpd are co-located
140///
141/// # Returns
142///
143/// Path to the local rcpd binary suitable for deployment
144///
145/// # Errors
146///
147/// Returns an error if no suitable binary is found
148pub fn find_local_rcpd_binary() -> anyhow::Result<PathBuf> {
149    let mut searched_paths = Vec::new();
150
151    // try same directory as current executable first
152    // this ensures we use the same build (debug/release) as the running rcp
153    // and covers development builds where rcp and rcpd are both in target/
154    if let Ok(current_exe) = std::env::current_exe()
155        && let Some(bin_dir) = current_exe.parent()
156    {
157        let path = bin_dir.join("rcpd");
158        searched_paths.push(format!("Same directory: {}", path.display()));
159        if path.exists() && path.is_file() {
160            tracing::info!("Found local rcpd binary at {}", path.display());
161            return Ok(path);
162        }
163    }
164
165    // try PATH (covers cargo install, nixpkgs, and other system installations)
166    tracing::debug!("Trying to find rcpd in PATH");
167    let which_output = std::process::Command::new("which")
168        .arg("rcpd")
169        .output()
170        .ok();
171
172    if let Some(output) = which_output
173        && output.status.success()
174    {
175        let path_str = String::from_utf8_lossy(&output.stdout);
176        let path_str = path_str.trim();
177        if !path_str.is_empty() {
178            let path = PathBuf::from(path_str);
179            searched_paths.push(format!("PATH: {}", path.display()));
180            if path.exists() && path.is_file() {
181                tracing::info!("Found local rcpd binary in PATH: {}", path.display());
182                return Ok(path);
183            }
184        }
185    }
186
187    anyhow::bail!(
188        "no local rcpd binary found for deployment\n\
189        \n\
190        Searched in:\n\
191        {}\n\
192        \n\
193        To use auto-deployment, ensure rcpd is available:\n\
194        - cargo install rcp-tools-rcp (installs to ~/.cargo/bin)\n\
195        - or add rcpd to PATH\n\
196        - or build with: cargo build --release --bin rcpd",
197        searched_paths
198            .iter()
199            .map(|p| format!("- {}", p))
200            .collect::<Vec<_>>()
201            .join("\n")
202    )
203}
204
205/// Deploy rcpd binary to remote host
206///
207/// Transfers the local static rcpd binary to the remote host at
208/// `~/.cache/rcp/bin/rcpd-{version}`, verifies the checksum, and returns
209/// the path to the deployed binary.
210///
211/// # Arguments
212///
213/// * `session` - SSH session to the remote host
214/// * `local_rcpd_path` - Path to the local static rcpd binary to deploy
215/// * `version` - Semantic version string for the binary
216/// * `remote_host` - Hostname for logging/error messages
217///
218/// # Returns
219///
220/// The path to the deployed binary on the remote host
221///
222/// # Errors
223///
224/// Returns an error if:
225/// - Local binary cannot be read
226/// - Remote directory creation fails
227/// - Transfer fails
228/// - Checksum verification fails
229pub async fn deploy_rcpd(
230    session: &Arc<openssh::Session>,
231    local_rcpd_path: &std::path::Path,
232    version: &str,
233    remote_host: &str,
234) -> anyhow::Result<String> {
235    tracing::info!(
236        "Deploying rcpd {} to remote host '{}'",
237        version,
238        remote_host
239    );
240
241    // read local binary
242    let binary = tokio::fs::read(local_rcpd_path).await.with_context(|| {
243        format!(
244            "failed to read local rcpd binary from {}",
245            local_rcpd_path.display()
246        )
247    })?;
248
249    tracing::info!(
250        "Read local rcpd binary ({} bytes) from {}",
251        binary.len(),
252        local_rcpd_path.display()
253    );
254
255    // compute checksum before transfer
256    let expected_checksum = compute_sha256(&binary);
257    tracing::debug!("Expected SHA-256: {}", hex::encode(&expected_checksum));
258
259    // validate HOME is set and construct remote path
260    let home = crate::get_remote_home(session).await?;
261    let remote_path = format!("{}/.cache/rcp/bin/rcpd-{}", home, version);
262
263    // transfer binary via base64 over SSH
264    transfer_binary_base64(session, &binary, &remote_path).await?;
265
266    tracing::info!("Binary transferred to {}", remote_path);
267
268    // verify checksum on remote
269    verify_remote_checksum(session, &remote_path, &expected_checksum).await?;
270
271    tracing::info!("Checksum verified successfully");
272
273    Ok(remote_path)
274}
275
276/// Transfer binary to remote host using base64 encoding
277///
278/// Creates the target directory if needed, transfers the binary via base64
279/// encoding through SSH stdin, and sets appropriate permissions (700).
280///
281/// # Arguments
282///
283/// * `session` - SSH session to the remote host
284/// * `binary` - Binary content to transfer
285/// * `remote_path` - Destination path on remote host (should use $HOME, will be created)
286///
287/// # Errors
288///
289/// Returns an error if directory creation, transfer, or permission setting fails
290async fn transfer_binary_base64(
291    session: &Arc<openssh::Session>,
292    binary: &[u8],
293    remote_path: &str,
294) -> anyhow::Result<()> {
295    use base64::Engine;
296
297    // encode binary as base64
298    let encoded = base64::engine::general_purpose::STANDARD.encode(binary);
299
300    // extract directory and filename from remote_path
301    // remote_path format: $HOME/.cache/rcp/bin/rcpd-{version}
302    let path = std::path::Path::new(remote_path);
303    let dir = path
304        .parent()
305        .context("remote path must have a parent directory")?
306        .to_str()
307        .context("remote path parent must be valid UTF-8")?;
308    let filename = path
309        .file_name()
310        .context("remote path must have a filename")?
311        .to_str()
312        .context("remote filename must be valid UTF-8")?;
313
314    // use $$ (shell PID) for unique temp filename to prevent concurrent deployment conflicts
315    // the $$ expands to the shell process PID at runtime, ensuring each deployment has a unique temp file
316    // this allows multiple rcp instances to deploy simultaneously without interfering with each other
317    // extract version from filename (format: rcpd-{version})
318    let temp_filename = if let Some(version) = filename.strip_prefix("rcpd-") {
319        format!(".rcpd-{}.tmp.$$", version)
320    } else {
321        format!(".{}.tmp.$$", filename)
322    };
323
324    // escape all variables for safe shell usage
325    let dir_escaped = crate::shell_escape(dir);
326    let temp_path = format!("{}/{}", dir, temp_filename);
327    let temp_path_escaped = crate::shell_escape(&temp_path);
328    let final_path = format!("{}/{}", dir, filename);
329    let final_path_escaped = crate::shell_escape(&final_path);
330
331    // deployment command sequence (all connected with && to fail fast on any error):
332    // 1. mkdir -p: create cache directory (idempotent, safe for concurrent execution)
333    // 2. base64 -d > temp: decode and write to unique temp file ($$-suffixed)
334    // 3. chmod 700: mark temp file executable
335    // 4. mv -f: atomic rename to final location (POSIX guarantees atomicity)
336    //
337    // the final 'mv -f' is the critical atomic operation:
338    // - on POSIX filesystems, rename(2) is atomic - either the new file appears or the old remains
339    // - concurrent deployments will each complete their mv atomically in some order
340    // - readers of the final file will see either the old or new inode, never partial writes
341    let cmd = format!(
342        "mkdir -p {} && \
343         base64 -d > {} && \
344         chmod 700 {} && \
345         mv -f {} {}",
346        dir_escaped, temp_path_escaped, temp_path_escaped, temp_path_escaped, final_path_escaped
347    );
348
349    tracing::debug!("Running remote command: mkdir && base64 && chmod");
350
351    let mut child = session
352        .command("sh")
353        .arg("-c")
354        .arg(&cmd)
355        .stdin(openssh::Stdio::piped())
356        .stdout(openssh::Stdio::piped())
357        .stderr(openssh::Stdio::piped())
358        .spawn()
359        .await
360        .context("failed to spawn remote command for binary transfer")?;
361
362    // take handles for all streams
363    let mut stdin = child
364        .stdin()
365        .take()
366        .context("failed to get stdin for remote command")?;
367
368    let mut stdout = child
369        .stdout()
370        .take()
371        .context("failed to get stdout for remote command")?;
372
373    let mut stderr = child
374        .stderr()
375        .take()
376        .context("failed to get stderr for remote command")?;
377
378    // write to stdin and close it before reading stdout/stderr
379    // this ensures the child process receives EOF on stdin before we wait for it to finish
380    use tokio::io::{AsyncReadExt, AsyncWriteExt};
381
382    // write all base64 data to stdin, capturing errors instead of returning
383    // immediately — if this fails (e.g. broken pipe), we still need to read
384    // stderr to learn why the remote command failed
385    let write_result = stdin.write_all(encoded.as_bytes()).await;
386
387    if write_result.is_ok() {
388        // shutdown stdin to send EOF to the remote `base64 -d` process
389        stdin.shutdown().await.context("failed to shutdown stdin")?;
390    }
391    // drop stdin so the remote process can finish even if the write failed
392    drop(stdin);
393
394    // read stdout and stderr to completion — stderr is critical for diagnostics
395    // when the remote command fails before accepting all input
396    let stdout_fut = async {
397        let mut buf = Vec::new();
398        let _ = stdout.read_to_end(&mut buf).await;
399        buf
400    };
401
402    let stderr_fut = async {
403        let mut buf = Vec::new();
404        let _ = stderr.read_to_end(&mut buf).await;
405        buf
406    };
407
408    let (_stdout_data, stderr_data) = tokio::join!(stdout_fut, stderr_fut);
409
410    // wait for command to complete
411    let status = child
412        .wait()
413        .await
414        .context("failed to wait for remote command completion")?;
415
416    // if writing to stdin failed (broken pipe), the remote command exited early —
417    // include stderr so the user sees the actual cause (e.g. "Permission denied")
418    if let Err(write_err) = write_result {
419        anyhow::bail!("{}", format_write_error(&write_err, &stderr_data, &status));
420    }
421
422    if !status.success() {
423        let stderr = String::from_utf8_lossy(&stderr_data);
424        anyhow::bail!(
425            "failed to transfer binary to remote host\n\
426            \n\
427            stderr: {}\n\
428            \n\
429            {TRANSFER_HINTS}",
430            stderr
431        );
432    }
433
434    Ok(())
435}
436
437/// Verify checksum of transferred binary on remote host
438///
439/// Runs `sha256sum` on the remote host and compares the result with
440/// the expected checksum.
441///
442/// # Arguments
443///
444/// * `session` - SSH session to the remote host
445/// * `remote_path` - Path to the binary on the remote host (should use $HOME)
446/// * `expected_checksum` - Expected SHA-256 digest
447///
448/// # Errors
449///
450/// Returns an error if the checksum command fails or doesn't match
451async fn verify_remote_checksum(
452    session: &Arc<openssh::Session>,
453    remote_path: &str,
454    expected_checksum: &[u8],
455) -> anyhow::Result<()> {
456    // escape remote_path for safe shell usage
457    let cmd = format!("sha256sum {}", crate::shell_escape(remote_path));
458
459    tracing::debug!("Verifying checksum on remote host");
460
461    let output = session
462        .command("sh")
463        .arg("-c")
464        .arg(&cmd)
465        .output()
466        .await
467        .context("failed to run sha256sum on remote host")?;
468
469    if !output.status.success() {
470        let stderr = String::from_utf8_lossy(&output.stderr);
471        anyhow::bail!(
472            "failed to compute checksum on remote host\n\
473            stderr: {}",
474            stderr
475        );
476    }
477
478    let stdout = String::from_utf8_lossy(&output.stdout);
479    // sha256sum output format: "checksum filename"
480    let remote_checksum = stdout
481        .split_whitespace()
482        .next()
483        .context("unexpected sha256sum output format")?;
484
485    let expected_hex = hex::encode(expected_checksum);
486
487    if remote_checksum != expected_hex {
488        anyhow::bail!(
489            "checksum mismatch after transfer\n\
490            \n\
491            Expected: {}\n\
492            Got:      {}\n\
493            \n\
494            The binary transfer may have been corrupted.\n\
495            Please try again or check network connectivity.",
496            expected_hex,
497            remote_checksum
498        );
499    }
500
501    Ok(())
502}
503
504/// Compute SHA-256 hash of data
505fn compute_sha256(data: &[u8]) -> Vec<u8> {
506    use sha2::{Digest, Sha256};
507    Sha256::digest(data).to_vec()
508}
509
510/// Clean up old rcpd versions on remote host
511///
512/// Keeps the most recent `keep_count` versions and removes older ones.
513/// This prevents disk space from growing unbounded as versions are deployed.
514///
515/// # Arguments
516///
517/// * `session` - SSH session to the remote host
518/// * `keep_count` - Number of recent versions to keep (default: 3)
519///
520/// # Errors
521///
522/// Returns an error if the cleanup command fails (but this is not fatal)
523pub async fn cleanup_old_versions(
524    session: &Arc<openssh::Session>,
525    keep_count: usize,
526) -> anyhow::Result<()> {
527    tracing::debug!("Cleaning up old rcpd versions (keeping {})", keep_count);
528
529    // validate HOME is set before constructing the cache path
530    // if this fails, we log and return Ok since cleanup is best-effort
531    let home = match crate::get_remote_home(session).await {
532        Ok(h) => h,
533        Err(e) => {
534            tracing::warn!(
535                "cleanup of old versions skipped (HOME not available): {:#}",
536                e
537            );
538            return Ok(());
539        }
540    };
541
542    // list all rcpd-* files sorted by modification time (newest first)
543    // keep the newest N, remove the rest
544    let cache_dir = format!("{}/.cache/rcp/bin", home);
545    let cmd = format!(
546        "cd {} 2>/dev/null && ls -t rcpd-* 2>/dev/null | tail -n +{} | xargs -r rm -f",
547        crate::shell_escape(&cache_dir),
548        keep_count + 1
549    );
550
551    let output = session
552        .command("sh")
553        .arg("-c")
554        .arg(&cmd)
555        .output()
556        .await
557        .context("failed to run cleanup command on remote host")?;
558
559    if !output.status.success() {
560        let stderr = String::from_utf8_lossy(&output.stderr);
561        // log but don't fail - cleanup is best-effort
562        tracing::warn!("cleanup of old versions failed (non-fatal): {}", stderr);
563    } else {
564        tracing::debug!("Old versions cleaned up successfully");
565    }
566
567    Ok(())
568}
569
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw OS error 32 (EPIPE), as seen when the remote command closes stdin early.
    fn broken_pipe() -> std::io::Error {
        std::io::Error::from_raw_os_error(32)
    }

    #[test]
    fn test_compute_sha256() {
        // known SHA-256 of "hello world"
        let expected =
            hex::decode("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")
                .unwrap();
        assert_eq!(compute_sha256(b"hello world"), expected);
    }

    #[test]
    fn test_compute_sha256_empty() {
        // known SHA-256 of empty string
        let expected =
            hex::decode("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
                .unwrap();
        assert_eq!(compute_sha256(b""), expected);
    }

    #[test]
    fn test_compute_sha256_binary() {
        // every byte value once, so the input is not valid UTF-8
        let data: Vec<u8> = (0..=u8::MAX).collect();
        let first = compute_sha256(&data);
        let second = compute_sha256(&data);
        // a SHA-256 digest is 32 bytes long
        assert_eq!(first.len(), 32);
        // hashing the same input twice must give the same digest
        assert_eq!(first, second);
    }

    #[test]
    fn write_error_with_stderr_includes_remote_output() {
        let remote_stderr = b"mkdir: cannot create directory: Permission denied";
        let msg = format_write_error(&broken_pipe(), remote_stderr, &"exited with 1");
        assert!(msg.contains("Broken pipe"), "should contain write error");
        assert!(
            msg.contains("Permission denied"),
            "should contain remote stderr"
        );
        assert!(
            msg.contains("This may indicate"),
            "should contain hint text"
        );
        // should NOT contain the exit status line when stderr is present
        assert!(
            !msg.contains("remote command exited with status"),
            "should omit status when stderr is available"
        );
    }

    #[test]
    fn write_error_without_stderr_includes_exit_status() {
        let msg = format_write_error(&broken_pipe(), b"", &"exited with 126");
        assert!(msg.contains("Broken pipe"), "should contain write error");
        assert!(
            msg.contains("remote command exited with status: exited with 126"),
            "should contain exit status"
        );
        assert!(
            msg.contains("remote stderr was empty"),
            "should note stderr was empty"
        );
        assert!(
            msg.contains("This may indicate"),
            "should contain hint text"
        );
    }

    #[test]
    fn write_error_trims_whitespace_only_stderr() {
        let msg = format_write_error(&broken_pipe(), b"  \n\t  ", &"exited with 1");
        // whitespace-only stderr should be treated as empty
        assert!(
            msg.contains("remote stderr was empty"),
            "whitespace-only stderr should be treated as empty"
        );
    }
}