Skip to main content

remote/
deploy.rs

1//! Binary deployment for rcpd
2//!
3//! This module handles automatic deployment of rcpd binaries to remote hosts.
4//! It transfers static rcpd binaries via SSH using base64 encoding, verifies integrity with SHA-256 checksums, and manages cached versions.
5//!
6//! ## Atomicity and Concurrent Deployment Safety
7//!
8//! The deployment mechanism is designed to handle concurrent deployments from multiple rcp instances safely:
9//!
10//! ### Atomic Operations
11//!
12//! 1. **Unique Temporary Files**: Each deployment uses a shell-PID-unique temp file
13//!    (`.rcpd-{version}.tmp.$$`) which ensures concurrent deployments don't
14//!    interfere with each other. The `$$` expands to the shell process PID,
15//!    guaranteeing uniqueness even when multiple deployments run simultaneously.
16//!
17//! 2. **Atomic Rename**: The final deployment step uses `mv -f` which is atomic
18//!    on POSIX-compliant filesystems. This means:
19//!    - The binary is either fully present at the final location or not present at all
20//!    - No partial writes are visible to readers
21//!    - Concurrent renames of the same file complete in a well-defined order
22//!
23//! 3. **Write-Then-Verify**: The deployment sequence ensures the binary is:
24//!    - Fully written to the temp file
25//!    - Marked executable (chmod 700)
26//!    - Moved atomically to the final location
27//!    - Checksummed after the move completes
28//!
29//! ### Race Condition Scenarios
30//!
31//! **Scenario 1: Multiple rcp instances deploying the same version concurrently**
32//!
33//! - Each uses a unique temp file (`.rcpd-0.22.0.tmp.1234`, `.rcpd-0.22.0.tmp.5678`)
34//! - Both successfully write their temp files (the checksum is verified only after the move)
35//! - Both attempt `mv -f .rcpd-0.22.0.tmp.$$ rcpd-0.22.0`
36//! - The filesystem ensures one wins atomically, the other overwrites atomically
37//! - Result: Final binary is valid (both were identical and checksummed)
38//!
39//! **Scenario 2: One deployment while another is reading**
40//!
41//! - Reader opens `rcpd-0.22.0` and gets a valid file descriptor
42//! - Writer completes deployment and `mv -f` replaces the inode
43//! - Reader continues reading from the original inode (POSIX semantics)
44//! - Result: Reader gets the old version (but it's still valid)
45//!
46//! **Scenario 3: Deployment interrupted (network failure, SIGKILL)**
47//!
48//! - Temp file may be left in `.cache/rcp/bin/.rcpd-{version}.tmp.*`
49//! - Final file is either:
50//!   - Not present (deployment never completed)
51//!   - Present and valid (mv completed before interruption)
52//! - Temp files are hidden (dotfiles) and don't interfere with discovery
53//! - Result: Safe to retry; old temp files are harmless
54//!
55//! ### Assumptions
56//!
57//! 1. **POSIX Filesystem Semantics**: The deployment assumes the remote filesystem
58//!    supports atomic `mv` (rename) operations. This is true for all POSIX-compliant
59//!    filesystems (ext4, xfs, btrfs, etc.) but may not hold for network filesystems
60//!    with relaxed consistency (NFSv3 without proper locking).
61//!
62//! 2. **Unique Shell PIDs**: The `$$` shell variable expands to the process ID,
63//!    which is assumed to be unique during the lifetime of the deployment. This is
64//!    guaranteed by the OS but requires PIDs not to wrap around extremely rapidly.
65//!
66//! 3. **Checksum Integrity**: SHA-256 checksums are assumed to be collision-resistant.
67//!    If two different binaries produce the same checksum (astronomically unlikely),
68//!    the deployment would consider them identical.
69//!
70//! 4. **No Malicious Interference**: The deployment assumes the remote host is not
71//!    actively malicious (no adversary replacing files during deployment). Protection
72//!    against malicious hosts is provided by SSH authentication, not by this module.
73//!
74//! ### Non-Atomic Operations
75//!
76//! The following operations are **not** atomic and may observe intermediate states:
77//!
78//! - **Cleanup of old versions**: Uses `ls -t | tail | xargs rm` which may race with
79//!   concurrent deployments. This is acceptable because cleanup only removes old
80//!   versions, never the current version being deployed. Worst case: a version is
81//!   not cleaned up and remains on disk.
82//!
83//! - **Directory creation**: `mkdir -p` may race with concurrent deployments creating
84//!   the same directory. This is safe because `mkdir -p` is idempotent and succeeds
85//!   if the directory already exists.
86
87use anyhow::Context;
88use std::path::PathBuf;
89use std::sync::Arc;
90
91/// Find local static rcpd binary suitable for deployment
92///
93/// Searches in the following order:
94/// 1. Same directory as the current rcp executable
95/// 2. PATH via `which rcpd`
96///
97/// This covers:
98/// - Development builds (cargo run/test): rcpd is in same directory as rcp in target/
99/// - cargo install: rcpd is in ~/.cargo/bin (which should be in PATH)
100/// - nixpkgs: rcpd is available via nix profile (which adds to PATH)
101/// - Production deployments: rcp and rcpd are co-located
102///
103/// # Returns
104///
105/// Path to the local rcpd binary suitable for deployment
106///
107/// # Errors
108///
109/// Returns an error if no suitable binary is found
110pub fn find_local_rcpd_binary() -> anyhow::Result<PathBuf> {
111    let mut searched_paths = Vec::new();
112
113    // try same directory as current executable first
114    // this ensures we use the same build (debug/release) as the running rcp
115    // and covers development builds where rcp and rcpd are both in target/
116    if let Ok(current_exe) = std::env::current_exe() {
117        if let Some(bin_dir) = current_exe.parent() {
118            let path = bin_dir.join("rcpd");
119            searched_paths.push(format!("Same directory: {}", path.display()));
120            if path.exists() && path.is_file() {
121                tracing::info!("Found local rcpd binary at {}", path.display());
122                return Ok(path);
123            }
124        }
125    }
126
127    // try PATH (covers cargo install, nixpkgs, and other system installations)
128    tracing::debug!("Trying to find rcpd in PATH");
129    let which_output = std::process::Command::new("which")
130        .arg("rcpd")
131        .output()
132        .ok();
133
134    if let Some(output) = which_output {
135        if output.status.success() {
136            let path_str = String::from_utf8_lossy(&output.stdout);
137            let path_str = path_str.trim();
138            if !path_str.is_empty() {
139                let path = PathBuf::from(path_str);
140                searched_paths.push(format!("PATH: {}", path.display()));
141                if path.exists() && path.is_file() {
142                    tracing::info!("Found local rcpd binary in PATH: {}", path.display());
143                    return Ok(path);
144                }
145            }
146        }
147    }
148
149    anyhow::bail!(
150        "no local rcpd binary found for deployment\n\
151        \n\
152        Searched in:\n\
153        {}\n\
154        \n\
155        To use auto-deployment, ensure rcpd is available:\n\
156        - cargo install rcp-tools-rcp (installs to ~/.cargo/bin)\n\
157        - or add rcpd to PATH\n\
158        - or build with: cargo build --release --bin rcpd",
159        searched_paths
160            .iter()
161            .map(|p| format!("- {}", p))
162            .collect::<Vec<_>>()
163            .join("\n")
164    )
165}
166
167/// Deploy rcpd binary to remote host
168///
169/// Transfers the local static rcpd binary to the remote host at
170/// `~/.cache/rcp/bin/rcpd-{version}`, verifies the checksum, and returns
171/// the path to the deployed binary.
172///
173/// # Arguments
174///
175/// * `session` - SSH session to the remote host
176/// * `local_rcpd_path` - Path to the local static rcpd binary to deploy
177/// * `version` - Semantic version string for the binary
178/// * `remote_host` - Hostname for logging/error messages
179///
180/// # Returns
181///
182/// The path to the deployed binary on the remote host
183///
184/// # Errors
185///
186/// Returns an error if:
187/// - Local binary cannot be read
188/// - Remote directory creation fails
189/// - Transfer fails
190/// - Checksum verification fails
191pub async fn deploy_rcpd(
192    session: &Arc<openssh::Session>,
193    local_rcpd_path: &std::path::Path,
194    version: &str,
195    remote_host: &str,
196) -> anyhow::Result<String> {
197    tracing::info!(
198        "Deploying rcpd {} to remote host '{}'",
199        version,
200        remote_host
201    );
202
203    // read local binary
204    let binary = tokio::fs::read(local_rcpd_path).await.with_context(|| {
205        format!(
206            "failed to read local rcpd binary from {}",
207            local_rcpd_path.display()
208        )
209    })?;
210
211    tracing::info!(
212        "Read local rcpd binary ({} bytes) from {}",
213        binary.len(),
214        local_rcpd_path.display()
215    );
216
217    // compute checksum before transfer
218    let expected_checksum = compute_sha256(&binary);
219    tracing::debug!("Expected SHA-256: {}", hex::encode(&expected_checksum));
220
221    // validate HOME is set and construct remote path
222    let home = crate::get_remote_home(session).await?;
223    let remote_path = format!("{}/.cache/rcp/bin/rcpd-{}", home, version);
224
225    // transfer binary via base64 over SSH
226    transfer_binary_base64(session, &binary, &remote_path).await?;
227
228    tracing::info!("Binary transferred to {}", remote_path);
229
230    // verify checksum on remote
231    verify_remote_checksum(session, &remote_path, &expected_checksum).await?;
232
233    tracing::info!("Checksum verified successfully");
234
235    Ok(remote_path)
236}
237
238/// Transfer binary to remote host using base64 encoding
239///
240/// Creates the target directory if needed, transfers the binary via base64
241/// encoding through SSH stdin, and sets appropriate permissions (700).
242///
243/// # Arguments
244///
245/// * `session` - SSH session to the remote host
246/// * `binary` - Binary content to transfer
247/// * `remote_path` - Destination path on remote host (should use $HOME, will be created)
248///
249/// # Errors
250///
251/// Returns an error if directory creation, transfer, or permission setting fails
252async fn transfer_binary_base64(
253    session: &Arc<openssh::Session>,
254    binary: &[u8],
255    remote_path: &str,
256) -> anyhow::Result<()> {
257    use base64::Engine;
258
259    // encode binary as base64
260    let encoded = base64::engine::general_purpose::STANDARD.encode(binary);
261
262    // extract directory and filename from remote_path
263    // remote_path format: $HOME/.cache/rcp/bin/rcpd-{version}
264    let path = std::path::Path::new(remote_path);
265    let dir = path
266        .parent()
267        .context("remote path must have a parent directory")?
268        .to_str()
269        .context("remote path parent must be valid UTF-8")?;
270    let filename = path
271        .file_name()
272        .context("remote path must have a filename")?
273        .to_str()
274        .context("remote filename must be valid UTF-8")?;
275
276    // use $$ (shell PID) for unique temp filename to prevent concurrent deployment conflicts
277    // the $$ expands to the shell process PID at runtime, ensuring each deployment has a unique temp file
278    // this allows multiple rcp instances to deploy simultaneously without interfering with each other
279    // extract version from filename (format: rcpd-{version})
280    let temp_filename = if let Some(version) = filename.strip_prefix("rcpd-") {
281        format!(".rcpd-{}.tmp.$$", version)
282    } else {
283        format!(".{}.tmp.$$", filename)
284    };
285
286    // escape all variables for safe shell usage
287    let dir_escaped = crate::shell_escape(dir);
288    let temp_path = format!("{}/{}", dir, temp_filename);
289    let temp_path_escaped = crate::shell_escape(&temp_path);
290    let final_path = format!("{}/{}", dir, filename);
291    let final_path_escaped = crate::shell_escape(&final_path);
292
293    // deployment command sequence (all connected with && to fail fast on any error):
294    // 1. mkdir -p: create cache directory (idempotent, safe for concurrent execution)
295    // 2. base64 -d > temp: decode and write to unique temp file ($$-suffixed)
296    // 3. chmod 700: mark temp file executable
297    // 4. mv -f: atomic rename to final location (POSIX guarantees atomicity)
298    //
299    // the final 'mv -f' is the critical atomic operation:
300    // - on POSIX filesystems, rename(2) is atomic - either the new file appears or the old remains
301    // - concurrent deployments will each complete their mv atomically in some order
302    // - readers of the final file will see either the old or new inode, never partial writes
303    let cmd = format!(
304        "mkdir -p {} && \
305         base64 -d > {} && \
306         chmod 700 {} && \
307         mv -f {} {}",
308        dir_escaped, temp_path_escaped, temp_path_escaped, temp_path_escaped, final_path_escaped
309    );
310
311    tracing::debug!("Running remote command: mkdir && base64 && chmod");
312
313    let mut child = session
314        .command("sh")
315        .arg("-c")
316        .arg(&cmd)
317        .stdin(openssh::Stdio::piped())
318        .stdout(openssh::Stdio::piped())
319        .stderr(openssh::Stdio::piped())
320        .spawn()
321        .await
322        .context("failed to spawn remote command for binary transfer")?;
323
324    // take handles for all streams
325    let mut stdin = child
326        .stdin()
327        .take()
328        .context("failed to get stdin for remote command")?;
329
330    let mut stdout = child
331        .stdout()
332        .take()
333        .context("failed to get stdout for remote command")?;
334
335    let mut stderr = child
336        .stderr()
337        .take()
338        .context("failed to get stderr for remote command")?;
339
340    // write to stdin and close it before reading stdout/stderr
341    // this ensures the child process receives EOF on stdin before we wait for it to finish
342    use tokio::io::{AsyncReadExt, AsyncWriteExt};
343
344    // write all base64 data to stdin
345    stdin
346        .write_all(encoded.as_bytes())
347        .await
348        .context("failed to write base64 data to remote stdin")?;
349
350    // shutdown and explicitly drop stdin to ensure EOF is sent to child process
351    stdin.shutdown().await.context("failed to shutdown stdin")?;
352    drop(stdin);
353
354    // now read stdout and stderr to completion
355    // these will complete once the child process exits and closes the pipes
356    let stdout_fut = async {
357        let mut buf = Vec::new();
358        let _ = stdout.read_to_end(&mut buf).await;
359        buf
360    };
361
362    let stderr_fut = async {
363        let mut buf = Vec::new();
364        let _ = stderr.read_to_end(&mut buf).await;
365        buf
366    };
367
368    let (_stdout_data, stderr_data) = tokio::join!(stdout_fut, stderr_fut);
369
370    // wait for command to complete
371    let status = child
372        .wait()
373        .await
374        .context("failed to wait for remote command completion")?;
375
376    if !status.success() {
377        let stderr = String::from_utf8_lossy(&stderr_data);
378        anyhow::bail!(
379            "failed to transfer binary to remote host\n\
380            \n\
381            stderr: {}\n\
382            \n\
383            This may indicate:\n\
384            - Insufficient disk space on remote host\n\
385            - Permission denied creating $HOME/.cache/rcp/bin\n\
386            - base64 command not available on remote host",
387            stderr
388        );
389    }
390
391    Ok(())
392}
393
394/// Verify checksum of transferred binary on remote host
395///
396/// Runs `sha256sum` on the remote host and compares the result with
397/// the expected checksum.
398///
399/// # Arguments
400///
401/// * `session` - SSH session to the remote host
402/// * `remote_path` - Path to the binary on the remote host (should use $HOME)
403/// * `expected_checksum` - Expected SHA-256 digest
404///
405/// # Errors
406///
407/// Returns an error if the checksum command fails or doesn't match
408async fn verify_remote_checksum(
409    session: &Arc<openssh::Session>,
410    remote_path: &str,
411    expected_checksum: &[u8],
412) -> anyhow::Result<()> {
413    // escape remote_path for safe shell usage
414    let cmd = format!("sha256sum {}", crate::shell_escape(remote_path));
415
416    tracing::debug!("Verifying checksum on remote host");
417
418    let output = session
419        .command("sh")
420        .arg("-c")
421        .arg(&cmd)
422        .output()
423        .await
424        .context("failed to run sha256sum on remote host")?;
425
426    if !output.status.success() {
427        let stderr = String::from_utf8_lossy(&output.stderr);
428        anyhow::bail!(
429            "failed to compute checksum on remote host\n\
430            stderr: {}",
431            stderr
432        );
433    }
434
435    let stdout = String::from_utf8_lossy(&output.stdout);
436    // sha256sum output format: "checksum filename"
437    let remote_checksum = stdout
438        .split_whitespace()
439        .next()
440        .context("unexpected sha256sum output format")?;
441
442    let expected_hex = hex::encode(expected_checksum);
443
444    if remote_checksum != expected_hex {
445        anyhow::bail!(
446            "checksum mismatch after transfer\n\
447            \n\
448            Expected: {}\n\
449            Got:      {}\n\
450            \n\
451            The binary transfer may have been corrupted.\n\
452            Please try again or check network connectivity.",
453            expected_hex,
454            remote_checksum
455        );
456    }
457
458    Ok(())
459}
460
461/// Compute SHA-256 hash of data
462fn compute_sha256(data: &[u8]) -> Vec<u8> {
463    use sha2::{Digest, Sha256};
464    Sha256::digest(data).to_vec()
465}
466
467/// Clean up old rcpd versions on remote host
468///
469/// Keeps the most recent `keep_count` versions and removes older ones.
470/// This prevents disk space from growing unbounded as versions are deployed.
471///
472/// # Arguments
473///
474/// * `session` - SSH session to the remote host
475/// * `keep_count` - Number of recent versions to keep (default: 3)
476///
477/// # Errors
478///
479/// Returns an error if the cleanup command fails (but this is not fatal)
480pub async fn cleanup_old_versions(
481    session: &Arc<openssh::Session>,
482    keep_count: usize,
483) -> anyhow::Result<()> {
484    tracing::debug!("Cleaning up old rcpd versions (keeping {})", keep_count);
485
486    // validate HOME is set before constructing the cache path
487    // if this fails, we log and return Ok since cleanup is best-effort
488    let home = match crate::get_remote_home(session).await {
489        Ok(h) => h,
490        Err(e) => {
491            tracing::warn!(
492                "cleanup of old versions skipped (HOME not available): {:#}",
493                e
494            );
495            return Ok(());
496        }
497    };
498
499    // list all rcpd-* files sorted by modification time (newest first)
500    // keep the newest N, remove the rest
501    let cache_dir = format!("{}/.cache/rcp/bin", home);
502    let cmd = format!(
503        "cd {} 2>/dev/null && ls -t rcpd-* 2>/dev/null | tail -n +{} | xargs -r rm -f",
504        crate::shell_escape(&cache_dir),
505        keep_count + 1
506    );
507
508    let output = session
509        .command("sh")
510        .arg("-c")
511        .arg(&cmd)
512        .output()
513        .await
514        .context("failed to run cleanup command on remote host")?;
515
516    if !output.status.success() {
517        let stderr = String::from_utf8_lossy(&output.stderr);
518        // log but don't fail - cleanup is best-effort
519        tracing::warn!("cleanup of old versions failed (non-fatal): {}", stderr);
520    } else {
521        tracing::debug!("Old versions cleaned up successfully");
522    }
523
524    Ok(())
525}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Digest of `input` rendered as lowercase hex, for comparison with known vectors.
    fn sha256_hex(input: &[u8]) -> String {
        hex::encode(compute_sha256(input))
    }

    #[test]
    fn test_compute_sha256() {
        // published SHA-256 of the ASCII text "hello world"
        assert_eq!(
            sha256_hex(b"hello world"),
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    #[test]
    fn test_compute_sha256_empty() {
        // published SHA-256 of the empty input
        assert_eq!(
            sha256_hex(b""),
            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }

    #[test]
    fn test_compute_sha256_binary() {
        // every possible byte value once, so the input is not valid UTF-8
        let every_byte: Vec<u8> = (0..=u8::MAX).collect();

        let first = compute_sha256(&every_byte);
        let second = compute_sha256(&every_byte);

        // a SHA-256 digest is 32 bytes, and hashing the same input twice agrees
        assert_eq!(first.len(), 32);
        assert_eq!(first, second);
    }
}