remote/deploy.rs
1//! Binary deployment for rcpd
2//!
3//! This module handles automatic deployment of rcpd binaries to remote hosts.
4//! It transfers static rcpd binaries via SSH using base64 encoding, verifies integrity with SHA-256 checksums, and manages cached versions.
5//!
6//! ## Atomicity and Concurrent Deployment Safety
7//!
8//! The deployment mechanism is designed to handle concurrent deployments from multiple rcp instances safely:
9//!
10//! ### Atomic Operations
11//!
12//! 1. **Unique Temporary Files**: Each deployment uses a shell-PID-unique temp file
13//! (`.rcpd-{version}.tmp.$$`) which ensures concurrent deployments don't
14//! interfere with each other. The `$$` expands to the shell process PID,
15//! guaranteeing uniqueness even when multiple deployments run simultaneously.
16//!
17//! 2. **Atomic Rename**: The final deployment step uses `mv -f` which is atomic
18//! on POSIX-compliant filesystems. This means:
19//! - The binary is either fully present at the final location or not present at all
20//! - No partial writes are visible to readers
21//! - Concurrent renames of the same file complete in a well-defined order
22//!
23//! 3. **Write-Then-Verify**: The deployment sequence ensures the binary is:
24//! - Fully written to the temp file
25//! - Marked executable (chmod 700)
26//! - Moved atomically to the final location
27//! - Checksummed after the move completes
28//!
29//! ### Race Condition Scenarios
30//!
31//! **Scenario 1: Multiple rcp instances deploying the same version concurrently**
32//!
33//! - Each uses a unique temp file (`.rcpd-0.22.0.tmp.1234`, `.rcpd-0.22.0.tmp.5678`)
//! - Both successfully write their temp files
//! - Both attempt `mv -f .rcpd-0.22.0.tmp.$$ rcpd-0.22.0`
//! - The filesystem ensures one wins atomically, the other overwrites atomically
//! - Result: Final binary is valid (both uploads are identical, and each deployer
//!   checksums the final file after its own move)
38//!
39//! **Scenario 2: One deployment while another is reading**
40//!
41//! - Reader opens `rcpd-0.22.0` and gets a valid file descriptor
42//! - Writer completes deployment and `mv -f` replaces the inode
43//! - Reader continues reading from the original inode (POSIX semantics)
44//! - Result: Reader gets the old version (but it's still valid)
45//!
46//! **Scenario 3: Deployment interrupted (network failure, SIGKILL)**
47//!
48//! - Temp file may be left in `.cache/rcp/bin/.rcpd-{version}.tmp.*`
49//! - Final file is either:
50//! - Not present (deployment never completed)
51//! - Present and valid (mv completed before interruption)
52//! - Temp files are hidden (dotfiles) and don't interfere with discovery
53//! - Result: Safe to retry; old temp files are harmless
54//!
55//! ### Assumptions
56//!
57//! 1. **POSIX Filesystem Semantics**: The deployment assumes the remote filesystem
58//! supports atomic `mv` (rename) operations. This is true for all POSIX-compliant
59//! filesystems (ext4, xfs, btrfs, etc.) but may not hold for network filesystems
60//! with relaxed consistency (NFSv3 without proper locking).
61//!
62//! 2. **Unique Shell PIDs**: The `$$` shell variable expands to the process ID,
63//! which is assumed to be unique during the lifetime of the deployment. This is
64//! guaranteed by the OS but requires PIDs not to wrap around extremely rapidly.
65//!
66//! 3. **Checksum Integrity**: SHA-256 checksums are assumed to be collision-resistant.
67//! If two different binaries produce the same checksum (astronomically unlikely),
68//! the deployment would consider them identical.
69//!
70//! 4. **No Malicious Interference**: The deployment assumes the remote host is not
71//! actively malicious (no adversary replacing files during deployment). Protection
72//! against malicious hosts is provided by SSH authentication, not by this module.
73//!
74//! ### Non-Atomic Operations
75//!
76//! The following operations are **not** atomic and may observe intermediate states:
77//!
78//! - **Cleanup of old versions**: Uses `ls -t | tail | xargs rm` which may race with
79//! concurrent deployments. This is acceptable because cleanup only removes old
80//! versions, never the current version being deployed. Worst case: a version is
81//! not cleaned up and remains on disk.
82//!
83//! - **Directory creation**: `mkdir -p` may race with concurrent deployments creating
84//! the same directory. This is safe because `mkdir -p` is idempotent and succeeds
85//! if the directory already exists.
86
87use anyhow::Context;
88use std::path::PathBuf;
89use std::sync::Arc;
90
/// Troubleshooting hints appended to binary-transfer failure messages.
const TRANSFER_HINTS: &str = "This may indicate:\n\
    - Insufficient disk space on remote host\n\
    - Permission denied creating $HOME/.cache/rcp/bin\n\
    - base64 command not available on remote host";

/// Render the error text for a failed write to the remote command's stdin.
///
/// A failed write usually means the remote command exited before consuming
/// all of its input, so the write error alone rarely explains the problem.
/// The message therefore also carries:
/// - the remote stderr, when it contains anything besides whitespace, or
/// - the exit status plus a note that stderr was empty, otherwise.
///
/// The shared troubleshooting hints are always appended.
fn format_write_error(
    write_err: &std::io::Error,
    stderr_data: &[u8],
    status: &dyn std::fmt::Display,
) -> String {
    let remote_output = String::from_utf8_lossy(stderr_data);
    // whitespace-only stderr carries no information, so it counts as empty
    let details = match remote_output.trim() {
        "" => format!(
            "remote command exited with status: {status}\n\
             remote stderr was empty"
        ),
        text => format!("remote stderr: {text}"),
    };
    format!(
        "failed to write base64 data to remote stdin: {write_err}\n\
         \n\
         {details}\n\
         \n\
         {TRANSFER_HINTS}"
    )
}
128
129/// Find local static rcpd binary suitable for deployment
130///
131/// Searches in the following order:
132/// 1. Same directory as the current rcp executable
133/// 2. PATH via `which rcpd`
134///
135/// This covers:
136/// - Development builds (cargo run/test): rcpd is in same directory as rcp in target/
137/// - cargo install: rcpd is in ~/.cargo/bin (which should be in PATH)
138/// - nixpkgs: rcpd is available via nix profile (which adds to PATH)
139/// - Production deployments: rcp and rcpd are co-located
140///
141/// # Returns
142///
143/// Path to the local rcpd binary suitable for deployment
144///
145/// # Errors
146///
147/// Returns an error if no suitable binary is found
148pub fn find_local_rcpd_binary() -> anyhow::Result<PathBuf> {
149 let mut searched_paths = Vec::new();
150
151 // try same directory as current executable first
152 // this ensures we use the same build (debug/release) as the running rcp
153 // and covers development builds where rcp and rcpd are both in target/
154 if let Ok(current_exe) = std::env::current_exe()
155 && let Some(bin_dir) = current_exe.parent()
156 {
157 let path = bin_dir.join("rcpd");
158 searched_paths.push(format!("Same directory: {}", path.display()));
159 if path.exists() && path.is_file() {
160 tracing::info!("Found local rcpd binary at {}", path.display());
161 return Ok(path);
162 }
163 }
164
165 // try PATH (covers cargo install, nixpkgs, and other system installations)
166 tracing::debug!("Trying to find rcpd in PATH");
167 let which_output = std::process::Command::new("which")
168 .arg("rcpd")
169 .output()
170 .ok();
171
172 if let Some(output) = which_output
173 && output.status.success()
174 {
175 let path_str = String::from_utf8_lossy(&output.stdout);
176 let path_str = path_str.trim();
177 if !path_str.is_empty() {
178 let path = PathBuf::from(path_str);
179 searched_paths.push(format!("PATH: {}", path.display()));
180 if path.exists() && path.is_file() {
181 tracing::info!("Found local rcpd binary in PATH: {}", path.display());
182 return Ok(path);
183 }
184 }
185 }
186
187 anyhow::bail!(
188 "no local rcpd binary found for deployment\n\
189 \n\
190 Searched in:\n\
191 {}\n\
192 \n\
193 To use auto-deployment, ensure rcpd is available:\n\
194 - cargo install rcp-tools-rcp (installs to ~/.cargo/bin)\n\
195 - or add rcpd to PATH\n\
196 - or build with: cargo build --release --bin rcpd",
197 searched_paths
198 .iter()
199 .map(|p| format!("- {}", p))
200 .collect::<Vec<_>>()
201 .join("\n")
202 )
203}
204
205/// Deploy rcpd binary to remote host
206///
207/// Transfers the local static rcpd binary to the remote host at
208/// `~/.cache/rcp/bin/rcpd-{version}`, verifies the checksum, and returns
209/// the path to the deployed binary.
210///
211/// # Arguments
212///
213/// * `session` - SSH session to the remote host
214/// * `local_rcpd_path` - Path to the local static rcpd binary to deploy
215/// * `version` - Semantic version string for the binary
216/// * `remote_host` - Hostname for logging/error messages
217///
218/// # Returns
219///
220/// The path to the deployed binary on the remote host
221///
222/// # Errors
223///
224/// Returns an error if:
225/// - Local binary cannot be read
226/// - Remote directory creation fails
227/// - Transfer fails
228/// - Checksum verification fails
229pub async fn deploy_rcpd(
230 session: &Arc<openssh::Session>,
231 local_rcpd_path: &std::path::Path,
232 version: &str,
233 remote_host: &str,
234) -> anyhow::Result<String> {
235 tracing::info!(
236 "Deploying rcpd {} to remote host '{}'",
237 version,
238 remote_host
239 );
240
241 // read local binary
242 let binary = tokio::fs::read(local_rcpd_path).await.with_context(|| {
243 format!(
244 "failed to read local rcpd binary from {}",
245 local_rcpd_path.display()
246 )
247 })?;
248
249 tracing::info!(
250 "Read local rcpd binary ({} bytes) from {}",
251 binary.len(),
252 local_rcpd_path.display()
253 );
254
255 // compute checksum before transfer
256 let expected_checksum = compute_sha256(&binary);
257 tracing::debug!("Expected SHA-256: {}", hex::encode(&expected_checksum));
258
259 // validate HOME is set and construct remote path
260 let home = crate::get_remote_home(session).await?;
261 let remote_path = format!("{}/.cache/rcp/bin/rcpd-{}", home, version);
262
263 // transfer binary via base64 over SSH
264 transfer_binary_base64(session, &binary, &remote_path).await?;
265
266 tracing::info!("Binary transferred to {}", remote_path);
267
268 // verify checksum on remote
269 verify_remote_checksum(session, &remote_path, &expected_checksum).await?;
270
271 tracing::info!("Checksum verified successfully");
272
273 Ok(remote_path)
274}
275
276/// Transfer binary to remote host using base64 encoding
277///
278/// Creates the target directory if needed, transfers the binary via base64
279/// encoding through SSH stdin, and sets appropriate permissions (700).
280///
281/// # Arguments
282///
283/// * `session` - SSH session to the remote host
284/// * `binary` - Binary content to transfer
285/// * `remote_path` - Destination path on remote host (should use $HOME, will be created)
286///
287/// # Errors
288///
289/// Returns an error if directory creation, transfer, or permission setting fails
290async fn transfer_binary_base64(
291 session: &Arc<openssh::Session>,
292 binary: &[u8],
293 remote_path: &str,
294) -> anyhow::Result<()> {
295 use base64::Engine;
296
297 // encode binary as base64
298 let encoded = base64::engine::general_purpose::STANDARD.encode(binary);
299
300 // extract directory and filename from remote_path
301 // remote_path format: $HOME/.cache/rcp/bin/rcpd-{version}
302 let path = std::path::Path::new(remote_path);
303 let dir = path
304 .parent()
305 .context("remote path must have a parent directory")?
306 .to_str()
307 .context("remote path parent must be valid UTF-8")?;
308 let filename = path
309 .file_name()
310 .context("remote path must have a filename")?
311 .to_str()
312 .context("remote filename must be valid UTF-8")?;
313
314 // use $$ (shell PID) for unique temp filename to prevent concurrent deployment conflicts
315 // the $$ expands to the shell process PID at runtime, ensuring each deployment has a unique temp file
316 // this allows multiple rcp instances to deploy simultaneously without interfering with each other
317 // extract version from filename (format: rcpd-{version})
318 let temp_filename = if let Some(version) = filename.strip_prefix("rcpd-") {
319 format!(".rcpd-{}.tmp.$$", version)
320 } else {
321 format!(".{}.tmp.$$", filename)
322 };
323
324 // escape all variables for safe shell usage
325 let dir_escaped = crate::shell_escape(dir);
326 let temp_path = format!("{}/{}", dir, temp_filename);
327 let temp_path_escaped = crate::shell_escape(&temp_path);
328 let final_path = format!("{}/{}", dir, filename);
329 let final_path_escaped = crate::shell_escape(&final_path);
330
331 // deployment command sequence (all connected with && to fail fast on any error):
332 // 1. mkdir -p: create cache directory (idempotent, safe for concurrent execution)
333 // 2. base64 -d > temp: decode and write to unique temp file ($$-suffixed)
334 // 3. chmod 700: mark temp file executable
335 // 4. mv -f: atomic rename to final location (POSIX guarantees atomicity)
336 //
337 // the final 'mv -f' is the critical atomic operation:
338 // - on POSIX filesystems, rename(2) is atomic - either the new file appears or the old remains
339 // - concurrent deployments will each complete their mv atomically in some order
340 // - readers of the final file will see either the old or new inode, never partial writes
341 let cmd = format!(
342 "mkdir -p {} && \
343 base64 -d > {} && \
344 chmod 700 {} && \
345 mv -f {} {}",
346 dir_escaped, temp_path_escaped, temp_path_escaped, temp_path_escaped, final_path_escaped
347 );
348
349 tracing::debug!("Running remote command: mkdir && base64 && chmod");
350
351 let mut child = session
352 .command("sh")
353 .arg("-c")
354 .arg(&cmd)
355 .stdin(openssh::Stdio::piped())
356 .stdout(openssh::Stdio::piped())
357 .stderr(openssh::Stdio::piped())
358 .spawn()
359 .await
360 .context("failed to spawn remote command for binary transfer")?;
361
362 // take handles for all streams
363 let mut stdin = child
364 .stdin()
365 .take()
366 .context("failed to get stdin for remote command")?;
367
368 let mut stdout = child
369 .stdout()
370 .take()
371 .context("failed to get stdout for remote command")?;
372
373 let mut stderr = child
374 .stderr()
375 .take()
376 .context("failed to get stderr for remote command")?;
377
378 // write to stdin and close it before reading stdout/stderr
379 // this ensures the child process receives EOF on stdin before we wait for it to finish
380 use tokio::io::{AsyncReadExt, AsyncWriteExt};
381
382 // write all base64 data to stdin, capturing errors instead of returning
383 // immediately — if this fails (e.g. broken pipe), we still need to read
384 // stderr to learn why the remote command failed
385 let write_result = stdin.write_all(encoded.as_bytes()).await;
386
387 if write_result.is_ok() {
388 // shutdown stdin to send EOF to the remote `base64 -d` process
389 stdin.shutdown().await.context("failed to shutdown stdin")?;
390 }
391 // drop stdin so the remote process can finish even if the write failed
392 drop(stdin);
393
394 // read stdout and stderr to completion — stderr is critical for diagnostics
395 // when the remote command fails before accepting all input
396 let stdout_fut = async {
397 let mut buf = Vec::new();
398 let _ = stdout.read_to_end(&mut buf).await;
399 buf
400 };
401
402 let stderr_fut = async {
403 let mut buf = Vec::new();
404 let _ = stderr.read_to_end(&mut buf).await;
405 buf
406 };
407
408 let (_stdout_data, stderr_data) = tokio::join!(stdout_fut, stderr_fut);
409
410 // wait for command to complete
411 let status = child
412 .wait()
413 .await
414 .context("failed to wait for remote command completion")?;
415
416 // if writing to stdin failed (broken pipe), the remote command exited early —
417 // include stderr so the user sees the actual cause (e.g. "Permission denied")
418 if let Err(write_err) = write_result {
419 anyhow::bail!("{}", format_write_error(&write_err, &stderr_data, &status));
420 }
421
422 if !status.success() {
423 let stderr = String::from_utf8_lossy(&stderr_data);
424 anyhow::bail!(
425 "failed to transfer binary to remote host\n\
426 \n\
427 stderr: {}\n\
428 \n\
429 {TRANSFER_HINTS}",
430 stderr
431 );
432 }
433
434 Ok(())
435}
436
437/// Verify checksum of transferred binary on remote host
438///
439/// Runs `sha256sum` on the remote host and compares the result with
440/// the expected checksum.
441///
442/// # Arguments
443///
444/// * `session` - SSH session to the remote host
445/// * `remote_path` - Path to the binary on the remote host (should use $HOME)
446/// * `expected_checksum` - Expected SHA-256 digest
447///
448/// # Errors
449///
450/// Returns an error if the checksum command fails or doesn't match
451async fn verify_remote_checksum(
452 session: &Arc<openssh::Session>,
453 remote_path: &str,
454 expected_checksum: &[u8],
455) -> anyhow::Result<()> {
456 // escape remote_path for safe shell usage
457 let cmd = format!("sha256sum {}", crate::shell_escape(remote_path));
458
459 tracing::debug!("Verifying checksum on remote host");
460
461 let output = session
462 .command("sh")
463 .arg("-c")
464 .arg(&cmd)
465 .output()
466 .await
467 .context("failed to run sha256sum on remote host")?;
468
469 if !output.status.success() {
470 let stderr = String::from_utf8_lossy(&output.stderr);
471 anyhow::bail!(
472 "failed to compute checksum on remote host\n\
473 stderr: {}",
474 stderr
475 );
476 }
477
478 let stdout = String::from_utf8_lossy(&output.stdout);
479 // sha256sum output format: "checksum filename"
480 let remote_checksum = stdout
481 .split_whitespace()
482 .next()
483 .context("unexpected sha256sum output format")?;
484
485 let expected_hex = hex::encode(expected_checksum);
486
487 if remote_checksum != expected_hex {
488 anyhow::bail!(
489 "checksum mismatch after transfer\n\
490 \n\
491 Expected: {}\n\
492 Got: {}\n\
493 \n\
494 The binary transfer may have been corrupted.\n\
495 Please try again or check network connectivity.",
496 expected_hex,
497 remote_checksum
498 );
499 }
500
501 Ok(())
502}
503
504/// Compute SHA-256 hash of data
505fn compute_sha256(data: &[u8]) -> Vec<u8> {
506 use sha2::{Digest, Sha256};
507 Sha256::digest(data).to_vec()
508}
509
510/// Clean up old rcpd versions on remote host
511///
512/// Keeps the most recent `keep_count` versions and removes older ones.
513/// This prevents disk space from growing unbounded as versions are deployed.
514///
515/// # Arguments
516///
517/// * `session` - SSH session to the remote host
518/// * `keep_count` - Number of recent versions to keep (default: 3)
519///
520/// # Errors
521///
522/// Returns an error if the cleanup command fails (but this is not fatal)
523pub async fn cleanup_old_versions(
524 session: &Arc<openssh::Session>,
525 keep_count: usize,
526) -> anyhow::Result<()> {
527 tracing::debug!("Cleaning up old rcpd versions (keeping {})", keep_count);
528
529 // validate HOME is set before constructing the cache path
530 // if this fails, we log and return Ok since cleanup is best-effort
531 let home = match crate::get_remote_home(session).await {
532 Ok(h) => h,
533 Err(e) => {
534 tracing::warn!(
535 "cleanup of old versions skipped (HOME not available): {:#}",
536 e
537 );
538 return Ok(());
539 }
540 };
541
542 // list all rcpd-* files sorted by modification time (newest first)
543 // keep the newest N, remove the rest
544 let cache_dir = format!("{}/.cache/rcp/bin", home);
545 let cmd = format!(
546 "cd {} 2>/dev/null && ls -t rcpd-* 2>/dev/null | tail -n +{} | xargs -r rm -f",
547 crate::shell_escape(&cache_dir),
548 keep_count + 1
549 );
550
551 let output = session
552 .command("sh")
553 .arg("-c")
554 .arg(&cmd)
555 .output()
556 .await
557 .context("failed to run cleanup command on remote host")?;
558
559 if !output.status.success() {
560 let stderr = String::from_utf8_lossy(&output.stderr);
561 // log but don't fail - cleanup is best-effort
562 tracing::warn!("cleanup of old versions failed (non-fatal): {}", stderr);
563 } else {
564 tracing::debug!("Old versions cleaned up successfully");
565 }
566
567 Ok(())
568}
569
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw OS error code the write-error tests rely on being rendered as "Broken pipe"
    const EPIPE: i32 = 32;

    /// Build the I/O error used as the failed-write cause in the tests below
    fn broken_pipe() -> std::io::Error {
        std::io::Error::from_raw_os_error(EPIPE)
    }

    #[test]
    fn test_compute_sha256() {
        // known SHA-256 of "hello world"
        let expected =
            hex::decode("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")
                .unwrap();
        assert_eq!(compute_sha256(b"hello world"), expected);
    }

    #[test]
    fn test_compute_sha256_empty() {
        // known SHA-256 of empty string
        let expected =
            hex::decode("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
                .unwrap();
        assert_eq!(compute_sha256(b""), expected);
    }

    #[test]
    fn test_compute_sha256_binary() {
        // every possible byte value once, so the input is not valid UTF-8
        let data: Vec<u8> = (0..=u8::MAX).collect();
        let first = compute_sha256(&data);
        let second = compute_sha256(&data);
        // a SHA-256 digest is always 32 bytes
        assert_eq!(first.len(), 32);
        // hashing the same input twice must give the same digest
        assert_eq!(first, second);
    }

    #[test]
    fn write_error_with_stderr_includes_remote_output() {
        let msg = format_write_error(
            &broken_pipe(),
            b"mkdir: cannot create directory: Permission denied",
            &"exited with 1",
        );
        assert!(msg.contains("Broken pipe"), "should contain write error");
        assert!(
            msg.contains("Permission denied"),
            "should contain remote stderr"
        );
        assert!(
            msg.contains("This may indicate"),
            "should contain hint text"
        );
        // the exit status line is only used when stderr has nothing to say
        assert!(
            !msg.contains("remote command exited with status"),
            "should omit status when stderr is available"
        );
    }

    #[test]
    fn write_error_without_stderr_includes_exit_status() {
        let msg = format_write_error(&broken_pipe(), b"", &"exited with 126");
        assert!(msg.contains("Broken pipe"), "should contain write error");
        assert!(
            msg.contains("remote command exited with status: exited with 126"),
            "should contain exit status"
        );
        assert!(
            msg.contains("remote stderr was empty"),
            "should note stderr was empty"
        );
        assert!(
            msg.contains("This may indicate"),
            "should contain hint text"
        );
    }

    #[test]
    fn write_error_trims_whitespace_only_stderr() {
        let msg = format_write_error(&broken_pipe(), b"  \n\t  ", &"exited with 1");
        // whitespace-only stderr should be treated as empty
        assert!(
            msg.contains("remote stderr was empty"),
            "whitespace-only stderr should be treated as empty"
        );
    }
}