remote/deploy.rs
1//! Binary deployment for rcpd
2//!
3//! This module handles automatic deployment of rcpd binaries to remote hosts.
4//! It transfers static rcpd binaries via SSH using base64 encoding, verifies integrity with SHA-256 checksums, and manages cached versions.
5//!
6//! ## Atomicity and Concurrent Deployment Safety
7//!
8//! The deployment mechanism is designed to handle concurrent deployments from multiple rcp instances safely:
9//!
10//! ### Atomic Operations
11//!
12//! 1. **Unique Temporary Files**: Each deployment uses a shell-PID-unique temp file
13//! (`.rcpd-{version}.tmp.$$`) which ensures concurrent deployments don't
14//! interfere with each other. The `$$` expands to the shell process PID,
15//! guaranteeing uniqueness even when multiple deployments run simultaneously.
16//!
17//! 2. **Atomic Rename**: The final deployment step uses `mv -f` which is atomic
18//! on POSIX-compliant filesystems. This means:
19//! - The binary is either fully present at the final location or not present at all
20//! - No partial writes are visible to readers
21//! - Concurrent renames of the same file complete in a well-defined order
22//!
23//! 3. **Write-Then-Verify**: The deployment sequence ensures the binary is:
24//! - Fully written to the temp file
25//! - Marked executable (chmod 700)
26//! - Moved atomically to the final location
27//! - Checksummed after the move completes
28//!
29//! ### Race Condition Scenarios
30//!
31//! **Scenario 1: Multiple rcp instances deploying the same version concurrently**
32//!
33//! - Each uses a unique temp file (`.rcpd-0.22.0.tmp.1234`, `.rcpd-0.22.0.tmp.5678`)
//! - Both successfully write their own temp files (checksum verification runs only after the move)
35//! - Both attempt `mv -f .rcpd-0.22.0.tmp.$$ rcpd-0.22.0`
36//! - The filesystem ensures one wins atomically, the other overwrites atomically
37//! - Result: Final binary is valid (both were identical and checksummed)
38//!
39//! **Scenario 2: One deployment while another is reading**
40//!
41//! - Reader opens `rcpd-0.22.0` and gets a valid file descriptor
42//! - Writer completes deployment and `mv -f` replaces the inode
43//! - Reader continues reading from the original inode (POSIX semantics)
44//! - Result: Reader gets the old version (but it's still valid)
45//!
46//! **Scenario 3: Deployment interrupted (network failure, SIGKILL)**
47//!
48//! - Temp file may be left in `.cache/rcp/bin/.rcpd-{version}.tmp.*`
49//! - Final file is either:
50//! - Not present (deployment never completed)
51//! - Present and valid (mv completed before interruption)
52//! - Temp files are hidden (dotfiles) and don't interfere with discovery
53//! - Result: Safe to retry; old temp files are harmless
54//!
55//! ### Assumptions
56//!
57//! 1. **POSIX Filesystem Semantics**: The deployment assumes the remote filesystem
58//! supports atomic `mv` (rename) operations. This is true for all POSIX-compliant
59//! filesystems (ext4, xfs, btrfs, etc.) but may not hold for network filesystems
60//! with relaxed consistency (NFSv3 without proper locking).
61//!
62//! 2. **Unique Shell PIDs**: The `$$` shell variable expands to the process ID,
63//! which is assumed to be unique during the lifetime of the deployment. This is
64//! guaranteed by the OS but requires PIDs not to wrap around extremely rapidly.
65//!
66//! 3. **Checksum Integrity**: SHA-256 checksums are assumed to be collision-resistant.
67//! If two different binaries produce the same checksum (astronomically unlikely),
68//! the deployment would consider them identical.
69//!
70//! 4. **No Malicious Interference**: The deployment assumes the remote host is not
71//! actively malicious (no adversary replacing files during deployment). Protection
72//! against malicious hosts is provided by SSH authentication, not by this module.
73//!
74//! ### Non-Atomic Operations
75//!
76//! The following operations are **not** atomic and may observe intermediate states:
77//!
78//! - **Cleanup of old versions**: Uses `ls -t | tail | xargs rm` which may race with
79//! concurrent deployments. This is acceptable because cleanup only removes old
80//! versions, never the current version being deployed. Worst case: a version is
81//! not cleaned up and remains on disk.
82//!
83//! - **Directory creation**: `mkdir -p` may race with concurrent deployments creating
84//! the same directory. This is safe because `mkdir -p` is idempotent and succeeds
85//! if the directory already exists.
86
87use anyhow::Context;
88use std::path::PathBuf;
89use std::sync::Arc;
90
91/// Find local static rcpd binary suitable for deployment
92///
93/// Searches in the following order:
94/// 1. Same directory as the current rcp executable
95/// 2. PATH via `which rcpd`
96///
97/// This covers:
98/// - Development builds (cargo run/test): rcpd is in same directory as rcp in target/
99/// - cargo install: rcpd is in ~/.cargo/bin (which should be in PATH)
100/// - nixpkgs: rcpd is available via nix profile (which adds to PATH)
101/// - Production deployments: rcp and rcpd are co-located
102///
103/// # Returns
104///
105/// Path to the local rcpd binary suitable for deployment
106///
107/// # Errors
108///
109/// Returns an error if no suitable binary is found
110pub fn find_local_rcpd_binary() -> anyhow::Result<PathBuf> {
111 let mut searched_paths = Vec::new();
112
113 // try same directory as current executable first
114 // this ensures we use the same build (debug/release) as the running rcp
115 // and covers development builds where rcp and rcpd are both in target/
116 if let Ok(current_exe) = std::env::current_exe() {
117 if let Some(bin_dir) = current_exe.parent() {
118 let path = bin_dir.join("rcpd");
119 searched_paths.push(format!("Same directory: {}", path.display()));
120 if path.exists() && path.is_file() {
121 tracing::info!("Found local rcpd binary at {}", path.display());
122 return Ok(path);
123 }
124 }
125 }
126
127 // try PATH (covers cargo install, nixpkgs, and other system installations)
128 tracing::debug!("Trying to find rcpd in PATH");
129 let which_output = std::process::Command::new("which")
130 .arg("rcpd")
131 .output()
132 .ok();
133
134 if let Some(output) = which_output {
135 if output.status.success() {
136 let path_str = String::from_utf8_lossy(&output.stdout);
137 let path_str = path_str.trim();
138 if !path_str.is_empty() {
139 let path = PathBuf::from(path_str);
140 searched_paths.push(format!("PATH: {}", path.display()));
141 if path.exists() && path.is_file() {
142 tracing::info!("Found local rcpd binary in PATH: {}", path.display());
143 return Ok(path);
144 }
145 }
146 }
147 }
148
149 anyhow::bail!(
150 "no local rcpd binary found for deployment\n\
151 \n\
152 Searched in:\n\
153 {}\n\
154 \n\
155 To use auto-deployment, ensure rcpd is available:\n\
156 - cargo install rcp-tools-rcp (installs to ~/.cargo/bin)\n\
157 - or add rcpd to PATH\n\
158 - or build with: cargo build --release --bin rcpd",
159 searched_paths
160 .iter()
161 .map(|p| format!("- {}", p))
162 .collect::<Vec<_>>()
163 .join("\n")
164 )
165}
166
167/// Deploy rcpd binary to remote host
168///
169/// Transfers the local static rcpd binary to the remote host at
170/// `~/.cache/rcp/bin/rcpd-{version}`, verifies the checksum, and returns
171/// the path to the deployed binary.
172///
173/// # Arguments
174///
175/// * `session` - SSH session to the remote host
176/// * `local_rcpd_path` - Path to the local static rcpd binary to deploy
177/// * `version` - Semantic version string for the binary
178/// * `remote_host` - Hostname for logging/error messages
179///
180/// # Returns
181///
182/// The path to the deployed binary on the remote host
183///
184/// # Errors
185///
186/// Returns an error if:
187/// - Local binary cannot be read
188/// - Remote directory creation fails
189/// - Transfer fails
190/// - Checksum verification fails
191pub async fn deploy_rcpd(
192 session: &Arc<openssh::Session>,
193 local_rcpd_path: &std::path::Path,
194 version: &str,
195 remote_host: &str,
196) -> anyhow::Result<String> {
197 tracing::info!(
198 "Deploying rcpd {} to remote host '{}'",
199 version,
200 remote_host
201 );
202
203 // read local binary
204 let binary = tokio::fs::read(local_rcpd_path).await.with_context(|| {
205 format!(
206 "failed to read local rcpd binary from {}",
207 local_rcpd_path.display()
208 )
209 })?;
210
211 tracing::info!(
212 "Read local rcpd binary ({} bytes) from {}",
213 binary.len(),
214 local_rcpd_path.display()
215 );
216
217 // compute checksum before transfer
218 let expected_checksum = compute_sha256(&binary);
219 tracing::debug!("Expected SHA-256: {}", hex::encode(&expected_checksum));
220
221 // validate HOME is set and construct remote path
222 let home = crate::get_remote_home(session).await?;
223 let remote_path = format!("{}/.cache/rcp/bin/rcpd-{}", home, version);
224
225 // transfer binary via base64 over SSH
226 transfer_binary_base64(session, &binary, &remote_path).await?;
227
228 tracing::info!("Binary transferred to {}", remote_path);
229
230 // verify checksum on remote
231 verify_remote_checksum(session, &remote_path, &expected_checksum).await?;
232
233 tracing::info!("Checksum verified successfully");
234
235 Ok(remote_path)
236}
237
238/// Transfer binary to remote host using base64 encoding
239///
240/// Creates the target directory if needed, transfers the binary via base64
241/// encoding through SSH stdin, and sets appropriate permissions (700).
242///
243/// # Arguments
244///
245/// * `session` - SSH session to the remote host
246/// * `binary` - Binary content to transfer
247/// * `remote_path` - Destination path on remote host (should use $HOME, will be created)
248///
249/// # Errors
250///
251/// Returns an error if directory creation, transfer, or permission setting fails
252async fn transfer_binary_base64(
253 session: &Arc<openssh::Session>,
254 binary: &[u8],
255 remote_path: &str,
256) -> anyhow::Result<()> {
257 use base64::Engine;
258
259 // encode binary as base64
260 let encoded = base64::engine::general_purpose::STANDARD.encode(binary);
261
262 // extract directory and filename from remote_path
263 // remote_path format: $HOME/.cache/rcp/bin/rcpd-{version}
264 let path = std::path::Path::new(remote_path);
265 let dir = path
266 .parent()
267 .context("remote path must have a parent directory")?
268 .to_str()
269 .context("remote path parent must be valid UTF-8")?;
270 let filename = path
271 .file_name()
272 .context("remote path must have a filename")?
273 .to_str()
274 .context("remote filename must be valid UTF-8")?;
275
276 // use $$ (shell PID) for unique temp filename to prevent concurrent deployment conflicts
277 // the $$ expands to the shell process PID at runtime, ensuring each deployment has a unique temp file
278 // this allows multiple rcp instances to deploy simultaneously without interfering with each other
279 // extract version from filename (format: rcpd-{version})
280 let temp_filename = if let Some(version) = filename.strip_prefix("rcpd-") {
281 format!(".rcpd-{}.tmp.$$", version)
282 } else {
283 format!(".{}.tmp.$$", filename)
284 };
285
286 // escape all variables for safe shell usage
287 let dir_escaped = crate::shell_escape(dir);
288 let temp_path = format!("{}/{}", dir, temp_filename);
289 let temp_path_escaped = crate::shell_escape(&temp_path);
290 let final_path = format!("{}/{}", dir, filename);
291 let final_path_escaped = crate::shell_escape(&final_path);
292
293 // deployment command sequence (all connected with && to fail fast on any error):
294 // 1. mkdir -p: create cache directory (idempotent, safe for concurrent execution)
295 // 2. base64 -d > temp: decode and write to unique temp file ($$-suffixed)
296 // 3. chmod 700: mark temp file executable
297 // 4. mv -f: atomic rename to final location (POSIX guarantees atomicity)
298 //
299 // the final 'mv -f' is the critical atomic operation:
300 // - on POSIX filesystems, rename(2) is atomic - either the new file appears or the old remains
301 // - concurrent deployments will each complete their mv atomically in some order
302 // - readers of the final file will see either the old or new inode, never partial writes
303 let cmd = format!(
304 "mkdir -p {} && \
305 base64 -d > {} && \
306 chmod 700 {} && \
307 mv -f {} {}",
308 dir_escaped, temp_path_escaped, temp_path_escaped, temp_path_escaped, final_path_escaped
309 );
310
311 tracing::debug!("Running remote command: mkdir && base64 && chmod");
312
313 let mut child = session
314 .command("sh")
315 .arg("-c")
316 .arg(&cmd)
317 .stdin(openssh::Stdio::piped())
318 .stdout(openssh::Stdio::piped())
319 .stderr(openssh::Stdio::piped())
320 .spawn()
321 .await
322 .context("failed to spawn remote command for binary transfer")?;
323
324 // take handles for all streams
325 let mut stdin = child
326 .stdin()
327 .take()
328 .context("failed to get stdin for remote command")?;
329
330 let mut stdout = child
331 .stdout()
332 .take()
333 .context("failed to get stdout for remote command")?;
334
335 let mut stderr = child
336 .stderr()
337 .take()
338 .context("failed to get stderr for remote command")?;
339
340 // write to stdin and close it before reading stdout/stderr
341 // this ensures the child process receives EOF on stdin before we wait for it to finish
342 use tokio::io::{AsyncReadExt, AsyncWriteExt};
343
344 // write all base64 data to stdin
345 stdin
346 .write_all(encoded.as_bytes())
347 .await
348 .context("failed to write base64 data to remote stdin")?;
349
350 // shutdown and explicitly drop stdin to ensure EOF is sent to child process
351 stdin.shutdown().await.context("failed to shutdown stdin")?;
352 drop(stdin);
353
354 // now read stdout and stderr to completion
355 // these will complete once the child process exits and closes the pipes
356 let stdout_fut = async {
357 let mut buf = Vec::new();
358 let _ = stdout.read_to_end(&mut buf).await;
359 buf
360 };
361
362 let stderr_fut = async {
363 let mut buf = Vec::new();
364 let _ = stderr.read_to_end(&mut buf).await;
365 buf
366 };
367
368 let (_stdout_data, stderr_data) = tokio::join!(stdout_fut, stderr_fut);
369
370 // wait for command to complete
371 let status = child
372 .wait()
373 .await
374 .context("failed to wait for remote command completion")?;
375
376 if !status.success() {
377 let stderr = String::from_utf8_lossy(&stderr_data);
378 anyhow::bail!(
379 "failed to transfer binary to remote host\n\
380 \n\
381 stderr: {}\n\
382 \n\
383 This may indicate:\n\
384 - Insufficient disk space on remote host\n\
385 - Permission denied creating $HOME/.cache/rcp/bin\n\
386 - base64 command not available on remote host",
387 stderr
388 );
389 }
390
391 Ok(())
392}
393
394/// Verify checksum of transferred binary on remote host
395///
396/// Runs `sha256sum` on the remote host and compares the result with
397/// the expected checksum.
398///
399/// # Arguments
400///
401/// * `session` - SSH session to the remote host
402/// * `remote_path` - Path to the binary on the remote host (should use $HOME)
403/// * `expected_checksum` - Expected SHA-256 digest
404///
405/// # Errors
406///
407/// Returns an error if the checksum command fails or doesn't match
408async fn verify_remote_checksum(
409 session: &Arc<openssh::Session>,
410 remote_path: &str,
411 expected_checksum: &[u8],
412) -> anyhow::Result<()> {
413 // escape remote_path for safe shell usage
414 let cmd = format!("sha256sum {}", crate::shell_escape(remote_path));
415
416 tracing::debug!("Verifying checksum on remote host");
417
418 let output = session
419 .command("sh")
420 .arg("-c")
421 .arg(&cmd)
422 .output()
423 .await
424 .context("failed to run sha256sum on remote host")?;
425
426 if !output.status.success() {
427 let stderr = String::from_utf8_lossy(&output.stderr);
428 anyhow::bail!(
429 "failed to compute checksum on remote host\n\
430 stderr: {}",
431 stderr
432 );
433 }
434
435 let stdout = String::from_utf8_lossy(&output.stdout);
436 // sha256sum output format: "checksum filename"
437 let remote_checksum = stdout
438 .split_whitespace()
439 .next()
440 .context("unexpected sha256sum output format")?;
441
442 let expected_hex = hex::encode(expected_checksum);
443
444 if remote_checksum != expected_hex {
445 anyhow::bail!(
446 "checksum mismatch after transfer\n\
447 \n\
448 Expected: {}\n\
449 Got: {}\n\
450 \n\
451 The binary transfer may have been corrupted.\n\
452 Please try again or check network connectivity.",
453 expected_hex,
454 remote_checksum
455 );
456 }
457
458 Ok(())
459}
460
461/// Compute SHA-256 hash of data
462fn compute_sha256(data: &[u8]) -> Vec<u8> {
463 use sha2::{Digest, Sha256};
464 Sha256::digest(data).to_vec()
465}
466
467/// Clean up old rcpd versions on remote host
468///
469/// Keeps the most recent `keep_count` versions and removes older ones.
470/// This prevents disk space from growing unbounded as versions are deployed.
471///
472/// # Arguments
473///
474/// * `session` - SSH session to the remote host
475/// * `keep_count` - Number of recent versions to keep (default: 3)
476///
477/// # Errors
478///
479/// Returns an error if the cleanup command fails (but this is not fatal)
480pub async fn cleanup_old_versions(
481 session: &Arc<openssh::Session>,
482 keep_count: usize,
483) -> anyhow::Result<()> {
484 tracing::debug!("Cleaning up old rcpd versions (keeping {})", keep_count);
485
486 // validate HOME is set before constructing the cache path
487 // if this fails, we log and return Ok since cleanup is best-effort
488 let home = match crate::get_remote_home(session).await {
489 Ok(h) => h,
490 Err(e) => {
491 tracing::warn!(
492 "cleanup of old versions skipped (HOME not available): {:#}",
493 e
494 );
495 return Ok(());
496 }
497 };
498
499 // list all rcpd-* files sorted by modification time (newest first)
500 // keep the newest N, remove the rest
501 let cache_dir = format!("{}/.cache/rcp/bin", home);
502 let cmd = format!(
503 "cd {} 2>/dev/null && ls -t rcpd-* 2>/dev/null | tail -n +{} | xargs -r rm -f",
504 crate::shell_escape(&cache_dir),
505 keep_count + 1
506 );
507
508 let output = session
509 .command("sh")
510 .arg("-c")
511 .arg(&cmd)
512 .output()
513 .await
514 .context("failed to run cleanup command on remote host")?;
515
516 if !output.status.success() {
517 let stderr = String::from_utf8_lossy(&output.stderr);
518 // log but don't fail - cleanup is best-effort
519 tracing::warn!("cleanup of old versions failed (non-fatal): {}", stderr);
520 } else {
521 tracing::debug!("Old versions cleaned up successfully");
522 }
523
524 Ok(())
525}
526
#[cfg(test)]
mod tests {
    use super::*;

    /// Digest of a short ASCII input matches the known SHA-256 value.
    #[test]
    fn test_compute_sha256() {
        // known SHA-256 of "hello world"
        let expected =
            hex::decode("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9")
                .unwrap();
        assert_eq!(compute_sha256(b"hello world"), expected);
    }

    /// Digest of the empty input matches the known SHA-256 value.
    #[test]
    fn test_compute_sha256_empty() {
        // known SHA-256 of empty string
        let expected =
            hex::decode("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
                .unwrap();
        assert_eq!(compute_sha256(b""), expected);
    }

    /// Non-UTF-8 input yields a 32-byte digest and hashing is repeatable.
    #[test]
    fn test_compute_sha256_binary() {
        // every byte value once, 0x00 through 0xff
        let data: Vec<u8> = (0..=u8::MAX).collect();
        let first = compute_sha256(&data);
        assert_eq!(first.len(), 32);
        let second = compute_sha256(&data);
        assert_eq!(first, second);
    }
}