commonware_deployer/ec2/
utils.rs

1//! Utility functions for interacting with EC2 instances
2
3use crate::ec2::Error;
4use std::path::Path;
5use tokio::{
6    fs::File,
7    io::AsyncWriteExt,
8    process::Command,
9    time::{sleep, Duration},
10};
11use tracing::{info, warn};
12
/// Upper bound on the number of SSH attempts made before giving up
pub const MAX_SSH_ATTEMPTS: usize = 30;

/// Upper bound on the number of service status polls made before giving up
pub const MAX_POLL_ATTEMPTS: usize = 30;

/// Delay between consecutive retries
pub const RETRY_INTERVAL: Duration = Duration::from_secs(10);

/// Protocol used for deployer ingress
pub const DEPLOYER_PROTOCOL: &str = "tcp";

/// Lowest port of the deployer ingress range
pub const DEPLOYER_MIN_PORT: i32 = 0;

/// Highest port of the deployer ingress range (top of the 16-bit port space, 65535)
// Widening `u16 -> i32` cast: lossless, and usable in a `const` initializer.
pub const DEPLOYER_MAX_PORT: i32 = u16::MAX as i32;
30
31/// Fetch the current machine's public IPv4 address
32pub async fn get_public_ip() -> Result<String, Error> {
33    // icanhazip.com is maintained by Cloudflare as of 6/6/2021 (https://major.io/p/a-new-future-for-icanhazip/)
34    let result = reqwest::get("https://ipv4.icanhazip.com")
35        .await?
36        .text()
37        .await?
38        .trim()
39        .to_string();
40    Ok(result)
41}
42
43/// Executes a command on a remote instance via SSH with retries
44pub async fn ssh_execute(key_file: &str, ip: &str, command: &str) -> Result<(), Error> {
45    for _ in 0..MAX_SSH_ATTEMPTS {
46        let output = Command::new("ssh")
47            .arg("-i")
48            .arg(key_file)
49            .arg("-o")
50            .arg("IdentitiesOnly=yes")
51            .arg("-o")
52            .arg("ServerAliveInterval=600")
53            .arg("-o")
54            .arg("StrictHostKeyChecking=no")
55            .arg(format!("ubuntu@{ip}"))
56            .arg(command)
57            .output()
58            .await?;
59        if output.status.success() {
60            return Ok(());
61        }
62        warn!(error = ?String::from_utf8_lossy(&output.stderr), "SSH failed");
63        sleep(RETRY_INTERVAL).await;
64    }
65    Err(Error::SshFailed)
66}
67
68/// Polls the status of a systemd service on a remote instance until active
69pub async fn poll_service_active(key_file: &str, ip: &str, service: &str) -> Result<(), Error> {
70    for _ in 0..MAX_POLL_ATTEMPTS {
71        let output = Command::new("ssh")
72            .arg("-i")
73            .arg(key_file)
74            .arg("-o")
75            .arg("IdentitiesOnly=yes")
76            .arg("-o")
77            .arg("ServerAliveInterval=600")
78            .arg("-o")
79            .arg("StrictHostKeyChecking=no")
80            .arg(format!("ubuntu@{ip}"))
81            .arg(format!("systemctl is-active {service}"))
82            .output()
83            .await?;
84        let parsed = String::from_utf8_lossy(&output.stdout);
85        let parsed = parsed.trim();
86        if parsed == "active" {
87            return Ok(());
88        }
89        if service == "binary" && parsed == "failed" {
90            warn!(service, "service failed to start (check logs and update)");
91            return Ok(());
92        }
93        warn!(error = ?String::from_utf8_lossy(&output.stderr), service, "active status check failed");
94        sleep(RETRY_INTERVAL).await;
95    }
96    Err(Error::ServiceTimeout(ip.to_string(), service.to_string()))
97}
98
99/// Polls the status of a systemd service on a remote instance until it becomes inactive
100pub async fn poll_service_inactive(key_file: &str, ip: &str, service: &str) -> Result<(), Error> {
101    for _ in 0..MAX_POLL_ATTEMPTS {
102        let output = Command::new("ssh")
103            .arg("-i")
104            .arg(key_file)
105            .arg("-o")
106            .arg("IdentitiesOnly=yes")
107            .arg("-o")
108            .arg("ServerAliveInterval=600")
109            .arg("-o")
110            .arg("StrictHostKeyChecking=no")
111            .arg(format!("ubuntu@{ip}"))
112            .arg(format!("systemctl is-active {service}"))
113            .output()
114            .await?;
115        let parsed = String::from_utf8_lossy(&output.stdout);
116        let parsed = parsed.trim();
117        if parsed == "inactive" {
118            return Ok(());
119        }
120        if service == "binary" && parsed == "failed" {
121            warn!(service, "service was never active");
122            return Ok(());
123        }
124        warn!(error = ?String::from_utf8_lossy(&output.stderr), service, "inactive status check failed");
125        sleep(RETRY_INTERVAL).await;
126    }
127    Err(Error::ServiceTimeout(ip.to_string(), service.to_string()))
128}
129
130/// Enables BBR on a remote instance by downloading config from S3 and applying sysctl settings.
131pub async fn enable_bbr(key_file: &str, ip: &str, bbr_conf_url: &str) -> Result<(), Error> {
132    let download_cmd = format!(
133        "wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/99-bbr.conf '{}'",
134        bbr_conf_url
135    );
136    ssh_execute(key_file, ip, &download_cmd).await?;
137    ssh_execute(
138        key_file,
139        ip,
140        "sudo mv /home/ubuntu/99-bbr.conf /etc/sysctl.d/99-bbr.conf",
141    )
142    .await?;
143    ssh_execute(key_file, ip, "sudo sysctl -p /etc/sysctl.d/99-bbr.conf").await?;
144    Ok(())
145}
146
/// Builds the CIDR block for an address by appending a `/32` prefix length.
///
/// The input is used verbatim; it is not parsed or validated.
pub fn exact_cidr(ip: &str) -> String {
    const PREFIX_LEN: &str = "/32";
    let mut cidr = String::with_capacity(ip.len() + PREFIX_LEN.len());
    cidr.push_str(ip);
    cidr.push_str(PREFIX_LEN);
    cidr
}
151
152/// Maximum number of download attempts before failing
153pub const MAX_DOWNLOAD_ATTEMPTS: usize = 10;
154
155/// Downloads a file from a URL to a local path with retries
156pub async fn download_file(url: &str, dest: &Path) -> Result<(), Error> {
157    for attempt in 1..=MAX_DOWNLOAD_ATTEMPTS {
158        match download_file_once(url, dest).await {
159            Ok(()) => {
160                info!(url = url, dest = ?dest, "downloaded file");
161                return Ok(());
162            }
163            Err(e) => {
164                warn!(
165                    url = url,
166                    attempt = attempt,
167                    error = ?e,
168                    "download attempt failed"
169                );
170                if attempt < MAX_DOWNLOAD_ATTEMPTS {
171                    sleep(RETRY_INTERVAL).await;
172                }
173            }
174        }
175    }
176    Err(Error::DownloadFailed(url.to_string()))
177}
178
179async fn download_file_once(url: &str, dest: &Path) -> Result<(), Error> {
180    let response = reqwest::get(url).await?;
181    if !response.status().is_success() {
182        return Err(Error::DownloadFailed(format!(
183            "HTTP {}: {}",
184            response.status(),
185            url
186        )));
187    }
188
189    let bytes = response.bytes().await?;
190
191    // Create parent directory if it doesn't exist
192    if let Some(parent) = dest.parent() {
193        tokio::fs::create_dir_all(parent).await?;
194    }
195
196    let mut file = File::create(dest).await?;
197    file.write_all(&bytes).await?;
198    file.flush().await?;
199
200    Ok(())
201}