Skip to main content

commonware_deployer/aws/
profile.rs

1//! `profile` subcommand for `ec2`
2
3use crate::aws::{
4    deployer_directory,
5    ec2::{self, *},
6    s3::{self, *},
7    services::*,
8    utils::{download_file, scp_download, ssh_execute},
9    Config, Error, CREATED_FILE_NAME, DESTROYED_FILE_NAME, MONITORING_REGION,
10};
11use aws_sdk_ec2::types::Filter;
12use std::{
13    fs::File,
14    path::{Path, PathBuf},
15    time::{SystemTime, UNIX_EPOCH},
16};
17use tokio::process::Command;
18use tracing::info;
19
20/// Captures a CPU profile from a running instance using samply
21pub async fn profile(
22    config_path: &PathBuf,
23    instance_name: &str,
24    duration: u64,
25    binary_path: &Path,
26) -> Result<(), Error> {
27    // Load config
28    let config: Config = {
29        let config_file = File::open(config_path)?;
30        serde_yaml::from_reader(config_file)?
31    };
32    let tag = &config.tag;
33    info!(tag = tag.as_str(), "loaded configuration");
34
35    // Find the instance config to get its instance_type and region
36    let instance_config = config
37        .instances
38        .iter()
39        .find(|i| i.name == instance_name)
40        .ok_or_else(|| Error::InstanceNotFound(instance_name.to_string()))?;
41    let instance_region = &instance_config.region;
42    let instance_type = &instance_config.instance_type;
43
44    // Ensure created file exists
45    let tag_directory = deployer_directory(Some(tag));
46    let created_file = tag_directory.join(CREATED_FILE_NAME);
47    if !created_file.exists() {
48        return Err(Error::DeploymentNotComplete(tag.clone()));
49    }
50
51    // Ensure destroyed file does not exist
52    let destroyed_file = tag_directory.join(DESTROYED_FILE_NAME);
53    if destroyed_file.exists() {
54        return Err(Error::DeploymentAlreadyDestroyed(tag.clone()));
55    }
56
57    // Construct private key path
58    let private_key_path = tag_directory.join(format!("id_rsa_{tag}"));
59    if !private_key_path.exists() {
60        return Err(Error::PrivateKeyNotFound);
61    }
62    let private_key = private_key_path.to_str().unwrap();
63
64    // Query AWS to find the instance IP
65    let ec2_client = ec2::create_client(Region::new(instance_region.clone())).await;
66    let resp = ec2_client
67        .describe_instances()
68        .filters(Filter::builder().name("tag:deployer").values(tag).build())
69        .filters(
70            Filter::builder()
71                .name("tag:name")
72                .values(instance_name)
73                .build(),
74        )
75        .filters(
76            Filter::builder()
77                .name("instance-state-name")
78                .values("running")
79                .build(),
80        )
81        .send()
82        .await
83        .map_err(|err| err.into_service_error())?;
84    let instance_ip = resp
85        .reservations
86        .unwrap_or_default()
87        .into_iter()
88        .flat_map(|r| r.instances.unwrap_or_default())
89        .filter_map(|i| i.public_ip_address)
90        .next()
91        .ok_or_else(|| Error::InstanceNotFound(instance_name.to_string()))?;
92    info!(
93        instance = instance_name,
94        ip = instance_ip.as_str(),
95        "found instance"
96    );
97
98    // Detect architecture from instance type
99    let arch = detect_architecture(&ec2_client, instance_type).await?;
100    info!(architecture = %arch, "detected architecture");
101
102    // Cache samply binary in S3 if needed and get presigned URL
103    let bucket_name = get_bucket_name();
104    let s3_client = s3::create_client(Region::new(MONITORING_REGION)).await;
105    ensure_bucket_exists(&s3_client, &bucket_name, MONITORING_REGION).await?;
106
107    // Cache samply archive in S3 (like other tools, we cache the archive and extract on the instance)
108    let samply_s3_key = samply_bin_s3_key(SAMPLY_VERSION, arch);
109    let samply_url = if object_exists(&s3_client, &bucket_name, &samply_s3_key).await? {
110        info!(key = samply_s3_key.as_str(), "samply already in S3");
111        presign_url(&s3_client, &bucket_name, &samply_s3_key, PRESIGN_DURATION).await?
112    } else {
113        info!(
114            key = samply_s3_key.as_str(),
115            "samply not in S3, downloading and uploading"
116        );
117        let download_url = samply_download_url(SAMPLY_VERSION, arch);
118        let temp_archive = tag_directory.join("samply.tar.xz");
119
120        // Download the archive
121        download_file(&download_url, &temp_archive).await?;
122
123        // Upload archive to S3
124        let url = cache_and_presign(
125            &s3_client,
126            &bucket_name,
127            &samply_s3_key,
128            UploadSource::File(&temp_archive),
129            PRESIGN_DURATION,
130        )
131        .await?;
132
133        // Clean up temp file
134        let _ = std::fs::remove_file(&temp_archive);
135
136        url
137    };
138
139    // Build the remote profiling script
140    let profile_script = format!(
141        r#"set -e
142
143# Download and extract samply if not present
144if [ ! -f /home/ubuntu/samply ]; then
145    {WGET} -O /tmp/samply.tar.xz '{samply_url}'
146    tar -xJf /tmp/samply.tar.xz -C /home/ubuntu --strip-components=1
147    chmod +x /home/ubuntu/samply
148    rm /tmp/samply.tar.xz
149fi
150
151# Get binary PID
152PID=$(systemctl show --property MainPID binary.service | cut -d= -f2)
153if [ -z "$PID" ] || [ "$PID" -eq 0 ]; then
154    echo "ERROR: binary.service not running" >&2
155    exit 1
156fi
157
158echo "Profiling PID $PID for {duration} seconds..."
159
160# Record profile (use timeout with SIGINT so samply saves the profile)
161rm -f /tmp/profile.json
162sudo timeout -s INT {duration}s /home/ubuntu/samply record -p $PID -s -o /tmp/profile.json || true
163sudo chown ubuntu:ubuntu /tmp/profile.json
164
165echo "Profile captured successfully"
166"#
167    );
168
169    // Run the profiling script on the remote instance
170    info!(
171        instance = instance_name,
172        duration = duration,
173        "starting profile capture"
174    );
175    ssh_execute(private_key, &instance_ip, &profile_script).await?;
176    info!("profile capture complete");
177
178    // Download the profile locally via scp
179    let timestamp = SystemTime::now()
180        .duration_since(UNIX_EPOCH)
181        .unwrap()
182        .as_secs();
183    let profile_path = format!("/tmp/profile-{}-{}.json", instance_name, timestamp);
184    scp_download(
185        private_key,
186        &instance_ip,
187        "/tmp/profile.json",
188        &profile_path,
189    )
190    .await?;
191    info!(profile = profile_path.as_str(), "downloaded profile");
192
193    // Create a temp directory with a copy of the debug binary named "binary"
194    // (samply looks for symbols by filename, and the remote binary is named "binary")
195    let binary_path = binary_path
196        .canonicalize()
197        .map_err(|e| Error::Symbolication(format!("failed to resolve binary path: {}", e)))?;
198    let symbol_dir = format!("/tmp/symbols-{}-{}", instance_name, timestamp);
199    std::fs::create_dir_all(&symbol_dir)?;
200    let binary_copy_path = format!("{}/binary", symbol_dir);
201    std::fs::copy(&binary_path, &binary_copy_path)
202        .map_err(|e| Error::Symbolication(format!("failed to copy binary: {}", e)))?;
203
204    // Use samply load with --symbol-dir to open the profile with symbols
205    info!(
206        binary = ?binary_path,
207        symbol_dir = symbol_dir.as_str(),
208        "opening profile with samply"
209    );
210    let mut cmd = Command::new("samply");
211    cmd.arg("load")
212        .arg(&profile_path)
213        .arg("--symbol-dir")
214        .arg(&symbol_dir);
215
216    let status = cmd.status().await?;
217    if !status.success() {
218        return Err(Error::Symbolication("samply load failed".to_string()));
219    }
220
221    Ok(())
222}