commonware_deployer/ec2/
services.rs

//! Service configuration for Prometheus, Loki, Grafana, Tempo, Pyroscope, Node Exporter, Promtail, and a caller-provided binary
2
3use crate::ec2::{
4    s3::{S3_DEPLOYMENTS_PREFIX, S3_TOOLS_BINARIES_PREFIX, S3_TOOLS_CONFIGS_PREFIX},
5    Architecture,
6};
7
/// Deployer version used to namespace static configs in S3.
///
/// Taken at compile time from this crate's `CARGO_PKG_VERSION`.
const DEPLOYER_VERSION: &str = env!("CARGO_PKG_VERSION");

/// Version of Prometheus to download and install (release number without the `v` prefix)
pub const PROMETHEUS_VERSION: &str = "3.2.0";

/// Version of Promtail to download and install (release number without the `v` prefix)
pub const PROMTAIL_VERSION: &str = "3.4.2";

/// Version of Node Exporter to download and install (release number without the `v` prefix)
pub const NODE_EXPORTER_VERSION: &str = "1.9.0";

/// Version of Loki to download and install (release number without the `v` prefix)
pub const LOKI_VERSION: &str = "3.4.2";

/// Version of Tempo to download and install (release number without the `v` prefix)
pub const TEMPO_VERSION: &str = "2.7.1";

/// Version of Pyroscope to download and install (release number without the `v` prefix)
pub const PYROSCOPE_VERSION: &str = "1.12.0";

/// Version of Grafana to download and install (release number without the `v` prefix)
pub const GRAFANA_VERSION: &str = "11.5.2";
31
32// S3 key functions for tool binaries
33//
34// Convention: {S3_TOOLS_BINARIES_PREFIX}/{tool}/{version}/{platform}/{filename}
35//
36// The filename matches the upstream download URL exactly. The version is placed
37// in the path (not embedded in the filename) to ensure consistent cache organization
38// across all tools, since some upstream releases include version in the filename
39// (e.g., prometheus-3.2.0.linux-arm64.tar.gz) while others do not
40// (e.g., loki-linux-arm64.zip).
41
42pub(crate) fn prometheus_bin_s3_key(version: &str, architecture: Architecture) -> String {
43    format!(
44        "{S3_TOOLS_BINARIES_PREFIX}/prometheus/{version}/linux-{arch}/prometheus-{version}.linux-{arch}.tar.gz",
45        arch = architecture.as_str()
46    )
47}
48
49pub(crate) fn grafana_bin_s3_key(version: &str, architecture: Architecture) -> String {
50    format!(
51        "{S3_TOOLS_BINARIES_PREFIX}/grafana/{version}/linux-{arch}/grafana_{version}_{arch}.deb",
52        arch = architecture.as_str()
53    )
54}
55
56pub(crate) fn loki_bin_s3_key(version: &str, architecture: Architecture) -> String {
57    format!(
58        "{S3_TOOLS_BINARIES_PREFIX}/loki/{version}/linux-{arch}/loki-linux-{arch}.zip",
59        arch = architecture.as_str()
60    )
61}
62
63pub(crate) fn pyroscope_bin_s3_key(version: &str, architecture: Architecture) -> String {
64    format!(
65        "{S3_TOOLS_BINARIES_PREFIX}/pyroscope/{version}/linux-{arch}/pyroscope_{version}_linux_{arch}.tar.gz",
66        arch = architecture.as_str()
67    )
68}
69
70pub(crate) fn tempo_bin_s3_key(version: &str, architecture: Architecture) -> String {
71    format!(
72        "{S3_TOOLS_BINARIES_PREFIX}/tempo/{version}/linux-{arch}/tempo_{version}_linux_{arch}.tar.gz",
73        arch = architecture.as_str()
74    )
75}
76
77pub(crate) fn node_exporter_bin_s3_key(version: &str, architecture: Architecture) -> String {
78    format!(
79        "{S3_TOOLS_BINARIES_PREFIX}/node-exporter/{version}/linux-{arch}/node_exporter-{version}.linux-{arch}.tar.gz",
80        arch = architecture.as_str()
81    )
82}
83
84pub(crate) fn promtail_bin_s3_key(version: &str, architecture: Architecture) -> String {
85    format!(
86        "{S3_TOOLS_BINARIES_PREFIX}/promtail/{version}/linux-{arch}/promtail-linux-{arch}.zip",
87        arch = architecture.as_str()
88    )
89}
90
91// S3 key functions for component configs and services (include deployer version for cache invalidation)
92//
93// Convention: {S3_TOOLS_CONFIGS_PREFIX}/{deployer_version}/{component}/{file}
94
95pub fn prometheus_service_s3_key() -> String {
96    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/prometheus/service")
97}
98
99pub fn grafana_datasources_s3_key() -> String {
100    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/grafana/datasources.yml")
101}
102
103pub fn grafana_dashboards_s3_key() -> String {
104    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/grafana/all.yml")
105}
106
107pub fn loki_config_s3_key() -> String {
108    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/loki/config.yml")
109}
110
111pub fn loki_service_s3_key() -> String {
112    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/loki/service")
113}
114
115pub fn pyroscope_config_s3_key() -> String {
116    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/pyroscope/config.yml")
117}
118
119pub fn pyroscope_service_s3_key() -> String {
120    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/pyroscope/service")
121}
122
123pub fn tempo_config_s3_key() -> String {
124    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/tempo/config.yml")
125}
126
127pub fn tempo_service_s3_key() -> String {
128    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/tempo/service")
129}
130
131pub fn node_exporter_service_s3_key() -> String {
132    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/node-exporter/service")
133}
134
135pub fn promtail_service_s3_key() -> String {
136    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/promtail/service")
137}
138
139// S3 key functions for pyroscope agent (lives with pyroscope component)
140
141pub fn pyroscope_agent_service_s3_key() -> String {
142    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/pyroscope/agent.service")
143}
144
145pub fn pyroscope_agent_timer_s3_key() -> String {
146    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/pyroscope/agent.timer")
147}
148
149// S3 key functions for system configs
150
151pub fn bbr_config_s3_key() -> String {
152    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/system/bbr.conf")
153}
154
155pub fn logrotate_config_s3_key() -> String {
156    format!("{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/system/logrotate.conf")
157}
158
159// S3 key functions for binary instance configs
160
161pub(crate) fn binary_service_s3_key_for_arch(architecture: Architecture) -> String {
162    format!(
163        "{S3_TOOLS_CONFIGS_PREFIX}/{DEPLOYER_VERSION}/binary/service-{arch}",
164        arch = architecture.as_str()
165    )
166}
167
168/// Returns the S3 key for an instance's binary by digest (deduplicated within deployment)
169pub fn binary_s3_key(tag: &str, digest: &str) -> String {
170    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/binaries/{digest}")
171}
172
173/// Returns the S3 key for an instance's config by digest (deduplicated within deployment)
174pub fn config_s3_key(tag: &str, digest: &str) -> String {
175    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/configs/{digest}")
176}
177
178/// Returns the S3 key for hosts.yaml by digest (deduplicated within deployment)
179pub fn hosts_s3_key(tag: &str, digest: &str) -> String {
180    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/hosts/{digest}")
181}
182
183/// Returns the S3 key for promtail config by digest (deduplicated within deployment)
184pub fn promtail_s3_key(tag: &str, digest: &str) -> String {
185    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/promtail/{digest}")
186}
187
188/// Returns the S3 key for pyroscope agent script by digest (deduplicated within deployment)
189pub fn pyroscope_s3_key(tag: &str, digest: &str) -> String {
190    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/pyroscope/{digest}")
191}
192
193/// Returns the S3 key for monitoring config by digest (deduplicated within deployment)
194pub fn monitoring_s3_key(tag: &str, digest: &str) -> String {
195    format!("{S3_DEPLOYMENTS_PREFIX}/{tag}/monitoring/{digest}")
196}
197
198/// Returns the download URL for Prometheus from GitHub
199pub(crate) fn prometheus_download_url(version: &str, architecture: Architecture) -> String {
200    format!(
201        "https://github.com/prometheus/prometheus/releases/download/v{version}/prometheus-{version}.linux-{arch}.tar.gz",
202        arch = architecture.as_str()
203    )
204}
205
206/// Returns the download URL for Grafana
207pub(crate) fn grafana_download_url(version: &str, architecture: Architecture) -> String {
208    format!(
209        "https://dl.grafana.com/oss/release/grafana_{version}_{arch}.deb",
210        arch = architecture.as_str()
211    )
212}
213
214/// Returns the download URL for Loki from GitHub
215pub(crate) fn loki_download_url(version: &str, architecture: Architecture) -> String {
216    format!(
217        "https://github.com/grafana/loki/releases/download/v{version}/loki-linux-{arch}.zip",
218        arch = architecture.as_str()
219    )
220}
221
222/// Returns the download URL for Pyroscope from GitHub
223pub(crate) fn pyroscope_download_url(version: &str, architecture: Architecture) -> String {
224    format!(
225        "https://github.com/grafana/pyroscope/releases/download/v{version}/pyroscope_{version}_linux_{arch}.tar.gz",
226        arch = architecture.as_str()
227    )
228}
229
230/// Returns the download URL for Tempo from GitHub
231pub(crate) fn tempo_download_url(version: &str, architecture: Architecture) -> String {
232    format!(
233        "https://github.com/grafana/tempo/releases/download/v{version}/tempo_{version}_linux_{arch}.tar.gz",
234        arch = architecture.as_str()
235    )
236}
237
238/// Returns the download URL for Node Exporter from GitHub
239pub(crate) fn node_exporter_download_url(version: &str, architecture: Architecture) -> String {
240    format!(
241        "https://github.com/prometheus/node_exporter/releases/download/v{version}/node_exporter-{version}.linux-{arch}.tar.gz",
242        arch = architecture.as_str()
243    )
244}
245
246/// Returns the download URL for Promtail from GitHub
247pub(crate) fn promtail_download_url(version: &str, architecture: Architecture) -> String {
248    format!(
249        "https://github.com/grafana/loki/releases/download/v{version}/promtail-linux-{arch}.zip",
250        arch = architecture.as_str()
251    )
252}
253
/// YAML configuration for Grafana datasources (Prometheus, Loki, Tempo, and Pyroscope)
///
/// Every datasource points at `localhost` (ports 9090, 3100, 3200 and 4040 respectively) and
/// Prometheus is marked as the default.
pub const DATASOURCES_YML: &str = r#"
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    url: http://localhost:9090
    access: proxy
    isDefault: true
  - name: Loki
    type: loki
    url: http://localhost:3100
    access: proxy
    isDefault: false
  - name: Tempo
    type: tempo
    url: http://localhost:3200
    access: proxy
    isDefault: false
  - name: Pyroscope
    type: grafana-pyroscope-datasource
    url: http://localhost:4040
    access: proxy
    isDefault: false
"#;
279
/// YAML configuration for Grafana dashboard providers
///
/// Declares a single file-based provider that reads dashboards from
/// `/var/lib/grafana/dashboards`.
pub const ALL_YML: &str = r#"
apiVersion: 1
providers:
  - name: 'default'
    orgId: 1
    folder: ''
    type: file
    options:
      path: /var/lib/grafana/dashboards
"#;
291
/// Systemd service file content for Prometheus
///
/// Runs `/opt/prometheus/prometheus` as user `ubuntu` with its config at
/// `/opt/prometheus/prometheus.yml` and its TSDB under `/opt/prometheus/data`.
pub const PROMETHEUS_SERVICE: &str = r#"[Unit]
Description=Prometheus Monitoring Service
After=network.target

[Service]
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;
307
/// Systemd service file content for Promtail
///
/// Runs `/opt/promtail/promtail` as user `ubuntu` with its config at
/// `/etc/promtail/promtail.yml`.
pub const PROMTAIL_SERVICE: &str = r#"[Unit]
Description=Promtail Log Forwarder
After=network.target

[Service]
ExecStart=/opt/promtail/promtail -config.file=/etc/promtail/promtail.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;
323
/// Systemd service file content for Loki
///
/// Runs `/opt/loki/loki` as user `ubuntu` with its config at `/etc/loki/loki.yml`.
pub const LOKI_SERVICE: &str = r#"[Unit]
Description=Loki Log Aggregation Service
After=network.target

[Service]
ExecStart=/opt/loki/loki -config.file=/etc/loki/loki.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;
339
/// YAML configuration for Loki
///
/// Single-process setup (`target: all`) listening on HTTP port 3100 and gRPC port 9095, with an
/// in-memory ring and all state stored on the local filesystem under `/loki`.
///
/// NOTE(review): retention is configured under `table_manager` while the schema uses the `tsdb`
/// store; confirm that the running Loki version actually applies this retention.
pub const LOKI_CONFIG: &str = r#"
auth_enabled: false
target: all
server:
  http_listen_port: 3100
  grpc_listen_port: 9095
common:
  ring:
    kvstore:
      store: inmemory
  replication_factor: 1
  instance_addr: 127.0.0.1
schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h
storage_config:
  tsdb_shipper:
    active_index_directory: /loki/index
    cache_location: /loki/index_cache
  filesystem:
    directory: /loki/chunks
table_manager:
  retention_deletes_enabled: true
  retention_period: 12h
compactor:
  working_directory: /loki/compactor
ingester:
  wal:
    dir: /loki/wal
"#;
377
/// YAML configuration for Pyroscope
///
/// Single-process setup (`target: all`) listening on HTTP port 4040, storing data under
/// `/var/lib/pyroscope`, with `self_profiling.disable_push` set to `true`.
pub const PYROSCOPE_CONFIG: &str = r#"
target: all
server:
  http_listen_port: 4040
  grpc_listen_port: 0
pyroscopedb:
  data_path: /var/lib/pyroscope
self_profiling:
  disable_push: true
"#;
389
/// Systemd service file content for Pyroscope
///
/// Runs `/opt/pyroscope/pyroscope` as user `ubuntu` with its config at
/// `/etc/pyroscope/pyroscope.yml`.
pub const PYROSCOPE_SERVICE: &str = r#"[Unit]
Description=Pyroscope Profiling Service
After=network.target

[Service]
ExecStart=/opt/pyroscope/pyroscope --config.file=/etc/pyroscope/pyroscope.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;
405
/// Systemd service file content for Tempo
///
/// Runs `/opt/tempo/tempo` as user `ubuntu` with its config at `/etc/tempo/tempo.yml`.
pub const TEMPO_SERVICE: &str = r#"[Unit]
Description=Tempo Tracing Service
After=network.target
[Service]
ExecStart=/opt/tempo/tempo -config.file=/etc/tempo/tempo.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity
[Install]
WantedBy=multi-user.target
"#;
419
/// YAML configuration for Tempo
///
/// Listens on HTTP port 3200 and gRPC port 9096, accepts OTLP over HTTP on `0.0.0.0:4318`, and
/// stores traces and the WAL on the local filesystem under `/tempo`.
pub const TEMPO_CONFIG: &str = r#"
server:
  grpc_listen_port: 9096
  http_listen_port: 3200
distributor:
  receivers:
    otlp:
      protocols:
        http:
          endpoint: "0.0.0.0:4318"
storage:
  trace:
    backend: local
    local:
      path: /tempo/traces
    wal:
      path: /tempo/wal
ingester:
  max_block_duration: 1h
compactor:
  compaction:
    block_retention: 1h
    compaction_cycle: 1h
"#;
445
/// URLs for monitoring service installation
///
/// Each field is a URL that [`install_monitoring_cmd`] downloads with `wget` into
/// `/home/ubuntu` on the monitoring instance before installing it.
#[derive(Debug, Clone)]
pub struct MonitoringUrls {
    /// Prometheus release tarball (saved as `prometheus.tar.gz`)
    pub prometheus_bin: String,
    /// Grafana Debian package (saved as `grafana.deb`)
    pub grafana_bin: String,
    /// Loki release zip (saved as `loki.zip`)
    pub loki_bin: String,
    /// Pyroscope release tarball (saved as `pyroscope.tar.gz`)
    pub pyroscope_bin: String,
    /// Tempo release tarball (saved as `tempo.tar.gz`)
    pub tempo_bin: String,
    /// Node Exporter release tarball (saved as `node_exporter.tar.gz`)
    pub node_exporter_bin: String,
    /// Prometheus configuration (installed as `/opt/prometheus/prometheus.yml`)
    pub prometheus_config: String,
    /// Grafana datasources provisioning file
    pub datasources_yml: String,
    /// Grafana dashboard provider provisioning file (`all.yml`)
    pub all_yml: String,
    /// Grafana dashboard JSON (installed as `/var/lib/grafana/dashboards/dashboard.json`)
    pub dashboard: String,
    /// Loki configuration (installed as `/etc/loki/loki.yml`)
    pub loki_yml: String,
    /// Pyroscope configuration (installed as `/etc/pyroscope/pyroscope.yml`)
    pub pyroscope_yml: String,
    /// Tempo configuration (installed as `/etc/tempo/tempo.yml`)
    pub tempo_yml: String,
    /// Prometheus systemd unit
    pub prometheus_service: String,
    /// Loki systemd unit
    pub loki_service: String,
    /// Pyroscope systemd unit
    pub pyroscope_service: String,
    /// Tempo systemd unit
    pub tempo_service: String,
    /// Node Exporter systemd unit
    pub node_exporter_service: String,
}
467
468/// Command to install monitoring services (Prometheus, Loki, Grafana, Pyroscope, Tempo) on the monitoring instance
469pub(crate) fn install_monitoring_cmd(
470    urls: &MonitoringUrls,
471    prometheus_version: &str,
472    architecture: Architecture,
473) -> String {
474    let arch = architecture.as_str();
475    format!(
476        r#"
477sudo apt-get update -y
478sudo apt-get install -y unzip adduser libfontconfig1 wget tar
479
480# Download all files from S3 concurrently via pre-signed URLs
481wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/prometheus.tar.gz '{}' &
482wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/grafana.deb '{}' &
483wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/loki.zip '{}' &
484wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope.tar.gz '{}' &
485wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/tempo.tar.gz '{}' &
486wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/node_exporter.tar.gz '{}' &
487wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/prometheus.yml '{}' &
488wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/datasources.yml '{}' &
489wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/all.yml '{}' &
490wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/dashboard.json '{}' &
491wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/loki.yml '{}' &
492wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope.yml '{}' &
493wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/tempo.yml '{}' &
494wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/prometheus.service '{}' &
495wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/loki.service '{}' &
496wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope.service '{}' &
497wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/tempo.service '{}' &
498wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/node_exporter.service '{}' &
499wait
500
501# Verify all downloads succeeded
502for f in prometheus.tar.gz grafana.deb loki.zip pyroscope.tar.gz tempo.tar.gz node_exporter.tar.gz \
503         prometheus.yml datasources.yml all.yml dashboard.json loki.yml pyroscope.yml tempo.yml \
504         prometheus.service loki.service pyroscope.service tempo.service node_exporter.service; do
505    if [ ! -f "/home/ubuntu/$f" ]; then
506        echo "ERROR: Failed to download $f" >&2
507        exit 1
508    fi
509done
510
511# Install Prometheus
512sudo mkdir -p /opt/prometheus /opt/prometheus/data
513sudo chown -R ubuntu:ubuntu /opt/prometheus
514tar xvfz /home/ubuntu/prometheus.tar.gz -C /home/ubuntu
515sudo mv /home/ubuntu/prometheus-{prometheus_version}.linux-{arch} /opt/prometheus/prometheus-{prometheus_version}.linux-{arch}
516sudo ln -s /opt/prometheus/prometheus-{prometheus_version}.linux-{arch}/prometheus /opt/prometheus/prometheus
517sudo chmod +x /opt/prometheus/prometheus
518
519# Install Grafana
520sudo dpkg -i /home/ubuntu/grafana.deb
521sudo apt-get install -f -y
522
523# Install Loki
524sudo mkdir -p /opt/loki /loki/index /loki/index_cache /loki/chunks /loki/compactor /loki/wal
525sudo chown -R ubuntu:ubuntu /opt/loki /loki
526unzip -o /home/ubuntu/loki.zip -d /home/ubuntu
527sudo mv /home/ubuntu/loki-linux-{arch} /opt/loki/loki
528sudo chmod +x /opt/loki/loki
529
530# Install Pyroscope
531sudo mkdir -p /opt/pyroscope /var/lib/pyroscope
532sudo chown -R ubuntu:ubuntu /opt/pyroscope /var/lib/pyroscope
533tar xvfz /home/ubuntu/pyroscope.tar.gz -C /home/ubuntu
534sudo mv /home/ubuntu/pyroscope /opt/pyroscope/pyroscope
535sudo chmod +x /opt/pyroscope/pyroscope
536
537# Install Tempo
538sudo mkdir -p /opt/tempo /tempo/traces /tempo/wal
539sudo chown -R ubuntu:ubuntu /opt/tempo /tempo
540tar xvfz /home/ubuntu/tempo.tar.gz -C /home/ubuntu
541sudo mv /home/ubuntu/tempo /opt/tempo/tempo
542sudo chmod +x /opt/tempo/tempo
543
544# Install Node Exporter
545sudo mkdir -p /opt/node_exporter
546sudo chown -R ubuntu:ubuntu /opt/node_exporter
547tar xvfz /home/ubuntu/node_exporter.tar.gz -C /home/ubuntu
548sudo mv /home/ubuntu/node_exporter-*.linux-{arch} /opt/node_exporter/
549sudo ln -s /opt/node_exporter/node_exporter-*.linux-{arch}/node_exporter /opt/node_exporter/node_exporter
550sudo chmod +x /opt/node_exporter/node_exporter
551
552# Configure Grafana
553sudo sed -i '/^\[auth.anonymous\]$/,/^\[/ {{ /^; *enabled = /s/.*/enabled = true/; /^; *org_role = /s/.*/org_role = Admin/ }}' /etc/grafana/grafana.ini
554sudo mkdir -p /etc/grafana/provisioning/datasources /etc/grafana/provisioning/dashboards /var/lib/grafana/dashboards
555
556# Install Pyroscope data source plugin
557sudo grafana-cli plugins install grafana-pyroscope-datasource
558
559# Install configuration files
560sudo mv /home/ubuntu/prometheus.yml /opt/prometheus/prometheus.yml
561sudo mv /home/ubuntu/datasources.yml /etc/grafana/provisioning/datasources/datasources.yml
562sudo mv /home/ubuntu/all.yml /etc/grafana/provisioning/dashboards/all.yml
563sudo mv /home/ubuntu/dashboard.json /var/lib/grafana/dashboards/dashboard.json
564sudo mkdir -p /etc/loki
565sudo mv /home/ubuntu/loki.yml /etc/loki/loki.yml
566sudo chown root:root /etc/loki/loki.yml
567sudo mkdir -p /etc/pyroscope
568sudo mv /home/ubuntu/pyroscope.yml /etc/pyroscope/pyroscope.yml
569sudo chown root:root /etc/pyroscope/pyroscope.yml
570sudo mkdir -p /etc/tempo
571sudo mv /home/ubuntu/tempo.yml /etc/tempo/tempo.yml
572sudo chown root:root /etc/tempo/tempo.yml
573
574# Install service files
575sudo mv /home/ubuntu/prometheus.service /etc/systemd/system/prometheus.service
576sudo mv /home/ubuntu/loki.service /etc/systemd/system/loki.service
577sudo mv /home/ubuntu/pyroscope.service /etc/systemd/system/pyroscope.service
578sudo mv /home/ubuntu/tempo.service /etc/systemd/system/tempo.service
579sudo mv /home/ubuntu/node_exporter.service /etc/systemd/system/node_exporter.service
580"#,
581        urls.prometheus_bin,
582        urls.grafana_bin,
583        urls.loki_bin,
584        urls.pyroscope_bin,
585        urls.tempo_bin,
586        urls.node_exporter_bin,
587        urls.prometheus_config,
588        urls.datasources_yml,
589        urls.all_yml,
590        urls.dashboard,
591        urls.loki_yml,
592        urls.pyroscope_yml,
593        urls.tempo_yml,
594        urls.prometheus_service,
595        urls.loki_service,
596        urls.pyroscope_service,
597        urls.tempo_service,
598        urls.node_exporter_service,
599    )
600}
601
/// Continuation of monitoring install command (services startup)
///
/// Returns a shell script that hands `/etc/grafana` and `/var/lib/grafana` to the `grafana`
/// user, reloads systemd, then starts and enables node_exporter, prometheus, loki, pyroscope and
/// tempo, and finally restarts and enables grafana-server.
pub const fn start_monitoring_services_cmd() -> &'static str {
    const START_SCRIPT: &str = r#"
sudo chown -R grafana:grafana /etc/grafana /var/lib/grafana

# Start services
sudo systemctl daemon-reload
sudo systemctl start node_exporter
sudo systemctl enable node_exporter
sudo systemctl start prometheus
sudo systemctl enable prometheus
sudo systemctl start loki
sudo systemctl enable loki
sudo systemctl start pyroscope
sudo systemctl enable pyroscope
sudo systemctl start tempo
sudo systemctl enable tempo
sudo systemctl restart grafana-server
sudo systemctl enable grafana-server
"#;
    START_SCRIPT
}
623
/// URLs for binary instance installation
///
/// Each field is a URL that [`install_binary_cmd`] downloads with `wget` into `/home/ubuntu`
/// on a binary instance before installing it.
#[derive(Debug, Clone)]
pub struct InstanceUrls {
    /// The deployed binary (saved as `binary`)
    pub binary: String,
    /// The binary's configuration (saved as `config.conf`)
    pub config: String,
    /// The hosts file (saved as `hosts.yaml`)
    pub hosts: String,
    /// Promtail release zip (saved as `promtail.zip`)
    pub promtail_bin: String,
    /// Promtail configuration (installed as `/etc/promtail/promtail.yml`)
    pub promtail_config: String,
    /// Promtail systemd unit
    pub promtail_service: String,
    /// Node Exporter release tarball (saved as `node_exporter.tar.gz`)
    pub node_exporter_bin: String,
    /// Node Exporter systemd unit
    pub node_exporter_service: String,
    /// Systemd unit for the deployed binary
    pub binary_service: String,
    /// Logrotate configuration (installed as `/etc/logrotate.d/binary`)
    pub logrotate_conf: String,
    /// Pyroscope agent shell script (saved as `pyroscope-agent.sh`)
    pub pyroscope_script: String,
    /// Pyroscope agent systemd unit
    pub pyroscope_service: String,
    /// Pyroscope agent systemd timer
    pub pyroscope_timer: String,
}
640
641/// Command to install all services on binary instances
642pub(crate) fn install_binary_cmd(
643    urls: &InstanceUrls,
644    profiling: bool,
645    architecture: Architecture,
646) -> String {
647    let arch = architecture.as_str();
648    let mut script = format!(
649        r#"
650# Install base tools and dependencies
651sudo apt-get update -y
652sudo apt-get install -y logrotate jq wget unzip libjemalloc2 linux-tools-common linux-tools-generic linux-tools-$(uname -r)
653
654# Download all files from S3 concurrently via pre-signed URLs
655wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/binary '{}' &
656wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/config.conf '{}' &
657wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/hosts.yaml '{}' &
658wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/promtail.zip '{}' &
659wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/promtail.yml '{}' &
660wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/promtail.service '{}' &
661wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/node_exporter.tar.gz '{}' &
662wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/node_exporter.service '{}' &
663wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/binary.service '{}' &
664wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/logrotate.conf '{}' &
665wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope-agent.sh '{}' &
666wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope-agent.service '{}' &
667wget -q --tries=10 --retry-connrefused --waitretry=5 -O /home/ubuntu/pyroscope-agent.timer '{}' &
668wait
669
670# Verify all downloads succeeded
671for f in binary config.conf hosts.yaml promtail.zip promtail.yml promtail.service \
672         node_exporter.tar.gz node_exporter.service binary.service logrotate.conf \
673         pyroscope-agent.sh pyroscope-agent.service pyroscope-agent.timer; do
674    if [ ! -f "/home/ubuntu/$f" ]; then
675        echo "ERROR: Failed to download $f" >&2
676        exit 1
677    fi
678done
679
680# Install Promtail
681sudo mkdir -p /opt/promtail /etc/promtail
682sudo chown -R ubuntu:ubuntu /opt/promtail
683unzip -o /home/ubuntu/promtail.zip -d /home/ubuntu
684sudo mv /home/ubuntu/promtail-linux-{arch} /opt/promtail/promtail
685sudo chmod +x /opt/promtail/promtail
686sudo mv /home/ubuntu/promtail.yml /etc/promtail/promtail.yml
687sudo mv /home/ubuntu/promtail.service /etc/systemd/system/promtail.service
688sudo chown root:root /etc/promtail/promtail.yml
689
690# Install Node Exporter
691sudo mkdir -p /opt/node_exporter
692sudo chown -R ubuntu:ubuntu /opt/node_exporter
693tar xvfz /home/ubuntu/node_exporter.tar.gz -C /home/ubuntu
694sudo mv /home/ubuntu/node_exporter-*.linux-{arch} /opt/node_exporter/
695sudo ln -s /opt/node_exporter/node_exporter-*.linux-{arch}/node_exporter /opt/node_exporter/node_exporter
696sudo chmod +x /opt/node_exporter/node_exporter
697sudo mv /home/ubuntu/node_exporter.service /etc/systemd/system/node_exporter.service
698
699# Setup binary
700chmod +x /home/ubuntu/binary
701sudo touch /var/log/binary.log && sudo chown ubuntu:ubuntu /var/log/binary.log
702sudo mv /home/ubuntu/binary.service /etc/systemd/system/binary.service
703
704# Setup logrotate
705sudo mv /home/ubuntu/logrotate.conf /etc/logrotate.d/binary
706sudo chown root:root /etc/logrotate.d/binary
707echo "0 * * * * /usr/sbin/logrotate /etc/logrotate.d/binary" | crontab -
708
709# Setup pyroscope agent
710sudo ln -s "$(find /usr/lib/linux-tools/*/perf | head -1)" /usr/local/bin/perf
711sudo chmod +x /home/ubuntu/pyroscope-agent.sh
712sudo mv /home/ubuntu/pyroscope-agent.service /etc/systemd/system/pyroscope-agent.service
713sudo mv /home/ubuntu/pyroscope-agent.timer /etc/systemd/system/pyroscope-agent.timer
714
715# Start services
716sudo systemctl daemon-reload
717sudo systemctl enable --now promtail
718sudo systemctl enable --now node_exporter
719sudo systemctl enable --now binary
720"#,
721        urls.binary,
722        urls.config,
723        urls.hosts,
724        urls.promtail_bin,
725        urls.promtail_config,
726        urls.promtail_service,
727        urls.node_exporter_bin,
728        urls.node_exporter_service,
729        urls.binary_service,
730        urls.logrotate_conf,
731        urls.pyroscope_script,
732        urls.pyroscope_service,
733        urls.pyroscope_timer,
734    );
735    if profiling {
736        script.push_str(
737            r#"
738sudo systemctl enable --now pyroscope-agent.timer
739"#,
740        );
741    }
742    script
743}
744
/// Generates the Promtail configuration for one instance
///
/// Logs from `/var/log/binary.log` are pushed to Loki on `monitoring_private_ip` (port 3100)
/// and labelled with `instance_name`, `ip`, `region` and `arch`.
pub fn promtail_config(
    monitoring_private_ip: &str,
    instance_name: &str,
    ip: &str,
    region: &str,
    arch: &str,
) -> String {
    // Loki push endpoint on the monitoring instance.
    let push_url = format!("http://{monitoring_private_ip}:3100/loki/api/v1/push");
    format!(
        r#"
server:
  http_listen_port: 9080
  grpc_listen_port: 0
positions:
  filename: /tmp/positions.yaml
clients:
  - url: {push_url}
scrape_configs:
  - job_name: binary_logs
    static_configs:
      - targets:
          - localhost
        labels:
          deployer_name: {name}
          deployer_ip: {ip}
          deployer_region: {region}
          deployer_arch: {arch}
          __path__: /var/log/binary.log
"#,
        name = instance_name,
    )
}
776
/// Systemd service file content for Node Exporter
///
/// Runs `/opt/node_exporter/node_exporter` as user `ubuntu` with no extra arguments.
pub const NODE_EXPORTER_SERVICE: &str = r#"[Unit]
Description=Node Exporter
After=network.target

[Service]
ExecStart=/opt/node_exporter/node_exporter
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;
792
/// Generates Prometheus configuration with scrape targets for all instance IPs
///
/// Each entry of `instances` is `(name, ip, region, arch)`. The output always contains a
/// `monitoring_system` job scraping `localhost:9100`; every instance adds a `{name}_binary` job
/// (port 9090) and a `{name}_system` job (port 9100), both labelled with the instance metadata.
pub fn generate_prometheus_config(instances: &[(&str, &str, &str, &str)]) -> String {
    // Global settings plus the job that scrapes the monitoring host itself.
    const HEADER: &str = r#"
global:
  scrape_interval: 15s
scrape_configs:
  - job_name: 'monitoring_system'
    static_configs:
      - targets: ['localhost:9100']
"#;
    let jobs = instances.iter().map(|(name, ip, region, arch)| {
        format!(
            r#"
  - job_name: '{name}_binary'
    static_configs:
      - targets: ['{ip}:9090']
        labels:
          deployer_name: '{name}'
          deployer_ip: '{ip}'
          deployer_region: '{region}'
          deployer_arch: '{arch}'
  - job_name: '{name}_system'
    static_configs:
      - targets: ['{ip}:9100']
        labels:
          deployer_name: '{name}'
          deployer_ip: '{ip}'
          deployer_region: '{region}'
          deployer_arch: '{arch}'
"#
        )
    });
    let mut config = String::from(HEADER);
    config.extend(jobs);
    config
}
829
/// Logrotate configuration for binary logs
///
/// Applies to `/var/log/binary.log` with `rotate 0`, `copytruncate`, `missingok` and
/// `notifempty`.
pub const LOGROTATE_CONF: &str = r#"
/var/log/binary.log {
    rotate 0
    copytruncate
    missingok
    notifempty
}
"#;
839
/// Configuration for BBR sysctl settings
///
/// Sets `net.core.default_qdisc` to `fq` and `net.ipv4.tcp_congestion_control` to `bbr`.
pub const BBR_CONF: &str = "net.core.default_qdisc=fq\nnet.ipv4.tcp_congestion_control=bbr\n";
842
843/// Generates systemd service file content for the deployed binary
844pub(crate) fn binary_service(architecture: Architecture) -> String {
845    let lib_arch = architecture.linux_lib();
846    format!(
847        r#"[Unit]
848Description=Deployed Binary Service
849After=network.target
850
851[Service]
852Environment="LD_PRELOAD=/usr/lib/{lib_arch}/libjemalloc.so.2"
853ExecStart=/home/ubuntu/binary --hosts=/home/ubuntu/hosts.yaml --config=/home/ubuntu/config.conf
854TimeoutStopSec=60
855Restart=always
856User=ubuntu
857LimitNOFILE=infinity
858StandardOutput=append:/var/log/binary.log
859StandardError=append:/var/log/binary.log
860
861[Install]
862WantedBy=multi-user.target
863"#
864    )
865}
866
/// Shell script content for the Pyroscope agent (perf + wget)
///
/// The generated script looks up the main PID of `binary.service`, records it with `perf` for
/// 60 seconds at 100 Hz, folds the stacks, and posts them to the Pyroscope `/ingest` endpoint on
/// `monitoring_private_ip` (port 4040). `name`, `ip`, `region` and `arch` become tags on the
/// Pyroscope application name.
pub fn generate_pyroscope_script(
    monitoring_private_ip: &str,
    name: &str,
    ip: &str,
    region: &str,
    arch: &str,
) -> String {
    // Application name with the deployer tags, before URI-encoding by `jq` in the script.
    let raw_app_name = format!(
        "binary{{deployer_name={name},deployer_ip={ip},deployer_region={region},deployer_arch={arch}}}"
    );
    // Base address of the Pyroscope server on the monitoring instance.
    let pyroscope_addr = format!("http://{monitoring_private_ip}:4040");
    format!(
        r#"#!/bin/bash
set -e

SERVICE_NAME="binary.service"
PERF_DATA_FILE="/tmp/perf.data"
PERF_STACK_FILE="/tmp/perf.stack"
PROFILE_DURATION=60 # seconds
PERF_FREQ=100 # Hz

# Construct the Pyroscope application name with tags
RAW_APP_NAME="{raw_app_name}"
APP_NAME=$(jq -nr --arg str "$RAW_APP_NAME" '$str | @uri')

# Get the PID of the binary service
PID=$(systemctl show --property MainPID ${{SERVICE_NAME}} | cut -d= -f2)
if [ -z "$PID" ] || [ "$PID" -eq 0 ]; then
  echo "Error: Could not get PID for ${{SERVICE_NAME}}." >&2
  exit 1
fi

# Record performance data
echo "Recording perf data for PID ${{PID}}..."
sudo perf record -F ${{PERF_FREQ}} -p ${{PID}} -o ${{PERF_DATA_FILE}} -g --call-graph fp -- sleep ${{PROFILE_DURATION}}

# Generate folded stack report
echo "Generating folded stack report..."
sudo perf report -i ${{PERF_DATA_FILE}} --stdio --no-children -g folded,0,caller,count -s comm | \
    awk '/^[0-9]+\.[0-9]+%/ {{ comm = $2 }} /^[0-9]/ {{ print comm ";" substr($0, index($0, $2)), $1 }}' > ${{PERF_STACK_FILE}}

# Check if stack file is empty (perf might fail silently sometimes)
if [ ! -s "${{PERF_STACK_FILE}}" ]; then
    echo "Warning: ${{PERF_STACK_FILE}} is empty. Skipping upload." >&2
    # Clean up empty perf.data
    sudo rm -f ${{PERF_DATA_FILE}} ${{PERF_STACK_FILE}}
    exit 0
fi

# Calculate timestamps
UNTIL_TS=$(date +%s)
FROM_TS=$((UNTIL_TS - PROFILE_DURATION))

# Upload to Pyroscope
echo "Uploading profile to Pyroscope at {monitoring_private_ip}..."
wget --post-file="${{PERF_STACK_FILE}}" \
    --header="Content-Type: text/plain" \
    --quiet \
    -O /dev/null \
    "{pyroscope_addr}/ingest?name=${{APP_NAME}}&format=folded&units=samples&aggregationType=sum&sampleType=cpu&from=${{FROM_TS}}&until=${{UNTIL_TS}}&spyName=perf"

echo "Profile upload complete."
sudo rm -f ${{PERF_DATA_FILE}} ${{PERF_STACK_FILE}}
"#
    )
}
930
/// Systemd service unit for the Pyroscope agent script
///
/// A `oneshot` unit, run as `ubuntu`, that executes
/// `/home/ubuntu/pyroscope-agent.sh` and is ordered after
/// `network-online.target` and `binary.service`.
pub const PYROSCOPE_AGENT_SERVICE: &str = concat!(
    "[Unit]\n",
    "Description=Pyroscope Agent (Perf Script Runner)\n",
    "Wants=network-online.target\n",
    "After=network-online.target binary.service\n",
    "\n",
    "[Service]\n",
    "Type=oneshot\n",
    "User=ubuntu\n",
    "ExecStart=/home/ubuntu/pyroscope-agent.sh\n",
    "\n",
    "[Install]\n",
    "WantedBy=multi-user.target\n",
);
945
/// Systemd timer unit that triggers `pyroscope-agent.service`
///
/// The first run is scheduled 2 minutes after boot, and each later run
/// 1 minute after the service last became inactive, with up to 10 seconds of
/// randomized delay.
pub const PYROSCOPE_AGENT_TIMER: &str = concat!(
    "[Unit]\n",
    "Description=Run Pyroscope Agent periodically\n",
    "\n",
    "[Timer]\n",
    "# Wait a bit after boot before the first run\n",
    "OnBootSec=2min\n",
    "# Run roughly every minute after the last run finished\n",
    "OnUnitInactiveSec=1min\n",
    "Unit=pyroscope-agent.service\n",
    "# Randomize the delay to avoid thundering herd\n",
    "RandomizedDelaySec=10s\n",
    "\n",
    "[Install]\n",
    "WantedBy=timers.target\n",
);
962
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the S3 key of every tool binary for `architecture`, where `label`
    /// is the architecture name expected inside the key (`arm64` or `amd64`)
    fn assert_binary_s3_keys(architecture: Architecture, label: &str) {
        // (generated key, expected key relative to `tools/binaries/`)
        let cases = [
            (
                prometheus_bin_s3_key("3.2.0", architecture),
                format!("prometheus/3.2.0/linux-{label}/prometheus-3.2.0.linux-{label}.tar.gz"),
            ),
            (
                grafana_bin_s3_key("11.5.2", architecture),
                format!("grafana/11.5.2/linux-{label}/grafana_11.5.2_{label}.deb"),
            ),
            (
                loki_bin_s3_key("3.4.2", architecture),
                format!("loki/3.4.2/linux-{label}/loki-linux-{label}.zip"),
            ),
            (
                pyroscope_bin_s3_key("1.12.0", architecture),
                format!("pyroscope/1.12.0/linux-{label}/pyroscope_1.12.0_linux_{label}.tar.gz"),
            ),
            (
                tempo_bin_s3_key("2.7.1", architecture),
                format!("tempo/2.7.1/linux-{label}/tempo_2.7.1_linux_{label}.tar.gz"),
            ),
            (
                node_exporter_bin_s3_key("1.9.0", architecture),
                format!(
                    "node-exporter/1.9.0/linux-{label}/node_exporter-1.9.0.linux-{label}.tar.gz"
                ),
            ),
            (
                promtail_bin_s3_key("3.4.2", architecture),
                format!("promtail/3.4.2/linux-{label}/promtail-linux-{label}.zip"),
            ),
        ];
        for (actual, suffix) in cases {
            assert_eq!(actual, format!("tools/binaries/{suffix}"));
        }
    }

    #[test]
    fn test_binary_s3_keys_arm64() {
        assert_binary_s3_keys(Architecture::Arm64, "arm64");
    }

    #[test]
    fn test_binary_s3_keys_x86_64() {
        assert_binary_s3_keys(Architecture::X86_64, "amd64");
    }

    #[test]
    fn test_config_s3_keys() {
        // (generated key, expected key relative to `tools/configs/{DEPLOYER_VERSION}/`)
        let cases = [
            (prometheus_service_s3_key(), "prometheus/service"),
            (grafana_datasources_s3_key(), "grafana/datasources.yml"),
            (grafana_dashboards_s3_key(), "grafana/all.yml"),
            (loki_config_s3_key(), "loki/config.yml"),
            (loki_service_s3_key(), "loki/service"),
            (pyroscope_config_s3_key(), "pyroscope/config.yml"),
            (pyroscope_service_s3_key(), "pyroscope/service"),
            (tempo_config_s3_key(), "tempo/config.yml"),
            (tempo_service_s3_key(), "tempo/service"),
            (node_exporter_service_s3_key(), "node-exporter/service"),
            (promtail_service_s3_key(), "promtail/service"),
            (pyroscope_agent_service_s3_key(), "pyroscope/agent.service"),
            (pyroscope_agent_timer_s3_key(), "pyroscope/agent.timer"),
            (bbr_config_s3_key(), "system/bbr.conf"),
            (logrotate_config_s3_key(), "system/logrotate.conf"),
            (
                binary_service_s3_key_for_arch(Architecture::Arm64),
                "binary/service-arm64",
            ),
            (
                binary_service_s3_key_for_arch(Architecture::X86_64),
                "binary/service-amd64",
            ),
        ];
        for (actual, suffix) in cases {
            assert_eq!(
                actual,
                format!("tools/configs/{DEPLOYER_VERSION}/{suffix}")
            );
        }
    }

    #[test]
    fn test_deployment_s3_keys() {
        let (tag, digest) = ("my-tag", "abc123def456");
        // (generated key, expected category segment between the tag and the digest)
        let cases = [
            (binary_s3_key(tag, digest), "binaries"),
            (config_s3_key(tag, digest), "configs"),
            (hosts_s3_key(tag, digest), "hosts"),
            (promtail_s3_key(tag, digest), "promtail"),
            (pyroscope_s3_key(tag, digest), "pyroscope"),
            (monitoring_s3_key(tag, digest), "monitoring"),
        ];
        for (actual, category) in cases {
            assert_eq!(
                actual,
                format!("deployments/my-tag/{category}/abc123def456")
            );
        }
    }
}