// commonware_deployer/ec2/services.rs

//! Service configuration for Prometheus, Loki, Grafana, Promtail, Tempo, Pyroscope, Node Exporter, and a caller-provided binary

/// Prometheus release version to download and install.
pub const PROMETHEUS_VERSION: &str = "3.2.0";

/// Promtail release version to download and install.
pub const PROMTAIL_VERSION: &str = "3.4.2";

/// Node Exporter release version to download and install.
pub const NODE_EXPORTER_VERSION: &str = "1.9.0";

/// Loki release version to download and install.
pub const LOKI_VERSION: &str = "3.4.2";

/// Tempo release version to download and install.
pub const TEMPO_VERSION: &str = "2.7.1";

/// Pyroscope release version to download and install.
pub const PYROSCOPE_VERSION: &str = "1.12.0";

/// Grafana release version to download and install.
pub const GRAFANA_VERSION: &str = "11.5.2";

/// Grafana datasource provisioning file (YAML).
///
/// Declares four datasources, each addressed on `localhost` with `proxy` access:
/// Prometheus (port 9090, marked as the default), Loki (port 3100),
/// Tempo (port 3200), and Pyroscope (port 4040).
pub const DATASOURCES_YML: &str = r#"
apiVersion: 1
datasources:
  - name: Prometheus
    type: prometheus
    url: http://localhost:9090
    access: proxy
    isDefault: true
  - name: Loki
    type: loki
    url: http://localhost:3100
    access: proxy
    isDefault: false
  - name: Tempo
    type: tempo
    url: http://localhost:3200
    access: proxy
    isDefault: false
  - name: Pyroscope
    type: grafana-pyroscope-datasource
    url: http://localhost:4040
    access: proxy
    isDefault: false
"#;

/// Grafana dashboard provider provisioning file (YAML).
///
/// Declares a single file-based provider named `default` that reads dashboards
/// from `/var/lib/grafana/dashboards`.
pub const ALL_YML: &str = r#"
apiVersion: 1
providers:
  - name: 'default'
    orgId: 1
    folder: ''
    type: file
    options:
      path: /var/lib/grafana/dashboards
"#;

/// Systemd unit file for Prometheus.
///
/// Runs `/opt/prometheus/prometheus` as user `ubuntu` with its config at
/// `/opt/prometheus/prometheus.yml` and TSDB data under `/opt/prometheus/data`.
/// The unit always restarts and is wanted by `multi-user.target`.
pub const PROMETHEUS_SERVICE: &str = r#"
[Unit]
Description=Prometheus Monitoring Service
After=network.target

[Service]
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;

/// Systemd unit file for Promtail.
///
/// Runs `/opt/promtail/promtail` as user `ubuntu` with its config at
/// `/etc/promtail/promtail.yml`. The unit always restarts and is wanted by
/// `multi-user.target`.
pub const PROMTAIL_SERVICE: &str = r#"
[Unit]
Description=Promtail Log Forwarder
After=network.target

[Service]
ExecStart=/opt/promtail/promtail -config.file=/etc/promtail/promtail.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;

/// Systemd unit file for Loki.
///
/// Runs `/opt/loki/loki` as user `ubuntu` with its config at
/// `/etc/loki/loki.yml`. The unit always restarts and is wanted by
/// `multi-user.target`.
pub const LOKI_SERVICE: &str = r#"
[Unit]
Description=Loki Log Aggregation Service
After=network.target

[Service]
ExecStart=/opt/loki/loki -config.file=/etc/loki/loki.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;

/// Loki configuration file (YAML).
///
/// Runs every Loki target in one process (`target: all`) with authentication
/// disabled, HTTP on port 3100 and gRPC on port 9095. The ring uses an in-memory
/// key-value store with a replication factor of 1, and all state (TSDB index,
/// index cache, chunks, compactor working directory, WAL) lives under `/loki`.
///
/// NOTE(review): retention (`12h`) is configured through `table_manager` while
/// the schema uses the `tsdb` store — confirm that this setting is actually
/// enforced for TSDB indexes in the Loki version being deployed.
pub const LOKI_CONFIG: &str = r#"
auth_enabled: false
target: all
server:
  http_listen_port: 3100
  grpc_listen_port: 9095
common:
  ring:
    kvstore:
      store: inmemory
  replication_factor: 1
  instance_addr: 127.0.0.1
schema_config:
  configs:
    - from: 2020-10-24
      store: tsdb
      object_store: filesystem
      schema: v13
      index:
        prefix: index_
        period: 24h
storage_config:
  tsdb_shipper:
    active_index_directory: /loki/index
    cache_location: /loki/index_cache
  filesystem:
    directory: /loki/chunks
table_manager:
  retention_deletes_enabled: true
  retention_period: 12h
compactor:
  working_directory: /loki/compactor
ingester:
  wal:
    dir: /loki/wal
"#;

/// Pyroscope configuration file (YAML).
///
/// Runs every Pyroscope target in one process (`target: all`) with HTTP on port
/// 4040 and `grpc_listen_port` set to 0. Profile data is stored under
/// `/var/lib/pyroscope`, and `self_profiling.disable_push` is set to `true`.
pub const PYROSCOPE_CONFIG: &str = r#"
target: all
server:
  http_listen_port: 4040
  grpc_listen_port: 0
pyroscopedb:
  data_path: /var/lib/pyroscope
self_profiling:
  disable_push: true
"#;

/// Systemd unit file for Pyroscope.
///
/// Runs `/opt/pyroscope/pyroscope` as user `ubuntu` with its config at
/// `/etc/pyroscope/pyroscope.yml`. The unit always restarts and is wanted by
/// `multi-user.target`.
pub const PYROSCOPE_SERVICE: &str = r#"
[Unit]
Description=Pyroscope Profiling Service
After=network.target

[Service]
ExecStart=/opt/pyroscope/pyroscope --config.file=/etc/pyroscope/pyroscope.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;

/// Systemd unit file for Tempo.
///
/// Runs `/opt/tempo/tempo` as user `ubuntu` with its config at
/// `/etc/tempo/tempo.yml`. The unit always restarts and is wanted by
/// `multi-user.target`. Unlike the other unit files in this module, the
/// sections are not separated by blank lines.
pub const TEMPO_SERVICE: &str = r#"
[Unit]
Description=Tempo Tracing Service
After=network.target
[Service]
ExecStart=/opt/tempo/tempo -config.file=/etc/tempo/tempo.yml
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity
[Install]
WantedBy=multi-user.target
"#;

/// Tempo configuration file (YAML).
///
/// Serves HTTP on port 3200 and gRPC on port 9096, and accepts OTLP over HTTP
/// on `0.0.0.0:4318`. Traces use the `local` backend under `/tempo/traces` with
/// the WAL at `/tempo/wal`. Block duration, block retention, and the compaction
/// cycle are each set to `1h`.
pub const TEMPO_CONFIG: &str = r#"
server:
  grpc_listen_port: 9096
  http_listen_port: 3200
distributor:
  receivers:
    otlp:
      protocols:
        http:
          endpoint: "0.0.0.0:4318"
storage:
  trace:
    backend: local
    local:
      path: /tempo/traces
    wal:
      path: /tempo/wal
ingester:
  max_block_duration: 1h
compactor:
  compaction:
    block_retention: 1h
    compaction_cycle: 1h
"#;

/// Builds the shell script that installs and starts the monitoring stack
/// (Prometheus, Grafana, Loki, Pyroscope, and Tempo) on the monitoring instance.
///
/// Each `*_version` argument is interpolated into the matching release download
/// URL; every artifact requested is an arm64 Linux build. Downloads are retried
/// up to five times with a 10 second pause between attempts.
///
/// The script moves configuration files (`prometheus.yml`, `datasources.yml`,
/// `all.yml`, `dashboard.json`, `loki.yml`, `pyroscope.yml`, `tempo.yml`) and
/// the Prometheus, Loki, Pyroscope, and Tempo unit files out of `/home/ubuntu`,
/// so they must already be present there when the script runs. It also enables
/// anonymous Grafana access with the `Admin` org role and installs the
/// `grafana-pyroscope-datasource` plugin.
///
/// Returns the script text; nothing is executed by this function.
pub fn install_monitoring_cmd(
    prometheus_version: &str,
    grafana_version: &str,
    loki_version: &str,
    pyroscope_version: &str,
    tempo_version: &str,
) -> String {
    let prometheus_url = format!(
        "https://github.com/prometheus/prometheus/releases/download/v{prometheus_version}/prometheus-{prometheus_version}.linux-arm64.tar.gz",
    );
    let grafana_url =
        format!("https://dl.grafana.com/oss/release/grafana_{grafana_version}_arm64.deb");
    let loki_url = format!(
        "https://github.com/grafana/loki/releases/download/v{loki_version}/loki-linux-arm64.zip",
    );
    let pyroscope_url = format!(
        "https://github.com/grafana/pyroscope/releases/download/v{pyroscope_version}/pyroscope_{pyroscope_version}_linux_arm64.tar.gz",
    );
    let tempo_url = format!(
        "https://github.com/grafana/tempo/releases/download/v{tempo_version}/tempo_{tempo_version}_linux_arm64.tar.gz",
    );
    // Literal braces in the script (bash brace expansion, the sed block) are
    // doubled because the template goes through `format!`.
    format!(
        r#"
sudo apt-get update -y
sudo apt-get install -y wget curl unzip adduser libfontconfig1

# Download Prometheus with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/prometheus.tar.gz {prometheus_url} && break
  sleep 10
done

# Download Grafana with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/grafana.deb {grafana_url} && break
  sleep 10
done

# Download Loki with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/loki.zip {loki_url} && break
  sleep 10
done

# Download Pyroscope with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/pyroscope.tar.gz {pyroscope_url} && break
  sleep 10
done

# Download Tempo with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/tempo.tar.gz {tempo_url} && break
  sleep 10
done

# Install Prometheus
sudo mkdir -p /opt/prometheus /opt/prometheus/data
sudo chown -R ubuntu:ubuntu /opt/prometheus
tar xvfz /home/ubuntu/prometheus.tar.gz -C /home/ubuntu
sudo mv /home/ubuntu/prometheus-{prometheus_version}.linux-arm64 /opt/prometheus/prometheus-{prometheus_version}.linux-arm64
sudo ln -s /opt/prometheus/prometheus-{prometheus_version}.linux-arm64/prometheus /opt/prometheus/prometheus
sudo chmod +x /opt/prometheus/prometheus

# Install Grafana
sudo dpkg -i /home/ubuntu/grafana.deb
sudo apt-get install -f -y

# Install Loki
sudo mkdir -p /opt/loki /loki/index /loki/index_cache /loki/chunks /loki/compactor /loki/wal
sudo chown -R ubuntu:ubuntu /loki
unzip -o /home/ubuntu/loki.zip -d /home/ubuntu
sudo mv /home/ubuntu/loki-linux-arm64 /opt/loki/loki

# Install Pyroscope
sudo mkdir -p /opt/pyroscope /var/lib/pyroscope
sudo chown -R ubuntu:ubuntu /opt/pyroscope /var/lib/pyroscope
tar xvfz /home/ubuntu/pyroscope.tar.gz -C /home/ubuntu
sudo mv /home/ubuntu/pyroscope /opt/pyroscope/pyroscope
sudo chmod +x /opt/pyroscope/pyroscope

# Install Tempo
sudo mkdir -p /opt/tempo /tempo/traces /tempo/wal
sudo chown -R ubuntu:ubuntu /tempo
tar xvfz /home/ubuntu/tempo.tar.gz -C /home/ubuntu
sudo mv /home/ubuntu/tempo /opt/tempo/tempo
sudo chmod +x /opt/tempo/tempo

# Configure Grafana
sudo sed -i '/^\[auth.anonymous\]$/,/^\[/ {{ /^; *enabled = /s/.*/enabled = true/; /^; *org_role = /s/.*/org_role = Admin/ }}' /etc/grafana/grafana.ini
sudo mkdir -p /etc/grafana/provisioning/datasources /etc/grafana/provisioning/dashboards /var/lib/grafana/dashboards

# Install Pyroscope data source plugin
sudo grafana-cli plugins install grafana-pyroscope-datasource

# Move configuration files (assuming they are uploaded via SCP)
sudo mv /home/ubuntu/prometheus.yml /opt/prometheus/prometheus.yml
sudo mv /home/ubuntu/datasources.yml /etc/grafana/provisioning/datasources/datasources.yml
sudo mv /home/ubuntu/all.yml /etc/grafana/provisioning/dashboards/all.yml
sudo mv /home/ubuntu/dashboard.json /var/lib/grafana/dashboards/dashboard.json
sudo mkdir -p /etc/loki
sudo mv /home/ubuntu/loki.yml /etc/loki/loki.yml
sudo chown root:root /etc/loki/loki.yml
sudo mkdir -p /etc/pyroscope
sudo mv /home/ubuntu/pyroscope.yml /etc/pyroscope/pyroscope.yml
sudo chown root:root /etc/pyroscope/pyroscope.yml
sudo mkdir -p /etc/tempo
sudo mv /home/ubuntu/tempo.yml /etc/tempo/tempo.yml
sudo chown root:root /etc/tempo/tempo.yml

# Move service files
sudo mv /home/ubuntu/prometheus.service /etc/systemd/system/prometheus.service
sudo mv /home/ubuntu/loki.service /etc/systemd/system/loki.service
sudo mv /home/ubuntu/pyroscope.service /etc/systemd/system/pyroscope.service
sudo mv /home/ubuntu/tempo.service /etc/systemd/system/tempo.service

# Set ownership
sudo chown -R grafana:grafana /etc/grafana /var/lib/grafana

# Start services
sudo systemctl daemon-reload
sudo systemctl start prometheus
sudo systemctl enable prometheus
sudo systemctl start loki
sudo systemctl enable loki
sudo systemctl start pyroscope
sudo systemctl enable pyroscope
sudo systemctl start tempo
sudo systemctl enable tempo
sudo systemctl restart grafana-server
sudo systemctl enable grafana-server
"#
    )
}

/// Builds the shell script that installs and starts the deployed binary on a
/// binary instance.
///
/// The script installs base packages (including `linux-tools` for the running
/// kernel), marks `/home/ubuntu/binary` executable, creates `/var/log/binary.log`
/// owned by `ubuntu`, installs the `binary` systemd unit and the logrotate
/// configuration, and registers an hourly logrotate entry by piping it to
/// `crontab -`. It also links `perf` into `/usr/local/bin` and installs the
/// Pyroscope agent script, service, and timer files.
///
/// The agent files are installed regardless of `profiling`; when `profiling` is
/// `true` the script additionally enables and starts `pyroscope-agent.timer`.
///
/// All referenced files (`binary`, `binary.service`, `logrotate.conf`,
/// `pyroscope-agent.sh`, `pyroscope-agent.service`, `pyroscope-agent.timer`)
/// are expected in `/home/ubuntu`.
pub fn install_binary_cmd(profiling: bool) -> String {
    // Plain raw string (not `format!`), so shell `$(...)` needs no escaping.
    let mut script = String::from(
        r#"
# Install base tools and binary dependencies
sudo apt-get update -y
sudo apt-get install -y logrotate jq wget libjemalloc2 linux-tools-common linux-tools-generic linux-tools-$(uname -r)

# Setup binary
chmod +x /home/ubuntu/binary
sudo touch /var/log/binary.log && sudo chown ubuntu:ubuntu /var/log/binary.log
sudo mv /home/ubuntu/binary.service /etc/systemd/system/binary.service

# Setup logrotate
sudo mv /home/ubuntu/logrotate.conf /etc/logrotate.d/binary
sudo chown root:root /etc/logrotate.d/binary
echo "0 * * * * /usr/sbin/logrotate /etc/logrotate.d/binary" | crontab -

# Setup pyroscope agent script and timer
sudo ln -s "$(find /usr/lib/linux-tools/*/perf | head -1)" /usr/local/bin/perf
sudo chmod +x /home/ubuntu/pyroscope-agent.sh
sudo mv /home/ubuntu/pyroscope-agent.service /etc/systemd/system/pyroscope-agent.service
sudo mv /home/ubuntu/pyroscope-agent.timer /etc/systemd/system/pyroscope-agent.timer

# Start services
sudo systemctl daemon-reload
sudo systemctl enable --now binary
"#,
    );
    if profiling {
        script.push_str(
            r#"
sudo systemctl enable --now pyroscope-agent.timer
"#,
        );
    }
    script
}

/// Builds the shell script that installs and starts Promtail on a binary instance.
///
/// `promtail_version` selects the Loki release from which the
/// `promtail-linux-arm64.zip` artifact is downloaded (up to five attempts,
/// 10 seconds apart). The binary is installed at `/opt/promtail/promtail`.
///
/// The script expects `promtail.yml` and `promtail.service` in `/home/ubuntu`
/// and moves them to `/etc/promtail/promtail.yml` and
/// `/etc/systemd/system/promtail.service` before starting and enabling the unit.
///
/// The archive is extracted with `unzip -o`, matching the Loki install step,
/// so a leftover `promtail-linux-arm64` from an earlier run is overwritten
/// instead of triggering an interactive prompt.
pub fn setup_promtail_cmd(promtail_version: &str) -> String {
    let promtail_url = format!(
        "https://github.com/grafana/loki/releases/download/v{promtail_version}/promtail-linux-arm64.zip",
    );
    // `{{1..5}}` is an escaped bash brace expansion, not a format argument.
    format!(
        r#"
sudo apt-get update -y
sudo apt-get install -y wget unzip

# Download Promtail with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/promtail.zip {promtail_url} && break
  sleep 10
done

# Install Promtail
sudo mkdir -p /opt/promtail
unzip -o /home/ubuntu/promtail.zip -d /home/ubuntu
sudo mv /home/ubuntu/promtail-linux-arm64 /opt/promtail/promtail
sudo chmod +x /opt/promtail/promtail
sudo mkdir -p /etc/promtail
sudo mv /home/ubuntu/promtail.yml /etc/promtail/promtail.yml
sudo mv /home/ubuntu/promtail.service /etc/systemd/system/promtail.service
sudo chown root:root /etc/promtail/promtail.yml

# Start service
sudo systemctl daemon-reload
sudo systemctl start promtail
sudo systemctl enable promtail
"#
    )
}

/// Renders the Promtail configuration (YAML) for one binary instance.
///
/// * `monitoring_private_ip` - host part of the Loki push URL
///   (`http://<ip>:3100/loki/api/v1/push`).
/// * `instance_name`, `ip`, `region` - written as the `deployer_name`,
///   `deployer_ip`, and `deployer_region` labels on the scraped log stream.
///
/// The single scrape job (`binary_logs`) tails `/var/log/binary.log`. Promtail
/// itself listens for HTTP on port 9080 and keeps its positions file at
/// `/tmp/positions.yaml`.
///
/// Values are interpolated verbatim and unquoted, so callers must pass strings
/// that are valid plain YAML scalars.
pub fn promtail_config(
    monitoring_private_ip: &str,
    instance_name: &str,
    ip: &str,
    region: &str,
) -> String {
    format!(
        r#"
server:
  http_listen_port: 9080
  grpc_listen_port: 0
positions:
  filename: /tmp/positions.yaml
clients:
  - url: http://{monitoring_private_ip}:3100/loki/api/v1/push
scrape_configs:
  - job_name: binary_logs
    static_configs:
      - targets:
          - localhost
        labels:
          deployer_name: {instance_name}
          deployer_ip: {ip}
          deployer_region: {region}
          __path__: /var/log/binary.log
"#
    )
}

/// Builds the shell script that installs and starts Node Exporter on an instance.
///
/// `node_exporter_version` selects the `linux-arm64` release tarball, which is
/// downloaded with up to five attempts, 10 seconds apart. The tarball is
/// unpacked to `/opt/node_exporter/node_exporter-<version>.linux-arm64` and the
/// binary is symlinked at `/opt/node_exporter/node_exporter`.
///
/// The script expects `node_exporter.service` in `/home/ubuntu` and moves it
/// into `/etc/systemd/system` before starting and enabling the unit.
pub fn setup_node_exporter_cmd(node_exporter_version: &str) -> String {
    let node_exporter_url = format!(
        "https://github.com/prometheus/node_exporter/releases/download/v{node_exporter_version}/node_exporter-{node_exporter_version}.linux-arm64.tar.gz",
    );
    // `{{1..5}}` is an escaped bash brace expansion, not a format argument.
    format!(
        r#"
sudo apt-get update -y
sudo apt-get install -y wget tar

# Download Node Exporter with retries
for i in {{1..5}}; do
  wget -O /home/ubuntu/node_exporter.tar.gz {node_exporter_url} && break
  sleep 10
done

# Install Node Exporter
sudo mkdir -p /opt/node_exporter
tar xvfz /home/ubuntu/node_exporter.tar.gz -C /home/ubuntu
sudo mv /home/ubuntu/node_exporter-{node_exporter_version}.linux-arm64 /opt/node_exporter/node_exporter-{node_exporter_version}.linux-arm64
sudo ln -s /opt/node_exporter/node_exporter-{node_exporter_version}.linux-arm64/node_exporter /opt/node_exporter/node_exporter
sudo chmod +x /opt/node_exporter/node_exporter
sudo mv /home/ubuntu/node_exporter.service /etc/systemd/system/node_exporter.service

# Start service
sudo systemctl daemon-reload
sudo systemctl start node_exporter
sudo systemctl enable node_exporter
"#
    )
}

/// Systemd unit file for Node Exporter.
///
/// Runs `/opt/node_exporter/node_exporter` with no arguments as user `ubuntu`.
/// The unit always restarts and is wanted by `multi-user.target`.
pub const NODE_EXPORTER_SERVICE: &str = r#"
[Unit]
Description=Node Exporter
After=network.target

[Service]
ExecStart=/opt/node_exporter/node_exporter
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity

[Install]
WantedBy=multi-user.target
"#;

/// Renders the Prometheus configuration (YAML) with scrape jobs for every instance.
///
/// `instances` is a slice of `(name, ip, region)` tuples. The output always
/// starts with a 15 second global scrape interval and a `monitoring_system` job
/// that scrapes `localhost:9100`. For each instance two jobs are appended:
///
/// * `<name>_binary`, scraping `<ip>:9090`
/// * `<name>_system`, scraping `<ip>:9100`
///
/// Both carry `deployer_name`, `deployer_ip`, and `deployer_region` labels.
/// Values are interpolated verbatim inside single quotes, so they must not
/// contain a single quote. An empty slice yields only the fixed header.
pub fn generate_prometheus_config(instances: &[(&str, &str, &str)]) -> String {
    let mut config = String::from(
        r#"
global:
  scrape_interval: 15s
scrape_configs:
  - job_name: 'monitoring_system'
    static_configs:
      - targets: ['localhost:9100']
"#,
    );
    for (name, ip, region) in instances {
        // Each fragment begins with a newline, leaving a blank line between instances.
        config.push_str(&format!(
            r#"
  - job_name: '{name}_binary'
    static_configs:
      - targets: ['{ip}:9090']
        labels:
          deployer_name: '{name}'
          deployer_ip: '{ip}'
          deployer_region: '{region}'
  - job_name: '{name}_system'
    static_configs:
      - targets: ['{ip}:9100']
        labels:
          deployer_name: '{name}'
          deployer_ip: '{ip}'
          deployer_region: '{region}'
"#
        ));
    }
    config
}

/// Logrotate configuration for `/var/log/binary.log`.
///
/// Uses `rotate 0` together with `copytruncate`, and sets `missingok` and
/// `notifempty`.
pub const LOGROTATE_CONF: &str = r#"
/var/log/binary.log {
    rotate 0
    copytruncate
    missingok
    notifempty
}
"#;

/// Sysctl settings that select the `fq` default queueing discipline and the
/// `bbr` TCP congestion control algorithm (one `key=value` pair per line).
pub const BBR_CONF: &str = "net.core.default_qdisc=fq\nnet.ipv4.tcp_congestion_control=bbr\n";

/// Systemd unit file for the deployed binary.
///
/// Runs `/home/ubuntu/binary` as user `ubuntu` with `--hosts` and `--config`
/// pointing at `/home/ubuntu/hosts.yaml` and `/home/ubuntu/config.conf`.
/// `LD_PRELOAD` is set to the aarch64 jemalloc shared library, and both stdout
/// and stderr are appended to `/var/log/binary.log`. The unit always restarts
/// and is wanted by `multi-user.target`.
pub const BINARY_SERVICE: &str = r#"
[Unit]
Description=Deployed Binary Service
After=network.target

[Service]
Environment="LD_PRELOAD=/usr/lib/aarch64-linux-gnu/libjemalloc.so.2"
ExecStart=/home/ubuntu/binary --hosts=/home/ubuntu/hosts.yaml --config=/home/ubuntu/config.conf
TimeoutStopSec=60
Restart=always
User=ubuntu
LimitNOFILE=infinity
StandardOutput=append:/var/log/binary.log
StandardError=append:/var/log/binary.log

[Install]
WantedBy=multi-user.target
"#;

/// Renders the Bash script run by the Pyroscope agent (uses `perf`, `jq`, and `wget`).
///
/// * `monitoring_private_ip` - host of the Pyroscope `/ingest` endpoint (port 4040).
/// * `name`, `ip`, `region` - embedded in the Pyroscope application name as the
///   `deployer_name`, `deployer_ip`, and `deployer_region` tags.
///
/// The generated script:
/// 1. looks up the main PID of `binary.service` and exits with status 1 if it
///    is empty or 0;
/// 2. records a 60 second `perf` profile at 100 Hz for that PID;
/// 3. converts the `perf report` output into folded stacks with `awk`;
/// 4. exits with status 0 (after removing its temp files) when the folded
///    stack file is empty;
/// 5. otherwise posts the folded stacks to Pyroscope and removes the temp files.
///
/// The script uses `set -e`, so any failing command aborts it.
pub fn generate_pyroscope_script(
    monitoring_private_ip: &str,
    name: &str,
    ip: &str,
    region: &str,
) -> String {
    // Shell `${{VAR}}` expansions and awk blocks are written with doubled braces
    // because the template goes through `format!`; only the four parameters are
    // interpolated.
    format!(
        r#"#!/bin/bash
set -e

SERVICE_NAME="binary.service"
PERF_DATA_FILE="/tmp/perf.data"
PERF_STACK_FILE="/tmp/perf.stack"
PROFILE_DURATION=60 # seconds
PERF_FREQ=100 # Hz

# Construct the Pyroscope application name with tags
RAW_APP_NAME="binary{{deployer_name={name},deployer_ip={ip},deployer_region={region}}}"
APP_NAME=$(jq -nr --arg str "$RAW_APP_NAME" '$str | @uri')

# Get the PID of the binary service
PID=$(systemctl show --property MainPID ${{SERVICE_NAME}} | cut -d= -f2)
if [ -z "$PID" ] || [ "$PID" -eq 0 ]; then
  echo "Error: Could not get PID for ${{SERVICE_NAME}}." >&2
  exit 1
fi

# Record performance data
echo "Recording perf data for PID ${{PID}}..."
sudo perf record -F ${{PERF_FREQ}} -p ${{PID}} -o ${{PERF_DATA_FILE}} -g --call-graph fp -- sleep ${{PROFILE_DURATION}}

# Generate folded stack report
echo "Generating folded stack report..."
sudo perf report -i ${{PERF_DATA_FILE}} --stdio --no-children -g folded,0,caller,count -s comm | \
    awk '/^[0-9]+\.[0-9]+%/ {{ comm = $2 }} /^[0-9]/ {{ print comm ";" substr($0, index($0, $2)), $1 }}' > ${{PERF_STACK_FILE}}

# Check if stack file is empty (perf might fail silently sometimes)
if [ ! -s "${{PERF_STACK_FILE}}" ]; then
    echo "Warning: ${{PERF_STACK_FILE}} is empty. Skipping upload." >&2
    # Clean up empty perf.data
    sudo rm -f ${{PERF_DATA_FILE}} ${{PERF_STACK_FILE}}
    exit 0
fi

# Calculate timestamps
UNTIL_TS=$(date +%s)
FROM_TS=$((UNTIL_TS - PROFILE_DURATION))

# Upload to Pyroscope
echo "Uploading profile to Pyroscope at {monitoring_private_ip}..."
wget --post-file="${{PERF_STACK_FILE}}" \
    --header="Content-Type: text/plain" \
    --quiet \
    -O /dev/null \
    "http://{monitoring_private_ip}:4040/ingest?name=${{APP_NAME}}&format=folded&units=samples&aggregationType=sum&sampleType=cpu&from=${{FROM_TS}}&until=${{UNTIL_TS}}&spyName=perf"

echo "Profile upload complete."
sudo rm -f ${{PERF_DATA_FILE}} ${{PERF_STACK_FILE}}
"#
    )
}

/// Systemd unit file for the Pyroscope agent script.
///
/// A `oneshot` service that runs `/home/ubuntu/pyroscope-agent.sh` as user
/// `ubuntu`. It is ordered after `network-online.target` and `binary.service`.
pub const PYROSCOPE_AGENT_SERVICE: &str = r#"
[Unit]
Description=Pyroscope Agent (Perf Script Runner)
Wants=network-online.target
After=network-online.target binary.service

[Service]
Type=oneshot
User=ubuntu
ExecStart=/home/ubuntu/pyroscope-agent.sh

[Install]
WantedBy=multi-user.target
"#;

/// Systemd timer unit that triggers `pyroscope-agent.service`.
///
/// The first run is scheduled 2 minutes after boot (`OnBootSec`), later runs
/// 1 minute after the service last became inactive (`OnUnitInactiveSec`), with
/// up to 10 seconds of randomized delay. The timer is wanted by `timers.target`.
pub const PYROSCOPE_AGENT_TIMER: &str = r#"
[Unit]
Description=Run Pyroscope Agent periodically

[Timer]
# Wait a bit after boot before the first run
OnBootSec=2min
# Run roughly every minute after the last run finished
OnUnitInactiveSec=1min
Unit=pyroscope-agent.service
# Randomize the delay to avoid thundering herd
RandomizedDelaySec=10s

[Install]
WantedBy=timers.target
"#;