---
AWSTemplateFormatVersion: "2010-09-09"
Description: "Development machine"
# Stack inputs. Descriptions are shown verbatim in the CloudFormation console.
Parameters:
  # --- Identity / encryption / storage references -------------------------
  Id:
    Type: String
    Description: Unique identifier, prefix for all resources created below.

  # Propagated to instances as the KMS_KEY_ARN tag.
  KmsKeyArn:
    Type: String
    Description: KMS CMK ARN that de/encrypts resources.

  # Propagated to instances as the AAD_TAG tag.
  AadTag:
    Type: String
    Description: AAD tag for envelope encryption with KMS.

  # Propagated to instances as the S3_BUCKET_NAME tag.
  S3BucketName:
    Type: String
    Description: S3 bucket name.

  # --- Instance access / networking ---------------------------------------
  Ec2KeyPairName:
    Type: AWS::EC2::KeyPair::KeyName
    Description: EC2 SSH key name

  InstanceProfileArn:
    Type: String
    Description: Instance profile ARN

  PublicSubnetIds:
    Type: List<AWS::EC2::Subnet::Id>
    Description: The public subnet IDs where node instances are to be created.

  SecurityGroupId:
    Type: AWS::EC2::SecurityGroup::Id
    Description: EC2 security group ID

  # --- Image selection ----------------------------------------------------
  # An empty ImageId falls back to ImageIdSsmParameter (see the HasImageId
  # condition).
  ImageId:
    Type: String
    Default: ""
    Description: (Optional) Custom image ID. When non-empty, this value overrides the AMI resolved from ImageIdSsmParameter.

  ImageIdSsmParameter:
    Type: AWS::SSM::Parameter::Value<AWS::EC2::Image::Id>
    Default: /aws/service/canonical/ubuntu/server/20.04/stable/current/amd64/hvm/ebs-gp2/ami-id
    Description: AWS Systems Manager Parameter Store parameter of the AMI ID.

  ArchType:
    Type: String
    AllowedValues:
      - "amd64"
      - "arm64"
    Default: "amd64"
    Description: The name of the CPU architecture. Used for cloudwatch agent download links.

  RustOsType:
    Type: String
    AllowedValues:
      - "unknown"
      - "ubuntu20.04"
    Default: "unknown"
    Description: The name of the OS distribution and kind. Used for Rust binary download links.

  # --- Instance types -----------------------------------------------------
  # InstanceTypesCount must match the number of entries in InstanceTypes that
  # should be used; it drives the HasNInstanceTypes conditions.
  InstanceTypes:
    Type: CommaDelimitedList
    Default: c6a.4xlarge,m6a.4xlarge,m5.4xlarge,c5.4xlarge
    Description: EC2 instance types

  InstanceTypesCount:
    Type: Number
    Default: 4
    MinValue: 1
    MaxValue: 10
    Description: The number of instance types

  # --- Data volume (handed to aws-volume-provisioner in UserData) ---------
  # The root disk is NOT configured by these parameters; it is fixed in the
  # launch template's BlockDeviceMappings.
  VolumeType:
    Type: String
    Default: gp3
    Description: Volume type.

  VolumeSize:
    Type: Number
    Default: 1024
    MinValue: 40
    MaxValue: 1024
    Description: Size of the EBS volume requested from aws-volume-provisioner (mounted at /data), in GiB.

  VolumeIops:
    Type: Number
    Default: 3000
    Description: The number of I/O operations per second (IOPS).

  VolumeThroughput:
    Type: Number
    Default: 500
    Description: The throughput to provision for a gp3 volume, with a maximum of 1,000 MiB/s.

  VolumeProvisionerInitialWaitRandomSeconds:
    Type: Number
    Default: 10
    MinValue: 0
    MaxValue: 500
    Description: Only set non-zero if multiple instances may compete for the same EBS volume in the same zone.

  # --- Addressing / purchase mode -----------------------------------------
  # "elastic" makes UserData download and run aws-ip-provisioner.
  IpMode:
    Type: String
    AllowedValues:
      - "ephemeral"
      - "elastic"
    Default: "elastic"
    Description: Set "elastic" to allocate Elastic IP.

  # Only used as the INSTANCE_MODE tag in this template.
  InstanceMode:
    Type: String
    AllowedValues:
      - "spot"
      - "on-demand"
    Default: "spot"
    Description: Instance purchase mode label, recorded as the INSTANCE_MODE tag. The Spot/On-Demand mix itself is controlled by OnDemandPercentageAboveBaseCapacity.

  # --- Auto Scaling group -------------------------------------------------
  # NOTE(review): AsgLaunchTemplateId and AsgLaunchTemplateVersion are not
  # referenced by any resource in this template; the ASG always uses the
  # launch template created here. They are kept so existing callers that pass
  # them keep working.
  AsgLaunchTemplateId:
    Type: String
    Default: ""
    Description: (Optional) Non-empty to reuse.

  AsgLaunchTemplateVersion:
    Type: String
    Default: ""
    Description: (Optional) Non-empty to reuse.

  AsgName:
    Type: String
    Description: Unique identifier for this Asg.

  AsgMinInstancesInService:
    Type: Number
    Description: Minimum instances in service for update.
    Default: 1
    MinValue: 1
    MaxValue: 1000

  AsgMinSize:
    Type: Number
    Description: Minimum size auto scaling group
    Default: 0
    MinValue: 0
    MaxValue: 1000

  AsgMaxSize:
    Type: Number
    Description: Maximum size auto scaling group
    Default: 2
    MinValue: 1
    MaxValue: 3

  AsgDesiredCapacity:
    Type: Number
    Description: Desired size auto scaling group
    Default: 1
    MinValue: 1
    MaxValue: 3

  OnDemandPercentageAboveBaseCapacity:
    Type: Number
    Default: 100
    MinValue: 0
    MaxValue: 100
    Description: 0 for Spot only. 100 for On-Demand only.

Conditions:
  # True when the caller supplied an explicit AMI ID.
  HasImageId: !Not
    - !Equals [!Ref ImageId, ""]

  # HasNInstanceTypes is true when InstanceTypesCount is N or larger.
  # CloudFormation conditions have no ">=" operator, so each one enumerates
  # every accepted count from N up to the parameter's maximum of 10.
  Has2InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 2]
    - !Equals [!Ref InstanceTypesCount, 3]
    - !Equals [!Ref InstanceTypesCount, 4]
    - !Equals [!Ref InstanceTypesCount, 5]
    - !Equals [!Ref InstanceTypesCount, 6]
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has3InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 3]
    - !Equals [!Ref InstanceTypesCount, 4]
    - !Equals [!Ref InstanceTypesCount, 5]
    - !Equals [!Ref InstanceTypesCount, 6]
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has4InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 4]
    - !Equals [!Ref InstanceTypesCount, 5]
    - !Equals [!Ref InstanceTypesCount, 6]
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has5InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 5]
    - !Equals [!Ref InstanceTypesCount, 6]
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has6InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 6]
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has7InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 7]
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has8InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 8]
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  Has9InstanceTypes: !Or
    - !Equals [!Ref InstanceTypesCount, 9]
    - !Equals [!Ref InstanceTypesCount, 10]

  # Ten is the upper bound, so a single equality test suffices.
  Has10InstanceTypes: !Equals
    - !Ref InstanceTypesCount
    - 10

Resources:
  # Launch template used by the ASG below for every instance type override.
  # Its UserData bootstraps a development machine: build tools, Go, Rust,
  # Docker, AWS CLI v2, SSM agent, CloudWatch agent, a provisioned data volume
  # mounted at /data and, when IpMode is "elastic", an Elastic IP.
  AsgLaunchTemplate:
    Type: AWS::EC2::LaunchTemplate
    Properties:
      LaunchTemplateName: !Join ["-", [!Ref Id, !Ref ArchType]]
      LaunchTemplateData:
        IamInstanceProfile:
          Arn: !Ref InstanceProfileArn
        # A non-empty ImageId parameter wins over the SSM-resolved AMI.
        ImageId:
          Fn::If:
            - HasImageId
            - !Ref ImageId
            - !Ref ImageIdSsmParameter
        KeyName: !Ref Ec2KeyPairName
        # Root disk only. The data volume is created at boot time by
        # aws-volume-provisioner from the Volume* parameters (see UserData).
        BlockDeviceMappings:
          - DeviceName: "/dev/sda1"
            Ebs:
              VolumeType: gp3
              VolumeSize: 200
        Monitoring:
          Enabled: true
        NetworkInterfaces:
          - AssociatePublicIpAddress: true
            DeleteOnTermination: true
            DeviceIndex: 0
            Groups:
              - !Ref SecurityGroupId
        TagSpecifications:
          - ResourceType: instance
            Tags:
              - Key: Name
                Value: !Sub "${Id}-${ArchType}"
        # The script is passed through Fn::Sub: "${Name}" is replaced by
        # CloudFormation, "${!NAME}" is emitted as a literal shell "${NAME}",
        # and bare "$NAME" / "$(...)" are left to the shell.
        UserData:
          Fn::Base64:
            Fn::Sub: |
              #!/bin/bash
              # "set -e" is active, so every retry below is written as
              # "until <command>; do sleep; done": a failure inside the
              # until-condition does not abort the script, whereas a failing
              # command followed by a separate exit-status check would make
              # the script exit before the check ever ran.
              set -xeu

              # running as "root"
              whoami

              export DEBIAN_FRONTEND=noninteractive
              sudo lscpu

              until sudo apt-get update -y && sudo apt-get upgrade -y \
                && sudo apt-get install -yq \
                  build-essential tmux zsh git \
                  jq curl wget \
                  unzip zip gzip tar \
                  libssl-dev \
                  python3-pip \
                  pkg-config \
                  protobuf-compiler
              do
                sleep 2s
              done
              gcc --version

              # Quoted delimiter: $PATH must be written literally so that it is
              # expanded at login time, not frozen to the provisioning PATH.
              cat <<'EOF' >> /home/ubuntu/.bashrc
              export GOPATH=/home/ubuntu/go
              export PATH=/usr/local/go/bin:/home/ubuntu/go/bin:/home/ubuntu/.cargo/bin:$PATH
              . /opt/rust/env
              EOF

              # install go
              sudo rm -rf /usr/local/go
              GO_VERSION=1.20.3
              sudo curl -s https://storage.googleapis.com/golang/go$GO_VERSION.linux-$(dpkg --print-architecture).tar.gz | sudo tar -v -C /usr/local/ -xz
              /usr/local/go/bin/go version

              # install rust
              export RUSTUP_HOME=/opt/rust
              export CARGO_HOME=/opt/rust
              curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y --no-modify-path --default-toolchain stable --profile default
              # printf (not echo) so the "\n" escapes become real newlines;
              # "." instead of "source" because .profile may be read by sh.
              printf '\n\n# added by cloud init\n. /opt/rust/env\n' >> /home/ubuntu/.profile
              sudo -H -u ubuntu bash -c 'source /opt/rust/env && rustup default stable'

              # install docker
              until sudo apt-get install -yq ca-certificates gnupg lsb-release; do
                sleep 2s
              done
              until sudo rm -f /usr/share/keyrings/docker-archive-keyring.gpg \
                && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg
              do
                sleep 2s
              done
              echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null
              until sudo apt-get update -y && sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-compose-plugin; do
                sleep 2s
              done
              # The docker group membership applies from the next login of
              # "ubuntu"; no interactive "newgrp" shell is spawned here.
              sudo usermod -aG docker ubuntu
              sudo systemctl enable --now docker
              sudo docker ps
              sudo docker version

              # http://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/intrinsic-function-reference-sub.html
              # "x86_64" (mac, linux x86), "arm64" (M1), "aarch64" (graviton)
              # https://aws.amazon.com/blogs/developer/aws-cli-v2-now-available-for-linux-arm/
              export LINUX_ARCH_TYPE=$(uname -m)
              echo LINUX_ARCH_TYPE: ${!LINUX_ARCH_TYPE}
              until
                sudo rm -f ./awscli-exe-linux-${!LINUX_ARCH_TYPE}.zip || true
                wget --quiet --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=70 --continue https://awscli.amazonaws.com/awscli-exe-linux-${!LINUX_ARCH_TYPE}.zip
              do
                sleep 2s
              done
              unzip ./awscli-exe-linux-${!LINUX_ARCH_TYPE}.zip
              sudo ./aws/install
              /usr/local/bin/aws --version

              # https://docs.aws.amazon.com/systems-manager/latest/userguide/agent-install-ubuntu.html
              sudo snap install amazon-ssm-agent --classic
              sudo systemctl enable snap.amazon-ssm-agent.amazon-ssm-agent.service
              sudo systemctl restart snap.amazon-ssm-agent.amazon-ssm-agent.service
              mkdir -p /etc/systemd/system/snap.amazon-ssm-agent.amazon-ssm-agent.service.d
              cat > /tmp/amazon-ssm-agent-10-restart-always.conf <<EOF
              [Service]
              Restart=always
              RestartSec=60s
              EOF
              sudo mv /tmp/amazon-ssm-agent-10-restart-always.conf /etc/systemd/system/snap.amazon-ssm-agent.amazon-ssm-agent.service.d/10-restart-always.conf
              # systemd only picks up a new drop-in after a daemon-reload.
              sudo systemctl daemon-reload
              sudo systemctl start --no-block snap.amazon-ssm-agent.amazon-ssm-agent.service

              # https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/QuickStartEC2Instance.html
              mkdir -p /tmp/install-cloudwatch-logs
              # The loop is gated on wget itself; the download lands in the
              # install directory via --directory-prefix.
              until
                sudo rm -f /tmp/install-cloudwatch-logs/amazon-cloudwatch-agent.deb || true
                wget --quiet --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=70 --continue --directory-prefix=/tmp/install-cloudwatch-logs https://s3.amazonaws.com/amazoncloudwatch-agent/ubuntu/${ArchType}/latest/amazon-cloudwatch-agent.deb
              do
                sleep 2s
              done
              echo "installing amazon-cloudwatch-agent"
              until sudo dpkg -i -E /tmp/install-cloudwatch-logs/amazon-cloudwatch-agent.deb; do
                sleep 2s
              done

              # https://github.com/ava-labs/volume-manager/releases
              wget --quiet --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=70 --continue https://github.com/ava-labs/volume-manager/releases/download/latest/aws-volume-provisioner.${!LINUX_ARCH_TYPE}-${RustOsType}-linux-gnu
              mv ./aws-volume-provisioner.${!LINUX_ARCH_TYPE}-${RustOsType}-linux-gnu /tmp/aws-volume-provisioner
              chmod +x /tmp/aws-volume-provisioner
              /tmp/aws-volume-provisioner \
                --log-level=info \
                --region ${AWS::Region} \
                --initial-wait-random-seconds=${VolumeProvisionerInitialWaitRandomSeconds} \
                --id-tag-key=Id \
                --id-tag-value=${Id} \
                --kind-tag-key=Kind \
                --kind-tag-value=aws-volume-provisioner \
                --ec2-tag-asg-name-key=ASG_NAME \
                --asg-tag-key=autoscaling:groupName \
                --volume-type=${VolumeType} \
                --volume-size=${VolumeSize} \
                --volume-iops=${VolumeIops} \
                --volume-throughput=${VolumeThroughput} \
                --ebs-device-name=/dev/xvdb \
                --block-device-name=/dev/nvme1n1 \
                --filesystem-name=ext4 \
                --mount-directory-path=/data

              # https://github.com/ava-labs/ip-manager/releases
              if [[ ${IpMode} == "elastic" ]]; then
                sudo rm -f ./aws-ip-provisioner.${!LINUX_ARCH_TYPE}-${RustOsType}-linux-gnu
                sudo rm -f /tmp/aws-ip-provisioner
                sudo rm -f /usr/local/bin/aws-ip-provisioner
                wget --quiet --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 --tries=70 --continue https://github.com/ava-labs/ip-manager/releases/download/latest/aws-ip-provisioner.${!LINUX_ARCH_TYPE}-${RustOsType}-linux-gnu
                mv ./aws-ip-provisioner.${!LINUX_ARCH_TYPE}-${RustOsType}-linux-gnu /tmp/aws-ip-provisioner
                chmod +x /tmp/aws-ip-provisioner
                sudo mv /tmp/aws-ip-provisioner /usr/local/bin/aws-ip-provisioner
                /usr/local/bin/aws-ip-provisioner --version
                echo "Running /usr/local/bin/aws-ip-provisioner..."
                /usr/local/bin/aws-ip-provisioner \
                  --log-level=info \
                  --region ${AWS::Region} \
                  --id-tag-key=Id \
                  --id-tag-value=${Id} \
                  --kind-tag-key=Kind \
                  --kind-tag-value=aws-ip-provisioner \
                  --ec2-tag-asg-name-key=ASG_NAME \
                  --asg-tag-key=autoscaling:groupName \
                  --mounted-eip-file-path=/data/eip.yaml
              else
                echo "skipping allocating elastic IP address..."
              fi

              sudo timedatectl set-ntp on

              # e.g.,
              # "Accept error: accept tcp [::]:9650: accept4: too many open files; retrying in 1s"
              # tee runs under sudo, so the append itself is privileged
              # (a plain ">>" redirect is performed by the calling shell).
              echo "* hard nofile 1000000" | sudo tee -a /etc/security/limits.conf > /dev/null
              echo "* soft nofile 1000000" | sudo tee -a /etc/security/limits.conf > /dev/null
              sudo sysctl -w fs.file-max=1000000
              sudo sysctl -p

  # Auto Scaling group that launches the development machine(s) from the
  # launch template above, across the instance types listed in InstanceTypes.
  ASG:
    Type: AWS::AutoScaling::AutoScalingGroup
    # Rolling replacement, one instance at a time, with the listed scaling
    # processes suspended for the duration of the update.
    UpdatePolicy:
      AutoScalingRollingUpdate:
        MinInstancesInService: !Ref AsgMinInstancesInService
        MaxBatchSize: 1
        SuspendProcesses:
          - HealthCheck
          - ReplaceUnhealthy
          - AZRebalance
          - AlarmNotification
          - ScheduledActions
    Properties:
      AutoScalingGroupName: !Ref AsgName
      MinSize: !Ref AsgMinSize
      MaxSize: !Ref AsgMaxSize
      DesiredCapacity: !Ref AsgDesiredCapacity
      VPCZoneIdentifier: !Ref PublicSubnetIds
      HealthCheckType: EC2
      HealthCheckGracePeriod: 120
      MetricsCollection:
        - Granularity: "1Minute"
      # Every tag is propagated to the launched instances. ASG_NAME is the key
      # the provisioners in UserData are told to read (--ec2-tag-asg-name-key).
      Tags:
        - Key: Name
          Value: !Ref AsgName
          PropagateAtLaunch: true
        - Key: ASG_NAME
          Value: !Ref AsgName
          PropagateAtLaunch: true
        - Key: ID
          Value: !Ref Id
          PropagateAtLaunch: true
        - Key: ARCH_TYPE
          Value: !Ref ArchType
          PropagateAtLaunch: true
        - Key: RUST_OS_TYPE
          Value: !Ref RustOsType
          PropagateAtLaunch: true
        - Key: INSTANCE_MODE
          Value: !Ref InstanceMode
          PropagateAtLaunch: true
        - Key: KMS_KEY_ARN
          Value: !Ref KmsKeyArn
          PropagateAtLaunch: true
        - Key: AAD_TAG
          Value: !Ref AadTag
          PropagateAtLaunch: true
        - Key: S3_BUCKET_NAME
          Value: !Ref S3BucketName
          PropagateAtLaunch: true
      MixedInstancesPolicy:
        InstancesDistribution:
          OnDemandAllocationStrategy: "lowest-price"
          OnDemandBaseCapacity: 0
          OnDemandPercentageAboveBaseCapacity: !Ref OnDemandPercentageAboveBaseCapacity
          SpotAllocationStrategy: "lowest-price"
          SpotInstancePools: 3
        LaunchTemplate:
          LaunchTemplateSpecification:
            LaunchTemplateId: !Ref AsgLaunchTemplate
            Version: !GetAtt AsgLaunchTemplate.LatestVersionNumber
          # The first instance type is always present. Entry N (N >= 2) is
          # included only when HasNInstanceTypes holds; otherwise AWS::NoValue
          # drops the list item, so !Select never indexes past the number of
          # types announced by InstanceTypesCount.
          Overrides:
            - InstanceType: !Select [0, !Ref InstanceTypes]
            - Fn::If:
                - Has2InstanceTypes
                - InstanceType: !Select [1, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has3InstanceTypes
                - InstanceType: !Select [2, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has4InstanceTypes
                - InstanceType: !Select [3, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has5InstanceTypes
                - InstanceType: !Select [4, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has6InstanceTypes
                - InstanceType: !Select [5, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has7InstanceTypes
                - InstanceType: !Select [6, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has8InstanceTypes
                - InstanceType: !Select [7, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has9InstanceTypes
                - InstanceType: !Select [8, !Ref InstanceTypes]
                - !Ref AWS::NoValue
            - Fn::If:
                - Has10InstanceTypes
                - InstanceType: !Select [9, !Ref InstanceTypes]
                - !Ref AWS::NoValue

Outputs:
  # Value returned by Ref on the ASG resource defined in this template.
  AsgLogicalId:
    Value:
      Ref: ASG