sn_testnet_deploy/
upscale.rs

1// Copyright (c) 2023, MaidSafe.
2// All rights reserved.
3//
4// This SAFE Network Software is licensed under the BSD-3-Clause license.
5// Please see the LICENSE file for more details.
6
7use crate::{
8    ansible::{
9        inventory::AnsibleInventoryType,
10        provisioning::{PrivateNodeProvisionInventory, ProvisionOptions},
11    },
12    error::{Error, Result},
13    get_anvil_node_data, get_bootstrap_cache_url, get_genesis_multiaddr, get_multiaddr,
14    DeploymentInventory, DeploymentType, EvmNetwork, InfraRunOptions, NodeType, TestnetDeployer,
15};
16use colored::Colorize;
17use evmlib::common::U256;
18use log::debug;
19use std::{collections::HashSet, time::Duration};
20
21#[derive(Clone)]
22pub struct UpscaleOptions {
23    pub ansible_verbose: bool,
24    pub ant_version: Option<String>,
25    pub current_inventory: DeploymentInventory,
26    pub desired_client_vm_count: Option<u16>,
27    pub desired_full_cone_private_node_count: Option<u16>,
28    pub desired_full_cone_private_node_vm_count: Option<u16>,
29    pub desired_node_count: Option<u16>,
30    pub desired_node_vm_count: Option<u16>,
31    pub desired_peer_cache_node_count: Option<u16>,
32    pub desired_peer_cache_node_vm_count: Option<u16>,
33    pub desired_symmetric_private_node_count: Option<u16>,
34    pub desired_symmetric_private_node_vm_count: Option<u16>,
35    pub desired_uploaders_count: Option<u16>,
36    pub enable_download_verifier: bool,
37    pub enable_random_verifier: bool,
38    pub enable_performance_verifier: bool,
39    pub funding_wallet_secret_key: Option<String>,
40    pub gas_amount: Option<U256>,
41    pub interval: Duration,
42    pub infra_only: bool,
43    pub max_archived_log_files: u16,
44    pub max_log_files: u16,
45    pub network_dashboard_branch: Option<String>,
46    pub node_env_variables: Option<Vec<(String, String)>>,
47    pub plan: bool,
48    pub public_rpc: bool,
49    pub provision_only: bool,
50    pub token_amount: Option<U256>,
51}
52
53impl TestnetDeployer {
54    pub async fn upscale(&self, options: &UpscaleOptions) -> Result<()> {
55        let is_bootstrap_deploy = matches!(
56            options
57                .current_inventory
58                .environment_details
59                .deployment_type,
60            DeploymentType::Bootstrap
61        );
62
63        if is_bootstrap_deploy
64            && (options.desired_peer_cache_node_count.is_some()
65                || options.desired_peer_cache_node_vm_count.is_some()
66                || options.desired_client_vm_count.is_some())
67        {
68            return Err(Error::InvalidUpscaleOptionsForBootstrapDeployment);
69        }
70
71        let desired_peer_cache_node_vm_count = options
72            .desired_peer_cache_node_vm_count
73            .unwrap_or(options.current_inventory.peer_cache_node_vms.len() as u16);
74        if desired_peer_cache_node_vm_count
75            < options.current_inventory.peer_cache_node_vms.len() as u16
76        {
77            return Err(Error::InvalidUpscaleDesiredPeerCacheVmCount);
78        }
79        debug!("Using {desired_peer_cache_node_vm_count} for desired Peer Cache node VM count");
80
81        let desired_node_vm_count = options
82            .desired_node_vm_count
83            .unwrap_or(options.current_inventory.node_vms.len() as u16);
84        if desired_node_vm_count < options.current_inventory.node_vms.len() as u16 {
85            return Err(Error::InvalidUpscaleDesiredNodeVmCount);
86        }
87        debug!("Using {desired_node_vm_count} for desired node VM count");
88
89        let desired_full_cone_private_node_vm_count = options
90            .desired_full_cone_private_node_vm_count
91            .unwrap_or(options.current_inventory.full_cone_private_node_vms.len() as u16);
92        if desired_full_cone_private_node_vm_count
93            < options.current_inventory.full_cone_private_node_vms.len() as u16
94        {
95            return Err(Error::InvalidUpscaleDesiredFullConePrivateNodeVmCount);
96        }
97        debug!("Using {desired_full_cone_private_node_vm_count} for desired full cone private node VM count");
98
99        let desired_symmetric_private_node_vm_count = options
100            .desired_symmetric_private_node_vm_count
101            .unwrap_or(options.current_inventory.symmetric_private_node_vms.len() as u16);
102        if desired_symmetric_private_node_vm_count
103            < options.current_inventory.symmetric_private_node_vms.len() as u16
104        {
105            return Err(Error::InvalidUpscaleDesiredSymmetricPrivateNodeVmCount);
106        }
107        debug!("Using {desired_symmetric_private_node_vm_count} for desired full cone private node VM count");
108
109        let desired_client_vm_count = options
110            .desired_client_vm_count
111            .unwrap_or(options.current_inventory.client_vms.len() as u16);
112        if desired_client_vm_count < options.current_inventory.client_vms.len() as u16 {
113            return Err(Error::InvalidUpscaleDesiredClientVmCount);
114        }
115        debug!("Using {desired_client_vm_count} for desired Client VM count");
116
117        let desired_peer_cache_node_count = options
118            .desired_peer_cache_node_count
119            .unwrap_or(options.current_inventory.peer_cache_node_count() as u16);
120        if desired_peer_cache_node_count < options.current_inventory.peer_cache_node_count() as u16
121        {
122            return Err(Error::InvalidUpscaleDesiredPeerCacheNodeCount);
123        }
124        debug!("Using {desired_peer_cache_node_count} for desired peer cache node count");
125
126        let desired_node_count = options
127            .desired_node_count
128            .unwrap_or(options.current_inventory.node_count() as u16);
129        if desired_node_count < options.current_inventory.node_count() as u16 {
130            return Err(Error::InvalidUpscaleDesiredNodeCount);
131        }
132        debug!("Using {desired_node_count} for desired node count");
133
134        let desired_full_cone_private_node_count = options
135            .desired_full_cone_private_node_count
136            .unwrap_or(options.current_inventory.full_cone_private_node_count() as u16);
137        if desired_full_cone_private_node_count
138            < options.current_inventory.full_cone_private_node_count() as u16
139        {
140            return Err(Error::InvalidUpscaleDesiredFullConePrivateNodeCount);
141        }
142        debug!(
143            "Using {desired_full_cone_private_node_count} for desired full cone private node count"
144        );
145
146        let desired_symmetric_private_node_count = options
147            .desired_symmetric_private_node_count
148            .unwrap_or(options.current_inventory.symmetric_private_node_count() as u16);
149        if desired_symmetric_private_node_count
150            < options.current_inventory.symmetric_private_node_count() as u16
151        {
152            return Err(Error::InvalidUpscaleDesiredSymmetricPrivateNodeCount);
153        }
154        debug!(
155            "Using {desired_symmetric_private_node_count} for desired symmetric private node count"
156        );
157
158        let mut infra_run_options = InfraRunOptions::generate_existing(
159            &options.current_inventory.name,
160            &options.current_inventory.environment_details.region,
161            &self.terraform_runner,
162            Some(&options.current_inventory.environment_details),
163        )
164        .await?;
165        infra_run_options.peer_cache_node_vm_count = Some(desired_peer_cache_node_vm_count);
166        infra_run_options.node_vm_count = Some(desired_node_vm_count);
167        infra_run_options.full_cone_private_node_vm_count =
168            Some(desired_full_cone_private_node_vm_count);
169        infra_run_options.symmetric_private_node_vm_count =
170            Some(desired_symmetric_private_node_vm_count);
171        infra_run_options.client_vm_count = Some(desired_client_vm_count);
172
173        if options.plan {
174            self.plan(&infra_run_options)?;
175            return Ok(());
176        }
177
178        self.create_or_update_infra(&infra_run_options)
179            .map_err(|err| {
180                println!("Failed to create infra {err:?}");
181                err
182            })?;
183
184        if options.infra_only {
185            return Ok(());
186        }
187
188        let mut provision_options = ProvisionOptions {
189            ant_version: options.ant_version.clone(),
190            binary_option: options.current_inventory.binary_option.clone(),
191            chunk_size: None,
192            client_env_variables: None,
193            enable_download_verifier: options.enable_download_verifier,
194            enable_performance_verifier: options.enable_performance_verifier,
195            enable_random_verifier: options.enable_random_verifier,
196            enable_telegraf: true,
197            enable_uploaders: true,
198            evm_data_payments_address: options
199                .current_inventory
200                .environment_details
201                .evm_details
202                .data_payments_address
203                .clone(),
204            evm_network: options
205                .current_inventory
206                .environment_details
207                .evm_details
208                .network
209                .clone(),
210            evm_payment_token_address: options
211                .current_inventory
212                .environment_details
213                .evm_details
214                .payment_token_address
215                .clone(),
216            evm_rpc_url: options
217                .current_inventory
218                .environment_details
219                .evm_details
220                .rpc_url
221                .clone(),
222            expected_hash: None,
223            expected_size: None,
224            file_address: None,
225            full_cone_private_node_count: desired_full_cone_private_node_count,
226            funding_wallet_secret_key: options.funding_wallet_secret_key.clone(),
227            gas_amount: options.gas_amount,
228            interval: Some(options.interval),
229            log_format: None,
230            max_archived_log_files: options.max_archived_log_files,
231            max_log_files: options.max_log_files,
232            max_uploads: None,
233            name: options.current_inventory.name.clone(),
234            network_id: options.current_inventory.environment_details.network_id,
235            network_dashboard_branch: None,
236            node_count: desired_node_count,
237            node_env_variables: options.node_env_variables.clone(),
238            output_inventory_dir_path: self
239                .working_directory_path
240                .join("ansible")
241                .join("inventory"),
242            peer_cache_node_count: desired_peer_cache_node_count,
243            public_rpc: options.public_rpc,
244            rewards_address: options
245                .current_inventory
246                .environment_details
247                .rewards_address
248                .clone(),
249            symmetric_private_node_count: desired_symmetric_private_node_count,
250            token_amount: None,
251            upload_size: None,
252            uploaders_count: options.desired_uploaders_count,
253            upload_interval: None,
254            wallet_secret_keys: None,
255        };
256        let mut node_provision_failed = false;
257
258        let (initial_multiaddr, initial_ip_addr) = if is_bootstrap_deploy {
259            get_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client).map_err(
260                |err| {
261                    println!("Failed to get node multiaddr {err:?}");
262                    err
263                },
264            )?
265        } else {
266            get_genesis_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
267                .map_err(|err| {
268                    println!("Failed to get genesis multiaddr {err:?}");
269                    err
270                })?
271        };
272        let initial_network_contacts_url = get_bootstrap_cache_url(&initial_ip_addr);
273        debug!("Retrieved initial peer {initial_multiaddr} and initial network contacts {initial_network_contacts_url}");
274
275        if !is_bootstrap_deploy {
276            self.wait_for_ssh_availability_on_new_machines(
277                AnsibleInventoryType::PeerCacheNodes,
278                &options.current_inventory,
279            )?;
280            self.ansible_provisioner
281                .print_ansible_run_banner("Provision Peer Cache Nodes");
282            match self.ansible_provisioner.provision_nodes(
283                &provision_options,
284                Some(initial_multiaddr.clone()),
285                Some(initial_network_contacts_url.clone()),
286                NodeType::PeerCache,
287            ) {
288                Ok(()) => {
289                    println!("Provisioned Peer Cache nodes");
290                }
291                Err(err) => {
292                    log::error!("Failed to provision Peer Cache nodes: {err}");
293                    node_provision_failed = true;
294                }
295            }
296        }
297
298        self.wait_for_ssh_availability_on_new_machines(
299            AnsibleInventoryType::Nodes,
300            &options.current_inventory,
301        )?;
302        self.ansible_provisioner
303            .print_ansible_run_banner("Provision Normal Nodes");
304        match self.ansible_provisioner.provision_nodes(
305            &provision_options,
306            Some(initial_multiaddr.clone()),
307            Some(initial_network_contacts_url.clone()),
308            NodeType::Generic,
309        ) {
310            Ok(()) => {
311                println!("Provisioned normal nodes");
312            }
313            Err(err) => {
314                log::error!("Failed to provision normal nodes: {err}");
315                node_provision_failed = true;
316            }
317        }
318
319        let private_node_inventory = PrivateNodeProvisionInventory::new(
320            &self.ansible_provisioner,
321            Some(desired_full_cone_private_node_vm_count),
322            Some(desired_symmetric_private_node_vm_count),
323        )?;
324
325        if private_node_inventory.should_provision_full_cone_private_nodes() {
326            let full_cone_nat_gateway_inventory = self
327                .ansible_provisioner
328                .ansible_runner
329                .get_inventory(AnsibleInventoryType::FullConeNatGateway, true)?;
330
331            let full_cone_nat_gateway_new_vms: Vec<_> = full_cone_nat_gateway_inventory
332                .into_iter()
333                .filter(|item| {
334                    !options
335                        .current_inventory
336                        .full_cone_nat_gateway_vms
337                        .contains(item)
338                })
339                .collect();
340
341            for vm in full_cone_nat_gateway_new_vms.iter() {
342                self.ssh_client.wait_for_ssh_availability(
343                    &vm.public_ip_addr,
344                    &self.cloud_provider.get_ssh_user(),
345                )?;
346            }
347
348            let full_cone_nat_gateway_new_vms = if full_cone_nat_gateway_new_vms.is_empty() {
349                None
350            } else {
351                debug!("Full Cone NAT Gateway new VMs: {full_cone_nat_gateway_new_vms:?}");
352                Some(full_cone_nat_gateway_new_vms)
353            };
354
355            match self.ansible_provisioner.provision_full_cone(
356                &provision_options,
357                Some(initial_multiaddr.clone()),
358                Some(initial_network_contacts_url.clone()),
359                private_node_inventory.clone(),
360                full_cone_nat_gateway_new_vms,
361            ) {
362                Ok(()) => {
363                    println!("Provisioned Full Cone nodes and Gateway");
364                }
365                Err(err) => {
366                    log::error!("Failed to provision Full Cone nodes and Gateway: {err}");
367                    node_provision_failed = true;
368                }
369            }
370        }
371
372        if private_node_inventory.should_provision_symmetric_private_nodes() {
373            self.wait_for_ssh_availability_on_new_machines(
374                AnsibleInventoryType::SymmetricNatGateway,
375                &options.current_inventory,
376            )?;
377            self.ansible_provisioner
378                .print_ansible_run_banner("Provision Symmetric NAT Gateway");
379            self.ansible_provisioner
380                .provision_symmetric_nat_gateway(&provision_options, &private_node_inventory)
381                .map_err(|err| {
382                    println!("Failed to provision symmetric NAT gateway {err:?}");
383                    err
384                })?;
385
386            self.wait_for_ssh_availability_on_new_machines(
387                AnsibleInventoryType::SymmetricPrivateNodes,
388                &options.current_inventory,
389            )?;
390            self.ansible_provisioner
391                .print_ansible_run_banner("Provision Symmetric Private Nodes");
392            match self.ansible_provisioner.provision_symmetric_private_nodes(
393                &mut provision_options,
394                Some(initial_multiaddr.clone()),
395                Some(initial_network_contacts_url.clone()),
396                &private_node_inventory,
397            ) {
398                Ok(()) => {
399                    println!("Provisioned symmetric private nodes");
400                }
401                Err(err) => {
402                    log::error!("Failed to provision symmetric private nodes: {err}");
403                    node_provision_failed = true;
404                }
405            }
406        }
407
408        let should_provision_uploaders =
409            options.desired_uploaders_count.is_some() || options.desired_client_vm_count.is_some();
410        if should_provision_uploaders {
411            // get anvil funding sk
412            if provision_options.evm_network == EvmNetwork::Anvil {
413                let anvil_node_data =
414                    get_anvil_node_data(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
415                        .map_err(|err| {
416                            println!("Failed to get evm testnet data {err:?}");
417                            err
418                        })?;
419
420                provision_options.funding_wallet_secret_key =
421                    Some(anvil_node_data.deployer_wallet_private_key);
422            }
423
424            self.wait_for_ssh_availability_on_new_machines(
425                AnsibleInventoryType::Clients,
426                &options.current_inventory,
427            )?;
428            let genesis_network_contacts = get_bootstrap_cache_url(&initial_ip_addr);
429            self.ansible_provisioner
430                .print_ansible_run_banner("Provision Clients");
431            self.ansible_provisioner
432                .provision_clients(
433                    &provision_options,
434                    Some(initial_multiaddr.clone()),
435                    Some(genesis_network_contacts.clone()),
436                )
437                .await
438                .map_err(|err| {
439                    println!("Failed to provision Clients {err:?}");
440                    err
441                })?;
442        }
443
444        if node_provision_failed {
445            println!();
446            println!("{}", "WARNING!".yellow());
447            println!("Some nodes failed to provision without error.");
448            println!("This usually means a small number of nodes failed to start on a few VMs.");
449            println!("However, most of the time the deployment will still be usable.");
450            println!("See the output from Ansible to determine which VMs had failures.");
451        }
452
453        Ok(())
454    }
455
456    pub async fn upscale_clients(&self, options: &UpscaleOptions) -> Result<()> {
457        let is_bootstrap_deploy = matches!(
458            options
459                .current_inventory
460                .environment_details
461                .deployment_type,
462            DeploymentType::Bootstrap
463        );
464
465        if is_bootstrap_deploy {
466            return Err(Error::InvalidClientUpscaleDeploymentType(
467                "bootstrap".to_string(),
468            ));
469        }
470
471        let desired_client_vm_count = options
472            .desired_client_vm_count
473            .unwrap_or(options.current_inventory.client_vms.len() as u16);
474        if desired_client_vm_count < options.current_inventory.client_vms.len() as u16 {
475            return Err(Error::InvalidUpscaleDesiredClientVmCount);
476        }
477        debug!("Using {desired_client_vm_count} for desired Client VM count");
478
479        let mut infra_run_options = InfraRunOptions::generate_existing(
480            &options.current_inventory.name,
481            &options.current_inventory.environment_details.region,
482            &self.terraform_runner,
483            Some(&options.current_inventory.environment_details),
484        )
485        .await?;
486        infra_run_options.client_vm_count = Some(desired_client_vm_count);
487
488        if options.plan {
489            self.plan(&infra_run_options)?;
490            return Ok(());
491        }
492
493        if !options.provision_only {
494            self.create_or_update_infra(&infra_run_options)
495                .map_err(|err| {
496                    println!("Failed to create infra {err:?}");
497                    err
498                })?;
499        }
500
501        if options.infra_only {
502            return Ok(());
503        }
504
505        let (initial_multiaddr, initial_ip_addr) =
506            get_genesis_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
507                .map_err(|err| {
508                    println!("Failed to get genesis multiaddr {err:?}");
509                    err
510                })?;
511        let initial_network_contacts_url = get_bootstrap_cache_url(&initial_ip_addr);
512        debug!("Retrieved initial peer {initial_multiaddr} and initial network contacts {initial_network_contacts_url}");
513
514        let provision_options = ProvisionOptions {
515            ant_version: options.ant_version.clone(),
516            binary_option: options.current_inventory.binary_option.clone(),
517            chunk_size: None,
518            client_env_variables: None,
519            enable_download_verifier: options.enable_download_verifier,
520            enable_random_verifier: options.enable_random_verifier,
521            enable_performance_verifier: options.enable_performance_verifier,
522            enable_telegraf: true,
523            enable_uploaders: true,
524            evm_data_payments_address: options
525                .current_inventory
526                .environment_details
527                .evm_details
528                .data_payments_address
529                .clone(),
530            evm_network: options
531                .current_inventory
532                .environment_details
533                .evm_details
534                .network
535                .clone(),
536            evm_payment_token_address: options
537                .current_inventory
538                .environment_details
539                .evm_details
540                .payment_token_address
541                .clone(),
542            evm_rpc_url: options
543                .current_inventory
544                .environment_details
545                .evm_details
546                .rpc_url
547                .clone(),
548            expected_hash: None,
549            expected_size: None,
550            file_address: None,
551            full_cone_private_node_count: 0,
552            funding_wallet_secret_key: options.funding_wallet_secret_key.clone(),
553            gas_amount: options.gas_amount,
554            interval: Some(options.interval),
555            log_format: None,
556            max_archived_log_files: options.max_archived_log_files,
557            max_log_files: options.max_log_files,
558            max_uploads: None,
559            name: options.current_inventory.name.clone(),
560            network_id: options.current_inventory.environment_details.network_id,
561            network_dashboard_branch: None,
562            node_count: 0,
563            node_env_variables: None,
564            output_inventory_dir_path: self
565                .working_directory_path
566                .join("ansible")
567                .join("inventory"),
568            peer_cache_node_count: 0,
569            public_rpc: options.public_rpc,
570            rewards_address: options
571                .current_inventory
572                .environment_details
573                .rewards_address
574                .clone(),
575            symmetric_private_node_count: 0,
576            token_amount: options.token_amount,
577            uploaders_count: options.desired_uploaders_count,
578            upload_size: None,
579            upload_interval: None,
580            wallet_secret_keys: None,
581        };
582
583        self.wait_for_ssh_availability_on_new_machines(
584            AnsibleInventoryType::Clients,
585            &options.current_inventory,
586        )?;
587        self.ansible_provisioner
588            .print_ansible_run_banner("Provision Clients");
589        self.ansible_provisioner
590            .provision_clients(
591                &provision_options,
592                Some(initial_multiaddr),
593                Some(initial_network_contacts_url),
594            )
595            .await
596            .map_err(|err| {
597                println!("Failed to provision clients {err:?}");
598                err
599            })?;
600
601        Ok(())
602    }
603
604    fn wait_for_ssh_availability_on_new_machines(
605        &self,
606        inventory_type: AnsibleInventoryType,
607        current_inventory: &DeploymentInventory,
608    ) -> Result<()> {
609        let inventory = self
610            .ansible_provisioner
611            .ansible_runner
612            .get_inventory(inventory_type, true)?;
613        let old_set: HashSet<_> = match inventory_type {
614            AnsibleInventoryType::Clients => current_inventory
615                .client_vms
616                .iter()
617                .map(|client_vm| &client_vm.vm)
618                .cloned()
619                .collect(),
620            AnsibleInventoryType::PeerCacheNodes => current_inventory
621                .peer_cache_node_vms
622                .iter()
623                .map(|node_vm| &node_vm.vm)
624                .cloned()
625                .collect(),
626            AnsibleInventoryType::Nodes => current_inventory
627                .node_vms
628                .iter()
629                .map(|node_vm| &node_vm.vm)
630                .cloned()
631                .collect(),
632            AnsibleInventoryType::FullConeNatGateway => current_inventory
633                .full_cone_nat_gateway_vms
634                .iter()
635                .cloned()
636                .collect(),
637            AnsibleInventoryType::SymmetricNatGateway => current_inventory
638                .symmetric_nat_gateway_vms
639                .iter()
640                .cloned()
641                .collect(),
642            AnsibleInventoryType::FullConePrivateNodes => current_inventory
643                .full_cone_private_node_vms
644                .iter()
645                .map(|node_vm| &node_vm.vm)
646                .cloned()
647                .collect(),
648            AnsibleInventoryType::SymmetricPrivateNodes => current_inventory
649                .symmetric_private_node_vms
650                .iter()
651                .map(|node_vm| &node_vm.vm)
652                .cloned()
653                .collect(),
654            it => return Err(Error::UpscaleInventoryTypeNotSupported(it.to_string())),
655        };
656        let new_vms: Vec<_> = inventory
657            .into_iter()
658            .filter(|item| !old_set.contains(item))
659            .collect();
660        for vm in new_vms.iter() {
661            self.ssh_client.wait_for_ssh_availability(
662                &vm.public_ip_addr,
663                &self.cloud_provider.get_ssh_user(),
664            )?;
665        }
666        Ok(())
667    }
668}