sn_testnet_deploy/
upscale.rs

1// Copyright (c) 2023, MaidSafe.
2// All rights reserved.
3//
4// This SAFE Network Software is licensed under the BSD-3-Clause license.
5// Please see the LICENSE file for more details.
6
7use crate::{
8    ansible::{
9        inventory::AnsibleInventoryType,
10        provisioning::{PrivateNodeProvisionInventory, ProvisionOptions},
11    },
12    error::{Error, Result},
13    get_anvil_node_data, get_bootstrap_cache_url, get_genesis_multiaddr, get_multiaddr,
14    DeploymentInventory, DeploymentType, EvmNetwork, InfraRunOptions, NodeType, TestnetDeployer,
15};
16use colored::Colorize;
17use evmlib::common::U256;
18use log::debug;
19use std::{collections::HashSet, time::Duration};
20
21#[derive(Clone)]
22pub struct UpscaleOptions {
23    pub ansible_verbose: bool,
24    pub ant_version: Option<String>,
25    pub current_inventory: DeploymentInventory,
26    pub desired_client_vm_count: Option<u16>,
27    pub desired_full_cone_private_node_count: Option<u16>,
28    pub desired_full_cone_private_node_vm_count: Option<u16>,
29    pub desired_node_count: Option<u16>,
30    pub desired_node_vm_count: Option<u16>,
31    pub desired_peer_cache_node_count: Option<u16>,
32    pub desired_peer_cache_node_vm_count: Option<u16>,
33    pub desired_symmetric_private_node_count: Option<u16>,
34    pub desired_symmetric_private_node_vm_count: Option<u16>,
35    pub desired_uploaders_count: Option<u16>,
36    pub enable_delayed_verifier: bool,
37    pub enable_random_verifier: bool,
38    pub enable_performance_verifier: bool,
39    pub funding_wallet_secret_key: Option<String>,
40    pub gas_amount: Option<U256>,
41    pub interval: Duration,
42    pub infra_only: bool,
43    pub max_archived_log_files: u16,
44    pub max_log_files: u16,
45    pub network_dashboard_branch: Option<String>,
46    pub node_env_variables: Option<Vec<(String, String)>>,
47    pub plan: bool,
48    pub public_rpc: bool,
49    pub provision_only: bool,
50    pub token_amount: Option<U256>,
51}
52
53impl TestnetDeployer {
54    pub async fn upscale(&self, options: &UpscaleOptions) -> Result<()> {
55        let is_bootstrap_deploy = matches!(
56            options
57                .current_inventory
58                .environment_details
59                .deployment_type,
60            DeploymentType::Bootstrap
61        );
62
63        if is_bootstrap_deploy
64            && (options.desired_peer_cache_node_count.is_some()
65                || options.desired_peer_cache_node_vm_count.is_some()
66                || options.desired_client_vm_count.is_some())
67        {
68            return Err(Error::InvalidUpscaleOptionsForBootstrapDeployment);
69        }
70
71        let desired_peer_cache_node_vm_count = options
72            .desired_peer_cache_node_vm_count
73            .unwrap_or(options.current_inventory.peer_cache_node_vms.len() as u16);
74        if desired_peer_cache_node_vm_count
75            < options.current_inventory.peer_cache_node_vms.len() as u16
76        {
77            return Err(Error::InvalidUpscaleDesiredPeerCacheVmCount);
78        }
79        debug!("Using {desired_peer_cache_node_vm_count} for desired Peer Cache node VM count");
80
81        let desired_node_vm_count = options
82            .desired_node_vm_count
83            .unwrap_or(options.current_inventory.node_vms.len() as u16);
84        if desired_node_vm_count < options.current_inventory.node_vms.len() as u16 {
85            return Err(Error::InvalidUpscaleDesiredNodeVmCount);
86        }
87        debug!("Using {desired_node_vm_count} for desired node VM count");
88
89        let desired_full_cone_private_node_vm_count = options
90            .desired_full_cone_private_node_vm_count
91            .unwrap_or(options.current_inventory.full_cone_private_node_vms.len() as u16);
92        if desired_full_cone_private_node_vm_count
93            < options.current_inventory.full_cone_private_node_vms.len() as u16
94        {
95            return Err(Error::InvalidUpscaleDesiredFullConePrivateNodeVmCount);
96        }
97        debug!("Using {desired_full_cone_private_node_vm_count} for desired full cone private node VM count");
98
99        let desired_symmetric_private_node_vm_count = options
100            .desired_symmetric_private_node_vm_count
101            .unwrap_or(options.current_inventory.symmetric_private_node_vms.len() as u16);
102        if desired_symmetric_private_node_vm_count
103            < options.current_inventory.symmetric_private_node_vms.len() as u16
104        {
105            return Err(Error::InvalidUpscaleDesiredSymmetricPrivateNodeVmCount);
106        }
107        debug!("Using {desired_symmetric_private_node_vm_count} for desired full cone private node VM count");
108
109        let desired_client_vm_count = options
110            .desired_client_vm_count
111            .unwrap_or(options.current_inventory.client_vms.len() as u16);
112        if desired_client_vm_count < options.current_inventory.client_vms.len() as u16 {
113            return Err(Error::InvalidUpscaleDesiredClientVmCount);
114        }
115        debug!("Using {desired_client_vm_count} for desired Client VM count");
116
117        let desired_peer_cache_node_count = options
118            .desired_peer_cache_node_count
119            .unwrap_or(options.current_inventory.peer_cache_node_count() as u16);
120        if desired_peer_cache_node_count < options.current_inventory.peer_cache_node_count() as u16
121        {
122            return Err(Error::InvalidUpscaleDesiredPeerCacheNodeCount);
123        }
124        debug!("Using {desired_peer_cache_node_count} for desired peer cache node count");
125
126        let desired_node_count = options
127            .desired_node_count
128            .unwrap_or(options.current_inventory.node_count() as u16);
129        if desired_node_count < options.current_inventory.node_count() as u16 {
130            return Err(Error::InvalidUpscaleDesiredNodeCount);
131        }
132        debug!("Using {desired_node_count} for desired node count");
133
134        let desired_full_cone_private_node_count = options
135            .desired_full_cone_private_node_count
136            .unwrap_or(options.current_inventory.full_cone_private_node_count() as u16);
137        if desired_full_cone_private_node_count
138            < options.current_inventory.full_cone_private_node_count() as u16
139        {
140            return Err(Error::InvalidUpscaleDesiredFullConePrivateNodeCount);
141        }
142        debug!(
143            "Using {desired_full_cone_private_node_count} for desired full cone private node count"
144        );
145
146        let desired_symmetric_private_node_count = options
147            .desired_symmetric_private_node_count
148            .unwrap_or(options.current_inventory.symmetric_private_node_count() as u16);
149        if desired_symmetric_private_node_count
150            < options.current_inventory.symmetric_private_node_count() as u16
151        {
152            return Err(Error::InvalidUpscaleDesiredSymmetricPrivateNodeCount);
153        }
154        debug!(
155            "Using {desired_symmetric_private_node_count} for desired symmetric private node count"
156        );
157
158        let mut infra_run_options = InfraRunOptions::generate_existing(
159            &options.current_inventory.name,
160            &options.current_inventory.environment_details.region,
161            &self.terraform_runner,
162            Some(&options.current_inventory.environment_details),
163        )
164        .await?;
165        infra_run_options.peer_cache_node_vm_count = Some(desired_peer_cache_node_vm_count);
166        infra_run_options.node_vm_count = Some(desired_node_vm_count);
167        infra_run_options.full_cone_private_node_vm_count =
168            Some(desired_full_cone_private_node_vm_count);
169        infra_run_options.symmetric_private_node_vm_count =
170            Some(desired_symmetric_private_node_vm_count);
171        infra_run_options.client_vm_count = Some(desired_client_vm_count);
172
173        if options.plan {
174            self.plan(&infra_run_options)?;
175            return Ok(());
176        }
177
178        self.create_or_update_infra(&infra_run_options)
179            .map_err(|err| {
180                println!("Failed to create infra {err:?}");
181                err
182            })?;
183
184        if options.infra_only {
185            return Ok(());
186        }
187
188        let mut provision_options = ProvisionOptions {
189            ant_version: options.ant_version.clone(),
190            binary_option: options.current_inventory.binary_option.clone(),
191            chunk_size: None,
192            client_env_variables: None,
193            delayed_verifier_batch_size: None,
194            delayed_verifier_quorum_value: None,
195            enable_delayed_verifier: options.enable_delayed_verifier,
196            enable_performance_verifier: options.enable_performance_verifier,
197            enable_random_verifier: options.enable_random_verifier,
198            enable_telegraf: true,
199            enable_uploaders: true,
200            evm_data_payments_address: options
201                .current_inventory
202                .environment_details
203                .evm_details
204                .data_payments_address
205                .clone(),
206            evm_network: options
207                .current_inventory
208                .environment_details
209                .evm_details
210                .network
211                .clone(),
212            evm_payment_token_address: options
213                .current_inventory
214                .environment_details
215                .evm_details
216                .payment_token_address
217                .clone(),
218            evm_rpc_url: options
219                .current_inventory
220                .environment_details
221                .evm_details
222                .rpc_url
223                .clone(),
224            expected_hash: None,
225            expected_size: None,
226            file_address: None,
227            full_cone_private_node_count: desired_full_cone_private_node_count,
228            funding_wallet_secret_key: options.funding_wallet_secret_key.clone(),
229            gas_amount: options.gas_amount,
230            interval: Some(options.interval),
231            log_format: None,
232            max_archived_log_files: options.max_archived_log_files,
233            max_log_files: options.max_log_files,
234            max_uploads: None,
235            name: options.current_inventory.name.clone(),
236            network_id: options.current_inventory.environment_details.network_id,
237            network_dashboard_branch: None,
238            node_count: desired_node_count,
239            node_env_variables: options.node_env_variables.clone(),
240            output_inventory_dir_path: self
241                .working_directory_path
242                .join("ansible")
243                .join("inventory"),
244            peer_cache_node_count: desired_peer_cache_node_count,
245            performance_verifier_batch_size: None,
246            public_rpc: options.public_rpc,
247            random_verifier_batch_size: None,
248            rewards_address: options
249                .current_inventory
250                .environment_details
251                .rewards_address
252                .clone(),
253            symmetric_private_node_count: desired_symmetric_private_node_count,
254            token_amount: None,
255            upload_size: None,
256            uploaders_count: options.desired_uploaders_count,
257            upload_interval: None,
258            wallet_secret_keys: None,
259        };
260        let mut node_provision_failed = false;
261
262        let (initial_multiaddr, initial_ip_addr) = if is_bootstrap_deploy {
263            get_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client).map_err(
264                |err| {
265                    println!("Failed to get node multiaddr {err:?}");
266                    err
267                },
268            )?
269        } else {
270            get_genesis_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
271                .map_err(|err| {
272                    println!("Failed to get genesis multiaddr {err:?}");
273                    err
274                })?
275        };
276        let initial_network_contacts_url = get_bootstrap_cache_url(&initial_ip_addr);
277        debug!("Retrieved initial peer {initial_multiaddr} and initial network contacts {initial_network_contacts_url}");
278
279        if !is_bootstrap_deploy {
280            self.wait_for_ssh_availability_on_new_machines(
281                AnsibleInventoryType::PeerCacheNodes,
282                &options.current_inventory,
283            )?;
284            self.ansible_provisioner
285                .print_ansible_run_banner("Provision Peer Cache Nodes");
286            match self.ansible_provisioner.provision_nodes(
287                &provision_options,
288                Some(initial_multiaddr.clone()),
289                Some(initial_network_contacts_url.clone()),
290                NodeType::PeerCache,
291            ) {
292                Ok(()) => {
293                    println!("Provisioned Peer Cache nodes");
294                }
295                Err(err) => {
296                    log::error!("Failed to provision Peer Cache nodes: {err}");
297                    node_provision_failed = true;
298                }
299            }
300        }
301
302        self.wait_for_ssh_availability_on_new_machines(
303            AnsibleInventoryType::Nodes,
304            &options.current_inventory,
305        )?;
306        self.ansible_provisioner
307            .print_ansible_run_banner("Provision Normal Nodes");
308        match self.ansible_provisioner.provision_nodes(
309            &provision_options,
310            Some(initial_multiaddr.clone()),
311            Some(initial_network_contacts_url.clone()),
312            NodeType::Generic,
313        ) {
314            Ok(()) => {
315                println!("Provisioned normal nodes");
316            }
317            Err(err) => {
318                log::error!("Failed to provision normal nodes: {err}");
319                node_provision_failed = true;
320            }
321        }
322
323        let private_node_inventory = PrivateNodeProvisionInventory::new(
324            &self.ansible_provisioner,
325            Some(desired_full_cone_private_node_vm_count),
326            Some(desired_symmetric_private_node_vm_count),
327        )?;
328
329        if private_node_inventory.should_provision_full_cone_private_nodes() {
330            let full_cone_nat_gateway_inventory = self
331                .ansible_provisioner
332                .ansible_runner
333                .get_inventory(AnsibleInventoryType::FullConeNatGateway, true)?;
334
335            let full_cone_nat_gateway_new_vms: Vec<_> = full_cone_nat_gateway_inventory
336                .into_iter()
337                .filter(|item| {
338                    !options
339                        .current_inventory
340                        .full_cone_nat_gateway_vms
341                        .contains(item)
342                })
343                .collect();
344
345            for vm in full_cone_nat_gateway_new_vms.iter() {
346                self.ssh_client.wait_for_ssh_availability(
347                    &vm.public_ip_addr,
348                    &self.cloud_provider.get_ssh_user(),
349                )?;
350            }
351
352            let full_cone_nat_gateway_new_vms = if full_cone_nat_gateway_new_vms.is_empty() {
353                None
354            } else {
355                debug!("Full Cone NAT Gateway new VMs: {full_cone_nat_gateway_new_vms:?}");
356                Some(full_cone_nat_gateway_new_vms)
357            };
358
359            match self.ansible_provisioner.provision_full_cone(
360                &provision_options,
361                Some(initial_multiaddr.clone()),
362                Some(initial_network_contacts_url.clone()),
363                private_node_inventory.clone(),
364                full_cone_nat_gateway_new_vms,
365            ) {
366                Ok(()) => {
367                    println!("Provisioned Full Cone nodes and Gateway");
368                }
369                Err(err) => {
370                    log::error!("Failed to provision Full Cone nodes and Gateway: {err}");
371                    node_provision_failed = true;
372                }
373            }
374        }
375
376        if private_node_inventory.should_provision_symmetric_private_nodes() {
377            self.wait_for_ssh_availability_on_new_machines(
378                AnsibleInventoryType::SymmetricNatGateway,
379                &options.current_inventory,
380            )?;
381            self.ansible_provisioner
382                .print_ansible_run_banner("Provision Symmetric NAT Gateway");
383            self.ansible_provisioner
384                .provision_symmetric_nat_gateway(&provision_options, &private_node_inventory)
385                .map_err(|err| {
386                    println!("Failed to provision symmetric NAT gateway {err:?}");
387                    err
388                })?;
389
390            self.wait_for_ssh_availability_on_new_machines(
391                AnsibleInventoryType::SymmetricPrivateNodes,
392                &options.current_inventory,
393            )?;
394            self.ansible_provisioner
395                .print_ansible_run_banner("Provision Symmetric Private Nodes");
396            match self.ansible_provisioner.provision_symmetric_private_nodes(
397                &mut provision_options,
398                Some(initial_multiaddr.clone()),
399                Some(initial_network_contacts_url.clone()),
400                &private_node_inventory,
401            ) {
402                Ok(()) => {
403                    println!("Provisioned symmetric private nodes");
404                }
405                Err(err) => {
406                    log::error!("Failed to provision symmetric private nodes: {err}");
407                    node_provision_failed = true;
408                }
409            }
410        }
411
412        let should_provision_uploaders =
413            options.desired_uploaders_count.is_some() || options.desired_client_vm_count.is_some();
414        if should_provision_uploaders {
415            // get anvil funding sk
416            if provision_options.evm_network == EvmNetwork::Anvil {
417                let anvil_node_data =
418                    get_anvil_node_data(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
419                        .map_err(|err| {
420                            println!("Failed to get evm testnet data {err:?}");
421                            err
422                        })?;
423
424                provision_options.funding_wallet_secret_key =
425                    Some(anvil_node_data.deployer_wallet_private_key);
426            }
427
428            self.wait_for_ssh_availability_on_new_machines(
429                AnsibleInventoryType::Clients,
430                &options.current_inventory,
431            )?;
432            let genesis_network_contacts = get_bootstrap_cache_url(&initial_ip_addr);
433            self.ansible_provisioner
434                .print_ansible_run_banner("Provision Clients");
435            self.ansible_provisioner
436                .provision_clients(
437                    &provision_options,
438                    Some(initial_multiaddr.clone()),
439                    Some(genesis_network_contacts.clone()),
440                )
441                .await
442                .map_err(|err| {
443                    println!("Failed to provision Clients {err:?}");
444                    err
445                })?;
446        }
447
448        if node_provision_failed {
449            println!();
450            println!("{}", "WARNING!".yellow());
451            println!("Some nodes failed to provision without error.");
452            println!("This usually means a small number of nodes failed to start on a few VMs.");
453            println!("However, most of the time the deployment will still be usable.");
454            println!("See the output from Ansible to determine which VMs had failures.");
455        }
456
457        Ok(())
458    }
459
460    pub async fn upscale_clients(&self, options: &UpscaleOptions) -> Result<()> {
461        let is_bootstrap_deploy = matches!(
462            options
463                .current_inventory
464                .environment_details
465                .deployment_type,
466            DeploymentType::Bootstrap
467        );
468
469        if is_bootstrap_deploy {
470            return Err(Error::InvalidClientUpscaleDeploymentType(
471                "bootstrap".to_string(),
472            ));
473        }
474
475        let desired_client_vm_count = options
476            .desired_client_vm_count
477            .unwrap_or(options.current_inventory.client_vms.len() as u16);
478        if desired_client_vm_count < options.current_inventory.client_vms.len() as u16 {
479            return Err(Error::InvalidUpscaleDesiredClientVmCount);
480        }
481        debug!("Using {desired_client_vm_count} for desired Client VM count");
482
483        let mut infra_run_options = InfraRunOptions::generate_existing(
484            &options.current_inventory.name,
485            &options.current_inventory.environment_details.region,
486            &self.terraform_runner,
487            Some(&options.current_inventory.environment_details),
488        )
489        .await?;
490        infra_run_options.client_vm_count = Some(desired_client_vm_count);
491
492        if options.plan {
493            self.plan(&infra_run_options)?;
494            return Ok(());
495        }
496
497        if !options.provision_only {
498            self.create_or_update_infra(&infra_run_options)
499                .map_err(|err| {
500                    println!("Failed to create infra {err:?}");
501                    err
502                })?;
503        }
504
505        if options.infra_only {
506            return Ok(());
507        }
508
509        let (initial_multiaddr, initial_ip_addr) =
510            get_genesis_multiaddr(&self.ansible_provisioner.ansible_runner, &self.ssh_client)
511                .map_err(|err| {
512                    println!("Failed to get genesis multiaddr {err:?}");
513                    err
514                })?;
515        let initial_network_contacts_url = get_bootstrap_cache_url(&initial_ip_addr);
516        debug!("Retrieved initial peer {initial_multiaddr} and initial network contacts {initial_network_contacts_url}");
517
518        let provision_options = ProvisionOptions {
519            ant_version: options.ant_version.clone(),
520            binary_option: options.current_inventory.binary_option.clone(),
521            chunk_size: None,
522            client_env_variables: None,
523            delayed_verifier_batch_size: None,
524            delayed_verifier_quorum_value: None,
525            enable_delayed_verifier: options.enable_delayed_verifier,
526            enable_random_verifier: options.enable_random_verifier,
527            enable_performance_verifier: options.enable_performance_verifier,
528            enable_telegraf: true,
529            enable_uploaders: true,
530            evm_data_payments_address: options
531                .current_inventory
532                .environment_details
533                .evm_details
534                .data_payments_address
535                .clone(),
536            evm_network: options
537                .current_inventory
538                .environment_details
539                .evm_details
540                .network
541                .clone(),
542            evm_payment_token_address: options
543                .current_inventory
544                .environment_details
545                .evm_details
546                .payment_token_address
547                .clone(),
548            evm_rpc_url: options
549                .current_inventory
550                .environment_details
551                .evm_details
552                .rpc_url
553                .clone(),
554            expected_hash: None,
555            expected_size: None,
556            file_address: None,
557            full_cone_private_node_count: 0,
558            funding_wallet_secret_key: options.funding_wallet_secret_key.clone(),
559            gas_amount: options.gas_amount,
560            interval: Some(options.interval),
561            log_format: None,
562            max_archived_log_files: options.max_archived_log_files,
563            max_log_files: options.max_log_files,
564            max_uploads: None,
565            name: options.current_inventory.name.clone(),
566            network_id: options.current_inventory.environment_details.network_id,
567            network_dashboard_branch: None,
568            node_count: 0,
569            node_env_variables: None,
570            output_inventory_dir_path: self
571                .working_directory_path
572                .join("ansible")
573                .join("inventory"),
574            peer_cache_node_count: 0,
575            performance_verifier_batch_size: None,
576            public_rpc: options.public_rpc,
577            random_verifier_batch_size: None,
578            rewards_address: options
579                .current_inventory
580                .environment_details
581                .rewards_address
582                .clone(),
583            symmetric_private_node_count: 0,
584            token_amount: options.token_amount,
585            uploaders_count: options.desired_uploaders_count,
586            upload_size: None,
587            upload_interval: None,
588            wallet_secret_keys: None,
589        };
590
591        self.wait_for_ssh_availability_on_new_machines(
592            AnsibleInventoryType::Clients,
593            &options.current_inventory,
594        )?;
595        self.ansible_provisioner
596            .print_ansible_run_banner("Provision Clients");
597        self.ansible_provisioner
598            .provision_clients(
599                &provision_options,
600                Some(initial_multiaddr),
601                Some(initial_network_contacts_url),
602            )
603            .await
604            .map_err(|err| {
605                println!("Failed to provision clients {err:?}");
606                err
607            })?;
608
609        Ok(())
610    }
611
612    fn wait_for_ssh_availability_on_new_machines(
613        &self,
614        inventory_type: AnsibleInventoryType,
615        current_inventory: &DeploymentInventory,
616    ) -> Result<()> {
617        let inventory = self
618            .ansible_provisioner
619            .ansible_runner
620            .get_inventory(inventory_type, true)?;
621        let old_set: HashSet<_> = match inventory_type {
622            AnsibleInventoryType::Clients => current_inventory
623                .client_vms
624                .iter()
625                .map(|client_vm| &client_vm.vm)
626                .cloned()
627                .collect(),
628            AnsibleInventoryType::PeerCacheNodes => current_inventory
629                .peer_cache_node_vms
630                .iter()
631                .map(|node_vm| &node_vm.vm)
632                .cloned()
633                .collect(),
634            AnsibleInventoryType::Nodes => current_inventory
635                .node_vms
636                .iter()
637                .map(|node_vm| &node_vm.vm)
638                .cloned()
639                .collect(),
640            AnsibleInventoryType::FullConeNatGateway => current_inventory
641                .full_cone_nat_gateway_vms
642                .iter()
643                .cloned()
644                .collect(),
645            AnsibleInventoryType::SymmetricNatGateway => current_inventory
646                .symmetric_nat_gateway_vms
647                .iter()
648                .cloned()
649                .collect(),
650            AnsibleInventoryType::FullConePrivateNodes => current_inventory
651                .full_cone_private_node_vms
652                .iter()
653                .map(|node_vm| &node_vm.vm)
654                .cloned()
655                .collect(),
656            AnsibleInventoryType::SymmetricPrivateNodes => current_inventory
657                .symmetric_private_node_vms
658                .iter()
659                .map(|node_vm| &node_vm.vm)
660                .cloned()
661                .collect(),
662            it => return Err(Error::UpscaleInventoryTypeNotSupported(it.to_string())),
663        };
664        let new_vms: Vec<_> = inventory
665            .into_iter()
666            .filter(|item| !old_set.contains(item))
667            .collect();
668        for vm in new_vms.iter() {
669            self.ssh_client.wait_for_ssh_availability(
670                &vm.public_ip_addr,
671                &self.cloud_provider.get_ssh_user(),
672            )?;
673        }
674        Ok(())
675    }
676}