Skip to main content

forest/daemon/
mod.rs

1// Copyright 2019-2026 ChainSafe Systems
2// SPDX-License-Identifier: Apache-2.0, MIT
3
4pub mod bundle;
5mod context;
6pub mod db_util;
7pub mod main;
8
9use crate::blocks::Tipset;
10use crate::chain::ChainStore;
11use crate::chain::index::ResolveNullTipset;
12use crate::chain_sync::ChainFollower;
13use crate::chain_sync::network_context::SyncNetworkContext;
14use crate::cli_shared::snapshot;
15use crate::cli_shared::{
16    chain_path,
17    cli::{CliOpts, Config},
18};
19use crate::daemon::{
20    context::{AppContext, DbType},
21    db_util::import_chain_as_forest_car,
22};
23use crate::db::gc::SnapshotGarbageCollector;
24use crate::db::ttl::EthMappingCollector;
25use crate::libp2p::{Libp2pService, PeerManager};
26use crate::message_pool::{MessagePool, MpoolConfig, MpoolLocker, NonceTracker};
27use crate::networks::{self, ChainConfig};
28use crate::rpc::RPCState;
29use crate::rpc::eth::filter::EthEventHandler;
30use crate::rpc::start_rpc;
31use crate::shim::clock::ChainEpoch;
32use crate::shim::state_tree::StateTree;
33use crate::shim::version::NetworkVersion;
34use crate::utils::misc::env::is_env_truthy;
35use crate::utils::{self, ShallowClone as _};
36use crate::utils::{proofs_api::ensure_proof_params_downloaded, version::FOREST_VERSION_STRING};
37use anyhow::{Context as _, bail};
38use backon::{ExponentialBuilder, Retryable};
39use dialoguer::theme::ColorfulTheme;
40use futures::{Future, FutureExt};
41use std::path::Path;
42use std::sync::Arc;
43use std::sync::OnceLock;
44use std::time::{Duration, Instant};
45use tokio::{
46    net::TcpListener,
47    signal::{
48        ctrl_c,
49        unix::{SignalKind, signal},
50    },
51    sync::mpsc,
52    task::JoinSet,
53};
54use tracing::{debug, info, warn};
55
/// Process-wide handle to the snapshot garbage collector.
///
/// Populated once by `maybe_start_gc_service`; it is left unset when the node runs statelessly.
pub static GLOBAL_SNAPSHOT_GC: OnceLock<Arc<SnapshotGarbageCollector<DbType>>> = OnceLock::new();
57
58/// Increase the file descriptor limit to a reasonable number.
59/// This prevents the node from failing if the default soft limit is too low.
60/// Note that the value is only increased, never decreased.
61fn maybe_increase_fd_limit() -> anyhow::Result<()> {
62    static DESIRED_SOFT_LIMIT: u64 = 8192;
63    let (soft_before, _) = rlimit::Resource::NOFILE.get()?;
64
65    let soft_after = rlimit::increase_nofile_limit(DESIRED_SOFT_LIMIT)?;
66    if soft_before < soft_after {
67        debug!("Increased file descriptor limit from {soft_before} to {soft_after}");
68    }
69    if soft_after < DESIRED_SOFT_LIMIT {
70        warn!(
71            "File descriptor limit is too low: {soft_after} < {DESIRED_SOFT_LIMIT}. \
72            You may encounter 'too many open files' errors.",
73        );
74    }
75
76    Ok(())
77}
78
79// Start the daemon and abort if we're interrupted by ctrl-c, SIGTERM, or `forest-cli shutdown`.
80pub async fn start_interruptable(opts: CliOpts, config: Config) -> anyhow::Result<()> {
81    let start_time = chrono::Utc::now();
82    let mut terminate = signal(SignalKind::terminate())?;
83    let (shutdown_send, mut shutdown_recv) = mpsc::channel(1);
84    let (rpc_stop_handle, rpc_server_handle) = jsonrpsee::server::stop_channel();
85    let result = tokio::select! {
86        ret = start(start_time, opts, config, shutdown_send, rpc_stop_handle) => ret,
87        _ = ctrl_c() => {
88            info!("Keyboard interrupt.");
89            Ok(())
90        },
91        _ = terminate.recv() => {
92            info!("Received SIGTERM.");
93            Ok(())
94        },
95        _ = shutdown_recv.recv() => {
96            info!("Client requested a shutdown.");
97            Ok(())
98        },
99    };
100    _ = rpc_server_handle.stop();
101    crate::utils::io::terminal_cleanup();
102    result
103}
104
105/// This function initialize Forest with below steps
106/// - increase file descriptor limit (for parity-db)
107/// - setup proofs parameter cache directory
108/// - prints Forest version
109fn startup_init(config: &Config) -> anyhow::Result<()> {
110    maybe_increase_fd_limit()?;
111    // Sets proof parameter file download path early, the files will be checked and
112    // downloaded later right after snapshot import step
113    crate::utils::proofs_api::maybe_set_proofs_parameter_cache_dir_env(&config.client.data_dir);
114    info!(
115        "Starting Forest daemon, version {}",
116        FOREST_VERSION_STRING.as_str()
117    );
118    Ok(())
119}
120
121async fn maybe_import_snapshot(
122    opts: &CliOpts,
123    config: &mut Config,
124    ctx: &AppContext,
125) -> anyhow::Result<()> {
126    let chain_config = ctx.state_manager.chain_config();
127    // Sets the latest snapshot if needed for downloading later
128    if config.client.snapshot_path.is_none() && !opts.stateless {
129        maybe_set_snapshot_path(
130            config,
131            chain_config,
132            ctx.state_manager.chain_store().heaviest_tipset().epoch(),
133            opts.auto_download_snapshot,
134            &ctx.db_meta_data.get_root_dir(),
135        )
136        .await?;
137    }
138
139    let snapshot_tracker = ctx.snapshot_progress_tracker.clone();
140    // Import chain if needed
141    if !opts.skip_load.unwrap_or_default()
142        && let Some(path) = &config.client.snapshot_path
143    {
144        let (car_db_path, ts) = import_chain_as_forest_car(
145            path,
146            &ctx.db_meta_data.get_forest_car_db_dir(),
147            config.client.import_mode,
148            config.client.rpc_v1_endpoint()?,
149            &crate::f3::get_f3_root(config),
150            ctx.chain_config(),
151            &snapshot_tracker,
152        )
153        .await?;
154        ctx.db
155            .read_only_files(std::iter::once(car_db_path.clone()))?;
156        let ts_epoch = ts.epoch();
157        // Explicitly set heaviest tipset here in case HEAD_KEY has already been set
158        // in the current setting store
159        ctx.state_manager.chain_store().set_heaviest_tipset(ts)?;
160        debug!(
161            "Loaded car DB at {} and set current head to epoch {ts_epoch}",
162            car_db_path.display(),
163        );
164    }
165
166    // If the snapshot progress state is not completed,
167    // set the state to not required
168    if !snapshot_tracker.is_completed() {
169        snapshot_tracker.not_required();
170    }
171
172    if let Some(validate_from) = config.client.snapshot_height {
173        // We've been provided a snapshot and asked to validate it
174        ensure_proof_params_downloaded().await?;
175        // Use the specified HEAD, otherwise take the current HEAD.
176        let current_height = config
177            .client
178            .snapshot_head
179            .unwrap_or_else(|| ctx.state_manager.chain_store().heaviest_tipset().epoch());
180
181        let validation_range = validation_range(current_height, validate_from)?;
182        // `validate_range` is CPU-bound (drives rayon-parallel VM execution) and
183        // can run for minutes. Safer to spawn it on a blocking thread.
184        let state_manager = ctx.state_manager.clone();
185        tokio::task::spawn_blocking(move || state_manager.validate_range(validation_range))
186            .await??;
187    }
188
189    Ok(())
190}
191
192/// Returns the range of epochs to validate. This includes special handling for negative `from`
193/// values, which are interpreted as offsets from the current epoch.
194fn validation_range(
195    current: ChainEpoch,
196    from: ChainEpoch,
197) -> anyhow::Result<std::ops::RangeInclusive<ChainEpoch>> {
198    anyhow::ensure!(
199        current.is_positive(),
200        "current head epoch {current} is invalid"
201    );
202
203    // Negative values scroll back from the current head (e.g. --height=-1000).
204    // `saturating_add` + `.max(0)` keeps extreme negatives from underflowing or
205    // wrapping to a huge positive (which would silently produce an empty range).
206    let start = if from.is_negative() {
207        current.saturating_add(from).max(0)
208    } else {
209        from
210    };
211
212    // An absolute `--height` past the head would otherwise produce an empty
213    // range and silently succeed without validating anything.
214    anyhow::ensure!(
215        start <= current,
216        "requested validation start epoch {start} is beyond the current head at epoch {current}",
217    );
218
219    Ok(start..=current)
220}
221
222async fn maybe_start_metrics_service(
223    services: &mut JoinSet<anyhow::Result<()>>,
224    config: &Config,
225    ctx: &AppContext,
226) -> anyhow::Result<()> {
227    if config.client.enable_metrics_endpoint {
228        // Start Prometheus server port
229        let prometheus_listener = TcpListener::bind(config.client.metrics_address)
230            .await
231            .with_context(|| format!("could not bind to {}", config.client.metrics_address))?;
232        info!(
233            "Prometheus server started at {}",
234            config.client.metrics_address
235        );
236        let db_directory = crate::db::db_engine::db_root(&chain_path(config))?;
237        let db = ctx.db.writer().clone();
238
239        let get_chain_head_height = Arc::new({
240            // Use `Weak` to not dead lock GC.
241            let chain_store = Arc::downgrade(ctx.state_manager.chain_store());
242            move || {
243                chain_store
244                    .upgrade()
245                    .map(|cs| cs.heaviest_tipset().epoch())
246                    .unwrap_or_default()
247            }
248        });
249        let get_chain_head_actor_version = Arc::new({
250            // Use `Weak` to not dead lock GC.
251            let chain_store = Arc::downgrade(ctx.state_manager.chain_store());
252            move || {
253                if let Some(cs) = chain_store.upgrade()
254                    && let Ok(state) = StateTree::new_from_root(
255                        cs.blockstore().clone(),
256                        cs.heaviest_tipset().parent_state(),
257                    )
258                    && let Ok(bundle_meta) = state.get_actor_bundle_metadata()
259                    && let Ok(actor_version) = bundle_meta.actor_major_version()
260                {
261                    return actor_version;
262                }
263                0
264            }
265        });
266        services.spawn({
267            let chain_config = ctx.chain_config().clone();
268            let get_chain_head_height = get_chain_head_height.clone();
269            async {
270                crate::metrics::init_prometheus(
271                    prometheus_listener,
272                    db_directory,
273                    db,
274                    chain_config,
275                    get_chain_head_height,
276                    get_chain_head_actor_version,
277                )
278                .await
279                .context("Failed to initiate prometheus server")
280            }
281        });
282
283        crate::metrics::register_collector(Box::new(
284            networks::metrics::NetworkHeightCollector::new(
285                ctx.state_manager.chain_config().block_delay_secs,
286                ctx.state_manager
287                    .chain_store()
288                    .genesis_block_header()
289                    .timestamp,
290                get_chain_head_height,
291            ),
292        ));
293    }
294    Ok(())
295}
296
297async fn create_p2p_service(
298    services: &mut JoinSet<anyhow::Result<()>>,
299    config: &mut Config,
300    ctx: &AppContext,
301) -> anyhow::Result<Libp2pService<DbType>> {
302    // if bootstrap peers are not set, set them
303    if config.network.bootstrap_peers.is_empty() {
304        config.network.bootstrap_peers = ctx.state_manager.chain_config().bootstrap_peers.clone();
305    }
306
307    let peer_manager = Arc::new(PeerManager::default());
308    services.spawn(peer_manager.clone().peer_operation_event_loop_task());
309    // Libp2p service setup
310    let p2p_service = Libp2pService::new(
311        config.network.clone(),
312        Arc::clone(ctx.state_manager.chain_store()),
313        peer_manager.clone(),
314        ctx.net_keypair.clone(),
315        config.chain.genesis_name(),
316        *ctx.state_manager.chain_store().genesis_block_header().cid(),
317    )
318    .await?;
319    Ok(p2p_service)
320}
321
322fn create_mpool(
323    services: &mut JoinSet<anyhow::Result<()>>,
324    p2p_service: &Libp2pService<DbType>,
325    ctx: &AppContext,
326) -> anyhow::Result<Arc<MessagePool<Arc<ChainStore<DbType>>>>> {
327    Ok(MessagePool::new(
328        ctx.state_manager.chain_store().clone(),
329        p2p_service.network_sender().clone(),
330        MpoolConfig::load_config(ctx.db.writer().as_ref())?,
331        ctx.state_manager.chain_config().clone(),
332        services,
333    )
334    .map(Arc::new)?)
335}
336
337fn create_chain_follower(
338    opts: &CliOpts,
339    p2p_service: &Libp2pService<DbType>,
340    mpool: Arc<MessagePool<Arc<ChainStore<DbType>>>>,
341    ctx: &AppContext,
342) -> anyhow::Result<Arc<ChainFollower<DbType>>> {
343    let network_send = p2p_service.network_sender().clone();
344    let peer_manager = p2p_service.peer_manager().clone();
345    let network = SyncNetworkContext::new(network_send, peer_manager, ctx.db.clone());
346    Ok(Arc::new(ChainFollower::new(
347        ctx.state_manager.clone(),
348        network,
349        Tipset::from(ctx.state_manager.chain_store().genesis_block_header()),
350        p2p_service.network_receiver(),
351        opts.stateless,
352        mpool,
353    )))
354}
355
356fn start_chain_follower_service(
357    services: &mut JoinSet<anyhow::Result<()>>,
358    chain_follower: Arc<ChainFollower<DbType>>,
359) {
360    services.spawn(async move { chain_follower.run().await });
361}
362
363async fn maybe_start_health_check_service(
364    services: &mut JoinSet<anyhow::Result<()>>,
365    config: &Config,
366    p2p_service: &Libp2pService<DbType>,
367    chain_follower: &ChainFollower<DbType>,
368    ctx: &AppContext,
369) -> anyhow::Result<()> {
370    if config.client.enable_health_check {
371        let forest_state = crate::health::ForestState {
372            config: config.clone(),
373            chain_config: ctx.state_manager.chain_config().clone(),
374            genesis_timestamp: ctx
375                .state_manager
376                .chain_store()
377                .genesis_block_header()
378                .timestamp,
379            sync_status: chain_follower.sync_status.clone(),
380            peer_manager: p2p_service.peer_manager().clone(),
381        };
382        let healthcheck_address = forest_state.config.client.healthcheck_address;
383        info!("Healthcheck endpoint will listen at {healthcheck_address}");
384        let listener = tokio::net::TcpListener::bind(healthcheck_address).await?;
385        services.spawn(async move {
386            crate::health::init_healthcheck_server(forest_state, listener)
387                .await
388                .context("Failed to initiate healthcheck server")
389        });
390    } else {
391        info!("Healthcheck service is disabled");
392    }
393    Ok(())
394}
395
396fn maybe_start_gc_service(
397    services: &mut JoinSet<anyhow::Result<()>>,
398    opts: &CliOpts,
399    config: &Config,
400    chain_follower: Arc<ChainFollower<DbType>>,
401) -> anyhow::Result<()> {
402    // If the node is stateless, GC shouldn't get triggered even on demand.
403    if opts.stateless {
404        return Ok(());
405    }
406
407    let snap_gc = Arc::new(SnapshotGarbageCollector::new(chain_follower, config)?);
408
409    GLOBAL_SNAPSHOT_GC
410        .set(snap_gc.clone())
411        .ok()
412        .context("failed to set GLOBAL_SNAPSHOT_GC")?;
413
414    services.spawn({
415        let snap_gc = snap_gc.clone();
416        async move {
417            snap_gc.event_loop().await;
418            Ok(())
419        }
420    });
421
422    // GC shouldn't run periodically if the node is stateless or if the user has disabled it.
423    if !opts.no_gc {
424        services.spawn({
425            let snap_gc = snap_gc.clone();
426            async move {
427                snap_gc.scheduler_loop().await;
428                Ok(())
429            }
430        });
431    }
432
433    Ok(())
434}
435
436#[allow(clippy::too_many_arguments)]
437fn maybe_start_rpc_service(
438    services: &mut JoinSet<anyhow::Result<()>>,
439    config: &Config,
440    mpool: Arc<MessagePool<Arc<ChainStore<DbType>>>>,
441    chain_follower: &ChainFollower<DbType>,
442    start_time: chrono::DateTime<chrono::Utc>,
443    shutdown: mpsc::Sender<()>,
444    rpc_stop_handle: jsonrpsee::server::StopHandle,
445    ctx: &AppContext,
446) -> anyhow::Result<()> {
447    if config.client.enable_rpc {
448        let rpc_address = config.client.rpc_address;
449        let filter_list = config
450            .client
451            .rpc_filter_list
452            .as_ref()
453            .map(|path| crate::rpc::FilterList::new_from_file(path))
454            .transpose()?;
455        info!("JSON-RPC endpoint will listen at {rpc_address}");
456        let eth_event_handler = Arc::new(EthEventHandler::from_config(&config.events));
457        if is_env_truthy("FOREST_JWT_DISABLE_EXP_VALIDATION") {
458            warn!(
459                "JWT expiration validation is disabled; this significantly weakens security and should only be used in tightly controlled environments"
460            );
461        }
462        services.spawn({
463            let state_manager = ctx.state_manager.shallow_clone();
464            let bad_blocks = chain_follower.bad_blocks.shallow_clone();
465            let sync_status = chain_follower.sync_status.shallow_clone();
466            let sync_network_context = chain_follower.network.shallow_clone();
467            let tipset_send = chain_follower.tipset_sender.clone();
468            let keystore = ctx.keystore.shallow_clone();
469            let snapshot_progress_tracker = ctx.snapshot_progress_tracker.clone();
470            let nonce_tracker = NonceTracker::new();
471            let mpool_locker = MpoolLocker::new();
472            let temp_dir = Arc::new(ctx.temp_dir.clone());
473            async move {
474                let rpc_listener = tokio::net::TcpListener::bind(rpc_address)
475                    .await
476                    .map_err(|e| {
477                        anyhow::anyhow!("Unable to listen on RPC endpoint {rpc_address}: {e}")
478                    })
479                    .unwrap();
480                start_rpc(
481                    RPCState {
482                        state_manager,
483                        keystore,
484                        mpool,
485                        bad_blocks,
486                        sync_status,
487                        eth_event_handler,
488                        sync_network_context,
489                        start_time,
490                        shutdown,
491                        tipset_send,
492                        snapshot_progress_tracker,
493                        mpool_locker,
494                        nonce_tracker,
495                        temp_dir,
496                    },
497                    rpc_listener,
498                    rpc_stop_handle,
499                    filter_list,
500                )
501                .await
502            }
503        });
504    } else {
505        debug!("RPC disabled.");
506    };
507    Ok(())
508}
509
510fn maybe_start_f3_service(opts: &CliOpts, config: &Config, ctx: &AppContext) -> anyhow::Result<()> {
511    // already running
512    if crate::rpc::f3::F3_LEASE_MANAGER.get().is_some() {
513        return Ok(());
514    }
515
516    if !config.client.enable_rpc {
517        if crate::f3::is_sidecar_ffi_enabled(ctx.state_manager.chain_config()) {
518            tracing::warn!("F3 sidecar is enabled but not run because RPC is disabled. ")
519        }
520        return Ok(());
521    }
522
523    if !opts.halt_after_import && !opts.stateless {
524        let rpc_endpoint = config.client.rpc_v1_endpoint()?;
525        let state_manager = &ctx.state_manager;
526        let p2p_peer_id = ctx.p2p_peer_id;
527        let admin_jwt = ctx.admin_jwt.clone();
528        tokio::task::spawn_blocking({
529            crate::rpc::f3::F3_LEASE_MANAGER
530                .set(crate::rpc::f3::F3LeaseManager::new(
531                    state_manager.chain_config().network.clone(),
532                    p2p_peer_id,
533                ))
534                .expect("F3 lease manager should not have been initialized before");
535            let chain_config = state_manager.chain_config().clone();
536            let f3_root = crate::f3::get_f3_root(config);
537            let crate::f3::F3Options {
538                chain_finality,
539                bootstrap_epoch,
540                initial_power_table,
541            } = crate::f3::get_f3_sidecar_params(&chain_config);
542            move || {
543                crate::f3::run_f3_sidecar_if_enabled(
544                    &chain_config,
545                    rpc_endpoint.to_string(),
546                    admin_jwt,
547                    crate::rpc::f3::get_f3_rpc_endpoint().to_string(),
548                    initial_power_table
549                        .map(|i| i.to_string())
550                        .unwrap_or_default(),
551                    bootstrap_epoch,
552                    chain_finality,
553                    f3_root.display().to_string(),
554                );
555            }
556        });
557        tokio::task::spawn({
558            let chain_store = ctx.chain_store().clone();
559            async move {
560                // wait 1s to let F3 RPC server start
561                tokio::time::sleep(Duration::from_secs(1)).await;
562                match (|| crate::rpc::f3::F3GetLatestCertificate::get())
563                    .retry(ExponentialBuilder::default())
564                    .await
565                {
566                    Ok(f3_finalized_cert) => {
567                        let f3_finalized_head = f3_finalized_cert.chain_head();
568                        match chain_store
569                            .chain_index()
570                            .load_required_tipset(&f3_finalized_head.key)
571                        {
572                            Ok(ts) => {
573                                chain_store.set_f3_finalized_tipset(ts);
574                                tracing::info!(
575                                    "Set F3 finalized tipset to epoch {} and key {}",
576                                    f3_finalized_head.epoch,
577                                    f3_finalized_head.key,
578                                );
579                            }
580                            Err(e) => {
581                                tracing::error!(
582                                    "Failed to get F3 finalized tipset epoch {} and key {}: {e}",
583                                    f3_finalized_head.epoch,
584                                    f3_finalized_head.key
585                                );
586                            }
587                        }
588                    }
589                    Err(e) => {
590                        tracing::error!("Failed to get F3 latest certificate: {e:#}");
591                    }
592                }
593            }
594        });
595    }
596
597    Ok(())
598}
599
600fn maybe_start_indexer_service(
601    services: &mut JoinSet<anyhow::Result<()>>,
602    opts: &CliOpts,
603    config: &Config,
604    ctx: &AppContext,
605) {
606    if config.chain_indexer.enable_indexer
607        && !opts.stateless
608        && !ctx.state_manager.chain_config().is_devnet()
609    {
610        let mut head_changes_rx = ctx.state_manager.chain_store().subscribe_head_changes();
611        let chain_store = ctx.state_manager.chain_store().clone();
612        services.spawn(async move {
613            tracing::info!("Starting indexer service");
614
615            // Continuously listen for head changes
616            loop {
617                for ts in head_changes_rx.recv().await?.applies {
618                    tracing::debug!("Indexing tipset {}", ts.key());
619                    let delegated_messages =
620                        chain_store.headers_delegated_messages(ts.block_headers().iter())?;
621                    chain_store.process_signed_messages(&delegated_messages)?;
622                }
623            }
624        });
625
626        // Run the collector only if chain indexer is enabled
627        if let Some(retention_epochs) = config.chain_indexer.gc_retention_epochs {
628            let chain_store = ctx.state_manager.chain_store().clone();
629            let chain_config = ctx.state_manager.chain_config().clone();
630            services.spawn(async move {
631                tracing::info!("Starting collector for eth_mappings");
632                let mut collector = EthMappingCollector::new(
633                    chain_store.blockstore().clone(),
634                    chain_config.eth_chain_id,
635                    retention_epochs.into(),
636                );
637                collector.run().await
638            });
639        }
640    }
641}
642
643/// Starts daemon process
644pub(super) async fn start(
645    start_time: chrono::DateTime<chrono::Utc>,
646    opts: CliOpts,
647    config: Config,
648    shutdown_send: mpsc::Sender<()>,
649    rpc_stop_handle: jsonrpsee::server::StopHandle,
650) -> anyhow::Result<()> {
651    startup_init(&config)?;
652    start_services(
653        start_time,
654        &opts,
655        config.clone(),
656        shutdown_send.clone(),
657        rpc_stop_handle,
658    )
659    .await
660}
661
662pub(super) async fn start_services(
663    start_time: chrono::DateTime<chrono::Utc>,
664    opts: &CliOpts,
665    mut config: Config,
666    shutdown_send: mpsc::Sender<()>,
667    rpc_stop_handle: jsonrpsee::server::StopHandle,
668) -> anyhow::Result<()> {
669    // Cleanup the collector prometheus metrics registry on start
670    crate::metrics::reset_collector_registry();
671    let mut services = JoinSet::new();
672    let network = config.chain();
673    let ctx = AppContext::init(opts, &config).await?;
674    info!("Using network :: {network}");
675    utils::misc::display_chain_logo(config.chain());
676    if opts.exit_after_init {
677        return Ok(());
678    }
679    if !opts.stateless
680        && !opts.skip_load_actors
681        && let Err(e) = ctx.state_manager.maybe_rewind_heaviest_tipset()
682    {
683        tracing::warn!("error in maybe_rewind_heaviest_tipset: {e:#}");
684    }
685
686    let p2p_service = create_p2p_service(&mut services, &mut config, &ctx).await?;
687    let mpool = create_mpool(&mut services, &p2p_service, &ctx)?;
688    let chain_follower = create_chain_follower(opts, &p2p_service, mpool.clone(), &ctx)?;
689
690    maybe_start_rpc_service(
691        &mut services,
692        &config,
693        mpool.clone(),
694        &chain_follower,
695        start_time,
696        shutdown_send.clone(),
697        rpc_stop_handle,
698        &ctx,
699    )?;
700
701    maybe_import_snapshot(opts, &mut config, &ctx).await?;
702    if opts.halt_after_import {
703        // Cancel all async services
704        services.shutdown().await;
705        return Ok(());
706    }
707
708    warmup_in_background(&ctx);
709    maybe_start_gc_service(&mut services, opts, &config, chain_follower.clone())?;
710    maybe_start_metrics_service(&mut services, &config, &ctx).await?;
711    maybe_start_f3_service(opts, &config, &ctx)?;
712    maybe_start_health_check_service(&mut services, &config, &p2p_service, &chain_follower, &ctx)
713        .await?;
714    maybe_start_indexer_service(&mut services, opts, &config, &ctx);
715    if !opts.stateless {
716        ensure_proof_params_downloaded().await?;
717    }
718    services.spawn(p2p_service.run());
719    start_chain_follower_service(&mut services, chain_follower);
720    // blocking until any of the services returns an error,
721    propagate_error(&mut services)
722        .await
723        .context("services failure")
724        .map(|_| {})
725}
726
727fn warmup_in_background(ctx: &AppContext) {
728    // Populate `tipset_by_height` cache
729    let cs = ctx.chain_store().clone();
730    tokio::task::spawn_blocking(move || {
731        let start = Instant::now();
732        match cs.chain_index().tipset_by_height(
733            // 0 would short-circuit the cache
734            1,
735            cs.heaviest_tipset(),
736            ResolveNullTipset::TakeOlder,
737        ) {
738            Ok(_) => {
739                tracing::info!(
740                    "Successfully populated tipset_by_height cache, took {}",
741                    humantime::format_duration(start.elapsed())
742                );
743            }
744            Err(e) => {
745                tracing::warn!("Failed to populate tipset_by_height cache: {e}");
746            }
747        }
748    });
749}
750
751/// If our current chain is below a supported height, we need a snapshot to bring it up
752/// to a supported height. If we've not been given a snapshot by the user, get one.
753///
754/// An [`Err`] should be considered fatal.
755async fn maybe_set_snapshot_path(
756    config: &mut Config,
757    chain_config: &ChainConfig,
758    epoch: ChainEpoch,
759    auto_download_snapshot: bool,
760    download_directory: &Path,
761) -> anyhow::Result<()> {
762    if !download_directory.is_dir() {
763        anyhow::bail!(
764            "`download_directory` does not exist: {}",
765            download_directory.display()
766        );
767    }
768
769    let vendor = snapshot::TrustedVendor::default();
770    let chain = config.chain();
771
772    // What height is our chain at right now, and what network version does that correspond to?
773    let network_version = chain_config.network_version(epoch);
774    let network_version_is_small = network_version < NetworkVersion::V16;
775
776    // We don't support small network versions (we can't validate from e.g genesis).
777    // So we need a snapshot (which will be from a recent network version)
778    let require_a_snapshot = network_version_is_small;
779    let have_a_snapshot = config.client.snapshot_path.is_some();
780
781    match (require_a_snapshot, have_a_snapshot, auto_download_snapshot) {
782        (false, _, _) => {}   // noop - don't need a snapshot
783        (true, true, _) => {} // noop - we need a snapshot, and we have one
784        (true, false, true) => {
785            const AUTO_SNAPSHOT_PATH_ENV_KEY: &str = "FOREST_AUTO_DOWNLOAD_SNAPSHOT_PATH";
786            match std::env::var(AUTO_SNAPSHOT_PATH_ENV_KEY) {
787                Ok(path) if !path.is_empty() => {
788                    tracing::info!(
789                        "importing snapshot from {path} set by `{AUTO_SNAPSHOT_PATH_ENV_KEY}`"
790                    );
791                    config.client.snapshot_path = Some(path.into());
792                }
793                _ => {
794                    // Resolve the redirect URL to get the actual snapshot URL
795                    // This ensures all chunks download from the same snapshot even if
796                    // a new snapshot is published during the download
797                    let (resolved_url, _num_bytes, filename) =
798                        crate::cli_shared::snapshot::peek(vendor, chain).await?;
799                    tracing::info!("Downloading snapshot: {filename}");
800                    config.client.snapshot_path = Some(resolved_url.to_string().into());
801                }
802            }
803        }
804        (true, false, false) => {
805            // we need a snapshot, don't have one, and don't have permission to download one, so ask the user
806            let (url, num_bytes, filename) = crate::cli_shared::snapshot::peek(vendor, chain)
807                .await
808                .context("couldn't get snapshot size")?;
809            // dialoguer will double-print long lines, so manually print the first clause ourselves,
810            // then let `Confirm` handle the second.
811            println!(
812                "Forest requires a snapshot to sync with the network, but automatic fetching is disabled."
813            );
814            let message = format!(
815                "Fetch a {} snapshot? (denying will exit the program). ",
816                indicatif::HumanBytes(num_bytes)
817            );
818            let have_permission = asyncify(|| {
819                dialoguer::Confirm::with_theme(&ColorfulTheme::default())
820                    .with_prompt(message)
821                    .default(false)
822                    .interact()
823                    // e.g not a tty (or some other error), so haven't got permission.
824                    .unwrap_or(false)
825            })
826            .await;
827            if !have_permission {
828                bail!(
829                    "Forest requires a snapshot to sync with the network, but automatic fetching is disabled."
830                )
831            }
832            tracing::info!("Downloading snapshot: {filename}");
833            config.client.snapshot_path = Some(url.to_string().into());
834        }
835    };
836
837    Ok(())
838}
839
840/// returns the first error with which any of the services end, or never returns at all
841// This should return anyhow::Result<!> once the `Never` type is stabilized
842async fn propagate_error(
843    services: &mut JoinSet<anyhow::Result<()>>,
844) -> anyhow::Result<std::convert::Infallible> {
845    while let Some(result) = services.join_next().await {
846        if let Ok(Err(error_message)) = result {
847            return Err(error_message);
848        }
849    }
850    std::future::pending().await
851}
852
853/// Run the closure on a thread where blocking is allowed
854///
855/// # Panics
856/// If the closure panics
857fn asyncify<T>(f: impl FnOnce() -> T + Send + 'static) -> impl Future<Output = T>
858where
859    T: Send + 'static,
860{
861    tokio::task::spawn_blocking(f).then(|res| async { res.expect("spawned task panicked") })
862}
863
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use super::*;

    /// Checks `validation_range` against the expected range, or against the expected error
    /// message when the call must fail.
    #[rstest]
    #[case::current_zero(0, 1, anyhow::Result::Err(anyhow::anyhow!(
        "current head epoch 0 is invalid"
    )))]
    #[case::current_negative(-1, 1, anyhow::Result::Err(anyhow::anyhow!(
        "current head epoch -1 is invalid"
    )))]
    #[case::from_positive_beyond_head(10, 11, anyhow::Result::Err(anyhow::anyhow!(
        "requested validation start epoch 11 is beyond the current head at epoch 10"
    )))]
    #[case::from_positive_within_range(10, 5, anyhow::Result::Ok(5..=10))]
    #[case::from_zero(10, 0, anyhow::Result::Ok(0..=10))]
    #[case::from_negative_within_range(10, -5, anyhow::Result::Ok(5..=10))]
    #[case::from_negative_beyond_range(10, -15, anyhow::Result::Ok(0..=10))]
    fn test_validation_range(
        #[case] current: ChainEpoch,
        #[case] from: ChainEpoch,
        #[case] expected: anyhow::Result<std::ops::RangeInclusive<ChainEpoch>>,
    ) {
        let result = validation_range(current, from);
        match expected {
            Ok(expected_range) => {
                assert_eq!(result.unwrap(), expected_range);
            }
            Err(expected_error) => {
                // Compare the messages too, so that a failure for the wrong reason is caught.
                assert_eq!(
                    result.unwrap_err().to_string(),
                    expected_error.to_string()
                );
            }
        }
    }
}
899}