unc_chain_configs/
client_config.rs

1//! Chain Client Configuration
2use crate::ExternalStorageLocation::GCS;
3use crate::MutableConfigValue;
4use std::cmp::{max, min};
5use std::path::PathBuf;
6use std::sync::atomic::AtomicBool;
7use std::sync::Arc;
8use std::time::Duration;
9use unc_primitives::types::{BlockHeight, BlockHeightDelta, Gas, NumBlocks, NumSeats, ShardId};
10use unc_primitives::version::Version;
11
/// State sync timeout, in seconds, used by `ClientConfig::test`.
pub const TEST_STATE_SYNC_TIMEOUT: u64 = 5;
13
/// Rendering style of the periodic log summary.
#[derive(Debug, Copy, Clone, serde::Serialize, serde::Deserialize)]
pub enum LogSummaryStyle {
    /// Uncolored output; (de)serialized as `"plain"`.
    #[serde(rename = "plain")]
    Plain,
    /// Colored output; (de)serialized as `"colored"`.
    #[serde(rename = "colored")]
    Colored,
}
21
/// Minimum number of epochs for which we keep store data.
/// `GCConfig::gc_num_epochs_to_keep()` never returns less than this.
pub const MIN_GC_NUM_EPOCHS_TO_KEEP: u64 = 3;

/// Default number of epochs for which we keep store data.
pub const DEFAULT_GC_NUM_EPOCHS_TO_KEEP: u64 = 5;

/// Default number of concurrent requests to external storage to fetch state parts.
pub const DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL: u32 = 25;
/// Default number of concurrent requests to external storage to fetch state
/// parts while the node is in catchup.
pub const DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL: u32 = 5;
31
/// Configuration for garbage collection.
///
/// Because of `#[serde(default)]`, fields missing from the serialized form
/// are filled in from the `Default` implementation of this struct.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize, PartialEq)]
#[serde(default)]
pub struct GCConfig {
    /// Maximum number of blocks to garbage collect at every garbage collection
    /// call.
    pub gc_blocks_limit: NumBlocks,

    /// Maximum number of heights to go through at each garbage collection step
    /// when cleaning forks during garbage collection.
    pub gc_fork_clean_step: u64,

    /// Number of epochs for which we keep store data.
    ///
    /// This is the raw configured value; use the `gc_num_epochs_to_keep()`
    /// method to get the value clamped to `MIN_GC_NUM_EPOCHS_TO_KEEP`.
    pub gc_num_epochs_to_keep: u64,
}
47
48impl Default for GCConfig {
49    fn default() -> Self {
50        Self {
51            gc_blocks_limit: 2,
52            gc_fork_clean_step: 100,
53            gc_num_epochs_to_keep: DEFAULT_GC_NUM_EPOCHS_TO_KEEP,
54        }
55    }
56}
57
58impl GCConfig {
59    pub fn gc_num_epochs_to_keep(&self) -> u64 {
60        max(MIN_GC_NUM_EPOCHS_TO_KEEP, self.gc_num_epochs_to_keep)
61    }
62}
63
64fn default_num_concurrent_requests() -> u32 {
65    DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL
66}
67
68fn default_num_concurrent_requests_during_catchup() -> u32 {
69    DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL
70}
71
/// Configures fetching of state parts from external storage.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
pub struct ExternalStorageConfig {
    /// Location of state parts.
    pub location: ExternalStorageLocation,
    /// When fetching state parts from external storage, throttle fetch requests
    /// to this many concurrent requests.
    /// Defaults to `DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL` when
    /// absent from the serialized form.
    #[serde(default = "default_num_concurrent_requests")]
    pub num_concurrent_requests: u32,
    /// During catchup, the node will use a different number of concurrent requests
    /// to reduce the performance impact of state sync.
    /// Defaults to `DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL`
    /// when absent from the serialized form.
    #[serde(default = "default_num_concurrent_requests_during_catchup")]
    pub num_concurrent_requests_during_catchup: u32,
}
85
/// Where state parts are stored outside of the node.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
pub enum ExternalStorageLocation {
    /// State parts kept in an S3 bucket.
    S3 {
        /// Location of state dumps on S3.
        bucket: String,
        /// Data may only be available in certain locations.
        region: String,
    },
    /// State parts kept on a filesystem.
    Filesystem {
        /// Root directory of the state parts.
        root_dir: PathBuf,
    },
    /// State parts kept in a GCS bucket (presumably Google Cloud Storage).
    GCS {
        /// Name of the bucket holding the state parts.
        bucket: String,
    },
}
101
/// Configures how to dump state to external storage.
///
/// All optional fields are omitted from the serialized form when `None`.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
pub struct DumpConfig {
    /// Specifies where to write the obtained state parts.
    pub location: ExternalStorageLocation,
    /// Use in case a node that dumps state to the external storage
    /// gets in trouble.
    /// NOTE(review): presumably the dump is restarted for the listed shards;
    /// confirm against the dumping code.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub restart_dump_for_shards: Option<Vec<ShardId>>,
    /// How often to check if a new epoch has started.
    /// Feel free to set to `None`, defaults are sensible.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub iteration_delay: Option<Duration>,
    /// Location of a json file with credentials allowing write access to the bucket.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub credentials_file: Option<PathBuf>,
}
119
/// Configures how to fetch state parts during state sync.
///
/// The `Default` implementation yields `Peers`.
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug)]
pub enum SyncConfig {
    /// Syncs state from the peers without reading anything from external storage.
    Peers,
    /// Expects parts to be available in external storage, as described by the
    /// contained `ExternalStorageConfig`.
    ExternalStorage(ExternalStorageConfig),
}
128
129impl Default for SyncConfig {
130    fn default() -> Self {
131        Self::Peers
132    }
133}
134
#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Default)]
/// Options for state sync: dumping state to external storage and fetching
/// state parts.
pub struct StateSyncConfig {
    #[serde(skip_serializing_if = "Option::is_none")]
    /// `None` disables state dump to external storage.
    /// Omitted from the serialized form when `None`.
    pub dump: Option<DumpConfig>,
    #[serde(skip_serializing_if = "SyncConfig::is_default", default = "SyncConfig::default")]
    /// How to fetch state parts. Omitted from the serialized form when it is
    /// the default (`SyncConfig::Peers`), and defaults to that when absent.
    pub sync: SyncConfig,
}
144
145impl SyncConfig {
146    /// Checks whether the object equals its default value.
147    fn is_default(&self) -> bool {
148        matches!(self, Self::Peers)
149    }
150}
151
/// A handle that allows the main process to interrupt resharding if needed.
/// This typically happens when the main process is interrupted.
///
/// Cloning the handle shares the underlying flag: calling
/// [`ReshardingHandle::stop`] on any clone is observed through all of them.
#[derive(Clone)]
pub struct ReshardingHandle {
    /// `true` until [`ReshardingHandle::stop`] is called.
    keep_going: Arc<AtomicBool>,
}

impl ReshardingHandle {
    /// Creates a handle whose flag is set, i.e. [`ReshardingHandle::get`]
    /// returns `true` until [`ReshardingHandle::stop`] is called.
    pub fn new() -> Self {
        Self { keep_going: Arc::new(AtomicBool::new(true)) }
    }

    /// Returns `true` as long as `stop` has not been called on this handle
    /// or any of its clones.
    pub fn get(&self) -> bool {
        self.keep_going.load(std::sync::atomic::Ordering::Relaxed)
    }

    /// Clears the flag, so that subsequent calls to `get` return `false`.
    pub fn stop(&self) {
        self.keep_going.store(false, std::sync::atomic::Ordering::Relaxed);
    }
}

impl Default for ReshardingHandle {
    /// Equivalent to [`ReshardingHandle::new`].
    ///
    /// Implemented by hand because a derived `Default` would start with the
    /// flag cleared (`AtomicBool::default()` is `false`).
    fn default() -> Self {
        Self::new()
    }
}
172
/// Configuration for resharding.
///
/// Because of `#[serde(default)]`, fields missing from the serialized form
/// are filled in from the `Default` implementation of this struct.
#[derive(serde::Serialize, serde::Deserialize, Clone, Copy, Debug, PartialEq)]
#[serde(default)]
pub struct ReshardingConfig {
    /// The soft limit on the size of a single batch. The batch size can be
    /// decreased if resharding is consuming too many resources and interfering
    /// with regular node operation.
    pub batch_size: bytesize::ByteSize,

    /// The delay between writing batches to the db. The batch delay can be
    /// increased if resharding is consuming too many resources and interfering
    /// with regular node operation.
    pub batch_delay: Duration,

    /// The delay between attempts to start resharding while waiting for the
    /// state snapshot to become available.
    pub retry_delay: Duration,

    /// The delay between the moment the resharding request is received and
    /// the moment the actor actually starts working on it. This delay should
    /// only be used in tests.
    pub initial_delay: Duration,

    /// The maximum time that the actor will wait for the snapshot to be ready,
    /// before starting resharding. Do not wait indefinitely since we want to
    /// report error early enough for the node maintainer to have time to recover.
    pub max_poll_time: Duration,
}
200
201impl Default for ReshardingConfig {
202    fn default() -> Self {
203        // Conservative default for a slower resharding that puts as little
204        // extra load on the node as possible.
205        Self {
206            batch_size: bytesize::ByteSize::kb(500),
207            batch_delay: Duration::from_millis(100),
208            retry_delay: Duration::from_secs(10),
209            initial_delay: Duration::from_secs(0),
210            // The snapshot typically is available within a minute from the
211            // epoch start. Set the default higher in case we need to wait for
212            // state sync.
213            max_poll_time: Duration::from_secs(2 * 60 * 60), // 2 hours
214        }
215    }
216}
217
/// Default header sync initial timeout: 10 seconds.
pub fn default_header_sync_initial_timeout() -> Duration {
    const TIMEOUT: Duration = Duration::from_secs(10);
    TIMEOUT
}
221
/// Default header sync progress timeout: 2 seconds.
pub fn default_header_sync_progress_timeout() -> Duration {
    const TIMEOUT: Duration = Duration::from_secs(2);
    TIMEOUT
}
225
/// Default header sync stall ban timeout: 120 seconds.
pub fn default_header_sync_stall_ban_timeout() -> Duration {
    const TIMEOUT: Duration = Duration::from_secs(120);
    TIMEOUT
}
229
/// Default state sync timeout: 60 seconds.
pub fn default_state_sync_timeout() -> Duration {
    const TIMEOUT: Duration = Duration::from_secs(60);
    TIMEOUT
}
233
/// Default expected increase of header head height per second during header
/// sync: 10.
pub fn default_header_sync_expected_height_per_second() -> u64 {
    const HEIGHT_PER_SECOND: u64 = 10;
    HEIGHT_PER_SECOND
}
237
/// Default sync check period: 300 seconds.
pub fn default_sync_check_period() -> Duration {
    const PERIOD_SECS: u64 = 10 * 30;
    Duration::from_secs(PERIOD_SECS)
}
241
/// Default sync step period: 300 milliseconds.
pub fn default_sync_step_period() -> Duration {
    const PERIOD_MILLIS: u64 = 10 * 30;
    Duration::from_millis(PERIOD_MILLIS)
}
245
/// Default sync height threshold: 1.
pub fn default_sync_height_threshold() -> u64 {
    const THRESHOLD: u64 = 1;
    THRESHOLD
}
249
250pub fn default_state_sync() -> Option<StateSyncConfig> {
251    Some(StateSyncConfig {
252        dump: None,
253        sync: SyncConfig::ExternalStorage(ExternalStorageConfig {
254            location: GCS { bucket: "state-parts".to_string() },
255            num_concurrent_requests: DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_EXTERNAL,
256            num_concurrent_requests_during_catchup:
257                DEFAULT_STATE_SYNC_NUM_CONCURRENT_REQUESTS_ON_CATCHUP_EXTERNAL,
258        }),
259    })
260}
261
/// State sync is enabled by default.
pub fn default_state_sync_enabled() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
265
/// Default number of view client threads: 4.
pub fn default_view_client_threads() -> usize {
    const THREADS: usize = 4;
    THREADS
}
269
/// Default period between log summaries: 10 seconds.
pub fn default_log_summary_period() -> Duration {
    const PERIOD: Duration = Duration::from_secs(10);
    PERIOD
}
273
/// Default view client throttle period: 30 seconds.
pub fn default_view_client_throttle_period() -> Duration {
    const PERIOD: Duration = Duration::from_secs(30);
    PERIOD
}
277
/// Default trie viewer state size limit: `Some(50_000)`.
pub fn default_trie_viewer_state_size_limit() -> Option<u64> {
    const LIMIT: u64 = 50_000;
    Some(LIMIT)
}
281
/// Default transaction pool size limit: `Some(100_000_000)`, i.e. 100 MB.
pub fn default_transaction_pool_size_limit() -> Option<u64> {
    const LIMIT: u64 = 100_000_000;
    Some(LIMIT)
}
285
286pub fn default_tx_routing_height_horizon() -> BlockHeightDelta {
287    4
288}
289
/// Multiline logging default: `Some(true)`.
pub fn default_enable_multiline_logging() -> Option<bool> {
    const ENABLED: bool = true;
    Some(ENABLED)
}
293
/// Default time limit for adding transactions to a chunk: `Some` of
/// 200 milliseconds.
pub fn default_produce_chunk_add_transactions_time_limit() -> Option<Duration> {
    const LIMIT: Duration = Duration::from_millis(200);
    Some(LIMIT)
}
297
/// ClientConfig where some fields can be updated at runtime.
///
/// NOTE(review): the runtime-updatable fields are presumably the ones typed
/// `MutableConfigValue<_>` — confirm against `MutableConfigValue`.
#[derive(Clone, serde::Serialize)]
pub struct ClientConfig {
    /// Version of the binary.
    pub version: Version,
    /// Chain id for status.
    pub chain_id: String,
    /// Listening rpc port for status.
    pub rpc_addr: Option<String>,
    /// Graceful shutdown at expected block height.
    pub expected_shutdown: MutableConfigValue<Option<BlockHeight>>,
    /// Duration to check for producing / skipping block.
    pub block_production_tracking_delay: Duration,
    /// Minimum duration before producing block.
    pub min_block_production_delay: Duration,
    /// Maximum wait for approvals before producing block.
    pub max_block_production_delay: Duration,
    /// Maximum duration before skipping given height.
    pub max_block_wait_delay: Duration,
    /// Skip waiting for sync (for testing or single node testnet).
    pub skip_sync_wait: bool,
    /// How often to check that we are not out of sync.
    pub sync_check_period: Duration,
    /// While syncing, how long to check for each step.
    pub sync_step_period: Duration,
    /// Sync height threshold: below this difference in height don't start syncing.
    pub sync_height_threshold: BlockHeightDelta,
    /// How much time to wait after initial header sync.
    pub header_sync_initial_timeout: Duration,
    /// How much time to wait after some progress is made in header sync.
    pub header_sync_progress_timeout: Duration,
    /// How much time to wait before banning a peer in header sync if sync is too slow.
    pub header_sync_stall_ban_timeout: Duration,
    /// Expected increase of header head height per second during header sync.
    pub header_sync_expected_height_per_second: u64,
    /// How long to wait for a response during state sync.
    pub state_sync_timeout: Duration,
    /// Minimum number of peers to start syncing.
    pub min_num_peers: usize,
    /// Period between logging summary information.
    pub log_summary_period: Duration,
    /// Enable coloring of the logs.
    pub log_summary_style: LogSummaryStyle,
    /// Produce empty blocks, use `false` for testing.
    pub produce_empty_blocks: bool,
    /// Epoch length.
    pub epoch_length: BlockHeightDelta,
    /// Number of block producer seats.
    pub num_block_producer_seats: NumSeats,
    /// Time to persist Accounts Id in the router without removing them.
    pub ttl_account_id_router: Duration,
    /// Horizon at which instead of fetching block, fetch full state.
    pub block_fetch_horizon: BlockHeightDelta,
    /// Time between check to perform catchup.
    pub catchup_step_period: Duration,
    /// Time between checking to re-request chunks.
    pub chunk_request_retry_period: Duration,
    /// Time between running doomslug timer.
    /// Note: the field name is a misspelling of "doomslug"; it is part of the
    /// public interface of this struct.
    pub doosmslug_step_period: Duration,
    /// Behind this horizon header fetch kicks in.
    pub block_header_fetch_horizon: BlockHeightDelta,
    /// Garbage collection configuration.
    pub gc: GCConfig,
    /// Not clear old data, set `true` for archive nodes.
    pub archive: bool,
    /// save_trie_changes should be set to true iff
    /// - archive is false - non-archival nodes need trie changes to perform garbage collection
    /// - archive is true, cold_store is configured and migration to split_storage is finished - node
    /// working in split storage mode needs trie changes in order to do garbage collection on hot.
    pub save_trie_changes: bool,
    /// Number of threads for ViewClientActor pool.
    pub view_client_threads: usize,
    /// Number of seconds between state requests for view client.
    pub view_client_throttle_period: Duration,
    /// Upper bound of the byte size of contract state that is still viewable. None is no limit.
    pub trie_viewer_state_size_limit: Option<u64>,
    /// Max burnt gas per view method.  If present, overrides value stored in
    /// genesis file.  The value only affects the RPCs without influencing the
    /// protocol thus changing it per-node doesn't affect the blockchain.
    pub max_gas_burnt_view: Option<Gas>,
    /// Re-export storage layer statistics as prometheus metrics.
    pub enable_statistics_export: bool,
    /// Number of threads to execute background migration work in client.
    pub client_background_migration_threads: usize,
    /// Enables background flat storage creation.
    pub flat_storage_creation_enabled: bool,
    /// Duration to perform background flat storage creation step.
    pub flat_storage_creation_period: Duration,
    /// Whether to use the State Sync mechanism.
    /// If disabled, the node will do Block Sync instead of State Sync.
    pub state_sync_enabled: bool,
    /// Options for syncing state.
    pub state_sync: StateSyncConfig,
    /// Limit of the size of per-shard transaction pool measured in bytes. If not set, the size
    /// will be unbounded.
    pub transaction_pool_size_limit: Option<u64>,
    /// Allows more detailed logging, for example a list of orphaned blocks.
    pub enable_multiline_logging: bool,
    /// Configuration for resharding.
    pub resharding_config: MutableConfigValue<ReshardingConfig>,
    /// If the node is not a chunk producer within that many blocks, then route
    /// to upcoming chunk producers.
    pub tx_routing_height_horizon: BlockHeightDelta,
    /// Limit the time of adding transactions to a chunk.
    /// A node produces a chunk by adding transactions from the transaction pool until
    /// some limit is reached. This time limit ensures that adding transactions won't take
    /// longer than the specified duration, which helps to produce the chunk quickly.
    pub produce_chunk_add_transactions_time_limit: MutableConfigValue<Option<Duration>>,
}
407
408impl ClientConfig {
409    pub fn test(
410        skip_sync_wait: bool,
411        min_block_prod_time: u64,
412        max_block_prod_time: u64,
413        num_block_producer_seats: NumSeats,
414        archive: bool,
415        save_trie_changes: bool,
416        state_sync_enabled: bool,
417    ) -> Self {
418        assert!(
419            archive || save_trie_changes,
420            "Configuration with archive = false and save_trie_changes = false is not supported \
421            because non-archival nodes must save trie changes in order to do do garbage collection."
422        );
423
424        Self {
425            version: Default::default(),
426            chain_id: "unittest".to_string(),
427            rpc_addr: Some("0.0.0.0:3030".to_string()),
428            expected_shutdown: MutableConfigValue::new(None, "expected_shutdown"),
429            block_production_tracking_delay: Duration::from_millis(std::cmp::max(
430                10,
431                min_block_prod_time / 5,
432            )),
433            min_block_production_delay: Duration::from_millis(min_block_prod_time),
434            max_block_production_delay: Duration::from_millis(max_block_prod_time),
435            max_block_wait_delay: Duration::from_millis(3 * min_block_prod_time),
436            skip_sync_wait,
437            sync_check_period: Duration::from_millis(100),
438            sync_step_period: Duration::from_millis(10),
439            sync_height_threshold: 1,
440            header_sync_initial_timeout: Duration::from_secs(10),
441            header_sync_progress_timeout: Duration::from_secs(2),
442            header_sync_stall_ban_timeout: Duration::from_secs(30),
443            state_sync_timeout: Duration::from_secs(TEST_STATE_SYNC_TIMEOUT),
444            header_sync_expected_height_per_second: 1,
445            min_num_peers: 1,
446            log_summary_period: Duration::from_secs(10),
447            produce_empty_blocks: true,
448            epoch_length: 10,
449            num_block_producer_seats,
450            ttl_account_id_router: Duration::from_secs(60 * 60),
451            block_fetch_horizon: 50,
452            catchup_step_period: Duration::from_millis(1),
453            chunk_request_retry_period: min(
454                Duration::from_millis(100),
455                Duration::from_millis(min_block_prod_time / 5),
456            ),
457            doosmslug_step_period: Duration::from_millis(100),
458            block_header_fetch_horizon: 50,
459            gc: GCConfig { gc_blocks_limit: 100, ..GCConfig::default() },
460            archive,
461            save_trie_changes,
462            log_summary_style: LogSummaryStyle::Colored,
463            view_client_threads: 1,
464            view_client_throttle_period: Duration::from_secs(1),
465            trie_viewer_state_size_limit: None,
466            max_gas_burnt_view: None,
467            enable_statistics_export: true,
468            client_background_migration_threads: 1,
469            flat_storage_creation_enabled: true,
470            flat_storage_creation_period: Duration::from_secs(1),
471            state_sync_enabled,
472            state_sync: StateSyncConfig::default(),
473            transaction_pool_size_limit: None,
474            enable_multiline_logging: false,
475            resharding_config: MutableConfigValue::new(
476                ReshardingConfig::default(),
477                "resharding_config",
478            ),
479            tx_routing_height_horizon: 4,
480            produce_chunk_add_transactions_time_limit: MutableConfigValue::new(
481                default_produce_chunk_add_transactions_time_limit(),
482                "produce_chunk_add_transactions_time_limit",
483            ),
484        }
485    }
486}