Skip to main content

zebra_state/
config.rs

1//! Cached state configuration for Zebra.
2
3use std::{
4    fs::{self, canonicalize, remove_dir_all, DirEntry, ReadDir},
5    io::ErrorKind,
6    path::{Path, PathBuf},
7    time::Duration,
8};
9
10use semver::Version;
11use serde::{Deserialize, Serialize};
12use tokio::task::{spawn_blocking, JoinHandle};
13use tracing::Span;
14
15use zebra_chain::{common::default_cache_dir, parameters::Network};
16
17use crate::{
18    constants::{DATABASE_FORMAT_VERSION_FILE_NAME, STATE_DATABASE_KIND},
19    service::finalized_state::restorable_db_versions,
20    state_database_format_version_in_code, BoxError,
21};
22
23/// Configuration for the state service.
24#[derive(Clone, Debug, Eq, PartialEq, Deserialize, Serialize)]
25#[serde(deny_unknown_fields, default)]
26pub struct Config {
27    /// The root directory for storing cached block data.
28    ///
29    /// If you change this directory, you might also want to change `network.cache_dir`.
30    ///
31    /// This cache stores permanent blockchain state that can be replicated from
32    /// the network, including the best chain, blocks, the UTXO set, and other indexes.
33    /// Any state that can be rolled back is only stored in memory.
34    ///
35    /// The `zebra-state` cache does *not* include any private data, such as wallet data.
36    ///
37    /// You can delete the entire cached state directory, but it will impact your node's
38    /// readiness and network usage. If you do, Zebra will re-sync from genesis the next
39    /// time it is launched.
40    ///
41    /// The default directory is platform dependent, based on
42    /// [`dirs::cache_dir()`](https://docs.rs/dirs/3.0.1/dirs/fn.cache_dir.html):
43    ///
44    /// |Platform | Value                                           | Example                              |
45    /// | ------- | ----------------------------------------------- | ------------------------------------ |
46    /// | Linux   | `$XDG_CACHE_HOME/zebra` or `$HOME/.cache/zebra` | `/home/alice/.cache/zebra`           |
47    /// | macOS   | `$HOME/Library/Caches/zebra`                    | `/Users/Alice/Library/Caches/zebra`  |
48    /// | Windows | `{FOLDERID_LocalAppData}\zebra`                 | `C:\Users\Alice\AppData\Local\zebra` |
49    /// | Other   | `std::env::current_dir()/cache/zebra`           | `/cache/zebra`                       |
50    ///
51    /// # Security
52    ///
53    /// If you are running Zebra with elevated permissions ("root"), create the
54    /// directory for this file before running Zebra, and make sure the Zebra user
55    /// account has exclusive access to that directory, and other users can't modify
56    /// its parent directories.
57    ///
58    /// # Implementation Details
59    ///
60    /// Each state format version and network has a separate state.
61    /// These states are stored in `state/vN/mainnet` and `state/vN/testnet` subdirectories,
62    /// underneath the `cache_dir` path, where `N` is the state format version.
63    ///
64    /// When Zebra's state format changes, it creates a new state subdirectory for that version,
65    /// and re-syncs from genesis.
66    ///
67    /// Old state versions are automatically deleted at startup. You can also manually delete old
68    /// state versions.
69    pub cache_dir: PathBuf,
70
71    /// Whether to use an ephemeral database.
72    ///
73    /// Ephemeral databases are stored in a temporary directory created using [`tempfile::tempdir()`].
74    /// They are deleted when Zebra exits successfully.
75    /// (If Zebra panics or crashes, the ephemeral database won't be deleted.)
76    ///
77    /// Set to `false` by default. If this is set to `true`, [`cache_dir`] is ignored.
78    ///
79    /// Ephemeral directories are created in the [`std::env::temp_dir()`].
80    /// Zebra names each directory after the state version and network, for example: `zebra-state-v21-mainnet-XnyGnE`.
81    ///
82    /// [`cache_dir`]: struct.Config.html#structfield.cache_dir
83    pub ephemeral: bool,
84
85    /// Whether to cache non-finalized blocks on disk to be restored when Zebra restarts.
86    ///
87    /// Set to `true` by default. If this is set to `false`, Zebra will irrecoverably drop
88    /// non-finalized blocks when the process exits and will have to re-download them from
89    /// the network when it restarts, if those blocks are still available in the network.
90    ///
91    /// Note: The non-finalized state will be written to a backup cache once per 5 seconds at most.
92    ///       If blocks are added to the non-finalized state more frequently, the backup may not reflect
93    ///       Zebra's last non-finalized state before it shut down.
94    pub should_backup_non_finalized_state: bool,
95
96    /// Whether to delete the old database directories when present.
97    ///
98    /// Set to `true` by default. If this is set to `false`,
99    /// no check for old database versions will be made and nothing will be
100    /// deleted.
101    pub delete_old_database: bool,
102
103    // Debug configs
104    //
105    /// Commit blocks to the finalized state up to this height, then exit Zebra.
106    ///
107    /// Set to `None` by default: Zebra continues syncing indefinitely.
108    pub debug_stop_at_height: Option<u32>,
109
110    /// While Zebra is running, check state validity this often.
111    ///
112    /// Set to `None` by default: Zebra only checks state format validity on startup and shutdown.
113    #[serde(with = "humantime_serde")]
114    pub debug_validity_check_interval: Option<Duration>,
115
116    // Elasticsearch configs
117    //
118    #[cfg(feature = "elasticsearch")]
119    /// The elasticsearch database url.
120    pub elasticsearch_url: String,
121
122    #[cfg(feature = "elasticsearch")]
123    /// The elasticsearch database username.
124    pub elasticsearch_username: String,
125
126    #[cfg(feature = "elasticsearch")]
127    /// The elasticsearch database password.
128    pub elasticsearch_password: String,
129}
130
131fn gen_temp_path(prefix: &str) -> PathBuf {
132    tempfile::Builder::new()
133        .prefix(prefix)
134        .tempdir()
135        .expect("temporary directory is created successfully")
136        .keep()
137}
138
139impl Config {
140    /// Returns the path for the database, based on the kind, major version and network.
141    /// Each incompatible database format or network gets its own unique path.
142    pub fn db_path(
143        &self,
144        db_kind: impl AsRef<str>,
145        major_version: u64,
146        network: &Network,
147    ) -> PathBuf {
148        let db_kind = db_kind.as_ref();
149        let major_version = format!("v{major_version}");
150        let net_dir = network.lowercase_name();
151
152        if self.ephemeral {
153            gen_temp_path(&format!("zebra-{db_kind}-{major_version}-{net_dir}-"))
154        } else {
155            self.cache_dir
156                .join(db_kind)
157                .join(major_version)
158                .join(net_dir)
159        }
160    }
161
162    /// Returns the path for the non-finalized state backup directory, based on the network.
163    /// Non-finalized state backup files are encoded in the network protocol format and remain
164    /// valid across db format upgrades.
165    pub fn non_finalized_state_backup_dir(&self, network: &Network) -> Option<PathBuf> {
166        if self.ephemeral || !self.should_backup_non_finalized_state {
167            // Ephemeral databases are intended to be irrecoverable across restarts and don't
168            // require a backup for the non-finalized state.
169            return None;
170        }
171
172        let net_dir = network.lowercase_name();
173        Some(self.cache_dir.join("non_finalized_state").join(net_dir))
174    }
175
176    /// Returns the path for the database format minor/patch version file,
177    /// based on the kind, major version and network.
178    pub fn version_file_path(
179        &self,
180        db_kind: impl AsRef<str>,
181        major_version: u64,
182        network: &Network,
183    ) -> PathBuf {
184        let mut version_path = self.db_path(db_kind, major_version, network);
185
186        version_path.push(DATABASE_FORMAT_VERSION_FILE_NAME);
187
188        version_path
189    }
190
191    /// Returns a config for a temporary database that is deleted when it is dropped.
192    pub fn ephemeral() -> Config {
193        Config {
194            ephemeral: true,
195            ..Config::default()
196        }
197    }
198}
199
200impl Default for Config {
201    fn default() -> Self {
202        Self {
203            cache_dir: default_cache_dir(),
204            ephemeral: false,
205            should_backup_non_finalized_state: true,
206            delete_old_database: true,
207            debug_stop_at_height: None,
208            debug_validity_check_interval: None,
209            #[cfg(feature = "elasticsearch")]
210            elasticsearch_url: "https://localhost:9200".to_string(),
211            #[cfg(feature = "elasticsearch")]
212            elasticsearch_username: "elastic".to_string(),
213            #[cfg(feature = "elasticsearch")]
214            elasticsearch_password: "".to_string(),
215        }
216    }
217}
218
219// Cleaning up old database versions
220// TODO: put this in a different module?
221
222/// Spawns a task that checks if there are old state database folders,
223/// and deletes them from the filesystem.
224///
225/// See `check_and_delete_old_databases()` for details.
226pub fn check_and_delete_old_state_databases(config: &Config, network: &Network) -> JoinHandle<()> {
227    check_and_delete_old_databases(
228        config,
229        STATE_DATABASE_KIND,
230        state_database_format_version_in_code().major,
231        network,
232    )
233}
234
235/// Spawns a task that checks if there are old database folders,
236/// and deletes them from the filesystem.
237///
238/// Iterate over the files and directories in the databases folder and delete if:
239/// - The `db_kind` directory exists.
240/// - The entry in `db_kind` is a directory.
241/// - The directory name has a prefix `v`.
242/// - The directory name without the prefix can be parsed as an unsigned number.
243/// - The parsed number is lower than the `major_version`.
244///
245/// The network is used to generate the path, then ignored.
246/// If `config` is an ephemeral database, no databases are deleted.
247///
248/// # Panics
249///
250/// If the path doesn't match the expected `db_kind/major_version/network` format.
251pub fn check_and_delete_old_databases(
252    config: &Config,
253    db_kind: impl AsRef<str>,
254    major_version: u64,
255    network: &Network,
256) -> JoinHandle<()> {
257    let current_span = Span::current();
258    let config = config.clone();
259    let db_kind = db_kind.as_ref().to_string();
260    let network = network.clone();
261
262    spawn_blocking(move || {
263        current_span.in_scope(|| {
264            delete_old_databases(config, db_kind, major_version, &network);
265            info!("finished old database version cleanup task");
266        })
267    })
268}
269
270/// Check if there are old database folders and delete them from the filesystem.
271///
272/// See [`check_and_delete_old_databases`] for details.
273fn delete_old_databases(config: Config, db_kind: String, major_version: u64, network: &Network) {
274    if config.ephemeral || !config.delete_old_database {
275        return;
276    }
277
278    info!(db_kind, "checking for old database versions");
279
280    let restorable_db_versions = restorable_db_versions();
281
282    let mut db_path = config.db_path(&db_kind, major_version, network);
283    // Check and remove the network path.
284    assert_eq!(
285        db_path.file_name(),
286        Some(network.lowercase_name().as_ref()),
287        "unexpected database network path structure"
288    );
289    assert!(db_path.pop());
290
291    // Check and remove the major version path, we'll iterate over them all below.
292    assert_eq!(
293        db_path.file_name(),
294        Some(format!("v{major_version}").as_ref()),
295        "unexpected database version path structure"
296    );
297    assert!(db_path.pop());
298
299    // Check for the correct database kind to iterate within.
300    assert_eq!(
301        db_path.file_name(),
302        Some(db_kind.as_ref()),
303        "unexpected database kind path structure"
304    );
305
306    if let Some(db_kind_dir) = read_dir(&db_path) {
307        for entry in db_kind_dir.flatten() {
308            let deleted_db =
309                check_and_delete_database(&config, major_version, &restorable_db_versions, &entry);
310
311            if let Some(deleted_db) = deleted_db {
312                info!(?deleted_db, "deleted outdated {db_kind} database directory");
313            }
314        }
315    }
316}
317
/// Return a `ReadDir` for `dir`, if `dir` exists and can be read as a directory.
///
/// Returns `None` if `dir` is missing, is not a directory, or can't be opened.
fn read_dir(dir: &Path) -> Option<ReadDir> {
    // Opening the directory already fails when it is missing, so a separate
    // `exists()` check would only add a redundant syscall and a check-then-use race.
    dir.read_dir().ok()
}
329
330/// Check if `entry` is an old database directory, and delete it from the filesystem.
331/// See [`check_and_delete_old_databases`] for details.
332///
333/// If the directory was deleted, returns its path.
334fn check_and_delete_database(
335    config: &Config,
336    major_version: u64,
337    restorable_db_versions: &[u64],
338    entry: &DirEntry,
339) -> Option<PathBuf> {
340    let dir_name = parse_dir_name(entry)?;
341    let dir_major_version = parse_major_version(&dir_name)?;
342
343    if dir_major_version >= major_version {
344        return None;
345    }
346
347    // Don't delete databases that can be reused.
348    if restorable_db_versions
349        .iter()
350        .map(|v| v - 1)
351        .any(|v| v == dir_major_version)
352    {
353        return None;
354    }
355
356    let outdated_path = entry.path();
357
358    // # Correctness
359    //
360    // Check that the path we're about to delete is inside the cache directory.
361    // If the user has symlinked the outdated state directory to a non-cache directory,
362    // we don't want to delete it, because it might contain other files.
363    //
364    // We don't attempt to guard against malicious symlinks created by attackers
365    // (TOCTOU attacks). Zebra should not be run with elevated privileges.
366    let cache_path = canonicalize(&config.cache_dir).ok()?;
367    let outdated_path = canonicalize(outdated_path).ok()?;
368
369    if !outdated_path.starts_with(&cache_path) {
370        info!(
371            skipped_path = ?outdated_path,
372            ?cache_path,
373            "skipped cleanup of outdated state directory: state is outside cache directory",
374        );
375
376        return None;
377    }
378
379    remove_dir_all(&outdated_path).ok().map(|()| outdated_path)
380}
381
/// Returns the name of `entry`, if `entry` is a directory and its name is valid UTF-8.
/// (State directory names are guaranteed to be UTF-8.)
///
/// Returns `None` if the file type can't be read, `entry` is not a directory,
/// or the name is not valid UTF-8.
fn parse_dir_name(entry: &DirEntry) -> Option<String> {
    let file_type = entry.file_type().ok()?;

    if !file_type.is_dir() {
        return None;
    }

    entry.file_name().into_string().ok()
}
396
/// Extracts the database major version number from a `v<N>` directory name.
///
/// Returns `None` if `dir_name` doesn't start with `v`, or if the rest of the name
/// can't be parsed as a `u64`.
fn parse_major_version(dir_name: &str) -> Option<u64> {
    let number = dir_name.strip_prefix('v')?;

    number.parse::<u64>().ok()
}
405
406// TODO: move these to the format upgrade module
407
408/// Returns the full semantic version of the on-disk state database, based on its config and network.
409pub fn state_database_format_version_on_disk(
410    config: &Config,
411    network: &Network,
412) -> Result<Option<Version>, BoxError> {
413    database_format_version_on_disk(
414        config,
415        STATE_DATABASE_KIND,
416        state_database_format_version_in_code().major,
417        network,
418    )
419}
420
421/// Returns the full semantic version of the on-disk database, based on its config, kind, major version,
422/// and network.
423///
424/// Typically, the version is read from a version text file.
425///
426/// If there is an existing on-disk database, but no version file,
427/// returns `Ok(Some(major_version.0.0))`.
428/// (This happens even if the database directory was just newly created.)
429///
430/// If there is no existing on-disk database, returns `Ok(None)`.
431///
432/// This is the format of the data on disk, the version
433/// implemented by the running Zebra code can be different.
434pub fn database_format_version_on_disk(
435    config: &Config,
436    db_kind: impl AsRef<str>,
437    major_version: u64,
438    network: &Network,
439) -> Result<Option<Version>, BoxError> {
440    let version_path = config.version_file_path(&db_kind, major_version, network);
441    let db_path = config.db_path(db_kind, major_version, network);
442
443    database_format_version_at_path(&version_path, &db_path, major_version)
444}
445
446/// Returns the full semantic version of the on-disk database at `version_path`.
447///
448/// See [`database_format_version_on_disk()`] for details.
449pub(crate) fn database_format_version_at_path(
450    version_path: &Path,
451    db_path: &Path,
452    major_version: u64,
453) -> Result<Option<Version>, BoxError> {
454    let disk_version_file = match fs::read_to_string(version_path) {
455        Ok(version) => Some(version),
456        Err(e) if e.kind() == ErrorKind::NotFound => {
457            // If the version file doesn't exist, don't guess the version yet.
458            None
459        }
460        Err(e) => Err(e)?,
461    };
462
463    // The database has a version file on disk
464    if let Some(version) = disk_version_file {
465        return Ok(Some(
466            version
467                .parse()
468                // Try to parse the previous format of the disk version file if it cannot be parsed as a `Version` directly.
469                .or_else(|err| {
470                    format!("{major_version}.{version}")
471                        .parse()
472                        .map_err(|err2| format!("failed to parse format version: {err}, {err2}"))
473                })?,
474        ));
475    }
476
477    // There's no version file on disk, so we need to guess the version
478    // based on the database content
479    match fs::metadata(db_path) {
480        // But there is a database on disk, so it has the current major version with no upgrades.
481        // If the database directory was just newly created, we also return this version.
482        Ok(_metadata) => Ok(Some(Version::new(major_version, 0, 0))),
483
484        // There's no version file and no database on disk, so it's a new database.
485        // It will be created with the current version,
486        // but temporarily return the default version above until the version file is written.
487        Err(e) if e.kind() == ErrorKind::NotFound => Ok(None),
488
489        Err(e) => Err(e)?,
490    }
491}
492
493// Hide this destructive method from the public API, except in tests.
494#[allow(unused_imports)]
495pub(crate) use hidden::{
496    write_database_format_version_to_disk, write_state_database_format_version_to_disk,
497};
498
499pub(crate) mod hidden {
500    #![allow(dead_code)]
501
502    use zebra_chain::common::atomic_write;
503
504    use super::*;
505
506    /// Writes `changed_version` to the on-disk state database after the format is changed.
507    /// (Or a new database is created.)
508    ///
509    /// See `write_database_format_version_to_disk()` for details.
510    pub fn write_state_database_format_version_to_disk(
511        config: &Config,
512        changed_version: &Version,
513        network: &Network,
514    ) -> Result<(), BoxError> {
515        write_database_format_version_to_disk(
516            config,
517            STATE_DATABASE_KIND,
518            state_database_format_version_in_code().major,
519            changed_version,
520            network,
521        )
522    }
523
524    /// Writes `changed_version` to the on-disk database after the format is changed.
525    /// (Or a new database is created.)
526    ///
527    /// The database path is based on its kind, `major_version_in_code`, and network.
528    ///
529    /// # Correctness
530    ///
531    /// This should only be called:
532    /// - after each format upgrade is complete,
533    /// - when creating a new database, or
534    /// - when an older Zebra version opens a newer database.
535    ///
536    /// # Concurrency
537    ///
538    /// This must only be called while RocksDB has an open database for `config`.
539    /// Otherwise, multiple Zebra processes could write the version at the same time,
540    /// corrupting the file.
541    pub fn write_database_format_version_to_disk(
542        config: &Config,
543        db_kind: impl AsRef<str>,
544        major_version_in_code: u64,
545        changed_version: &Version,
546        network: &Network,
547    ) -> Result<(), BoxError> {
548        // Write the version file atomically so the cache is not corrupted if Zebra shuts down or
549        // crashes.
550        atomic_write(
551            config.version_file_path(db_kind, major_version_in_code, network),
552            changed_version.to_string().as_bytes(),
553        )??;
554
555        Ok(())
556    }
557}