solana_runtime/
snapshot_utils.rs

1use {
2    crate::{
3        account_storage::AccountStorageMap,
4        accounts_db::{
5            AccountShrinkThreshold, AccountStorageEntry, AccountsDbConfig, AtomicAppendVecId,
6            CalcAccountsHashDataSource,
7        },
8        accounts_index::AccountSecondaryIndexes,
9        accounts_update_notifier_interface::AccountsUpdateNotifier,
10        append_vec::AppendVec,
11        bank::{Bank, BankFieldsToDeserialize, BankSlotDelta},
12        builtins::Builtins,
13        hardened_unpack::{
14            streaming_unpack_snapshot, unpack_snapshot, ParallelSelector, UnpackError,
15            UnpackedAppendVecMap,
16        },
17        runtime_config::RuntimeConfig,
18        serde_snapshot::{
19            bank_from_streams, bank_to_stream, fields_from_streams, serialized_bank_from_stream,
20            SerdeStyle, SnapshotStreams,
21        },
22        shared_buffer_reader::{SharedBuffer, SharedBufferReader},
23        snapshot_archive_info::{
24            FullSnapshotArchiveInfo, IncrementalSnapshotArchiveInfo, SnapshotArchiveInfoGetter,
25        },
26        snapshot_hash::SnapshotHash,
27        snapshot_package::{AccountsPackage, AccountsPackageType, SnapshotPackage, SnapshotType},
28        snapshot_utils::snapshot_storage_rebuilder::{
29            RebuiltSnapshotStorage, SnapshotStorageRebuilder,
30        },
31        status_cache,
32    },
33    bincode::{config::Options, serialize_into},
34    bzip2::bufread::BzDecoder,
35    crossbeam_channel::Sender,
36    flate2::read::GzDecoder,
37    lazy_static::lazy_static,
38    log::*,
39    rayon::prelude::*,
40    regex::Regex,
41    solana_measure::{measure, measure::Measure},
42    solana_sdk::{
43        clock::Slot,
44        genesis_config::GenesisConfig,
45        hash::Hash,
46        pubkey::Pubkey,
47        slot_history::{Check, SlotHistory},
48    },
49    std::{
50        cmp::Ordering,
51        collections::{HashMap, HashSet},
52        fmt,
53        fs::{self, File},
54        io::{BufReader, BufWriter, Error as IoError, ErrorKind, Read, Seek, Write},
55        path::{Path, PathBuf},
56        process::ExitStatus,
57        str::FromStr,
58        sync::{
59            atomic::{AtomicBool, AtomicU32},
60            Arc,
61        },
62        thread::{Builder, JoinHandle},
63    },
64    tar::{self, Archive},
65    tempfile::TempDir,
66    thiserror::Error,
67};
68
69mod archive_format;
70mod snapshot_storage_rebuilder;
71use std::sync::RwLock;
72
73pub use archive_format::*;
74use dashmap::DashMap;
75
76use crate::serde_snapshot::SnapshotAccountsDbFields;
77use crate::{
78    account_storage::AccountStorageReference,
79    serde_snapshot::storage::SerializableAccountStorageEntry,
80};
81
/// File name `status_cache`.  NOTE(review): not referenced in the visible part of this file;
/// presumably the serialized status cache inside a bank snapshot — confirm.
pub const SNAPSHOT_STATUS_CACHE_FILENAME: &str = "status_cache";
/// Name of the file holding the snapshot version string; `archive_snapshot_package()` writes it
/// at the root of the staging directory and adds it to the archive first.
pub const SNAPSHOT_VERSION_FILENAME: &str = "version";
/// File name `state_complete`.  NOTE(review): usage is not visible here — confirm its meaning.
pub const SNAPSHOT_STATE_COMPLETE_FILENAME: &str = "state_complete";
/// Directory name `remote`.  NOTE(review): presumably where downloaded snapshot archives are
/// placed — confirm against callers.
pub const SNAPSHOT_ARCHIVE_DOWNLOAD_DIR: &str = "remote";
/// Default interval, in slots, between full snapshot archives
pub const DEFAULT_FULL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS: Slot = 25_000;
/// Default interval, in slots, between incremental snapshot archives
pub const DEFAULT_INCREMENTAL_SNAPSHOT_ARCHIVE_INTERVAL_SLOTS: Slot = 100;
/// Size cap passed to the `*_capped` helpers when serializing or deserializing snapshot data
/// files
const MAX_SNAPSHOT_DATA_FILE_SIZE: u64 = 32 * 1024 * 1024 * 1024; // 32 GiB
/// Size cap for the snapshot version file
const MAX_SNAPSHOT_VERSION_FILE_SIZE: u64 = 8; // bytes
/// String form of `SnapshotVersion::V1_2_0`
const VERSION_STRING_V1_2_0: &str = "1.2.0";
/// Name prefix `tmp-bank-snapshot-`.  NOTE(review): presumably used for temporary bank snapshot
/// directories — usage is not visible here.
pub(crate) const TMP_BANK_SNAPSHOT_PREFIX: &str = "tmp-bank-snapshot-";
/// Name prefix shared by the staging directory and the in-progress archive file created by
/// `archive_snapshot_package()`; `remove_tmp_snapshot_archives()` deletes entries starting with it.
pub const TMP_SNAPSHOT_ARCHIVE_PREFIX: &str = "tmp-snapshot-archive-";
/// File extension that marks a bank snapshot file as "pre" accounts hash
pub const BANK_SNAPSHOT_PRE_FILENAME_EXTENSION: &str = "pre";
pub const MAX_BANK_SNAPSHOTS_TO_RETAIN: usize = 8; // Save some bank snapshots but not too many
/// Default number of full snapshot archives to retain
pub const DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN: usize = 2;
/// Default number of incremental snapshot archives to retain
pub const DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN: usize = 4;
/// Regex for full snapshot archive file names: `snapshot-<slot>-<hash>.<ext>`, exposing the
/// named groups `slot`, `hash` and `ext`.
pub const FULL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^snapshot-(?P<slot>[[:digit:]]+)-(?P<hash>[[:alnum:]]+)\.(?P<ext>tar|tar\.bz2|tar\.zst|tar\.gz|tar\.lz4)$";
/// Regex for incremental snapshot archive file names:
/// `incremental-snapshot-<base>-<slot>-<hash>.<ext>`, exposing the named groups `base`, `slot`,
/// `hash` and `ext`.
pub const INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX: &str = r"^incremental-snapshot-(?P<base>[[:digit:]]+)-(?P<slot>[[:digit:]]+)-(?P<hash>[[:alnum:]]+)\.(?P<ext>tar|tar\.bz2|tar\.zst|tar\.gz|tar\.lz4)$";
99
/// Snapshot format versions known to this module.
///
/// `V1_2_0` is the only variant and is also the `Default`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum SnapshotVersion {
    /// Snapshot format version "1.2.0"
    #[default]
    V1_2_0,
}
105
106impl fmt::Display for SnapshotVersion {
107    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
108        f.write_str(From::from(*self))
109    }
110}
111
112impl From<SnapshotVersion> for &'static str {
113    fn from(snapshot_version: SnapshotVersion) -> &'static str {
114        match snapshot_version {
115            SnapshotVersion::V1_2_0 => VERSION_STRING_V1_2_0,
116        }
117    }
118}
119
120impl FromStr for SnapshotVersion {
121    type Err = &'static str;
122
123    fn from_str(version_string: &str) -> std::result::Result<Self, Self::Err> {
124        // Remove leading 'v' or 'V' from slice
125        let version_string = if version_string
126            .get(..1)
127            .map_or(false, |s| s.eq_ignore_ascii_case("v"))
128        {
129            &version_string[1..]
130        } else {
131            version_string
132        };
133        match version_string {
134            VERSION_STRING_V1_2_0 => Ok(SnapshotVersion::V1_2_0),
135            _ => Err("unsupported snapshot version"),
136        }
137    }
138}
139
140impl SnapshotVersion {
141    pub fn as_str(self) -> &'static str {
142        <&str as From<Self>>::from(self)
143    }
144}
145
146/// Information about a bank snapshot. Namely the slot of the bank, the path to the snapshot, and
147/// the type of the snapshot.
148#[derive(PartialEq, Eq, Debug)]
149pub struct BankSnapshotInfo {
150    /// Slot of the bank
151    pub slot: Slot,
152    /// Type of the snapshot
153    pub snapshot_type: BankSnapshotType,
154    /// Path to the bank snapshot directory
155    pub snapshot_dir: PathBuf,
156}
157
158impl PartialOrd for BankSnapshotInfo {
159    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
160        Some(self.cmp(other))
161    }
162}
163
164// Order BankSnapshotInfo by slot (ascending), which practically is sorting chronologically
165impl Ord for BankSnapshotInfo {
166    fn cmp(&self, other: &Self) -> Ordering {
167        self.slot.cmp(&other.slot)
168    }
169}
170
171impl BankSnapshotInfo {
172    pub fn new_from_dir(
173        bank_snapshots_dir: impl AsRef<Path>,
174        slot: Slot,
175    ) -> Option<BankSnapshotInfo> {
176        // check this directory to see if there is a BankSnapshotPre and/or
177        // BankSnapshotPost file
178        let bank_snapshot_dir = get_bank_snapshots_dir(&bank_snapshots_dir, slot);
179        let bank_snapshot_post_path = bank_snapshot_dir.join(get_snapshot_file_name(slot));
180        let bank_snapshot_pre_path =
181            bank_snapshot_post_path.with_extension(BANK_SNAPSHOT_PRE_FILENAME_EXTENSION);
182
183        if bank_snapshot_pre_path.is_file() {
184            return Some(BankSnapshotInfo {
185                slot,
186                snapshot_type: BankSnapshotType::Pre,
187                snapshot_dir: bank_snapshot_dir,
188            });
189        }
190
191        if bank_snapshot_post_path.is_file() {
192            return Some(BankSnapshotInfo {
193                slot,
194                snapshot_type: BankSnapshotType::Post,
195                snapshot_dir: bank_snapshot_dir,
196            });
197        }
198
199        None
200    }
201
202    pub fn snapshot_path(&self) -> PathBuf {
203        let mut bank_snapshot_path = self.snapshot_dir.join(get_snapshot_file_name(self.slot));
204
205        let ext = match self.snapshot_type {
206            BankSnapshotType::Pre => BANK_SNAPSHOT_PRE_FILENAME_EXTENSION,
207            BankSnapshotType::Post => "",
208        };
209        bank_snapshot_path.set_extension(ext);
210
211        bank_snapshot_path
212    }
213}
/// Distinguishes bank snapshots taken before and after the accounts hash calculation.
///
/// Historically the accounts hash was calculated before a bank snapshot was serialized.  Because
/// that calculation is slow it is now offloaded, and since the bank serialization format itself
/// did not change, something else has to record whether a snapshot already contains the
/// calculated accounts hash.
///
/// A freshly taken bank snapshot lacks the calculated accounts hash and is called "pre" accounts
/// hash.  Once the hash has been calculated the bank snapshot is re-serialized and becomes "post"
/// accounts hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BankSnapshotType {
    /// The accounts hash has *not* been calculated for this bank snapshot yet
    Pre,
    /// The accounts hash *has* been calculated for this bank snapshot
    Post,
}
229
/// Helper type when rebuilding from snapshots.  Designed to handle when rebuilding from just a
/// full snapshot, or from both a full snapshot and an incremental snapshot.
#[derive(Debug)]
struct SnapshotRootPaths {
    /// Path of the full snapshot's root file; always present
    full_snapshot_root_file_path: PathBuf,
    /// Path of the incremental snapshot's root file; `None` when only a full snapshot is used
    incremental_snapshot_root_file_path: Option<PathBuf>,
}
237
/// Helper type to bundle up the results from `unarchive_snapshot()`
#[derive(Debug)]
struct UnarchivedSnapshot {
    // NOTE(review): never read (hence the `allow`); presumably held only so the temporary
    // directory stays alive as long as this value does — confirm.
    #[allow(dead_code)]
    unpack_dir: TempDir,
    /// Account storage map produced while unarchiving
    storage: AccountStorageMap,
    /// Directory of the unpacked snapshots together with the snapshot version
    unpacked_snapshots_dir_and_version: UnpackedSnapshotsDirAndVersion,
    /// Timing measurement for the untar step
    measure_untar: Measure,
}
247
/// Helper type for passing around the unpacked snapshots dir and the snapshot version together
#[derive(Debug)]
struct UnpackedSnapshotsDirAndVersion {
    /// Directory holding the unpacked snapshots
    unpacked_snapshots_dir: PathBuf,
    /// Version of the snapshot that was unpacked
    snapshot_version: SnapshotVersion,
}
254
/// Helper type for passing around account storage map and next append vec id
/// for reconstructing accounts from a snapshot
pub(crate) struct StorageAndNextAppendVecId {
    /// The account storage map
    pub storage: AccountStorageMap,
    /// The next append vec id to hand out
    pub next_append_vec_id: AtomicAppendVecId,
}
261
262#[derive(Error, Debug)]
263#[allow(clippy::large_enum_variant)]
264pub enum SnapshotError {
265    #[error("I/O error: {0}")]
266    Io(#[from] std::io::Error),
267
268    #[error("serialization error: {0}")]
269    Serialize(#[from] bincode::Error),
270
271    #[error("archive generation failure {0}")]
272    ArchiveGenerationFailure(ExitStatus),
273
274    #[error("storage path symlink is invalid")]
275    StoragePathSymlinkInvalid,
276
277    #[error("Unpack error: {0}")]
278    UnpackError(#[from] UnpackError),
279
280    #[error("source({1}) - I/O error: {0}")]
281    IoWithSource(std::io::Error, &'static str),
282
283    #[error("source({1}) - I/O error: {0}, file: {2}")]
284    IoWithSourceAndFile(#[source] std::io::Error, &'static str, PathBuf),
285
286    #[error("could not get file name from path: {}", .0.display())]
287    PathToFileNameError(PathBuf),
288
289    #[error("could not get str from file name: {}", .0.display())]
290    FileNameToStrError(PathBuf),
291
292    #[error("could not parse snapshot archive's file name: {0}")]
293    ParseSnapshotArchiveFileNameError(String),
294
295    #[error("snapshots are incompatible: full snapshot slot ({0}) and incremental snapshot base slot ({1}) do not match")]
296    MismatchedBaseSlot(Slot, Slot),
297
298    #[error("no snapshot archives to load from")]
299    NoSnapshotArchives,
300
301    #[error("snapshot has mismatch: deserialized bank: {:?}, snapshot archive info: {:?}", .0, .1)]
302    MismatchedSlotHash((Slot, SnapshotHash), (Slot, SnapshotHash)),
303
304    #[error("snapshot slot deltas are invalid: {0}")]
305    VerifySlotDeltas(#[from] VerifySlotDeltasError),
306
307    #[error("invalid AppendVec path: {}", .0.display())]
308    InvalidAppendVecPath(PathBuf),
309}
310pub type Result<T> = std::result::Result<T, SnapshotError>;
311
312/// Errors that can happen in `verify_slot_deltas()`
313#[derive(Error, Debug, PartialEq, Eq)]
314pub enum VerifySlotDeltasError {
315    #[error("too many entries: {0} (max: {1})")]
316    TooManyEntries(usize, usize),
317
318    #[error("slot {0} is not a root")]
319    SlotIsNotRoot(Slot),
320
321    #[error("slot {0} is greater than bank slot {1}")]
322    SlotGreaterThanMaxRoot(Slot, Slot),
323
324    #[error("slot {0} has multiple entries")]
325    SlotHasMultipleEntries(Slot),
326
327    #[error("slot {0} was not found in slot history")]
328    SlotNotFoundInHistory(Slot),
329
330    #[error("slot {0} was in history but missing from slot deltas")]
331    SlotNotFoundInDeltas(Slot),
332
333    #[error("slot history is bad and cannot be used to verify slot deltas")]
334    BadSlotHistory,
335}
336
337/// Delete the files and subdirectories in a directory.
338/// This is useful if the process does not have permission
339/// to delete the top level directory it might be able to
340/// delete the contents of that directory.
341fn delete_contents_of_path(path: impl AsRef<Path> + Copy) {
342    if let Ok(dir_entries) = std::fs::read_dir(path) {
343        for entry in dir_entries.flatten() {
344            let sub_path = entry.path();
345            let metadata = match entry.metadata() {
346                Ok(metadata) => metadata,
347                Err(err) => {
348                    warn!(
349                        "Failed to get metadata for {}. Error: {}",
350                        sub_path.display(),
351                        err.to_string()
352                    );
353                    break;
354                }
355            };
356            if metadata.is_dir() {
357                if let Err(err) = std::fs::remove_dir_all(&sub_path) {
358                    warn!(
359                        "Failed to remove sub directory {}.  Error: {}",
360                        sub_path.display(),
361                        err.to_string()
362                    );
363                }
364            } else if metadata.is_file() {
365                if let Err(err) = std::fs::remove_file(&sub_path) {
366                    warn!(
367                        "Failed to remove file {}.  Error: {}",
368                        sub_path.display(),
369                        err.to_string()
370                    );
371                }
372            }
373        }
374    } else {
375        warn!(
376            "Failed to read the sub paths of {}",
377            path.as_ref().display()
378        );
379    }
380}
381
382/// Delete directories/files asynchronously to avoid blocking on it.
383/// Fist, in sync context, rename the original path to *_deleted,
384/// then spawn a thread to delete the renamed path.
385/// If the process is killed and the deleting process is not done,
386/// the leftover path will be deleted in the next process life, so
387/// there is no file space leaking.
388pub fn move_and_async_delete_path(path: impl AsRef<Path> + Copy) {
389    let mut path_delete = PathBuf::new();
390    path_delete.push(path);
391    path_delete.set_file_name(format!(
392        "{}{}",
393        path_delete.file_name().unwrap().to_str().unwrap(),
394        "_to_be_deleted"
395    ));
396
397    if path_delete.exists() {
398        std::fs::remove_dir_all(&path_delete).unwrap();
399    }
400
401    if !path.as_ref().exists() {
402        return;
403    }
404
405    if let Err(err) = std::fs::rename(path, &path_delete) {
406        warn!(
407            "Path renaming failed: {}.  Falling back to rm_dir in sync mode",
408            err.to_string()
409        );
410        delete_contents_of_path(path);
411        return;
412    }
413
414    Builder::new()
415        .name("solDeletePath".to_string())
416        .spawn(move || {
417            std::fs::remove_dir_all(path_delete).unwrap();
418        })
419        .unwrap();
420}
421
422/// If the validator halts in the middle of `archive_snapshot_package()`, the temporary staging
423/// directory won't be cleaned up.  Call this function to clean them up.
424pub fn remove_tmp_snapshot_archives(snapshot_archives_dir: impl AsRef<Path>) {
425    if let Ok(entries) = fs::read_dir(snapshot_archives_dir) {
426        for entry in entries.filter_map(|entry| entry.ok()) {
427            let file_name = entry
428                .file_name()
429                .into_string()
430                .unwrap_or_else(|_| String::new());
431            if file_name.starts_with(TMP_SNAPSHOT_ARCHIVE_PREFIX) {
432                if entry.path().is_file() {
433                    fs::remove_file(entry.path())
434                } else {
435                    fs::remove_dir_all(entry.path())
436                }
437                .unwrap_or_else(|err| {
438                    warn!("Failed to remove {}: {}", entry.path().display(), err)
439                });
440            }
441        }
442    }
443}
444
445/// Write the snapshot version as a file into the bank snapshot directory
446pub fn write_snapshot_version_file(
447    version_file: impl AsRef<Path>,
448    version: SnapshotVersion,
449) -> Result<()> {
450    let mut f = fs::File::create(version_file)
451        .map_err(|e| SnapshotError::IoWithSource(e, "create version file"))?;
452    f.write_all(version.as_str().as_bytes())
453        .map_err(|e| SnapshotError::IoWithSource(e, "write version file"))?;
454    Ok(())
455}
456
457/// Make a snapshot archive out of the snapshot package
458pub fn archive_snapshot_package(
459    snapshot_package: &SnapshotPackage,
460    full_snapshot_archives_dir: impl AsRef<Path>,
461    incremental_snapshot_archives_dir: impl AsRef<Path>,
462    maximum_full_snapshot_archives_to_retain: usize,
463    maximum_incremental_snapshot_archives_to_retain: usize,
464) -> Result<()> {
465    info!(
466        "Generating snapshot archive for slot {}",
467        snapshot_package.slot()
468    );
469
470    let mut timer = Measure::start("snapshot_package-package_snapshots");
471    let tar_dir = snapshot_package
472        .path()
473        .parent()
474        .expect("Tar output path is invalid");
475
476    fs::create_dir_all(tar_dir).map_err(|e| {
477        SnapshotError::IoWithSourceAndFile(e, "create archive path", tar_dir.into())
478    })?;
479
480    // Create the staging directories
481    let staging_dir_prefix = TMP_SNAPSHOT_ARCHIVE_PREFIX;
482    let staging_dir = tempfile::Builder::new()
483        .prefix(&format!(
484            "{}{}-",
485            staging_dir_prefix,
486            snapshot_package.slot()
487        ))
488        .tempdir_in(tar_dir)
489        .map_err(|e| SnapshotError::IoWithSource(e, "create archive tempdir"))?;
490
491    let staging_accounts_dir = staging_dir.path().join("accounts");
492    let staging_snapshots_dir = staging_dir.path().join("snapshots");
493    let staging_version_file = staging_dir.path().join(SNAPSHOT_VERSION_FILENAME);
494    fs::create_dir_all(&staging_accounts_dir).map_err(|e| {
495        SnapshotError::IoWithSourceAndFile(e, "create staging path", staging_accounts_dir.clone())
496    })?;
497
498    // Add the snapshots to the staging directory
499    symlink::symlink_dir(
500        snapshot_package.snapshot_links.path(),
501        staging_snapshots_dir,
502    )
503    .map_err(|e| SnapshotError::IoWithSource(e, "create staging symlinks"))?;
504
505    // Add the AppendVecs into the compressible list
506    for storage in snapshot_package.snapshot_storages.iter() {
507        storage.flush()?;
508        let storage_path = storage.get_path();
509        let output_path = staging_accounts_dir.join(crate::append_vec::AppendVec::file_name(
510            storage.slot(),
511            storage.append_vec_id(),
512        ));
513
514        // `storage_path` - The file path where the AppendVec itself is located
515        // `output_path` - The file path where the AppendVec will be placed in the staging directory.
516        let storage_path =
517            fs::canonicalize(storage_path).expect("Could not get absolute path for accounts");
518        symlink::symlink_file(storage_path, &output_path)
519            .map_err(|e| SnapshotError::IoWithSource(e, "create storage symlink"))?;
520        if !output_path.is_file() {
521            return Err(SnapshotError::StoragePathSymlinkInvalid);
522        }
523    }
524
525    write_snapshot_version_file(staging_version_file, snapshot_package.snapshot_version)?;
526
527    // Tar the staging directory into the archive at `archive_path`
528    let archive_path = tar_dir.join(format!(
529        "{}{}.{}",
530        staging_dir_prefix,
531        snapshot_package.slot(),
532        snapshot_package.archive_format().extension(),
533    ));
534
535    {
536        let mut archive_file = fs::File::create(&archive_path)?;
537
538        let do_archive_files = |encoder: &mut dyn Write| -> Result<()> {
539            let mut archive = tar::Builder::new(encoder);
540            // Serialize the version and snapshots files before accounts so we can quickly determine the version
541            // and other bank fields. This is necessary if we want to interleave unpacking with reconstruction
542            archive.append_path_with_name(
543                staging_dir.as_ref().join(SNAPSHOT_VERSION_FILENAME),
544                SNAPSHOT_VERSION_FILENAME,
545            )?;
546            for dir in ["snapshots", "accounts"] {
547                archive.append_dir_all(dir, staging_dir.as_ref().join(dir))?;
548            }
549            archive.into_inner()?;
550            Ok(())
551        };
552
553        match snapshot_package.archive_format() {
554            ArchiveFormat::TarBzip2 => {
555                let mut encoder =
556                    bzip2::write::BzEncoder::new(archive_file, bzip2::Compression::best());
557                do_archive_files(&mut encoder)?;
558                encoder.finish()?;
559            }
560            ArchiveFormat::TarGzip => {
561                let mut encoder =
562                    flate2::write::GzEncoder::new(archive_file, flate2::Compression::default());
563                do_archive_files(&mut encoder)?;
564                encoder.finish()?;
565            }
566            ArchiveFormat::TarZstd => {
567                let mut encoder = zstd::stream::Encoder::new(archive_file, 0)?;
568                do_archive_files(&mut encoder)?;
569                encoder.finish()?;
570            }
571            ArchiveFormat::TarLz4 => {
572                let mut encoder = lz4::EncoderBuilder::new().level(1).build(archive_file)?;
573                do_archive_files(&mut encoder)?;
574                let (_output, result) = encoder.finish();
575                result?
576            }
577            ArchiveFormat::Tar => {
578                do_archive_files(&mut archive_file)?;
579            }
580        };
581    }
582
583    // Atomically move the archive into position for other validators to find
584    let metadata = fs::metadata(&archive_path).map_err(|e| {
585        SnapshotError::IoWithSourceAndFile(e, "archive path stat", archive_path.clone())
586    })?;
587    fs::rename(&archive_path, snapshot_package.path())
588        .map_err(|e| SnapshotError::IoWithSource(e, "archive path rename"))?;
589
590    purge_old_snapshot_archives(
591        full_snapshot_archives_dir,
592        incremental_snapshot_archives_dir,
593        maximum_full_snapshot_archives_to_retain,
594        maximum_incremental_snapshot_archives_to_retain,
595    );
596
597    timer.stop();
598    info!(
599        "Successfully created {:?}. slot: {}, elapsed ms: {}, size={}",
600        snapshot_package.path(),
601        snapshot_package.slot(),
602        timer.as_ms(),
603        metadata.len()
604    );
605
606    datapoint_info!(
607        "archive-snapshot-package",
608        ("slot", snapshot_package.slot(), i64),
609        (
610            "archive_format",
611            snapshot_package.archive_format().to_string(),
612            String
613        ),
614        ("duration_ms", timer.as_ms(), i64),
615        (
616            if snapshot_package.snapshot_type.is_full_snapshot() {
617                "full-snapshot-archive-size"
618            } else {
619                "incremental-snapshot-archive-size"
620            },
621            metadata.len(),
622            i64
623        ),
624    );
625    Ok(())
626}
627
628/// Get the bank snapshots in a directory
629pub fn get_bank_snapshots(bank_snapshots_dir: impl AsRef<Path>) -> Vec<BankSnapshotInfo> {
630    let mut bank_snapshots = Vec::default();
631    match fs::read_dir(&bank_snapshots_dir) {
632        Err(err) => {
633            info!(
634                "Unable to read bank snapshots directory {}: {}",
635                bank_snapshots_dir.as_ref().display(),
636                err
637            );
638        }
639        Ok(paths) => paths
640            .filter_map(|entry| {
641                // check if this entry is a directory and only a Slot
642                // bank snapshots are bank_snapshots_dir/slot/slot(BANK_SNAPSHOT_PRE_FILENAME_EXTENSION)
643                entry
644                    .ok()
645                    .filter(|entry| entry.path().is_dir())
646                    .and_then(|entry| {
647                        entry
648                            .path()
649                            .file_name()
650                            .and_then(|file_name| file_name.to_str())
651                            .and_then(|file_name| file_name.parse::<Slot>().ok())
652                    })
653            })
654            .for_each(|slot| {
655                if let Some(snapshot_info) =
656                    BankSnapshotInfo::new_from_dir(&bank_snapshots_dir, slot)
657                {
658                    bank_snapshots.push(snapshot_info);
659                }
660            }),
661    }
662    bank_snapshots
663}
664
665/// Get the bank snapshots in a directory
666///
667/// This function retains only the bank snapshots of type BankSnapshotType::Pre
668pub fn get_bank_snapshots_pre(bank_snapshots_dir: impl AsRef<Path>) -> Vec<BankSnapshotInfo> {
669    let mut bank_snapshots = get_bank_snapshots(bank_snapshots_dir);
670    bank_snapshots.retain(|bank_snapshot| bank_snapshot.snapshot_type == BankSnapshotType::Pre);
671    bank_snapshots
672}
673
674/// Get the bank snapshots in a directory
675///
676/// This function retains only the bank snapshots of type BankSnapshotType::Post
677pub fn get_bank_snapshots_post(bank_snapshots_dir: impl AsRef<Path>) -> Vec<BankSnapshotInfo> {
678    let mut bank_snapshots = get_bank_snapshots(bank_snapshots_dir);
679    bank_snapshots.retain(|bank_snapshot| bank_snapshot.snapshot_type == BankSnapshotType::Post);
680    bank_snapshots
681}
682
683/// Get the bank snapshot with the highest slot in a directory
684///
685/// This function gets the highest bank snapshot of type BankSnapshotType::Pre
686pub fn get_highest_bank_snapshot_pre(
687    bank_snapshots_dir: impl AsRef<Path>,
688) -> Option<BankSnapshotInfo> {
689    do_get_highest_bank_snapshot(get_bank_snapshots_pre(bank_snapshots_dir))
690}
691
692/// Get the bank snapshot with the highest slot in a directory
693///
694/// This function gets the highest bank snapshot of type BankSnapshotType::Post
695pub fn get_highest_bank_snapshot_post(
696    bank_snapshots_dir: impl AsRef<Path>,
697) -> Option<BankSnapshotInfo> {
698    do_get_highest_bank_snapshot(get_bank_snapshots_post(bank_snapshots_dir))
699}
700
701fn do_get_highest_bank_snapshot(
702    mut bank_snapshots: Vec<BankSnapshotInfo>,
703) -> Option<BankSnapshotInfo> {
704    bank_snapshots.sort_unstable();
705    bank_snapshots.into_iter().rev().next()
706}
707
708pub fn serialize_snapshot_data_file<F>(data_file_path: &Path, serializer: F) -> Result<u64>
709where
710    F: FnOnce(&mut BufWriter<File>) -> Result<()>,
711{
712    serialize_snapshot_data_file_capped::<F>(
713        data_file_path,
714        MAX_SNAPSHOT_DATA_FILE_SIZE,
715        serializer,
716    )
717}
718
719pub fn deserialize_snapshot_data_file<T: Sized>(
720    data_file_path: &Path,
721    deserializer: impl FnOnce(&mut BufReader<File>) -> Result<T>,
722) -> Result<T> {
723    let wrapped_deserializer = move |streams: &mut SnapshotStreams<File>| -> Result<T> {
724        deserializer(streams.full_snapshot_stream)
725    };
726
727    let wrapped_data_file_path = SnapshotRootPaths {
728        full_snapshot_root_file_path: data_file_path.to_path_buf(),
729        incremental_snapshot_root_file_path: None,
730    };
731
732    deserialize_snapshot_data_files_capped(
733        &wrapped_data_file_path,
734        MAX_SNAPSHOT_DATA_FILE_SIZE,
735        wrapped_deserializer,
736    )
737}
738
739fn deserialize_snapshot_data_files<T: Sized>(
740    snapshot_root_paths: &SnapshotRootPaths,
741    deserializer: impl FnOnce(&mut SnapshotStreams<File>) -> Result<T>,
742) -> Result<T> {
743    deserialize_snapshot_data_files_capped(
744        snapshot_root_paths,
745        MAX_SNAPSHOT_DATA_FILE_SIZE,
746        deserializer,
747    )
748}
749
750fn serialize_snapshot_data_file_capped<F>(
751    data_file_path: &Path,
752    maximum_file_size: u64,
753    serializer: F,
754) -> Result<u64>
755where
756    F: FnOnce(&mut BufWriter<File>) -> Result<()>,
757{
758    let data_file = File::create(data_file_path)?;
759    let mut data_file_stream = BufWriter::new(data_file);
760    serializer(&mut data_file_stream)?;
761    data_file_stream.flush()?;
762
763    let consumed_size = data_file_stream.stream_position()?;
764    if consumed_size > maximum_file_size {
765        let error_message = format!(
766            "too large snapshot data file to serialize: {data_file_path:?} has {consumed_size} bytes"
767        );
768        return Err(get_io_error(&error_message));
769    }
770    Ok(consumed_size)
771}
772
773fn deserialize_snapshot_data_files_capped<T: Sized>(
774    snapshot_root_paths: &SnapshotRootPaths,
775    maximum_file_size: u64,
776    deserializer: impl FnOnce(&mut SnapshotStreams<File>) -> Result<T>,
777) -> Result<T> {
778    let (full_snapshot_file_size, mut full_snapshot_data_file_stream) =
779        create_snapshot_data_file_stream(
780            &snapshot_root_paths.full_snapshot_root_file_path,
781            maximum_file_size,
782        )?;
783
784    let (incremental_snapshot_file_size, mut incremental_snapshot_data_file_stream) =
785        if let Some(ref incremental_snapshot_root_file_path) =
786            snapshot_root_paths.incremental_snapshot_root_file_path
787        {
788            let (incremental_snapshot_file_size, incremental_snapshot_data_file_stream) =
789                create_snapshot_data_file_stream(
790                    incremental_snapshot_root_file_path,
791                    maximum_file_size,
792                )?;
793            (
794                Some(incremental_snapshot_file_size),
795                Some(incremental_snapshot_data_file_stream),
796            )
797        } else {
798            (None, None)
799        };
800
801    let mut snapshot_streams = SnapshotStreams {
802        full_snapshot_stream: &mut full_snapshot_data_file_stream,
803        incremental_snapshot_stream: incremental_snapshot_data_file_stream.as_mut(),
804    };
805    let ret = deserializer(&mut snapshot_streams)?;
806
807    check_deserialize_file_consumed(
808        full_snapshot_file_size,
809        &snapshot_root_paths.full_snapshot_root_file_path,
810        &mut full_snapshot_data_file_stream,
811    )?;
812
813    if let Some(ref incremental_snapshot_root_file_path) =
814        snapshot_root_paths.incremental_snapshot_root_file_path
815    {
816        check_deserialize_file_consumed(
817            incremental_snapshot_file_size.unwrap(),
818            incremental_snapshot_root_file_path,
819            incremental_snapshot_data_file_stream.as_mut().unwrap(),
820        )?;
821    }
822
823    Ok(ret)
824}
825
826/// Before running the deserializer function, perform common operations on the snapshot archive
827/// files, such as checking the file size and opening the file into a stream.
828fn create_snapshot_data_file_stream<P>(
829    snapshot_root_file_path: P,
830    maximum_file_size: u64,
831) -> Result<(u64, BufReader<File>)>
832where
833    P: AsRef<Path>,
834{
835    let snapshot_file_size = fs::metadata(&snapshot_root_file_path)?.len();
836
837    if snapshot_file_size > maximum_file_size {
838        let error_message =
839            format!(
840            "too large snapshot data file to deserialize: {} has {} bytes (max size is {} bytes)",
841            snapshot_root_file_path.as_ref().display(), snapshot_file_size, maximum_file_size
842        );
843        return Err(get_io_error(&error_message));
844    }
845
846    let snapshot_data_file = File::open(&snapshot_root_file_path)?;
847    let snapshot_data_file_stream = BufReader::new(snapshot_data_file);
848
849    Ok((snapshot_file_size, snapshot_data_file_stream))
850}
851
852/// After running the deserializer function, perform common checks to ensure the snapshot archive
853/// files were consumed correctly.
854fn check_deserialize_file_consumed<P>(
855    file_size: u64,
856    file_path: P,
857    file_stream: &mut BufReader<File>,
858) -> Result<()>
859where
860    P: AsRef<Path>,
861{
862    let consumed_size = file_stream.stream_position()?;
863
864    if consumed_size != file_size {
865        let error_message =
866            format!(
867            "invalid snapshot data file: {} has {} bytes, however consumed {} bytes to deserialize",
868            file_path.as_ref().display(), file_size, consumed_size
869        );
870        return Err(get_io_error(&error_message));
871    }
872
873    Ok(())
874}
875
876/// To allow generating a bank snapshot directory with full state information, we need to
877/// hardlink account appendvec files from the runtime operation directory to a snapshot
878/// hardlink directory.  This is to create the run/ and snapshot sub directories for an
879/// account_path provided by the user.  These two sub directories are on the same file
880/// system partition to allow hard-linking.
881pub fn create_accounts_run_and_snapshot_dirs(
882    account_dir: impl AsRef<Path>,
883) -> std::io::Result<(PathBuf, PathBuf)> {
884    let run_path = account_dir.as_ref().join("run");
885    let snapshot_path = account_dir.as_ref().join("snapshot");
886    if (!run_path.is_dir()) || (!snapshot_path.is_dir()) {
887        // If the "run/" or "snapshot" sub directories do not exist, the directory may be from
888        // an older version for which the appendvec files are at this directory.  Clean up
889        // them first.
890        // This will be done only once when transitioning from an old image without run directory
891        // to this new version using run and snapshot directories.
892        // The run/ content cleanup will be done at a later point.  The snapshot/ content persists
893        // accross the process boot, and will be purged by the account_background_service.
894        if fs::remove_dir_all(&account_dir).is_err() {
895            delete_contents_of_path(&account_dir);
896        }
897        fs::create_dir_all(&run_path)?;
898        fs::create_dir_all(&snapshot_path)?;
899    }
900
901    Ok((run_path, snapshot_path))
902}
903
904/// Return account path from the appendvec path after checking its format.
905fn get_account_path_from_appendvec_path(appendvec_path: &Path) -> Option<PathBuf> {
906    let run_path = appendvec_path.parent()?;
907    let run_file_name = run_path.file_name()?;
908    // All appendvec files should be under <account_path>/run/.
909    // When generating the bank snapshot directory, they are hardlinked to <account_path>/snapshot/<slot>/
910    if run_file_name != "run" {
911        error!(
912            "The account path {} does not have run/ as its immediate parent directory.",
913            run_path.display()
914        );
915        return None;
916    }
917    let account_path = run_path.parent()?;
918    Some(account_path.to_path_buf())
919}
920
921/// From an appendvec path, derive the snapshot hardlink path.  If the corresponding snapshot hardlink
922/// directory does not exist, create it.
923fn get_snapshot_accounts_hardlink_dir(
924    appendvec_path: &Path,
925    bank_slot: Slot,
926    account_paths: &mut HashSet<PathBuf>,
927    hardlinks_dir: impl AsRef<Path>,
928) -> Result<PathBuf> {
929    let account_path = get_account_path_from_appendvec_path(appendvec_path)
930        .ok_or_else(|| SnapshotError::InvalidAppendVecPath(appendvec_path.to_path_buf()))?;
931
932    let snapshot_hardlink_dir = account_path.join("snapshot").join(bank_slot.to_string());
933
934    // Use the hashset to track, to avoid checking the file system.  Only set up the hardlink directory
935    // and the symlink to it at the first time of seeing the account_path.
936    if !account_paths.contains(&account_path) {
937        let idx = account_paths.len();
938        debug!(
939            "for appendvec_path {}, create hard-link path {}",
940            appendvec_path.display(),
941            snapshot_hardlink_dir.display()
942        );
943        fs::create_dir_all(&snapshot_hardlink_dir).map_err(|e| {
944            SnapshotError::IoWithSourceAndFile(
945                e,
946                "create hard-link dir",
947                snapshot_hardlink_dir.clone(),
948            )
949        })?;
950        let symlink_path = hardlinks_dir.as_ref().join(format!("account_path_{idx}"));
951        symlink::symlink_dir(&snapshot_hardlink_dir, symlink_path).map_err(|e| {
952            SnapshotError::IoWithSourceAndFile(
953                e,
954                "simlink the hard-link dir",
955                snapshot_hardlink_dir.clone(),
956            )
957        })?;
958        account_paths.insert(account_path);
959    };
960
961    Ok(snapshot_hardlink_dir)
962}
963
964/// Hard-link the files from accounts/ to snapshot/<bank_slot>/accounts/
965/// This keeps the appendvec files alive and with the bank snapshot.  The slot and id
966/// in the file names are also updated in case its file is a recycled one with inconsistent slot
967/// and id.
968fn hard_link_storages_to_snapshot(
969    bank_snapshot_dir: impl AsRef<Path>,
970    bank_slot: Slot,
971    snapshot_storages: &[Arc<AccountStorageEntry>],
972) -> Result<()> {
973    let accounts_hardlinks_dir = bank_snapshot_dir.as_ref().join("accounts_hardlinks");
974    fs::create_dir_all(&accounts_hardlinks_dir)?;
975
976    let mut account_paths: HashSet<PathBuf> = HashSet::new();
977    for storage in snapshot_storages {
978        storage.flush()?;
979        let storage_path = storage.accounts.get_path();
980        let snapshot_hardlink_dir = get_snapshot_accounts_hardlink_dir(
981            &storage_path,
982            bank_slot,
983            &mut account_paths,
984            &accounts_hardlinks_dir,
985        )?;
986        // The appendvec could be recycled, so its filename may not be consistent to the slot and id.
987        // Use the storage slot and id to compose a consistent file name for the hard-link file.
988        let hardlink_filename = AppendVec::file_name(storage.slot(), storage.append_vec_id());
989        let hard_link_path = snapshot_hardlink_dir.join(hardlink_filename);
990        fs::hard_link(&storage_path, &hard_link_path).map_err(|e| {
991            let err_msg = format!(
992                "hard-link appendvec file {} to {} failed.  Error: {}",
993                storage_path.display(),
994                hard_link_path.display(),
995                e,
996            );
997            SnapshotError::Io(IoError::new(ErrorKind::Other, err_msg))
998        })?;
999    }
1000    Ok(())
1001}
1002
1003/// Serialize a bank to a snapshot
1004///
1005/// **DEVELOPER NOTE** Any error that is returned from this function may bring down the node!  This
1006/// function is called from AccountsBackgroundService to handle snapshot requests.  Since taking a
1007/// snapshot is not permitted to fail, any errors returned here will trigger the node to shutdown.
1008/// So, be careful whenever adding new code that may return errors.
1009pub fn add_bank_snapshot(
1010    bank_snapshots_dir: impl AsRef<Path>,
1011    bank: &Bank,
1012    snapshot_storages: &[Arc<AccountStorageEntry>],
1013    snapshot_version: SnapshotVersion,
1014    slot_deltas: Vec<BankSlotDelta>,
1015) -> Result<BankSnapshotInfo> {
1016    let mut add_snapshot_time = Measure::start("add-snapshot-ms");
1017    let slot = bank.slot();
1018    // bank_snapshots_dir/slot
1019    let bank_snapshot_dir = get_bank_snapshots_dir(&bank_snapshots_dir, slot);
1020    if bank_snapshot_dir.is_dir() {
1021        // There is a time window from when a snapshot directory is created to when its content
1022        // is fully filled to become a full state good to construct a bank from.  At the init time,
1023        // the system may not be booted from the latest snapshot directory, but an older and complete
1024        // directory.  Then, when adding new snapshots, the newer incomplete snapshot directory could
1025        // be found.  If so, it should be removed.
1026        remove_bank_snapshot(slot, &bank_snapshots_dir)?;
1027    }
1028    fs::create_dir_all(&bank_snapshot_dir)?;
1029
1030    // the bank snapshot is stored as bank_snapshots_dir/slot/slot.BANK_SNAPSHOT_PRE_FILENAME_EXTENSION
1031    let bank_snapshot_path = bank_snapshot_dir
1032        .join(get_snapshot_file_name(slot))
1033        .with_extension(BANK_SNAPSHOT_PRE_FILENAME_EXTENSION);
1034
1035    info!(
1036        "Creating bank snapshot for slot {}, path: {}",
1037        slot,
1038        bank_snapshot_path.display(),
1039    );
1040
1041    // We are contructing the snapshot directory to contain the full snapshot state information to allow
1042    // constructing a bank from this directory.  It acts like an archive to include the full state.
1043    // The set of the account appendvec files is the necessary part of this snapshot state.  Hard-link them
1044    // from the operational accounts/ directory to here.
1045    hard_link_storages_to_snapshot(&bank_snapshot_dir, slot, snapshot_storages)?;
1046
1047    let mut bank_serialize = Measure::start("bank-serialize-ms");
1048    let bank_snapshot_serializer = move |stream: &mut BufWriter<File>| -> Result<()> {
1049        let serde_style = match snapshot_version {
1050            SnapshotVersion::V1_2_0 => SerdeStyle::Newer,
1051        };
1052        bank_to_stream(
1053            serde_style,
1054            stream.by_ref(),
1055            bank,
1056            &get_storages_to_serialize(snapshot_storages),
1057        )?;
1058        Ok(())
1059    };
1060    let consumed_size =
1061        serialize_snapshot_data_file(&bank_snapshot_path, bank_snapshot_serializer)?;
1062    bank_serialize.stop();
1063    add_snapshot_time.stop();
1064
1065    let status_cache_path = bank_snapshot_dir.join(SNAPSHOT_STATUS_CACHE_FILENAME);
1066    serialize_status_cache(slot, &slot_deltas, &status_cache_path)?;
1067
1068    let version_path = bank_snapshot_dir.join(SNAPSHOT_VERSION_FILENAME);
1069    write_snapshot_version_file(version_path, snapshot_version).unwrap();
1070
1071    // Mark this directory complete so it can be used.  Check this flag first before selecting for deserialization.
1072    let state_complete_path = bank_snapshot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME);
1073    fs::File::create(state_complete_path)?;
1074
1075    // Monitor sizes because they're capped to MAX_SNAPSHOT_DATA_FILE_SIZE
1076    datapoint_info!(
1077        "snapshot-bank-file",
1078        ("slot", slot, i64),
1079        ("size", consumed_size, i64)
1080    );
1081
1082    inc_new_counter_info!("bank-serialize-ms", bank_serialize.as_ms() as usize);
1083    inc_new_counter_info!("add-snapshot-ms", add_snapshot_time.as_ms() as usize);
1084
1085    info!(
1086        "{} for slot {} at {}",
1087        bank_serialize,
1088        slot,
1089        bank_snapshot_path.display(),
1090    );
1091
1092    Ok(BankSnapshotInfo {
1093        slot,
1094        snapshot_type: BankSnapshotType::Pre,
1095        snapshot_dir: bank_snapshot_dir,
1096    })
1097}
1098
1099/// serializing needs Vec<Vec<Arc<AccountStorageEntry>>>, but data structure at runtime is Vec<Arc<AccountStorageEntry>>
1100/// translates to what we need
1101pub(crate) fn get_storages_to_serialize(
1102    snapshot_storages: &[Arc<AccountStorageEntry>],
1103) -> Vec<Vec<Arc<AccountStorageEntry>>> {
1104    snapshot_storages
1105        .iter()
1106        .map(|storage| vec![Arc::clone(storage)])
1107        .collect::<Vec<_>>()
1108}
1109
1110fn serialize_status_cache(
1111    slot: Slot,
1112    slot_deltas: &[BankSlotDelta],
1113    status_cache_path: &Path,
1114) -> Result<()> {
1115    let mut status_cache_serialize = Measure::start("status_cache_serialize-ms");
1116    let consumed_size = serialize_snapshot_data_file(status_cache_path, |stream| {
1117        serialize_into(stream, slot_deltas)?;
1118        Ok(())
1119    })?;
1120    status_cache_serialize.stop();
1121
1122    // Monitor sizes because they're capped to MAX_SNAPSHOT_DATA_FILE_SIZE
1123    datapoint_info!(
1124        "snapshot-status-cache-file",
1125        ("slot", slot, i64),
1126        ("size", consumed_size, i64)
1127    );
1128
1129    inc_new_counter_info!(
1130        "serialize-status-cache-ms",
1131        status_cache_serialize.as_ms() as usize
1132    );
1133    Ok(())
1134}
1135
1136/// Remove the snapshot directory for this slot
1137pub fn remove_bank_snapshot<P>(slot: Slot, bank_snapshots_dir: P) -> Result<()>
1138where
1139    P: AsRef<Path>,
1140{
1141    let bank_snapshot_dir = get_bank_snapshots_dir(&bank_snapshots_dir, slot);
1142    let accounts_hardlinks_dir = bank_snapshot_dir.join("accounts_hardlinks");
1143    if fs::metadata(&accounts_hardlinks_dir).is_ok() {
1144        // This directory contain symlinks to all accounts snapshot directories.
1145        // They should all be removed.
1146        for entry in fs::read_dir(accounts_hardlinks_dir)? {
1147            let dst_path = fs::read_link(entry?.path())?;
1148            fs::remove_dir_all(dst_path)?;
1149        }
1150    }
1151    fs::remove_dir_all(bank_snapshot_dir)?;
1152    Ok(())
1153}
1154
/// Timings, in microseconds, collected while building a bank from snapshot archives.
#[derive(Debug, Default)]
pub struct BankFromArchiveTimings {
    /// Time spent rebuilding the bank from the unpacked snapshots.
    pub rebuild_bank_from_snapshots_us: u64,
    /// Time spent untarring the full snapshot archive.
    pub full_snapshot_untar_us: u64,
    /// Time spent untarring the incremental snapshot archive; 0 when there is none.
    pub incremental_snapshot_untar_us: u64,
    /// Time spent verifying the rebuilt snapshot bank.
    pub verify_snapshot_bank_us: u64,
}
1162
/// Upper bound on the number of parallel readers used when untarring a snapshot archive.
///
/// From testing, 4 seems to be a sweet spot for ranges of 60M-360M accounts and 16-64 cores.
/// This may need to be tuned later.
const PARALLEL_UNTAR_READERS_DEFAULT: usize = 4;
1165
1166fn verify_and_unarchive_snapshots(
1167    bank_snapshots_dir: impl AsRef<Path>,
1168    full_snapshot_archive_info: &FullSnapshotArchiveInfo,
1169    incremental_snapshot_archive_info: Option<&IncrementalSnapshotArchiveInfo>,
1170    account_paths: &[PathBuf],
1171) -> Result<(UnarchivedSnapshot, Option<UnarchivedSnapshot>, AtomicU32)> {
1172    check_are_snapshots_compatible(
1173        full_snapshot_archive_info,
1174        incremental_snapshot_archive_info,
1175    )?;
1176
1177    let parallel_divisions = (num_cpus::get() / 4).clamp(1, PARALLEL_UNTAR_READERS_DEFAULT);
1178
1179    let next_append_vec_id = Arc::new(AtomicU32::new(0));
1180    let unarchived_full_snapshot = unarchive_snapshot(
1181        &bank_snapshots_dir,
1182        TMP_SNAPSHOT_ARCHIVE_PREFIX,
1183        full_snapshot_archive_info.path(),
1184        "snapshot untar",
1185        account_paths,
1186        full_snapshot_archive_info.archive_format(),
1187        parallel_divisions,
1188        next_append_vec_id.clone(),
1189    )?;
1190
1191    let unarchived_incremental_snapshot =
1192        if let Some(incremental_snapshot_archive_info) = incremental_snapshot_archive_info {
1193            let unarchived_incremental_snapshot = unarchive_snapshot(
1194                &bank_snapshots_dir,
1195                TMP_SNAPSHOT_ARCHIVE_PREFIX,
1196                incremental_snapshot_archive_info.path(),
1197                "incremental snapshot untar",
1198                account_paths,
1199                incremental_snapshot_archive_info.archive_format(),
1200                parallel_divisions,
1201                next_append_vec_id.clone(),
1202            )?;
1203            Some(unarchived_incremental_snapshot)
1204        } else {
1205            None
1206        };
1207
1208    Ok((
1209        unarchived_full_snapshot,
1210        unarchived_incremental_snapshot,
1211        Arc::try_unwrap(next_append_vec_id).unwrap(),
1212    ))
1213}
1214
1215/// Rebuild bank from snapshot archives.  Handles either just a full snapshot, or both a full
1216/// snapshot and an incremental snapshot.
1217#[allow(clippy::too_many_arguments)]
1218pub fn bank_from_gcs_snapshot_archives(
1219    account_paths: &[PathBuf],
1220    bank_snapshots_dir: impl AsRef<Path>,
1221    full_snapshot_archive_info: &FullSnapshotArchiveInfo,
1222    incremental_snapshot_archive_info: Option<&IncrementalSnapshotArchiveInfo>,
1223) -> Result<(
1224    BankFieldsToDeserialize,
1225    DashMap<u64, AccountStorageReference>,
1226    SnapshotAccountsDbFields<SerializableAccountStorageEntry>,
1227)> {
1228    let (unarchived_full_snapshot, mut unarchived_incremental_snapshot, next_append_vec_id) =
1229        verify_and_unarchive_snapshots(
1230            bank_snapshots_dir,
1231            full_snapshot_archive_info,
1232            incremental_snapshot_archive_info,
1233            account_paths,
1234        )?;
1235
1236    let mut storage = unarchived_full_snapshot.storage;
1237    if let Some(ref mut unarchive_preparation_result) = unarchived_incremental_snapshot {
1238        let incremental_snapshot_storages =
1239            std::mem::take(&mut unarchive_preparation_result.storage);
1240        storage.extend(incremental_snapshot_storages.into_iter());
1241    }
1242
1243    let (full_snapshot_version, full_snapshot_root_paths) =
1244        verify_unpacked_snapshots_dir_and_version(
1245            &unarchived_full_snapshot.unpacked_snapshots_dir_and_version,
1246        )?;
1247
1248    let incremental_snapshot_unpacked_snapshots_dir_and_version = unarchived_incremental_snapshot
1249        .as_ref()
1250        .map(|unarchive_preparation_result| {
1251            &unarchive_preparation_result.unpacked_snapshots_dir_and_version
1252        });
1253
1254    let (incremental_snapshot_version, incremental_snapshot_root_paths) =
1255        if let Some(snapshot_unpacked_snapshots_dir_and_version) =
1256            incremental_snapshot_unpacked_snapshots_dir_and_version
1257        {
1258            let (snapshot_version, bank_snapshot_info) = verify_unpacked_snapshots_dir_and_version(
1259                snapshot_unpacked_snapshots_dir_and_version,
1260            )?;
1261            (Some(snapshot_version), Some(bank_snapshot_info))
1262        } else {
1263            (None, None)
1264        };
1265
1266    let snapshot_root_paths = SnapshotRootPaths {
1267        full_snapshot_root_file_path: full_snapshot_root_paths.snapshot_path(),
1268        incremental_snapshot_root_file_path: incremental_snapshot_root_paths
1269            .map(|root_paths| root_paths.snapshot_path()),
1270    };
1271
1272    let (bank_fields, full_snapshot_db_fields) =
1273        deserialize_snapshot_data_files(&snapshot_root_paths, |snapshot_streams| {
1274            Ok(
1275                match incremental_snapshot_version.unwrap_or(full_snapshot_version) {
1276                    SnapshotVersion::V1_2_0 => {
1277                        serialized_bank_from_stream(SerdeStyle::Newer, snapshot_streams)
1278                    }
1279                }?,
1280            )
1281        })?;
1282
1283    return Ok((bank_fields, storage, full_snapshot_db_fields));
1284}
1285
1286pub fn storage_from_snapshot_archives(
1287    account_paths: &[PathBuf],
1288    bank_snapshots_dir: impl AsRef<Path>,
1289    full_snapshot_archive_info: &FullSnapshotArchiveInfo,
1290    incremental_snapshot_archive_info: Option<&IncrementalSnapshotArchiveInfo>,
1291) -> Result<DashMap<u64, AccountStorageReference>> {
1292    let (unarchived_full_snapshot, mut unarchived_incremental_snapshot, next_append_vec_id) =
1293        verify_and_unarchive_snapshots(
1294            bank_snapshots_dir,
1295            full_snapshot_archive_info,
1296            incremental_snapshot_archive_info,
1297            account_paths,
1298        )?;
1299
1300    let mut storage = unarchived_full_snapshot.storage;
1301    if let Some(ref mut unarchive_preparation_result) = unarchived_incremental_snapshot {
1302        let incremental_snapshot_storages =
1303            std::mem::take(&mut unarchive_preparation_result.storage);
1304        storage.extend(incremental_snapshot_storages.into_iter());
1305    }
1306    return Ok(storage);
1307}
1308/// Utility for parsing out bank specific information from a snapshot archive. This utility can be used
1309/// to parse out bank specific information like the leader schedule, epoch schedule, etc.
1310pub fn bank_fields_from_snapshot_archives(
1311    bank_snapshots_dir: impl AsRef<Path>,
1312    full_snapshot_archives_dir: impl AsRef<Path>,
1313    incremental_snapshot_archives_dir: impl AsRef<Path>,
1314) -> Result<BankFieldsToDeserialize> {
1315    let full_snapshot_archive_info =
1316        get_highest_full_snapshot_archive_info(&full_snapshot_archives_dir)
1317            .ok_or(SnapshotError::NoSnapshotArchives)?;
1318
1319    let incremental_snapshot_archive_info = get_highest_incremental_snapshot_archive_info(
1320        &incremental_snapshot_archives_dir,
1321        full_snapshot_archive_info.slot(),
1322    );
1323
1324    let temp_dir = tempfile::Builder::new()
1325        .prefix("dummy-accounts-path")
1326        .tempdir()?;
1327
1328    let account_paths = vec![temp_dir.path().to_path_buf()];
1329
1330    let (unarchived_full_snapshot, unarchived_incremental_snapshot, _next_append_vec_id) =
1331        verify_and_unarchive_snapshots(
1332            &bank_snapshots_dir,
1333            &full_snapshot_archive_info,
1334            incremental_snapshot_archive_info.as_ref(),
1335            &account_paths,
1336        )?;
1337
1338    bank_fields_from_snapshots(
1339        &unarchived_full_snapshot.unpacked_snapshots_dir_and_version,
1340        unarchived_incremental_snapshot
1341            .as_ref()
1342            .map(|unarchive_preparation_result| {
1343                &unarchive_preparation_result.unpacked_snapshots_dir_and_version
1344            }),
1345    )
1346}
1347
1348/// Rebuild bank from snapshot archives.  Handles either just a full snapshot, or both a full
1349/// snapshot and an incremental snapshot.
1350#[allow(clippy::too_many_arguments)]
1351pub fn bank_from_snapshot_archives(
1352    account_paths: &[PathBuf],
1353    bank_snapshots_dir: impl AsRef<Path>,
1354    full_snapshot_archive_info: &FullSnapshotArchiveInfo,
1355    incremental_snapshot_archive_info: Option<&IncrementalSnapshotArchiveInfo>,
1356    genesis_config: &GenesisConfig,
1357    runtime_config: &RuntimeConfig,
1358    debug_keys: Option<Arc<HashSet<Pubkey>>>,
1359    additional_builtins: Option<&Builtins>,
1360    account_secondary_indexes: AccountSecondaryIndexes,
1361    limit_load_slot_count_from_snapshot: Option<usize>,
1362    shrink_ratio: AccountShrinkThreshold,
1363    test_hash_calculation: bool,
1364    accounts_db_skip_shrink: bool,
1365    verify_index: bool,
1366    accounts_db_config: Option<AccountsDbConfig>,
1367    accounts_update_notifier: Option<AccountsUpdateNotifier>,
1368    exit: &Arc<AtomicBool>,
1369) -> Result<(Bank, BankFromArchiveTimings)> {
1370    let (unarchived_full_snapshot, mut unarchived_incremental_snapshot, next_append_vec_id) =
1371        verify_and_unarchive_snapshots(
1372            bank_snapshots_dir,
1373            full_snapshot_archive_info,
1374            incremental_snapshot_archive_info,
1375            account_paths,
1376        )?;
1377
1378    let mut storage = unarchived_full_snapshot.storage;
1379    if let Some(ref mut unarchive_preparation_result) = unarchived_incremental_snapshot {
1380        let incremental_snapshot_storages =
1381            std::mem::take(&mut unarchive_preparation_result.storage);
1382        storage.extend(incremental_snapshot_storages.into_iter());
1383    }
1384
1385    let storage_and_next_append_vec_id = StorageAndNextAppendVecId {
1386        storage,
1387        next_append_vec_id,
1388    };
1389
1390    let mut measure_rebuild = Measure::start("rebuild bank from snapshots");
1391    let bank = rebuild_bank_from_snapshots(
1392        &unarchived_full_snapshot.unpacked_snapshots_dir_and_version,
1393        unarchived_incremental_snapshot
1394            .as_ref()
1395            .map(|unarchive_preparation_result| {
1396                &unarchive_preparation_result.unpacked_snapshots_dir_and_version
1397            }),
1398        account_paths,
1399        storage_and_next_append_vec_id,
1400        genesis_config,
1401        runtime_config,
1402        debug_keys,
1403        additional_builtins,
1404        account_secondary_indexes,
1405        limit_load_slot_count_from_snapshot,
1406        shrink_ratio,
1407        verify_index,
1408        accounts_db_config,
1409        accounts_update_notifier,
1410        exit,
1411    )?;
1412    measure_rebuild.stop();
1413    info!("{}", measure_rebuild);
1414
1415    let snapshot_archive_info = incremental_snapshot_archive_info.map_or_else(
1416        || full_snapshot_archive_info.snapshot_archive_info(),
1417        |incremental_snapshot_archive_info| {
1418            incremental_snapshot_archive_info.snapshot_archive_info()
1419        },
1420    );
1421    verify_bank_against_expected_slot_hash(
1422        &bank,
1423        snapshot_archive_info.slot,
1424        snapshot_archive_info.hash,
1425    )?;
1426
1427    let mut measure_verify = Measure::start("verify");
1428    if !bank.verify_snapshot_bank(
1429        test_hash_calculation,
1430        accounts_db_skip_shrink || !full_snapshot_archive_info.is_remote(),
1431        full_snapshot_archive_info.slot(),
1432    ) && limit_load_slot_count_from_snapshot.is_none()
1433    {
1434        panic!("Snapshot bank for slot {} failed to verify", bank.slot());
1435    }
1436    measure_verify.stop();
1437
1438    let timings = BankFromArchiveTimings {
1439        rebuild_bank_from_snapshots_us: measure_rebuild.as_us(),
1440        full_snapshot_untar_us: unarchived_full_snapshot.measure_untar.as_us(),
1441        incremental_snapshot_untar_us: unarchived_incremental_snapshot
1442            .map_or(0, |unarchive_preparation_result| {
1443                unarchive_preparation_result.measure_untar.as_us()
1444            }),
1445        verify_snapshot_bank_us: measure_verify.as_us(),
1446    };
1447    Ok((bank, timings))
1448}
1449
1450/// Rebuild bank from snapshot archives.  This function searches `full_snapshot_archives_dir` and `incremental_snapshot_archives_dir` for the
1451/// highest full snapshot and highest corresponding incremental snapshot, then rebuilds the bank.
1452#[allow(clippy::too_many_arguments)]
1453pub fn bank_from_latest_snapshot_archives(
1454    bank_snapshots_dir: impl AsRef<Path>,
1455    full_snapshot_archives_dir: impl AsRef<Path>,
1456    incremental_snapshot_archives_dir: impl AsRef<Path>,
1457    account_paths: &[PathBuf],
1458    genesis_config: &GenesisConfig,
1459    runtime_config: &RuntimeConfig,
1460    debug_keys: Option<Arc<HashSet<Pubkey>>>,
1461    additional_builtins: Option<&Builtins>,
1462    account_secondary_indexes: AccountSecondaryIndexes,
1463    limit_load_slot_count_from_snapshot: Option<usize>,
1464    shrink_ratio: AccountShrinkThreshold,
1465    test_hash_calculation: bool,
1466    accounts_db_skip_shrink: bool,
1467    verify_index: bool,
1468    accounts_db_config: Option<AccountsDbConfig>,
1469    accounts_update_notifier: Option<AccountsUpdateNotifier>,
1470    exit: &Arc<AtomicBool>,
1471) -> Result<(
1472    Bank,
1473    FullSnapshotArchiveInfo,
1474    Option<IncrementalSnapshotArchiveInfo>,
1475)> {
1476    let full_snapshot_archive_info =
1477        get_highest_full_snapshot_archive_info(&full_snapshot_archives_dir)
1478            .ok_or(SnapshotError::NoSnapshotArchives)?;
1479
1480    let incremental_snapshot_archive_info = get_highest_incremental_snapshot_archive_info(
1481        &incremental_snapshot_archives_dir,
1482        full_snapshot_archive_info.slot(),
1483    );
1484
1485    info!(
1486        "Loading bank from full snapshot: {}, and incremental snapshot: {:?}",
1487        full_snapshot_archive_info.path().display(),
1488        incremental_snapshot_archive_info
1489            .as_ref()
1490            .map(
1491                |incremental_snapshot_archive_info| incremental_snapshot_archive_info
1492                    .path()
1493                    .display()
1494            )
1495    );
1496
1497    let (bank, timings) = bank_from_snapshot_archives(
1498        account_paths,
1499        bank_snapshots_dir.as_ref(),
1500        &full_snapshot_archive_info,
1501        incremental_snapshot_archive_info.as_ref(),
1502        genesis_config,
1503        runtime_config,
1504        debug_keys,
1505        additional_builtins,
1506        account_secondary_indexes,
1507        limit_load_slot_count_from_snapshot,
1508        shrink_ratio,
1509        test_hash_calculation,
1510        accounts_db_skip_shrink,
1511        verify_index,
1512        accounts_db_config,
1513        accounts_update_notifier,
1514        exit,
1515    )?;
1516
1517    datapoint_info!(
1518        "bank_from_snapshot_archives",
1519        (
1520            "full_snapshot_untar_us",
1521            timings.full_snapshot_untar_us,
1522            i64
1523        ),
1524        (
1525            "incremental_snapshot_untar_us",
1526            timings.incremental_snapshot_untar_us,
1527            i64
1528        ),
1529        (
1530            "rebuild_bank_from_snapshots_us",
1531            timings.rebuild_bank_from_snapshots_us,
1532            i64
1533        ),
1534        (
1535            "verify_snapshot_bank_us",
1536            timings.verify_snapshot_bank_us,
1537            i64
1538        ),
1539    );
1540
1541    Ok((
1542        bank,
1543        full_snapshot_archive_info,
1544        incremental_snapshot_archive_info,
1545    ))
1546}
1547
1548/// Check to make sure the deserialized bank's slot and hash matches the snapshot archive's slot
1549/// and hash
1550fn verify_bank_against_expected_slot_hash(
1551    bank: &Bank,
1552    expected_slot: Slot,
1553    expected_hash: SnapshotHash,
1554) -> Result<()> {
1555    let bank_slot = bank.slot();
1556    let bank_hash = bank.get_snapshot_hash();
1557
1558    if bank_slot != expected_slot || bank_hash != expected_hash {
1559        return Err(SnapshotError::MismatchedSlotHash(
1560            (bank_slot, bank_hash),
1561            (expected_slot, expected_hash),
1562        ));
1563    }
1564
1565    Ok(())
1566}
1567
1568/// Spawns a thread for unpacking a snapshot
1569fn spawn_unpack_snapshot_thread(
1570    file_sender: Sender<PathBuf>,
1571    account_paths: Arc<Vec<PathBuf>>,
1572    ledger_dir: Arc<PathBuf>,
1573    mut archive: Archive<SharedBufferReader>,
1574    parallel_selector: Option<ParallelSelector>,
1575    thread_index: usize,
1576) -> JoinHandle<()> {
1577    Builder::new()
1578        .name(format!("solUnpkSnpsht{thread_index:02}"))
1579        .spawn(move || {
1580            streaming_unpack_snapshot(
1581                &mut archive,
1582                ledger_dir.as_path(),
1583                &account_paths,
1584                parallel_selector,
1585                &file_sender,
1586            )
1587            .unwrap();
1588        })
1589        .unwrap()
1590}
1591
1592/// Streams unpacked files across channel
1593fn streaming_unarchive_snapshot(
1594    file_sender: Sender<PathBuf>,
1595    account_paths: Vec<PathBuf>,
1596    ledger_dir: PathBuf,
1597    snapshot_archive_path: PathBuf,
1598    archive_format: ArchiveFormat,
1599    num_threads: usize,
1600) -> Vec<JoinHandle<()>> {
1601    let account_paths = Arc::new(account_paths);
1602    let ledger_dir = Arc::new(ledger_dir);
1603    let shared_buffer = untar_snapshot_create_shared_buffer(&snapshot_archive_path, archive_format);
1604
1605    // All shared buffer readers need to be created before the threads are spawned
1606    #[allow(clippy::needless_collect)]
1607    let archives: Vec<_> = (0..num_threads)
1608        .map(|_| {
1609            let reader = SharedBufferReader::new(&shared_buffer);
1610            Archive::new(reader)
1611        })
1612        .collect();
1613
1614    archives
1615        .into_iter()
1616        .enumerate()
1617        .map(|(thread_index, archive)| {
1618            let parallel_selector = Some(ParallelSelector {
1619                index: thread_index,
1620                divisions: num_threads,
1621            });
1622
1623            spawn_unpack_snapshot_thread(
1624                file_sender.clone(),
1625                account_paths.clone(),
1626                ledger_dir.clone(),
1627                archive,
1628                parallel_selector,
1629                thread_index,
1630            )
1631        })
1632        .collect()
1633}
1634
1635/// Perform the common tasks when unarchiving a snapshot.  Handles creating the temporary
1636/// directories, untaring, reading the version file, and then returning those fields plus the
1637/// rebuilt storage
1638fn unarchive_snapshot<P, Q>(
1639    bank_snapshots_dir: P,
1640    unpacked_snapshots_dir_prefix: &'static str,
1641    snapshot_archive_path: Q,
1642    measure_name: &'static str,
1643    account_paths: &[PathBuf],
1644    archive_format: ArchiveFormat,
1645    parallel_divisions: usize,
1646    next_append_vec_id: Arc<AtomicU32>,
1647) -> Result<UnarchivedSnapshot>
1648where
1649    P: AsRef<Path>,
1650    Q: AsRef<Path>,
1651{
1652    let unpack_dir = tempfile::Builder::new()
1653        .prefix(unpacked_snapshots_dir_prefix)
1654        .tempdir_in(bank_snapshots_dir)?;
1655    let unpacked_snapshots_dir = unpack_dir.path().join("snapshots");
1656
1657    let (file_sender, file_receiver) = crossbeam_channel::unbounded();
1658    streaming_unarchive_snapshot(
1659        file_sender,
1660        account_paths.to_vec(),
1661        unpack_dir.path().to_path_buf(),
1662        snapshot_archive_path.as_ref().to_path_buf(),
1663        archive_format,
1664        parallel_divisions,
1665    );
1666
1667    let num_rebuilder_threads = num_cpus::get_physical()
1668        .saturating_sub(parallel_divisions)
1669        .max(1);
1670    let (version_and_storages, measure_untar) = measure!(
1671        SnapshotStorageRebuilder::rebuild_storage(
1672            file_receiver,
1673            num_rebuilder_threads,
1674            next_append_vec_id
1675        )?,
1676        measure_name
1677    );
1678    info!("{}", measure_untar);
1679
1680    let RebuiltSnapshotStorage {
1681        snapshot_version,
1682        storage,
1683    } = version_and_storages;
1684    Ok(UnarchivedSnapshot {
1685        unpack_dir,
1686        storage,
1687        unpacked_snapshots_dir_and_version: UnpackedSnapshotsDirAndVersion {
1688            unpacked_snapshots_dir,
1689            snapshot_version,
1690        },
1691        measure_untar,
1692    })
1693}
1694
1695/// Reads the `snapshot_version` from a file. Before opening the file, its size
1696/// is compared to `MAX_SNAPSHOT_VERSION_FILE_SIZE`. If the size exceeds this
1697/// threshold, it is not opened and an error is returned.
1698fn snapshot_version_from_file(path: impl AsRef<Path>) -> Result<String> {
1699    // Check file size.
1700    let file_size = fs::metadata(&path)?.len();
1701    if file_size > MAX_SNAPSHOT_VERSION_FILE_SIZE {
1702        let error_message = format!(
1703            "snapshot version file too large: {} has {} bytes (max size is {} bytes)",
1704            path.as_ref().display(),
1705            file_size,
1706            MAX_SNAPSHOT_VERSION_FILE_SIZE,
1707        );
1708        return Err(get_io_error(&error_message));
1709    }
1710
1711    // Read snapshot_version from file.
1712    let mut snapshot_version = String::new();
1713    File::open(path).and_then(|mut f| f.read_to_string(&mut snapshot_version))?;
1714    Ok(snapshot_version.trim().to_string())
1715}
1716
1717/// Check if an incremental snapshot is compatible with a full snapshot.  This is done by checking
1718/// if the incremental snapshot's base slot is the same as the full snapshot's slot.
1719fn check_are_snapshots_compatible(
1720    full_snapshot_archive_info: &FullSnapshotArchiveInfo,
1721    incremental_snapshot_archive_info: Option<&IncrementalSnapshotArchiveInfo>,
1722) -> Result<()> {
1723    if incremental_snapshot_archive_info.is_none() {
1724        return Ok(());
1725    }
1726
1727    let incremental_snapshot_archive_info = incremental_snapshot_archive_info.unwrap();
1728
1729    (full_snapshot_archive_info.slot() == incremental_snapshot_archive_info.base_slot())
1730        .then_some(())
1731        .ok_or_else(|| {
1732            SnapshotError::MismatchedBaseSlot(
1733                full_snapshot_archive_info.slot(),
1734                incremental_snapshot_archive_info.base_slot(),
1735            )
1736        })
1737}
1738
1739/// Get the `&str` from a `&Path`
1740pub fn path_to_file_name_str(path: &Path) -> Result<&str> {
1741    path.file_name()
1742        .ok_or_else(|| SnapshotError::PathToFileNameError(path.to_path_buf()))?
1743        .to_str()
1744        .ok_or_else(|| SnapshotError::FileNameToStrError(path.to_path_buf()))
1745}
1746
1747pub fn build_snapshot_archives_remote_dir(snapshot_archives_dir: impl AsRef<Path>) -> PathBuf {
1748    snapshot_archives_dir
1749        .as_ref()
1750        .join(SNAPSHOT_ARCHIVE_DOWNLOAD_DIR)
1751}
1752
1753/// Build the full snapshot archive path from its components: the snapshot archives directory, the
1754/// snapshot slot, the accounts hash, and the archive format.
1755pub fn build_full_snapshot_archive_path(
1756    full_snapshot_archives_dir: impl AsRef<Path>,
1757    slot: Slot,
1758    hash: &SnapshotHash,
1759    archive_format: ArchiveFormat,
1760) -> PathBuf {
1761    full_snapshot_archives_dir.as_ref().join(format!(
1762        "snapshot-{}-{}.{}",
1763        slot,
1764        hash.0,
1765        archive_format.extension(),
1766    ))
1767}
1768
1769/// Build the incremental snapshot archive path from its components: the snapshot archives
1770/// directory, the snapshot base slot, the snapshot slot, the accounts hash, and the archive
1771/// format.
1772pub fn build_incremental_snapshot_archive_path(
1773    incremental_snapshot_archives_dir: impl AsRef<Path>,
1774    base_slot: Slot,
1775    slot: Slot,
1776    hash: &SnapshotHash,
1777    archive_format: ArchiveFormat,
1778) -> PathBuf {
1779    incremental_snapshot_archives_dir.as_ref().join(format!(
1780        "incremental-snapshot-{}-{}-{}.{}",
1781        base_slot,
1782        slot,
1783        hash.0,
1784        archive_format.extension(),
1785    ))
1786}
1787
1788/// Parse a full snapshot archive filename into its Slot, Hash, and Archive Format
1789pub(crate) fn parse_full_snapshot_archive_filename(
1790    archive_filename: &str,
1791) -> Result<(Slot, SnapshotHash, ArchiveFormat)> {
1792    lazy_static! {
1793        static ref RE: Regex = Regex::new(FULL_SNAPSHOT_ARCHIVE_FILENAME_REGEX).unwrap();
1794    }
1795
1796    let do_parse = || {
1797        RE.captures(archive_filename).and_then(|captures| {
1798            let slot = captures
1799                .name("slot")
1800                .map(|x| x.as_str().parse::<Slot>())?
1801                .ok()?;
1802            let hash = captures
1803                .name("hash")
1804                .map(|x| x.as_str().parse::<Hash>())?
1805                .ok()?;
1806            let archive_format = captures
1807                .name("ext")
1808                .map(|x| x.as_str().parse::<ArchiveFormat>())?
1809                .ok()?;
1810
1811            Some((slot, SnapshotHash(hash), archive_format))
1812        })
1813    };
1814
1815    do_parse().ok_or_else(|| {
1816        SnapshotError::ParseSnapshotArchiveFileNameError(archive_filename.to_string())
1817    })
1818}
1819
1820/// Parse an incremental snapshot archive filename into its base Slot, actual Slot, Hash, and Archive Format
1821pub(crate) fn parse_incremental_snapshot_archive_filename(
1822    archive_filename: &str,
1823) -> Result<(Slot, Slot, SnapshotHash, ArchiveFormat)> {
1824    lazy_static! {
1825        static ref RE: Regex = Regex::new(INCREMENTAL_SNAPSHOT_ARCHIVE_FILENAME_REGEX).unwrap();
1826    }
1827
1828    let do_parse = || {
1829        RE.captures(archive_filename).and_then(|captures| {
1830            let base_slot = captures
1831                .name("base")
1832                .map(|x| x.as_str().parse::<Slot>())?
1833                .ok()?;
1834            let slot = captures
1835                .name("slot")
1836                .map(|x| x.as_str().parse::<Slot>())?
1837                .ok()?;
1838            let hash = captures
1839                .name("hash")
1840                .map(|x| x.as_str().parse::<Hash>())?
1841                .ok()?;
1842            let archive_format = captures
1843                .name("ext")
1844                .map(|x| x.as_str().parse::<ArchiveFormat>())?
1845                .ok()?;
1846
1847            Some((base_slot, slot, SnapshotHash(hash), archive_format))
1848        })
1849    };
1850
1851    do_parse().ok_or_else(|| {
1852        SnapshotError::ParseSnapshotArchiveFileNameError(archive_filename.to_string())
1853    })
1854}
1855
1856/// Walk down the snapshot archive to collect snapshot archive file info
1857fn get_snapshot_archives<T, F>(snapshot_archives_dir: &Path, cb: F) -> Vec<T>
1858where
1859    F: Fn(PathBuf) -> Result<T>,
1860{
1861    let walk_dir = |dir: &Path| -> Vec<T> {
1862        let entry_iter = fs::read_dir(dir);
1863        match entry_iter {
1864            Err(err) => {
1865                info!(
1866                    "Unable to read snapshot archives directory: err: {}, path: {}",
1867                    err,
1868                    dir.display()
1869                );
1870                vec![]
1871            }
1872            Ok(entries) => entries
1873                .filter_map(|entry| entry.map_or(None, |entry| cb(entry.path()).ok()))
1874                .collect(),
1875        }
1876    };
1877
1878    let mut ret = walk_dir(snapshot_archives_dir);
1879    let remote_dir = build_snapshot_archives_remote_dir(snapshot_archives_dir);
1880    if remote_dir.exists() {
1881        ret.append(&mut walk_dir(remote_dir.as_ref()));
1882    }
1883    ret
1884}
1885
1886/// Get a list of the full snapshot archives from a directory
1887pub fn get_full_snapshot_archives(
1888    full_snapshot_archives_dir: impl AsRef<Path>,
1889) -> Vec<FullSnapshotArchiveInfo> {
1890    get_snapshot_archives(
1891        full_snapshot_archives_dir.as_ref(),
1892        FullSnapshotArchiveInfo::new_from_path,
1893    )
1894}
1895
1896/// Get a list of the incremental snapshot archives from a directory
1897pub fn get_incremental_snapshot_archives(
1898    incremental_snapshot_archives_dir: impl AsRef<Path>,
1899) -> Vec<IncrementalSnapshotArchiveInfo> {
1900    get_snapshot_archives(
1901        incremental_snapshot_archives_dir.as_ref(),
1902        IncrementalSnapshotArchiveInfo::new_from_path,
1903    )
1904}
1905
1906/// Get the highest slot of the full snapshot archives in a directory
1907pub fn get_highest_full_snapshot_archive_slot(
1908    full_snapshot_archives_dir: impl AsRef<Path>,
1909) -> Option<Slot> {
1910    get_highest_full_snapshot_archive_info(full_snapshot_archives_dir)
1911        .map(|full_snapshot_archive_info| full_snapshot_archive_info.slot())
1912}
1913
1914/// Get the highest slot of the incremental snapshot archives in a directory, for a given full
1915/// snapshot slot
1916pub fn get_highest_incremental_snapshot_archive_slot(
1917    incremental_snapshot_archives_dir: impl AsRef<Path>,
1918    full_snapshot_slot: Slot,
1919) -> Option<Slot> {
1920    get_highest_incremental_snapshot_archive_info(
1921        incremental_snapshot_archives_dir,
1922        full_snapshot_slot,
1923    )
1924    .map(|incremental_snapshot_archive_info| incremental_snapshot_archive_info.slot())
1925}
1926
1927/// Get the path (and metadata) for the full snapshot archive with the highest slot in a directory
1928pub fn get_highest_full_snapshot_archive_info(
1929    full_snapshot_archives_dir: impl AsRef<Path>,
1930) -> Option<FullSnapshotArchiveInfo> {
1931    let mut full_snapshot_archives = get_full_snapshot_archives(full_snapshot_archives_dir);
1932    full_snapshot_archives.sort_unstable();
1933    full_snapshot_archives.into_iter().rev().next()
1934}
1935
1936/// Get the path for the incremental snapshot archive with the highest slot, for a given full
1937/// snapshot slot, in a directory
1938pub fn get_highest_incremental_snapshot_archive_info(
1939    incremental_snapshot_archives_dir: impl AsRef<Path>,
1940    full_snapshot_slot: Slot,
1941) -> Option<IncrementalSnapshotArchiveInfo> {
1942    // Since we want to filter down to only the incremental snapshot archives that have the same
1943    // full snapshot slot as the value passed in, perform the filtering before sorting to avoid
1944    // doing unnecessary work.
1945    let mut incremental_snapshot_archives =
1946        get_incremental_snapshot_archives(incremental_snapshot_archives_dir)
1947            .into_iter()
1948            .filter(|incremental_snapshot_archive_info| {
1949                incremental_snapshot_archive_info.base_slot() == full_snapshot_slot
1950            })
1951            .collect::<Vec<_>>();
1952    incremental_snapshot_archives.sort_unstable();
1953    incremental_snapshot_archives.into_iter().rev().next()
1954}
1955
1956pub fn purge_old_snapshot_archives(
1957    full_snapshot_archives_dir: impl AsRef<Path>,
1958    incremental_snapshot_archives_dir: impl AsRef<Path>,
1959    maximum_full_snapshot_archives_to_retain: usize,
1960    maximum_incremental_snapshot_archives_to_retain: usize,
1961) {
1962    info!(
1963        "Purging old full snapshot archives in {}, retaining up to {} full snapshots",
1964        full_snapshot_archives_dir.as_ref().display(),
1965        maximum_full_snapshot_archives_to_retain
1966    );
1967
1968    let mut full_snapshot_archives = get_full_snapshot_archives(&full_snapshot_archives_dir);
1969    full_snapshot_archives.sort_unstable();
1970    full_snapshot_archives.reverse();
1971
1972    let num_to_retain = full_snapshot_archives.len().min(
1973        maximum_full_snapshot_archives_to_retain
1974            .max(1 /* Always keep at least one full snapshot */),
1975    );
1976    trace!(
1977        "There are {} full snapshot archives, retaining {}",
1978        full_snapshot_archives.len(),
1979        num_to_retain,
1980    );
1981
1982    let (full_snapshot_archives_to_retain, full_snapshot_archives_to_remove) =
1983        if full_snapshot_archives.is_empty() {
1984            None
1985        } else {
1986            Some(full_snapshot_archives.split_at(num_to_retain))
1987        }
1988        .unwrap_or_default();
1989
1990    let retained_full_snapshot_slots = full_snapshot_archives_to_retain
1991        .iter()
1992        .map(|ai| ai.slot())
1993        .collect::<HashSet<_>>();
1994
1995    fn remove_archives<T: SnapshotArchiveInfoGetter>(archives: &[T]) {
1996        for path in archives.iter().map(|a| a.path()) {
1997            trace!("Removing snapshot archive: {}", path.display());
1998            fs::remove_file(path)
1999                .unwrap_or_else(|err| info!("Failed to remove {}: {}", path.display(), err));
2000        }
2001    }
2002    remove_archives(full_snapshot_archives_to_remove);
2003
2004    info!(
2005        "Purging old incremental snapshot archives in {}, retaining up to {} incremental snapshots",
2006        incremental_snapshot_archives_dir.as_ref().display(),
2007        maximum_incremental_snapshot_archives_to_retain
2008    );
2009    let mut incremental_snapshot_archives_by_base_slot = HashMap::<Slot, Vec<_>>::new();
2010    for incremental_snapshot_archive in
2011        get_incremental_snapshot_archives(&incremental_snapshot_archives_dir)
2012    {
2013        incremental_snapshot_archives_by_base_slot
2014            .entry(incremental_snapshot_archive.base_slot())
2015            .or_default()
2016            .push(incremental_snapshot_archive)
2017    }
2018
2019    let highest_full_snapshot_slot = retained_full_snapshot_slots.iter().max().copied();
2020    for (base_slot, mut incremental_snapshot_archives) in incremental_snapshot_archives_by_base_slot
2021    {
2022        incremental_snapshot_archives.sort_unstable();
2023        let num_to_retain = if Some(base_slot) == highest_full_snapshot_slot {
2024            maximum_incremental_snapshot_archives_to_retain
2025        } else {
2026            usize::from(retained_full_snapshot_slots.contains(&base_slot))
2027        };
2028        trace!(
2029            "There are {} incremental snapshot archives for base slot {}, removing {} of them",
2030            incremental_snapshot_archives.len(),
2031            base_slot,
2032            incremental_snapshot_archives
2033                .len()
2034                .saturating_sub(num_to_retain),
2035        );
2036
2037        incremental_snapshot_archives.truncate(
2038            incremental_snapshot_archives
2039                .len()
2040                .saturating_sub(num_to_retain),
2041        );
2042        remove_archives(&incremental_snapshot_archives);
2043    }
2044}
2045
2046fn unpack_snapshot_local(
2047    shared_buffer: SharedBuffer,
2048    ledger_dir: &Path,
2049    account_paths: &[PathBuf],
2050    parallel_divisions: usize,
2051) -> Result<UnpackedAppendVecMap> {
2052    assert!(parallel_divisions > 0);
2053
2054    // allocate all readers before any readers start reading
2055    let readers = (0..parallel_divisions)
2056        .map(|_| SharedBufferReader::new(&shared_buffer))
2057        .collect::<Vec<_>>();
2058
2059    // create 'parallel_divisions' # of parallel workers, each responsible for 1/parallel_divisions of all the files to extract.
2060    let all_unpacked_append_vec_map = readers
2061        .into_par_iter()
2062        .enumerate()
2063        .map(|(index, reader)| {
2064            let parallel_selector = Some(ParallelSelector {
2065                index,
2066                divisions: parallel_divisions,
2067            });
2068            let mut archive = Archive::new(reader);
2069            unpack_snapshot(&mut archive, ledger_dir, account_paths, parallel_selector)
2070        })
2071        .collect::<Vec<_>>();
2072
2073    let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
2074    for h in all_unpacked_append_vec_map {
2075        unpacked_append_vec_map.extend(h?);
2076    }
2077
2078    Ok(unpacked_append_vec_map)
2079}
2080
2081fn untar_snapshot_create_shared_buffer(
2082    snapshot_tar: &Path,
2083    archive_format: ArchiveFormat,
2084) -> SharedBuffer {
2085    let open_file = || File::open(snapshot_tar).unwrap();
2086    match archive_format {
2087        ArchiveFormat::TarBzip2 => SharedBuffer::new(BzDecoder::new(BufReader::new(open_file()))),
2088        ArchiveFormat::TarGzip => SharedBuffer::new(GzDecoder::new(BufReader::new(open_file()))),
2089        ArchiveFormat::TarZstd => SharedBuffer::new(
2090            zstd::stream::read::Decoder::new(BufReader::new(open_file())).unwrap(),
2091        ),
2092        ArchiveFormat::TarLz4 => {
2093            SharedBuffer::new(lz4::Decoder::new(BufReader::new(open_file())).unwrap())
2094        }
2095        ArchiveFormat::Tar => SharedBuffer::new(BufReader::new(open_file())),
2096    }
2097}
2098
2099fn untar_snapshot_in<P: AsRef<Path>>(
2100    snapshot_tar: P,
2101    unpack_dir: &Path,
2102    account_paths: &[PathBuf],
2103    archive_format: ArchiveFormat,
2104    parallel_divisions: usize,
2105) -> Result<UnpackedAppendVecMap> {
2106    let shared_buffer = untar_snapshot_create_shared_buffer(snapshot_tar.as_ref(), archive_format);
2107    unpack_snapshot_local(shared_buffer, unpack_dir, account_paths, parallel_divisions)
2108}
2109
2110fn verify_unpacked_snapshots_dir_and_version(
2111    unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
2112) -> Result<(SnapshotVersion, BankSnapshotInfo)> {
2113    info!(
2114        "snapshot version: {}",
2115        &unpacked_snapshots_dir_and_version.snapshot_version
2116    );
2117
2118    let snapshot_version = unpacked_snapshots_dir_and_version.snapshot_version;
2119    let mut bank_snapshots =
2120        get_bank_snapshots_post(&unpacked_snapshots_dir_and_version.unpacked_snapshots_dir);
2121    if bank_snapshots.len() > 1 {
2122        return Err(get_io_error("invalid snapshot format"));
2123    }
2124    let root_paths = bank_snapshots
2125        .pop()
2126        .ok_or_else(|| get_io_error("No snapshots found in snapshots directory"))?;
2127    Ok((snapshot_version, root_paths))
2128}
2129
2130fn bank_fields_from_snapshots(
2131    full_snapshot_unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
2132    incremental_snapshot_unpacked_snapshots_dir_and_version: Option<
2133        &UnpackedSnapshotsDirAndVersion,
2134    >,
2135) -> Result<BankFieldsToDeserialize> {
2136    let (full_snapshot_version, full_snapshot_root_paths) =
2137        verify_unpacked_snapshots_dir_and_version(
2138            full_snapshot_unpacked_snapshots_dir_and_version,
2139        )?;
2140    let (incremental_snapshot_version, incremental_snapshot_root_paths) =
2141        if let Some(snapshot_unpacked_snapshots_dir_and_version) =
2142            incremental_snapshot_unpacked_snapshots_dir_and_version
2143        {
2144            let (snapshot_version, bank_snapshot_info) = verify_unpacked_snapshots_dir_and_version(
2145                snapshot_unpacked_snapshots_dir_and_version,
2146            )?;
2147            (Some(snapshot_version), Some(bank_snapshot_info))
2148        } else {
2149            (None, None)
2150        };
2151    info!(
2152        "Loading bank from full snapshot {} and incremental snapshot {:?}",
2153        full_snapshot_root_paths.snapshot_path().display(),
2154        incremental_snapshot_root_paths
2155            .as_ref()
2156            .map(|paths| paths.snapshot_path()),
2157    );
2158
2159    let snapshot_root_paths = SnapshotRootPaths {
2160        full_snapshot_root_file_path: full_snapshot_root_paths.snapshot_path(),
2161        incremental_snapshot_root_file_path: incremental_snapshot_root_paths
2162            .map(|root_paths| root_paths.snapshot_path()),
2163    };
2164
2165    deserialize_snapshot_data_files(&snapshot_root_paths, |snapshot_streams| {
2166        Ok(
2167            match incremental_snapshot_version.unwrap_or(full_snapshot_version) {
2168                SnapshotVersion::V1_2_0 => fields_from_streams(SerdeStyle::Newer, snapshot_streams)
2169                    .map(|(bank_fields, _accountsdb_fields)| bank_fields),
2170            }?,
2171        )
2172    })
2173}
2174
2175#[allow(clippy::too_many_arguments)]
2176fn rebuild_bank_from_snapshots(
2177    full_snapshot_unpacked_snapshots_dir_and_version: &UnpackedSnapshotsDirAndVersion,
2178    incremental_snapshot_unpacked_snapshots_dir_and_version: Option<
2179        &UnpackedSnapshotsDirAndVersion,
2180    >,
2181    account_paths: &[PathBuf],
2182    storage_and_next_append_vec_id: StorageAndNextAppendVecId,
2183    genesis_config: &GenesisConfig,
2184    runtime_config: &RuntimeConfig,
2185    debug_keys: Option<Arc<HashSet<Pubkey>>>,
2186    additional_builtins: Option<&Builtins>,
2187    account_secondary_indexes: AccountSecondaryIndexes,
2188    limit_load_slot_count_from_snapshot: Option<usize>,
2189    shrink_ratio: AccountShrinkThreshold,
2190    verify_index: bool,
2191    accounts_db_config: Option<AccountsDbConfig>,
2192    accounts_update_notifier: Option<AccountsUpdateNotifier>,
2193    exit: &Arc<AtomicBool>,
2194) -> Result<Bank> {
2195    let (full_snapshot_version, full_snapshot_root_paths) =
2196        verify_unpacked_snapshots_dir_and_version(
2197            full_snapshot_unpacked_snapshots_dir_and_version,
2198        )?;
2199    let (incremental_snapshot_version, incremental_snapshot_root_paths) =
2200        if let Some(snapshot_unpacked_snapshots_dir_and_version) =
2201            incremental_snapshot_unpacked_snapshots_dir_and_version
2202        {
2203            let (snapshot_version, bank_snapshot_info) = verify_unpacked_snapshots_dir_and_version(
2204                snapshot_unpacked_snapshots_dir_and_version,
2205            )?;
2206            (Some(snapshot_version), Some(bank_snapshot_info))
2207        } else {
2208            (None, None)
2209        };
2210    info!(
2211        "Loading bank from full snapshot {} and incremental snapshot {:?}",
2212        full_snapshot_root_paths.snapshot_path().display(),
2213        incremental_snapshot_root_paths
2214            .as_ref()
2215            .map(|paths| paths.snapshot_path()),
2216    );
2217
2218    let snapshot_root_paths = SnapshotRootPaths {
2219        full_snapshot_root_file_path: full_snapshot_root_paths.snapshot_path(),
2220        incremental_snapshot_root_file_path: incremental_snapshot_root_paths
2221            .map(|root_paths| root_paths.snapshot_path()),
2222    };
2223
2224    let bank = deserialize_snapshot_data_files(&snapshot_root_paths, |snapshot_streams| {
2225        Ok(
2226            match incremental_snapshot_version.unwrap_or(full_snapshot_version) {
2227                SnapshotVersion::V1_2_0 => bank_from_streams(
2228                    SerdeStyle::Newer,
2229                    snapshot_streams,
2230                    account_paths,
2231                    storage_and_next_append_vec_id,
2232                    genesis_config,
2233                    runtime_config,
2234                    debug_keys,
2235                    additional_builtins,
2236                    account_secondary_indexes,
2237                    limit_load_slot_count_from_snapshot,
2238                    shrink_ratio,
2239                    verify_index,
2240                    accounts_db_config,
2241                    accounts_update_notifier,
2242                    exit,
2243                ),
2244            }?,
2245        )
2246    })?;
2247
2248    // The status cache is rebuilt from the latest snapshot.  So, if there's an incremental
2249    // snapshot, use that.  Otherwise use the full snapshot.
2250    let status_cache_path = incremental_snapshot_unpacked_snapshots_dir_and_version
2251        .map_or_else(
2252            || {
2253                full_snapshot_unpacked_snapshots_dir_and_version
2254                    .unpacked_snapshots_dir
2255                    .as_path()
2256            },
2257            |unpacked_snapshots_dir_and_version| {
2258                unpacked_snapshots_dir_and_version
2259                    .unpacked_snapshots_dir
2260                    .as_path()
2261            },
2262        )
2263        .join(SNAPSHOT_STATUS_CACHE_FILENAME);
2264    let slot_deltas = deserialize_snapshot_data_file(&status_cache_path, |stream| {
2265        info!(
2266            "Rebuilding status cache from {}",
2267            status_cache_path.display()
2268        );
2269        let slot_deltas: Vec<BankSlotDelta> = bincode::options()
2270            .with_limit(MAX_SNAPSHOT_DATA_FILE_SIZE)
2271            .with_fixint_encoding()
2272            .allow_trailing_bytes()
2273            .deserialize_from(stream)?;
2274        Ok(slot_deltas)
2275    })?;
2276
2277    verify_slot_deltas(slot_deltas.as_slice(), &bank)?;
2278
2279    bank.status_cache.write().unwrap().append(&slot_deltas);
2280
2281    info!("Loaded bank for slot: {}", bank.slot());
2282    Ok(bank)
2283}
2284
2285/// Verify that the snapshot's slot deltas are not corrupt/invalid
2286fn verify_slot_deltas(
2287    slot_deltas: &[BankSlotDelta],
2288    bank: &Bank,
2289) -> std::result::Result<(), VerifySlotDeltasError> {
2290    let info = verify_slot_deltas_structural(slot_deltas, bank.slot())?;
2291    verify_slot_deltas_with_history(&info.slots, &bank.get_slot_history(), bank.slot())
2292}
2293
2294/// Verify that the snapshot's slot deltas are not corrupt/invalid
2295/// These checks are simple/structural
2296fn verify_slot_deltas_structural(
2297    slot_deltas: &[BankSlotDelta],
2298    bank_slot: Slot,
2299) -> std::result::Result<VerifySlotDeltasStructuralInfo, VerifySlotDeltasError> {
2300    // there should not be more entries than that status cache's max
2301    let num_entries = slot_deltas.len();
2302    if num_entries > status_cache::MAX_CACHE_ENTRIES {
2303        return Err(VerifySlotDeltasError::TooManyEntries(
2304            num_entries,
2305            status_cache::MAX_CACHE_ENTRIES,
2306        ));
2307    }
2308
2309    let mut slots_seen_so_far = HashSet::new();
2310    for &(slot, is_root, ..) in slot_deltas {
2311        // all entries should be roots
2312        if !is_root {
2313            return Err(VerifySlotDeltasError::SlotIsNotRoot(slot));
2314        }
2315
2316        // all entries should be for slots less than or equal to the bank's slot
2317        if slot > bank_slot {
2318            return Err(VerifySlotDeltasError::SlotGreaterThanMaxRoot(
2319                slot, bank_slot,
2320            ));
2321        }
2322
2323        // there should only be one entry per slot
2324        let is_duplicate = !slots_seen_so_far.insert(slot);
2325        if is_duplicate {
2326            return Err(VerifySlotDeltasError::SlotHasMultipleEntries(slot));
2327        }
2328    }
2329
2330    // detect serious logic error for future careless changes. :)
2331    assert_eq!(slots_seen_so_far.len(), slot_deltas.len());
2332
2333    Ok(VerifySlotDeltasStructuralInfo {
2334        slots: slots_seen_so_far,
2335    })
2336}
2337
2338/// Computed information from `verify_slot_deltas_structural()`, that may be reused/useful later.
2339#[derive(Debug, PartialEq, Eq)]
2340struct VerifySlotDeltasStructuralInfo {
2341    /// All the slots in the slot deltas
2342    slots: HashSet<Slot>,
2343}
2344
2345/// Verify that the snapshot's slot deltas are not corrupt/invalid
2346/// These checks use the slot history for verification
2347fn verify_slot_deltas_with_history(
2348    slots_from_slot_deltas: &HashSet<Slot>,
2349    slot_history: &SlotHistory,
2350    bank_slot: Slot,
2351) -> std::result::Result<(), VerifySlotDeltasError> {
2352    // ensure the slot history is valid (as much as possible), since we're using it to verify the
2353    // slot deltas
2354    if slot_history.newest() != bank_slot {
2355        return Err(VerifySlotDeltasError::BadSlotHistory);
2356    }
2357
2358    // all slots in the slot deltas should be in the bank's slot history
2359    let slot_missing_from_history = slots_from_slot_deltas
2360        .iter()
2361        .find(|slot| slot_history.check(**slot) != Check::Found);
2362    if let Some(slot) = slot_missing_from_history {
2363        return Err(VerifySlotDeltasError::SlotNotFoundInHistory(*slot));
2364    }
2365
2366    // all slots in the history should be in the slot deltas (up to MAX_CACHE_ENTRIES)
2367    // this ensures nothing was removed from the status cache
2368    //
2369    // go through the slot history and make sure there's an entry for each slot
2370    // note: it's important to go highest-to-lowest since the status cache removes
2371    // older entries first
2372    // note: we already checked above that `bank_slot == slot_history.newest()`
2373    let slot_missing_from_deltas = (slot_history.oldest()..=slot_history.newest())
2374        .rev()
2375        .filter(|slot| slot_history.check(*slot) == Check::Found)
2376        .take(status_cache::MAX_CACHE_ENTRIES)
2377        .find(|slot| !slots_from_slot_deltas.contains(slot));
2378    if let Some(slot) = slot_missing_from_deltas {
2379        return Err(VerifySlotDeltasError::SlotNotFoundInDeltas(slot));
2380    }
2381
2382    Ok(())
2383}
2384
2385pub(crate) fn get_snapshot_file_name(slot: Slot) -> String {
2386    slot.to_string()
2387}
2388
2389pub(crate) fn get_bank_snapshots_dir<P: AsRef<Path>>(path: P, slot: Slot) -> PathBuf {
2390    path.as_ref().join(slot.to_string())
2391}
2392
2393fn get_io_error(error: &str) -> SnapshotError {
2394    warn!("Snapshot Error: {:?}", error);
2395    SnapshotError::Io(IoError::new(ErrorKind::Other, error))
2396}
2397
2398#[derive(Debug, Copy, Clone)]
2399/// allow tests to specify what happened to the serialized format
2400pub enum VerifyBank {
2401    /// the bank's serialized format is expected to be identical to what we are comparing against
2402    Deterministic,
2403    /// the serialized bank was 'reserialized' into a non-deterministic format at the specified slot
2404    /// so, deserialize both files and compare deserialized results
2405    NonDeterministic(Slot),
2406}
2407
2408pub fn verify_snapshot_archive<P, Q, R>(
2409    snapshot_archive: P,
2410    snapshots_to_verify: Q,
2411    storages_to_verify: R,
2412    archive_format: ArchiveFormat,
2413    verify_bank: VerifyBank,
2414) where
2415    P: AsRef<Path>,
2416    Q: AsRef<Path>,
2417    R: AsRef<Path>,
2418{
2419    let temp_dir = tempfile::TempDir::new().unwrap();
2420    let unpack_dir = temp_dir.path();
2421    let account_dir = create_accounts_run_and_snapshot_dirs(unpack_dir).unwrap().0;
2422    untar_snapshot_in(
2423        snapshot_archive,
2424        unpack_dir,
2425        &[account_dir.clone()],
2426        archive_format,
2427        1,
2428    )
2429    .unwrap();
2430
2431    // Check snapshots are the same
2432    let unpacked_snapshots = unpack_dir.join("snapshots");
2433    if let VerifyBank::NonDeterministic(slot) = verify_bank {
2434        // file contents may be different, but deserialized structs should be equal
2435        let slot = slot.to_string();
2436        let snapshot_slot_dir = snapshots_to_verify.as_ref().join(&slot);
2437        let p1 = snapshots_to_verify.as_ref().join(&slot).join(&slot);
2438        let p2 = unpacked_snapshots.join(&slot).join(&slot);
2439        assert!(crate::serde_snapshot::compare_two_serialized_banks(&p1, &p2).unwrap());
2440        std::fs::remove_file(p1).unwrap();
2441        std::fs::remove_file(p2).unwrap();
2442
2443        // The new the status_cache file is inside the slot directory together with the snapshot file.
2444        // When unpacking an archive, the status_cache file from the archive is one-level up outside of
2445        //  the slot direcotry.
2446        // The unpacked status_cache file need to be put back into the slot directory for the directory
2447        // comparison to pass.
2448        let existing_unpacked_status_cache_file =
2449            unpacked_snapshots.join(SNAPSHOT_STATUS_CACHE_FILENAME);
2450        let new_unpacked_status_cache_file = unpacked_snapshots
2451            .join(&slot)
2452            .join(SNAPSHOT_STATUS_CACHE_FILENAME);
2453        fs::rename(
2454            existing_unpacked_status_cache_file,
2455            new_unpacked_status_cache_file,
2456        )
2457        .unwrap();
2458
2459        let accounts_hardlinks_dir = snapshot_slot_dir.join("accounts_hardlinks");
2460        if accounts_hardlinks_dir.is_dir() {
2461            // This directory contain symlinks to all <account_path>/snapshot/<slot> directories.
2462            // They should all be removed.
2463            for entry in fs::read_dir(&accounts_hardlinks_dir).unwrap() {
2464                let dst_path = fs::read_link(entry.unwrap().path()).unwrap();
2465                fs::remove_dir_all(dst_path).unwrap();
2466            }
2467            std::fs::remove_dir_all(accounts_hardlinks_dir).unwrap();
2468        }
2469
2470        let version_path = snapshot_slot_dir.join(SNAPSHOT_VERSION_FILENAME);
2471        if version_path.is_file() {
2472            std::fs::remove_file(version_path).unwrap();
2473        }
2474
2475        let state_complete_path = snapshot_slot_dir.join(SNAPSHOT_STATE_COMPLETE_FILENAME);
2476        if state_complete_path.is_file() {
2477            std::fs::remove_file(state_complete_path).unwrap();
2478        }
2479    }
2480
2481    assert!(!dir_diff::is_different(&snapshots_to_verify, unpacked_snapshots).unwrap());
2482
2483    // In the unarchiving case, there is an extra empty "accounts" directory. The account
2484    // files in the archive accounts/ have been expanded to [account_paths].
2485    // Remove the empty "accounts" directory for the directory comparison below.
2486    // In some test cases the directory to compare do not come from unarchiving.
2487    // Ignore the error when this directory does not exist.
2488    _ = std::fs::remove_dir(account_dir.join("accounts"));
2489    // Check the account entries are the same
2490    assert!(!dir_diff::is_different(&storages_to_verify, account_dir).unwrap());
2491}
2492
2493/// Remove outdated bank snapshots
2494pub fn purge_old_bank_snapshots(
2495    bank_snapshots_dir: impl AsRef<Path>,
2496    num_bank_snapshots_to_retain: usize,
2497) {
2498    let do_purge = |mut bank_snapshots: Vec<BankSnapshotInfo>| {
2499        bank_snapshots.sort_unstable();
2500        bank_snapshots
2501            .into_iter()
2502            .rev()
2503            .skip(num_bank_snapshots_to_retain)
2504            .for_each(|bank_snapshot| {
2505                let r = remove_bank_snapshot(bank_snapshot.slot, &bank_snapshots_dir);
2506                if r.is_err() {
2507                    warn!(
2508                        "Couldn't remove bank snapshot at: {}",
2509                        bank_snapshot.snapshot_dir.display()
2510                    );
2511                }
2512            })
2513    };
2514
2515    do_purge(get_bank_snapshots_pre(&bank_snapshots_dir));
2516    do_purge(get_bank_snapshots_post(&bank_snapshots_dir));
2517}
2518
2519/// Get the snapshot storages for this bank
2520pub fn get_snapshot_storages(bank: &Bank) -> Vec<Arc<AccountStorageEntry>> {
2521    let mut measure_snapshot_storages = Measure::start("snapshot-storages");
2522    let snapshot_storages = bank.get_snapshot_storages(None);
2523    measure_snapshot_storages.stop();
2524    datapoint_info!(
2525        "get_snapshot_storages",
2526        (
2527            "snapshot-storages-time-ms",
2528            measure_snapshot_storages.as_ms(),
2529            i64
2530        ),
2531    );
2532
2533    snapshot_storages
2534}
2535
2536/// Convenience function to create a full snapshot archive out of any Bank, regardless of state.
2537/// The Bank will be frozen during the process.
2538/// This is only called from ledger-tool or tests. Warping is a special case as well.
2539///
2540/// Requires:
2541///     - `bank` is complete
2542pub fn bank_to_full_snapshot_archive(
2543    bank_snapshots_dir: impl AsRef<Path>,
2544    bank: &Bank,
2545    snapshot_version: Option<SnapshotVersion>,
2546    full_snapshot_archives_dir: impl AsRef<Path>,
2547    incremental_snapshot_archives_dir: impl AsRef<Path>,
2548    archive_format: ArchiveFormat,
2549    maximum_full_snapshot_archives_to_retain: usize,
2550    maximum_incremental_snapshot_archives_to_retain: usize,
2551) -> Result<FullSnapshotArchiveInfo> {
2552    let snapshot_version = snapshot_version.unwrap_or_default();
2553
2554    assert!(bank.is_complete());
2555    bank.squash(); // Bank may not be a root
2556    bank.force_flush_accounts_cache();
2557    bank.clean_accounts(Some(bank.slot()));
2558    bank.update_accounts_hash(CalcAccountsHashDataSource::Storages, false, false);
2559    bank.rehash(); // Bank accounts may have been manually modified by the caller
2560
2561    let temp_dir = tempfile::tempdir_in(bank_snapshots_dir)?;
2562    let snapshot_storages = bank.get_snapshot_storages(None);
2563    let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas();
2564    let bank_snapshot_info = add_bank_snapshot(
2565        &temp_dir,
2566        bank,
2567        &snapshot_storages,
2568        snapshot_version,
2569        slot_deltas,
2570    )?;
2571
2572    package_and_archive_full_snapshot(
2573        bank,
2574        &bank_snapshot_info,
2575        &temp_dir,
2576        full_snapshot_archives_dir,
2577        incremental_snapshot_archives_dir,
2578        snapshot_storages,
2579        archive_format,
2580        snapshot_version,
2581        maximum_full_snapshot_archives_to_retain,
2582        maximum_incremental_snapshot_archives_to_retain,
2583    )
2584}
2585
2586/// Convenience function to create an incremental snapshot archive out of any Bank, regardless of
2587/// state.  The Bank will be frozen during the process.
2588/// This is only called from ledger-tool or tests. Warping is a special case as well.
2589///
2590/// Requires:
2591///     - `bank` is complete
2592///     - `bank`'s slot is greater than `full_snapshot_slot`
2593pub fn bank_to_incremental_snapshot_archive(
2594    bank_snapshots_dir: impl AsRef<Path>,
2595    bank: &Bank,
2596    full_snapshot_slot: Slot,
2597    snapshot_version: Option<SnapshotVersion>,
2598    full_snapshot_archives_dir: impl AsRef<Path>,
2599    incremental_snapshot_archives_dir: impl AsRef<Path>,
2600    archive_format: ArchiveFormat,
2601    maximum_full_snapshot_archives_to_retain: usize,
2602    maximum_incremental_snapshot_archives_to_retain: usize,
2603) -> Result<IncrementalSnapshotArchiveInfo> {
2604    let snapshot_version = snapshot_version.unwrap_or_default();
2605
2606    assert!(bank.is_complete());
2607    assert!(bank.slot() > full_snapshot_slot);
2608    bank.squash(); // Bank may not be a root
2609    bank.force_flush_accounts_cache();
2610    bank.clean_accounts(Some(full_snapshot_slot));
2611    bank.update_accounts_hash(CalcAccountsHashDataSource::Storages, false, false);
2612    bank.rehash(); // Bank accounts may have been manually modified by the caller
2613
2614    let temp_dir = tempfile::tempdir_in(bank_snapshots_dir)?;
2615    let snapshot_storages = bank.get_snapshot_storages(Some(full_snapshot_slot));
2616    let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas();
2617    let bank_snapshot_info = add_bank_snapshot(
2618        &temp_dir,
2619        bank,
2620        &snapshot_storages,
2621        snapshot_version,
2622        slot_deltas,
2623    )?;
2624
2625    package_and_archive_incremental_snapshot(
2626        bank,
2627        full_snapshot_slot,
2628        &bank_snapshot_info,
2629        &temp_dir,
2630        full_snapshot_archives_dir,
2631        incremental_snapshot_archives_dir,
2632        snapshot_storages,
2633        archive_format,
2634        snapshot_version,
2635        maximum_full_snapshot_archives_to_retain,
2636        maximum_incremental_snapshot_archives_to_retain,
2637    )
2638}
2639
2640/// Helper function to hold shared code to package, process, and archive full snapshots
2641#[allow(clippy::too_many_arguments)]
2642pub fn package_and_archive_full_snapshot(
2643    bank: &Bank,
2644    bank_snapshot_info: &BankSnapshotInfo,
2645    bank_snapshots_dir: impl AsRef<Path>,
2646    full_snapshot_archives_dir: impl AsRef<Path>,
2647    incremental_snapshot_archives_dir: impl AsRef<Path>,
2648    snapshot_storages: Vec<Arc<AccountStorageEntry>>,
2649    archive_format: ArchiveFormat,
2650    snapshot_version: SnapshotVersion,
2651    maximum_full_snapshot_archives_to_retain: usize,
2652    maximum_incremental_snapshot_archives_to_retain: usize,
2653) -> Result<FullSnapshotArchiveInfo> {
2654    let accounts_package = AccountsPackage::new_for_snapshot(
2655        AccountsPackageType::Snapshot(SnapshotType::FullSnapshot),
2656        bank,
2657        bank_snapshot_info,
2658        bank_snapshots_dir,
2659        &full_snapshot_archives_dir,
2660        &incremental_snapshot_archives_dir,
2661        snapshot_storages,
2662        archive_format,
2663        snapshot_version,
2664        None,
2665    )?;
2666
2667    let accounts_hash = bank
2668        .get_accounts_hash()
2669        .expect("accounts hash is required for snapshot");
2670    crate::serde_snapshot::reserialize_bank_with_new_accounts_hash(
2671        accounts_package.snapshot_links_dir(),
2672        accounts_package.slot,
2673        &accounts_hash,
2674        None,
2675    );
2676
2677    let snapshot_package = SnapshotPackage::new(accounts_package, accounts_hash);
2678    archive_snapshot_package(
2679        &snapshot_package,
2680        full_snapshot_archives_dir,
2681        incremental_snapshot_archives_dir,
2682        maximum_full_snapshot_archives_to_retain,
2683        maximum_incremental_snapshot_archives_to_retain,
2684    )?;
2685
2686    Ok(FullSnapshotArchiveInfo::new(
2687        snapshot_package.snapshot_archive_info,
2688    ))
2689}
2690
2691/// Helper function to hold shared code to package, process, and archive incremental snapshots
2692#[allow(clippy::too_many_arguments)]
2693pub fn package_and_archive_incremental_snapshot(
2694    bank: &Bank,
2695    incremental_snapshot_base_slot: Slot,
2696    bank_snapshot_info: &BankSnapshotInfo,
2697    bank_snapshots_dir: impl AsRef<Path>,
2698    full_snapshot_archives_dir: impl AsRef<Path>,
2699    incremental_snapshot_archives_dir: impl AsRef<Path>,
2700    snapshot_storages: Vec<Arc<AccountStorageEntry>>,
2701    archive_format: ArchiveFormat,
2702    snapshot_version: SnapshotVersion,
2703    maximum_full_snapshot_archives_to_retain: usize,
2704    maximum_incremental_snapshot_archives_to_retain: usize,
2705) -> Result<IncrementalSnapshotArchiveInfo> {
2706    let accounts_package = AccountsPackage::new_for_snapshot(
2707        AccountsPackageType::Snapshot(SnapshotType::IncrementalSnapshot(
2708            incremental_snapshot_base_slot,
2709        )),
2710        bank,
2711        bank_snapshot_info,
2712        bank_snapshots_dir,
2713        &full_snapshot_archives_dir,
2714        &incremental_snapshot_archives_dir,
2715        snapshot_storages,
2716        archive_format,
2717        snapshot_version,
2718        None,
2719    )?;
2720
2721    let accounts_hash = bank
2722        .get_accounts_hash()
2723        .expect("accounts hash is required for snapshot");
2724    crate::serde_snapshot::reserialize_bank_with_new_accounts_hash(
2725        accounts_package.snapshot_links_dir(),
2726        accounts_package.slot,
2727        &accounts_hash,
2728        None,
2729    );
2730
2731    let snapshot_package = SnapshotPackage::new(accounts_package, accounts_hash);
2732    archive_snapshot_package(
2733        &snapshot_package,
2734        full_snapshot_archives_dir,
2735        incremental_snapshot_archives_dir,
2736        maximum_full_snapshot_archives_to_retain,
2737        maximum_incremental_snapshot_archives_to_retain,
2738    )?;
2739
2740    Ok(IncrementalSnapshotArchiveInfo::new(
2741        incremental_snapshot_base_slot,
2742        snapshot_package.snapshot_archive_info,
2743    ))
2744}
2745
2746pub fn should_take_full_snapshot(
2747    block_height: Slot,
2748    full_snapshot_archive_interval_slots: Slot,
2749) -> bool {
2750    block_height % full_snapshot_archive_interval_slots == 0
2751}
2752
2753pub fn should_take_incremental_snapshot(
2754    block_height: Slot,
2755    incremental_snapshot_archive_interval_slots: Slot,
2756    last_full_snapshot_slot: Option<Slot>,
2757) -> bool {
2758    block_height % incremental_snapshot_archive_interval_slots == 0
2759        && last_full_snapshot_slot.is_some()
2760}
2761
2762pub fn create_tmp_accounts_dir_for_tests() -> (TempDir, PathBuf) {
2763    let tmp_dir = tempfile::TempDir::new().unwrap();
2764    let account_dir = create_accounts_run_and_snapshot_dirs(&tmp_dir).unwrap().0;
2765    (tmp_dir, account_dir)
2766}
2767
2768#[cfg(test)]
2769mod tests {
2770    use {
2771        super::*,
2772        crate::{accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING, status_cache::Status},
2773        assert_matches::assert_matches,
2774        bincode::{deserialize_from, serialize_into},
2775        solana_sdk::{
2776            genesis_config::create_genesis_config,
2777            native_token::sol_to_lamports,
2778            signature::{Keypair, Signer},
2779            slot_history::SlotHistory,
2780            system_transaction,
2781            transaction::SanitizedTransaction,
2782        },
2783        std::{convert::TryFrom, mem::size_of},
2784        tempfile::NamedTempFile,
2785    };
2786
2787    #[test]
2788    fn test_serialize_snapshot_data_file_under_limit() {
2789        let temp_dir = tempfile::TempDir::new().unwrap();
2790        let expected_consumed_size = size_of::<u32>() as u64;
2791        let consumed_size = serialize_snapshot_data_file_capped(
2792            &temp_dir.path().join("data-file"),
2793            expected_consumed_size,
2794            |stream| {
2795                serialize_into(stream, &2323_u32)?;
2796                Ok(())
2797            },
2798        )
2799        .unwrap();
2800        assert_eq!(consumed_size, expected_consumed_size);
2801    }
2802
2803    #[test]
2804    fn test_serialize_snapshot_data_file_over_limit() {
2805        let temp_dir = tempfile::TempDir::new().unwrap();
2806        let expected_consumed_size = size_of::<u32>() as u64;
2807        let result = serialize_snapshot_data_file_capped(
2808            &temp_dir.path().join("data-file"),
2809            expected_consumed_size - 1,
2810            |stream| {
2811                serialize_into(stream, &2323_u32)?;
2812                Ok(())
2813            },
2814        );
2815        assert_matches!(result, Err(SnapshotError::Io(ref message)) if message.to_string().starts_with("too large snapshot data file to serialize"));
2816    }
2817
2818    #[test]
2819    fn test_deserialize_snapshot_data_file_under_limit() {
2820        let expected_data = 2323_u32;
2821        let expected_consumed_size = size_of::<u32>() as u64;
2822
2823        let temp_dir = tempfile::TempDir::new().unwrap();
2824        serialize_snapshot_data_file_capped(
2825            &temp_dir.path().join("data-file"),
2826            expected_consumed_size,
2827            |stream| {
2828                serialize_into(stream, &expected_data)?;
2829                Ok(())
2830            },
2831        )
2832        .unwrap();
2833
2834        let snapshot_root_paths = SnapshotRootPaths {
2835            full_snapshot_root_file_path: temp_dir.path().join("data-file"),
2836            incremental_snapshot_root_file_path: None,
2837        };
2838
2839        let actual_data = deserialize_snapshot_data_files_capped(
2840            &snapshot_root_paths,
2841            expected_consumed_size,
2842            |stream| {
2843                Ok(deserialize_from::<_, u32>(
2844                    &mut stream.full_snapshot_stream,
2845                )?)
2846            },
2847        )
2848        .unwrap();
2849        assert_eq!(actual_data, expected_data);
2850    }
2851
2852    #[test]
2853    fn test_deserialize_snapshot_data_file_over_limit() {
2854        let expected_data = 2323_u32;
2855        let expected_consumed_size = size_of::<u32>() as u64;
2856
2857        let temp_dir = tempfile::TempDir::new().unwrap();
2858        serialize_snapshot_data_file_capped(
2859            &temp_dir.path().join("data-file"),
2860            expected_consumed_size,
2861            |stream| {
2862                serialize_into(stream, &expected_data)?;
2863                Ok(())
2864            },
2865        )
2866        .unwrap();
2867
2868        let snapshot_root_paths = SnapshotRootPaths {
2869            full_snapshot_root_file_path: temp_dir.path().join("data-file"),
2870            incremental_snapshot_root_file_path: None,
2871        };
2872
2873        let result = deserialize_snapshot_data_files_capped(
2874            &snapshot_root_paths,
2875            expected_consumed_size - 1,
2876            |stream| {
2877                Ok(deserialize_from::<_, u32>(
2878                    &mut stream.full_snapshot_stream,
2879                )?)
2880            },
2881        );
2882        assert_matches!(result, Err(SnapshotError::Io(ref message)) if message.to_string().starts_with("too large snapshot data file to deserialize"));
2883    }
2884
2885    #[test]
2886    fn test_deserialize_snapshot_data_file_extra_data() {
2887        let expected_data = 2323_u32;
2888        let expected_consumed_size = size_of::<u32>() as u64;
2889
2890        let temp_dir = tempfile::TempDir::new().unwrap();
2891        serialize_snapshot_data_file_capped(
2892            &temp_dir.path().join("data-file"),
2893            expected_consumed_size * 2,
2894            |stream| {
2895                serialize_into(stream.by_ref(), &expected_data)?;
2896                serialize_into(stream.by_ref(), &expected_data)?;
2897                Ok(())
2898            },
2899        )
2900        .unwrap();
2901
2902        let snapshot_root_paths = SnapshotRootPaths {
2903            full_snapshot_root_file_path: temp_dir.path().join("data-file"),
2904            incremental_snapshot_root_file_path: None,
2905        };
2906
2907        let result = deserialize_snapshot_data_files_capped(
2908            &snapshot_root_paths,
2909            expected_consumed_size * 2,
2910            |stream| {
2911                Ok(deserialize_from::<_, u32>(
2912                    &mut stream.full_snapshot_stream,
2913                )?)
2914            },
2915        );
2916        assert_matches!(result, Err(SnapshotError::Io(ref message)) if message.to_string().starts_with("invalid snapshot data file"));
2917    }
2918
2919    #[test]
2920    fn test_snapshot_version_from_file_under_limit() {
2921        let file_content = SnapshotVersion::default().as_str();
2922        let mut file = NamedTempFile::new().unwrap();
2923        file.write_all(file_content.as_bytes()).unwrap();
2924        let version_from_file = snapshot_version_from_file(file.path()).unwrap();
2925        assert_eq!(version_from_file, file_content);
2926    }
2927
2928    #[test]
2929    fn test_snapshot_version_from_file_over_limit() {
2930        let over_limit_size = usize::try_from(MAX_SNAPSHOT_VERSION_FILE_SIZE + 1).unwrap();
2931        let file_content = vec![7u8; over_limit_size];
2932        let mut file = NamedTempFile::new().unwrap();
2933        file.write_all(&file_content).unwrap();
2934        assert_matches!(
2935            snapshot_version_from_file(file.path()),
2936            Err(SnapshotError::Io(ref message)) if message.to_string().starts_with("snapshot version file too large")
2937        );
2938    }
2939
2940    #[test]
2941    fn test_parse_full_snapshot_archive_filename() {
2942        assert_eq!(
2943            parse_full_snapshot_archive_filename(&format!(
2944                "snapshot-42-{}.tar.bz2",
2945                Hash::default()
2946            ))
2947            .unwrap(),
2948            (42, SnapshotHash(Hash::default()), ArchiveFormat::TarBzip2)
2949        );
2950        assert_eq!(
2951            parse_full_snapshot_archive_filename(&format!(
2952                "snapshot-43-{}.tar.zst",
2953                Hash::default()
2954            ))
2955            .unwrap(),
2956            (43, SnapshotHash(Hash::default()), ArchiveFormat::TarZstd)
2957        );
2958        assert_eq!(
2959            parse_full_snapshot_archive_filename(&format!("snapshot-44-{}.tar", Hash::default()))
2960                .unwrap(),
2961            (44, SnapshotHash(Hash::default()), ArchiveFormat::Tar)
2962        );
2963        assert_eq!(
2964            parse_full_snapshot_archive_filename(&format!(
2965                "snapshot-45-{}.tar.lz4",
2966                Hash::default()
2967            ))
2968            .unwrap(),
2969            (45, SnapshotHash(Hash::default()), ArchiveFormat::TarLz4)
2970        );
2971
2972        assert!(parse_full_snapshot_archive_filename("invalid").is_err());
2973        assert!(
2974            parse_full_snapshot_archive_filename("snapshot-bad!slot-bad!hash.bad!ext").is_err()
2975        );
2976
2977        assert!(
2978            parse_full_snapshot_archive_filename("snapshot-12345678-bad!hash.bad!ext").is_err()
2979        );
2980        assert!(parse_full_snapshot_archive_filename(&format!(
2981            "snapshot-12345678-{}.bad!ext",
2982            Hash::new_unique()
2983        ))
2984        .is_err());
2985        assert!(parse_full_snapshot_archive_filename("snapshot-12345678-bad!hash.tar").is_err());
2986
2987        assert!(parse_full_snapshot_archive_filename(&format!(
2988            "snapshot-bad!slot-{}.bad!ext",
2989            Hash::new_unique()
2990        ))
2991        .is_err());
2992        assert!(parse_full_snapshot_archive_filename(&format!(
2993            "snapshot-12345678-{}.bad!ext",
2994            Hash::new_unique()
2995        ))
2996        .is_err());
2997        assert!(parse_full_snapshot_archive_filename(&format!(
2998            "snapshot-bad!slot-{}.tar",
2999            Hash::new_unique()
3000        ))
3001        .is_err());
3002
3003        assert!(parse_full_snapshot_archive_filename("snapshot-bad!slot-bad!hash.tar").is_err());
3004        assert!(parse_full_snapshot_archive_filename("snapshot-12345678-bad!hash.tar").is_err());
3005        assert!(parse_full_snapshot_archive_filename(&format!(
3006            "snapshot-bad!slot-{}.tar",
3007            Hash::new_unique()
3008        ))
3009        .is_err());
3010    }
3011
3012    #[test]
3013    fn test_parse_incremental_snapshot_archive_filename() {
3014        solana_logger::setup();
3015        assert_eq!(
3016            parse_incremental_snapshot_archive_filename(&format!(
3017                "incremental-snapshot-42-123-{}.tar.bz2",
3018                Hash::default()
3019            ))
3020            .unwrap(),
3021            (
3022                42,
3023                123,
3024                SnapshotHash(Hash::default()),
3025                ArchiveFormat::TarBzip2
3026            )
3027        );
3028        assert_eq!(
3029            parse_incremental_snapshot_archive_filename(&format!(
3030                "incremental-snapshot-43-234-{}.tar.zst",
3031                Hash::default()
3032            ))
3033            .unwrap(),
3034            (
3035                43,
3036                234,
3037                SnapshotHash(Hash::default()),
3038                ArchiveFormat::TarZstd
3039            )
3040        );
3041        assert_eq!(
3042            parse_incremental_snapshot_archive_filename(&format!(
3043                "incremental-snapshot-44-345-{}.tar",
3044                Hash::default()
3045            ))
3046            .unwrap(),
3047            (44, 345, SnapshotHash(Hash::default()), ArchiveFormat::Tar)
3048        );
3049        assert_eq!(
3050            parse_incremental_snapshot_archive_filename(&format!(
3051                "incremental-snapshot-45-456-{}.tar.lz4",
3052                Hash::default()
3053            ))
3054            .unwrap(),
3055            (
3056                45,
3057                456,
3058                SnapshotHash(Hash::default()),
3059                ArchiveFormat::TarLz4
3060            )
3061        );
3062
3063        assert!(parse_incremental_snapshot_archive_filename("invalid").is_err());
3064        assert!(parse_incremental_snapshot_archive_filename(&format!(
3065            "snapshot-42-{}.tar",
3066            Hash::new_unique()
3067        ))
3068        .is_err());
3069        assert!(parse_incremental_snapshot_archive_filename(
3070            "incremental-snapshot-bad!slot-bad!slot-bad!hash.bad!ext"
3071        )
3072        .is_err());
3073
3074        assert!(parse_incremental_snapshot_archive_filename(&format!(
3075            "incremental-snapshot-bad!slot-56785678-{}.tar",
3076            Hash::new_unique()
3077        ))
3078        .is_err());
3079
3080        assert!(parse_incremental_snapshot_archive_filename(&format!(
3081            "incremental-snapshot-12345678-bad!slot-{}.tar",
3082            Hash::new_unique()
3083        ))
3084        .is_err());
3085
3086        assert!(parse_incremental_snapshot_archive_filename(
3087            "incremental-snapshot-12341234-56785678-bad!HASH.tar"
3088        )
3089        .is_err());
3090
3091        assert!(parse_incremental_snapshot_archive_filename(&format!(
3092            "incremental-snapshot-12341234-56785678-{}.bad!ext",
3093            Hash::new_unique()
3094        ))
3095        .is_err());
3096    }
3097
3098    #[test]
3099    fn test_check_are_snapshots_compatible() {
3100        solana_logger::setup();
3101        let slot1: Slot = 1234;
3102        let slot2: Slot = 5678;
3103        let slot3: Slot = 999_999;
3104
3105        let full_snapshot_archive_info = FullSnapshotArchiveInfo::new_from_path(PathBuf::from(
3106            format!("/dir/snapshot-{}-{}.tar", slot1, Hash::new_unique()),
3107        ))
3108        .unwrap();
3109
3110        assert!(check_are_snapshots_compatible(&full_snapshot_archive_info, None,).is_ok());
3111
3112        let incremental_snapshot_archive_info =
3113            IncrementalSnapshotArchiveInfo::new_from_path(PathBuf::from(format!(
3114                "/dir/incremental-snapshot-{}-{}-{}.tar",
3115                slot1,
3116                slot2,
3117                Hash::new_unique()
3118            )))
3119            .unwrap();
3120
3121        assert!(check_are_snapshots_compatible(
3122            &full_snapshot_archive_info,
3123            Some(&incremental_snapshot_archive_info)
3124        )
3125        .is_ok());
3126
3127        let incremental_snapshot_archive_info =
3128            IncrementalSnapshotArchiveInfo::new_from_path(PathBuf::from(format!(
3129                "/dir/incremental-snapshot-{}-{}-{}.tar",
3130                slot2,
3131                slot3,
3132                Hash::new_unique()
3133            )))
3134            .unwrap();
3135
3136        assert!(check_are_snapshots_compatible(
3137            &full_snapshot_archive_info,
3138            Some(&incremental_snapshot_archive_info)
3139        )
3140        .is_err());
3141    }
3142
3143    /// A test heler function that creates bank snapshot files
3144    fn common_create_bank_snapshot_files(
3145        bank_snapshots_dir: &Path,
3146        min_slot: Slot,
3147        max_slot: Slot,
3148    ) {
3149        for slot in min_slot..max_slot {
3150            let snapshot_dir = get_bank_snapshots_dir(bank_snapshots_dir, slot);
3151            fs::create_dir_all(&snapshot_dir).unwrap();
3152
3153            let snapshot_filename = get_snapshot_file_name(slot);
3154            let snapshot_path = snapshot_dir.join(snapshot_filename);
3155            File::create(snapshot_path).unwrap();
3156        }
3157    }
3158
3159    #[test]
3160    fn test_get_bank_snapshots() {
3161        solana_logger::setup();
3162        let temp_snapshots_dir = tempfile::TempDir::new().unwrap();
3163        let min_slot = 10;
3164        let max_slot = 20;
3165        common_create_bank_snapshot_files(temp_snapshots_dir.path(), min_slot, max_slot);
3166
3167        let bank_snapshots = get_bank_snapshots(temp_snapshots_dir.path());
3168        assert_eq!(bank_snapshots.len() as Slot, max_slot - min_slot);
3169    }
3170
3171    #[test]
3172    fn test_get_highest_bank_snapshot_post() {
3173        solana_logger::setup();
3174        let temp_snapshots_dir = tempfile::TempDir::new().unwrap();
3175        let min_slot = 99;
3176        let max_slot = 123;
3177        common_create_bank_snapshot_files(temp_snapshots_dir.path(), min_slot, max_slot);
3178
3179        let highest_bank_snapshot = get_highest_bank_snapshot_post(temp_snapshots_dir.path());
3180        assert!(highest_bank_snapshot.is_some());
3181        assert_eq!(highest_bank_snapshot.unwrap().slot, max_slot - 1);
3182    }
3183
3184    /// A test helper function that creates full and incremental snapshot archive files.  Creates
3185    /// full snapshot files in the range (`min_full_snapshot_slot`, `max_full_snapshot_slot`], and
3186    /// incremental snapshot files in the range (`min_incremental_snapshot_slot`,
3187    /// `max_incremental_snapshot_slot`].  Additionally, "bad" files are created for both full and
3188    /// incremental snapshots to ensure the tests properly filter them out.
3189    fn common_create_snapshot_archive_files(
3190        full_snapshot_archives_dir: &Path,
3191        incremental_snapshot_archives_dir: &Path,
3192        min_full_snapshot_slot: Slot,
3193        max_full_snapshot_slot: Slot,
3194        min_incremental_snapshot_slot: Slot,
3195        max_incremental_snapshot_slot: Slot,
3196    ) {
3197        fs::create_dir_all(full_snapshot_archives_dir).unwrap();
3198        fs::create_dir_all(incremental_snapshot_archives_dir).unwrap();
3199        for full_snapshot_slot in min_full_snapshot_slot..max_full_snapshot_slot {
3200            for incremental_snapshot_slot in
3201                min_incremental_snapshot_slot..max_incremental_snapshot_slot
3202            {
3203                let snapshot_filename = format!(
3204                    "incremental-snapshot-{}-{}-{}.tar",
3205                    full_snapshot_slot,
3206                    incremental_snapshot_slot,
3207                    Hash::default()
3208                );
3209                let snapshot_filepath = incremental_snapshot_archives_dir.join(snapshot_filename);
3210                File::create(snapshot_filepath).unwrap();
3211            }
3212
3213            let snapshot_filename =
3214                format!("snapshot-{}-{}.tar", full_snapshot_slot, Hash::default());
3215            let snapshot_filepath = full_snapshot_archives_dir.join(snapshot_filename);
3216            File::create(snapshot_filepath).unwrap();
3217
3218            // Add in an incremental snapshot with a bad filename and high slot to ensure filename are filtered and sorted correctly
3219            let bad_filename = format!(
3220                "incremental-snapshot-{}-{}-bad!hash.tar",
3221                full_snapshot_slot,
3222                max_incremental_snapshot_slot + 1,
3223            );
3224            let bad_filepath = incremental_snapshot_archives_dir.join(bad_filename);
3225            File::create(bad_filepath).unwrap();
3226        }
3227
3228        // Add in a snapshot with a bad filename and high slot to ensure filename are filtered and
3229        // sorted correctly
3230        let bad_filename = format!("snapshot-{}-bad!hash.tar", max_full_snapshot_slot + 1);
3231        let bad_filepath = full_snapshot_archives_dir.join(bad_filename);
3232        File::create(bad_filepath).unwrap();
3233    }
3234
3235    #[test]
3236    fn test_get_full_snapshot_archives() {
3237        solana_logger::setup();
3238        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3239        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3240        let min_slot = 123;
3241        let max_slot = 456;
3242        common_create_snapshot_archive_files(
3243            full_snapshot_archives_dir.path(),
3244            incremental_snapshot_archives_dir.path(),
3245            min_slot,
3246            max_slot,
3247            0,
3248            0,
3249        );
3250
3251        let snapshot_archives = get_full_snapshot_archives(full_snapshot_archives_dir);
3252        assert_eq!(snapshot_archives.len() as Slot, max_slot - min_slot);
3253    }
3254
3255    #[test]
3256    fn test_get_full_snapshot_archives_remote() {
3257        solana_logger::setup();
3258        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3259        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3260        let min_slot = 123;
3261        let max_slot = 456;
3262        common_create_snapshot_archive_files(
3263            &full_snapshot_archives_dir.path().join("remote"),
3264            &incremental_snapshot_archives_dir.path().join("remote"),
3265            min_slot,
3266            max_slot,
3267            0,
3268            0,
3269        );
3270
3271        let snapshot_archives = get_full_snapshot_archives(full_snapshot_archives_dir);
3272        assert_eq!(snapshot_archives.len() as Slot, max_slot - min_slot);
3273        assert!(snapshot_archives.iter().all(|info| info.is_remote()));
3274    }
3275
3276    #[test]
3277    fn test_get_incremental_snapshot_archives() {
3278        solana_logger::setup();
3279        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3280        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3281        let min_full_snapshot_slot = 12;
3282        let max_full_snapshot_slot = 23;
3283        let min_incremental_snapshot_slot = 34;
3284        let max_incremental_snapshot_slot = 45;
3285        common_create_snapshot_archive_files(
3286            full_snapshot_archives_dir.path(),
3287            incremental_snapshot_archives_dir.path(),
3288            min_full_snapshot_slot,
3289            max_full_snapshot_slot,
3290            min_incremental_snapshot_slot,
3291            max_incremental_snapshot_slot,
3292        );
3293
3294        let incremental_snapshot_archives =
3295            get_incremental_snapshot_archives(incremental_snapshot_archives_dir);
3296        assert_eq!(
3297            incremental_snapshot_archives.len() as Slot,
3298            (max_full_snapshot_slot - min_full_snapshot_slot)
3299                * (max_incremental_snapshot_slot - min_incremental_snapshot_slot)
3300        );
3301    }
3302
3303    #[test]
3304    fn test_get_incremental_snapshot_archives_remote() {
3305        solana_logger::setup();
3306        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3307        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3308        let min_full_snapshot_slot = 12;
3309        let max_full_snapshot_slot = 23;
3310        let min_incremental_snapshot_slot = 34;
3311        let max_incremental_snapshot_slot = 45;
3312        common_create_snapshot_archive_files(
3313            &full_snapshot_archives_dir.path().join("remote"),
3314            &incremental_snapshot_archives_dir.path().join("remote"),
3315            min_full_snapshot_slot,
3316            max_full_snapshot_slot,
3317            min_incremental_snapshot_slot,
3318            max_incremental_snapshot_slot,
3319        );
3320
3321        let incremental_snapshot_archives =
3322            get_incremental_snapshot_archives(incremental_snapshot_archives_dir);
3323        assert_eq!(
3324            incremental_snapshot_archives.len() as Slot,
3325            (max_full_snapshot_slot - min_full_snapshot_slot)
3326                * (max_incremental_snapshot_slot - min_incremental_snapshot_slot)
3327        );
3328        assert!(incremental_snapshot_archives
3329            .iter()
3330            .all(|info| info.is_remote()));
3331    }
3332
3333    #[test]
3334    fn test_get_highest_full_snapshot_archive_slot() {
3335        solana_logger::setup();
3336        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3337        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3338        let min_slot = 123;
3339        let max_slot = 456;
3340        common_create_snapshot_archive_files(
3341            full_snapshot_archives_dir.path(),
3342            incremental_snapshot_archives_dir.path(),
3343            min_slot,
3344            max_slot,
3345            0,
3346            0,
3347        );
3348
3349        assert_eq!(
3350            get_highest_full_snapshot_archive_slot(full_snapshot_archives_dir.path()),
3351            Some(max_slot - 1)
3352        );
3353    }
3354
3355    #[test]
3356    fn test_get_highest_incremental_snapshot_slot() {
3357        solana_logger::setup();
3358        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3359        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3360        let min_full_snapshot_slot = 12;
3361        let max_full_snapshot_slot = 23;
3362        let min_incremental_snapshot_slot = 34;
3363        let max_incremental_snapshot_slot = 45;
3364        common_create_snapshot_archive_files(
3365            full_snapshot_archives_dir.path(),
3366            incremental_snapshot_archives_dir.path(),
3367            min_full_snapshot_slot,
3368            max_full_snapshot_slot,
3369            min_incremental_snapshot_slot,
3370            max_incremental_snapshot_slot,
3371        );
3372
3373        for full_snapshot_slot in min_full_snapshot_slot..max_full_snapshot_slot {
3374            assert_eq!(
3375                get_highest_incremental_snapshot_archive_slot(
3376                    incremental_snapshot_archives_dir.path(),
3377                    full_snapshot_slot
3378                ),
3379                Some(max_incremental_snapshot_slot - 1)
3380            );
3381        }
3382
3383        assert_eq!(
3384            get_highest_incremental_snapshot_archive_slot(
3385                incremental_snapshot_archives_dir.path(),
3386                max_full_snapshot_slot
3387            ),
3388            None
3389        );
3390    }
3391
3392    fn common_test_purge_old_snapshot_archives(
3393        snapshot_names: &[&String],
3394        maximum_full_snapshot_archives_to_retain: usize,
3395        maximum_incremental_snapshot_archives_to_retain: usize,
3396        expected_snapshots: &[&String],
3397    ) {
3398        let temp_snap_dir = tempfile::TempDir::new().unwrap();
3399
3400        for snap_name in snapshot_names {
3401            let snap_path = temp_snap_dir.path().join(snap_name);
3402            let mut _snap_file = File::create(snap_path);
3403        }
3404        purge_old_snapshot_archives(
3405            temp_snap_dir.path(),
3406            temp_snap_dir.path(),
3407            maximum_full_snapshot_archives_to_retain,
3408            maximum_incremental_snapshot_archives_to_retain,
3409        );
3410
3411        let mut retained_snaps = HashSet::new();
3412        for entry in fs::read_dir(temp_snap_dir.path()).unwrap() {
3413            let entry_path_buf = entry.unwrap().path();
3414            let entry_path = entry_path_buf.as_path();
3415            let snapshot_name = entry_path
3416                .file_name()
3417                .unwrap()
3418                .to_str()
3419                .unwrap()
3420                .to_string();
3421            retained_snaps.insert(snapshot_name);
3422        }
3423
3424        for snap_name in expected_snapshots {
3425            assert!(
3426                retained_snaps.contains(snap_name.as_str()),
3427                "{snap_name} not found"
3428            );
3429        }
3430        assert_eq!(retained_snaps.len(), expected_snapshots.len());
3431    }
3432
3433    #[test]
3434    fn test_purge_old_full_snapshot_archives() {
3435        let snap1_name = format!("snapshot-1-{}.tar.zst", Hash::default());
3436        let snap2_name = format!("snapshot-3-{}.tar.zst", Hash::default());
3437        let snap3_name = format!("snapshot-50-{}.tar.zst", Hash::default());
3438        let snapshot_names = vec![&snap1_name, &snap2_name, &snap3_name];
3439
3440        // expecting only the newest to be retained
3441        let expected_snapshots = vec![&snap3_name];
3442        common_test_purge_old_snapshot_archives(
3443            &snapshot_names,
3444            1,
3445            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3446            &expected_snapshots,
3447        );
3448
3449        // retaining 0, but minimum to retain is 1
3450        common_test_purge_old_snapshot_archives(
3451            &snapshot_names,
3452            0,
3453            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3454            &expected_snapshots,
3455        );
3456
3457        // retaining 2, expecting the 2 newest to be retained
3458        let expected_snapshots = vec![&snap2_name, &snap3_name];
3459        common_test_purge_old_snapshot_archives(
3460            &snapshot_names,
3461            2,
3462            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3463            &expected_snapshots,
3464        );
3465
3466        // retaining 3, all three should be retained
3467        let expected_snapshots = vec![&snap1_name, &snap2_name, &snap3_name];
3468        common_test_purge_old_snapshot_archives(
3469            &snapshot_names,
3470            3,
3471            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3472            &expected_snapshots,
3473        );
3474    }
3475
3476    /// Mimic a running node's behavior w.r.t. purging old snapshot archives.  Take snapshots in a
3477    /// loop, and periodically purge old snapshot archives.  After purging, check to make sure the
3478    /// snapshot archives on disk are correct.
3479    #[test]
3480    fn test_purge_old_full_snapshot_archives_in_the_loop() {
3481        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3482        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3483        let maximum_snapshots_to_retain = 5;
3484        let starting_slot: Slot = 42;
3485
3486        for slot in (starting_slot..).take(100) {
3487            let full_snapshot_archive_file_name =
3488                format!("snapshot-{}-{}.tar", slot, Hash::default());
3489            let full_snapshot_archive_path = full_snapshot_archives_dir
3490                .as_ref()
3491                .join(full_snapshot_archive_file_name);
3492            File::create(full_snapshot_archive_path).unwrap();
3493
3494            // don't purge-and-check until enough snapshot archives have been created
3495            if slot < starting_slot + maximum_snapshots_to_retain as Slot {
3496                continue;
3497            }
3498
3499            // purge infrequently, so there will always be snapshot archives to purge
3500            if slot % (maximum_snapshots_to_retain as Slot * 2) != 0 {
3501                continue;
3502            }
3503
3504            purge_old_snapshot_archives(
3505                &full_snapshot_archives_dir,
3506                &incremental_snapshot_archives_dir,
3507                maximum_snapshots_to_retain,
3508                usize::MAX,
3509            );
3510            let mut full_snapshot_archives =
3511                get_full_snapshot_archives(&full_snapshot_archives_dir);
3512            full_snapshot_archives.sort_unstable();
3513            assert_eq!(full_snapshot_archives.len(), maximum_snapshots_to_retain);
3514            assert_eq!(full_snapshot_archives.last().unwrap().slot(), slot);
3515            for (i, full_snapshot_archive) in full_snapshot_archives.iter().rev().enumerate() {
3516                assert_eq!(full_snapshot_archive.slot(), slot - i as Slot);
3517            }
3518        }
3519    }
3520
3521    #[test]
3522    fn test_purge_old_incremental_snapshot_archives() {
3523        solana_logger::setup();
3524        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3525        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3526        let starting_slot = 100_000;
3527
3528        let maximum_incremental_snapshot_archives_to_retain =
3529            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN;
3530        let maximum_full_snapshot_archives_to_retain = DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN;
3531
3532        let incremental_snapshot_interval = 100;
3533        let num_incremental_snapshots_per_full_snapshot =
3534            maximum_incremental_snapshot_archives_to_retain * 2;
3535        let full_snapshot_interval =
3536            incremental_snapshot_interval * num_incremental_snapshots_per_full_snapshot;
3537
3538        let mut snapshot_filenames = vec![];
3539        (starting_slot..)
3540            .step_by(full_snapshot_interval)
3541            .take(maximum_full_snapshot_archives_to_retain * 2)
3542            .for_each(|full_snapshot_slot| {
3543                let snapshot_filename =
3544                    format!("snapshot-{}-{}.tar", full_snapshot_slot, Hash::default());
3545                let snapshot_path = full_snapshot_archives_dir.path().join(&snapshot_filename);
3546                File::create(snapshot_path).unwrap();
3547                snapshot_filenames.push(snapshot_filename);
3548
3549                (full_snapshot_slot..)
3550                    .step_by(incremental_snapshot_interval)
3551                    .take(num_incremental_snapshots_per_full_snapshot)
3552                    .skip(1)
3553                    .for_each(|incremental_snapshot_slot| {
3554                        let snapshot_filename = format!(
3555                            "incremental-snapshot-{}-{}-{}.tar",
3556                            full_snapshot_slot,
3557                            incremental_snapshot_slot,
3558                            Hash::default()
3559                        );
3560                        let snapshot_path = incremental_snapshot_archives_dir
3561                            .path()
3562                            .join(&snapshot_filename);
3563                        File::create(snapshot_path).unwrap();
3564                        snapshot_filenames.push(snapshot_filename);
3565                    });
3566            });
3567
3568        purge_old_snapshot_archives(
3569            full_snapshot_archives_dir.path(),
3570            incremental_snapshot_archives_dir.path(),
3571            maximum_full_snapshot_archives_to_retain,
3572            maximum_incremental_snapshot_archives_to_retain,
3573        );
3574
3575        // Ensure correct number of full snapshot archives are purged/retained
3576        // NOTE: One extra full snapshot is always kept (the oldest), hence the `+1`
3577        let mut remaining_full_snapshot_archives =
3578            get_full_snapshot_archives(full_snapshot_archives_dir.path());
3579        assert_eq!(
3580            remaining_full_snapshot_archives.len(),
3581            maximum_full_snapshot_archives_to_retain,
3582        );
3583        remaining_full_snapshot_archives.sort_unstable();
3584        let latest_full_snapshot_archive_slot =
3585            remaining_full_snapshot_archives.last().unwrap().slot();
3586
3587        // Ensure correct number of incremental snapshot archives are purged/retained
3588        let mut remaining_incremental_snapshot_archives =
3589            get_incremental_snapshot_archives(incremental_snapshot_archives_dir.path());
3590        assert_eq!(
3591            remaining_incremental_snapshot_archives.len(),
3592            maximum_incremental_snapshot_archives_to_retain
3593                + maximum_full_snapshot_archives_to_retain.saturating_sub(1)
3594        );
3595        remaining_incremental_snapshot_archives.sort_unstable();
3596        remaining_incremental_snapshot_archives.reverse();
3597
3598        // Ensure there exists one incremental snapshot all but the latest full snapshot
3599        for i in (1..maximum_full_snapshot_archives_to_retain).rev() {
3600            let incremental_snapshot_archive =
3601                remaining_incremental_snapshot_archives.pop().unwrap();
3602
3603            let expected_base_slot =
3604                latest_full_snapshot_archive_slot - (i * full_snapshot_interval) as u64;
3605            assert_eq!(incremental_snapshot_archive.base_slot(), expected_base_slot);
3606            let expected_slot = expected_base_slot
3607                + (full_snapshot_interval - incremental_snapshot_interval) as u64;
3608            assert_eq!(incremental_snapshot_archive.slot(), expected_slot);
3609        }
3610
3611        // Ensure all remaining incremental snapshots are only for the latest full snapshot
3612        for incremental_snapshot_archive in &remaining_incremental_snapshot_archives {
3613            assert_eq!(
3614                incremental_snapshot_archive.base_slot(),
3615                latest_full_snapshot_archive_slot
3616            );
3617        }
3618
3619        // Ensure the remaining incremental snapshots are at the right slot
3620        let expected_remaing_incremental_snapshot_archive_slots =
3621            (latest_full_snapshot_archive_slot..)
3622                .step_by(incremental_snapshot_interval)
3623                .take(num_incremental_snapshots_per_full_snapshot)
3624                .skip(
3625                    num_incremental_snapshots_per_full_snapshot
3626                        - maximum_incremental_snapshot_archives_to_retain,
3627                )
3628                .collect::<HashSet<_>>();
3629
3630        let actual_remaining_incremental_snapshot_archive_slots =
3631            remaining_incremental_snapshot_archives
3632                .iter()
3633                .map(|snapshot| snapshot.slot())
3634                .collect::<HashSet<_>>();
3635        assert_eq!(
3636            actual_remaining_incremental_snapshot_archive_slots,
3637            expected_remaing_incremental_snapshot_archive_slots
3638        );
3639    }
3640
3641    #[test]
3642    fn test_purge_all_incremental_snapshot_archives_when_no_full_snapshot_archives() {
3643        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3644        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3645
3646        for snapshot_filenames in [
3647            format!("incremental-snapshot-100-120-{}.tar", Hash::default()),
3648            format!("incremental-snapshot-100-140-{}.tar", Hash::default()),
3649            format!("incremental-snapshot-100-160-{}.tar", Hash::default()),
3650            format!("incremental-snapshot-100-180-{}.tar", Hash::default()),
3651            format!("incremental-snapshot-200-220-{}.tar", Hash::default()),
3652            format!("incremental-snapshot-200-240-{}.tar", Hash::default()),
3653            format!("incremental-snapshot-200-260-{}.tar", Hash::default()),
3654            format!("incremental-snapshot-200-280-{}.tar", Hash::default()),
3655        ] {
3656            let snapshot_path = incremental_snapshot_archives_dir
3657                .path()
3658                .join(snapshot_filenames);
3659            File::create(snapshot_path).unwrap();
3660        }
3661
3662        purge_old_snapshot_archives(
3663            full_snapshot_archives_dir.path(),
3664            incremental_snapshot_archives_dir.path(),
3665            usize::MAX,
3666            usize::MAX,
3667        );
3668
3669        let remaining_incremental_snapshot_archives =
3670            get_incremental_snapshot_archives(incremental_snapshot_archives_dir.path());
3671        assert!(remaining_incremental_snapshot_archives.is_empty());
3672    }
3673
3674    /// Test roundtrip of bank to a full snapshot, then back again.  This test creates the simplest
3675    /// bank possible, so the contents of the snapshot archive will be quite minimal.
3676    #[test]
3677    fn test_roundtrip_bank_to_and_from_full_snapshot_simple() {
3678        solana_logger::setup();
3679        let genesis_config = GenesisConfig::default();
3680        let original_bank = Bank::new_for_tests(&genesis_config);
3681
3682        while !original_bank.is_complete() {
3683            original_bank.register_tick(&Hash::new_unique());
3684        }
3685
3686        let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
3687        let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
3688        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3689        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3690        let snapshot_archive_format = ArchiveFormat::Tar;
3691
3692        let snapshot_archive_info = bank_to_full_snapshot_archive(
3693            &bank_snapshots_dir,
3694            &original_bank,
3695            None,
3696            full_snapshot_archives_dir.path(),
3697            incremental_snapshot_archives_dir.path(),
3698            snapshot_archive_format,
3699            DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3700            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3701        )
3702        .unwrap();
3703
3704        let (roundtrip_bank, _) = bank_from_snapshot_archives(
3705            &[accounts_dir],
3706            bank_snapshots_dir.path(),
3707            &snapshot_archive_info,
3708            None,
3709            &genesis_config,
3710            &RuntimeConfig::default(),
3711            None,
3712            None,
3713            AccountSecondaryIndexes::default(),
3714            None,
3715            AccountShrinkThreshold::default(),
3716            false,
3717            false,
3718            false,
3719            Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
3720            None,
3721            &Arc::default(),
3722        )
3723        .unwrap();
3724        roundtrip_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
3725        assert_eq!(original_bank, roundtrip_bank);
3726    }
3727
3728    /// Test roundtrip of bank to a full snapshot, then back again.  This test is more involved
3729    /// than the simple version above; creating multiple banks over multiple slots and doing
3730    /// multiple transfers.  So this full snapshot should contain more data.
3731    #[test]
3732    fn test_roundtrip_bank_to_and_from_snapshot_complex() {
3733        solana_logger::setup();
3734        let collector = Pubkey::new_unique();
3735        let key1 = Keypair::new();
3736        let key2 = Keypair::new();
3737        let key3 = Keypair::new();
3738        let key4 = Keypair::new();
3739        let key5 = Keypair::new();
3740
3741        let (genesis_config, mint_keypair) = create_genesis_config(sol_to_lamports(1_000_000.));
3742        let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
3743        bank0
3744            .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
3745            .unwrap();
3746        bank0
3747            .transfer(sol_to_lamports(2.), &mint_keypair, &key2.pubkey())
3748            .unwrap();
3749        bank0
3750            .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
3751            .unwrap();
3752        while !bank0.is_complete() {
3753            bank0.register_tick(&Hash::new_unique());
3754        }
3755
3756        let slot = 1;
3757        let bank1 = Arc::new(Bank::new_from_parent(&bank0, &collector, slot));
3758        bank1
3759            .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
3760            .unwrap();
3761        bank1
3762            .transfer(sol_to_lamports(4.), &mint_keypair, &key4.pubkey())
3763            .unwrap();
3764        bank1
3765            .transfer(sol_to_lamports(5.), &mint_keypair, &key5.pubkey())
3766            .unwrap();
3767        while !bank1.is_complete() {
3768            bank1.register_tick(&Hash::new_unique());
3769        }
3770
3771        let slot = slot + 1;
3772        let bank2 = Arc::new(Bank::new_from_parent(&bank1, &collector, slot));
3773        bank2
3774            .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
3775            .unwrap();
3776        while !bank2.is_complete() {
3777            bank2.register_tick(&Hash::new_unique());
3778        }
3779
3780        let slot = slot + 1;
3781        let bank3 = Arc::new(Bank::new_from_parent(&bank2, &collector, slot));
3782        bank3
3783            .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
3784            .unwrap();
3785        while !bank3.is_complete() {
3786            bank3.register_tick(&Hash::new_unique());
3787        }
3788
3789        let slot = slot + 1;
3790        let bank4 = Arc::new(Bank::new_from_parent(&bank3, &collector, slot));
3791        bank4
3792            .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
3793            .unwrap();
3794        while !bank4.is_complete() {
3795            bank4.register_tick(&Hash::new_unique());
3796        }
3797
3798        let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
3799        let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
3800        let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3801        let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
3802        let snapshot_archive_format = ArchiveFormat::TarGzip;
3803
3804        let full_snapshot_archive_info = bank_to_full_snapshot_archive(
3805            bank_snapshots_dir.path(),
3806            &bank4,
3807            None,
3808            full_snapshot_archives_dir.path(),
3809            incremental_snapshot_archives_dir.path(),
3810            snapshot_archive_format,
3811            DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3812            DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
3813        )
3814        .unwrap();
3815
3816        let (roundtrip_bank, _) = bank_from_snapshot_archives(
3817            &[accounts_dir],
3818            bank_snapshots_dir.path(),
3819            &full_snapshot_archive_info,
3820            None,
3821            &genesis_config,
3822            &RuntimeConfig::default(),
3823            None,
3824            None,
3825            AccountSecondaryIndexes::default(),
3826            None,
3827            AccountShrinkThreshold::default(),
3828            false,
3829            false,
3830            false,
3831            Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
3832            None,
3833            &Arc::default(),
3834        )
3835        .unwrap();
3836        roundtrip_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
3837        assert_eq!(*bank4, roundtrip_bank);
3838    }
3839
/// Test roundtrip of bank to snapshots, then back again, with incremental snapshots.  In this
/// version, build up a few slots and take a full snapshot.  Continue on a few more slots and
/// take an incremental snapshot.  Rebuild the bank from both the incremental snapshot and full
/// snapshot.
///
/// For the full snapshot, touch all the accounts, but only one for the incremental snapshot.
/// This is intended to mimic the real behavior of transactions, where only a small number of
/// accounts are modified often, which are captured by the incremental snapshot.  The majority
/// of the accounts are not modified often, and are captured by the full snapshot.
#[test]
fn test_roundtrip_bank_to_and_from_incremental_snapshot() {
    solana_logger::setup();
    let collector = Pubkey::new_unique();
    let key1 = Keypair::new();
    let key2 = Keypair::new();
    let key3 = Keypair::new();
    let key4 = Keypair::new();
    let key5 = Keypair::new();

    // Slot 0: fund key1..key3 from the mint, then tick until the bank is complete.
    let (genesis_config, mint_keypair) = create_genesis_config(sol_to_lamports(1_000_000.));
    let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
    bank0
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    bank0
        .transfer(sol_to_lamports(2.), &mint_keypair, &key2.pubkey())
        .unwrap();
    bank0
        .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
        .unwrap();
    while !bank0.is_complete() {
        bank0.register_tick(&Hash::new_unique());
    }

    // Slot 1: fund key3..key5; this is the bank captured by the full snapshot.
    let slot = 1;
    let bank1 = Arc::new(Bank::new_from_parent(&bank0, &collector, slot));
    bank1
        .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
        .unwrap();
    bank1
        .transfer(sol_to_lamports(4.), &mint_keypair, &key4.pubkey())
        .unwrap();
    bank1
        .transfer(sol_to_lamports(5.), &mint_keypair, &key5.pubkey())
        .unwrap();
    while !bank1.is_complete() {
        bank1.register_tick(&Hash::new_unique());
    }

    let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
    let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
    let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let snapshot_archive_format = ArchiveFormat::TarZstd;

    // Remember the base slot; the incremental snapshot below is taken relative to it.
    let full_snapshot_slot = slot;
    let full_snapshot_archive_info = bank_to_full_snapshot_archive(
        bank_snapshots_dir.path(),
        &bank1,
        None,
        full_snapshot_archives_dir.path(),
        incremental_snapshot_archives_dir.path(),
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    // Slots 2..=4: only key1 receives transfers after the full snapshot.
    let slot = slot + 1;
    let bank2 = Arc::new(Bank::new_from_parent(&bank1, &collector, slot));
    bank2
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    while !bank2.is_complete() {
        bank2.register_tick(&Hash::new_unique());
    }

    let slot = slot + 1;
    let bank3 = Arc::new(Bank::new_from_parent(&bank2, &collector, slot));
    bank3
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    while !bank3.is_complete() {
        bank3.register_tick(&Hash::new_unique());
    }

    let slot = slot + 1;
    let bank4 = Arc::new(Bank::new_from_parent(&bank3, &collector, slot));
    bank4
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    while !bank4.is_complete() {
        bank4.register_tick(&Hash::new_unique());
    }

    let incremental_snapshot_archive_info = bank_to_incremental_snapshot_archive(
        bank_snapshots_dir.path(),
        &bank4,
        full_snapshot_slot,
        None,
        full_snapshot_archives_dir.path(),
        incremental_snapshot_archives_dir.path(),
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    // Rebuild from the full + incremental archives and compare against the live bank4.
    let (roundtrip_bank, _) = bank_from_snapshot_archives(
        &[accounts_dir],
        bank_snapshots_dir.path(),
        &full_snapshot_archive_info,
        Some(&incremental_snapshot_archive_info),
        &genesis_config,
        &RuntimeConfig::default(),
        None,
        None,
        AccountSecondaryIndexes::default(),
        None,
        AccountShrinkThreshold::default(),
        false,
        false,
        false,
        Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
        None,
        &Arc::default(),
    )
    .unwrap();
    roundtrip_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
    assert_eq!(*bank4, roundtrip_bank);
}
3971
/// Test rebuilding bank from the latest snapshot archives
///
/// A full snapshot is taken at slot 1 and an incremental snapshot at slot 4.  The archive
/// infos returned by the snapshot helpers are intentionally discarded: the bank is rebuilt by
/// handing `bank_from_latest_snapshot_archives()` only the archive directories, and the result
/// must equal the bank at slot 4.
#[test]
fn test_bank_from_latest_snapshot_archives() {
    solana_logger::setup();
    let collector = Pubkey::new_unique();
    let key1 = Keypair::new();
    let key2 = Keypair::new();
    let key3 = Keypair::new();

    // Slot 0: fund all three keys from the mint.
    let (genesis_config, mint_keypair) = create_genesis_config(sol_to_lamports(1_000_000.));
    let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
    bank0
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    bank0
        .transfer(sol_to_lamports(2.), &mint_keypair, &key2.pubkey())
        .unwrap();
    bank0
        .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
        .unwrap();
    while !bank0.is_complete() {
        bank0.register_tick(&Hash::new_unique());
    }

    // Slot 1: touch all three keys again; this bank becomes the full snapshot.
    let slot = 1;
    let bank1 = Arc::new(Bank::new_from_parent(&bank0, &collector, slot));
    bank1
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    bank1
        .transfer(sol_to_lamports(2.), &mint_keypair, &key2.pubkey())
        .unwrap();
    bank1
        .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
        .unwrap();
    while !bank1.is_complete() {
        bank1.register_tick(&Hash::new_unique());
    }

    let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
    let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
    let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let snapshot_archive_format = ArchiveFormat::Tar;

    let full_snapshot_slot = slot;
    bank_to_full_snapshot_archive(
        &bank_snapshots_dir,
        &bank1,
        None,
        &full_snapshot_archives_dir,
        &incremental_snapshot_archives_dir,
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    // Slots 2..=4: each slot touches a different single key.
    let slot = slot + 1;
    let bank2 = Arc::new(Bank::new_from_parent(&bank1, &collector, slot));
    bank2
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    while !bank2.is_complete() {
        bank2.register_tick(&Hash::new_unique());
    }

    let slot = slot + 1;
    let bank3 = Arc::new(Bank::new_from_parent(&bank2, &collector, slot));
    bank3
        .transfer(sol_to_lamports(2.), &mint_keypair, &key2.pubkey())
        .unwrap();
    while !bank3.is_complete() {
        bank3.register_tick(&Hash::new_unique());
    }

    let slot = slot + 1;
    let bank4 = Arc::new(Bank::new_from_parent(&bank3, &collector, slot));
    bank4
        .transfer(sol_to_lamports(3.), &mint_keypair, &key3.pubkey())
        .unwrap();
    while !bank4.is_complete() {
        bank4.register_tick(&Hash::new_unique());
    }

    bank_to_incremental_snapshot_archive(
        &bank_snapshots_dir,
        &bank4,
        full_snapshot_slot,
        None,
        &full_snapshot_archives_dir,
        &incremental_snapshot_archives_dir,
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    // Only directories are supplied here; no explicit archive info is passed in.
    let (deserialized_bank, ..) = bank_from_latest_snapshot_archives(
        &bank_snapshots_dir,
        &full_snapshot_archives_dir,
        &incremental_snapshot_archives_dir,
        &[accounts_dir],
        &genesis_config,
        &RuntimeConfig::default(),
        None,
        None,
        AccountSecondaryIndexes::default(),
        None,
        AccountShrinkThreshold::default(),
        false,
        false,
        false,
        Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
        None,
        &Arc::default(),
    )
    .unwrap();
    deserialized_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
    assert_eq!(deserialized_bank, *bank4);
}
4093
/// Test that cleaning works well in the edge cases of zero-lamport accounts and snapshots.
/// Here's the scenario:
///
/// slot 1:
///     - send some lamports to Account1 (from Account2) to bring it to life
///     - take a full snapshot
/// slot 2:
///     - make Account1 have zero lamports (send back to Account2)
///     - take an incremental snapshot
///     - ensure deserializing from this snapshot is equal to this bank
/// slot 3:
///     - remove Account2's reference back to slot 2 by transferring from the mint to Account2
/// slot 4:
///     - ensure `clean_accounts()` has run and that Account1 is gone
///     - take another incremental snapshot
///     - ensure deserializing from this snapshot is equal to this bank
///     - ensure Account1 hasn't come back from the dead
///
/// The check at slot 4 will fail with the pre-incremental-snapshot cleaning logic.  Because
/// of the cleaning/purging at slot 4, the incremental snapshot at slot 4 will no longer have
/// information about Account1, but the full snapshot _does_ have info for Account1, which is
/// no longer correct!
#[test]
fn test_incremental_snapshots_handle_zero_lamport_accounts() {
    solana_logger::setup();

    let collector = Pubkey::new_unique();
    let key1 = Keypair::new();
    let key2 = Keypair::new();

    let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
    let bank_snapshots_dir = tempfile::TempDir::new().unwrap();
    let full_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let incremental_snapshot_archives_dir = tempfile::TempDir::new().unwrap();
    let snapshot_archive_format = ArchiveFormat::Tar;

    let (genesis_config, mint_keypair) = create_genesis_config(sol_to_lamports(1_000_000.));

    // Slot 0: fund Account2 (key2) from the mint.
    let lamports_to_transfer = sol_to_lamports(123_456.);
    let bank0 = Arc::new(Bank::new_with_paths_for_tests(
        &genesis_config,
        Arc::<RuntimeConfig>::default(),
        vec![accounts_dir.clone()],
        AccountSecondaryIndexes::default(),
        AccountShrinkThreshold::default(),
    ));
    bank0
        .transfer(lamports_to_transfer, &mint_keypair, &key2.pubkey())
        .unwrap();
    while !bank0.is_complete() {
        bank0.register_tick(&Hash::new_unique());
    }

    // Slot 1: Account2 sends the lamports on to Account1 (key1).
    let slot = 1;
    let bank1 = Arc::new(Bank::new_from_parent(&bank0, &collector, slot));
    bank1
        .transfer(lamports_to_transfer, &key2, &key1.pubkey())
        .unwrap();
    while !bank1.is_complete() {
        bank1.register_tick(&Hash::new_unique());
    }

    let full_snapshot_slot = slot;
    let full_snapshot_archive_info = bank_to_full_snapshot_archive(
        bank_snapshots_dir.path(),
        &bank1,
        None,
        full_snapshot_archives_dir.path(),
        incremental_snapshot_archives_dir.path(),
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    let slot = slot + 1;
    let bank2 = Arc::new(Bank::new_from_parent(&bank1, &collector, slot));
    let blockhash = bank2.last_blockhash();
    // This first transaction is never processed; it exists only so its message can be priced.
    let tx = SanitizedTransaction::from_transaction_for_tests(system_transaction::transfer(
        &key1,
        &key2.pubkey(),
        lamports_to_transfer,
        blockhash,
    ));
    let fee = bank2.get_fee_for_message(tx.message()).unwrap();
    // Send everything except the fee so that Account1 ends the slot with exactly zero lamports.
    let tx = system_transaction::transfer(
        &key1,
        &key2.pubkey(),
        lamports_to_transfer - fee,
        blockhash,
    );
    bank2.process_transaction(&tx).unwrap();
    assert_eq!(
        bank2.get_balance(&key1.pubkey()),
        0,
        "Ensure Account1's balance is zero"
    );
    while !bank2.is_complete() {
        bank2.register_tick(&Hash::new_unique());
    }

    // Take an incremental snapshot and then do a roundtrip on the bank and ensure it
    // deserializes correctly.
    let incremental_snapshot_archive_info = bank_to_incremental_snapshot_archive(
        bank_snapshots_dir.path(),
        &bank2,
        full_snapshot_slot,
        None,
        full_snapshot_archives_dir.path(),
        incremental_snapshot_archives_dir.path(),
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();
    let (deserialized_bank, _) = bank_from_snapshot_archives(
        &[accounts_dir.clone()],
        bank_snapshots_dir.path(),
        &full_snapshot_archive_info,
        Some(&incremental_snapshot_archive_info),
        &genesis_config,
        &RuntimeConfig::default(),
        None,
        None,
        AccountSecondaryIndexes::default(),
        None,
        AccountShrinkThreshold::default(),
        false,
        false,
        false,
        Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
        None,
        &Arc::default(),
    )
    .unwrap();
    deserialized_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
    assert_eq!(
        deserialized_bank, *bank2,
        "Ensure rebuilding from an incremental snapshot works"
    );

    let slot = slot + 1;
    let bank3 = Arc::new(Bank::new_from_parent(&bank2, &collector, slot));
    // Update Account2 so that it no longer holds a reference to slot2
    bank3
        .transfer(lamports_to_transfer, &mint_keypair, &key2.pubkey())
        .unwrap();
    while !bank3.is_complete() {
        bank3.register_tick(&Hash::new_unique());
    }

    // Slot 4 carries no transactions of its own; it is only ticked to completion.
    let slot = slot + 1;
    let bank4 = Arc::new(Bank::new_from_parent(&bank3, &collector, slot));
    while !bank4.is_complete() {
        bank4.register_tick(&Hash::new_unique());
    }

    // Ensure account1 has been cleaned/purged from everywhere
    bank4.squash();
    bank4.clean_accounts(Some(full_snapshot_slot));
    assert!(
        bank4.get_account_modified_slot(&key1.pubkey()).is_none(),
        "Ensure Account1 has been cleaned and purged from AccountsDb"
    );

    // Take an incremental snapshot and then do a roundtrip on the bank and ensure it
    // deserializes correctly
    let incremental_snapshot_archive_info = bank_to_incremental_snapshot_archive(
        bank_snapshots_dir.path(),
        &bank4,
        full_snapshot_slot,
        None,
        full_snapshot_archives_dir.path(),
        incremental_snapshot_archives_dir.path(),
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    let (deserialized_bank, _) = bank_from_snapshot_archives(
        &[accounts_dir],
        bank_snapshots_dir.path(),
        &full_snapshot_archive_info,
        Some(&incremental_snapshot_archive_info),
        &genesis_config,
        &RuntimeConfig::default(),
        None,
        None,
        AccountSecondaryIndexes::default(),
        None,
        AccountShrinkThreshold::default(),
        false,
        false,
        false,
        Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
        None,
        &Arc::default(),
    )
    .unwrap();
    deserialized_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
    assert_eq!(
        deserialized_bank, *bank4,
        "Ensure rebuilding from an incremental snapshot works",
    );
    assert!(
        deserialized_bank
            .get_account_modified_slot(&key1.pubkey())
            .is_none(),
        "Ensure Account1 has not been brought back from the dead"
    );
}
4306
/// Test reading bank fields back out of snapshot archives.
///
/// A full snapshot is taken at slot 1 and an incremental snapshot at slot 2, with a single
/// temp directory serving as the bank snapshots dir and both archive dirs.  The fields
/// returned by `bank_fields_from_snapshot_archives()` must report the slot and parent slot of
/// the bank used for the incremental snapshot.
#[test]
fn test_bank_fields_from_snapshot() {
    solana_logger::setup();
    let collector = Pubkey::new_unique();
    let key1 = Keypair::new();

    let (genesis_config, mint_keypair) = create_genesis_config(sol_to_lamports(1_000_000.));
    let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
    while !bank0.is_complete() {
        bank0.register_tick(&Hash::new_unique());
    }

    let slot = 1;
    let bank1 = Arc::new(Bank::new_from_parent(&bank0, &collector, slot));
    while !bank1.is_complete() {
        bank1.register_tick(&Hash::new_unique());
    }

    // One directory is reused for bank snapshots, full archives and incremental archives.
    let all_snapshots_dir = tempfile::TempDir::new().unwrap();
    let snapshot_archive_format = ArchiveFormat::Tar;

    let full_snapshot_slot = slot;
    bank_to_full_snapshot_archive(
        &all_snapshots_dir,
        &bank1,
        None,
        &all_snapshots_dir,
        &all_snapshots_dir,
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    // Slot 2 is the only slot with a transaction; it backs the incremental snapshot.
    let slot = slot + 1;
    let bank2 = Arc::new(Bank::new_from_parent(&bank1, &collector, slot));
    bank2
        .transfer(sol_to_lamports(1.), &mint_keypair, &key1.pubkey())
        .unwrap();
    while !bank2.is_complete() {
        bank2.register_tick(&Hash::new_unique());
    }

    bank_to_incremental_snapshot_archive(
        &all_snapshots_dir,
        &bank2,
        full_snapshot_slot,
        None,
        &all_snapshots_dir,
        &all_snapshots_dir,
        snapshot_archive_format,
        DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
        DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
    )
    .unwrap();

    let bank_fields = bank_fields_from_snapshot_archives(
        &all_snapshots_dir,
        &all_snapshots_dir,
        &all_snapshots_dir,
    )
    .unwrap();
    assert_eq!(bank_fields.slot, bank2.slot());
    assert_eq!(bank_fields.parent_slot, bank2.parent_slot());
}
4372
/// Structural verification succeeds for unsorted, all-root slot deltas whose largest slot
/// equals the bank slot, and the returned info holds exactly the set of slots seen.
#[test]
fn test_verify_slot_deltas_structural_good() {
    // NOTE: slot deltas do not need to be sorted
    let slot_deltas = vec![
        (222, true, Status::default()),
        (333, true, Status::default()),
        (111, true, Status::default()),
    ];

    let bank_slot = 333;
    let result = verify_slot_deltas_structural(slot_deltas.as_slice(), bank_slot);
    assert_eq!(
        result,
        Ok(VerifySlotDeltasStructuralInfo {
            slots: HashSet::from([111, 222, 333])
        })
    );
}
4391
/// Structural verification rejects a slot-deltas list holding one more entry than
/// `status_cache::MAX_CACHE_ENTRIES`, reporting both the actual count and the limit.
#[test]
fn test_verify_slot_deltas_structural_bad_too_many_entries() {
    // Slots 0..bank_slot yields exactly MAX_CACHE_ENTRIES + 1 entries.
    let bank_slot = status_cache::MAX_CACHE_ENTRIES as Slot + 1;
    let slot_deltas: Vec<_> = (0..bank_slot)
        .map(|slot| (slot, true, Status::default()))
        .collect();

    let result = verify_slot_deltas_structural(slot_deltas.as_slice(), bank_slot);
    assert_eq!(
        result,
        Err(VerifySlotDeltasError::TooManyEntries(
            status_cache::MAX_CACHE_ENTRIES + 1,
            status_cache::MAX_CACHE_ENTRIES
        )),
    );
}
4408
/// Structural verification rejects slot deltas containing an entry whose root flag is
/// `false`, naming the offending slot in the error.
#[test]
fn test_verify_slot_deltas_structural_bad_slot_not_root() {
    let slot_deltas = vec![
        (111, true, Status::default()),
        (222, false, Status::default()), // <-- slot is not a root
        (333, true, Status::default()),
    ];

    let bank_slot = 333;
    let result = verify_slot_deltas_structural(slot_deltas.as_slice(), bank_slot);
    assert_eq!(result, Err(VerifySlotDeltasError::SlotIsNotRoot(222)));
}
4421
/// Structural verification rejects slot deltas containing a slot larger than the bank slot,
/// reporting the offending slot together with the bank slot.
#[test]
fn test_verify_slot_deltas_structural_bad_slot_greater_than_bank() {
    let slot_deltas = vec![
        (222, true, Status::default()),
        (111, true, Status::default()),
        (555, true, Status::default()), // <-- slot is greater than the bank slot
    ];

    let bank_slot = 444;
    let result = verify_slot_deltas_structural(slot_deltas.as_slice(), bank_slot);
    assert_eq!(
        result,
        Err(VerifySlotDeltasError::SlotGreaterThanMaxRoot(
            555, bank_slot
        )),
    );
}
4439
/// Structural verification rejects slot deltas in which the same slot appears more than
/// once, naming the duplicated slot in the error.
#[test]
fn test_verify_slot_deltas_structural_bad_slot_has_multiple_entries() {
    let slot_deltas = vec![
        (111, true, Status::default()),
        (222, true, Status::default()),
        (111, true, Status::default()), // <-- slot is a duplicate
    ];

    let bank_slot = 222;
    let result = verify_slot_deltas_structural(slot_deltas.as_slice(), bank_slot);
    assert_eq!(
        result,
        Err(VerifySlotDeltasError::SlotHasMultipleEntries(111)),
    );
}
4455
/// History verification succeeds when the slot-deltas set and the slot history contain the
/// same slots and the newest of them is the bank slot.
#[test]
fn test_verify_slot_deltas_with_history_good() {
    let mut slots_from_slot_deltas = HashSet::default();
    let mut slot_history = SlotHistory::default();
    // note: slot history expects slots to be added in numeric order
    for slot in [0, 111, 222, 333, 444] {
        slots_from_slot_deltas.insert(slot);
        slot_history.add(slot);
    }

    let bank_slot = 444;
    let result =
        verify_slot_deltas_with_history(&slots_from_slot_deltas, &slot_history, bank_slot);
    assert_eq!(result, Ok(()));
}
4471
/// History verification reports `BadSlotHistory` when given a default slot history together
/// with a bank slot of 444.
#[test]
fn test_verify_slot_deltas_with_history_bad_slot_history() {
    let bank_slot = 444;
    let result = verify_slot_deltas_with_history(
        &HashSet::default(),
        &SlotHistory::default(), // <-- will only have an entry for slot 0
        bank_slot,
    );
    assert_eq!(result, Err(VerifySlotDeltasError::BadSlotHistory));
}
4482
/// History verification rejects a slot that is present in the slot deltas but was never
/// added to the slot history, naming that slot in the error.
#[test]
fn test_verify_slot_deltas_with_history_bad_slot_not_in_history() {
    let slots_from_slot_deltas = HashSet::from([
        0, // slot history has slot 0 added by default
        444, 222,
    ]);
    let mut slot_history = SlotHistory::default();
    slot_history.add(444); // <-- slot history is missing slot 222

    let bank_slot = 444;
    let result =
        verify_slot_deltas_with_history(&slots_from_slot_deltas, &slot_history, bank_slot);

    assert_eq!(
        result,
        Err(VerifySlotDeltasError::SlotNotFoundInHistory(222)),
    );
}
4501
/// History verification rejects a slot that was added to the slot history but is absent
/// from the slot deltas, naming that slot in the error.
#[test]
fn test_verify_slot_deltas_with_history_bad_slot_not_in_deltas() {
    let slots_from_slot_deltas = HashSet::from([
        0, // slot history has slot 0 added by default
        444, 222,
        // <-- slot deltas is missing slot 333
    ]);
    let mut slot_history = SlotHistory::default();
    slot_history.add(222);
    slot_history.add(333);
    slot_history.add(444);

    let bank_slot = 444;
    let result =
        verify_slot_deltas_with_history(&slots_from_slot_deltas, &slot_history, bank_slot);

    assert_eq!(
        result,
        Err(VerifySlotDeltasError::SlotNotFoundInDeltas(333)),
    );
}
4523
/// Test that adding a bank snapshot creates an `accounts_hardlinks` directory whose entries
/// are symlinks to existing directories, and that removing the bank snapshot also removes
/// every directory those symlinks pointed at.
#[test]
fn test_bank_snapshot_dir_accounts_hardlinks() {
    solana_logger::setup();
    let genesis_config = GenesisConfig::default();
    let bank = Bank::new_for_tests(&genesis_config);

    bank.fill_bank_with_ticks_for_tests();

    let bank_snapshots_dir = tempfile::TempDir::new().unwrap();

    bank.squash();
    bank.force_flush_accounts_cache();

    let snapshot_version = SnapshotVersion::default();
    let snapshot_storages = bank.get_snapshot_storages(None);
    let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas();
    add_bank_snapshot(
        &bank_snapshots_dir,
        &bank,
        &snapshot_storages,
        snapshot_version,
        slot_deltas,
    )
    .unwrap();

    let accounts_hardlinks_dir =
        get_bank_snapshots_dir(&bank_snapshots_dir, bank.slot()).join("accounts_hardlinks");
    assert!(fs::metadata(&accounts_hardlinks_dir).is_ok());

    let mut hardlink_dirs: Vec<PathBuf> = Vec::new();
    // This directory contains symlinks to all accounts snapshot directories.
    for entry in fs::read_dir(accounts_hardlinks_dir).unwrap() {
        let entry = entry.unwrap();
        let symlink = entry.path();
        // Resolve each symlink and remember its target so it can be re-checked after removal.
        let dst_path = fs::read_link(symlink).unwrap();
        assert!(fs::metadata(&dst_path).is_ok());
        hardlink_dirs.push(dst_path);
    }

    assert!(remove_bank_snapshot(bank.slot(), bank_snapshots_dir).is_ok());

    // When the bank snapshot is removed, all the snapshot hardlink directories should be removed.
    assert!(hardlink_dirs.iter().all(|dir| fs::metadata(dir).is_err()));
}
4568
/// Test `get_snapshot_accounts_hardlink_dir()` with a well-formed and a malformed append-vec
/// path.
///
/// The well-formed path is `<accounts_dir>/<slot>.0` and must be accepted.  The malformed
/// path keeps the same file name but is re-rooted two directory levels up from the original
/// file, and must be rejected with `SnapshotError::InvalidAppendVecPath`.
#[test]
fn test_get_snapshot_accounts_hardlink_dir() {
    solana_logger::setup();

    let slot: Slot = 1;

    let mut account_paths_set: HashSet<PathBuf> = HashSet::new();

    let bank_snapshots_dir_tmp = tempfile::TempDir::new().unwrap();
    let bank_snapshot_dir = bank_snapshots_dir_tmp.path().join(slot.to_string());
    let accounts_hardlinks_dir = bank_snapshot_dir.join("accounts_hardlinks");
    fs::create_dir_all(&accounts_hardlinks_dir).unwrap();

    let (_tmp_dir, accounts_dir) = create_tmp_accounts_dir_for_tests();
    let appendvec_filename = format!("{slot}.0");
    let appendvec_path = accounts_dir.join(appendvec_filename);

    let ret = get_snapshot_accounts_hardlink_dir(
        &appendvec_path,
        slot,
        &mut account_paths_set,
        &accounts_hardlinks_dir,
    );
    assert!(ret.is_ok());

    // Same file name, but placed in the grandparent directory of the original file.
    let wrong_appendvec_path = appendvec_path
        .parent()
        .unwrap()
        .parent()
        .unwrap()
        .join(appendvec_path.file_name().unwrap());
    let ret = get_snapshot_accounts_hardlink_dir(
        &wrong_appendvec_path,
        slot,
        &mut account_paths_set,
        accounts_hardlinks_dir,
    );

    assert!(matches!(ret, Err(SnapshotError::InvalidAppendVecPath(_))));
}
4609}