solana_accounts_db/hardened_unpack.rs

use {
    crate::file_io::{file_creator, FileCreator},
    bzip2::bufread::BzDecoder,
    crossbeam_channel::Sender,
    log::*,
    rand::{thread_rng, Rng},
    solana_genesis_config::{GenesisConfig, DEFAULT_GENESIS_ARCHIVE, DEFAULT_GENESIS_FILE},
    std::{
        collections::HashMap,
        fs::{self, File},
        io::{self, BufReader, Read},
        path::{
            Component::{self, CurDir, Normal},
            Path, PathBuf,
        },
        sync::Arc,
        time::Instant,
    },
    tar::{
        Archive,
        EntryType::{Directory, GNUSparse, Regular},
    },
    thiserror::Error,
};

#[derive(Error, Debug)]
pub enum UnpackError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Archive error: {0}")]
    Archive(String),
}

pub type Result<T> = std::result::Result<T, UnpackError>;

// 64 TiB; leaves a safe margin below the 128 TiB maximum of amd64 Linux userspace VmSize
// (ref: https://unix.stackexchange.com/a/386555/364236).
// Note that this is directly related to the mmapped data size, so it protects
// against insane values.
// This is the file size including holes for sparse files.
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE: u64 = 64 * 1024 * 1024 * 1024 * 1024;

// 4 TiB;
// This is the disk usage actually consumed by sparse files.
const MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE: u64 = 4 * 1024 * 1024 * 1024 * 1024;

const MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT: u64 = 5_000_000;
pub const MAX_GENESIS_ARCHIVE_UNPACKED_SIZE: u64 = 10 * 1024 * 1024; // 10 MiB
const MAX_GENESIS_ARCHIVE_UNPACKED_COUNT: u64 = 100;

// The buffer should be large enough to saturate write I/O bandwidth, while also accommodating:
// - Many small files: each file consumes at least one write-capacity-sized chunk (0.5-1 MiB).
// - Large files: their data may accumulate in backlog buffers while waiting for file open
//   operations to complete.
const MAX_UNPACK_WRITE_BUF_SIZE: usize = 512 * 1024 * 1024;

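// Adds `entry_size` to the running `total_size`, erroring if the sum would exceed `limit_size`.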
fn checked_total_size_sum(total_size: u64, entry_size: u64, limit_size: u64) -> Result<u64> {
    trace!("checked_total_size_sum: {total_size} + {entry_size} < {limit_size}");
    let total_size = total_size.saturating_add(entry_size);
    if total_size > limit_size {
        return Err(UnpackError::Archive(format!(
            "too large archive: {total_size} than limit: {limit_size}",
        )));
    }
    Ok(total_size)
}

fn checked_total_count_increment(total_count: u64, limit_count: u64) -> Result<u64> {
    let total_count = total_count + 1;
    if total_count > limit_count {
        return Err(UnpackError::Archive(format!(
            "too many files in snapshot: {total_count:?}"
        )));
    }
    Ok(total_count)
}

fn check_unpack_result(unpack_result: Result<()>, path: String) -> Result<()> {
    if let Err(err) = unpack_result {
        return Err(UnpackError::Archive(format!(
            "failed to unpack {path:?}: {err}"
        )));
    }
    Ok(())
}

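/// Decision returned by an entry checker: unpack the entry into the given
/// directory, silently skip it, or reject the whole archive as invalid.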
#[derive(Debug, PartialEq, Eq)]
pub enum UnpackPath<'a> {
    Valid(&'a Path),
    Ignore,
    Invalid,
}

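// Iterates over `archive` entries, enforcing the hardening limits (apparent and
// actual unpacked sizes plus entry count) and rejecting suspicious paths, then
// hands each accepted entry to `unpack_entry` for writing.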
fn unpack_archive<'a, A, C, D>(
    mut archive: Archive<A>,
    input_archive_size: u64,
    apparent_limit_size: u64,
    actual_limit_size: u64,
    limit_count: u64,
    mut entry_checker: C,   // checks if entry is valid
    file_path_processor: D, // processes file paths after writing
) -> Result<()>
where
    A: Read,
    C: FnMut(&[&str], tar::EntryType) -> UnpackPath<'a>,
    D: FnMut(PathBuf),
{
    let mut apparent_total_size: u64 = 0;
    let mut actual_total_size: u64 = 0;
    let mut total_count: u64 = 0;

    let mut total_entries = 0;
    let mut open_dirs = Vec::new();

    // Bound the buffer by the provided limit on unpacked data and the input archive size
    // (decompression multiplies content size, but buffering more than the original archive
    // isn't necessary).
    let buf_size =
        (input_archive_size.min(actual_limit_size) as usize).min(MAX_UNPACK_WRITE_BUF_SIZE);
    let mut files_creator = file_creator(buf_size, file_path_processor)?;

    for entry in archive.entries()? {
        let entry = entry?;
        let path = entry.path()?;
        let path_str = path.display().to_string();

        // Although the `tar` crate safely skips such entries during the actual
        // unpacking, fail early ourselves when a path is odd (e.g. it contains
        // `..` or starts with `/`) so that the pattern matching below is easier
        // to reason about:
        //   https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371
        let parts = path
            .components()
            .map(|p| match p {
                CurDir => Ok("."),
                Normal(c) => c.to_str().ok_or(()),
                _ => Err(()), // Prefix (for Windows) and RootDir are forbidden
            })
            .collect::<std::result::Result<Vec<_>, _>>();

        // Reject old-style BSD directory entries that aren't explicitly tagged as directories
        let legacy_dir_entry =
            entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/");
        let kind = entry.header().entry_type();
        let reject_legacy_dir_entry = legacy_dir_entry && (kind != Directory);
        let (Ok(parts), false) = (parts, reject_legacy_dir_entry) else {
            return Err(UnpackError::Archive(format!(
                "invalid path found: {path_str:?}"
            )));
        };

        let unpack_dir = match entry_checker(parts.as_slice(), kind) {
            UnpackPath::Invalid => {
                return Err(UnpackError::Archive(format!(
                    "extra entry found: {:?} {:?}",
                    path_str,
                    entry.header().entry_type(),
                )));
            }
            UnpackPath::Ignore => {
                continue;
            }
            UnpackPath::Valid(unpack_dir) => unpack_dir,
        };

        apparent_total_size = checked_total_size_sum(
            apparent_total_size,
            entry.header().size()?,
            apparent_limit_size,
        )?;
        actual_total_size = checked_total_size_sum(
            actual_total_size,
            entry.header().entry_size()?,
            actual_limit_size,
        )?;
        total_count = checked_total_count_increment(total_count, limit_count)?;

        let account_filename = match parts.as_slice() {
            ["accounts", account_filename] => Some(PathBuf::from(account_filename)),
            _ => None,
        };
        let entry_path = if let Some(account) = account_filename {
            // Special-case account files. We're unpacking an account entry inside one of the
            // account_paths returned by `entry_checker`. We want to unpack into
            // account_path/<account> instead of account_path/accounts/<account>, so we strip the
            // accounts/ prefix.
            sanitize_path_and_open_dir(&account, unpack_dir, &mut open_dirs)
        } else {
            sanitize_path_and_open_dir(&path, unpack_dir, &mut open_dirs)
        }?; // ? handles file system errors
        let Some((entry_path, open_dir)) = entry_path else {
            continue; // skip it
        };

        let unpack = unpack_entry(&mut files_creator, entry, entry_path, open_dir);
        check_unpack_result(unpack, path_str)?;

        total_entries += 1;
    }
    files_creator.drain()?;

    info!("unpacked {total_entries} entries total");
    Ok(())
}

fn unpack_entry<'a, R: Read>(
    files_creator: &mut Box<dyn FileCreator + 'a>,
    mut entry: tar::Entry<'_, R>,
    dst: PathBuf,
    dst_open_dir: Arc<File>,
) -> Result<()> {
    let mode = match entry.header().entry_type() {
        GNUSparse | Regular => 0o644,
        _ => 0o755,
    };
    if should_fallback_to_tar_unpack(&entry) {
        entry.unpack(&dst)?;
        // Sanitize permissions.
        set_perms(&dst, mode)?;

        if !entry.header().entry_type().is_dir() {
            // Process file after setting permissions
            files_creator.file_complete(dst);
        }
        return Ok(());
    }
    files_creator.schedule_create_at_dir(dst, mode, dst_open_dir, &mut entry)?;

    return Ok(());

    #[cfg(unix)]
    fn set_perms(dst: &Path, mode: u32) -> io::Result<()> {
        use std::os::unix::fs::PermissionsExt;

        let perm = fs::Permissions::from_mode(mode as _);
        fs::set_permissions(dst, perm)
    }

    #[cfg(windows)]
    fn set_perms(dst: &Path, _mode: u32) -> io::Result<()> {
        super::file_io::set_file_readonly(dst, false)
    }
}

fn should_fallback_to_tar_unpack<R: io::Read>(entry: &tar::Entry<'_, R>) -> bool {
    // The following cases are handled as directories or in a special way by the
    // tar-rs library; we only want to handle the cases where the library would
    // write plain files with the entry's content.
    matches!(
        entry.header().entry_type(),
        tar::EntryType::Directory
            | tar::EntryType::Link
            | tar::EntryType::Symlink
            | tar::EntryType::XGlobalHeader
            | tar::EntryType::XHeader
            | tar::EntryType::GNULongName
            | tar::EntryType::GNULongLink
    ) || entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/")
}

// return Err on file system error
// return Some((path, open_dir)) if path is good
// return None if we should skip this file
fn sanitize_path_and_open_dir(
    entry_path: &Path,
    dst: &Path,
    open_dirs: &mut Vec<(PathBuf, Arc<File>)>,
) -> Result<Option<(PathBuf, Arc<File>)>> {
    // We cannot call unpack_in because it errors if we try to use 2 account paths.
    // So, this code is borrowed from unpack_in
    // ref: https://docs.rs/tar/*/tar/struct.Entry.html#method.unpack_in
    let mut file_dst = dst.to_path_buf();
    const SKIP: Result<Option<(PathBuf, Arc<File>)>> = Ok(None);
    {
        let path = entry_path;
        for part in path.components() {
            match part {
                // Leading '/' characters, root paths, and '.'
                // components are just ignored and treated as "empty
                // components"
                Component::Prefix(..) | Component::RootDir | Component::CurDir => continue,

                // If any part of the filename is '..', then skip over
                // unpacking the file to prevent directory traversal
                // security issues.  See, e.g.: CVE-2001-1267,
                // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
                Component::ParentDir => return SKIP,

                Component::Normal(part) => file_dst.push(part),
            }
        }
    }

    // Skip cases where only slashes or '.' parts were seen, because
    // this is effectively an empty filename.
    if *dst == *file_dst {
        return SKIP;
    }

    // Skip entries without a parent (i.e. outside of FS root)
    let Some(parent) = file_dst.parent() else {
        return SKIP;
    };

    let open_dst_dir = match open_dirs.binary_search_by(|(key, _)| parent.cmp(key)) {
        Err(insert_at) => {
            fs::create_dir_all(parent)?;

            // Here we differ from `unpack_in`: the way `tar::unpack_in` internally
            // calls `unpack` is slightly different. The returned path is ignored here.
            validate_inside_dst(dst, parent)?;

            let opened_dir = Arc::new(File::open(parent)?);
            open_dirs.insert(insert_at, (parent.to_path_buf(), opened_dir.clone()));
            opened_dir
        }
        Ok(index) => open_dirs[index].1.clone(),
    };

    Ok(Some((file_dst, open_dst_dir)))
}

// copied from:
// https://github.com/alexcrichton/tar-rs/blob/d90a02f582c03dfa0fd11c78d608d0974625ae5d/src/entry.rs#L781
fn validate_inside_dst(dst: &Path, file_dst: &Path) -> Result<PathBuf> {
    // Abort if target (canonical) parent is outside of `dst`
    let canon_parent = file_dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!("{err} while canonicalizing {}", file_dst.display()))
    })?;
    let canon_target = dst.canonicalize().map_err(|err| {
        UnpackError::Archive(format!("{err} while canonicalizing {}", dst.display()))
    })?;
    if !canon_parent.starts_with(&canon_target) {
        return Err(UnpackError::Archive(format!(
            "trying to unpack outside of destination path: {}",
            canon_target.display()
        )));
    }
    Ok(canon_target)
}

/// Map from AppendVec file name to unpacked file system location
pub type UnpackedAppendVecMap = HashMap<String, PathBuf>;

/// Unpacks a snapshot archive and collects AppendVec file names & paths.
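///
/// A minimal usage sketch (not compiled here; `reader`, `archive_size`, `ledger_dir`,
/// and `account_paths` are illustrative):
/// ```ignore
/// let archive = Archive::new(reader);
/// let unpacked_map = unpack_snapshot(archive, archive_size, &ledger_dir, &account_paths)?;
/// // `unpacked_map` maps each AppendVec file name to its unpacked location.
/// ```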
pub fn unpack_snapshot<A: Read>(
    archive: Archive<A>,
    input_archive_size: u64,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
) -> Result<UnpackedAppendVecMap> {
    let mut unpacked_append_vec_map = UnpackedAppendVecMap::new();
    unpack_snapshot_with_processors(
        archive,
        input_archive_size,
        ledger_dir,
        account_paths,
        |file, path| {
            unpacked_append_vec_map.insert(file.to_string(), path.join("accounts").join(file));
        },
        |_| {},
    )
    .map(|_| unpacked_append_vec_map)
}

/// Unpacks a snapshot from the (potentially partial) `archive` and
/// sends unpacked file paths through the `sender` channel.
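///
/// A minimal usage sketch (channel and inputs are illustrative):
/// ```ignore
/// let (sender, receiver) = crossbeam_channel::unbounded();
/// streaming_unpack_snapshot(archive, archive_size, &ledger_dir, &account_paths, &sender)?;
/// // `receiver` yields each unpacked file path as it is completed.
/// ```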
pub fn streaming_unpack_snapshot<A: Read>(
    archive: Archive<A>,
    input_archive_size: u64,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    sender: &Sender<PathBuf>,
) -> Result<()> {
    unpack_snapshot_with_processors(
        archive,
        input_archive_size,
        ledger_dir,
        account_paths,
        |_, _| {},
        |file_path| {
            let result = sender.send(file_path);
            if let Err(err) = result {
                panic!(
                    "failed to send path '{}' from unpacker to rebuilder: {err}",
                    err.0.display(),
                );
            }
        },
    )
}

fn unpack_snapshot_with_processors<A, F, G>(
    archive: Archive<A>,
    input_archive_size: u64,
    ledger_dir: &Path,
    account_paths: &[PathBuf],
    mut accounts_path_processor: F,
    file_path_processor: G,
) -> Result<()>
where
    A: Read,
    F: FnMut(&str, &Path),
    G: FnMut(PathBuf),
{
    assert!(!account_paths.is_empty());

    unpack_archive(
        archive,
        input_archive_size,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE,
        MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT,
        |parts, kind| {
            if is_valid_snapshot_archive_entry(parts, kind) {
                if let ["accounts", file] = parts {
                    // Randomly distribute the account files among the available `account_paths`.
                    let path_index = thread_rng().gen_range(0..account_paths.len());
                    match account_paths
                        .get(path_index)
                        .map(|path_buf| path_buf.as_path())
                    {
                        Some(path) => {
                            accounts_path_processor(file, path);
                            UnpackPath::Valid(path)
                        }
                        None => UnpackPath::Invalid,
                    }
                } else {
                    UnpackPath::Valid(ledger_dir)
                }
            } else {
                UnpackPath::Invalid
            }
        },
        file_path_processor,
    )
}

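// True only for non-empty strings made up solely of ASCII digits
// (e.g. "3"); "", "x0", "0x", and "①" are all rejected.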
fn all_digits(v: &str) -> bool {
    if v.is_empty() {
        return false;
    }
    for x in v.chars() {
        if !x.is_ascii_digit() {
            return false;
        }
    }
    true
}

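// Matches account storage file names of the form `<digits>.<digits>`
// (e.g. "0.0" or "01829.077"); names like "1.2.34", "12.", ".12", "abc",
// or "232323" are rejected.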
fn like_storage(v: &str) -> bool {
    let mut periods = 0;
    let mut saw_numbers = false;
    for x in v.chars() {
        if !x.is_ascii_digit() {
            if x == '.' {
                if periods > 0 || !saw_numbers {
                    return false;
                }
                saw_numbers = false;
                periods += 1;
            } else {
                return false;
            }
        } else {
            saw_numbers = true;
        }
    }
    saw_numbers && periods == 1
}

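// Whitelist of entries allowed in a snapshot archive: the `version` file, the
// `accounts/` directory and its storage files, and `snapshots/` with its status
// cache and all-digit `<dir>/<file>` entries; everything else is rejected.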
fn is_valid_snapshot_archive_entry(parts: &[&str], kind: tar::EntryType) -> bool {
    match (parts, kind) {
        (["version"], Regular) => true,
        (["accounts"], Directory) => true,
        (["accounts", file], GNUSparse) if like_storage(file) => true,
        (["accounts", file], Regular) if like_storage(file) => true,
        (["snapshots"], Directory) => true,
        (["snapshots", "status_cache"], GNUSparse) => true,
        (["snapshots", "status_cache"], Regular) => true,
        (["snapshots", dir, file], GNUSparse) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir, file], Regular) if all_digits(dir) && all_digits(file) => true,
        (["snapshots", dir], Directory) if all_digits(dir) => true,
        _ => false,
    }
}

#[derive(Error, Debug)]
pub enum OpenGenesisConfigError {
    #[error("unpack error: {0}")]
    Unpack(#[from] UnpackError),
    #[error("Genesis load error: {0}")]
    Load(#[from] std::io::Error),
}

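/// Loads the genesis config from `ledger_path`, first trying the already unpacked
/// file and, if that fails, unpacking `DEFAULT_GENESIS_ARCHIVE` and retrying the load.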
pub fn open_genesis_config(
    ledger_path: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> std::result::Result<GenesisConfig, OpenGenesisConfigError> {
    match GenesisConfig::load(ledger_path) {
        Ok(genesis_config) => Ok(genesis_config),
        Err(load_err) => {
            warn!(
                "Failed to load genesis_config at {ledger_path:?}: {load_err}. Will attempt to \
                 unpack genesis archive and then retry loading."
            );

            let genesis_package = ledger_path.join(DEFAULT_GENESIS_ARCHIVE);
            unpack_genesis_archive(
                &genesis_package,
                ledger_path,
                max_genesis_archive_unpacked_size,
            )?;
            GenesisConfig::load(ledger_path).map_err(OpenGenesisConfigError::Load)
        }
    }
}

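/// Extracts the bzip2-compressed genesis tar archive at `archive_filename` into
/// `destination_dir`, bounded by `max_genesis_archive_unpacked_size`.
///
/// A minimal usage sketch, mirroring the call in `open_genesis_config` (paths are
/// illustrative):
/// ```ignore
/// unpack_genesis_archive(
///     &ledger_path.join(DEFAULT_GENESIS_ARCHIVE),
///     &ledger_path,
///     MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
/// )?;
/// ```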
pub fn unpack_genesis_archive(
    archive_filename: &Path,
    destination_dir: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> std::result::Result<(), UnpackError> {
    info!("Extracting {archive_filename:?}...");
    let extract_start = Instant::now();

    fs::create_dir_all(destination_dir)?;
    let tar_bz2 = File::open(archive_filename)?;
    let archive_size = tar_bz2.metadata()?.len();
    let tar = BzDecoder::new(BufReader::new(tar_bz2));
    let archive = Archive::new(tar);
    unpack_genesis(
        archive,
        archive_size,
        destination_dir,
        max_genesis_archive_unpacked_size,
    )?;
    info!(
        "Extracted {:?} in {:?}",
        archive_filename,
        Instant::now().duration_since(extract_start)
    );
    Ok(())
}

fn unpack_genesis<A: Read>(
    archive: Archive<A>,
    input_archive_size: u64,
    unpack_dir: &Path,
    max_genesis_archive_unpacked_size: u64,
) -> Result<()> {
    unpack_archive(
        archive,
        input_archive_size,
        max_genesis_archive_unpacked_size,
        max_genesis_archive_unpacked_size,
        MAX_GENESIS_ARCHIVE_UNPACKED_COUNT,
        |p, k| is_valid_genesis_archive_entry(unpack_dir, p, k),
        |_| {},
    )
}

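// Whitelist for genesis archive entries: the genesis file itself is unpacked into
// `unpack_dir`, legacy `rocksdb`/`rocksdb_fifo` contents are ignored, and anything
// else is rejected.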
fn is_valid_genesis_archive_entry<'a>(
    unpack_dir: &'a Path,
    parts: &[&str],
    kind: tar::EntryType,
) -> UnpackPath<'a> {
    trace!("validating: {parts:?} {kind:?}");
    #[allow(clippy::match_like_matches_macro)]
    match (parts, kind) {
        ([DEFAULT_GENESIS_FILE], GNUSparse) => UnpackPath::Valid(unpack_dir),
        ([DEFAULT_GENESIS_FILE], Regular) => UnpackPath::Valid(unpack_dir),
        (["rocksdb"], Directory) => UnpackPath::Ignore,
        (["rocksdb", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb", _], Regular) => UnpackPath::Ignore,
        (["rocksdb_fifo"], Directory) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], GNUSparse) => UnpackPath::Ignore,
        (["rocksdb_fifo", _], Regular) => UnpackPath::Ignore,
        _ => UnpackPath::Invalid,
    }
}

#[cfg(test)]
mod tests {
    use {
        super::*,
        assert_matches::assert_matches,
        tar::{Builder, Header},
    };

    #[test]
    fn test_archive_is_valid_entry() {
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", ""],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3"],
            tar::EntryType::Directory
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["snapshots", "3", "3"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["version"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ""],
            tar::EntryType::Regular
        ));

        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "x0"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0x"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "①"],
            tar::EntryType::Directory
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["snapshots", "0", "aa"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["aaaa"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_valid_snapshot_accounts() {
        solana_logger::setup();
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "0.0"],
            tar::EntryType::Regular
        ));
        assert!(is_valid_snapshot_archive_entry(
            &["accounts", "01829.077"],
            tar::EntryType::Regular
        ));

        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "1.2.34"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "12."],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", ".12"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "0x0"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "abc"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "232323"],
            tar::EntryType::Regular
        ));
        assert!(!is_valid_snapshot_archive_entry(
            &["accounts", "৬.¾"],
            tar::EntryType::Regular
        ));
    }

    #[test]
    fn test_archive_is_valid_archive_entry() {
        let path = Path::new("");
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::Regular),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["genesis.bin"], tar::EntryType::GNUSparse,),
            UnpackPath::Valid(path)
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::GNUSparse,),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Directory),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo", "foo"], tar::EntryType::Regular),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::GNUSparse,
            ),
            UnpackPath::Ignore
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["aaaa"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb", "foo"], tar::EntryType::Directory,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::Regular),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(path, &["rocksdb_fifo"], tar::EntryType::GNUSparse,),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Directory,
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::Regular
            ),
            UnpackPath::Invalid
        );
        assert_eq!(
            is_valid_genesis_archive_entry(
                path,
                &["rocksdb_fifo", "foo", "bar"],
                tar::EntryType::GNUSparse
            ),
            UnpackPath::Invalid
        );
    }

    fn with_finalize_and_unpack<C>(archive: tar::Builder<Vec<u8>>, checker: C) -> Result<()>
    where
        C: Fn(Archive<BufReader<&[u8]>>, &Path) -> Result<()>,
    {
        let data = archive.into_inner().unwrap();
        let reader = BufReader::new(&data[..]);
        let archive = Archive::new(reader);
        let temp_dir = tempfile::TempDir::new().unwrap();

        checker(archive, temp_dir.path())?;
        // Check that there are no bad permissions preventing deletion.
        let result = temp_dir.close();
        assert_matches!(result, Ok(()));
        Ok(())
    }

    fn finalize_and_unpack_snapshot(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_snapshot_with_processors(a, 256, b, &[PathBuf::new()], |_, _| {}, |_| {})
                .map(|_| ())
        })
    }

    fn finalize_and_unpack_genesis(archive: tar::Builder<Vec<u8>>) -> Result<()> {
        with_finalize_and_unpack(archive, |a, b| {
            unpack_genesis(a, 256, b, MAX_GENESIS_ARCHIVE_UNPACKED_SIZE)
        })
    }

    #[test]
    fn test_archive_unpack_snapshot_ok() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_ok() {
        let mut header = Header::new_gnu();
        header.set_path("genesis.bin").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_perms() {
        let mut archive = Builder::new(Vec::new());

        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];
        archive.append(&header, data).unwrap();

        // Removing all permissions makes it harder to delete this directory
        // or work with files inside it.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_mode(0o000);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Ok(()));
    }

    #[test]
    fn test_archive_unpack_genesis_bad_rocksdb_subdir() {
        let mut archive = Builder::new(Vec::new());

        let mut header = Header::new_gnu();
        header.set_path("rocksdb").unwrap();
        header.set_entry_type(Directory);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        // tar-rs treats the following entry as a Directory to support old tar formats.
        let mut header = Header::new_gnu();
        header.set_path("rocksdb/test/").unwrap();
        header.set_entry_type(Regular);
        header.set_size(0);
        header.set_cksum();
        let data: &[u8] = &[];
        archive.append(&header, data).unwrap();

        let result = finalize_and_unpack_genesis(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"rocksdb/test/\"");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_path() {
        let mut header = Header::new_gnu();
        // bypass the sanitization of `.set_path()`
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(b"foo/../../../dangerous".iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "invalid path found: \"foo/../../../dangerous\"");
    }

    fn with_archive_unpack_snapshot_invalid_path(path: &str) -> Result<()> {
        let mut header = Header::new_gnu();
        // bypass the sanitization of `.set_path()`
        for (p, c) in header
            .as_old_mut()
            .name
            .iter_mut()
            .zip(path.as_bytes().iter().chain(Some(&0)))
        {
            *p = *c;
        }
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        with_finalize_and_unpack(archive, |mut unpacking_archive, path| {
            for entry in unpacking_archive.entries()? {
                if !entry?.unpack_in(path)? {
                    return Err(UnpackError::Archive("failed!".to_string()));
                } else if !path.join(path).exists() {
                    return Err(UnpackError::Archive("not existing!".to_string()));
                }
            }
            Ok(())
        })
    }

    #[test]
    fn test_archive_unpack_itself() {
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("ryoqun/work"),
            Ok(())
        );
        // Absolute paths are neutralized as relative
        assert_matches!(
            with_archive_unpack_snapshot_invalid_path("/etc/passwd"),
            Ok(())
        );
        assert_matches!(with_archive_unpack_snapshot_invalid_path("../../../dangerous"), Err(UnpackError::Archive(ref message)) if message == "failed!");
    }

    #[test]
    fn test_archive_unpack_snapshot_invalid_entry() {
        let mut header = Header::new_gnu();
        header.set_path("foo").unwrap();
        header.set_size(4);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "extra entry found: \"foo\" Regular");
    }

    #[test]
    fn test_archive_unpack_snapshot_too_large() {
        let mut header = Header::new_gnu();
        header.set_path("version").unwrap();
        header.set_size(1024 * 1024 * 1024 * 1024 * 1024);
        header.set_cksum();

        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = finalize_and_unpack_snapshot(archive);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == &format!(
                    "too large archive: 1125899906842624 than limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_APPARENT_SIZE}"
                )
        );
    }

    #[test]
    fn test_archive_unpack_snapshot_bad_unpack() {
        let result = check_unpack_result(
            Err(UnpackError::Io(io::ErrorKind::FileTooLarge.into())),
            "abc".to_string(),
        );
        assert_matches!(result, Err(UnpackError::Archive(ref message)) if message == "failed to unpack \"abc\": IO error: file too large");
    }

    #[test]
    fn test_archive_checked_total_size_sum() {
        let result = checked_total_size_sum(500, 500, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(result, Ok(1000));

        let result =
            checked_total_size_sum(u64::MAX - 2, 2, MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == &format!(
                    "too large archive: 18446744073709551615 than limit: {MAX_SNAPSHOT_ARCHIVE_UNPACKED_ACTUAL_SIZE}"
                )
        );
    }

    #[test]
    fn test_archive_checked_total_size_count() {
        let result = checked_total_count_increment(101, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(result, Ok(102));

        let result =
            checked_total_count_increment(999_999_999_999, MAX_SNAPSHOT_ARCHIVE_UNPACKED_COUNT);
        assert_matches!(
            result,
            Err(UnpackError::Archive(ref message))
                if message == "too many files in snapshot: 1000000000000"
        );
    }

    #[test]
    fn test_archive_unpack_account_path() {
        let mut header = Header::new_gnu();
        header.set_path("accounts/123.456").unwrap();
        header.set_size(4);
        header.set_cksum();
        let data: &[u8] = &[1, 2, 3, 4];

        let mut archive = Builder::new(Vec::new());
        archive.append(&header, data).unwrap();
        let result = with_finalize_and_unpack(archive, |ar, tmp| {
            unpack_snapshot_with_processors(
                ar,
                256,
                tmp,
                &[tmp.join("accounts_dest")],
                |_, _| {},
                |path| assert_eq!(path, tmp.join("accounts_dest/123.456")),
            )
        });
        assert_matches!(result, Ok(()));
    }
}