rustic_core 0.11.0

//! `check` subcommand
use std::{
    collections::{BTreeMap, BTreeSet, HashMap},
    fmt::Debug,
    num::ParseIntError,
    path::PathBuf,
    str::FromStr,
    sync::Mutex,
};

use bytes::Bytes;
use bytesize::ByteSize;
use derive_setters::Setters;
use displaydoc::Display;
use jiff::Zoned;
use log::{debug, error, warn};
use rand::{Rng, prelude::SliceRandom, rng};
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use thiserror::Error;
use zstd::stream::decode_all;

use crate::{
    DataId, ErrorKind, RusticError, TreeId,
    backend::{FileType, ReadBackend, cache::Cache, decrypt::DecryptReadBackend, node::NodeType},
    blob::{BlobId, BlobType, tree::TreeStreamerOnce},
    crypto::hasher::hash,
    error::RusticResult,
    id::Id,
    index::{
        GlobalIndex, ReadGlobalIndex,
        binarysorted::{IndexCollector, IndexType},
    },
    progress::Progress,
    repofile::{
        IndexFile, IndexPack, PackHeader, PackHeaderLength, PackHeaderRef, packfile::PackId,
    },
    repository::{Open, Repository},
};

#[derive(Clone, Copy, Debug, Default)]
#[non_exhaustive]
/// Options to specify which subset of packs will be read
pub enum ReadSubsetOption {
    #[default]
    /// Read all pack files
    All,
    /// Read a random subset of pack files with (approximately) the given percentage of total size
    Percentage(f64),
    /// Read a random subset of pack files with (approximately) the given size
    Size(u64),
    /// Read a subset of packfiles based on Ids: Using (1,n) .. (n,n) in separate runs will cover all pack files
    IdSubSet((u32, u32)),
}

impl ReadSubsetOption {
    fn apply(self, packs: impl IntoIterator<Item = IndexPack>) -> Vec<IndexPack> {
        self.apply_with_rng(packs, &mut rng())
    }

    fn apply_with_rng(
        self,
        packs: impl IntoIterator<Item = IndexPack>,
        rng: &mut impl Rng,
    ) -> Vec<IndexPack> {
        fn id_matches_n_m(id: &Id, n: u32, m: u32) -> bool {
            id.as_u32() % m == n % m
        }

        let mut total_size: u64 = 0;
        let mut packs: Vec<_> = packs
            .into_iter()
            .inspect(|p| total_size += u64::from(p.pack_size()))
            .collect();

        // Apply read-subset option
        if let Some(mut size) = match self {
            Self::All => None,
            // we need some casts to compute percentage...
            #[allow(clippy::cast_possible_truncation)]
            #[allow(clippy::cast_precision_loss)]
            #[allow(clippy::cast_sign_loss)]
            Self::Percentage(p) => Some((total_size as f64 * p / 100.0) as u64),
            Self::Size(s) => Some(s),
            Self::IdSubSet((n, m)) => {
                packs.retain(|p| id_matches_n_m(&p.id, n, m));
                None
            }
        } {
            // random subset of given size is required
            packs.shuffle(rng);
            packs.retain(|p| {
                let p_size = u64::from(p.pack_size());
                if size > p_size {
                    size = size.saturating_sub(p_size);
                    true
                } else {
                    false
                }
            });
        }
        packs
    }
}

/// parses n/m including named settings depending on current date
#[allow(clippy::cast_sign_loss)] // we only use safe casts
fn parse_n_m(now: &Zoned, n_in: &str, m_in: &str) -> Result<(u32, u32), ParseIntError> {
    let n = match n_in {
        "hourly" => (now.day_of_year() as u32 - 1) * 24 + (now.hour() as u32),
        "daily" => now.day_of_year() as u32 - 1,
        "weekly" => now.clone().iso_week_date().week() as u32 - 1,
        "monthly" => now.month() as u32 - 1,
        n => n.parse()?,
    };

    let m = match (n_in, m_in) {
        ("hourly", "day") => 24,
        ("hourly", "week") => 24 * 7,
        ("hourly", "month") | (_, "month_hours") => 24 * now.days_in_month() as u32,
        ("hourly", "year") | (_, "year_hours") => 24 * now.days_in_year() as u32,
        ("daily", "week") => 7,
        ("daily", "month") | (_, "month_days") => now.days_in_month() as u32,
        ("daily", "year") | (_, "year_days") => now.days_in_year() as u32,
        ("weekly", "month") => 4,
        ("weekly", "year") => 52,
        ("monthly", "year") => 12,
        (_, m) => m.parse()?,
    };
    Ok((n % m, m))
}

impl FromStr for ReadSubsetOption {
    type Err = Box<RusticError>;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let result = if s == "all" {
            Self::All
        } else if let Some(p) = s.strip_suffix('%') {
            // try to read percentage
            let percentage = p.parse().map_err(|err| {
                RusticError::with_source(
                    ErrorKind::InvalidInput,
                    "Error parsing percentage from value `{value}` for ReadSubset option. Did you forget the '%'?",
                    err,
                )
                .attach_context("value", p.to_string())
            })?;

            Self::Percentage(percentage)
        } else if let Some((n, m)) = s.split_once('/') {
            let now = Zoned::now();
            let subset = parse_n_m(&now, n, m).map_err(
                |err|
                    RusticError::with_source(
                        ErrorKind::InvalidInput,
                        "Error parsing 'n/m' from value `{value}` for ReadSubset option. Allowed values: 'all', 'x%', 'n/m' or a size.",
                        err
                    )
                    .attach_context("value", s)
                    .attach_context("n/m", format!("{n}/{m}"))
                    .attach_context("now", now.to_string())
            )?;

            Self::IdSubSet(subset)
        } else {
            let byte_size = ByteSize::from_str(s)
                    .map_err(|err| {
                        RusticError::with_source(
                            ErrorKind::InvalidInput,
                            "Error parsing size from value `{value}` for ReadSubset option. Allowed values: 'all', 'x%', 'n/m' or a size.",
                            err
                        )
                        .attach_context("value", s)
                    })?
                    .as_u64();

            Self::Size(byte_size)
        };

        Ok(result)
    }
}

#[cfg_attr(feature = "clap", derive(clap::Parser))]
#[derive(Clone, Copy, Debug, Default, Setters)]
#[setters(into)]
#[non_exhaustive]
/// Options for the `check` command
pub struct CheckOptions {
    /// Don't verify the data saved in the cache
    #[cfg_attr(feature = "clap", clap(long, conflicts_with = "no_cache"))]
    pub trust_cache: bool,

    /// Also read and check pack files
    #[cfg_attr(feature = "clap", clap(long))]
    pub read_data: bool,

    /// Read only a subset of the data. Allowed values: "all", "n/m" for specific part, "x%" or a size for a random subset.
    #[cfg_attr(
        feature = "clap",
        clap(long, default_value = "all", requires = "read_data")
    )]
    pub read_data_subset: ReadSubsetOption,
}

/// Runs the `check` command
///
/// # Type Parameters
///
/// * `P` - The progress bar type.
/// * `S` - The state the repository is in.
///
/// # Arguments
///
/// * `repo` - The repository to check
/// * `opts` - The check options to use
/// * `trees` - The trees to check
///
/// # Errors
///
/// * If the repository is corrupted
///
/// # Panics
///
// TODO: Add panics
pub(crate) fn check_repository<S: Open>(
    repo: &Repository<S>,
    opts: CheckOptions,
    trees: Vec<TreeId>,
) -> RusticResult<CheckResults> {
    let be = repo.dbe();
    let cache = repo.cache();
    let hot_be = &repo.be_hot;
    let raw_be = repo.dbe();
    let collector = CheckResultsCollector::default().log(true);
    if !opts.trust_cache
        && let Some(cache) = &cache
    {
        for file_type in [FileType::Snapshot, FileType::Index] {
            // list files in order to clean up the cache
            //
            // This lists files here and later when reading index / checking snapshots
            // TODO: Only list the files once...
            _ = be.list_with_size(file_type)?;

            let p = repo.progress_bytes(&format!("checking {file_type:?} in cache..."));
            // TODO: Make concurrency (20) customizable
            check_cache_files(20, cache, raw_be, file_type, &p, &collector)?;
        }
    }

    if let Some(hot_be) = hot_be {
        for file_type in [FileType::Snapshot, FileType::Index] {
            let p = repo.progress_spinner(&format!("checking {file_type:?} in hot repo..."));
            check_hot_files(raw_be, hot_be, file_type, &p, &collector)?;
        }
    }

    let (index_collector, missing_packs) = check_packs(repo, be, hot_be.as_ref(), &collector)?;

    if let Some(cache) = &cache {
        let p = repo.progress_spinner("cleaning up packs from cache...");
        let ids: Vec<_> = index_collector
            .tree_packs()
            .iter()
            .map(|(id, size)| (**id, *size))
            .collect();
        if let Err(err) = cache.remove_not_in_list(FileType::Pack, &ids) {
            warn!(
                "Error in cache backend removing pack files: {}",
                err.display_log()
            );
        }
        p.finish();

        if !opts.trust_cache {
            let p = repo.progress_bytes("checking packs in cache...");
            // TODO: Make concurrency (5) customizable
            check_cache_files(5, cache, raw_be, FileType::Pack, &p, &collector)?;
        }
    }

    let index_be = GlobalIndex::new_from_index(index_collector.into_index());

    let packs = check_trees(repo, be, &index_be, trees, &collector)
        .map_err(|err| collector.add_error(CheckError::ErrorCheckingTrees { source: err }))
        .unwrap_or_default();

    if opts.read_data {
        let packs = index_be
            .into_index()
            .into_iter()
            .filter(|p| !missing_packs.contains_key(&p.id))
            .filter(|p| packs.contains(&p.id));

        debug!("using read-data-subset {:?}", opts.read_data_subset);
        let packs = opts.read_data_subset.apply(packs);

        repo.warm_up_wait(packs.iter().map(|pack| pack.id))?;

        let total_pack_size = packs.iter().map(|pack| u64::from(pack.pack_size())).sum();
        let p = repo.progress_bytes("reading pack data...");
        p.set_length(total_pack_size);

        packs.into_par_iter().for_each(|pack| {
            let id = pack.id;
            match be.read_full(FileType::Pack, &id) {
                Err(err) => {
                    collector.add_error(CheckError::ErrorReadingPack { id, source: err });
                }
                Ok(data) => {
                    if let Err(err) = check_pack(be, pack, data, &p, &collector) {
                        collector.add_error(CheckError::ErrorCheckingPack { id, source: err });
                    }
                }
            }
        });
        p.finish();
    }

    Ok(collector.into_check_results())
}

/// Checks if all files in the backend are also in the hot backend
///
/// # Arguments
///
/// * `be` - The backend to check
/// * `be_hot` - The hot backend to check
/// * `file_type` - The type of the files to check
/// * `pb` - The progress bar to use
///
/// # Errors
///
/// * If a file is missing or has a different size
fn check_hot_files(
    be: &impl ReadBackend,
    be_hot: &impl ReadBackend,
    file_type: FileType,
    p: &Progress,
    collector: &CheckResultsCollector,
) -> RusticResult<()> {
    let mut files = be
        .list_with_size(file_type)?
        .into_iter()
        .collect::<HashMap<_, _>>();

    let files_hot = be_hot.list_with_size(file_type)?;

    for (id, size_hot) in files_hot {
        match files.remove(&id) {
            None => collector.add_error(CheckError::NoColdFile { id, file_type }),
            Some(size) if size != size_hot => {
                collector.add_error(CheckError::HotFileSizeMismatch {
                    id,
                    file_type,
                    size_hot,
                    size,
                });
            }
            _ => {} //everything ok
        }
    }

    for (id, _) in files {
        collector.add_error(CheckError::NoHotFile { id, file_type });
    }
    p.finish();

    Ok(())
}

/// Checks if all files in the cache are also in the backend
///
/// # Arguments
///
/// * `concurrency` - The number of threads to use
/// * `cache` - The cache to check
/// * `be` - The backend to check
/// * `file_type` - The type of the files to check
/// * `p` - The progress bar to use
///
/// # Errors
///
/// * If a file is missing or has a different size
fn check_cache_files(
    _concurrency: usize,
    cache: &Cache,
    be: &impl ReadBackend,
    file_type: FileType,
    p: &Progress,
    collector: &CheckResultsCollector,
) -> RusticResult<()> {
    let files = cache.list_with_size(file_type)?;

    if files.is_empty() {
        return Ok(());
    }

    let total_size = files.values().map(|size| u64::from(*size)).sum();
    p.set_length(total_size);

    files
        .into_par_iter()
        .for_each_with((cache, be, p), |(cache, be, p), (id, size)| {
            // Read file from cache and from backend and compare
            match (
                cache.read_full(file_type, &id),
                be.read_full(file_type, &id),
            ) {
                (Err(err), _) => {
                    collector.add_error(CheckError::ErrorReadingCache {
                        id,
                        file_type,
                        source: err,
                    });
                }
                (_, Err(err)) => {
                    collector.add_error(CheckError::ErrorReadingFile {
                        id,
                        file_type,
                        source: err,
                    });
                }
                (Ok(Some(data_cached)), Ok(data)) if data_cached != data => {
                    collector.add_error(CheckError::CacheMismatch { id, file_type });
                }
                (Ok(_), Ok(_)) => {} // everything ok
            }

            p.inc(u64::from(size));
        });

    p.finish();
    Ok(())
}

// IndexPacks is a Map of all packs from the index with information about size (u32) and whether they are marked-to-delete (bool)
type IndexPacks = BTreeMap<PackId, (u32, bool)>;

/// Check if packs correspond to index and are present in the backend
///
/// # Arguments
///
/// * `be` - The backend to check
/// * `hot_be` - The hot backend to check
/// * `read_data` - Whether to read the data of the packs
/// * `pb` - The progress bar to use
///
/// # Errors
///
/// * If a pack is missing or has a different size
///
/// # Returns
///
/// The index collector
fn check_packs<S: Open>(
    repo: &Repository<S>,
    be: &impl DecryptReadBackend,
    hot_be: Option<&impl ReadBackend>,
    collector: &CheckResultsCollector,
) -> RusticResult<(IndexCollector, IndexPacks)> {
    let mut packs = BTreeMap::new();
    let mut tree_packs = BTreeMap::new();
    let mut index_collector = IndexCollector::new(IndexType::Full);

    let p = repo.progress_counter("reading index...");
    for index in be.stream_all::<IndexFile>(&p)? {
        let index = index?.1;
        index_collector.extend(index.packs.clone());
        for (p, to_delete) in index.all_packs() {
            let check_time = to_delete; // Check if time is set for packs marked to delete
            let blob_type = p.blob_type();
            let pack_size = p.pack_size();
            _ = packs.insert(p.id, (pack_size, to_delete));
            if hot_be.is_some() && blob_type == BlobType::Tree {
                _ = tree_packs.insert(p.id, (pack_size, to_delete));
            }

            // Check if time is set _
            if check_time && p.time.is_none() {
                collector.add_error(CheckError::PackTimeNotSet { id: p.id });
            }

            // check offsests in index
            let mut expected_offset: u32 = 0;
            let mut blobs = p.blobs;
            blobs.sort_unstable();
            for blob in blobs {
                if blob.tpe != blob_type {
                    collector.add_error(CheckError::PackBlobTypesMismatch {
                        id: p.id,
                        blob_id: blob.id,
                        blob_type: blob.tpe,
                        expected: blob_type,
                    });
                }

                if blob.location.offset != expected_offset {
                    collector.add_error(CheckError::PackBlobOffsetMismatch {
                        id: p.id,
                        blob_id: blob.id,
                        offset: blob.location.offset,
                        expected: expected_offset,
                    });
                }
                expected_offset += blob.location.length;
            }
        }
    }

    p.finish();

    if let Some(hot_be) = hot_be {
        let p = repo.progress_spinner("listing packs in hot repo...");
        check_packs_list_hot(hot_be, tree_packs, &packs, collector)?;
        p.finish();
    }

    let p = repo.progress_spinner("listing packs...");
    check_packs_list(be, &mut packs, collector)?;
    p.finish();

    Ok((index_collector, packs))
}

// TODO: Add documentation
/// Checks if all packs in the backend are also in the index
///
/// # Arguments
///
/// * `be` - The backend to check
/// * `packs` - The packs to check
///
/// # Errors
///
/// * If a pack is missing or has a different size
fn check_packs_list(
    be: &impl ReadBackend,
    packs: &mut IndexPacks,
    collector: &CheckResultsCollector,
) -> RusticResult<()> {
    let mut packs_from_be = be.list_with_size(FileType::Pack)?;
    packs_from_be.sort_by_key(|item| item.0);
    for (id, size) in packs_from_be {
        match packs.remove(&PackId::from(id)) {
            None => collector.add_warn(CheckError::PackNotReferenced { id }),
            Some((index_size, to_delete)) if index_size != size => {
                collector.add_error(CheckError::PackSizeMismatchIndex {
                    id,
                    to_delete,
                    index_size,
                    size,
                });
            }
            _ => {} //everything ok
        }
    }

    for (id, (size, to_delete)) in packs {
        collector.add_error(CheckError::NoPack {
            id: *id,
            to_delete: *to_delete,
            size: *size,
        });
    }
    Ok(())
}

/// Checks if all packs in the backend are also in the index
///
/// # Arguments
///
/// * `be` - The backend to check
/// * `packs` - The packs to check
///
/// # Errors
///
/// * If a pack is missing or has a different size
fn check_packs_list_hot(
    be: &impl ReadBackend,
    mut treepacks: IndexPacks,
    packs: &IndexPacks,
    collector: &CheckResultsCollector,
) -> RusticResult<()> {
    for (id, size) in be.list_with_size(FileType::Pack)? {
        match treepacks.remove(&PackId::from(id)) {
            None => {
                let id = PackId::from(id);
                if packs.contains_key(&id) {
                    collector.add_warn(CheckError::HotDataPack { id });
                } else {
                    collector.add_warn(CheckError::HotPackNotReferenced { id });
                }
            }
            Some((index_size, to_delete)) if index_size != size => {
                collector.add_error(CheckError::HotPackSizeMismatchIndex {
                    id,
                    to_delete,
                    index_size,
                    size,
                });
            }
            _ => {} //everything ok
        }
    }

    for (id, (size, to_delete)) in treepacks {
        collector.add_error(CheckError::NoHotPack {
            id,
            to_delete,
            size,
        });
    }
    Ok(())
}

/// Check if all snapshots and contained trees can be loaded and contents exist in the index
///
/// # Arguments
///
/// * `index` - The index to check
/// * `pb` - The progress bar to use
///
/// # Errors
///
/// * If a snapshot or tree is missing or has a different size
fn check_trees<S: Open>(
    repo: &Repository<S>,
    be: &impl DecryptReadBackend,
    index: &impl ReadGlobalIndex,
    snap_trees: Vec<TreeId>,
    collector: &CheckResultsCollector,
) -> RusticResult<BTreeSet<PackId>> {
    let mut packs = BTreeSet::new();
    let p = repo.progress_counter("checking trees...");
    let mut tree_streamer = TreeStreamerOnce::new(be, index, snap_trees, p)?;
    while let Some(item) = tree_streamer.next().transpose()? {
        let (path, tree) = item;
        for node in tree.nodes {
            match node.node_type {
                NodeType::File => node.content.as_ref().map_or_else(
                    || {
                        collector.add_error(CheckError::FileHasNoContent {
                            file: path.join(node.name()),
                        });
                    },
                    |content| {
                        for (i, id) in content.iter().enumerate() {
                            if id.is_null() {
                                collector.add_error(CheckError::FileBlobHasNullId {
                                    file: path.join(node.name()),
                                    blob_num: i,
                                });
                            }

                            match index.get_data(id) {
                                None => {
                                    collector.add_error(CheckError::FileBlobNotInIndex {
                                        file: path.join(node.name()),
                                        blob_id: *id,
                                    });
                                }
                                Some(entry) => {
                                    _ = packs.insert(entry.pack);
                                }
                            }
                        }
                    },
                ),

                NodeType::Dir => {
                    match node.subtree {
                        None => collector.add_error(CheckError::NoSubTree {
                            dir: path.join(node.name()),
                        }),
                        Some(tree) if tree.is_null() => {
                            collector.add_error(CheckError::NullSubTree {
                                dir: path.join(node.name()),
                            });
                        }
                        Some(id) => match index.get_tree(&id) {
                            None => {
                                collector.add_error(CheckError::SubTreeMissingInIndex {
                                    dir: path.join(node.name()),
                                    blob_id: id,
                                });
                            }
                            Some(entry) => {
                                _ = packs.insert(entry.pack);
                            }
                        }, // subtree is ok
                    }
                }
                _ => {} // nothing to check
            }
        }
    }

    Ok(packs)
}

/// Check if a pack is valid
///
/// # Arguments
///
/// * `be` - The backend to use
/// * `index_pack` - The pack to check
/// * `data` - The data of the pack
/// * `p` - The progress bar to use
///
/// # Errors
///
/// * If the pack is invalid
///
/// # Panics
///
/// * If zstd decompression fails.
fn check_pack(
    be: &impl DecryptReadBackend,
    index_pack: IndexPack,
    mut data: Bytes,
    p: &Progress,
    collector: &CheckResultsCollector,
) -> RusticResult<()> {
    let id = index_pack.id;
    let size = index_pack.pack_size();
    if data.len() != size as usize {
        collector.add_error(CheckError::PackSizeMismatch {
            id,
            size: data.len(),
            expected: size as usize,
        });
        return Ok(());
    }

    let comp_id = PackId::from(hash(&data));
    if id != comp_id {
        collector.add_error(CheckError::PackHashMismatch { id, comp_id });
        return Ok(());
    }

    // check header length
    let header_len = PackHeaderRef::from_index_pack(&index_pack).size();
    let pack_header_len = PackHeaderLength::from_binary(&data.split_off(data.len() - 4))
        .map_err(|err| {
            RusticError::with_source(
                ErrorKind::Internal,
                "Error reading pack header length `{length}` for `{pack_id}`",
                err,
            )
            .attach_context("pack_id", id.to_string())
            .attach_context("length", header_len.to_string())
            .ask_report()
        })?
        .to_u32();
    if pack_header_len != header_len {
        collector.add_error(CheckError::PackHeaderLengthMismatch {
            id,
            length: pack_header_len,
            computed: header_len,
        });
        return Ok(());
    }

    // check header
    let header = be.decrypt(&data.split_off(data.len() - header_len as usize))?;

    let pack_blobs = PackHeader::from_binary(&header)
        .map_err(|err| {
            RusticError::with_source(
                ErrorKind::Internal,
                "Error reading pack header for id `{pack_id}`",
                err,
            )
            .attach_context("pack_id", id.to_string())
            .ask_report()
        })?
        .into_blobs();
    let mut blobs = index_pack.blobs;
    blobs.sort_unstable();
    if pack_blobs != blobs {
        collector.add_error(CheckError::PackHeaderMismatchIndex { id });
        debug!("pack file header: {pack_blobs:?}");
        debug!("index: {blobs:?}");
        return Ok(());
    }
    p.inc(u64::from(header_len) + 4);

    // check blobs
    for blob in blobs {
        let blob_id = blob.id;
        let mut blob_data = be.decrypt(&data.split_to(blob.location.length as usize))?;

        // TODO: this is identical to backend/decrypt.rs; unify these two parts!
        if let Some(length) = blob.location.uncompressed_length {
            blob_data = decode_all(&*blob_data).unwrap();
            if blob_data.len() != length.get() as usize {
                collector.add_error(CheckError::PackBlobLengthMismatch { id, blob_id });
            }
        }

        let comp_id = BlobId::from(hash(&blob_data));
        if blob.id != comp_id {
            collector.add_error(CheckError::PackBlobHashMismatch {
                id,
                blob_id,
                comp_id,
            });
        }
        p.inc(blob.location.length.into());
    }

    Ok(())
}

#[non_exhaustive]
#[derive(Error, Debug, Display)]
#[allow(clippy::doc_markdown)]
pub enum CheckError {
    /// error reading pack {id} : {source}
    ErrorReadingPack {
        id: PackId,
        source: Box<RusticError>,
    },
    /// error checking pack {id} : {source}
    ErrorCheckingPack {
        id: PackId,
        source: Box<RusticError>,
    },
    /// error checking trees : {source}
    ErrorCheckingTrees { source: Box<RusticError> },
    /// cold file for hot file Type: {file_type:?}, Id: {id} does not exist
    NoColdFile { id: Id, file_type: FileType },
    /// Type: {file_type:?}, Id: {id}: hot size: {size_hot}, actual size: {size}
    HotFileSizeMismatch {
        id: Id,
        file_type: FileType,
        size_hot: u32,
        size: u32,
    },
    /// hot file Type: {file_type:?}, Id: {id} is missing!
    NoHotFile { id: Id, file_type: FileType },
    /// Error reading cached file Type: {file_type:?}, Id: {id} : {source}
    ErrorReadingCache {
        id: Id,
        file_type: FileType,
        source: Box<RusticError>,
    },
    /// Error reading file Type: {file_type:?}, Id: {id} : {source}
    ErrorReadingFile {
        id: Id,
        file_type: FileType,
        source: Box<RusticError>,
    },
    /// Cached file Type: {file_type:?}, Id: {id} is not identical to backend!
    CacheMismatch { id: Id, file_type: FileType },
    /// pack {id}: No time is set! Run prune to correct this!
    PackTimeNotSet { id: PackId },
    /// pack {id}: blob {blob_id} blob type does not match: type: {blob_type:?}, expected: {expected:?}
    PackBlobTypesMismatch {
        id: PackId,
        blob_id: BlobId,
        blob_type: BlobType,
        expected: BlobType,
    },
    /// pack {id}: blob {blob_id} offset in index: {offset}, expected: {expected}
    PackBlobOffsetMismatch {
        id: PackId,
        blob_id: BlobId,
        offset: u32,
        expected: u32,
    },
    /// pack {id} not referenced in index. Can be a parallel backup job. To repair: 'rustic repair index'.
    PackNotReferenced { id: Id },
    /// hot pack {id} not referenced in index. Can be a parallel backup job. To repair: 'rustic repair index'.
    HotPackNotReferenced { id: PackId },
    /// pack {id} marked to delete: {to_delete}: size computed by index: {index_size}, actual size: {size}. To repair: 'rustic repair index'.
    PackSizeMismatchIndex {
        id: Id,
        to_delete: bool,
        index_size: u32,
        size: u32,
    },
    /// hot pack {id} marked to-delete: {to_delete}: size computed by index: {index_size}, actual size: {size}. To repair: 'rustic repair index'.
    HotPackSizeMismatchIndex {
        id: Id,
        to_delete: bool,
        index_size: u32,
        size: u32,
    },
    /// hot pack {id} is a data pack. This should not happen.
    HotDataPack { id: PackId },
    /// hot pack {id} marked to-delete: {to_delete} is referenced by the index with size {size} but not present! To repair: 'rustic repair index'.
    NoHotPack {
        id: PackId,
        to_delete: bool,
        size: u32,
    },
    /// pack {id} marked to-delete: {to_delete} is referenced by the index with size {size} but not present! To repair: 'rustic repair index'.
    NoPack {
        id: PackId,
        to_delete: bool,
        size: u32,
    },
    /// file {file:?} doesn't have a content
    FileHasNoContent { file: PathBuf },
    /// file {file:?} blob {blob_num} has null ID
    FileBlobHasNullId { file: PathBuf, blob_num: usize },
    /// file {file:?} blob {blob_id} is missing in index
    FileBlobNotInIndex { file: PathBuf, blob_id: DataId },
    /// dir {dir:?} doesn't have a subtree
    NoSubTree { dir: PathBuf },
    /// "dir {dir:?} subtree has null ID
    NullSubTree { dir: PathBuf },
    /// "dir {dir:?} subtree blob {blob_id} is missing in index",
    SubTreeMissingInIndex { dir: PathBuf, blob_id: TreeId },
    /// pack {id}: data size does not match expected size. Read: {size} bytes, expected: {expected} bytes
    PackSizeMismatch {
        id: PackId,
        size: usize,
        expected: usize,
    },
    /// pack {id}: Hash mismatch. Computed hash: {comp_id}
    PackHashMismatch { id: PackId, comp_id: PackId },
    /// pack {id}: Header length in pack file doesn't match index. In pack: {length}, computed: {computed}
    PackHeaderLengthMismatch {
        id: PackId,
        length: u32,
        computed: u32,
    },
    /// pack {id}: Header from pack file does not match the index
    PackHeaderMismatchIndex { id: PackId },
    /// pack {id}, blob {blob_id}: Actual uncompressed length does not fit saved uncompressed length
    PackBlobLengthMismatch { id: PackId, blob_id: BlobId },
    /// pack {id}, blob {blob_id}: Hash mismatch. Computed hash: {comp_id}
    PackBlobHashMismatch {
        id: PackId,
        blob_id: BlobId,
        comp_id: BlobId,
    },
}

#[derive(Debug, PartialEq, Eq)]
#[non_exhaustive]
/// `CheckErrorLevel` describes severity levels of problems identified by check.
pub enum CheckErrorLevel {
    /// A warning: Something is strange and should be corrected, but repository integrity is not affected.
    Warn,
    /// An error: Something in the repository is not as it should be.
    Error,
}

#[derive(Debug)]
/// `CheckResults` is a list of errors encountered during the check.
pub struct CheckResults(pub Vec<(CheckErrorLevel, CheckError)>);

impl CheckResults {
    /// Returns whether severe errors have been found.
    ///
    /// # Errors
    /// `CheckFoundErrors` if there are severe errors.
    pub fn is_ok(&self) -> RusticResult<()> {
        if self
            .0
            .iter()
            .any(|(level, _)| level == &CheckErrorLevel::Error)
        {
            return Err(RusticError::new(
                ErrorKind::Repository,
                "check found errors!",
            ));
        }
        Ok(())
    }
}

#[derive(Default)]
pub struct CheckResultsCollector {
    log: bool,
    findings: Mutex<Vec<(CheckErrorLevel, CheckError)>>,
}

impl CheckResultsCollector {
    pub(crate) fn log(self, log: bool) -> Self {
        Self {
            log,
            findings: self.findings,
        }
    }

    fn add_error(&self, err: CheckError) {
        if self.log {
            error!("{err}");
        }
        self.findings
            .lock()
            .unwrap()
            .push((CheckErrorLevel::Error, err));
    }

    fn add_warn(&self, err: CheckError) {
        if self.log {
            warn!("{err}");
        }
        self.findings
            .lock()
            .unwrap()
            .push((CheckErrorLevel::Warn, err));
    }

    // turn collected results into `CheckResults`
    pub(crate) fn into_check_results(self) -> CheckResults {
        CheckResults(self.findings.into_inner().unwrap())
    }
}

#[cfg(test)]
mod tests {
    use crate::repofile::RusticTime;

    use super::*;
    use insta::assert_ron_snapshot;
    use rand::{Rng, RngExt, SeedableRng, rngs::StdRng};
    use rstest::{fixture, rstest};

    const PACK_SIZE: u32 = 100_000_000;

    #[fixture]
    fn rng() -> StdRng {
        StdRng::seed_from_u64(5)
    }
    fn test_packs(rng: &mut impl Rng) -> Vec<IndexPack> {
        (0..500)
            .map(|_| IndexPack {
                id: PackId::from(Id::random_from_rng(rng)),
                blobs: Vec::new(),
                time: None,
                size: Some(rng.random_range(0..PACK_SIZE)),
            })
            .collect()
    }

    #[rstest]
    #[case("all")]
    #[case("5/12")]
    #[case("5%")]
    #[case("250MiB")]
    fn test_read_subset(mut rng: StdRng, #[case] s: &str) {
        let size =
            |packs: &[IndexPack]| -> u64 { packs.iter().map(|p| u64::from(p.pack_size())).sum() };

        let test_packs = test_packs(&mut rng);
        let total_size = size(&test_packs);

        let subset: ReadSubsetOption = s.parse().unwrap();
        let packs = subset.apply_with_rng(test_packs, &mut rng);
        let test_size = size(&packs);

        match subset {
            ReadSubsetOption::All => assert_eq!(test_size, total_size),
            #[allow(clippy::cast_possible_truncation)]
            #[allow(clippy::cast_precision_loss)]
            #[allow(clippy::cast_sign_loss)]
            ReadSubsetOption::Percentage(s) => assert!(test_size <= (total_size as f64 * s) as u64),
            ReadSubsetOption::Size(size) => {
                assert!(test_size <= size && size <= test_size + u64::from(PACK_SIZE));
            }
            ReadSubsetOption::IdSubSet(_) => {}
        }

        let ids: Vec<_> = packs.iter().map(|pack| (pack.id, pack.size)).collect();
        assert_ron_snapshot!(s, ids);
    }

    #[rstest]
    #[case("5", "12")]
    #[case("29", "28")]
    #[case("15", "month_hours")]
    #[case("4", "month_days")]
    #[case("hourly", "day")]
    #[case("hourly", "week")]
    #[case("hourly", "month")]
    #[case("hourly", "year")]
    #[case("hourly", "20")]
    #[case("daily", "week")]
    #[case("daily", "month")]
    #[case("daily", "year")]
    #[case("daily", "15")]
    #[case("weekly", "month")]
    #[case("weekly", "year")]
    #[case("weekly", "10")]
    #[case("monthly", "year")]
    #[case("monthly", "5")]
    fn test_parse_n_m(#[case] n: &str, #[case] m: &str) {
        let now = RusticTime::parse_utc("2024-10-11T12:00:00").unwrap();
        let res = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2024-10-11T13:00:00").unwrap();
        let res_1h = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2024-10-12T12:00:00").unwrap();
        let res_1d = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2024-10-18T12:00:00").unwrap();
        let res_1w = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2024-11-11T12:00:00").unwrap();
        let res_1m = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2025-10-11T12:00:00").unwrap();
        let res_1y = parse_n_m(&now, n, m).unwrap();
        let now = RusticTime::parse_utc("2020-02-02T12:00:00").unwrap();
        let res2 = parse_n_m(&now, n, m).unwrap();

        assert_ron_snapshot!(
            format!("n_m_{n}_{m}"),
            (res, res_1h, res_1d, res_1w, res_1m, res_1y, res2)
        );
    }

    fn test_read_subset_n_m() {
        let test_packs = test_packs(&mut rng());
        let mut all_packs: BTreeSet<_> = test_packs.iter().map(|pack| pack.id).collect();

        let mut run_with = |s: &str| {
            let subset: ReadSubsetOption = s.parse().unwrap();
            let packs = subset.apply(test_packs.clone());
            for pack in packs {
                assert!(all_packs.remove(&pack.id));
            }
        };

        run_with("1/5");
        run_with("2/5");
        run_with("3/5");
        run_with("4/5");
        run_with("5/5");

        assert!(all_packs.is_empty());
    }
}