use derive_setters::Setters;
use jiff::Timestamp;
use log::{debug, error, info, trace, warn};
use std::{
cmp::Ordering,
collections::BTreeMap,
path::{Path, PathBuf},
sync::Mutex,
};
use ignore::{DirEntry, WalkBuilder};
use itertools::Itertools;
use rayon::ThreadPoolBuilder;
use crate::{
backend::{
FileType, ReadBackend,
decrypt::DecryptReadBackend,
local_destination::LocalDestination,
node::{Node, NodeType},
},
blob::{BlobLocation, BlobLocations},
error::{ErrorKind, RusticError, RusticResult},
repofile::packfile::PackId,
repository::{IndexedFull, IndexedTree, Open, Repository},
};
pub(crate) mod constants {
    /// Upper bound on the number of threads in the pack-reader thread pool
    /// used by `restore_contents`.
    pub(crate) const MAX_READER_THREADS_NUM: usize = 20;
}
/// Maps a blob (pack id + location within the pack) to all destination file
/// locations where its contents must be written.
type RestoreInfo = BTreeMap<(PackId, BlobLocation), Vec<FileLocation>>;
/// Paths of the files to restore, indexed by `file_idx` (see `FileLocation`).
type Filenames = Vec<PathBuf>;
/// Options controlling how a snapshot is restored into a local destination.
#[allow(clippy::struct_excessive_bools)]
#[cfg_attr(feature = "clap", derive(clap::Parser))]
#[derive(Debug, Copy, Clone, Default, Setters)]
#[setters(into)]
#[non_exhaustive]
pub struct RestoreOptions {
    /// Remove entries that exist in the destination but not in the snapshot.
    #[cfg_attr(feature = "clap", clap(long))]
    pub delete: bool,
    /// Restore ownership by numeric UID/GID from the snapshot instead of
    /// resolving user/group names.
    #[cfg_attr(feature = "clap", clap(long))]
    pub numeric_id: bool,
    /// Do not restore ownership at all.
    #[cfg_attr(feature = "clap", clap(long, conflicts_with = "numeric_id"))]
    pub no_ownership: bool,
    /// Verify the content of already-existing files blob-by-blob instead of
    /// accepting them based on matching size and mtime alone.
    #[cfg_attr(feature = "clap", clap(long))]
    pub verify_existing: bool,
}
/// Counters for one entry kind (files or directories) gathered while
/// planning a restore.
#[derive(Default, Debug, Clone, Copy)]
#[non_exhaustive]
pub struct FileDirStats {
    /// Entries that must be newly restored (not present in the destination).
    pub restore: u64,
    /// Entries accepted as-is without content checks (e.g. matching size and mtime).
    pub unchanged: u64,
    /// Entries whose existing content was verified as identical.
    pub verified: u64,
    /// Entries that exist in the destination but must be modified.
    pub modify: u64,
    /// Entries present in the destination but not in the snapshot.
    pub additional: u64,
}
/// Statistics collected while planning a restore, split by entry kind.
#[derive(Default, Debug, Clone, Copy)]
#[non_exhaustive]
pub struct RestoreStats {
    /// Counters for files.
    pub files: FileDirStats,
    /// Counters for directories.
    pub dirs: FileDirStats,
}
/// Identity of a file on the source filesystem, used to group hardlinks:
/// nodes sharing the same `(device_id, inode)` pair belong to one group.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
struct HardlinkKey {
    /// Device id the file resided on.
    device_id: u64,
    /// Inode number on that device.
    inode: u64,
}
/// Restore the planned contents and metadata into `dest`.
///
/// First warms up all packs that will be read, then writes all file
/// contents, and finally restores metadata (ownership, permissions, times,
/// hardlinks, …) for every node in `node_streamer`. Contents are written
/// before metadata; presumably so writing doesn't clobber restored
/// attributes such as timestamps — NOTE(review): rationale inferred, confirm.
///
/// # Errors
/// Propagates errors from warm-up, content restoration, or metadata
/// restoration.
pub(crate) fn restore_repository<S: IndexedTree>(
    file_infos: RestorePlan,
    repo: &Repository<S>,
    opts: RestoreOptions,
    node_streamer: impl Iterator<Item = RusticResult<(PathBuf, Node)>>,
    dest: &LocalDestination,
) -> RusticResult<()> {
    repo.warm_up_wait(file_infos.to_packs().into_iter())?;
    restore_contents(
        repo,
        dest,
        &file_infos.names,
        file_infos.file_lengths,
        file_infos.r,
        file_infos.restore_size,
    )?;
    let p = repo.progress_spinner("setting metadata...");
    restore_metadata(node_streamer, &file_infos.hardlink_candidates, opts, dest)?;
    p.finish();
    Ok(())
}
/// Collect information about the restore and build a [`RestorePlan`].
///
/// Performs a merge-join of the destination directory walk and the
/// snapshot's node stream (both ordered by path) and classifies every entry:
/// * present only in the destination → "additional"; removed when
///   `opts.delete` is set (only reported when `dry_run` is set),
/// * present in both → existing content is reused where it matches,
///   otherwise the entry is scheduled for modification,
/// * present only in the snapshot → scheduled for restore.
///
/// Classification statistics are stored in the returned plan.
///
/// # Errors
/// Returns an error if the node stream yields one, if a directory cannot be
/// created, or if file information cannot be gathered.
#[allow(clippy::too_many_lines)]
pub(crate) fn collect_and_prepare<S: IndexedFull>(
    repo: &Repository<S>,
    opts: RestoreOptions,
    mut node_streamer: impl Iterator<Item = RusticResult<(PathBuf, Node)>>,
    dest: &LocalDestination,
    dry_run: bool,
) -> RusticResult<RestorePlan> {
    let p = repo.progress_spinner("collecting file information...");
    let dest_path = dest.path("");
    let mut stats = RestoreStats::default();
    let mut restore_infos = RestorePlan::default();
    let mut additional_existing = false;
    // Most recently removed directory; entries beneath it are already gone
    // and need no separate removal attempt.
    let mut removed_dir = None;
    // Handles an entry that exists in the destination but not in the snapshot.
    let mut process_existing = |entry: &DirEntry| -> RusticResult<_> {
        if entry.depth() == 0 {
            // The destination root itself is never an "additional" entry.
            return Ok(());
        }
        debug!("additional {}", entry.path().display());
        // NOTE(review): `DirEntry::file_type()` is `None` only for stdin, so
        // the unwraps here are assumed safe for a directory walk — confirm.
        if entry.file_type().unwrap().is_dir() {
            stats.dirs.additional += 1;
        } else {
            stats.files.additional += 1;
        }
        match (opts.delete, dry_run, entry.file_type().unwrap().is_dir()) {
            (true, true, true) => {
                info!(
                    "would have removed the additional dir: {}",
                    entry.path().display()
                );
            }
            (true, true, false) => {
                info!(
                    "would have removed the additional file: {}",
                    entry.path().display()
                );
            }
            (true, false, true) => {
                let path = entry.path();
                match &removed_dir {
                    // Already removed together with a removed parent dir.
                    Some(dir) if path.starts_with(dir) => {}
                    _ => match dest.remove_dir(path) {
                        Ok(()) => {
                            removed_dir = Some(path.to_path_buf());
                        }
                        Err(err) => {
                            // Removal failures are logged, not fatal.
                            error!("error removing {}: {err}", path.display());
                        }
                    },
                }
            }
            (true, false, false) => {
                if let Err(err) = dest.remove_file(entry.path()) {
                    error!("error removing {}: {err}", entry.path().display());
                }
            }
            (false, _, _) => {
                // Without `delete` we only remember that extra entries exist.
                additional_existing = true;
            }
        }
        Ok(())
    };
    // Handles a node from the snapshot; `exists` tells whether an entry with
    // the same path is already present in the destination.
    let mut process_node = |path: &PathBuf, node: &Node, exists: bool| -> RusticResult<_> {
        match node.node_type {
            NodeType::Dir => {
                if exists {
                    stats.dirs.modify += 1;
                    trace!("existing dir {}", path.display());
                } else {
                    stats.dirs.restore += 1;
                    debug!("to restore: {}", path.display());
                    if !dry_run {
                        dest.create_dir(path)
                            .map_err(|err| {
                                RusticError::with_source(
                                    ErrorKind::InputOutput,
                                    "Failed to create the directory `{path}`. Please check the path and try again.",
                                    err
                                )
                                .attach_context("path", path.display().to_string())
                            })?;
                    }
                }
            }
            NodeType::File => {
                // Only the first file of a hardlink group gets its content
                // restored; later members are skipped here and hardlinked in
                // `restore_metadata`.
                if let Some(key) = hardlink_key(node) {
                    match restore_infos.hardlink_candidates.entry(key) {
                        std::collections::btree_map::Entry::Vacant(entry) => {
                            trace!("Adding hardlink candidate {}", path.display());
                            _ = entry.insert(path.clone());
                        }
                        std::collections::btree_map::Entry::Occupied(_) => return Ok(()),
                    }
                }
                match (
                    exists,
                    restore_infos.add_file(dest, node, path.clone(), repo, opts.verify_existing)?,
                ) {
                    (_, AddFileResult::Existing) => {
                        stats.files.unchanged += 1;
                        trace!("identical file: {}", path.display());
                    }
                    (_, AddFileResult::Verified) => {
                        stats.files.verified += 1;
                        trace!("verified identical file: {}", path.display());
                    }
                    (true, AddFileResult::Modify) => {
                        stats.files.modify += 1;
                        debug!("to modify: {}", path.display());
                    }
                    (false, AddFileResult::Modify) => {
                        stats.files.restore += 1;
                        debug!("to restore: {}", path.display());
                    }
                }
            }
            // Special files (symlinks, devices, …) are created later by
            // `set_metadata` via `create_special`.
            _ => {}
        }
        Ok(())
    };
    // Walk the destination sorted by path so it can be merge-joined with the
    // (also path-ordered) node stream below. Walk errors are logged and the
    // affected entries skipped.
    let mut dst_iter = WalkBuilder::new(dest_path)
        .follow_links(false)
        .hidden(false)
        .ignore(false)
        .sort_by_file_path(Path::cmp)
        .build()
        .inspect(|r| {
            if let Err(err) = r {
                error!("Error during collection of files: {err:?}");
            }
        })
        .filter_map(Result::ok);
    let mut next_dst = dst_iter.next();
    let mut next_node = node_streamer.next().transpose()?;
    // Merge-join loop over destination entries and snapshot nodes.
    loop {
        match (&next_dst, &next_node) {
            (None, None) => break,
            (Some(destination), None) => {
                process_existing(destination)?;
                next_dst = dst_iter.next();
            }
            (Some(destination), Some((path, node))) => {
                match destination.path().cmp(&dest.path(path)) {
                    Ordering::Less => {
                        // Destination-only entry.
                        process_existing(destination)?;
                        next_dst = dst_iter.next();
                    }
                    Ordering::Equal => {
                        // Same path on both sides. If the entry types differ
                        // (or the node is a special file), treat the existing
                        // entry as additional first so it can be removed.
                        if (node.is_dir() && !destination.file_type().unwrap().is_dir())
                            || (node.is_file() && !destination.metadata().unwrap().is_file())
                            || node.is_special()
                        {
                            process_existing(destination)?;
                        }
                        process_node(path, node, true)?;
                        next_dst = dst_iter.next();
                        next_node = node_streamer.next().transpose()?;
                    }
                    Ordering::Greater => {
                        // Snapshot-only entry.
                        process_node(path, node, false)?;
                        next_node = node_streamer.next().transpose()?;
                    }
                }
            }
            (None, Some((path, node))) => {
                process_node(path, node, false)?;
                next_node = node_streamer.next().transpose()?;
            }
        }
    }
    if additional_existing {
        warn!("Note: additional entries exist in destination");
    }
    restore_infos.stats = stats;
    p.finish();
    Ok(restore_infos)
}
/// Restore metadata for all streamed nodes and recreate hardlinks.
///
/// Directories are kept on a stack and their metadata is only applied once
/// the streamed path leaves the directory, i.e. after all entries inside it
/// were processed (presumably so later writes inside the directory don't
/// alter already-restored attributes such as mtime — NOTE(review): rationale
/// inferred, confirm).
///
/// # Errors
/// Returns an error if the node stream yields one or if a hardlink cannot
/// be created.
fn restore_metadata(
    mut node_streamer: impl Iterator<Item = RusticResult<(PathBuf, Node)>>,
    hardlink_candidates: &BTreeMap<HardlinkKey, PathBuf>,
    opts: RestoreOptions,
    dest: &LocalDestination,
) -> RusticResult<()> {
    // Directories whose metadata is deferred until their children are done.
    let mut dir_stack = Vec::new();
    while let Some((path, node)) = node_streamer.next().transpose()? {
        // A non-canonical member of a hardlink group: link it to the
        // first-seen (canonical) path instead of carrying its own content.
        if let Some(key) = hardlink_key(&node)
            && let Some(canonical) = hardlink_candidates.get(&key)
            && canonical != &path
        {
            debug!(
                "restoring hardlink {} -> {}",
                path.display(),
                canonical.display()
            );
            dest.hard_link(canonical, &path).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::InputOutput,
                    "Failed to recreate the hardlink `{path}` from `{canonical}`.",
                    err,
                )
                .attach_context("path", path.display().to_string())
                .attach_context("canonical", canonical.display().to_string())
            })?;
        }
        match node.node_type {
            NodeType::Dir => {
                // Pop and finalize all stacked dirs that are not ancestors of
                // the current path — their subtrees are complete.
                while let Some((stackpath, _)) = dir_stack.last() {
                    if path.starts_with(stackpath) {
                        break;
                    }
                    let (path, node) = dir_stack.pop().unwrap();
                    set_metadata(dest, opts, &path, &node);
                }
                dir_stack.push((path, node));
            }
            _ => set_metadata(dest, opts, &path, &node),
        }
    }
    // Finalize any remaining directories, deepest first.
    for (path, node) in dir_stack.into_iter().rev() {
        set_metadata(dest, opts, &path, &node);
    }
    Ok(())
}
/// Compute the key used to group hardlinked files.
///
/// Only regular files with a link count above 1 and a non-zero
/// `(device_id, inode)` pair can belong to a hardlink group; any other node
/// yields `None`.
fn hardlink_key(node: &Node) -> Option<HardlinkKey> {
    if !matches!(node.node_type, NodeType::File) {
        return None;
    }
    let meta = &node.meta;
    let linkable = meta.links > 1 && meta.device_id != 0 && meta.inode != 0;
    linkable.then_some(HardlinkKey {
        device_id: meta.device_id,
        inode: meta.inode,
    })
}
/// Apply the metadata of `node` to `path` in the destination.
///
/// Creates the entry as a special file if applicable, then sets ownership
/// (honoring `opts.no_ownership` and `opts.numeric_id`), permissions,
/// extended attributes and file times. Every step is best-effort: failures
/// are logged as warnings and never abort the restore.
pub(crate) fn set_metadata(
    dest: &LocalDestination,
    opts: RestoreOptions,
    path: &PathBuf,
    node: &Node,
) {
    debug!("setting metadata for {}", path.display());
    dest.create_special(path, node)
        .unwrap_or_else(|_| warn!("restore {}: creating special file failed.", path.display()));
    // Ownership: skipped entirely, by numeric id, or by resolved names.
    match (opts.no_ownership, opts.numeric_id) {
        (true, _) => {}
        (false, true) => dest
            .set_uid_gid(path, &node.meta)
            .unwrap_or_else(|_| warn!("restore {}: setting UID/GID failed.", path.display())),
        (false, false) => dest
            .set_user_group(path, &node.meta)
            .unwrap_or_else(|_| warn!("restore {}: setting User/Group failed.", path.display())),
    }
    dest.set_permission(path, node)
        .unwrap_or_else(|_| warn!("restore {}: chmod failed.", path.display()));
    dest.set_extended_attributes(path, &node.meta.extended_attributes)
        .unwrap_or_else(|_| {
            warn!(
                "restore {}: setting extended attributes failed.",
                path.display()
            );
        });
    // Times are set last so the preceding operations can't change them again.
    dest.set_times(path, &node.meta)
        .unwrap_or_else(|_| warn!("restore {}: setting file times failed.", path.display()));
}
/// One unit of work for `restore_contents`: blobs from a single pack and
/// the destinations they must be written to.
struct PackInfo {
    /// The pack containing the blobs.
    pack_id: PackId,
    /// If set, `(file_idx, file_start, data_length)` of a destination file
    /// which already contains this blob's data, so it can be read locally
    /// instead of from the backend.
    from_file: Option<(usize, u64, u32)>,
    /// Blob locations within the pack, each annotated with the list of
    /// `(file_idx, file_start)` destinations to write to.
    locations: BlobLocations<Vec<(usize, u64)>>,
}
impl PackInfo {
    /// Try to merge `other` into `self` so both ranges can be fetched with a
    /// single backend read.
    ///
    /// Merging requires the same pack, that `self` is not sourced from an
    /// existing local file, and that the two blob-location ranges can be
    /// coalesced. On failure, both values are returned unchanged so the
    /// caller keeps them as separate work items.
    #[allow(clippy::result_large_err)]
    fn coalesce(self, other: Self) -> Result<Self, (Self, Self)> {
        let mergeable = self.pack_id == other.pack_id
            && self.from_file.is_none()
            && self.locations.can_coalesce(&other.locations);
        if !mergeable {
            return Err((self, other));
        }
        Ok(Self {
            pack_id: self.pack_id,
            from_file: self.from_file,
            locations: self.locations.append(other.locations),
        })
    }
}
/// Write the file contents described by `restore_info` into `dest`.
///
/// Zero-length files are created/truncated up front. All other data is
/// grouped per pack (with adjacent ranges coalesced into single reads) and
/// processed on a bounded rayon thread pool: each range is read — from the
/// backend, or from an existing local file that already holds the blob —
/// then each blob is decrypted and written to every destination location
/// by nested worker tasks.
///
/// # Errors
/// Returns an error if a zero-length file cannot be created or the thread
/// pool cannot be built.
///
/// # Panics
/// NOTE(review): I/O and backend errors inside the worker closures are
/// handled via `unwrap()` and will panic the worker thread — confirm this
/// is the intended failure mode.
#[allow(clippy::too_many_lines)]
fn restore_contents<S: Open>(
    repo: &Repository<S>,
    dest: &LocalDestination,
    filenames: &Filenames,
    file_lengths: Vec<u64>,
    restore_info: RestoreInfo,
    restore_size: u64,
) -> RusticResult<()> {
    let be = repo.dbe();
    // Zero-length files carry no blobs, so they are handled here directly.
    for (i, size) in file_lengths.iter().enumerate() {
        if *size == 0 {
            let path = &filenames[i];
            dest.set_length(path, *size).map_err(|err| {
                RusticError::with_source(
                    ErrorKind::InputOutput,
                    "Failed to set the length of the file `{path}`. Please check the path and try again.",
                    err,
                )
                .attach_context("path", path.display().to_string())
            })?;
        }
    }
    // Remaining target length per file; a worker sets the file length on the
    // first write and zeroes the entry so it happens exactly once.
    let sizes = &Mutex::new(file_lengths);
    let p = repo.progress_bytes("restoring file contents...");
    p.set_length(restore_size);
    // Build per-pack work items and coalesce adjacent ranges of the same
    // pack (see `PackInfo::coalesce`).
    let packs: Vec<_> = restore_info
        .into_iter()
        .map(|((pack_id, bl), fls)| {
            // If any destination already holds this blob, remember it as a
            // local source for the data.
            let from_file = fls
                .iter()
                .find(|fl| fl.matches)
                .map(|fl| (fl.file_idx, fl.file_start, bl.data_length()));
            // All destinations that still need the blob written.
            let name_dests: Vec<_> = fls
                .iter()
                .filter(|fl| !fl.matches)
                .map(|fl| (fl.file_idx, fl.file_start))
                .collect();
            PackInfo {
                pack_id,
                from_file,
                locations: BlobLocations::from_blob_location(bl, name_dests),
            }
        })
        .coalesce(PackInfo::coalesce)
        .collect();
    let threads = constants::MAX_READER_THREADS_NUM;
    let pool = ThreadPoolBuilder::new()
        .num_threads(threads)
        .build()
        .map_err(|err| {
            RusticError::with_source(
                ErrorKind::Internal,
                "Failed to create the thread pool with `{num_threads}` threads. Please try again.",
                err,
            )
            .attach_context("num_threads", threads.to_string())
        })?;
    // Scoped pool: borrows of `filenames`, `sizes`, `p` etc. stay valid for
    // the lifetime of all spawned tasks.
    pool.in_place_scope(|s| {
        for PackInfo {
            pack_id,
            from_file,
            locations:
                BlobLocations {
                    offset,
                    length,
                    blobs,
                },
        } in packs
        {
            let p = &p;
            if !blobs.is_empty() {
                s.spawn(move |s1| {
                    // Read the whole (possibly coalesced) range once — from a
                    // local file that already has the data, or from the pack
                    // in the backend.
                    let read_data = match &from_file {
                        Some((file_idx, offset_file, length_file)) => {
                            dest.read_at(&filenames[*file_idx], *offset_file, (*length_file).into())
                                .unwrap()
                        }
                        None => {
                            be.read_partial(FileType::Pack, &pack_id, false, offset, length)
                                .unwrap()
                        }
                    };
                    for (bl, name_dests) in blobs {
                        let size = bl.data_length().into();
                        // Locally-read data is used as-is; backend data must
                        // be sliced out of the range and decrypted first.
                        let data = if from_file.is_some() {
                            read_data.clone()
                        } else {
                            let start = usize::try_from(bl.offset - offset)
                                .expect("convert from u32 to usize should not fail!");
                            let end = usize::try_from(bl.offset + bl.length - offset)
                                .expect("convert from u32 to usize should not fail!");
                            be.read_encrypted_from_partial(
                                &read_data[start..end],
                                bl.uncompressed_length,
                            )
                            .unwrap()
                        };
                        // One nested task per destination write.
                        for (file_idx, start) in name_dests {
                            let data = data.clone();
                            s1.spawn(move |_| {
                                let path = &filenames[file_idx];
                                let mut sizes_guard = sizes.lock().unwrap();
                                let filesize = sizes_guard[file_idx];
                                if filesize > 0 {
                                    // First write to this file: set its final
                                    // length once, then mark it done.
                                    dest.set_length(path, filesize).unwrap();
                                    sizes_guard[file_idx] = 0;
                                }
                                // Release the lock before the actual write.
                                drop(sizes_guard);
                                dest.write_at(path, start, &data).unwrap();
                                p.inc(size);
                            });
                        }
                    }
                });
            }
        }
    });
    p.finish();
    Ok(())
}
/// The plan of what has to be restored, built by `collect_and_prepare` and
/// consumed by `restore_repository`.
#[derive(Debug, Default)]
pub struct RestorePlan {
    /// Paths of the files to restore, indexed by `file_idx`.
    names: Filenames,
    /// Target length of each file, parallel to `names`.
    file_lengths: Vec<u64>,
    /// Which blobs must be written to which file locations.
    r: RestoreInfo,
    /// First-seen (canonical) path for each hardlink group.
    hardlink_candidates: BTreeMap<HardlinkKey, PathBuf>,
    /// Total number of bytes that have to be restored.
    pub restore_size: u64,
    /// Total number of bytes already present and matching in the destination.
    pub matched_size: u64,
    /// Statistics gathered while collecting the plan.
    pub stats: RestoreStats,
}
/// A single location within a destination file where a blob belongs.
#[derive(Debug)]
struct FileLocation {
    /// Index into `RestorePlan::names` / `file_lengths`.
    file_idx: usize,
    /// Byte offset of the blob within the file.
    file_start: u64,
    /// Whether the destination already contains the correct data here.
    matches: bool,
}
/// Outcome of `RestorePlan::add_file` for a single file node.
enum AddFileResult {
    /// The file exists and was accepted without content verification
    /// (empty file, or matching size + mtime).
    Existing,
    /// The file exists and all of its blobs were verified against its content.
    Verified,
    /// The file must be (partially) written.
    Modify,
}
impl RestorePlan {
    /// Add a file node to the plan, reusing existing destination content
    /// where possible.
    ///
    /// Fast paths: an empty node matching an empty existing file is accepted
    /// as-is; unless `ignore_mtime` is set, an existing file with matching
    /// size and mtime is accepted without reading it. Otherwise every blob
    /// of the node is compared against the existing file content and only
    /// non-matching blobs are scheduled for restore. `matched_size` and
    /// `restore_size` are updated accordingly.
    ///
    /// # Errors
    /// Returns an error if file metadata cannot be read or an index entry
    /// cannot be resolved.
    fn add_file<S: IndexedFull>(
        &mut self,
        dest: &LocalDestination,
        file: &Node,
        name: PathBuf,
        repo: &Repository<S>,
        ignore_mtime: bool,
    ) -> RusticResult<AddFileResult> {
        // NOTE(review): assumed to return an open handle to an existing file
        // suitable for content comparison — confirm `get_matching_file`'s
        // contract.
        let mut open_file = dest.get_matching_file(&name, file.meta.size);
        // Fast path: empty node and empty existing file — nothing to do.
        if file.meta.size == 0
            && let Some(meta) = open_file
                .as_ref()
                .map(std::fs::File::metadata)
                .transpose()
                .map_err(|err|
                    RusticError::with_source(
                        ErrorKind::InputOutput,
                        "Failed to get the metadata of the file `{path}`. Please check the path and try again.",
                        err
                    )
                    .attach_context("path", name.display().to_string())
                )?
            && meta.len() == 0 {
            return Ok(AddFileResult::Existing);
        }
        // Fast path: accept an existing file on matching size + mtime.
        if !ignore_mtime
            && let Some(meta) = open_file
                .as_ref()
                .map(std::fs::File::metadata)
                .transpose()
                .map_err(|err|
                    RusticError::with_source(
                        ErrorKind::InputOutput,
                        "Failed to get the metadata of the file `{path}`. Please check the path and try again.",
                        err
                    )
                    .attach_context("path", name.display().to_string())
                )?
        {
            let mtime = meta
                .modified()
                .ok()
                .and_then(|t| Timestamp::try_from(t).ok());
            if meta.len() == file.meta.size && mtime == file.meta.mtime {
                debug!("file {} exists with suitable size and mtime, accepting it!", name.display());
                self.matched_size += file.meta.size;
                return Ok(AddFileResult::Existing);
            }
        }
        let file_idx = self.names.len();
        self.names.push(name);
        // Walk the node's blobs, checking each one against the existing file
        // (if any) and recording where it must be written.
        let mut file_pos = 0;
        let mut has_unmatched = false;
        for id in file.content.iter().flatten() {
            let ie = repo.get_index_entry(id)?;
            let bl = ie.location;
            let length: u64 = bl.data_length().into();
            // Does the existing file already contain this blob at this position?
            let matches = open_file
                .as_mut()
                .is_some_and(|file| id.blob_matches_reader(length, file));
            let blob_location = self.r.entry((ie.pack, bl)).or_default();
            blob_location.push(FileLocation {
                file_idx,
                file_start: file_pos,
                matches,
            });
            if matches {
                self.matched_size += length;
            } else {
                self.restore_size += length;
                has_unmatched = true;
            }
            file_pos += length;
        }
        self.file_lengths.push(file_pos);
        if !has_unmatched && open_file.is_some() {
            // Every blob matched the existing content.
            Ok(AddFileResult::Verified)
        } else {
            Ok(AddFileResult::Modify)
        }
    }
    /// Return the ids of all packs that must be read from the backend, i.e.
    /// packs with at least one blob entry no destination file already
    /// contains. Used for warming up before the restore.
    #[must_use]
    pub fn to_packs(&self) -> Vec<PackId> {
        self.r
            .iter()
            // Keep only blob entries without any matching local copy.
            .filter(|(_, fls)| fls.iter().all(|fl| !fl.matches))
            .map(|((pack, _), _)| *pack)
            // The map is keyed by (pack, location), so equal pack ids are
            // adjacent and `dedup` suffices.
            .dedup()
            .collect()
    }
}