// Conserve backup system.
// Copyright 2015, 2016, 2017, 2018, 2019, 2020, 2022 Martin Pool.
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//! Access a "live" on-disk tree as a source for backups, destination for restores, etc.
use std::collections::vec_deque::VecDeque;
use std::fs;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use crate::owner::Owner;
use crate::stats::LiveTreeIterStats;
use crate::unix_mode::UnixMode;
use crate::unix_time::UnixTime;
use crate::*;
/// A real tree on the filesystem, for use as a backup source or restore destination.
#[derive(Clone)]
pub struct LiveTree {
path: PathBuf,
}
impl LiveTree {
/// Open the live tree rooted at `path`.
pub fn open<P: AsRef<Path>>(path: P) -> Result<LiveTree> {
// TODO: Maybe fail here if the root doesn't exist or isn't a directory?
Ok(LiveTree {
path: path.as_ref().to_path_buf(),
})
}
fn relative_path(&self, apath: &Apath) -> PathBuf {
apath.below(&self.path)
}
/// Return the root path for this tree.
pub fn path(&self) -> &Path {
&self.path
}
}
/// An in-memory Entry describing a file/dir/symlink in a live tree.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LiveEntry {
apath: Apath,
kind: Kind,
mtime: UnixTime,
size: Option<u64>,
symlink_target: Option<String>,
unix_mode: UnixMode,
owner: Owner,
}
impl tree::ReadTree for LiveTree {
type Entry = LiveEntry;
type R = std::fs::File;
type IT = Iter;
fn iter_entries(&self, subtree: Apath, exclude: Exclude) -> Result<Self::IT> {
Iter::new(&self.path, subtree, exclude)
}
fn file_contents(&self, entry: &LiveEntry) -> Result<Self::R> {
assert_eq!(entry.kind(), Kind::File);
let path = self.relative_path(&entry.apath);
fs::File::open(&path).map_err(|source| Error::ReadSourceFile { path, source })
}
fn estimate_count(&self) -> Result<u64> {
// TODO: This stats the file and builds an entry about them, just to
// throw it away. We could perhaps change the iter to optionally do
// less work.
Ok(self
.iter_entries(Apath::root(), Exclude::nothing())?
.count() as u64)
}
}
impl Entry for LiveEntry {
fn apath(&self) -> &Apath {
&self.apath
}
fn kind(&self) -> Kind {
self.kind
}
fn mtime(&self) -> UnixTime {
self.mtime
}
fn size(&self) -> Option<u64> {
self.size
}
fn symlink_target(&self) -> &Option<String> {
&self.symlink_target
}
fn unix_mode(&self) -> UnixMode {
self.unix_mode
}
fn owner(&self) -> Owner {
self.owner.clone()
}
}
impl LiveEntry {
fn from_fs_metadata(
apath: Apath,
metadata: &fs::Metadata,
symlink_target: Option<String>,
) -> LiveEntry {
// TODO: Could we read the symlink target here, rather than in the caller?
let mtime = metadata
.modified()
.expect("Failed to get file mtime")
.into();
let size = if metadata.is_file() {
Some(metadata.len())
} else {
None
};
let owner = Owner::from(metadata);
let unix_mode = UnixMode::from(metadata.permissions());
LiveEntry {
apath,
kind: metadata.file_type().into(),
mtime,
symlink_target,
size,
unix_mode,
owner,
}
}
}
/// Recursive iterator of the contents of a live tree.
///
/// Iterate source files descending through a source directory.
///
/// Visit the files in a directory before descending into its children, as
/// is the defined order for files stored in an archive. Within those files and
/// child directories, visit them according to a sorted comparison by their UTF-8
/// name.
#[derive(Debug)]
pub struct Iter {
/// Root of the source tree.
root_path: PathBuf,
/// Directories yet to be visited.
dir_deque: VecDeque<Apath>,
/// All entries that have been seen but not yet returned by the iterator, in the order they
/// should be returned.
entry_deque: VecDeque<LiveEntry>,
/// Check that emitted paths are in the right order.
check_order: apath::DebugCheckOrder,
/// Patterns to exclude from iteration.
exclude: Exclude,
stats: LiveTreeIterStats,
}
impl Iter {
/// Construct a new iter that will visit everything below this root path,
/// subject to some exclusions
fn new(root_path: &Path, subtree: Apath, exclude: Exclude) -> Result<Iter> {
let start_metadata = fs::symlink_metadata(subtree.below(root_path))?;
// Preload iter to return the root and then recurse into it.
let entry_deque: VecDeque<LiveEntry> = [LiveEntry::from_fs_metadata(
subtree.clone(),
&start_metadata,
None,
)]
.into();
// TODO: Consider the case where the root is not actually a directory?
// Should that be supported?
let dir_deque: VecDeque<Apath> = [subtree].into();
Ok(Iter {
root_path: root_path.to_path_buf(),
entry_deque,
dir_deque,
check_order: apath::DebugCheckOrder::new(),
exclude,
stats: LiveTreeIterStats::default(),
})
}
/// Visit the next directory.
///
/// Any errors occurring are logged but not returned; we'll continue to
/// visit whatever can be read.
fn visit_next_directory(&mut self, parent_apath: &Apath) {
self.stats.directories_visited += 1;
// Tuples of (name, entry) so that we can sort children by name.
let mut children = Vec::<(String, LiveEntry)>::new();
let dir_path = parent_apath.below(&self.root_path);
let dir_iter = match fs::read_dir(&dir_path) {
Ok(i) => i,
Err(e) => {
ui::problem(&format!("Error reading directory {:?}: {}", &dir_path, e));
return;
}
};
let mut subdir_apaths: Vec<Apath> = Vec::new();
for dir_entry in dir_iter {
let dir_entry = match dir_entry {
Ok(dir_entry) => dir_entry,
Err(e) => {
ui::problem(&format!(
"Error reading next entry from directory {:?}: {}",
&dir_path, e
));
continue;
}
};
let child_osstr = dir_entry.file_name();
let child_name = match child_osstr.to_str() {
Some(c) => c,
None => {
ui::problem(&format!(
"Couldn't decode filename {child_osstr:?} in {dir_path:?}",
));
continue;
}
};
let child_apath = parent_apath.append(child_name);
if self.exclude.matches(&child_apath) {
self.stats.exclusions += 1;
continue;
}
let ft = match dir_entry.file_type() {
Ok(ft) => ft,
Err(e) => {
ui::problem(&format!(
"Error getting type of {child_apath:?} during iteration: {e}"
));
continue;
}
};
if ft.is_dir() {
// TODO: Count them?
// TODO: Perhaps an option to back them up anyhow?
match cachedir::is_tagged(&dir_entry.path()) {
Ok(true) => continue,
Ok(false) => (),
Err(e) => {
ui::problem(&format!(
"Error checking CACHEDIR.TAG in {dir_entry:?}: {e}"
));
}
}
}
let metadata = match dir_entry.metadata() {
Ok(metadata) => metadata,
Err(e) => {
match e.kind() {
ErrorKind::NotFound => {
// Fairly harmless, and maybe not even worth logging. Just a race
// between listing the directory and looking at the contents.
ui::problem(&format!(
"File disappeared during iteration: {child_apath:?}: {e}"
));
}
_ => {
ui::problem(&format!(
"Failed to read source metadata from {child_apath:?}: {e}"
));
self.stats.metadata_error += 1;
}
};
continue;
}
};
// TODO: Move this into LiveEntry::from_fs_metadata, once there's a
// global way for it to complain about errors.
let target: Option<String> = if ft.is_symlink() {
let t = match dir_path.join(dir_entry.file_name()).read_link() {
Ok(t) => t,
Err(e) => {
ui::problem(&format!(
"Failed to read target of symlink {child_apath:?}: {e}"
));
continue;
}
};
match t.into_os_string().into_string() {
Ok(t) => Some(t),
Err(e) => {
ui::problem(&format!(
"Failed to decode target of symlink {child_apath:?}: {e:?}"
));
continue;
}
}
} else {
None
};
if ft.is_dir() {
subdir_apaths.push(child_apath.clone());
}
children.push((
child_name.to_string(),
LiveEntry::from_fs_metadata(child_apath, &metadata, target),
));
}
// To get the right overall tree ordering, any new subdirectories
// discovered here should be visited together in apath order, but before
// any previously pending directories. In other words, in reverse order
// push them onto the front of the dir deque.
if !subdir_apaths.is_empty() {
subdir_apaths.sort_unstable();
self.dir_deque.reserve(subdir_apaths.len());
for a in subdir_apaths.into_iter().rev() {
self.dir_deque.push_front(a);
}
}
children.sort_unstable_by(|a, b| a.0.cmp(&b.0));
self.entry_deque.extend(children.into_iter().map(|x| x.1));
}
}
// The source iterator yields one path at a time as it walks through the source directories.
//
// It has to read each directory entirely so that it can sort the entries.
// These entries are then returned before visiting any subdirectories.
//
// It also has to manage a stack of directories which might be partially walked. Those
// subdirectories are then visited, also in sorted order, before returning to
// any higher-level directories.
impl Iterator for Iter {
type Item = LiveEntry;
fn next(&mut self) -> Option<LiveEntry> {
loop {
if let Some(entry) = self.entry_deque.pop_front() {
// Have already found some entries, so just return the first.
self.stats.entries_returned += 1;
// Sanity check that all the returned paths are in correct order.
self.check_order.check(&entry.apath);
return Some(entry);
} else if let Some(entry) = self.dir_deque.pop_front() {
// No entries already queued, visit a new directory to try to refill the queue.
self.visit_next_directory(&entry)
} else {
// No entries queued and no more directories to visit.
return None;
}
}
}
}