use anyhow::{bail, Error, Result};
use log::{trace, debug, info, warn};
use std::cell::RefCell;
use std::cmp::{min, Ordering};
use std::fs;
use std::hash::{DefaultHasher, Hasher};
use std::io::{self, Read};
use std::os::unix::fs::{MetadataExt, PermissionsExt};
use std::path::{Path, PathBuf};
use super::config::Config;
#[cfg(feature = "selinux")]
use super::fcontexts;
/// Counters collected while scanning the inputs and hard-linking duplicates.
#[derive(Debug, Default, PartialEq)]
pub struct Stats {
    /// Directories encountered while walking the inputs.
    pub directories: u64,
    /// Non-directory entries encountered while walking the inputs.
    pub files: u64,
    /// Regular files that were queued for comparison.
    pub candidate_files: u64,
    /// Files whose read state was no longer pristine when linking ran.
    pub files_read: u64,
    /// Files for which linking reported success (including dry runs).
    pub files_linked: u64,
    /// Errors counted while scanning, reading and linking.
    pub errors: u64,
    /// Sum of the sizes of the files counted in `files_linked`.
    pub bytes_linked: u64,
}

impl Stats {
    /// Creates a `Stats` with every counter at zero.
    pub fn new() -> Self {
        Self::default()
    }

    /// Prints a three-line, human-readable summary of all counters to stdout.
    pub fn summarize(&self) {
        let Self {
            directories,
            files,
            candidate_files,
            files_read,
            files_linked,
            errors,
            bytes_linked,
        } = self;
        println!(
            "Scanned {} directories and {} files,\n \
             considered {} files, read {} files, linked {} files, {} errors\n \
             sum of sizes of linked files: {} bytes",
            directories, files, candidate_files, files_read, files_linked, errors, bytes_linked
        );
    }
}
/// Read progress of a file's contents, as tracked by `FileInfo::get_next_hash`.
#[derive(Debug)]
enum FileState {
/// The file has not been opened yet (the state set by `FileInfo::new`).
None,
/// The file is open; the handle is kept so the next chunk can be read from it.
Open(fs::File),
/// Opening or reading the file failed; further hash requests return an error.
Error,
/// A read returned fewer bytes than requested; there are no more chunks.
Closed,
}
/// A candidate file together with the lazily computed data used to compare it.
///
/// The `RefCell` fields are filled in on demand through `&self`, which lets
/// `compare` cache results while only holding shared references.
#[derive(Debug)]
struct FileInfo {
/// Path under which the file was found.
path: PathBuf,
/// Metadata captured when the file was found.
metadata: fs::Metadata,
/// Cached SELinux file context; `None` until `set_selinux_context` has run.
#[cfg(feature = "selinux")]
selinux_context: RefCell<Option<String>>,
/// Hashes of the content chunks read so far, in file order.
hashes: RefCell<Vec<u64>>,
/// Progress of reading the file's contents.
file_state: RefCell<FileState>,
}
impl FileInfo {
/// Wraps `path` and its already-fetched `metadata`.
///
/// Nothing is opened or hashed here: the hash list starts empty and the
/// read state starts as `FileState::None`.
fn new(path: PathBuf, metadata: fs::Metadata) -> FileInfo {
    let hashes = RefCell::new(Vec::new());
    let file_state = RefCell::new(FileState::None);
    FileInfo {
        path,
        metadata,
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes,
        file_state,
    }
}
/// Looks up and caches the SELinux file context for this path.
///
/// Does nothing if a context is already cached. A failed lookup is returned
/// to the caller and leaves the cache empty.
#[cfg(feature = "selinux")]
fn set_selinux_context(
    &self,
    labels: &selinux::label::Labeler<selinux::label::back_end::File>,
    root: Option<&Path>,
) -> Result<()> {
    let mut cached = self.selinux_context.borrow_mut();
    if cached.is_some() {
        return Ok(());
    }
    *cached = Some(fcontexts::lookup_context(labels, root, &self.path)?);
    Ok(())
}
/// Orders two files such that files which may be linked together compare `Equal`.
///
/// Checks stop at the first difference, in this order: size, device, inode
/// (an identical inode yields `Equal` immediately), then — unless disabled in
/// `config` — mode, uid/gid and mtime (clamped to `config.source_date_epoch`
/// when that is set and earlier), then the SELinux context (only with the
/// `selinux` feature and configured labels), and finally the chunk hashes of
/// the file contents.
///
/// When a context lookup or a read fails, the value of `file_error` is
/// returned: the inode ordering, unless `config.fatal_errors` makes the
/// process exit.
///
/// # Panics
///
/// Panics if the modification time of either file cannot be queried.
fn compare(
    &self,
    other: &FileInfo,
    config: &Config,
) -> Ordering {
    let ms = &self.metadata;
    let mo = &other.metadata;

    let mut partial = ms.len().cmp(&mo.len());
    if partial != Ordering::Equal {
        trace!("Comparing {} and {} → size={:?}", self.path.display(), other.path.display(), partial);
        return partial;
    }

    partial = ms.dev().cmp(&mo.dev());
    if partial != Ordering::Equal {
        trace!("Comparing {} and {} → filesystem={:?}", self.path.display(), other.path.display(), partial);
        return partial;
    }

    // Same device and same inode: this is the same file, nothing else to check.
    let ino_res = ms.ino().cmp(&mo.ino());
    if ino_res == Ordering::Equal {
        trace!("Comparing {} and {} → inode={:?}", self.path.display(), other.path.display(), ino_res);
        return ino_res;
    }

    if !config.ignore_mode {
        partial = ms.permissions().mode().cmp(&mo.permissions().mode());
        if partial != Ordering::Equal {
            trace!("Comparing {} and {} → mode={:?}", self.path.display(), other.path.display(), partial);
            return partial;
        }
    }

    if !config.ignore_owner {
        partial = ms.uid().cmp(&mo.uid());
        if partial != Ordering::Equal {
            trace!("Comparing {} and {} → uid={:?}", self.path.display(), other.path.display(), partial);
            return partial;
        }
        partial = ms.gid().cmp(&mo.gid());
        if partial != Ordering::Equal {
            trace!("Comparing {} and {} → gid={:?}", self.path.display(), other.path.display(), partial);
            return partial;
        }
    }

    if !config.ignore_mtime {
        // Timestamps later than the configured epoch are treated as the epoch itself.
        let mut t1 = ms.modified().expect("query mtime");
        if let Some(s) = config.source_date_epoch.filter(|s| s < &t1) {
            t1 = s;
        }
        let mut t2 = mo.modified().expect("query mtime");
        if let Some(s) = config.source_date_epoch.filter(|s| s < &t2) {
            t2 = s;
        }
        partial = t1.cmp(&t2);
        if partial != Ordering::Equal {
            trace!("Comparing {} and {} → mtime={:?}", self.path.display(), other.path.display(), partial);
            return partial;
        }
    }

    #[cfg(feature = "selinux")]
    if let Some(labels) = config.selinux_labels.as_ref() {
        if let Err(e) = self.set_selinux_context(labels, config.root.as_deref()) {
            return FileInfo::file_error(ino_res, e, config);
        }
        if let Err(e) = other.set_selinux_context(labels, config.root.as_deref()) {
            return FileInfo::file_error(ino_res, e, config);
        }
        let c1 = self.selinux_context.borrow();
        let c2 = other.selinux_context.borrow();
        partial = c1.cmp(&c2);
        if partial != Ordering::Equal {
            debug!("Comparing {} and {} → {} and {}, fcontext={:?}",
                   self.path.display(), other.path.display(),
                   c1.as_deref().unwrap_or("<<none>>"),
                   c2.as_deref().unwrap_or("<<none>>"),
                   partial);
            return partial;
        }
    }

    // Sizes are known to be equal here, so empty files need no content check.
    if ms.len() == 0 {
        trace!("Comparing {} and {} → size=0, {:?}",
               self.path.display(), other.path.display(), Ordering::Equal);
        return Ordering::Equal;
    }

    // Compare the contents chunk by chunk; hashes are computed lazily and cached.
    for i in 0.. {
        let hash1 = match self.get_hash(i) {
            Err(e) => { return FileInfo::file_error(ino_res, e, config); }
            Ok(hash) => hash,
        };
        let hash2 = match other.get_hash(i) {
            Err(e) => { return FileInfo::file_error(ino_res, e, config); }
            Ok(hash) => hash,
        };

        let res = hash1.cmp(&hash2);
        if res != Ordering::Equal {
            // Log the result of this hash comparison, not an earlier metadata check.
            trace!("Comparing {} and {} → hash{}={:?}",
                   self.path.display(), other.path.display(), i, res);
            return res;
        }

        // Both files ran out of chunks at the same index: contents are equal.
        if hash1.is_none() && hash2.is_none() {
            trace!("Comparing {} and {} → contents={:?}",
                   self.path.display(), other.path.display(), Ordering::Equal);
            return Ordering::Equal;
        }
    }
    unreachable!();
}
/// Comparison used for sorting: `compare`, with the path as tie-breaker.
///
/// Files that `compare` considers equal are ordered by path so that the
/// sort result is deterministic. Two entries with the very same path (for
/// example when the same file is reached through more than one input)
/// compare `Equal` instead of aborting the sort with a panic.
fn compare_for_sorting(
    &self,
    other: &FileInfo,
    config: &Config,
) -> Ordering {
    self.compare(other, config)
        .then_with(|| self.path.cmp(&other.path))
}
/// Decides what a comparison yields after an error.
///
/// With `config.fatal_errors` set the process exits with status 1;
/// otherwise `partial` is handed back as the comparison result. The error
/// value itself is not inspected or reported here.
fn file_error(partial: Ordering, _err: Error, config: &Config) -> Ordering {
    if !config.fatal_errors {
        return partial;
    }
    std::process::exit(1)
}
/// Size in bytes of the next chunk to hash, given how many chunks came before.
///
/// Starts at 4096 bytes and doubles with every previous chunk, up to a
/// maximum of eight doublings (4096 * 2^8 bytes).
fn hash_chunk_size(previous_chunk_count: usize) -> u64 {
    const BASE_CHUNK: u64 = 4096;
    const MAX_DOUBLINGS: usize = 8;
    BASE_CHUNK << previous_chunk_count.min(MAX_DOUBLINGS)
}
/// Returns the hash of chunk `index`, reading it from the file if necessary.
///
/// Cached hashes are returned directly. `index` may be at most one past the
/// last cached chunk; in that case the next chunk is read, and `Ok(None)`
/// signals that the file has no further chunks.
///
/// # Panics
///
/// Panics if `index` skips over chunks that have not been hashed yet.
fn get_hash(&self, index: usize) -> Result<Option<u64>> {
    assert!(index <= self.hashes.borrow().len());
    // The borrow ends with this statement, before get_next_hash() borrows mutably.
    let cached = self.hashes.borrow().get(index).copied();
    match cached {
        Some(hash) => Ok(Some(hash)),
        None => self.get_next_hash(),
    }
}
fn get_next_hash(&self) -> Result<Option<u64>> {
let mut file_state = self.file_state.borrow_mut();
match *file_state {
FileState::None => {
match fs::File::open(&self.path) {
Ok(f) => {
*file_state = FileState::Open(f);
}
Err(e) => {
warn!("{}: open failed: {}", self.path.display(), e);
*file_state = FileState::Error;
return Err(e.into());
}
}
}
FileState::Error => { bail!("{} is unreadable", self.path.display()); }
FileState::Closed => { return Ok(None); }
_ => {}
};
let file = match *file_state {
FileState::Open(ref f) => { f }
_ => { panic!() }
};
let mut hashes = self.hashes.borrow_mut();
let chunk_size = Self::hash_chunk_size(hashes.len());
let mut buffer = Vec::new();
let count = match file.take(chunk_size).read_to_end(&mut buffer) {
Ok(count) => count,
Err(e) => {
warn!("{}: read failed: {}", self.path.display(), e);
*file_state = FileState::Error;
return Err(e.into());
}
};
if (count as u64) < chunk_size {
*file_state = FileState::Closed;
}
if count == 0 {
return Ok(None);
}
let mut hasher = DefaultHasher::new();
hasher.write(&buffer[..count]);
let hash = hasher.finish();
hashes.push(hash);
Ok(Some(hash))
}
}
/// Walks `input_path` without following symlinks and collects regular files.
///
/// Every regular file found is appended to `files_seen`; directory, file and
/// candidate counters in `stats` are updated along the way. Walk and stat
/// failures are counted in `stats.errors`. With `config.fatal_errors` they
/// are returned to the caller, except that a missing top-level path is
/// tolerated when `config.brp` is set; otherwise they are logged and skipped.
fn process_file_or_dir(
    files_seen: &mut Vec<FileInfo>,
    input_path: &Path,
    config: &Config,
    stats: &mut Stats,
) -> Result<()> {
    let walker = walkdir::WalkDir::new(input_path).follow_links(false);

    for item in walker {
        let entry = match item {
            Ok(entry) => entry,
            Err(e) => {
                stats.errors += 1;
                if config.fatal_errors {
                    let missing_toplevel = config.brp
                        && e.depth() == 0
                        && e.io_error()
                            .is_some_and(|cause| cause.kind() == io::ErrorKind::NotFound);
                    if !missing_toplevel {
                        return Err(e.into());
                    }
                }
                warn!("Failed to process {}: {}", input_path.display(), e);
                continue;
            }
        };

        let metadata = match entry.metadata() {
            Ok(metadata) => metadata,
            Err(e) => {
                stats.errors += 1;
                if config.fatal_errors {
                    return Err(e.into());
                }
                warn!("{}: failed to stat: {}", entry.path().display(), e);
                continue;
            }
        };

        if metadata.is_dir() {
            stats.directories += 1;
            continue;
        }

        // Everything that is not a directory counts as a file, but only
        // regular files become candidates.
        stats.files += 1;
        if !metadata.is_file() {
            debug!("{}: not a file", entry.path().display());
            continue;
        }

        stats.candidate_files += 1;
        let path = entry.path().to_path_buf();
        files_seen.push(FileInfo::new(path, metadata));
    }

    Ok(())
}
fn link_file(a: &FileInfo, b: &FileInfo, config: &Config) -> Result<bool> {
if a.metadata.ino() == b.metadata.ino() {
debug!("Already linked: {} and {}", a.path.display(), b.path.display());
return Ok(false);
}
let md = b.path.symlink_metadata()?;
if md.ino() != b.metadata.ino() {
debug!("Ignoring changed {}", b.path.display());
return Ok(false);
}
if config.dry_run {
info!("Would link {} ← {}", a.path.display(), b.path.display());
} else {
let tmp = b.path.with_file_name(format!(".#.{}.tmp", b.path.file_name().unwrap().to_str().unwrap()));
fs::hard_link(&a.path, &tmp)?;
if let Err(e) = fs::rename(&tmp, &b.path) {
if let Err(g) = fs::remove_file(&tmp) {
warn!("Removal of temporary file {} failed: {}", tmp.display(), g);
};
return Err(e.into());
}
info!("Linked {} ← {}", a.path.display(), b.path.display());
}
Ok(true)
}
fn link_files(
files: Vec<FileInfo>,
config: &Config,
stats: &mut Stats,
) -> Result<()> {
let mut linkto: Option<usize> = None;
for (n, finfo) in files.iter().enumerate() {
if matches!(*finfo.file_state.borrow(), FileState::Error) {
stats.errors += 1;
}
if !matches!(*finfo.file_state.borrow(), FileState::None) {
stats.files_read += 1;
}
#[allow(clippy::unnecessary_unwrap)]
if linkto.is_some() &&
FileInfo::compare(&files[linkto.unwrap()], finfo, config) == Ordering::Equal {
match link_file(&files[linkto.unwrap()], finfo, config) {
Ok(res) => {
if res {
stats.files_linked += 1;
stats.bytes_linked += finfo.metadata.len();
}
}
Err(e) => {
if config.fatal_errors {
return Err(e);
} else {
stats.errors += 1;
warn!("{}: failed to link to {}: {}",
files[linkto.unwrap()].path.display(), finfo.path.display(), e);
}
}
}
} else if let FileState::Error = *finfo.file_state.borrow() {
trace!("Skipping over {} with error…", finfo.path.display());
} else {
linkto = Some(n);
}
}
Ok(())
}
pub fn process_inputs(config: &Config) -> Result<Stats> {
let mut files_seen = vec![];
let mut stats = Stats::new();
for input_path in &config.inputs {
process_file_or_dir(&mut files_seen, input_path, config, &mut stats)?;
}
files_seen.sort_by(|a, b| FileInfo::compare_for_sorting(a, b, config));
link_files(files_seen, config, &mut stats)?;
Ok(stats)
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
/// Exercises `compare` with preset hashes, identical inodes and mtime handling.
#[test]
fn compare_metadata() {
    let mut config = Config::empty();

    let mut file1 = tempfile::NamedTempFile::new().unwrap();
    let mut file2 = tempfile::NamedTempFile::new().unwrap();

    // write_all() guarantees the whole buffer is written.
    file1.write_all(b"0").unwrap();
    file2.write_all(b"0").unwrap();

    // Give both files the same modification time.
    let ts = file2.as_file().metadata().unwrap().modified().unwrap();
    file1.as_file().set_modified(ts).unwrap();

    let a = FileInfo {
        path: file1.path().to_path_buf(),
        metadata: fs::metadata(file1.path()).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![1, 2, 3, 4].into(),
        file_state: FileState::Closed.into(),
    };

    let b = FileInfo {
        path: file2.path().to_path_buf(),
        metadata: fs::metadata(file2.path()).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![1, 2, 3, 4].into(),
        file_state: FileState::Closed.into(),
    };

    assert_eq!(a.compare(&b, &config), Ordering::Equal);

    // A file with an extra chunk sorts after one that has ended.
    b.hashes.borrow_mut().push(5);
    assert_eq!(a.compare(&b, &config), Ordering::Less);

    a.hashes.borrow_mut().push(6);
    assert_eq!(a.compare(&b, &config), Ordering::Greater);

    // Same metadata (hence same inode) under a different path: equal
    // without looking at the contents.
    let a_again = FileInfo {
        path: "/a/b/c".into(),
        metadata: a.metadata.clone(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![].into(),
        file_state: FileState::None.into(),
    };
    assert_eq!(a.compare(&a_again, &config), Ordering::Equal);

    // Older mtime on the second file.
    file2.as_file().set_modified(
        ts + time::Duration::new(-30i64, 123)
    ).unwrap();

    let mut b_again = FileInfo {
        path: file2.path().to_path_buf(),
        metadata: fs::metadata(file2.path()).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: a.hashes.borrow().clone().into(),
        file_state: FileState::Closed.into(),
    };
    assert_eq!(a.compare(&b_again, &config), Ordering::Greater);

    // Newer mtime on the second file.
    file2.as_file().set_modified(
        ts + time::Duration::new(30i64, 123)
    ).unwrap();
    b_again.metadata = fs::metadata(file2.path()).unwrap();

    assert_eq!(a.compare(&b_again, &config), Ordering::Less);

    config.ignore_mtime = true;
    assert_eq!(a.compare(&b_again, &config), Ordering::Equal);

    // With the epoch set to `ts`, the newer mtime is clamped down to it.
    config.ignore_mtime = false;
    config.source_date_epoch = Some(ts);
    assert_eq!(a.compare(&b_again, &config), Ordering::Equal);
}
/// `/dev` and `/proc` must never compare as equal.
#[test]
fn compare_different_fs() {
    let config = Config::empty();

    // Builds a FileInfo for an existing path with no hashes and no more data to read.
    let info_for = |location: &str| FileInfo {
        path: location.into(),
        metadata: fs::metadata(location).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![].into(),
        file_state: FileState::Closed.into(),
    };

    let a = info_for("/dev");
    let b = info_for("/proc");

    assert_ne!(a.compare(&b, &config), Ordering::Equal);
}
/// Checks that preset SELinux contexts take part in the comparison.
///
/// The test returns early when the SELinux label database cannot be initialized.
#[test]
#[cfg(feature = "selinux")]
fn compare_selinux_contexts() {
    let labels = match selinux::label::Labeler::new(&[], false) {
        Err(e) => {
            info!("Failed to initalize SELinux db: {}", e);
            return;
        }
        Ok(v) => v,
    };

    let mut config = Config::empty();
    config.selinux_labels.replace(labels);

    let mut file1 = tempfile::NamedTempFile::new().unwrap();
    let mut file2 = tempfile::NamedTempFile::new().unwrap();

    // write_all() guarantees the whole buffer is written.
    file1.write_all(b"0").unwrap();
    file2.write_all(b"0").unwrap();

    // Give both files the same modification time.
    let ts = file2.as_file().metadata().unwrap().modified().unwrap();
    file1.as_file().set_modified(ts).unwrap();

    let a = FileInfo {
        path: file1.path().to_path_buf(),
        metadata: fs::metadata(file1.path()).unwrap(),
        selinux_context: RefCell::new(None),
        hashes: vec![5, 6, 7].into(),
        file_state: FileState::Closed.into(),
    };

    let b = FileInfo {
        path: file2.path().to_path_buf(),
        metadata: fs::metadata(file2.path()).unwrap(),
        selinux_context: RefCell::new(None),
        hashes: vec![5, 6, 7].into(),
        file_state: FileState::Closed.into(),
    };

    assert_eq!(a.compare(&b, &config), Ordering::Equal);

    a.selinux_context.borrow_mut().replace("aaa".to_owned());
    assert_eq!(a.compare(&b, &config), Ordering::Greater);

    b.selinux_context.borrow_mut().replace("bbb".to_owned());
    assert_eq!(a.compare(&b, &config), Ordering::Less);

    b.selinux_context.borrow_mut().replace("aaa".to_owned());
    assert_eq!(a.compare(&b, &config), Ordering::Equal);
}
/// Compares two files whose permissions forbid reading.
///
/// When the files cannot be opened, `compare` is expected to fall back to the
/// inode ordering. When running as root the files are expected to compare
/// as equal.
#[test]
fn compare_unreadable() {
    let mut config = Config::empty();
    config.ignore_mtime = true;

    let mut file1 = tempfile::NamedTempFile::new().unwrap();
    let mut file2 = tempfile::NamedTempFile::new().unwrap();

    // write_all() guarantees the whole buffer is written.
    file1.write_all(b"0").unwrap();
    file2.write_all(b"0").unwrap();

    fs::set_permissions(file1.path(), fs::Permissions::from_mode(0u32)).unwrap();
    fs::set_permissions(file2.path(), fs::Permissions::from_mode(0u32)).unwrap();

    let a = FileInfo {
        path: file1.path().to_path_buf(),
        metadata: fs::metadata(file1.path()).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![].into(),
        file_state: FileState::None.into(),
    };

    let b = FileInfo {
        path: file2.path().to_path_buf(),
        metadata: fs::metadata(file2.path()).unwrap(),
        #[cfg(feature = "selinux")]
        selinux_context: RefCell::new(None),
        hashes: vec![].into(),
        file_state: FileState::None.into(),
    };

    // The owner of /proc/self/cmdline is used to detect whether we run as root.
    let amiroot = fs::metadata("/proc/self/cmdline").unwrap().uid() == 0;
    let expected = if amiroot {
        Ordering::Equal
    } else {
        a.metadata.ino().cmp(&b.metadata.ino())
    };

    assert_eq!(a.compare(&b, &config), expected);
}
/// Compares two files with identical contents of growing size and checks
/// how many chunks get hashed and in which state the files are left.
#[test]
fn compare_contents() {
    let mut config = Config::empty();
    config.ignore_mtime = true;

    let mut file1 = tempfile::NamedTempFile::new().unwrap();
    let mut file2 = tempfile::NamedTempFile::new().unwrap();

    // `size` is the number of bytes appended in this round (the files keep
    // growing); `chunk_count` is the number of hashes expected afterwards.
    for (size, chunk_count) in [(0, 0), (4, 1), (4092, 1), (4096, 2), (4096 * 9, 4)] {
        if size > 0 {
            let data = vec![66u8; size];
            // write_all() guarantees that the whole buffer is written.
            file1.write_all(&data).unwrap();
            file1.flush().unwrap();
            file2.write_all(&data).unwrap();
            file2.flush().unwrap();
        }

        let a = FileInfo {
            path: file1.path().to_path_buf(),
            metadata: fs::metadata(file1.path()).unwrap(),
            #[cfg(feature = "selinux")]
            selinux_context: RefCell::new(None),
            hashes: vec![].into(),
            file_state: FileState::None.into(),
        };

        let b = FileInfo {
            path: file2.path().to_path_buf(),
            metadata: fs::metadata(file2.path()).unwrap(),
            #[cfg(feature = "selinux")]
            selinux_context: RefCell::new(None),
            hashes: vec![].into(),
            file_state: FileState::None.into(),
        };

        assert_eq!(a.compare(&b, &config), Ordering::Equal);
        assert_eq!(a.hashes.borrow().len(), chunk_count);
        assert_eq!(b.hashes.borrow().len(), chunk_count);
        assert_eq!(*a.hashes.borrow(), *b.hashes.borrow());

        // Match against the concrete variant: a bare identifier in a
        // `matches!` pattern would bind anything and assert nothing.
        if size > 0 {
            // Non-empty files are read to the end.
            assert!(matches!(*a.file_state.borrow(), FileState::Closed));
            assert!(matches!(*b.file_state.borrow(), FileState::Closed));
        } else {
            // Empty files are never opened.
            assert!(matches!(*a.file_state.borrow(), FileState::None));
            assert!(matches!(*b.file_state.borrow(), FileState::None));
        }
    }
}
}