1use anyhow::{bail, Error, Result};
4use log::{trace, debug, info, warn};
5
6use std::cmp::{min, Ordering};
7use std::hash::{DefaultHasher, Hasher};
8use std::io::Read;
9use std::os::unix::fs::{MetadataExt, PermissionsExt};
10use std::path::{Path, PathBuf};
11use std::cell::RefCell;
12use std::fs;
13
14use super::config::Config;
15#[cfg(feature = "selinux")]
16use super::fcontexts;
17
18#[derive(Debug, Default, PartialEq)]
19pub struct Stats {
20 pub directories: u64,
24
25 pub files: u64,
29
30 pub candidate_files: u64,
31
32 pub files_read: u64,
34
35 pub files_linked: u64,
37
38 pub errors: u64,
40
41 pub bytes_linked: u64,
43}
44
45impl Stats {
46 pub fn new() -> Self { Default::default() }
47
48 pub fn summarize(&self) {
49 println!(
50 "Scanned {} directories and {} files,\n \
51 considered {} files, read {} files, linked {} files, {} errors\n \
52 sum of sizes of linked files: {} bytes\
53 ",
54 self.directories, self.files,
55 self.candidate_files, self.files_read, self.files_linked, self.errors,
56 self.bytes_linked);
57 }
58}
59
60#[derive(Debug)]
61enum FileState {
62 None,
63 Open(fs::File),
64 Error,
65 Closed,
66}
67
68#[derive(Debug)]
69struct FileInfo {
70 path: PathBuf,
71 metadata: fs::Metadata,
72
73 #[cfg(feature = "selinux")]
74 selinux_context: RefCell<Option<String>>,
75
76 hashes: RefCell<Vec<u64>>,
77 file_state: RefCell<FileState>,
78}
79
80impl FileInfo {
81 fn new(path: PathBuf, metadata: fs::Metadata) -> FileInfo {
82 FileInfo {
83 path,
84 metadata,
85 #[cfg(feature = "selinux")]
86 selinux_context: RefCell::new(None),
87 hashes: RefCell::new(vec![]),
88 file_state: RefCell::new(FileState::None),
89 }
90 }
91
92 #[cfg(feature = "selinux")]
93 fn set_selinux_context(
94 &self,
95 labels: &selinux::label::Labeler<selinux::label::back_end::File>,
96 root: Option<&Path>,
97 ) -> Result<()> {
98
99 let mut context = self.selinux_context.borrow_mut();
100
101 if context.is_none() {
102 let fc = fcontexts::lookup_context(labels, root, &self.path)?;
103 context.replace(fc);
104 }
105
106 Ok(())
107 }
108
109 fn compare(
110 &self,
111 other: &FileInfo,
112 config: &Config,
113 ) -> Ordering {
114 let ms = &self.metadata;
120 let mo = &other.metadata;
121
122 let mut partial = ms.len().cmp(&mo.len());
124 if partial != Ordering::Equal {
125 trace!("Comparing {} and {} → size={:?}", self.path.display(), other.path.display(), partial);
126 return partial;
127 }
128
129 partial = ms.dev().cmp(&mo.dev());
130 if partial != Ordering::Equal {
131 trace!("Comparing {} and {} → filesystem={:?}", self.path.display(), other.path.display(), partial);
132 return partial;
133 }
134
135 let ino_res = ms.ino().cmp(&mo.ino());
137 if ino_res == Ordering::Equal {
138 trace!("Comparing {} and {} → inode={:?}", self.path.display(), other.path.display(), partial);
139 return ino_res;
140 }
141
142 if !config.ignore_mode {
143 partial = ms.permissions().mode().cmp(&mo.permissions().mode());
144 if partial != Ordering::Equal {
145 trace!("Comparing {} and {} → mode={:?}", self.path.display(), other.path.display(), partial);
146 return partial;
147 }
148 }
149
150 if !config.ignore_owner {
151 partial = ms.uid().cmp(&mo.uid());
152 if partial != Ordering::Equal {
153 trace!("Comparing {} and {} → uid={:?}", self.path.display(), other.path.display(), partial);
154 return partial;
155 }
156
157 partial = ms.gid().cmp(&mo.gid());
158 if partial != Ordering::Equal {
159 trace!("Comparing {} and {} → gid={:?}", self.path.display(), other.path.display(), partial);
160 return partial;
161 }
162 }
163
164 if !config.ignore_mtime {
165 let mut t1 = ms.modified().expect("query mtime");
167 if let Some(s) = config.source_date_epoch.filter(|s| s < &t1) {
168 t1 = s;
169 }
170
171 let mut t2 = mo.modified().expect("query mtime");
172 if let Some(s) = config.source_date_epoch.filter(|s| s < &t2) {
173 t2 = s;
174 }
175
176 partial = t1.cmp(&t2);
177 if partial != Ordering::Equal {
178 trace!("Comparing {} and {} → mtime={:?}", self.path.display(), other.path.display(), partial);
179 return partial;
180 }
181 }
182
183 #[cfg(feature = "selinux")]
186 if let Some(labels) = config.selinux_labels.as_ref() {
187 if let Err(e) = self.set_selinux_context(labels, config.root.as_deref()) {
188 return FileInfo::file_error(ino_res, e, config);
189 }
190 if let Err(e) = other.set_selinux_context(labels, config.root.as_deref()) {
191 return FileInfo::file_error(ino_res, e, config);
192 }
193
194 let c1 = self.selinux_context.borrow();
195 let c2 = other.selinux_context.borrow();
196
197 partial = c1.cmp(&c2);
198 if partial != Ordering::Equal {
199 debug!("Comparing {} and {} → {} and {}, fcontext={:?}",
200 self.path.display(), other.path.display(),
201 c1.as_deref().unwrap_or("<<none>>"),
202 c2.as_deref().unwrap_or("<<none>>"),
203 partial);
204 return partial;
205 }
206 }
207
208 if ms.len() == 0 {
210 trace!("Comparing {} and {} → size=0, {:?}",
211 self.path.display(), other.path.display(), Ordering::Equal);
212 return Ordering::Equal;
213 }
214
215 for i in 0.. {
216 let hash1 = match self.get_hash(i) {
217 Err(e) => { return FileInfo::file_error(ino_res, e, config); }
218 Ok(hash) => hash,
219 };
220
221 let hash2 = match other.get_hash(i) {
222 Err(e) => { return FileInfo::file_error(ino_res, e, config); }
223 Ok(hash) => hash,
224 };
225
226 let res = hash1.cmp(&hash2);
227 if res != Ordering::Equal {
228 trace!("Comparing {} and {} → hash{}={:?}",
229 self.path.display(), other.path.display(), i, partial);
230 return res;
231 }
232
233 if hash1.is_none() && hash2.is_none() {
234 trace!("Comparing {} and {} → contents={:?}",
236 self.path.display(), other.path.display(), Ordering::Equal);
237 return Ordering::Equal;
238 }
239 }
240
241 unreachable!();
242 }
243
244 fn file_error(partial: Ordering, _err: Error, config: &Config) -> Ordering {
245 if config.fatal_errors {
248 std::process::exit(1);
249 } else {
250 partial
251 }
252 }
253
254 fn hash_chunk_size(previous_chunk_count: usize) -> u64 {
255 4096u64 * 2u64.pow(min(previous_chunk_count, 8) as u32)
256 }
257
258 fn get_hash(&self, index: usize) -> Result<Option<u64>> {
259 if let Some(val) = self.hashes.borrow().get(index) {
260 return Ok(Some(*val));
261 }
262
263 assert!(index <= self.hashes.borrow().len());
266
267 self.get_next_hash()
268 }
269
270 fn get_next_hash(&self) -> Result<Option<u64>> {
271 let mut file_state = self.file_state.borrow_mut();
276
277 match *file_state {
278 FileState::None => {
279 match fs::File::open(&self.path) {
281 Ok(f) => {
282 *file_state = FileState::Open(f);
283 }
284 Err(e) => {
285 warn!("{}: open failed: {}", self.path.display(), e);
286 *file_state = FileState::Error;
287 return Err(e.into());
288 }
289 }
290 }
291 FileState::Error => { bail!("{} is unreadable", self.path.display()); }
292 FileState::Closed => { return Ok(None); }
293 _ => {}
294 };
295
296 let file = match *file_state {
297 FileState::Open(ref f) => { f }
298 _ => { panic!() }
299 };
300
301 let mut hashes = self.hashes.borrow_mut();
302 let chunk_size = Self::hash_chunk_size(hashes.len());
303
304 let mut buffer = Vec::new();
305
306 let count = match file.take(chunk_size).read_to_end(&mut buffer) {
307 Ok(count) => count,
308 Err(e) => {
309 warn!("{}: read failed: {}", self.path.display(), e);
310 *file_state = FileState::Error;
311 return Err(e.into());
312 }
313 };
314
315 if (count as u64) < chunk_size {
316 *file_state = FileState::Closed;
317 }
318
319 if count == 0 {
320 return Ok(None);
321 }
322
323 let mut hasher = DefaultHasher::new();
324 hasher.write(&buffer[..count]);
325 let hash = hasher.finish();
326 hashes.push(hash);
327 Ok(Some(hash))
328 }
329}
330
331fn process_file_or_dir(
332 files_seen: &mut Vec<FileInfo>,
333 input_path: &Path,
334 config: &Config,
335 stats: &mut Stats,
336) -> Result<()> {
337
338 for entry in walkdir::WalkDir::new(input_path)
339 .follow_links(false)
340 .into_iter() {
341 let entry = match entry {
342 Err(e) => {
343 stats.errors += 1;
344 if config.fatal_errors {
345 return Err(e.into());
346 } else {
347 warn!("Failed to process {}: {}", input_path.display(), e);
348 continue;
349 }
350 }
351 Ok(entry) => entry
352 };
353
354 let metadata = match entry.metadata() {
355 Err(e) => {
356 stats.errors += 1;
357 if config.fatal_errors {
358 return Err(e.into());
359 } else {
360 warn!("{}: failed to stat: {}", entry.path().display(), e);
361 continue;
362 }
363 }
364 Ok(metadata) => metadata
365 };
366
367 if metadata.is_dir() {
368 stats.directories += 1;
369 continue;
370 }
371
372 stats.files += 1;
373
374 if !metadata.is_file() {
375 debug!("{}: not a file", entry.path().display());
376 continue;
377 }
378
379 stats.candidate_files += 1;
380 files_seen.push(FileInfo::new(entry.path().to_path_buf(), metadata));
381 }
382
383 Ok(())
384}
385
386fn link_file(a: &FileInfo, b: &FileInfo, config: &Config) -> Result<bool> {
387 if a.metadata.ino() == b.metadata.ino() {
391 debug!("Already linked: {} and {}", a.path.display(), b.path.display());
392 return Ok(false);
393 }
394
395 let md = b.path.symlink_metadata()?;
398 if md.ino() != b.metadata.ino() {
399 debug!("Ignoring changed {}", b.path.display());
400 return Ok(false);
401 }
402
403 if config.dry_run {
404 info!("Would link {} ← {}", a.path.display(), b.path.display());
405 } else {
406 let tmp = b.path.with_file_name(format!(".#.{}.tmp", b.path.file_name().unwrap().to_str().unwrap()));
407 fs::hard_link(&a.path, &tmp)?;
408 if let Err(e) = fs::rename(&tmp, &b.path) {
409 if let Err(g) = fs::remove_file(&tmp) {
411 warn!("Removal of temporary file {} failed: {}", tmp.display(), g);
412 };
413 return Err(e.into());
414 }
415
416 info!("Linked {} ← {}", a.path.display(), b.path.display());
417 }
418
419 Ok(true)
420}
421
422fn link_files(
423 files: Vec<FileInfo>,
424 config: &Config,
425 stats: &mut Stats,
426) -> Result<()> {
427 let mut linkto: Option<usize> = None;
428
429 for (n, finfo) in files.iter().enumerate() {
438 if matches!(*finfo.file_state.borrow(), FileState::Error) {
439 stats.errors += 1;
440 }
441
442 if !matches!(*finfo.file_state.borrow(), FileState::None) {
443 stats.files_read += 1;
444 }
445
446 #[allow(clippy::unnecessary_unwrap)]
447 if linkto.is_some() &&
448 FileInfo::compare(&files[linkto.unwrap()], finfo, config) == Ordering::Equal {
449
450 match link_file(&files[linkto.unwrap()], finfo, config) {
451 Ok(res) => {
452 if res {
453 stats.files_linked += 1;
454 stats.bytes_linked += finfo.metadata.len();
456 }
457 }
458 Err(e) => {
459 if config.fatal_errors {
460 return Err(e);
461 } else {
462 stats.errors += 1;
463 warn!("{}: failed to link to {}: {}",
464 files[linkto.unwrap()].path.display(), finfo.path.display(), e);
465 }
466 }
467 }
468 } else if let FileState::Error = *finfo.file_state.borrow() {
469 trace!("Skipping over {} with error…", finfo.path.display());
470
471 } else {
472 linkto = Some(n);
473 }
474 }
475
476 Ok(())
477}
478
479pub fn process_inputs(config: &Config) -> Result<Stats> {
480 let mut files_seen = vec![];
481 let mut stats = Stats::new();
482
483 for input_path in &config.inputs {
484 process_file_or_dir(&mut files_seen, input_path, config, &mut stats)?;
485 }
486
487 files_seen.sort_by(|a, b| FileInfo::compare(a, b, config));
488
489 link_files(files_seen, config, &mut stats)?;
490
491 Ok(stats)
492}
493
494#[cfg(test)]
495mod tests {
496 use super::*;
497 use std::io::Write;
498
499 #[test]
500 fn compare_metadata() {
501 let mut config = Config::empty();
502
503 let mut file1 = tempfile::NamedTempFile::new().unwrap();
504 let mut file2 = tempfile::NamedTempFile::new().unwrap();
505
506 file1.write(b"0").unwrap();
507 file2.write(b"0").unwrap();
508
509 let ts = file2.as_file().metadata().unwrap().modified().unwrap();
510 file1.as_file().set_modified(ts).unwrap();
511
512 let a = FileInfo {
513 path: file1.path().to_path_buf(),
514 metadata: fs::metadata(file1.path()).unwrap(),
515 #[cfg(feature = "selinux")]
516 selinux_context: RefCell::new(None),
517 hashes: vec![1, 2, 3, 4].into(),
518 file_state: FileState::Closed.into(),
519 };
520
521 let b = FileInfo {
522 path: file2.path().to_path_buf(),
523 metadata: fs::metadata(file2.path()).unwrap(),
524 #[cfg(feature = "selinux")]
525 selinux_context: RefCell::new(None),
526 hashes: vec![1, 2, 3, 4].into(),
527 file_state: FileState::Closed.into(),
528 };
529
530 assert_eq!(a.compare(&b, &config), Ordering::Equal);
531
532 b.hashes.borrow_mut().push(5);
533
534 assert_eq!(a.compare(&b, &config), Ordering::Less);
535
536 a.hashes.borrow_mut().push(6);
537
538 assert_eq!(a.compare(&b, &config), Ordering::Greater);
539
540 let a_again = FileInfo {
541 path: "/a/b/c".into(),
542 metadata: a.metadata.clone(),
543 #[cfg(feature = "selinux")]
544 selinux_context: RefCell::new(None),
545 hashes: vec![].into(),
546 file_state: FileState::None.into(),
547 };
548
549 assert_eq!(a.compare(&a_again, &config), Ordering::Equal);
550
551 file2.as_file().set_modified(
553 ts + time::Duration::new(-30i64, 123)
554 ).unwrap();
555
556 let mut b_again = FileInfo {
557 path: file2.path().to_path_buf(),
558 metadata: fs::metadata(file2.path()).unwrap(),
559 #[cfg(feature = "selinux")]
560 selinux_context: RefCell::new(None),
561 hashes: a.hashes.borrow().clone().into(),
562 file_state: FileState::Closed.into(),
563 };
564
565 assert_eq!(a.compare(&b_again, &config), Ordering::Greater);
566
567 file2.as_file().set_modified(
569 ts + time::Duration::new(30i64, 123)
570 ).unwrap();
571
572 b_again.metadata = fs::metadata(file2.path()).unwrap();
573
574 assert_eq!(a.compare(&b_again, &config), Ordering::Less);
575
576 config.ignore_mtime = true;
578
579 assert_eq!(a.compare(&b_again, &config), Ordering::Equal);
580
581 config.ignore_mtime = false;
583 config.source_date_epoch = Some(ts);
584
585 assert_eq!(a.compare(&b_again, &config), Ordering::Equal);
586 }
587
588 #[test]
589 fn compare_different_fs() {
590 let config = Config::empty();
591
592 let a = FileInfo {
593 path: "/dev".into(),
594 metadata: fs::metadata("/dev").unwrap(),
595 #[cfg(feature = "selinux")]
596 selinux_context: RefCell::new(None),
597 hashes: vec![].into(),
598 file_state: FileState::Closed.into(),
599 };
600
601 let b = FileInfo {
602 path: "/proc".into(),
603 metadata: fs::metadata("/proc").unwrap(),
604 #[cfg(feature = "selinux")]
605 selinux_context: RefCell::new(None),
606 hashes: vec![].into(),
607 file_state: FileState::Closed.into(),
608 };
609
610 assert_ne!(a.compare(&b, &config), Ordering::Equal);
611 }
612
613 #[test]
614 #[cfg(feature = "selinux")]
615 fn compare_selinux_contexts() {
616 let labels = match selinux::label::Labeler::new(&[], false) {
617 Err(e) => {
618 info!("Failed to initalize SELinux db: {}", e);
619 return;
620 }
621 Ok(v) => v,
622 };
623
624 let mut config = Config::empty();
625 config.selinux_labels.replace(labels);
626
627 let mut file1 = tempfile::NamedTempFile::new().unwrap();
628 let mut file2 = tempfile::NamedTempFile::new().unwrap();
629
630 file1.write(b"0").unwrap();
631 file2.write(b"0").unwrap();
632
633 let ts = file2.as_file().metadata().unwrap().modified().unwrap();
634 file1.as_file().set_modified(ts).unwrap();
635
636 let a = FileInfo {
637 path: file1.path().to_path_buf(),
638 metadata: fs::metadata(file1.path()).unwrap(),
639 selinux_context: RefCell::new(None),
640 hashes: vec![5, 6, 7].into(),
641 file_state: FileState::Closed.into(),
642 };
643
644 let b = FileInfo {
645 path: file2.path().to_path_buf(),
646 metadata: fs::metadata(file2.path()).unwrap(),
647 selinux_context: RefCell::new(None),
648 hashes: vec![5, 6, 7].into(),
649 file_state: FileState::Closed.into(),
650 };
651
652 assert_eq!(a.compare(&b, &config), Ordering::Equal);
653 a.selinux_context.borrow_mut().replace("aaa".to_owned());
654 assert_eq!(a.compare(&b, &config), Ordering::Greater);
655 b.selinux_context.borrow_mut().replace("bbb".to_owned());
656 assert_eq!(a.compare(&b, &config), Ordering::Less);
657 b.selinux_context.borrow_mut().replace("aaa".to_owned());
658 assert_eq!(a.compare(&b, &config), Ordering::Equal);
659 }
660
661 #[test]
662 fn compare_unreadable() {
663 let mut config = Config::empty();
664 config.ignore_mtime = true;
665
666 let mut file1 = tempfile::NamedTempFile::new().unwrap();
667 let mut file2 = tempfile::NamedTempFile::new().unwrap();
668
669 file1.write(b"0").unwrap();
670 file2.write(b"0").unwrap();
671
672 fs::set_permissions(file1.path(), fs::Permissions::from_mode(0u32)).unwrap();
673 fs::set_permissions(file2.path(), fs::Permissions::from_mode(0u32)).unwrap();
674
675 let a = FileInfo {
676 path: file1.path().to_path_buf(),
677 metadata: fs::metadata(file1.path()).unwrap(),
678 #[cfg(feature = "selinux")]
679 selinux_context: RefCell::new(None),
680 hashes: vec![].into(),
681 file_state: FileState::None.into(),
682 };
683
684 let b = FileInfo {
685 path: file2.path().to_path_buf(),
686 metadata: fs::metadata(file2.path()).unwrap(),
687 #[cfg(feature = "selinux")]
688 selinux_context: RefCell::new(None),
689 hashes: vec![].into(),
690 file_state: FileState::None.into(),
691 };
692
693 let amiroot = fs::metadata("/proc/self/cmdline").unwrap().uid() == 0;
694 let expected = if amiroot {
695 Ordering::Equal
696 } else {
697 a.metadata.ino().cmp(&b.metadata.ino())
698 };
699 assert_eq!(a.compare(&b, &config), expected);
700 }
701
702 #[test]
703 fn compare_contents() {
704 let mut config = Config::empty();
705 config.ignore_mtime = true;
706
707 let mut file1 = tempfile::NamedTempFile::new().unwrap();
708 let mut file2 = tempfile::NamedTempFile::new().unwrap();
709
710 for (size, chunk_count) in vec![(0, 0), (4, 1), (4092, 1), (4096, 2), (4096*9, 4)] {
711 if size > 0 {
712 let data = Vec::from_iter(std::iter::repeat_n(66u8, size));
713 file1.write(&data).unwrap();
714 file1.flush().unwrap();
715 file2.write(&data).unwrap();
716 file2.flush().unwrap();
717 }
718
719 let a = FileInfo {
720 path: file1.path().to_path_buf(),
721 metadata: fs::metadata(file1.path()).unwrap(),
722 #[cfg(feature = "selinux")]
723 selinux_context: RefCell::new(None),
724 hashes: vec![].into(),
725 file_state: FileState::None.into(),
726 };
727
728 let b = FileInfo {
729 path: file2.path().to_path_buf(),
730 metadata: fs::metadata(file2.path()).unwrap(),
731 #[cfg(feature = "selinux")]
732 selinux_context: RefCell::new(None),
733 hashes: vec![].into(),
734 file_state: FileState::None.into(),
735 };
736
737 assert_eq!(a.compare(&b, &config), Ordering::Equal);
738 assert_eq!(a.hashes.borrow().len(), chunk_count);
739 assert_eq!(b.hashes.borrow().len(), chunk_count);
740 assert_eq!(*a.hashes.borrow(), *b.hashes.borrow());
741 let _exp_state = if size > 0 { FileState::Closed } else { FileState::None };
742 assert!(matches!(a.file_state.borrow(), _exp_state));
743 assert!(matches!(b.file_state.borrow(), _exp_state));
744 }
745 }
746}