#![feature(file_create_new)]
#![warn(
clippy::all,
clippy::pedantic,
clippy::nursery,
clippy::cargo_common_metadata
)]
#![allow(clippy::non_ascii_literal)]
#![allow(
clippy::cast_possible_truncation,
clippy::cast_sign_loss,
clippy::cast_precision_loss,
clippy::cast_lossless
)]
#![allow(let_underscore_drop)]
use std::{
env::set_var,
fs::{create_dir_all, read_to_string, remove_file, rename, File},
io::{self, BufWriter, ErrorKind, Read, Write},
os::{
fd::{FromRawFd, IntoRawFd},
unix::prelude::OsStrExt,
},
path::Path,
process,
thread::{self, sleep, JoinHandle},
time::{Duration, Instant},
};
use anyhow::{anyhow, Context as AnyhowContext, Error};
use clap::Parser;
use libc::{c_int, fcntl, F_GETFL, F_SETFL, O_NONBLOCK};
use libhashfindutils::parser::UpdateDbConfig;
use ring::digest::{Context, SHA256};
use signal_hook::{
consts::{SIGINT, SIGTERM},
iterator::Signals,
low_level::signal_name,
};
use walkdir::WalkDir;
// Command-line arguments, parsed through clap's derive API (`Cli::parse()` in `main`).
//
// NOTE(review): regular `//` comments are used here on purpose instead of `///`;
// clap's derive may surface doc comments as `--help` text, which would change
// what the program prints. Confirm before converting these to doc comments.
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli
{
// `-o` / `--once`: when set, `main` performs a single database rebuild and
// then exits instead of looping.
#[arg(short, long)]
once: bool,
}
/// Holds the open output files of one database rebuild plus the running file-ID counter.
struct Writer
{
/// One buffered writer per possible first byte of a digest; `Writer::new`
/// fills it with 256 entries and `handle_file` indexes it with `hash[0]`.
sha256: Vec<BufWriter<File>>,
/// Buffered writer for the master file (`ids.new`) that receives the
/// ID / path records.
ids: BufWriter<File>,
/// ID given to the next successfully hashed file; starts at 0 and is
/// incremented after each record is written.
next_id: u64,
/// Size in bytes of the read buffer used while hashing a file.
block_size: usize,
}
impl Writer
{
fn new(config: &UpdateDbConfig) -> Result<Self, Error>
{
let mut sha256 = vec![];
let mut ids = BufWriter::new(
File::create(config.db_path.join("ids.new"))
.context("Couldn't create the database master file")?,
);
ids.write_all(&[0, 0, 0, 0])
.context("Couldn't declare database version")?;
for i in 0..=255
{
sha256.push(BufWriter::new(
File::create(config.db_path.join(format!("{i:03}.sha256.new")))
.with_context(|| format!("Couldn't create the {i}th hash file"))?,
));
}
Ok(Self {
sha256,
ids,
next_id: 0,
block_size: 4096,
})
}
fn handle_file(&mut self, path: &Path) -> Result<(), Error>
{
let file = match File::open(path)
{
Ok(file) => file,
Err(e) =>
{
println!("Error with opening file {path:?} to hash it: {e}");
return Ok(());
}
};
let mut file = match make_file_non_blocking(file)
{
Ok(file) => file,
Err(e) =>
{
println!("Error with marking file {path:?} as non-blocking: {e}");
return Ok(());
}
};
let mut hash = Context::new(&SHA256);
let mut buf = vec![0; self.block_size];
let mut last_time: Option<Instant> = None;
loop
{
match file.read(&mut buf)
{
Ok(0) => break,
Ok(n) =>
{
last_time = None;
hash.update(&buf[..n]);
}
Err(e) =>
{
if e.kind() == ErrorKind::WouldBlock
{
if let Some(time) = &last_time
{
if time.elapsed().as_secs() > 5
{
println!(
"Error with reading file {path:?} to hash it: It is blocking"
);
return Ok(());
}
}
else
{
last_time = Some(Instant::now());
}
sleep(Duration::from_millis(1));
continue;
}
println!("Error with reading file {path:?} to hash it: {e}");
return Ok(());
}
}
}
let hash = hash.finish();
let hash = hash.as_ref();
self.ids
.write_all(&self.next_id.to_ne_bytes())
.context("Couldn't write file ID to the database master file")?;
self.ids
.write_all(path.as_os_str().as_bytes())
.with_context(|| {
format!("Couldn't write the path to the database master file: {path:?}")
})?;
self.ids
.write_all(&[0])
.context("Couldn't write file name seperator to the database master file")?;
let file = &mut self.sha256[hash[0] as usize];
file.write_all(&self.next_id.to_ne_bytes())
.with_context(|| format!("Couldn't write file id to {}th hash file", hash[0]))?;
file.write_all(&hash[1..])
.with_context(|| format!("Couldn't write hash to {} hash file", hash[0]))?;
self.next_id += 1;
Ok(())
}
}
/// Direct mutable access to the final element of a collection.
trait GetLast
{
    /// Element type handed out by [`GetLast::last_mut`].
    type MutOutput;

    /// Returns a mutable reference to the last element.
    ///
    /// # Panics
    ///
    /// Implementations panic when the collection is empty.
    fn last_mut(&mut self) -> &mut Self::MutOutput;
}

impl<T> GetLast for Vec<T>
{
    type MutOutput = T;

    /// Returns a mutable reference to the last element of the vector.
    ///
    /// # Panics
    ///
    /// Panics with an explicit message if the vector is empty.
    fn last_mut(&mut self) -> &mut Self::MutOutput
    {
        // Go through the slice accessor instead of computing `len - 1`, which
        // underflows on an empty vector. The fully qualified call makes it plain
        // that this is the slice method and not a recursive call.
        <[T]>::last_mut(self.as_mut_slice()).expect("GetLast::last_mut called on an empty Vec")
    }
}
/// Sets `O_NONBLOCK` on the descriptor behind `file` and hands the file back.
///
/// # Errors
///
/// Fails if the file status flags cannot be read (`F_GETFL`) or written
/// (`F_SETFL`). The file is closed in that case.
fn make_file_non_blocking(file: File) -> Result<File, Error>
{
    let fd: c_int = file.into_raw_fd();
    // SAFETY: `fd` was returned by `into_raw_fd` on the line above, so it is an
    // open descriptor that nothing else owns. Wrapping it again straight away
    // means every early return below closes it instead of leaking it.
    let file = unsafe { File::from_raw_fd(fd) };
    // SAFETY: `fd` is kept open by `file` for the duration of the call, and
    // `F_GETFL` does not touch memory.
    let flags = unsafe { fcntl(fd, F_GETFL, 0) };
    if flags == -1
    {
        return Err(io::Error::last_os_error()).context("Couldn't get file flags");
    }
    // SAFETY: as above; `F_SETFL` takes a plain integer argument.
    let rv = unsafe { fcntl(fd, F_SETFL, flags | O_NONBLOCK) };
    if rv == -1
    {
        return Err(io::Error::last_os_error()).context("Couldn't set file flags");
    }
    Ok(file)
}
fn get_lock(config: &UpdateDbConfig) -> Result<(), Error>
{
let mut lock =
File::create_new(config.db_path.join("lock")).context("Couldn't create new lock file")?;
write!(lock, "{}", process::id()).context("Couldn't write pid to lock file")?;
drop(lock);
if read_to_string(config.db_path.join("lock"))
.context("Couldn't read lock file to checked against race conditions")?
!= format!("{}", process::id())
{
return Err(anyhow!("Race condition in acquiring lock file"));
}
Ok(())
}
/// Gives the database lock back by deleting `<db_path>/lock`.
///
/// # Errors
///
/// Fails if the lock file cannot be removed.
fn release_lock(config: &UpdateDbConfig) -> Result<(), Error>
{
    let lock_path = config.db_path.join("lock");
    remove_file(lock_path).context("Couldn't delete lock file")
}
/// Tells whether `path` starts with any entry of `config.exclude_paths`.
///
/// The comparison is `Path::starts_with`, i.e. by whole path components.
fn is_path_excluded(path: &Path, config: &UpdateDbConfig) -> bool
{
    let mut excluded_roots = config.exclude_paths.iter();
    excluded_roots.any(|root| path.starts_with(root))
}
/// Walks every entry of `config.search_paths` and records each regular file through `fds`.
///
/// Directory entries are visited sorted by file name, and excluded paths are
/// pruned together with everything beneath them. Entries that cannot be read
/// are reported on stdout and skipped. The walk ends early, still with
/// `Ok(())`, as soon as the signal thread has finished.
///
/// # Errors
///
/// Fails if `Writer::handle_file` fails for a file.
fn traverse_file_system(
    fds: &mut Writer,
    config: &UpdateDbConfig,
    sig_thread: &JoinHandle<Result<i32, Error>>,
) -> Result<(), Error>
{
    let entries = config.search_paths.iter().flat_map(|root| {
        WalkDir::new(root)
            .sort_by_file_name()
            .into_iter()
            .filter_entry(|candidate| !is_path_excluded(candidate.path(), config))
    });
    for entry in entries
    {
        if sig_thread.is_finished()
        {
            return Ok(());
        }
        let entry = match entry
        {
            Ok(entry) => entry,
            Err(e) =>
            {
                println!("Error traversing file system: {e}");
                continue;
            }
        };
        let metadata = match entry.metadata()
        {
            Ok(metadata) => metadata,
            Err(e) =>
            {
                println!("Error traversing file system: {e}");
                continue;
            }
        };
        if metadata.is_file()
        {
            let path = entry.path();
            fds.handle_file(path)
                .with_context(|| format!("Couldn't handle file: {path:?}"))?;
        }
        // Short pause after each entry that was processed.
        thread::sleep(Duration::from_micros(1));
    }
    Ok(())
}
fn handle_sig_thread(
sig_thread: JoinHandle<Result<i32, Error>>,
config: &UpdateDbConfig,
) -> Result<(), Error>
{
match sig_thread.join()
{
Ok(Ok(sig)) =>
{
eprintln!(
"Stopping program due to signal: {}",
signal_name(sig).map_or_else(
|| format!("Unknown signal with ID {sig}"),
ToOwned::to_owned
)
);
release_lock(config).context("Couldn't release the lock; please do this manually")?;
Ok(())
}
Ok(Err(e)) => Err(e).context("Problems with capturing signals, stopping..."),
Err(e) => Err(anyhow!("Problems with signal capturing thread: {e:?}")),
}
}
fn main() -> Result<(), Error>
{
let should_once = Cli::parse().once;
set_var("RUST_BACKTRACE", "full");
let config = UpdateDbConfig::new().context("Couldn't read the configuration")?;
create_dir_all(&config.db_path).context("Couldn't create necessary directories")?;
get_lock(&config).context("Couldn't acquire lock")?;
let sig_thread = thread::spawn(|| {
let mut sigs = Signals::new([SIGINT, SIGTERM]).context("Couldn't capture signals")?;
sigs.forever()
.next()
.ok_or_else(|| anyhow!("Capturing of signals was wrongly stopped."))
});
let mut has_once = false;
while !(should_once && has_once)
{
let mut fds = Writer::new(&config).context("Couldn't open all database files")?;
if sig_thread.is_finished()
{
return handle_sig_thread(sig_thread, &config);
}
traverse_file_system(&mut fds, &config, &sig_thread)
.context("Problem hashing all files")?;
if sig_thread.is_finished()
{
return handle_sig_thread(sig_thread, &config);
}
drop(fds);
for i in 0..=255
{
rename(
config.db_path.join(format!("{i:03}.sha256.new")),
config.db_path.join(format!("{i:03}.sha256")),
)
.with_context(|| format!("Couldn't update the {i}th hash file"))?;
}
rename(config.db_path.join("ids.new"), config.db_path.join("ids"))
.context("Couldn't update the database master file")?;
thread::sleep(Duration::from_secs(1));
has_once = true;
}
release_lock(&config).context("Couldn't release lock")?;
Ok(())
}