#![cfg_attr(docsrs, feature(doc_cfg))]
mod ch;
mod err;
use std::{
fs,
path::{Path, PathBuf}
};
#[cfg(feature = "enumerate")]
use {
std::{path::Component, thread},
walkdir::WalkDir
};
use idbag::IdBagU32;
use tmpfile::TmpProc;
use sha2::{Digest, Sha256};
pub use ch::ContentHash;
pub use tmpfile::{self, TmpFile};
pub use err::Error;
/// Streaming SHA-256 state that acts as the [`TmpProc`] processor for a blob
/// being written through [`FsBlobStore::writer()`].
struct Hasher {
/// Running SHA-256 digest; fed by `update()` and read out by `finalize()`.
inner: Sha256,
/// Id allocated from the store's id bag; its numeric value is used to name
/// the temporary file.  It is never read after construction -- presumably it
/// is held only so the id stays reserved for as long as this writer exists
/// (TODO confirm against the `idbag` release-on-drop semantics).
_id: idbag::IdU32
}
impl TmpProc for Hasher {
    type Output = ContentHash;
    type Error = Error;

    /// Feed the next chunk of blob data into the running SHA-256 digest.
    fn update(&mut self, buf: &[u8]) {
        self.inner.update(buf);
    }

    /// Produce the content hash and, if a temporary file is in use, the path
    /// the blob should finally live at.
    ///
    /// When `tmpfile` is `Some`, the target path is built next to the temporary
    /// file as `<parent>/<hex[0..2]>/<hex[2..4]>/<hex[4..]>`, where `hex` is the
    /// lowercase hex encoding of the hash.  The two-level subdirectory is
    /// created if it does not exist yet.  When `tmpfile` is `None`, only the
    /// hash is returned.
    ///
    /// # Errors
    /// Returns an error if the target subdirectory needs to be created and
    /// that fails.
    ///
    /// # Panics
    /// Panics if `tmpfile` has no parent directory.  Paths handed out by
    /// `FsBlobStore::writer()` are always joined onto the store's base
    /// directory, so this would indicate a broken invariant.
    fn finalize(
        &mut self,
        tmpfile: Option<&Path>
    ) -> Result<(Self::Output, Option<PathBuf>), Self::Error> {
        // Finalize a clone so the running digest state is left untouched.
        let hash = self.inner.clone().finalize().to_vec();

        let fname = if let Some(tmpfile) = tmpfile {
            let Some(basedir) = tmpfile.parent() else {
                panic!("Unexpectedly unable to get parent directory.");
            };

            // Fan the blobs out over two directory levels keyed on the first
            // four hex digits of the hash.
            let hexhash = hex::encode(&hash);
            let (subdir1, rest) = hexhash.split_at(2);
            let (subdir2, fname) = rest.split_at(2);
            let dir = basedir.join(subdir1).join(subdir2);
            if !dir.exists() {
                std::fs::create_dir_all(&dir)?;
            }
            Some(dir.join(fname))
        } else {
            None
        };

        Ok((ContentHash::from(hash), fname))
    }
}
/// Content-addressed blob store kept in a directory tree on the filesystem.
///
/// Blobs are named after the hex encoding of their 32-byte hash and stored as
/// `<basedir>/<hex[0..2]>/<hex[2..4]>/<hex[4..]>`.
pub struct FsBlobStore {
/// Root directory of the store; blobs and temporary files live beneath it.
basedir: PathBuf,
/// Optional size value forwarded to `TmpFile::with_minsize()` by `writer()`.
/// NOTE(review): its exact meaning is defined by the `tmpfile` crate --
/// confirm there.
minsize: Option<usize>,
/// Allocator for the ids that are used to name temporary files.
idbag: IdBagU32
}
impl FsBlobStore {
    /// Split a hex-encoded hash into `(first two chars, next two chars, rest)`,
    /// i.e. the two subdirectory names and the file name used on disk.
    ///
    /// # Panics
    /// Panics if `hexhash` is too short to split off two 2-byte pieces.
    fn fsparts(hexhash: &str) -> (&str, &str, &str) {
        let (outer, tail) = hexhash.split_at(2);
        let (inner, leaf) = tail.split_at(2);
        (outer, inner, leaf)
    }

    /// Build the blob's path relative to the store's base directory.
    ///
    /// # Panics
    /// Panics if `hash` is not exactly 32 bytes long.
    fn relpathname(hash: &[u8]) -> PathBuf {
        assert_eq!(hash.len(), 32);
        let hexhash = hex::encode(hash);
        let (outer, inner, leaf) = Self::fsparts(&hexhash);
        // Collecting path components is equivalent to chaining `join()`.
        [outer, inner, leaf].iter().collect()
    }

    /// Build the blob's full path by joining its relative path onto the
    /// store's base directory.
    fn abspathname(&self, hash: &[u8]) -> PathBuf {
        self.basedir.join(Self::relpathname(hash))
    }
}
impl FsBlobStore {
/// Create a blob store rooted at `basedir`, without a minimum-size setting.
///
/// When the crate is built with the `mkbasedir` feature, `basedir` (and any
/// missing parents) is created if it does not exist yet.
///
/// # Errors
/// Only fails when the `mkbasedir` feature is enabled and the base directory
/// could not be created.
pub fn new(basedir: impl AsRef<Path>) -> Result<Self, Error> {
    let root = basedir.as_ref().to_path_buf();

    #[cfg(feature = "mkbasedir")]
    if !root.exists() {
        fs::create_dir_all(&root)?;
    }

    Ok(Self {
        basedir: root,
        minsize: None,
        idbag: IdBagU32::new()
    })
}
#[allow(clippy::missing_errors_doc)]
pub fn with_minsize(
basedir: impl AsRef<Path>,
minsize: usize
) -> Result<Self, Error> {
let basedir = basedir.as_ref();
#[cfg(feature = "mkbasedir")]
if !basedir.exists() {
fs::create_dir_all(basedir)?;
}
Ok(Self {
basedir: basedir.to_path_buf(),
minsize: Some(minsize),
idbag: IdBagU32::new()
})
}
/// Check whether a blob with the given hash exists in the store.
///
/// # Errors
/// Returns the `std::io::Error` reported by `Path::try_exists()` when the
/// existence of the blob file could not be determined.
///
/// # Panics
/// Panics if `hash` is not exactly 32 bytes long.
pub fn have(&self, hash: &[u8]) -> Result<bool, std::io::Error> {
    self.abspathname(hash).try_exists()
}
/// Open the blob with the given hash for reading.
///
/// # Errors
/// Returns the `std::io::Error` from opening the blob file, e.g. when no blob
/// with that hash is stored.
///
/// # Panics
/// Panics if `hash` is not exactly 32 bytes long.
pub fn reader(
    &self,
    hash: &[u8]
) -> Result<impl std::io::Read, std::io::Error> {
    fs::File::open(self.abspathname(hash))
}
/// Start writing a new blob.
///
/// The data is written to a temporary file named `tmp-<id>` (eight hex
/// digits) directly inside the base directory and hashed with SHA-256 as it
/// is written.  If the store was created with a minimum size, it is passed on
/// to `TmpFile::with_minsize()`.
///
/// # Errors
/// Returns the `std::io::Error` reported by the `TmpFile` constructor.
pub fn writer(&self) -> Result<TmpFile<ContentHash, Error>, std::io::Error> {
    let id = self.idbag.alloc();
    let tmp_path = self.basedir.join(format!("tmp-{:08x}", id.get()));

    // The hasher takes ownership of the id along with the digest state.
    let hasher = Hasher {
        inner: Sha256::new(),
        _id: id
    };

    match self.minsize {
        Some(minsize) => {
            TmpFile::with_minsize(tmp_path, Box::new(hasher), minsize)
        }
        None => TmpFile::new(tmp_path, Box::new(hasher))
    }
}
/// Remove the blob with the given hash from the store.
///
/// After the blob file is deleted, its two levels of hash subdirectories are
/// removed as well on a best-effort basis: the inner one first and, only if
/// that succeeded, the outer one.  Failures to remove a subdirectory are
/// ignored.
///
/// # Errors
/// Returns the `std::io::Error` from deleting the blob file, e.g. when no
/// blob with that hash is stored.
///
/// # Panics
/// Panics if `hash` is not exactly 32 bytes long, or if a parent directory of
/// the blob path unexpectedly cannot be determined.
pub fn rm(&self, hash: &[u8]) -> Result<(), std::io::Error> {
    let blob = self.abspathname(hash);
    fs::remove_file(&blob)?;

    let inner = blob
        .parent()
        .expect("Unexpectedly unable to get parent directory.");
    // `remove_dir` refuses non-empty directories, so a failure here most
    // likely means other blobs still share this subdirectory.
    if fs::remove_dir(inner).is_err() {
        return Ok(());
    }

    let outer = inner
        .parent()
        .expect("Unexpectedly unable to get parent directory.");
    let _ = fs::remove_dir(outer);

    Ok(())
}
/// Enumerate the hashes of all blobs in the store on a background thread.
///
/// A thread is spawned that walks the base directory and sends the hash of
/// every blob it finds over a channel, in batches of up to 16.  Only regular
/// files exactly three levels below the base directory whose concatenated
/// path components form 64 hex digits are reported; anything else (such as
/// temporary files) is skipped.
///
/// Returns the receiving end of the channel and the join handle of the
/// walker thread.
#[cfg(feature = "enumerate")]
#[cfg_attr(docsrs, doc(cfg(feature = "enumerate")))]
#[allow(clippy::missing_panics_doc)]
#[must_use]
pub fn enumerate(
    &self
) -> (recstrm::Receiver<ContentHash, ()>, thread::JoinHandle<()>) {
    let (tx, rx) = recstrm::channel::<ContentHash, ()>(32, None);
    let root = self.basedir.clone();

    let worker = thread::spawn(move || {
        let mut pending = Vec::with_capacity(16);

        // Blobs live at <root>/<xx>/<yy>/<rest>, i.e. at depth 3.
        let candidates = WalkDir::new(&root)
            .into_iter()
            .filter_map(Result::ok)
            .filter(|e| e.depth() == 3 && e.file_type().is_file());

        for entry in candidates {
            let rel = entry.path().strip_prefix(&root).unwrap();

            // Glue the path components back together into the hex hash.
            // Components that are not valid UTF-8 are dropped, which leaves
            // the string too short to pass the length check below.
            let hexname: String = rel
                .components()
                .filter_map(|c| match c {
                    Component::Normal(os) => os.to_str(),
                    _ => None
                })
                .collect();

            let is_hash = hexname.len() == 64
                && hexname.chars().all(|c| c.is_ascii_hexdigit());
            if !is_hash {
                continue;
            }

            // Cannot fail: the string was just validated as 64 hex digits.
            let hash = hex::decode(hexname).unwrap();
            pending.push(ContentHash::from(hash));

            if pending.len() < 16 {
                continue;
            }
            // `drain(..)` rather than `into_iter()` so the buffer is reused
            // for the next batch.
            #[allow(clippy::iter_with_drain)]
            if tx.send_batch(pending.drain(..)).is_err() {
                // Sending failed -- presumably the receiver is gone -- so
                // there is no point in walking any further.
                break;
            }
        }

        // Flush the final, partially filled batch.
        if !pending.is_empty() {
            let _ = tx.send_batch(pending.into_iter());
        }
    });

    (rx, worker)
}
/// Return the absolute path of the file holding the blob with the given hash.
///
/// # Errors
/// Returns the `std::io::Error` from querying the file's metadata, e.g. when
/// no blob with that hash is stored.
///
/// # Panics
/// Panics if `hash` is not exactly 32 bytes long.
#[cfg(feature = "get-fname")]
#[cfg_attr(docsrs, doc(cfg(feature = "get-fname")))]
pub fn get_fname(&self, hash: &[u8]) -> Result<PathBuf, std::io::Error> {
    let fname = self.abspathname(hash);
    // The metadata itself is not needed; the lookup only verifies that the
    // file is there.
    fs::metadata(&fname).map(|_| fname)
}
}