use std::{
collections::HashMap,
fs::{File, OpenOptions},
io::{BufRead, BufReader, Write},
path::Path,
sync::Mutex,
};
use anyhow::{Context, Result};
use linprov_common::fnv_hash;
use log::{debug, warn};
/// Hash-to-string lookup table kept in memory and mirrored to a file.
///
/// The table is populated from the backing file by [`HashDb::open`], and each
/// string that [`HashDb::record`] sees for the first time is appended to that
/// file. Both fields sit behind a [`Mutex`], so the methods only need `&self`.
#[derive(Debug)]
pub struct HashDb {
    /// Strings recorded so far, keyed by their `u64` hash.
    map: Mutex<HashMap<u64, String>>,
    /// Backing file, opened in append mode.
    writer: Mutex<File>,
}
impl HashDb {
pub fn open(path: &Path) -> Result<Self> {
if let Some(parent) = path.parent() {
if !parent.as_os_str().is_empty() {
std::fs::create_dir_all(parent)
.with_context(|| format!("creating `{}`", parent.display()))?;
}
}
let mut map = HashMap::new();
match File::open(path) {
Ok(f) => {
for (i, line) in BufReader::new(f).lines().enumerate() {
let line = match line {
Ok(l) => l,
Err(e) => {
warn!("hashdb: read error on line {}: {e}", i + 1);
break;
}
};
let Some((hex, path_str)) = line.split_once('\t') else {
continue; };
if let Ok(hash) = u64::from_str_radix(hex.trim(), 16) {
map.insert(hash, path_str.to_string());
}
}
debug!(
"hashdb: loaded {} entries from {}",
map.len(),
path.display()
);
}
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
Err(e) => return Err(e).with_context(|| format!("opening `{}`", path.display())),
}
let writer = OpenOptions::new()
.create(true)
.append(true)
.open(path)
.with_context(|| format!("opening `{}` for append", path.display()))?;
Ok(Self {
map: Mutex::new(map),
writer: Mutex::new(writer),
})
}
pub fn record(&self, s: &str) -> u64 {
let hash = fnv_hash(s);
let mut map = self.map.lock().expect("hashdb map mutex poisoned");
if map.contains_key(&hash) {
return hash;
}
map.insert(hash, s.to_string());
let mut w = self.writer.lock().expect("hashdb writer mutex poisoned");
if let Err(e) = writeln!(w, "{hash:016x}\t{s}") {
warn!("hashdb: failed to append {hash:016x} -> {s}: {e}");
}
hash
}
pub fn resolve(&self, hash: u64) -> Option<String> {
if hash == 0 {
return None;
}
self.map
.lock()
.expect("hashdb map mutex poisoned")
.get(&hash)
.cloned()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A recorded string can be resolved through the hash `record` returned,
    /// while hash 0 and a hash that was never recorded resolve to nothing.
    #[test]
    fn record_resolve_roundtrip() {
        let tmp = tempdir().unwrap();
        let db_file = tmp.path().join("hashes.tsv");
        let db = HashDb::open(&db_file).unwrap();

        let recorded = "/home/user/Downloads/";
        let hash = db.record(recorded);

        assert_eq!(db.resolve(hash).as_deref(), Some(recorded));
        assert_eq!(db.resolve(0), None);
        assert_eq!(db.resolve(0xdead_beef), None);
    }

    /// Recording the same string twice writes one line only, and a database
    /// reopened on the same file resolves everything that was recorded.
    #[test]
    fn dedup_and_persist() {
        let tmp = tempdir().unwrap();
        let db_file = tmp.path().join("hashes.tsv");

        // First session: three calls, two distinct strings. The handle is
        // dropped at the end of the scope, before the file is inspected.
        {
            let first = HashDb::open(&db_file).unwrap();
            for entry in ["/a/b/", "/a/b/", "foo.sh"] {
                first.record(entry);
            }
        }

        let body = std::fs::read_to_string(&db_file).unwrap();
        let line_count = body.lines().count();
        assert_eq!(line_count, 2, "body was:\n{body}");

        // Second session: both strings must have been loaded from disk.
        let reopened = HashDb::open(&db_file).unwrap();
        for expected in ["/a/b/", "foo.sh"] {
            assert_eq!(
                reopened.resolve(fnv_hash(expected)).as_deref(),
                Some(expected)
            );
        }
    }
}