use std::collections::BTreeSet;
use std::path::Path;
/// Literal prefix that introduces a textual BLAKE3 hash reference.
const BLAKE3_PREFIX: &str = "blake3:";
/// Number of hexadecimal digits that must follow the prefix.
const HEX_HASH_LEN: usize = 64;
/// Scans raw file content for references to known BLAKE3 hashes.
///
/// The bytes are decoded as UTF-8 lossily (invalid sequences become U+FFFD). At every
/// occurrence of `BLAKE3_PREFIX`, the prefix plus the following `HEX_HASH_LEN` bytes form a
/// candidate; it is kept only if it passes `is_valid_blake3_hash` and is a member of
/// `known_hashes`.
///
/// Returns the set of matching references, each including its prefix. Repeated occurrences
/// of the same reference collapse into one entry.
pub fn scan_file_refs(content: &[u8], known_hashes: &BTreeSet<String>) -> BTreeSet<String> {
    let text = String::from_utf8_lossy(content);
    let ref_len = BLAKE3_PREFIX.len() + HEX_HASH_LEN;
    text.match_indices(BLAKE3_PREFIX)
        // `str::get` yields `None` both when the window runs past the end of the text and
        // when its end lands inside a multi-byte character (for example a truncated hash
        // followed by non-ASCII data). Direct slicing would panic in the latter case.
        .filter_map(|(start, _)| text.get(start..start + ref_len))
        .filter(|candidate| is_valid_blake3_hash(candidate) && known_hashes.contains(*candidate))
        .map(str::to_owned)
        .collect()
}
/// Gathers every known-hash reference found beneath `dir`.
///
/// Starts from an empty set and hands the traversal to `scan_dir_recursive`, which fills
/// the set in place.
///
/// # Errors
///
/// Returns the error message produced by `scan_dir_recursive` unchanged.
pub fn scan_directory_refs(
    dir: &Path,
    known_hashes: &BTreeSet<String>,
) -> Result<BTreeSet<String>, String> {
    let mut collected = BTreeSet::new();
    scan_dir_recursive(dir, known_hashes, &mut collected).map(|()| collected)
}
/// Reports whether `s` is a well-formed BLAKE3 reference.
///
/// A reference is well-formed when it starts with `BLAKE3_PREFIX` and the remainder is
/// exactly `HEX_HASH_LEN` ASCII hexadecimal digits (upper or lower case).
pub fn is_valid_blake3_hash(s: &str) -> bool {
    match s.strip_prefix(BLAKE3_PREFIX) {
        Some(digest) => {
            digest.len() == HEX_HASH_LEN && digest.bytes().all(|b| b.is_ascii_hexdigit())
        }
        None => false,
    }
}
/// Walks `dir` depth-first and adds every known-hash reference found in its files to `refs`.
///
/// Entries of each directory are visited in file-name order. The scan is best-effort for
/// individual entries: directory entries that fail to enumerate and files whose content
/// cannot be read are skipped silently. Entries that are neither regular files nor
/// directories are ignored; `DirEntry::file_type` does not follow symlinks, so symlinks
/// fall into this category.
///
/// # Errors
///
/// Returns a message naming the offending path when `dir` cannot be listed or when the
/// file type of an entry cannot be determined. `refs` may already contain references
/// gathered before the failure.
fn scan_dir_recursive(
    dir: &Path,
    known_hashes: &BTreeSet<String>,
    refs: &mut BTreeSet<String>,
) -> Result<(), String> {
    let entries =
        std::fs::read_dir(dir).map_err(|e| format!("read dir {}: {e}", dir.display()))?;
    // Entries that fail to enumerate are dropped, keeping the scan best-effort.
    let mut children: Vec<std::fs::DirEntry> = entries.filter_map(Result::ok).collect();
    // `file_name()` allocates an `OsString`; the cached variant builds each key only once.
    children.sort_by_cached_key(std::fs::DirEntry::file_name);
    for entry in children {
        let path = entry.path();
        let file_type = entry
            .file_type()
            .map_err(|e| format!("stat {}: {e}", path.display()))?;
        if file_type.is_dir() {
            scan_dir_recursive(&path, known_hashes, refs)?;
        } else if file_type.is_file() {
            // Unreadable files are skipped rather than aborting the whole scan.
            if let Ok(content) = std::fs::read(&path) {
                refs.extend(scan_file_refs(&content, known_hashes));
            }
        }
    }
    Ok(())
}