use std::sync::Arc;
use vyre_libs::intern::perfect_hash::{build_chd, PerfectHash};
/// Source-type labels that are always part of the interned set.
///
/// `StaticInterner::from_detector_strings` inserts every entry of this slice
/// in addition to the strings supplied by the caller.
pub(crate) const SEED_SOURCE_TYPES: &[&str] = &[
    "filesystem",
    "git",
    "git/history",
    "git/diff",
    "git/staged",
    "git-diff",
    "stdin",
    "s3",
    "docker",
    "web",
    "github",
    "slack",
    "binary",
];
/// String interner backed by a perfect hash and an arena of shared strings.
///
/// The derived `Default` produces a value holding `PerfectHash::default()`
/// and an empty arena.
#[derive(Default)]
pub struct StaticInterner {
/// Perfect hash whose looked-up value is used as an index into `arena`.
phf: PerfectHash,
/// One shared `Arc<str>` per interned string, addressed by the index the
/// hash returns.
arena: Vec<Arc<str>>,
}
impl StaticInterner {
    /// Builds an interner over `detector_strings` plus every entry of
    /// `SEED_SOURCE_TYPES`.
    ///
    /// Duplicate strings are collapsed. The surviving strings are numbered in
    /// ascending `BTreeSet` order; that number is both the value registered
    /// with the perfect hash and the string's position in the arena.
    pub fn from_detector_strings<I, S>(detector_strings: I) -> Self
    where
        I: IntoIterator<Item = S>,
        S: AsRef<str>,
    {
        // Sorted, de-duplicated union of the caller's strings and the seeds.
        let mut unique: std::collections::BTreeSet<String> = detector_strings
            .into_iter()
            .map(|s| s.as_ref().to_owned())
            .collect();
        unique.extend(SEED_SOURCE_TYPES.iter().map(|seed| (*seed).to_owned()));

        // Nothing to hash: hand back an interner with no entries.
        if unique.is_empty() {
            return Self {
                phf: PerfectHash::default(),
                arena: Vec::new(),
            };
        }

        // Fill the arena and the hash entries in one pass so that slot `i` of
        // the arena always holds the string registered with value `i`.
        let mut arena: Vec<Arc<str>> = Vec::with_capacity(unique.len());
        let mut entries: Vec<(String, u32)> = Vec::with_capacity(unique.len());
        for (slot, text) in unique.into_iter().enumerate() {
            arena.push(Arc::from(text.as_str()));
            // NOTE(review): `as u32` would wrap past `u32::MAX` distinct strings.
            entries.push((text, slot as u32));
        }

        Self {
            phf: build_chd(entries),
            arena,
        }
    }

    /// Returns the shared copy of `s` if it is part of the interned set.
    ///
    /// The slot reported by the perfect hash is only trusted after comparing
    /// the stored string with `s`; a missing slot or a mismatch yields `None`.
    /// Presumably the hash may report a slot for strings it was not built
    /// from, which is why the comparison is needed (confirm against
    /// `PerfectHash::lookup`).
    #[inline]
    pub fn lookup(&self, s: &str) -> Option<Arc<str>> {
        let slot = self.phf.lookup(s)? as usize;
        match self.arena.get(slot) {
            Some(hit) if **hit == *s => Some(Arc::clone(hit)),
            _ => None,
        }
    }

    /// Number of strings held in the arena.
    pub fn len(&self) -> usize {
        self.arena.len()
    }

    /// `true` when the arena holds no strings.
    pub fn is_empty(&self) -> bool {
        self.arena.is_empty()
    }
}
/// Returns how many entries `SEED_SOURCE_TYPES` contains.
pub fn seed_source_type_count() -> usize {
    let seeds: &[&str] = SEED_SOURCE_TYPES;
    seeds.len()
}