use {
crate::{LaburnumError, Span},
nohash_hasher::IsEnabled,
rapidhash::v3::{RapidSecrets, rapidhash_v3_seeded},
std::hash::{BuildHasher, Hash, Hasher},
};
/// Fixed seed material used for every identifier hash computed in this module.
///
/// The seed is a compile-time constant, so [`identhash`] always hashes with the
/// same secrets. Changing the literal changes every [`Ident`] derived from text
/// or bytes, including the golden values pinned by this module's tests.
pub(crate) const IDENTHASH_SEED: RapidSecrets =
RapidSecrets::seed(0x3C79AC492BA7B653);
/// Hashes `bytes` with rapidhash v3, seeded with [`IDENTHASH_SEED`].
///
/// This is the single hashing primitive behind [`Ident::new`],
/// [`Ident::new_bytes`] and [`IdentHasher`]'s `finish`. It is a `const fn`, so
/// it can also be evaluated at compile time.
pub(crate) const fn identhash(bytes: &[u8]) -> u64 {
rapidhash_v3_seeded(bytes, &IDENTHASH_SEED)
}
/// A 64-bit identifier, normally the seeded hash of a name.
///
/// The wrapped value is public. It is produced either by hashing text or bytes
/// ([`Ident::new`], [`Ident::new_bytes`]) or supplied directly
/// ([`Ident::from_hash`]). Equality and ordering compare the raw `u64`; the
/// original text is not stored in an `Ident`.
#[derive(
Debug,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
serde::Serialize,
serde::Deserialize,
)]
pub struct Ident(pub u64);
/// A `HashSet` of [`Ident`]s that hashes with [`IdentHashState`].
///
/// The inherent `HashSet::new` / `HashSet::with_capacity` are only defined for
/// the standard default hasher; use [`HashSetExt`] (or `Default::default()`) to
/// construct this type.
pub type IdentHashSet = std::collections::HashSet<Ident, IdentHashState>;
/// A `HashMap` keyed by [`Ident`] that hashes with [`IdentHashState`].
///
/// Use [`HashMapExt`] (or `Default::default()`) to construct this type.
pub type IdentHashMap<V> = std::collections::HashMap<Ident, V, IdentHashState>;
impl std::fmt::Display for Ident {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl Hash for Ident {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.0);
}
}
// Marker impl from `nohash_hasher`: presumably opts `Ident` in to that crate's
// pass-through hasher, which fits the `Hash` impl for `Ident` issuing exactly
// one `write_u64` — TODO confirm against the `nohash_hasher` docs.
// NOTE(review): the `IdentHashSet` / `IdentHashMap` aliases in this file use
// `IdentHashState`, not a nohash build-hasher, so this marker is not consumed
// by them; verify where it is actually relied on.
impl IsEnabled for Ident {}
impl Ident {
pub const fn new(s: &str) -> Self {
Ident(identhash(s.as_bytes()))
}
pub const fn new_bytes(bytes: &[u8]) -> Self {
Ident(identhash(bytes))
}
pub const fn try_new(s: &str) -> Result<Self, usize> {
Ok(Self::new(s))
}
pub const fn from_hash(hash: u64) -> Self {
Ident(hash)
}
pub const fn as_u64(self) -> u64 {
self.0
}
}
/// Builds an identifier from an owned sequence by hashing its elements in order.
impl<T: Hash> From<Vec<T>> for Ident {
    /// Feeds every element of `vec`, front to back, into one [`IdentHasher`]
    /// and wraps the resulting digest.
    ///
    /// An empty vector yields the digest of no input at all.
    fn from(vec: Vec<T>) -> Self {
        let mut sink = IdentHasher::new();
        vec.into_iter().for_each(|element| element.hash(&mut sink));
        Ident(sink.finish())
    }
}
/// Builds an identifier from a borrowed sequence by hashing its elements in order.
impl<T: Hash> From<&'_ [T]> for Ident {
    /// Folds every element of `vec`, front to back, into one [`IdentHasher`]
    /// and wraps the resulting digest.
    ///
    /// An empty slice yields the digest of no input at all.
    fn from(vec: &'_ [T]) -> Self {
        let sink = vec.iter().fold(IdentHasher::new(), |mut acc, element| {
            element.hash(&mut acc);
            acc
        });
        Ident(sink.finish())
    }
}
/// Extension trait adding a span-aware, `Result`-returning constructor for
/// [`Ident`].
pub trait IdentExt {
/// Builds an [`Ident`] from `s`.
///
/// `span` is presumably the source location to attach to a
/// [`LaburnumError`] if construction fails — confirm against implementors.
fn try_new_with_span(s: &str, span: Span) -> Result<Ident, LaburnumError>;
}
impl IdentExt for Ident {
// Hashing has no failure case here, so the span is unused and the result is
// always `Ok`.
fn try_new_with_span(s: &str, _span: Span) -> Result<Ident, LaburnumError> {
Ok(Ident::new(s))
}
}
/// A hasher that buffers everything written to it and hashes the whole buffer
/// in one go when finished.
///
/// `Default` and `Debug` are derived: the derived `Default` produces the same
/// empty buffer as [`IdentHasher::new`], and `Debug` lets the hasher appear in
/// diagnostics of types that embed it.
#[derive(Clone, Debug, Default)]
pub struct IdentHasher {
    /// Bytes received so far, in write order.
    buffer: Vec<u8>,
}

impl IdentHasher {
    /// Creates a hasher with an empty buffer.
    ///
    /// `Vec::new` does not allocate, so constructing a hasher is free until
    /// the first write.
    pub const fn new() -> Self {
        Self { buffer: Vec::new() }
    }
}
/// Buffering hasher implementation: `write` only accumulates bytes, and
/// `finish` hashes everything accumulated so far with [`identhash`].
///
/// NOTE(review): only `write` is overridden, so typed writes (`write_u64`,
/// `write_usize`, ...) arrive through the trait's default methods. std
/// documents data fed by `Hash` impls as not portable across platforms —
/// confirm before persisting digests of non-byte input.
impl std::hash::Hasher for IdentHasher {
/// Appends `bytes` to the internal buffer; no hashing happens here.
#[inline]
fn write(&mut self, bytes: &[u8]) {
self.buffer.extend_from_slice(bytes);
}
/// Returns the hash of all bytes written so far.
///
/// The buffer is not cleared, so later writes extend the same input.
#[inline(always)]
fn finish(&self) -> u64 {
identhash(&self.buffer)
}
}
/// Field-less [`BuildHasher`] that hands out fresh [`IdentHasher`]s.
///
/// It carries no per-instance state, so every map or set built with it starts
/// each hash from the same empty hasher.
#[derive(Clone, Debug, Default)]
pub struct IdentHashState;
impl BuildHasher for IdentHashState {
type Hasher = IdentHasher;
/// Returns a new, empty [`IdentHasher`].
#[inline(always)]
fn build_hasher(&self) -> Self::Hasher {
IdentHasher::new()
}
}
/// Constructors for hash sets that use [`IdentHashState`].
///
/// The standard `HashSet::new` and `HashSet::with_capacity` exist only for the
/// default hasher; this trait supplies same-named equivalents.
pub trait HashSetExt {
    /// Creates an empty set.
    fn new() -> Self;

    /// Creates an empty set able to hold at least `capacity` elements before
    /// reallocating.
    fn with_capacity(capacity: usize) -> Self;
}

impl<T> HashSetExt for std::collections::HashSet<T, IdentHashState> {
    #[inline(always)]
    fn new() -> Self {
        let state = IdentHashState;
        std::collections::HashSet::with_hasher(state)
    }

    #[inline(always)]
    fn with_capacity(capacity: usize) -> Self {
        let state = IdentHashState;
        std::collections::HashSet::with_capacity_and_hasher(capacity, state)
    }
}
/// Constructors for hash maps that use [`IdentHashState`].
///
/// The standard `HashMap::new` and `HashMap::with_capacity` exist only for the
/// default hasher; this trait supplies same-named equivalents.
pub trait HashMapExt {
    /// Creates an empty map.
    fn new() -> Self;

    /// Creates an empty map able to hold at least `capacity` entries before
    /// reallocating.
    fn with_capacity(capacity: usize) -> Self;
}

impl<K, V> HashMapExt for std::collections::HashMap<K, V, IdentHashState> {
    #[inline(always)]
    fn new() -> Self {
        let state = IdentHashState;
        std::collections::HashMap::with_hasher(state)
    }

    #[inline(always)]
    fn with_capacity(capacity: usize) -> Self {
        let state = IdentHashState;
        std::collections::HashMap::with_capacity_and_hasher(capacity, state)
    }
}
#[cfg(test)]
mod tests {
use {super::*, std::collections::HashSet};
#[test]
fn test_ascii_determinism_and_equivalence() {
    // Golden values: any change to the seed or algorithm must show up here.
    let golden: &[(&str, u64)] = &[
        ("a", 3700951030190498094),
        ("Z", 8357378038406823441),
        ("hello", 15142343916341883008),
        ("test", 8149802307428693157),
        ("/path/to/file", 2733807838187845117),
        ("A", 10691531192626808154),
        ("Hello", 11922218774751536327),
    ];
    for &(text, expected) in golden {
        assert_eq!(Ident::new(text).as_u64(), expected);
    }

    // The byte-based constructor is pinned to the same values.
    assert_eq!(Ident::new_bytes(b"test").as_u64(), 8149802307428693157);
    assert_eq!(
        Ident::new_bytes(b"/path/to/file").as_u64(),
        2733807838187845117
    );

    // Hashing the same text twice yields equal identifiers.
    for text in ["a", "Z", "hello"] {
        assert_eq!(Ident::new(text), Ident::new(text));
    }

    // `new` and `new_bytes` agree on ASCII input.
    assert_eq!(Ident::new("test"), Ident::new_bytes(b"test"));
    assert_eq!(
        Ident::new("/path/to/file"),
        Ident::new_bytes(b"/path/to/file")
    );

    // Hashing is case-sensitive.
    for (lhs, rhs) in [("a", "A"), ("Hello", "hello")] {
        assert_ne!(Ident::new(lhs), Ident::new(rhs));
    }
}
#[test]
fn test_file_paths() {
    // Golden values for Unix-style, relative, Windows and UNC paths.
    let golden: &[(&str, u64)] = &[
        ("/usr/local/bin", 9030215546033674857),
        ("/home/user/.config/app.toml", 1331543623963564045),
        ("./relative/path.rs", 11371775916658370102),
        ("../parent/file.txt", 18142460124076272868),
        ("/path/with spaces/file.txt", 8417690910137731666),
        ("C:\\Users\\name\\file.txt", 11364599268261376842),
        ("D:\\Program Files\\app\\config.json", 4022307410791609541),
        ("\\\\server\\share\\file", 15310850083273919536),
        ("/path/a", 3213602927056226211),
        ("/path/b", 8757611503651546624),
        ("C:\\a", 14069590557361773865),
        ("C:\\b", 2402246842646923943),
    ];
    for &(path, expected) in golden {
        assert_eq!(Ident::new(path).as_u64(), expected);
    }

    // Paths differing only in their last component must not compare equal.
    for (lhs, rhs) in [("/path/a", "/path/b"), ("C:\\a", "C:\\b")] {
        assert_ne!(Ident::new(lhs), Ident::new(rhs));
    }
}
#[test]
fn test_urls() {
    // Golden values covering several URL schemes and a fully loaded URL.
    let golden: &[(&str, u64)] = &[
        ("https://example.com", 6238028901153254662),
        ("http://localhost:8080/api/v1", 2902551157357227276),
        ("file:///home/user/doc.txt", 10228072659410006829),
        (
            "https://user:pass@host.com:443/path?query=1&foo=bar#fragment",
            11009532693065718485,
        ),
        ("ftp://files.example.org/pub/", 6273708379278450780),
        ("mailto:user@example.com", 16821099726601877414),
        ("data:text/plain;base64,SGVsbG8=", 12439930609024639619),
        ("https://a.com", 4350992704903104206),
        ("https://b.com", 7483602583297229809),
    ];
    for &(url, expected) in golden {
        assert_eq!(Ident::new(url).as_u64(), expected);
    }

    // URLs differing only in host must not compare equal.
    let first = Ident::new("https://a.com");
    let second = Ident::new("https://b.com");
    assert_ne!(first, second);
}
#[test]
fn test_scope_paths_and_identifiers() {
    // Golden values for scoped paths, package names and naming conventions.
    let golden: &[(&str, u64)] = &[
        ("crate::module::Type", 7353646146934595997),
        ("std::collections::HashMap", 228363037788517432),
        ("super::parent::Item", 9075467112349720042),
        ("self::local::Func", 9646871762726632932),
        ("com.example.package.Class", 17765118520647057090),
        ("org.apache.commons.lang3.StringUtils", 11967095804903269371),
        ("@scope/package-name", 12476035324349407333),
        ("lodash/fp/map", 17191274791568812972),
        ("snake_case_name", 7679839508022046910),
        ("camelCaseName", 18433820878116494569),
        ("PascalCaseName", 13905387098582118764),
        ("SCREAMING_SNAKE_CASE", 7500735447424560583),
        ("kebab-case-name", 6676167227388259276),
        ("_leading_underscore", 12098504121973090025),
        ("__dunder__", 6398388586955339633),
        ("name123", 4900864233809741506),
        ("123numeric", 4626075536632704739),
        ("a::b", 13824895284988678755),
        ("a::c", 5783875612052290569),
        ("a.b", 563295907748883360),
        ("a.c", 13143813497973795153),
    ];
    for &(name, expected) in golden {
        assert_eq!(Ident::new(name).as_u64(), expected);
    }

    // Names differing only in their final segment must not compare equal.
    for (lhs, rhs) in [("a::b", "a::c"), ("a.b", "a.c")] {
        assert_ne!(Ident::new(lhs), Ident::new(rhs));
    }
}
#[test]
fn test_control_and_whitespace_characters() {
    // Golden hashes of the single-byte inputs 0x00..=0x1F, indexed by byte value.
    let c0_golden: [u64; 32] = [
        13491658792156090086, // 0x00
        1589143368662234169,  // 0x01
        10656707396701532021, // 0x02
        8936171596589985943,  // 0x03
        4949149390491642062,  // 0x04
        12619172941123127036, // 0x05
        14609691773113461128, // 0x06
        905649811912880660,   // 0x07
        27146095367030173,    // 0x08
        15460968893599093751, // 0x09
        5102878630537141075,  // 0x0A
        13505710329181966436, // 0x0B
        2313578407557187463,  // 0x0C
        18304346233760503625, // 0x0D
        11678280355214419242, // 0x0E
        1327731869604357285,  // 0x0F
        12063498339717853243, // 0x10
        2461177000706403463,  // 0x11
        3604498279627892928,  // 0x12
        17346164203596173066, // 0x13
        3098284557260280679,  // 0x14
        12523788770039081765, // 0x15
        5498797734862201196,  // 0x16
        3512967940549931808,  // 0x17
        17306716638781378924, // 0x18
        9773804243523666113,  // 0x19
        18436899027930971181, // 0x1A
        15443613748034217196, // 0x1B
        11830630746654985175, // 0x1C
        996703353192860291,   // 0x1D
        2288280085764105422,  // 0x1E
        17245636785511140876, // 0x1F
    ];
    for (byte, expected) in (0x00u8..=0x1F).zip(c0_golden) {
        assert_eq!(Ident::new_bytes(&[byte]).as_u64(), expected);
    }
    // DEL sits outside the contiguous C0 range and is pinned separately.
    assert_eq!(Ident::new_bytes(&[0x7F]).as_u64(), 5852993145577577193);

    // Whitespace given as text; tab and newline match their byte-level values above.
    let whitespace: &[(&str, u64)] = &[
        (" ", 5215813110348550705),
        ("\t", 15460968893599093751),
        ("\n", 5102878630537141075),
        ("\r\n", 9179019608793778032),
    ];
    for &(text, expected) in whitespace {
        assert_eq!(Ident::new(text).as_u64(), expected);
    }

    // Every control byte, plus DEL, must hash to a distinct value.
    let mut seen = HashSet::new();
    for byte in (0x00u8..=0x1F).chain(std::iter::once(0x7F)) {
        assert!(seen.insert(Ident::new_bytes(&[byte]).as_u64()));
    }

    for (lhs, rhs) in [(" ", "\t"), ("\t", "\n"), ("\r\n", "\n")] {
        assert_ne!(Ident::new(lhs), Ident::new(rhs));
    }
}
#[test]
fn test_empty_string() {
    // Golden value for hashing zero bytes.
    const EMPTY_HASH: u64 = 1132974141190728019;

    let from_str = Ident::new("");
    let from_bytes = Ident::new_bytes(b"");

    assert_eq!(from_str.as_u64(), EMPTY_HASH);
    assert_eq!(from_bytes.as_u64(), EMPTY_HASH);
    assert_eq!(from_str, from_bytes);
    // The empty string must not collide with a single space.
    assert_ne!(from_str, Ident::new(" "));
}
}