use {
crate::{LaburnumError, Span},
std::{
fmt,
hash::{BuildHasher, Hash, Hasher},
},
};
/// Scrambles a 64-bit word with two xor-shift/multiply rounds followed by a
/// final xor-shift.
///
/// Only the mixing step lives here: nothing is added to the input first, so
/// callers fold in whatever offset they want beforehand. As a consequence an
/// input of `0` maps to `0`.
#[inline]
const fn splitmix64(z: u64) -> u64 {
    let round1 = (z ^ (z >> 30)).wrapping_mul(0xbf58_476d_1ce4_e5b9);
    let round2 = (round1 ^ (round1 >> 27)).wrapping_mul(0x94d0_49bb_1331_11eb);
    round2 ^ (round2 >> 31)
}
/// Additive offset applied to every input word/byte by `IdentHasher` before it
/// is run through `splitmix64`, so that a zero input does not feed a zero into
/// the mixer.
///
/// The value is the usual 64-bit golden-ratio constant (`2^64 / phi`).
const GOLDEN: u64 = 0x9e3779b97f4a7c15;
/// A fixed-size, 16-byte identifier.
///
/// Two encodings share the 16 bytes and are told apart by bit 7 of byte 15:
///
/// * **inline** (bit clear): inputs of at most 15 bytes are stored verbatim
///   in the leading bytes, zero padded, with the input length in byte 15;
/// * **hashed** (bit set): bytes 0..=14 are taken from a 32-byte digest and
///   byte 15 is the digest's byte 15 with bit 7 forced on.
///
/// Equality and ordering are the derived byte-wise comparisons of the array.
#[derive(
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
serde::Serialize,
serde::Deserialize,
)]
pub struct Ident([u8; 16]);
/// A standard `HashSet` of [`Ident`]s that hashes with [`IdentHashState`]
/// instead of the standard library's default hasher.
pub type IdentHashSet = std::collections::HashSet<Ident, IdentHashState>;
/// A standard `HashMap` keyed by [`Ident`] that hashes with
/// [`IdentHashState`] instead of the standard library's default hasher.
pub type IdentHashMap<V> = std::collections::HashMap<Ident, V, IdentHashState>;
impl Ident {
    /// The all-zero identifier.
    ///
    /// This is also exactly what the inline encoding produces for the empty
    /// string (no payload bytes, length byte `0`).
    pub const NONE: Self = Self([0u8; 16]);

    /// Packs `bytes` into the inline layout: payload in the leading bytes,
    /// zero padding after it, and the payload length in byte 15.
    ///
    /// The caller must guarantee `bytes.len() <= 15`; nothing here checks it.
    /// Within that limit the length byte never has bit 7 set, which is what
    /// keeps inline identifiers apart from those built by
    /// [`Ident::from_digest`].
    const fn inline_unchecked(bytes: &[u8]) -> Self {
        let mut out = [0u8; 16];
        let n = bytes.len();
        // `const fn` cannot use iterators, hence the manual copy loop.
        let mut i = 0;
        while i < n {
            out[i] = bytes[i];
            i += 1;
        }
        // Cannot truncate for the `n <= 15` inputs this function is meant for.
        out[15] = n as u8;
        Self(out)
    }

    /// Builds a hashed-mode identifier from a 32-byte digest.
    ///
    /// Bytes `0..=14` are copied from the digest unchanged. Byte 15 keeps the
    /// low seven bits of `digest[15]` and has bit 7 forced on as the
    /// hashed-mode tag. Digest bytes `16..=31` are not used.
    pub(crate) const fn from_digest(digest: &[u8; 32]) -> Self {
        let mut out = [0u8; 16];
        let mut i = 0;
        while i < 15 {
            out[i] = digest[i];
            i += 1;
        }
        out[15] = (digest[15] & 0x7f) | 0x80;
        Self(out)
    }

    /// Const constructor for short, user-facing identifiers.
    ///
    /// # Panics
    ///
    /// Panics (at compile time when evaluated in a const context) if `s` is
    /// longer than 15 bytes or starts with `$`.
    #[cfg(any(test, feature = "ident_constructor"))]
    pub const fn new_const(s: &str) -> Self {
        let bytes = s.as_bytes();
        assert!(
            bytes.len() <= 15,
            "Ident::new_const: identifier exceeds 15 bytes (only short keys may be const)"
        );
        assert!(
            bytes.is_empty() || bytes[0] != b'$',
            "Ident::new_const: a leading `$` is reserved for laburnum-internal partitions"
        );
        Self::inline_unchecked(bytes)
    }

    /// Const constructor for crate-internal keys, which must start with `$`.
    ///
    /// # Panics
    ///
    /// Panics (at compile time when evaluated in a const context) if `s` is
    /// longer than 15 bytes, is empty, or does not start with `$`.
    pub(crate) const fn new_const_internal(s: &str) -> Self {
        let bytes = s.as_bytes();
        assert!(
            bytes.len() <= 15,
            "Ident::new_const_internal: key exceeds 15 bytes"
        );
        // A hard `assert!`, matching the length check above and the prefix
        // check in `new_const`: a `debug_assert!` would let a key without the
        // `$` prefix through unnoticed in release builds.
        assert!(
            !bytes.is_empty() && bytes[0] == b'$',
            "Ident::new_const_internal: first-party partition keys must begin with `$`"
        );
        Self::inline_unchecked(bytes)
    }

    /// Runtime constructor with no restriction on length or leading `$`.
    pub(crate) fn new_internal(s: &str) -> Self {
        Self::from_input(s.as_bytes())
    }

    /// Chooses the encoding for raw input bytes: inline when they fit in 15
    /// bytes, otherwise the BLAKE3 digest of the input in hashed mode.
    fn from_input(bytes: &[u8]) -> Self {
        if bytes.len() <= 15 {
            Self::inline_unchecked(bytes)
        } else {
            Self::from_digest(blake3::hash(bytes).as_bytes())
        }
    }

    /// Public runtime constructor; forwards to the crate-internal one.
    #[cfg(any(test, feature = "ident_constructor"))]
    pub fn new(s: &str) -> Self {
        Self::new_internal(s)
    }

    /// Returns `true` when every byte is zero, i.e. `self` equals
    /// [`Ident::NONE`].
    pub const fn is_none(self) -> bool {
        self.const_eq(Self::NONE)
    }

    /// Byte-wise equality usable in const contexts, where `==` is not.
    pub const fn const_eq(self, other: Self) -> bool {
        let (a, b) = (self.0, other.0);
        let mut i = 0;
        while i < 16 {
            if a[i] != b[i] {
                return false;
            }
            i += 1;
        }
        true
    }

    /// Returns the raw 16-byte representation.
    pub const fn to_bytes(self) -> [u8; 16] {
        self.0
    }

    /// Splits the identifier into two little-endian `u64` words:
    /// `(bytes 0..8, bytes 8..16)`.
    #[inline]
    const fn lanes(self) -> (u64, u64) {
        let b = self.0;
        (
            u64::from_le_bytes([b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]]),
            u64::from_le_bytes([
                b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15],
            ]),
        )
    }
}
impl fmt::Debug for Ident {
    /// Renders as `Ident(<32 lowercase hex digits>)`.
    ///
    /// The 16 bytes are read as one little-endian 128-bit integer, so the
    /// digits for byte 15 come first and those for byte 0 come last.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wide = u128::from_le_bytes(self.to_bytes());
        write!(f, "Ident({wide:032x})")
    }
}
impl fmt::Display for Ident {
    /// Renders as 32 lowercase hex digits with no decoration.
    ///
    /// The 16 bytes are read as one little-endian 128-bit integer, so the
    /// digits for byte 15 come first and those for byte 0 come last.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wide = u128::from_le_bytes(self.to_bytes());
        write!(f, "{wide:032x}")
    }
}
impl Hash for Ident {
    /// Feeds the identifier to the hasher as two `u64` words: the
    /// little-endian value of bytes 0..8 first, then that of bytes 8..16.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let wide = u128::from_le_bytes(self.to_bytes());
        // Truncating cast on purpose: keeps the low 64 bits (bytes 0..8).
        state.write_u64(wide as u64);
        state.write_u64((wide >> 64) as u64);
    }
}
impl<T: Hash> From<Vec<T>> for Ident {
    /// Derives an identifier from the contents of `vec` by hashing each
    /// element, in order, into a fresh `ContentHasher` and finalising it.
    fn from(vec: Vec<T>) -> Self {
        let mut hasher = crate::hash::content::ContentHasher::new();
        vec.iter().for_each(|item| item.hash(&mut hasher));
        hasher.finish_ident()
    }
}
impl<T: Hash> From<&'_ [T]> for Ident {
    /// Derives an identifier from the contents of `slice` by hashing each
    /// element, in order, into a fresh `ContentHasher` and finalising it.
    fn from(slice: &'_ [T]) -> Self {
        slice
            .iter()
            .fold(
                crate::hash::content::ContentHasher::new(),
                |mut hasher, item| {
                    item.hash(&mut hasher);
                    hasher
                },
            )
            .finish_ident()
    }
}
/// Extension trait providing a fallible, span-aware constructor for
/// [`Ident`].
pub trait IdentExt {
/// Builds an [`Ident`] from `s`; `span` is the source location associated
/// with `s`, and the signature allows a [`LaburnumError`] to be returned.
fn try_new_with_span(s: &str, span: Span) -> Result<Ident, LaburnumError>;
}
impl IdentExt for Ident {
    /// Builds the identifier with `Ident::new_internal`.
    ///
    /// The span is accepted for interface compatibility but not used, and
    /// this implementation always returns `Ok`.
    fn try_new_with_span(s: &str, _span: Span) -> Result<Ident, LaburnumError> {
        let ident = Self::new_internal(s);
        Ok(ident)
    }
}
/// A `Hasher` that keeps a single 64-bit running state and mixes every
/// input into it with `splitmix64`.
#[derive(Clone)]
pub struct IdentHasher {
// Running hash state; starts at 0 and is replaced on every write.
state: u64,
}
impl Default for IdentHasher {
fn default() -> Self {
Self::new()
}
}
impl IdentHasher {
pub const fn new() -> Self {
Self { state: 0 }
}
}
impl Hasher for IdentHasher {
    /// Absorbs arbitrary bytes one at a time: each byte is offset by
    /// `GOLDEN`, xored into the state, and the result is mixed.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        self.state = bytes.iter().fold(self.state, |acc, &byte| {
            splitmix64(acc ^ u64::from(byte).wrapping_add(GOLDEN))
        });
    }

    /// Absorbs a whole 64-bit word: the word is offset by `GOLDEN` and mixed
    /// on its own, then xored into the state, which is mixed again.
    #[inline]
    fn write_u64(&mut self, value: u64) {
        let premixed = splitmix64(value.wrapping_add(GOLDEN));
        self.state = splitmix64(self.state ^ premixed);
    }

    /// Returns the running state after one more mixing pass; the state
    /// itself is left untouched.
    #[inline]
    fn finish(&self) -> u64 {
        splitmix64(self.state)
    }
}
/// Stateless `BuildHasher` that hands out [`IdentHasher`]s.
///
/// It carries no seed, so every hasher it builds starts from the same state.
#[derive(Clone, Debug, Default)]
pub struct IdentHashState;
impl BuildHasher for IdentHashState {
    type Hasher = IdentHasher;

    /// Returns a fresh hasher in its initial (zero) state.
    #[inline]
    fn build_hasher(&self) -> Self::Hasher {
        IdentHasher::default()
    }
}
/// Constructor shims for hash sets that use a non-default hasher.
///
/// The standard library only offers `new` / `with_capacity` on sets using
/// its default hasher; this trait supplies the same two constructors for the
/// set types it is implemented on.
pub trait HashSetExt {
/// Creates an empty set.
fn new() -> Self;
/// Creates an empty set, passing `capacity` through as the requested
/// initial capacity.
fn with_capacity(capacity: usize) -> Self;
}
impl<T> HashSetExt for std::collections::HashSet<T, IdentHashState> {
    /// Creates an empty set hashed with `IdentHashState`.
    #[inline]
    fn new() -> Self {
        // `HashSet`'s `Default` builds the set from the hasher's `Default`,
        // which for the unit struct `IdentHashState` is the same value.
        <Self as Default>::default()
    }

    /// Creates an empty set hashed with `IdentHashState`, requesting room
    /// for at least `capacity` elements.
    #[inline]
    fn with_capacity(capacity: usize) -> Self {
        let build_hasher = IdentHashState;
        Self::with_capacity_and_hasher(capacity, build_hasher)
    }
}
/// Constructor shims for hash maps that use a non-default hasher.
///
/// The standard library only offers `new` / `with_capacity` on maps using
/// its default hasher; this trait supplies the same two constructors for the
/// map types it is implemented on.
pub trait HashMapExt {
/// Creates an empty map.
fn new() -> Self;
/// Creates an empty map, passing `capacity` through as the requested
/// initial capacity.
fn with_capacity(capacity: usize) -> Self;
}
impl<K, V> HashMapExt for std::collections::HashMap<K, V, IdentHashState> {
    /// Creates an empty map hashed with `IdentHashState`.
    #[inline]
    fn new() -> Self {
        // `HashMap`'s `Default` builds the map from the hasher's `Default`,
        // which for the unit struct `IdentHashState` is the same value.
        <Self as Default>::default()
    }

    /// Creates an empty map hashed with `IdentHashState`, requesting room
    /// for at least `capacity` entries.
    #[inline]
    fn with_capacity(capacity: usize) -> Self {
        let build_hasher = IdentHashState;
        Self::with_capacity_and_hasher(capacity, build_hasher)
    }
}
#[cfg(test)]
mod tests {
    use {super::*, std::collections::HashSet};

    /// Mode bit of an identifier: `0` for inline, `0x80` for hashed.
    fn tag(ident: Ident) -> u8 {
        let [.., last] = ident.to_bytes();
        last & 0x80
    }

    /// Short inputs are deterministic, case-sensitive, and stay inline.
    #[test]
    fn inline_determinism_and_equivalence() {
        for s in ["a", "hello"] {
            assert_eq!(Ident::new(s), Ident::new(s));
        }
        for (left, right) in [("a", "A"), ("Hello", "hello")] {
            assert_ne!(Ident::new(left), Ident::new(right));
        }
        for s in ["hello", "a_15_byte_strin"] {
            assert_eq!(tag(Ident::new(s)), 0);
        }
    }

    /// Inline layout: payload, then zero padding, then the length in byte 15.
    #[test]
    fn inline_encodes_bytes_verbatim() {
        let bytes = Ident::new("count").to_bytes();
        let (payload, _) = bytes.split_at(5);
        assert_eq!(payload, b"count");
        assert_eq!(bytes[15], 5);
        assert!(!bytes[5..15].iter().any(|&b| b != 0));
    }

    /// 15 bytes is the last inline length; 16 bytes switches to hashed mode.
    #[test]
    fn boundary_15_inline_16_hashed() {
        let fifteen = "abcdefghijklmno";
        let sixteen = "abcdefghijklmnop";
        assert_eq!(fifteen.len(), 15);
        assert_eq!(sixteen.len(), 16);

        let short = Ident::new(fifteen);
        let long = Ident::new(sixteen);
        assert_eq!(tag(short), 0, "15 bytes must inline");
        assert_eq!(tag(long), 0x80, "16 bytes must hash");
        assert_eq!(long, Ident::new(sixteen));
        assert_ne!(long, Ident::new("abcdefghijklmnoq"));
    }

    /// An inline identifier and a hashed one carry different tags, so they
    /// can never compare equal.
    #[test]
    fn cross_mode_disjoint() {
        let inline = Ident::new("short");
        let hashed = Ident::new("a_long_identifier_over_fifteen_bytes");
        assert_eq!(tag(inline), 0);
        assert_eq!(tag(hashed), 0x80);
        assert_ne!(inline, hashed);
    }

    /// `NONE` is the empty string's encoding and nothing else's.
    #[test]
    fn none_is_structurally_reserved() {
        assert!(Ident::NONE.is_none());
        assert_eq!(Ident::new(""), Ident::NONE);
        for s in ["a", "a_long_identifier_over_fifteen_bytes"] {
            assert!(!Ident::new(s).is_none());
        }
    }

    /// The length byte keeps a string distinct from its own prefixes.
    #[test]
    fn canonical_length_field_prevents_prefix_collision() {
        for (shorter, longer) in [("ab", "abc"), ("a", "aa")] {
            assert_ne!(Ident::new(shorter), Ident::new(longer));
        }
    }

    /// Const constructors agree with the runtime constructor.
    #[test]
    fn new_const_matches_runtime() {
        const C: Ident = Ident::new_const("clients");
        const INTERNAL: Ident = Ident::new_const_internal("$clients");
        assert_eq!(C, Ident::new("clients"));
        assert_eq!(INTERNAL, Ident::new("$clients"));
    }

    /// Content-derived identifiers are deterministic, input-sensitive, and
    /// tagged as hashed.
    #[test]
    fn content_finalizer_is_deterministic_and_hashed() {
        let a = Ident::from(vec![1u64, 2, 3]);
        let b = Ident::from(vec![1u64, 2, 3]);
        let c = Ident::from(vec![1u64, 2, 4]);
        assert_eq!(a, b);
        assert_ne!(a, c);
        assert_eq!(tag(a), 0x80, "content-derived idents are hashed mode");
    }

    /// Distinct short names remain distinct members of an `IdentHashSet`.
    #[test]
    fn map_hashing_distributes() {
        let names = ["i", "j", "x", "n", "self", "len", "count", "tmp"];
        let mut set = IdentHashSet::with_hasher(IdentHashState);
        for name in names {
            assert!(set.insert(Ident::new(name)));
        }
        assert_eq!(set.len(), 8);
    }

    /// The top seven bits of the hash vary across typical short names.
    #[test]
    fn mixer_diffuses_into_high_bits() {
        let state = IdentHashState;
        let names = [
            "i", "j", "x", "n", "a", "b", "c", "d", "self", "len", "count", "tmp",
            "idx", "key", "val", "out",
        ];
        let top_bits: HashSet<u64> = names
            .iter()
            .map(|&name| state.hash_one(Ident::new(name)) >> 57)
            .collect();
        assert!(
            top_bits.len() > 8,
            "mixer failed to diffuse into the high bits ({} distinct top-bit groups)",
            top_bits.len()
        );
    }

    /// `const_eq` agrees with `==`, both at compile time and at run time.
    #[test]
    fn const_eq_matches_eq() {
        const A: Ident = Ident::new_const("count");
        const B: Ident = Ident::new_const("count");
        const C: Ident = Ident::new_const("other");
        const _: () = assert!(A.const_eq(B));
        const _: () = assert!(!A.const_eq(C));
        for other in [B, C] {
            assert_eq!(A.const_eq(other), A == other);
        }
    }

    /// For inline identifiers byte 15 is exactly the length: tag and spare
    /// bits are all zero.
    #[test]
    fn inline_byte15_is_canonical() {
        for s in ["", "a", "count", "abcdefghijklmno"] {
            let [.., b15] = Ident::new(s).to_bytes();
            assert_eq!(b15 & 0x80, 0, "inline tag bit must be clear");
            assert_eq!(b15 >> 4, 0, "byte 15 tag/spare bits (4-7) must be zero");
            assert_eq!(b15 as usize, s.len(), "low nibble is the length");
        }
    }

    /// `NONE` works as an ordinary map key alongside real identifiers.
    #[test]
    fn none_usable_as_map_key() {
        let mut map = IdentHashMap::<u32>::with_hasher(IdentHashState);
        map.insert(Ident::NONE, 1);
        map.insert(Ident::new("a"), 2);
        assert_eq!(map.get(&Ident::NONE), Some(&1));
        assert_eq!(map.len(), 2);
    }
}