use rustc_hash::FxBuildHasher;
use std::borrow::Borrow;
use std::collections::HashMap;
use std::collections::TryReserveError;
use std::hash::{BuildHasher, Hash};
/// Errors reported by `KeyInterner` when interning or preallocating.
///
/// `#[non_exhaustive]` so new variants can be added without a breaking
/// change; downstream matches must carry a wildcard arm.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum InternerError {
    /// The interner already holds `KeyInterner::MAX_CAPACITY` unique keys
    /// (or a preallocation request exceeded that bound).
    CapacityExceeded,
    /// An internal `try_reserve` on the index map or key vector failed;
    /// carries the standard-library reservation error.
    AllocationFailed(TryReserveError),
}
impl std::fmt::Display for InternerError {
    /// Human-readable description; `AllocationFailed` appends the
    /// underlying reservation error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::AllocationFailed(e) => {
                write!(f, "KeyInterner allocation failed: {e}")
            }
            Self::CapacityExceeded => f.write_str(
                "KeyInterner is at MAX_CAPACITY; refusing to intern another unique key",
            ),
        }
    }
}
impl std::error::Error for InternerError {
    /// Only `AllocationFailed` wraps an underlying error; everything else
    /// has no source.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        if let Self::AllocationFailed(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
impl From<TryReserveError> for InternerError {
fn from(e: TryReserveError) -> Self {
Self::AllocationFailed(e)
}
}
/// Interns keys of type `K`, assigning each unique key a dense `u64`
/// handle starting at 0 in insertion order.
pub struct KeyInterner<K, S = FxBuildHasher> {
    // Reverse map: key -> handle, used by `try_intern`/`get_handle`.
    index: HashMap<K, u64, S>,
    // Forward map: a handle is an index into this vec (see `resolve`).
    keys: Vec<K>,
    // Bumped (wrapping) on every `clear`, so callers can tell that handles
    // issued before a clear belong to an older epoch.
    generation: u64,
}
impl<K, S> std::fmt::Debug for KeyInterner<K, S> {
    /// Emits only counts and the generation — never key contents — so
    /// sensitive key material cannot leak through `{:?}` output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("KeyInterner");
        out.field("len", &self.keys.len());
        out.field("capacity", &self.keys.capacity());
        out.field("index_capacity", &self.index.capacity());
        out.field("generation", &self.generation);
        // `finish_non_exhaustive` renders a trailing `..`, signalling that
        // fields (the keys themselves) were deliberately omitted.
        out.finish_non_exhaustive()
    }
}
impl<K, S> Clone for KeyInterner<K, S>
where
K: Clone + Eq + Hash,
S: BuildHasher + Clone,
{
fn clone(&self) -> Self {
let len = self.keys.len();
let hasher = self.index.hasher().clone();
let mut new_index = HashMap::with_capacity_and_hasher(len, hasher);
for (k, &v) in self.index.iter() {
new_index.insert(k.clone(), v);
}
let new_keys = self.keys.clone();
Self {
index: new_index,
keys: new_keys,
generation: self.generation,
}
}
}
impl<K, S> Default for KeyInterner<K, S>
where
S: Default,
{
fn default() -> Self {
Self {
index: HashMap::with_hasher(S::default()),
keys: Vec::new(),
generation: 0,
}
}
}
// Convenience constructors fixing the hasher to `FxBuildHasher`.
impl<K> KeyInterner<K, FxBuildHasher> {
    /// An empty interner using the fast, non-DoS-resistant Fx hasher.
    #[must_use]
    pub fn new() -> Self {
        Self::with_hasher(FxBuildHasher)
    }

    /// Best-effort preallocation; see `with_capacity_and_hasher` for the
    /// clamping behavior.
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self
    where
        K: Eq + Hash,
    {
        Self::with_capacity_and_hasher(capacity, FxBuildHasher)
    }

    /// Fallible preallocation; see `try_with_capacity_and_hasher`.
    pub fn try_with_capacity(capacity: usize) -> Result<Self, InternerError>
    where
        K: Eq + Hash,
    {
        Self::try_with_capacity_and_hasher(capacity, FxBuildHasher)
    }
}
impl<K, S> KeyInterner<K, S> {
    /// Hard cap on the number of unique keys; guards handle arithmetic
    /// and bounds preallocation requests.
    pub const MAX_CAPACITY: usize = (isize::MAX as usize) / 64;

    /// An empty interner using `hash_builder`, with generation 0.
    #[must_use]
    pub fn with_hasher(hash_builder: S) -> Self {
        Self {
            index: HashMap::with_hasher(hash_builder),
            keys: Vec::new(),
            generation: 0,
        }
    }

    /// Best-effort preallocation: `capacity` is clamped to
    /// `MAX_CAPACITY` and reservation failures are deliberately ignored
    /// (the interner starts empty and grows on demand either way).
    /// Use `try_with_capacity_and_hasher` to observe failures.
    #[must_use]
    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self
    where
        K: Eq + Hash,
        S: BuildHasher,
    {
        let wanted = capacity.min(Self::MAX_CAPACITY);
        let mut interner = Self::with_hasher(hash_builder);
        let _ = interner.keys.try_reserve(wanted);
        let _ = interner.index.try_reserve(wanted);
        interner
    }

    /// Fallible preallocation.
    ///
    /// # Errors
    /// `CapacityExceeded` if `capacity > MAX_CAPACITY`;
    /// `AllocationFailed` if either reservation fails.
    pub fn try_with_capacity_and_hasher(
        capacity: usize,
        hash_builder: S,
    ) -> Result<Self, InternerError>
    where
        K: Eq + Hash,
        S: BuildHasher,
    {
        if capacity > Self::MAX_CAPACITY {
            return Err(InternerError::CapacityExceeded);
        }
        let mut interner = Self::with_hasher(hash_builder);
        interner.keys.try_reserve(capacity)?;
        interner.index.try_reserve(capacity)?;
        Ok(interner)
    }

    /// Borrows the hash builder in use.
    #[must_use]
    pub fn hasher(&self) -> &S {
        self.index.hasher()
    }

    /// Current generation counter; bumped by `clear`.
    #[must_use]
    pub fn generation(&self) -> u64 {
        self.generation
    }
}
impl<K, S> KeyInterner<K, S>
where
    K: Eq + Hash + Clone,
    S: BuildHasher,
{
    /// Interns `key`, returning its stable handle.
    ///
    /// # Panics
    /// On capacity exhaustion or allocation failure; prefer `try_intern`
    /// where those must be handled gracefully.
    #[track_caller]
    pub fn intern(&mut self, key: &K) -> u64 {
        match self.try_intern(key) {
            Ok(handle) => handle,
            Err(InternerError::AllocationFailed(e)) => {
                panic!("KeyInterner::intern: allocation failed: {e}")
            }
            Err(InternerError::CapacityExceeded) => panic!(
                "KeyInterner::intern: reached MAX_CAPACITY ({}); use try_intern to handle this gracefully",
                Self::MAX_CAPACITY
            ),
        }
    }

    /// Returns the existing handle for `key`, or assigns the next dense
    /// handle (`len()` as `u64`) to a clone of it.
    ///
    /// # Errors
    /// `CapacityExceeded` at `MAX_CAPACITY`; `AllocationFailed` if either
    /// internal reservation fails.
    pub fn try_intern(&mut self, key: &K) -> Result<u64, InternerError> {
        if let Some(&existing) = self.index.get(key) {
            return Ok(existing);
        }
        let next = self.keys.len();
        if next >= Self::MAX_CAPACITY {
            return Err(InternerError::CapacityExceeded);
        }
        // Reserve in both structures up front so the insert/push pair
        // below cannot fail halfway and leave them out of sync.
        self.keys.try_reserve(1)?;
        self.index.try_reserve(1)?;
        let handle = next as u64;
        self.index.insert(key.clone(), handle);
        self.keys.push(key.clone());
        Ok(handle)
    }
}
impl<K, S> KeyInterner<K, S>
where
    K: Eq + Hash,
    S: BuildHasher,
{
    /// Handle previously assigned to `key`, if any. Read-only.
    #[must_use]
    pub fn get_handle(&self, key: &K) -> Option<u64> {
        self.index.get(key).copied()
    }

    /// Handle lookup through any borrowed form of the key (e.g. `&str`
    /// for `String` keys), avoiding an owned-key allocation.
    #[must_use]
    pub fn get_handle_borrowed<Q>(&self, key: &Q) -> Option<u64>
    where
        K: Borrow<Q>,
        Q: Eq + Hash + ?Sized,
    {
        self.index.get(key).copied()
    }

    /// Releases excess capacity in both internal structures.
    pub fn shrink_to_fit(&mut self) {
        self.keys.shrink_to_fit();
        self.index.shrink_to_fit();
    }

    /// `clear` followed by `shrink_to_fit`: drops contents and capacity.
    pub fn clear_shrink(&mut self) {
        self.clear();
        self.shrink_to_fit();
    }
}
impl<K, S> KeyInterner<K, S> {
    /// Key associated with `handle`, or `None` when the handle is out of
    /// range (including handles that don't fit in `usize`).
    #[must_use]
    pub fn resolve(&self, handle: u64) -> Option<&K> {
        usize::try_from(handle).ok().and_then(|i| self.keys.get(i))
    }

    /// Number of unique keys currently interned.
    #[must_use]
    pub fn len(&self) -> usize {
        self.keys.len()
    }

    /// `true` when no keys are interned.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Removes every key and bumps the generation (wrapping on overflow).
    /// Handle numbering restarts at 0, so old handles get reused.
    pub fn clear(&mut self) {
        self.index.clear();
        self.keys.clear();
        self.generation = self.generation.wrapping_add(1);
    }

    /// Rough memory footprint in bytes, derived from current capacities.
    /// Saturating arithmetic keeps the estimate well-defined on overflow.
    #[must_use]
    pub fn approx_bytes(&self) -> usize {
        let index_bytes = self
            .index
            .capacity()
            .saturating_mul(std::mem::size_of::<(K, u64)>());
        let keys_bytes = self
            .keys
            .capacity()
            .saturating_mul(std::mem::size_of::<K>());
        std::mem::size_of::<Self>()
            .saturating_add(index_bytes)
            .saturating_add(keys_bytes)
    }

    /// Iterates `(handle, key)` pairs in ascending handle order.
    pub fn iter(&self) -> impl Iterator<Item = (u64, &K)> + '_ {
        self.keys
            .iter()
            .enumerate()
            .map(|(handle, key)| (handle as u64, key))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Dense handles from 0, dedup on re-intern, borrowed lookup, and
    // handle -> key resolution.
    #[test]
    fn key_interner_basic_flow() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        assert!(interner.is_empty());
        let a = interner.intern(&"a".to_owned());
        let b = interner.intern(&"b".to_owned());
        let a2 = interner.intern(&"a".to_owned());
        assert_eq!(a, a2);
        assert_ne!(a, b);
        assert_eq!(interner.len(), 2);
        assert_eq!(interner.get_handle_borrowed("b"), Some(b));
        assert_eq!(interner.resolve(a).map(String::as_str), Some("a"));
    }

    // `iter` yields (handle, key) pairs in insertion (= handle) order.
    #[test]
    fn key_interner_iter() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        interner.intern(&"x".to_owned());
        interner.intern(&"y".to_owned());
        let mut pairs = interner.iter();
        assert_eq!(pairs.next(), Some((0, &"x".to_owned())));
        assert_eq!(pairs.next(), Some((1, &"y".to_owned())));
        assert_eq!(pairs.next(), None);
    }

    // The Debug impl must stay redacted: counts and generation only,
    // never key contents.
    #[test]
    fn debug_impl_does_not_leak_keys() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        interner.intern(&"sensitive_token_abc123".to_owned());
        interner.intern(&"another_secret".to_owned());
        let formatted = format!("{interner:?}");
        assert!(
            !formatted.contains("sensitive_token_abc123"),
            "Debug must not include key contents; got: {formatted}"
        );
        assert!(
            !formatted.contains("another_secret"),
            "Debug must not include key contents; got: {formatted}"
        );
        assert!(formatted.contains("len"), "Debug missing len: {formatted}");
        assert!(
            formatted.contains("generation"),
            "Debug missing generation: {formatted}"
        );
    }

    // `try_with_capacity` is the strict variant: oversized requests error.
    #[test]
    fn try_with_capacity_rejects_oversized() {
        let err = KeyInterner::<String>::try_with_capacity(
            KeyInterner::<String>::MAX_CAPACITY.saturating_add(1),
        )
        .unwrap_err();
        assert_eq!(err, InternerError::CapacityExceeded);
    }

    // `with_capacity` is best-effort: clamps to MAX_CAPACITY and ignores
    // reservation failure rather than panicking. (Skipped under Miri —
    // huge reservations are slow/unsupported there.)
    #[cfg_attr(miri, ignore)]
    #[test]
    fn with_capacity_clamps_silently() {
        let interner = KeyInterner::<u32>::with_capacity(usize::MAX);
        assert!(interner.is_empty());
        assert!(interner.approx_bytes() < usize::MAX / 2);
    }

    // Generation changes on clear()/clear_shrink() but never on intern().
    #[test]
    fn generation_bumps_on_clear() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        let g0 = interner.generation();
        interner.intern(&"a".to_owned());
        assert_eq!(interner.generation(), g0, "intern must not bump generation");
        interner.clear();
        let g1 = interner.generation();
        assert_ne!(g0, g1, "clear must bump generation");
        interner.clear_shrink();
        let g2 = interner.generation();
        assert_ne!(g1, g2, "clear_shrink must bump generation");
    }

    // Documents intended behavior: handles restart at 0 after clear, and
    // the generation is how callers distinguish the epochs.
    #[test]
    fn handles_reset_to_zero_after_clear_documented_behavior() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        let h_alice = interner.intern(&"alice".to_owned());
        assert_eq!(h_alice, 0);
        let g_alice = interner.generation();
        interner.clear();
        let h_bob = interner.intern(&"bob".to_owned());
        assert_eq!(h_bob, 0, "handle 0 must be reused after clear");
        assert_ne!(g_alice, interner.generation(), "generation must differ");
    }

    // try_intern succeeds repeatedly and stays idempotent per key.
    #[test]
    fn try_intern_happy_path_and_idempotent() {
        let mut interner: KeyInterner<u32> = KeyInterner::new();
        for i in 0..64u32 {
            assert!(interner.try_intern(&i).is_ok());
        }
        assert_eq!(interner.try_intern(&0u32).unwrap(), 0);
    }

    // The S type parameter is genuinely pluggable (RandomState here),
    // including borrowed lookups through it.
    #[test]
    fn custom_hasher_compiles_and_works() {
        use std::collections::hash_map::RandomState;
        let mut interner: KeyInterner<String, RandomState> =
            KeyInterner::with_hasher(RandomState::new());
        let h = interner.intern(&"k".to_owned());
        assert_eq!(h, 0);
        assert_eq!(
            interner.get_handle_borrowed("k"),
            Some(0),
            "custom hasher must still support borrowed lookups"
        );
    }

    // approx_bytes always accounts for at least the struct itself.
    #[test]
    fn approx_bytes_saturates_on_overflow() {
        let interner = KeyInterner::<u8>::new();
        let bytes = interner.approx_bytes();
        assert!(bytes >= std::mem::size_of::<KeyInterner<u8>>());
    }

    // Clone must copy contents, handles, and the generation counter.
    #[test]
    fn clone_propagates_generation_and_contents() {
        let mut interner: KeyInterner<String> = KeyInterner::new();
        interner.intern(&"a".to_owned());
        interner.clear();
        interner.intern(&"b".to_owned());
        let cloned = interner.clone();
        assert_eq!(cloned.generation(), interner.generation());
        assert_eq!(cloned.len(), interner.len());
        assert_eq!(cloned.get_handle_borrowed("b"), Some(0));
    }
}
// Property-based tests (proptest). All are `#[cfg_attr(miri, ignore)]`
// because proptest's case generation is far too slow under Miri.
#[cfg(test)]
mod property_tests {
    use super::*;
    use proptest::prelude::*;

    // Handle-assignment properties: dense, sequential, starting at 0.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_handles_sequential_from_zero(
            keys in prop::collection::vec(any::<u32>(), 1..50)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let mut unique_keys = Vec::new();
            for key in keys {
                if !unique_keys.contains(&key) {
                    unique_keys.push(key);
                    let handle = interner.intern(&key);
                    // nth unique key must get handle n-1.
                    let expected_handle = (unique_keys.len() - 1) as u64;
                    prop_assert_eq!(handle, expected_handle);
                }
            }
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_first_key_gets_zero(key in any::<u32>()) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let handle = interner.intern(&key);
            prop_assert_eq!(handle, 0);
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_different_keys_different_handles(
            key1 in any::<u32>(),
            key2 in any::<u32>()
        ) {
            prop_assume!(key1 != key2);
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let h1 = interner.intern(&key1);
            let h2 = interner.intern(&key2);
            prop_assert_ne!(h1, h2);
        }
    }

    // Idempotence: re-interning the same key never changes state.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_intern_idempotent(
            key in any::<u32>(),
            repeat_count in 1usize..10
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let first_handle = interner.intern(&key);
            for _ in 0..repeat_count {
                let handle = interner.intern(&key);
                prop_assert_eq!(handle, first_handle);
            }
            prop_assert_eq!(interner.len(), 1);
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_reintern_no_length_increase(
            keys in prop::collection::vec(any::<u32>(), 1..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for &key in &keys {
                interner.intern(&key);
            }
            let len_after_first = interner.len();
            // Second pass over identical keys must be a no-op for len.
            for &key in &keys {
                interner.intern(&key);
            }
            prop_assert_eq!(interner.len(), len_after_first);
        }
    }

    // Round-trip properties between intern/get_handle and resolve.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_intern_resolve_roundtrip(
            keys in prop::collection::vec(any::<u32>(), 0..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in keys {
                let handle = interner.intern(&key);
                prop_assert_eq!(interner.resolve(handle), Some(&key));
            }
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_get_handle_resolve_consistent(
            keys in prop::collection::vec(any::<u32>(), 1..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for &key in &keys {
                interner.intern(&key);
            }
            for &key in &keys {
                if let Some(handle) = interner.get_handle(&key) {
                    prop_assert_eq!(interner.resolve(handle), Some(&key));
                }
            }
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_all_handles_valid_up_to_len(
            keys in prop::collection::vec(0u32..50, 1..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in keys {
                interner.intern(&key);
            }
            let len = interner.len() as u64;
            // Every handle below len resolves; everything at/above is None.
            for handle in 0..len {
                prop_assert!(interner.resolve(handle).is_some());
            }
            for handle in len..(len + 10) {
                prop_assert_eq!(interner.resolve(handle), None);
            }
        }
    }

    // get_handle is a pure, read-only lookup.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_get_handle_missing_returns_none(
            interned_keys in prop::collection::vec(0u32..20, 1..10),
            query_key in 20u32..40
        ) {
            // Disjoint ranges guarantee query_key was never interned.
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in interned_keys {
                interner.intern(&key);
            }
            prop_assert_eq!(interner.get_handle(&query_key), None);
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_get_handle_read_only(
            keys in prop::collection::vec(any::<u32>(), 1..20),
            query_key in any::<u32>()
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in keys {
                interner.intern(&key);
            }
            let len_before = interner.len();
            let _ = interner.get_handle(&query_key);
            let len_after = interner.len();
            prop_assert_eq!(len_before, len_after);
        }
    }

    // len()/is_empty() agree with a reference HashSet at every step.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_len_equals_unique_keys(
            keys in prop::collection::vec(any::<u32>(), 0..50)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in &keys {
                interner.intern(key);
            }
            let unique_count = {
                let mut unique = std::collections::HashSet::new();
                for key in keys {
                    unique.insert(key);
                }
                unique.len()
            };
            prop_assert_eq!(interner.len(), unique_count);
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_is_empty_consistent_with_len(
            keys in prop::collection::vec(any::<u32>(), 0..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let mut unique_keys = std::collections::HashSet::new();
            for key in keys {
                interner.intern(&key);
                unique_keys.insert(key);
                prop_assert_eq!(interner.is_empty(), unique_keys.is_empty());
                prop_assert_eq!(interner.len(), unique_keys.len());
            }
        }
    }

    // Clearing: state reset, handle invalidation, reuse, generation bumps.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_clear_resets_state(
            keys in prop::collection::vec(any::<u32>(), 1..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in keys {
                interner.intern(&key);
            }
            interner.clear_shrink();
            prop_assert!(interner.is_empty());
            prop_assert_eq!(interner.len(), 0);
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_clear_invalidates_handles(
            keys in prop::collection::vec(any::<u32>(), 1..20)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let mut handles = Vec::new();
            for key in &keys {
                let handle = interner.intern(key);
                handles.push(handle);
            }
            interner.clear_shrink();
            // Pre-clear handles and keys must all miss afterwards.
            for handle in handles {
                prop_assert_eq!(interner.resolve(handle), None);
            }
            for key in keys {
                prop_assert_eq!(interner.get_handle(&key), None);
            }
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_usable_after_clear(
            keys1 in prop::collection::vec(any::<u32>(), 1..20),
            keys2 in prop::collection::vec(any::<u32>(), 1..20)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            for key in keys1 {
                interner.intern(&key);
            }
            interner.clear_shrink();
            // Handle numbering restarts from 0 after a clear.
            if let Some(&first_key) = keys2.first() {
                let handle = interner.intern(&first_key);
                prop_assert_eq!(handle, 0);
            }
        }
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_generation_strictly_increases_on_clear(
            rounds in 1usize..20
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let mut last = interner.generation();
            for i in 0..rounds {
                interner.intern(&(i as u32));
                interner.clear();
                let now = interner.generation();
                prop_assert_ne!(now, last);
                last = now;
            }
        }
    }

    // Full model check against a straightforward HashMap-based oracle.
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_matches_reference_implementation(
            keys in prop::collection::vec(0u32..50, 0..50)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let mut reference: std::collections::HashMap<u32, u64> = std::collections::HashMap::new();
            let mut next_handle: u64 = 0;
            for key in keys {
                let handle = interner.intern(&key);
                let ref_handle = *reference.entry(key).or_insert_with(|| {
                    let h = next_handle;
                    next_handle += 1;
                    h
                });
                prop_assert_eq!(handle, ref_handle);
                prop_assert_eq!(interner.len(), reference.len());
                // Every previously interned key must still round-trip.
                for (&ref_key, &ref_handle) in &reference {
                    prop_assert_eq!(interner.get_handle(&ref_key), Some(ref_handle));
                    prop_assert_eq!(interner.resolve(ref_handle), Some(&ref_key));
                }
            }
        }
    }

    // approx_bytes is monotone in growth (capacity never shrinks here).
    proptest! {
        #[cfg_attr(miri, ignore)]
        #[test]
        fn prop_approx_bytes_increases(
            keys in prop::collection::vec(any::<u32>(), 10..30)
        ) {
            let mut interner: KeyInterner<u32> = KeyInterner::new();
            let base_bytes = interner.approx_bytes();
            for key in keys {
                interner.intern(&key);
            }
            let after_bytes = interner.approx_bytes();
            prop_assert!(after_bytes >= base_bytes);
        }
    }
}