use rustc_hash::FxHashMap;
use std::borrow::Borrow;
use std::hash::Hash;
#[derive(Clone, Debug)]
pub struct KeyInterner<K> {
index: FxHashMap<K, u64>,
keys: Vec<K>,
}
impl<K> Default for KeyInterner<K> {
fn default() -> Self {
Self::new()
}
}
impl<K> KeyInterner<K> {
#[must_use]
pub fn new() -> Self {
Self {
index: FxHashMap::default(),
keys: Vec::new(),
}
}
#[must_use]
pub fn with_capacity(capacity: usize) -> Self {
Self {
index: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
keys: Vec::with_capacity(capacity),
}
}
}
impl<K> KeyInterner<K>
where
K: Eq + Hash + Clone,
{
pub fn intern(&mut self, key: &K) -> u64 {
if let Some(&id) = self.index.get(key) {
return id;
}
let id = self.keys.len() as u64;
self.keys.push(key.clone());
self.index.insert(key.clone(), id);
id
}
}
impl<K> KeyInterner<K>
where
K: Eq + Hash,
{
#[must_use]
pub fn get_handle(&self, key: &K) -> Option<u64> {
self.get_handle_borrowed(key)
}
#[must_use]
pub fn get_handle_borrowed<Q>(&self, key: &Q) -> Option<u64>
where
K: Borrow<Q>,
Q: Eq + Hash + ?Sized,
{
self.index.get(key).copied()
}
pub fn shrink_to_fit(&mut self) {
self.index.shrink_to_fit();
self.keys.shrink_to_fit();
}
pub fn clear_shrink(&mut self) {
self.clear();
self.shrink_to_fit();
}
}
impl<K> KeyInterner<K> {
#[must_use]
pub fn resolve(&self, handle: u64) -> Option<&K> {
let index = usize::try_from(handle).ok()?;
self.keys.get(index)
}
#[must_use]
pub fn len(&self) -> usize {
self.keys.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.keys.is_empty()
}
pub fn clear(&mut self) {
self.index.clear();
self.keys.clear();
}
#[must_use]
pub fn approx_bytes(&self) -> usize {
std::mem::size_of::<Self>()
+ self.index.capacity() * std::mem::size_of::<(K, u64)>()
+ self.keys.capacity() * std::mem::size_of::<K>()
}
pub fn iter(&self) -> impl Iterator<Item = (u64, &K)> + '_ {
self.keys.iter().enumerate().map(|(i, k)| (i as u64, k))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn key_interner_basic_flow() {
let mut interner: KeyInterner<String> = KeyInterner::new();
assert!(interner.is_empty());
let a = interner.intern(&"a".to_owned());
let b = interner.intern(&"b".to_owned());
let a2 = interner.intern(&"a".to_owned());
assert_eq!(a, a2);
assert_ne!(a, b);
assert_eq!(interner.len(), 2);
assert_eq!(interner.get_handle_borrowed("b"), Some(b));
assert_eq!(interner.resolve(a).map(String::as_str), Some("a"));
}
#[test]
fn key_interner_iter() {
let mut interner: KeyInterner<String> = KeyInterner::new();
interner.intern(&"x".to_owned());
interner.intern(&"y".to_owned());
let mut pairs = interner.iter();
assert_eq!(pairs.next(), Some((0, &"x".to_owned())));
assert_eq!(pairs.next(), Some((1, &"y".to_owned())));
assert_eq!(pairs.next(), None);
}
}
#[cfg(test)]
mod property_tests {
use super::*;
use proptest::prelude::*;
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_handles_sequential_from_zero(
keys in prop::collection::vec(any::<u32>(), 1..50)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let mut unique_keys = Vec::new();
for key in keys {
if !unique_keys.contains(&key) {
unique_keys.push(key);
let handle = interner.intern(&key);
let expected_handle = (unique_keys.len() - 1) as u64;
prop_assert_eq!(handle, expected_handle);
}
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_first_key_gets_zero(key in any::<u32>()) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let handle = interner.intern(&key);
prop_assert_eq!(handle, 0);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_different_keys_different_handles(
key1 in any::<u32>(),
key2 in any::<u32>()
) {
prop_assume!(key1 != key2);
let mut interner: KeyInterner<u32> = KeyInterner::new();
let h1 = interner.intern(&key1);
let h2 = interner.intern(&key2);
prop_assert_ne!(h1, h2);
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_intern_idempotent(
key in any::<u32>(),
repeat_count in 1usize..10
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let first_handle = interner.intern(&key);
for _ in 0..repeat_count {
let handle = interner.intern(&key);
prop_assert_eq!(handle, first_handle);
}
prop_assert_eq!(interner.len(), 1);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_reintern_no_length_increase(
keys in prop::collection::vec(any::<u32>(), 1..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for &key in &keys {
interner.intern(&key);
}
let len_after_first = interner.len();
for &key in &keys {
interner.intern(&key);
}
prop_assert_eq!(interner.len(), len_after_first);
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_intern_resolve_roundtrip(
keys in prop::collection::vec(any::<u32>(), 0..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in keys {
let handle = interner.intern(&key);
prop_assert_eq!(interner.resolve(handle), Some(&key));
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_get_handle_resolve_consistent(
keys in prop::collection::vec(any::<u32>(), 1..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for &key in &keys {
interner.intern(&key);
}
for &key in &keys {
if let Some(handle) = interner.get_handle(&key) {
prop_assert_eq!(interner.resolve(handle), Some(&key));
}
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_all_handles_valid_up_to_len(
keys in prop::collection::vec(0u32..50, 1..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in keys {
interner.intern(&key);
}
let len = interner.len() as u64;
for handle in 0..len {
prop_assert!(interner.resolve(handle).is_some());
}
for handle in len..(len + 10) {
prop_assert_eq!(interner.resolve(handle), None);
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_get_handle_missing_returns_none(
interned_keys in prop::collection::vec(0u32..20, 1..10),
query_key in 20u32..40
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in interned_keys {
interner.intern(&key);
}
prop_assert_eq!(interner.get_handle(&query_key), None);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_get_handle_read_only(
keys in prop::collection::vec(any::<u32>(), 1..20),
query_key in any::<u32>()
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in keys {
interner.intern(&key);
}
let len_before = interner.len();
let _ = interner.get_handle(&query_key);
let len_after = interner.len();
prop_assert_eq!(len_before, len_after);
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_len_equals_unique_keys(
keys in prop::collection::vec(any::<u32>(), 0..50)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in &keys {
interner.intern(key);
}
let unique_count = {
let mut unique = std::collections::HashSet::new();
for key in keys {
unique.insert(key);
}
unique.len()
};
prop_assert_eq!(interner.len(), unique_count);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_is_empty_consistent_with_len(
keys in prop::collection::vec(any::<u32>(), 0..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let mut unique_keys = std::collections::HashSet::new();
for key in keys {
interner.intern(&key);
unique_keys.insert(key);
prop_assert_eq!(interner.is_empty(), unique_keys.is_empty());
prop_assert_eq!(interner.len(), unique_keys.len());
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_clear_resets_state(
keys in prop::collection::vec(any::<u32>(), 1..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in keys {
interner.intern(&key);
}
interner.clear_shrink();
prop_assert!(interner.is_empty());
prop_assert_eq!(interner.len(), 0);
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_clear_invalidates_handles(
keys in prop::collection::vec(any::<u32>(), 1..20)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let mut handles = Vec::new();
for key in &keys {
let handle = interner.intern(key);
handles.push(handle);
}
interner.clear_shrink();
for handle in handles {
prop_assert_eq!(interner.resolve(handle), None);
}
for key in keys {
prop_assert_eq!(interner.get_handle(&key), None);
}
}
#[cfg_attr(miri, ignore)]
#[test]
fn prop_usable_after_clear(
keys1 in prop::collection::vec(any::<u32>(), 1..20),
keys2 in prop::collection::vec(any::<u32>(), 1..20)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
for key in keys1 {
interner.intern(&key);
}
interner.clear_shrink();
if let Some(&first_key) = keys2.first() {
let handle = interner.intern(&first_key);
prop_assert_eq!(handle, 0);
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_matches_reference_implementation(
keys in prop::collection::vec(0u32..50, 0..50)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let mut reference: std::collections::HashMap<u32, u64> = std::collections::HashMap::new();
let mut next_handle: u64 = 0;
for key in keys {
let handle = interner.intern(&key);
let ref_handle = *reference.entry(key).or_insert_with(|| {
let h = next_handle;
next_handle += 1;
h
});
prop_assert_eq!(handle, ref_handle);
prop_assert_eq!(interner.len(), reference.len());
for (&ref_key, &ref_handle) in &reference {
prop_assert_eq!(interner.get_handle(&ref_key), Some(ref_handle));
prop_assert_eq!(interner.resolve(ref_handle), Some(&ref_key));
}
}
}
}
proptest! {
#[cfg_attr(miri, ignore)]
#[test]
fn prop_approx_bytes_increases(
keys in prop::collection::vec(any::<u32>(), 10..30)
) {
let mut interner: KeyInterner<u32> = KeyInterner::new();
let base_bytes = interner.approx_bytes();
for key in keys {
interner.intern(&key);
}
let after_bytes = interner.approx_bytes();
prop_assert!(after_bytes >= base_bytes);
}
}
}