#![cfg_attr(nightly, feature(test))]
#[cfg(nightly)] extern crate test;
use std::{
collections::{
hash_set,
HashSet,
},
marker::{
PhantomData,
Send,
Sync,
},
hash::Hash,
borrow::Borrow,
};
mod hashing;
#[cfg(feature="smallmap")] pub mod small;
pub type HashType = hashing::Sha512Hash;
fn compute_hash_for<T: ?Sized + Hash>(value: &T) -> HashType
{
let mut hasher = hashing::Sha512Hasher::new();
value.hash(&mut hasher);
hasher.finalize()
}
#[allow(dead_code)]
#[cold] fn compute_both_hash_for<T: ?Sized + Hash>(value: &T) -> (u64, HashType)
{
use sha2::{
Digest,
digest::generic_array::sequence::Split,
};
let mut hasher = hashing::Sha512Hasher::new();
value.hash(&mut hasher);
let sha512 = hasher.into_inner();
let full = sha512.finalize();
let mut arr = [0u8; hashing::HASH_SIZE];
debug_assert_eq!(arr.len(), full.len());
unsafe {
std::ptr::copy_nonoverlapping(&full[0] as *const u8, &mut arr[0] as *mut u8, hashing::HASH_SIZE);
}
(u64::from_ne_bytes(full.split().0.into()), HashType::from_bytes(arr))
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[cfg_attr(feature="serde", derive(serde::Serialize, serde::Deserialize))]
pub struct HashRefSet<T: ?Sized>(HashSet<HashType>, PhantomData<HashSet<*const T>>);
unsafe impl<T: ?Sized + Send> Send for HashRefSet<T>{}
unsafe impl<T: ?Sized + Send + Sync> Sync for HashRefSet<T>{}
impl<T:?Sized + Hash> HashRefSet<T>
{
pub fn new() -> Self
{
Self(
HashSet::new(),
PhantomData
)
}
pub fn into_inner(self) -> HashSet<HashType>
{
self.0
}
pub fn with_capacity(cap: usize) -> Self
{
Self(HashSet::with_capacity(cap), PhantomData)
}
pub fn insert<Q>(&mut self, value: &Q) -> bool
where Q: ?Sized + Borrow<T>
{
self.0.insert(compute_hash_for(value.borrow()))
}
pub fn remove<Q>(&mut self, value: &Q) -> bool
where Q: ?Sized + Borrow<T>
{
self.0.remove(&compute_hash_for(value.borrow()))
}
pub fn contains<Q>(&mut self, value: &Q) -> bool
where Q: ?Sized + Borrow<T>
{
self.0.contains(&compute_hash_for(value.borrow()))
}
pub fn len(&self) -> usize
{
self.0.len()
}
pub fn is_empty(&self) -> bool
{
self.0.is_empty()
}
pub fn hashes_iter(&self) -> hash_set::Iter<'_, HashType>
{
self.0.iter()
}
#[inline] fn into_hashes_iter(self) -> hash_set::IntoIter<HashType>
{
self.0.into_iter()
}
}
impl<T: ?Sized + Hash> IntoIterator for HashRefSet<T>
{
type Item= HashType;
type IntoIter = hash_set::IntoIter<HashType>;
#[inline] fn into_iter(self) -> Self::IntoIter
{
self.into_hashes_iter()
}
}
#[cfg(test)]
mod tests
{
use super::*;
#[test]
fn insert()
{
let mut refset = HashRefSet::new();
let values= vec![
"hi",
"hello",
"one",
"two",
];
for &string in values.iter()
{
refset.insert(string);
}
for string in values
{
assert!(refset.contains(string));
}
assert!(refset.insert("none"));
assert!(!refset.insert("two"));
}
#[cfg(nightly)]
mod benchmarks
{
use test::{black_box, Bencher};
const STRINGS: &str = "leo vel fringilla est ullamcorper eget nulla facilisi etiam dignissim diam quis enim lobortis scelerisque fermentum dui faucibus in ornare quam viverra orci sagittis eu volutpat odio facilisis mauris sit amet massa vitae tortor condimentum lacinia quis vel eros donec ac odio tempor orci dapibus ultrices in iaculis nunc sed augue lacus viverra vitae congue eu consequat ac felis donec et odio pellentesque diam volutpat commodo sed egestas egestas fringilla phasellus faucibus scelerisque eleifend donec pretium vulputate sapien nec sagittis aliquam malesuada bibendum arcu vitae elementum curabitur vitae nunc sed velit dignissim sodales ut eu sem integer vitae justo eget";
const INTS: &[u32] = &[182,248,69,225,164,219,73,122,14,205,148,221,24,107,209,83,210,87,148,249,234,181,217,154,180,240,132,145,208,15,77,4,117,16,43,1,95,49,150,18,207,161,107,216,215,100,76,198,43,21,99,177,77,28,29,172,117,136,151,96,66,208,244,138,90];
#[bench] fn non_owning_strings(b: &mut Bencher)
{
let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
let mut map = super::HashRefSet::new();
b.iter(|| {
for string in strings.iter() {
black_box(map.insert(string.as_str()));
}
})
}
#[bench] fn owning_strings(b: &mut Bencher)
{
let strings: Vec<String> = STRINGS.split(char::is_whitespace).map(ToOwned::to_owned).collect();
let mut map = std::collections::HashSet::new();
b.iter(|| {
for string in strings.iter() {
black_box(map.insert(string.clone()));
}
})
}
#[bench] fn non_owning_ints(b: &mut Bencher)
{
let mut map = super::HashRefSet::new();
b.iter(|| {
for int in INTS.iter() {
black_box(map.insert(int));
}
})
}
#[bench] fn owning_ints(b: &mut Bencher)
{
let mut map = std::collections::HashSet::new();
b.iter(|| {
for int in INTS.iter() {
black_box(map.insert(int));
}
})
}
}
}