#![cfg_attr(feature = "default", doc = include_str!("../README.md"))]
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::{Debug, Formatter};
use std::hash::Hash;
use std::mem::ManuallyDrop;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::num::{
NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128,
NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping,
};
use std::ops::Index;
use std::path::PathBuf;
use std::time::{Duration, SystemTime};
use std::{ops, ptr};
pub unsafe trait PtrRead {}
macro_rules! impl_trait {
($($t:ty),*) => {
$(
unsafe impl PtrRead for $t {}
)*
};
}
impl_trait![(), &'static str];
impl_trait![f32, f64, bool, char, String, PathBuf];
impl_trait![SystemTime, Duration, Ipv4Addr, Ipv6Addr, IpAddr];
impl_trait![u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize];
impl_trait![NonZeroU8, NonZeroU16, NonZeroU32];
impl_trait![NonZeroU64, NonZeroU128, NonZeroUsize];
impl_trait![NonZeroI8, NonZeroI16, NonZeroI32];
impl_trait![NonZeroI64, NonZeroI128, NonZeroIsize];
unsafe impl<T: PtrRead> PtrRead for [T] {}
unsafe impl<T: PtrRead, const N: usize> PtrRead for [T; N] {}
unsafe impl<T: PtrRead> PtrRead for Wrapping<T> {}
unsafe impl<T: PtrRead> PtrRead for Option<T> {}
unsafe impl<T: PtrRead> PtrRead for Vec<T> {}
unsafe impl<T: PtrRead, V: PtrRead, S> PtrRead for HashMap<T, V, S> {}
unsafe impl<T: PtrRead, V: PtrRead> PtrRead for BTreeMap<T, V> {}
unsafe impl<T: PtrRead> PtrRead for BTreeSet<T> {}
pub struct DupIndexer<T> {
values: Vec<T>,
lookup: HashMap<ManuallyDrop<T>, usize>,
}
impl<T: PtrRead> DupIndexer<T> {
pub fn new() -> Self {
Self {
values: Vec::new(),
lookup: HashMap::new(),
}
}
pub fn with_capacity(capacity: usize) -> Self {
Self {
values: Vec::with_capacity(capacity),
lookup: HashMap::with_capacity(capacity),
}
}
#[inline]
pub fn capacity(&self) -> usize {
self.values.capacity()
}
#[inline]
pub fn as_slice(&self) -> &[T] {
self
}
#[inline]
pub fn len(&self) -> usize {
self.values.len()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.values.is_empty()
}
#[inline]
pub fn into_vec(self) -> Vec<T> {
self.values
}
}
impl<T: PtrRead + Default> Default for DupIndexer<T> {
#[inline]
fn default() -> Self {
Self::new()
}
}
impl<T: Eq + Hash> DupIndexer<T> {
pub fn insert(&mut self, value: T) -> usize {
let dup_value = ManuallyDrop::new(unsafe { ptr::read(&value) });
match self.lookup.entry(dup_value) {
Occupied(entry) => *entry.get(),
Vacant(entry) => {
let index = self.values.len();
entry.insert(index);
self.values.push(value);
index
}
}
}
}
impl<T> Index<usize> for DupIndexer<T> {
type Output = T;
#[inline]
fn index(&self, index: usize) -> &Self::Output {
&self.values[index]
}
}
impl<T> IntoIterator for DupIndexer<T> {
type Item = T;
type IntoIter = std::vec::IntoIter<T>;
#[inline]
fn into_iter(self) -> std::vec::IntoIter<T> {
self.values.into_iter()
}
}
impl<T> ops::Deref for DupIndexer<T> {
type Target = [T];
#[inline]
fn deref(&self) -> &[T] {
&self.values
}
}
impl<T: Debug> Debug for DupIndexer<T> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_map()
.entries(self.values.iter().enumerate())
.finish()
}
}
#[cfg(test)]
mod tests {
use std::ops::Deref;
use super::*;
#[test]
fn test_str() {
let mut di: DupIndexer<&str> = DupIndexer::default();
assert!(di.is_empty());
assert_eq!(di.capacity(), 0);
assert_eq!(di.insert("foo"), 0);
assert_eq!(di.insert("bar"), 1);
assert_eq!(di.insert("foo"), 0);
assert_eq!(di[1], "bar");
assert!(!di.is_empty());
assert_eq!(di.len(), 2);
assert!(di.capacity() >= 2);
assert_eq!(di.deref(), &["foo", "bar"]);
assert_eq!(di.as_slice(), &["foo", "bar"]);
assert_eq!(format!("{di:?}"), r#"{0: "foo", 1: "bar"}"#);
assert_eq!(di.into_vec(), vec!["foo", "bar"]);
}
#[test]
fn test_string() {
let mut di: DupIndexer<String> = DupIndexer::with_capacity(5);
assert!(di.is_empty());
assert!(di.capacity() >= 5);
assert_eq!(di.insert("foo".to_string()), 0);
assert_eq!(di.insert("bar".to_string()), 1);
assert_eq!(di.insert("foo".to_string()), 0);
assert_eq!(di[1], "bar");
assert_eq!(di[1], "bar".to_string());
assert!(!di.is_empty());
assert_eq!(di.len(), 2);
assert!(di.capacity() >= 5);
assert_eq!(di.deref(), &["foo", "bar"]);
assert_eq!(di.as_slice(), &["foo", "bar"]);
assert_eq!(format!("{di:?}"), r#"{0: "foo", 1: "bar"}"#);
assert_eq!(di.into_vec(), vec!["foo", "bar"]);
}
#[test]
fn test_copyable_value() {
let mut di: DupIndexer<i32> = DupIndexer::default();
assert_eq!(di.insert(42), 0);
assert_eq!(di.insert(13), 1);
assert_eq!(di.insert(42), 0);
assert_eq!(di[1], 13);
assert_eq!(di.into_iter().collect::<Vec::<_>>(), vec![42, 13]);
}
#[test]
fn test_copyable_struct() {
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
struct Foo(pub i32);
unsafe impl PtrRead for Foo {}
let mut di: DupIndexer<Foo> = DupIndexer::new();
assert_eq!(di.insert(Foo(42)), 0);
assert_eq!(di.insert(Foo(13)), 1);
assert_eq!(di.insert(Foo(42)), 0);
assert_eq!(di[1], Foo(13));
assert_eq!(di.into_vec(), vec![Foo(42), Foo(13)]);
}
#[test]
fn test_vec() {
let mut di: DupIndexer<Vec<i32>> = DupIndexer::default();
assert_eq!(di.insert(vec![1, 2, 3]), 0);
assert_eq!(di.insert(vec![1, 2]), 1);
assert_eq!(di.insert(vec![1, 2, 3]), 0);
assert_eq!(di[1], vec![1, 2]);
assert_eq!(di.into_vec(), vec![vec![1, 2, 3], vec![1, 2]]);
}
#[test]
fn test_debug_fmt() {
let mut di: DupIndexer<char> = DupIndexer::default();
assert_eq!(di.insert('a'), 0);
assert_eq!(di.insert('b'), 1);
assert_eq!(di.insert('c'), 2);
assert_eq!(di.insert('b'), 1);
assert_eq!(di[2], 'c');
assert_eq!(format!("{di:?}"), "{0: 'a', 1: 'b', 2: 'c'}");
assert_eq!(di.into_vec(), vec!['a', 'b', 'c']);
}
#[test]
fn test_many_strings() {
const ITERATIONS: usize = 50;
let mut di: DupIndexer<String> = DupIndexer::with_capacity(1);
let mut old_capacity = 0;
let mut capacity_has_grown = false;
for shift in &[0, ITERATIONS] {
for _pass in 0..2 {
for idx in 0..ITERATIONS {
assert_eq!(di.insert((idx + shift).to_string()), idx + shift);
if old_capacity == 0 {
old_capacity = di.capacity();
} else if di.capacity() > old_capacity {
capacity_has_grown = true;
}
}
}
}
assert!(capacity_has_grown);
assert_eq!(
di.into_vec(),
(0..ITERATIONS * 2)
.map(|i| i.to_string())
.collect::<Vec<_>>()
);
}
#[test]
fn test_custom_trait() {
#[derive(Debug, Eq, PartialEq, Hash)]
enum Value {
Str(String),
Int(i32),
}
unsafe impl PtrRead for Value {}
let mut di: DupIndexer<Value> = DupIndexer::new();
assert_eq!(di.insert(Value::Str("foo".to_string())), 0);
assert_eq!(di.insert(Value::Int(42)), 1);
assert_eq!(di.insert(Value::Str("foo".to_string())), 0);
assert_eq!(di[1], Value::Int(42));
assert_eq!(
di.into_vec(),
vec![Value::Str("foo".to_string()), Value::Int(42)]
);
}
}