dup_indexer/
owner.rs

1use std::collections::hash_map::Entry::{Occupied, Vacant};
2use std::collections::{BTreeMap, BTreeSet, HashMap};
3use std::fmt::{Debug, Formatter};
4use std::hash::Hash;
5use std::mem::ManuallyDrop;
6use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
7use std::num::{
8    NonZeroI128, NonZeroI16, NonZeroI32, NonZeroI64, NonZeroI8, NonZeroIsize, NonZeroU128,
9    NonZeroU16, NonZeroU32, NonZeroU64, NonZeroU8, NonZeroUsize, Wrapping,
10};
11use std::ops::{Deref, Index};
12use std::path::PathBuf;
13use std::ptr;
14use std::time::{Duration, SystemTime};
15
16/// A value that can be used as a key in a [`DupIndexer`], which will copy its content
17/// using the [`ptr::read`] function, while also owning it internally.
18///
19/// # Safety
20/// Implementing this trait is unsafe because the implementation must guarantee that
21/// the value can be copied by copying the bits of the value assuming that the value
22/// itself is valid and readonly. All Copy types are `PtrRead`, but `Box<T>` is not.
23pub unsafe trait PtrRead {}
24
25macro_rules! impl_trait {
26    ($($t:ty),*) => {
27        $(
28            unsafe impl PtrRead for $t {}
29        )*
30    };
31}
32
33impl_trait![(), &'static str];
34impl_trait![f32, f64, bool, char, String, PathBuf];
35impl_trait![SystemTime, Duration, Ipv4Addr, Ipv6Addr, IpAddr];
36impl_trait![u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize];
37impl_trait![NonZeroU8, NonZeroU16, NonZeroU32];
38impl_trait![NonZeroU64, NonZeroU128, NonZeroUsize];
39impl_trait![NonZeroI8, NonZeroI16, NonZeroI32];
40impl_trait![NonZeroI64, NonZeroI128, NonZeroIsize];
41
42unsafe impl<T: PtrRead> PtrRead for [T] {}
43unsafe impl<T: PtrRead, const N: usize> PtrRead for [T; N] {}
44unsafe impl<T: PtrRead> PtrRead for Wrapping<T> {}
45unsafe impl<T: PtrRead> PtrRead for Option<T> {}
46unsafe impl<T: PtrRead> PtrRead for Vec<T> {}
47unsafe impl<T: PtrRead, V: PtrRead, S> PtrRead for HashMap<T, V, S> {}
48unsafe impl<T: PtrRead, V: PtrRead> PtrRead for BTreeMap<T, V> {}
49unsafe impl<T: PtrRead> PtrRead for BTreeSet<T> {}
50
51pub struct DupIndexer<T> {
52    values: Vec<T>,
53    lookup: HashMap<ManuallyDrop<T>, usize>,
54}
55
56impl<T: PtrRead> DupIndexer<T> {
57    /// Create a new instance of `DupIndexer<T>`, without requiring `T` to implement `Default`.
58    #[must_use]
59    pub fn new() -> Self {
60        Self {
61            values: Vec::new(),
62            lookup: HashMap::new(),
63        }
64    }
65
66    /// Constructs a new, empty `DupIndexer<T>` with at least the specified capacity.
67    #[must_use]
68    pub fn with_capacity(capacity: usize) -> Self {
69        Self {
70            values: Vec::with_capacity(capacity),
71            lookup: HashMap::with_capacity(capacity),
72        }
73    }
74
75    /// Returns the total number of elements the indexer can hold without reallocating.
76    #[inline]
77    #[must_use]
78    pub fn capacity(&self) -> usize {
79        self.values.capacity()
80    }
81
82    /// Extracts a slice containing the entire indexer values.
83    #[inline]
84    #[must_use]
85    pub fn as_slice(&self) -> &[T] {
86        self
87    }
88
89    /// Get the number of values in the indexer.
90    #[inline]
91    #[must_use]
92    pub fn len(&self) -> usize {
93        self.values.len()
94    }
95
96    /// Return true if the indexer is empty.
97    #[inline]
98    #[must_use]
99    pub fn is_empty(&self) -> bool {
100        self.values.is_empty()
101    }
102
103    /// Converts the indexer into a vector.
104    #[inline]
105    #[must_use]
106    pub fn into_vec(self) -> Vec<T> {
107        self.values
108    }
109}
110
111/// If `T` implements `Default`, create a new instance of `DupIndexer<T>`.
112/// Note that [`DupIndexer::new`] does not require `T` to implement `Default`.
113impl<T: PtrRead + Default> Default for DupIndexer<T> {
114    #[inline]
115    fn default() -> Self {
116        Self::new()
117    }
118}
119
120impl<T: Eq + Hash> DupIndexer<T> {
121    /// Insert a value into the indexer if it doesn't already exist,
122    /// and return the index of the value.
123    ///
124    /// ```
125    /// # use dup_indexer::DupIndexer;
126    /// # fn main() {
127    /// let mut di = DupIndexer::<String>::new();
128    /// assert_eq!(di.insert("hello".to_string()), 0);
129    /// assert_eq!(di.insert("world".to_string()), 1);
130    /// assert_eq!(di.insert("hello".to_string()), 0);
131    /// assert_eq!(di.into_vec(), vec!["hello", "world"]);
132    /// # }
133    /// ```
134    pub fn insert(&mut self, value: T) -> usize {
135        // This is safe because we own the value and will not modify or drop it unless we consume the whole values vector,
136        // nor would we access the values in the vector before then.
137        // When dropping, index will be dropped without freeing the memory.
138        let dup_value = ManuallyDrop::new(unsafe { ptr::read(&value) });
139        match self.lookup.entry(dup_value) {
140            Occupied(entry) => *entry.get(),
141            Vacant(entry) => {
142                let index = self.values.len();
143                entry.insert(index);
144                self.values.push(value);
145                index
146            }
147        }
148    }
149}
150
151impl<T> Index<usize> for DupIndexer<T> {
152    type Output = T;
153
154    #[inline]
155    fn index(&self, index: usize) -> &Self::Output {
156        &self.values[index]
157    }
158}
159
160impl<T> IntoIterator for DupIndexer<T> {
161    type Item = T;
162    type IntoIter = std::vec::IntoIter<T>;
163
164    #[inline]
165    fn into_iter(self) -> std::vec::IntoIter<T> {
166        self.values.into_iter()
167    }
168}
169
170impl<T> Deref for DupIndexer<T> {
171    type Target = [T];
172
173    #[inline]
174    fn deref(&self) -> &[T] {
175        &self.values
176    }
177}
178
179impl<T: Debug> Debug for DupIndexer<T> {
180    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
181        f.debug_map()
182            .entries(self.values.iter().enumerate())
183            .finish()
184    }
185}
186
187#[cfg(test)]
188mod tests {
189    use super::*;
190
191    #[test]
192    fn test_str() {
193        let mut di: DupIndexer<&str> = DupIndexer::default();
194        assert!(di.is_empty());
195        assert_eq!(di.capacity(), 0);
196        assert_eq!(di.insert("foo"), 0);
197        assert_eq!(di.insert("bar"), 1);
198        assert_eq!(di.insert("foo"), 0);
199        assert_eq!(di[1], "bar");
200        assert!(!di.is_empty());
201        assert_eq!(di.len(), 2);
202        assert!(di.capacity() >= 2);
203        assert_eq!(di.deref(), &["foo", "bar"]);
204        assert_eq!(di.as_slice(), &["foo", "bar"]);
205        assert_eq!(format!("{di:?}"), r#"{0: "foo", 1: "bar"}"#);
206        assert_eq!(di.into_vec(), vec!["foo", "bar"]);
207    }
208
209    #[test]
210    fn test_string() {
211        let mut di: DupIndexer<String> = DupIndexer::with_capacity(5);
212        assert!(di.is_empty());
213        assert!(di.capacity() >= 5);
214        assert_eq!(di.insert("foo".to_string()), 0);
215        assert_eq!(di.insert("bar".to_string()), 1);
216        assert_eq!(di.insert("foo".to_string()), 0);
217        assert_eq!(di[1], "bar");
218        assert_eq!(di[1], "bar".to_string());
219        assert!(!di.is_empty());
220        assert_eq!(di.len(), 2);
221        assert!(di.capacity() >= 5);
222        assert_eq!(di.deref(), &["foo", "bar"]);
223        assert_eq!(di.as_slice(), &["foo", "bar"]);
224        assert_eq!(format!("{di:?}"), r#"{0: "foo", 1: "bar"}"#);
225        assert_eq!(di.into_vec(), vec!["foo", "bar"]);
226    }
227
228    #[test]
229    fn test_copyable_value() {
230        let mut di: DupIndexer<i32> = DupIndexer::default();
231        assert_eq!(di.insert(42), 0);
232        assert_eq!(di.insert(13), 1);
233        assert_eq!(di.insert(42), 0);
234        assert_eq!(di[1], 13);
235        assert_eq!(di.into_iter().collect::<Vec<_>>(), vec![42, 13]);
236    }
237
238    #[test]
239    fn test_copyable_struct() {
240        #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
241        struct Foo(pub i32);
242
243        unsafe impl PtrRead for Foo {}
244
245        let mut di: DupIndexer<Foo> = DupIndexer::new();
246        assert_eq!(di.insert(Foo(42)), 0);
247        assert_eq!(di.insert(Foo(13)), 1);
248        assert_eq!(di.insert(Foo(42)), 0);
249        assert_eq!(di[1], Foo(13));
250        assert_eq!(di.into_vec(), vec![Foo(42), Foo(13)]);
251    }
252
253    #[test]
254    fn test_vec() {
255        let mut di: DupIndexer<Vec<i32>> = DupIndexer::default();
256        assert_eq!(di.insert(vec![1, 2, 3]), 0);
257        assert_eq!(di.insert(vec![1, 2]), 1);
258        assert_eq!(di.insert(vec![1, 2, 3]), 0);
259        assert_eq!(di[1], vec![1, 2]);
260        assert_eq!(di.into_vec(), vec![vec![1, 2, 3], vec![1, 2]]);
261    }
262
263    #[test]
264    fn test_debug_fmt() {
265        let mut di: DupIndexer<char> = DupIndexer::default();
266        assert_eq!(di.insert('a'), 0);
267        assert_eq!(di.insert('b'), 1);
268        assert_eq!(di.insert('c'), 2);
269        assert_eq!(di.insert('b'), 1);
270        assert_eq!(di[2], 'c');
271        assert_eq!(format!("{di:?}"), "{0: 'a', 1: 'b', 2: 'c'}");
272        assert_eq!(di.into_vec(), vec!['a', 'b', 'c']);
273    }
274
275    #[test]
276    fn test_many_strings() {
277        const ITERATIONS: usize = 50;
278        let mut di: DupIndexer<String> = DupIndexer::with_capacity(1);
279        let mut old_capacity = 0;
280        let mut capacity_has_grown = false;
281        for shift in &[0, ITERATIONS] {
282            for _pass in 0..2 {
283                for idx in 0..ITERATIONS {
284                    assert_eq!(di.insert((idx + shift).to_string()), idx + shift);
285                    if old_capacity == 0 {
286                        old_capacity = di.capacity();
287                    } else if di.capacity() > old_capacity {
288                        capacity_has_grown = true;
289                    }
290                }
291            }
292        }
293        // Ensure that capacity has grown at least once
294        assert!(capacity_has_grown);
295        assert_eq!(
296            di.into_vec(),
297            (0..ITERATIONS * 2)
298                .map(|i| i.to_string())
299                .collect::<Vec<_>>()
300        );
301    }
302
303    #[test]
304    fn test_custom_trait() {
305        #[derive(Debug, Eq, PartialEq, Hash)]
306        enum Value {
307            Str(String),
308            Int(i32),
309        }
310
311        unsafe impl PtrRead for Value {}
312
313        let mut di: DupIndexer<Value> = DupIndexer::new();
314        assert_eq!(di.insert(Value::Str("foo".to_string())), 0);
315        assert_eq!(di.insert(Value::Int(42)), 1);
316        assert_eq!(di.insert(Value::Str("foo".to_string())), 0);
317        assert_eq!(di[1], Value::Int(42));
318        assert_eq!(
319            di.into_vec(),
320            vec![Value::Str("foo".to_string()), Value::Int(42)]
321        );
322    }
323
324    // // This test is ignored on Miri because it fails without any good explanation at the moment.
325    // // See issue https://github.com/nyurik/dup-indexer/issues/1
326    // #[test]
327    // #[cfg_attr(miri, ignore)]
328    // fn test_box() {
329    //     let mut di: DupIndexer<Box<i32>> = DupIndexer::default();
330    //     assert_eq!(di.insert(Box::new(42)), 0);
331    //     assert_eq!(di.insert(Box::new(13)), 1);
332    //     assert_eq!(di.insert(Box::new(42)), 0);
333    //     assert_eq!(di[1], Box::new(13));
334    //     assert_eq!(di.into_vec(), vec![Box::new(42), Box::new(13)]);
335    // }
336}