dup_indexer/
deref.rs

1use std::collections::hash_map::Entry::{Occupied, Vacant};
2use std::collections::HashMap;
3use std::fmt::{Debug, Formatter};
4use std::hash::Hash;
5use std::ops::{Deref, Index};
6
7/// A value that can be stably dereferenced with [`Deref`] trait.
8/// A stable dereference means that a reference to the value will be valid
9/// even if the reference is moved to a different memory location.
10///
11/// See <https://stackoverflow.com/q/77548941/177275> for more details.
12///
13/// # Safety
14/// Implementing this trait is unsafe because the implementation must guarantee that
15/// the [`Deref`] is stable per above.
16pub unsafe trait StableDerefKey: Deref + Eq + Hash {}
17
18unsafe impl StableDerefKey for String {}
19
20pub struct DupIndexerRefs<T: StableDerefKey>
21where
22    <T as Deref>::Target: 'static,
23{
24    values: Vec<T>,
25    lookup: HashMap<&'static T::Target, usize>,
26}
27
28impl<T> Default for DupIndexerRefs<T>
29where
30    T: StableDerefKey,
31    T::Target: Eq + Hash + ToOwned<Owned = T>,
32{
33    fn default() -> Self {
34        Self::new()
35    }
36}
37
38impl<T> DupIndexerRefs<T>
39where
40    T: StableDerefKey,
41    T::Target: Eq + Hash + ToOwned<Owned = T>,
42{
43    /// Constructs a new, empty `DupGenIndexer`
44    #[must_use]
45    pub fn new() -> Self {
46        Self {
47            values: Vec::new(),
48            lookup: HashMap::new(),
49        }
50    }
51
52    /// Constructs a new, empty `DupGenIndexer` with at least the specified capacity.
53    #[must_use]
54    pub fn with_capacity(capacity: usize) -> Self {
55        Self {
56            values: Vec::with_capacity(capacity),
57            lookup: HashMap::with_capacity(capacity),
58        }
59    }
60
61    /// Returns the total number of elements the indexer can hold without reallocating.
62    #[inline]
63    #[must_use]
64    pub fn capacity(&self) -> usize {
65        self.values.capacity()
66    }
67
68    /// Extracts a slice containing the entire indexer values.
69    #[inline]
70    #[must_use]
71    pub fn as_slice(&self) -> &[T] {
72        self
73    }
74
75    /// Get the number of values in the indexer.
76    #[inline]
77    #[must_use]
78    pub fn len(&self) -> usize {
79        self.values.len()
80    }
81
82    /// Return true if the indexer is empty.
83    #[inline]
84    #[must_use]
85    pub fn is_empty(&self) -> bool {
86        self.values.is_empty()
87    }
88
89    /// Converts the indexer into a vector.
90    #[inline]
91    #[must_use]
92    pub fn into_vec(self) -> Vec<T> {
93        self.values
94    }
95
96    /// Insert a string value into the indexer if it doesn't already exist,
97    /// and return the index of the value.
98    ///
99    /// ```
100    /// # use dup_indexer::DupIndexerRefs;
101    /// # fn main() {
102    /// let mut di = DupIndexerRefs::<String>::new();
103    /// assert_eq!(di.insert_owned("hello".to_string()), 0);
104    /// assert_eq!(di.insert_owned("world".to_string()), 1);
105    /// assert_eq!(di.insert_owned("hello".to_string()), 0);
106    /// assert_eq!(di.into_vec(), vec!["hello", "world"]);
107    /// # }
108    /// ```
109    pub fn insert_owned(&mut self, value: T) -> usize {
110        // This is safe because we own the value and will not modify or drop it,
111        // unless we consume the whole values vector,
112        // nor would we access the values in the vector before then.
113        // When dropping, index will be dropped without freeing the memory.
114        // create a static reference to the string, which will live as long as the program
115        let value_ref =
116            unsafe { std::mem::transmute::<&T::Target, &'static T::Target>(value.deref()) };
117
118        match self.lookup.entry(value_ref) {
119            Occupied(entry) => *entry.get(),
120            Vacant(entry) => {
121                let index = self.values.len();
122                entry.insert(index);
123                self.values.push(value);
124                index
125            }
126        }
127    }
128
129    /// Insert a cloneable value into the indexer if it doesn't already exist,
130    /// and return the index of the value. Slightly slower than [`DupIndexerRefs::insert_owned`],
131    /// but allows value to be a reference that does not need to be cloned if it was already added.
132    ///
133    /// ```
134    /// # use dup_indexer::DupIndexerRefs;
135    /// # fn main() {
136    /// let mut di = DupIndexerRefs::<String>::new();
137    /// assert_eq!(di.insert_ref("hello"), 0);
138    /// assert_eq!(di.insert_ref("world"), 1);
139    /// assert_eq!(di.insert_ref("hello"), 0);
140    /// assert_eq!(di.into_vec(), vec!["hello", "world"]);
141    /// # }
142    /// ```
143    pub fn insert_ref(&mut self, value: &T::Target) -> usize {
144        match self.lookup.get(value) {
145            Some(index) => *index,
146            None => self.insert_owned(value.to_owned()),
147        }
148    }
149}
150
151impl<T: StableDerefKey> Index<usize> for DupIndexerRefs<T> {
152    type Output = T;
153
154    #[inline]
155    fn index(&self, index: usize) -> &Self::Output {
156        &self.values[index]
157    }
158}
159
160impl<T: StableDerefKey> IntoIterator for DupIndexerRefs<T> {
161    type Item = T;
162    type IntoIter = std::vec::IntoIter<T>;
163
164    #[inline]
165    fn into_iter(self) -> std::vec::IntoIter<T> {
166        self.values.into_iter()
167    }
168}
169
170impl<T: StableDerefKey> Deref for DupIndexerRefs<T> {
171    type Target = [T];
172
173    #[inline]
174    fn deref(&self) -> &[T] {
175        &self.values
176    }
177}
178
179impl<T: StableDerefKey + Debug> Debug for DupIndexerRefs<T> {
180    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
181        f.debug_map()
182            .entries(self.values.iter().enumerate())
183            .finish()
184    }
185}
186
187#[cfg(test)]
188mod tests {
189    use super::*;
190
191    #[test]
192    fn test_string() {
193        let mut di: DupIndexerRefs<String> = DupIndexerRefs::with_capacity(5);
194        assert!(di.is_empty());
195        assert!(di.capacity() >= 5);
196        assert_eq!(di.insert_owned("foo".to_string()), 0);
197        assert_eq!(di.insert_owned("bar".to_string()), 1);
198        assert_eq!(di.insert_owned("foo".to_string()), 0);
199        assert_eq!(di[1], "bar");
200        assert_eq!(di[1], "bar".to_string());
201        assert!(!di.is_empty());
202        assert_eq!(di.len(), 2);
203        assert!(di.capacity() >= 5);
204        assert_eq!(di.deref(), &["foo", "bar"]);
205        assert_eq!(di.as_slice(), &["foo", "bar"]);
206        assert_eq!(format!("{di:?}"), r#"{0: "foo", 1: "bar"}"#);
207        assert_eq!(di.into_vec(), vec!["foo", "bar"]);
208    }
209
210    #[test]
211    fn test_string_own() {
212        let mut di: DupIndexerRefs<String> = DupIndexerRefs::with_capacity(5);
213        assert_eq!(di.insert_owned("foo".to_string()), 0);
214        assert_eq!(di.insert_ref("bar"), 1);
215        assert_eq!(di.insert_ref("foo"), 0);
216        assert_eq!(di.into_vec(), vec!["foo", "bar"]);
217    }
218
219    #[test]
220    fn test_many_strings() {
221        const ITERATIONS: usize = 50;
222        let mut di: DupIndexerRefs<String> = DupIndexerRefs::with_capacity(1);
223        let mut old_capacity = 0;
224        let mut capacity_has_grown = false;
225        for shift in &[0, ITERATIONS] {
226            for _pass in 0..2 {
227                for idx in 0..ITERATIONS {
228                    assert_eq!(di.insert_owned((idx + shift).to_string()), idx + shift);
229                    if old_capacity == 0 {
230                        old_capacity = di.capacity();
231                    } else if di.capacity() > old_capacity {
232                        capacity_has_grown = true;
233                    }
234                }
235            }
236        }
237        // Ensure that capacity has grown at least once
238        assert!(capacity_has_grown);
239        assert_eq!(
240            di.into_vec(),
241            (0..ITERATIONS * 2)
242                .map(|i| i.to_string())
243                .collect::<Vec<_>>()
244        );
245    }
246}