libintern/
lib.rs

1#![feature(negative_impls)]
2
3use std::{borrow::Borrow, fmt::{Debug, Display}, hash::Hash, marker::PhantomData, ops::Deref, pin::Pin};
4
5/// The interner.
6///
7/// An interner is a structure which uniquely owns the interned items,
8/// and provides shared immutable references to those items.
9pub struct Interner<'a, T: 'a + Eq> {
10    /// A list of holders of the items
11    holders: Vec<InternedItemHolder<T>>,
12    _ph: PhantomData<&'a T>
13}
14
15impl<'a, T> !Sync for Interner<'a, T> {}
16
17/// The capacity of the first InternedItemHolder
18const BEGIN_INTERNER_CAPACITY: usize = 32;
19/// By how much every next interner's capacity changes
20const INTERNER_CAPACITY_DELTA: f32 = 1.5;
21
22impl<'a, T: 'a + Eq> Interner<'a, T> {
23    #[allow(clippy::new_without_default)]
24    pub fn new() -> Self {
25        Self { 
26            holders: vec![
27                InternedItemHolder::new(BEGIN_INTERNER_CAPACITY)],
28            _ph: PhantomData 
29        }
30    }
31
32    /// Intern an item.
33    ///
34    /// This consumes the item by adding it to the intern-list and returns a reference to it.
35    /// It also extends the lifetime of the item to match the lifetime of this interner.
36    ///
37    /// This item is dropped if an item equal to this one is already interned,
38    /// in which case a reference to the already interned item is returned instead.
39    pub fn intern(&mut self, item: T) -> Intern<'a, T> {
40        // Look whether an item equal to this one already exists
41        let mut result = None;
42        for holder in &self.holders {
43            for h_item in &holder.items {
44                if &item == h_item {
45                    result = Some(h_item);
46                    break
47                }
48            }
49        }
50        // If the new item is unique, add it to the holder
51        if result.is_none() {
52            self.hold_new_item(item);
53            result = Some(
54                // See documentation for [`hold_new_item`]
55                self.holders.last().unwrap().items.last().unwrap()
56            )
57        }
58        let reference = result.unwrap();
59        unsafe { self.transmute_held_item(reference) }
60    }
61
62    pub fn contains(&self, item: &T) -> bool {
63        for holder in &self.holders {
64            for h_item in &holder.items {
65                if item == h_item {
66                    return true
67                }
68            }
69        }
70        false
71    }
72
73    /// Hold a new item.
74    /// If the currently last holder is full, create a new holder.
75    ///
76    /// The new item is guaranteed to be placed as the last item of the last holder
77    fn hold_new_item(&mut self, item: T) {
78        match self.holders.last_mut().unwrap().try_push(item) {
79            Ok(()) => (),
80            Err(item) => {
81                // The holder is full, add a new one
82                let last_holder_capacity = self.holders.last().unwrap().items.capacity();
83                let mut new_holder = InternedItemHolder::new(
84                    ((last_holder_capacity as f32) * INTERNER_CAPACITY_DELTA) as usize
85                );
86                // Add to the holder
87                new_holder.items.push(item);
88                // Add the holder to the list of holders
89                self.holders.push(new_holder);
90            }
91        }
92    }
93
94    /// Transmute a reference to an item held by this interner
95    /// into the Intern<T> type.
96    #[inline]
97    unsafe fn transmute_held_item(&self, item: &T) -> Intern<'a, T> {
98        // SAFETY: Via the lifetime <'a>, we guarantee the interner is alive
99        // as long as the references are alive. Furthermore, the data is NEVER
100        // mutated AND only immutable references to the data exist.
101        // Therefore we uphold all guarantees and can assume safety when transmuting
102        let reference: &'a T = std::mem::transmute(item);
103        // SAFETY: I believe for the reasons stated above, this is also safe
104        let pinned_reference: Pin<&'a T> = Pin::new_unchecked(reference);
105        Intern(pinned_reference)
106    }
107
108    pub fn iter<'this>(&'this self) -> Iter<'this, 'a, T> {
109        Iter { interner: self, holder_id: 0, inside_holder_id: 0 }
110    }
111}
112
113/// A wrapper around a vector, which guarantees that
114/// the vector will never grow, thus the addresses (pointers)
115/// of (to) its items will never change
116struct InternedItemHolder<T> {
117    items: Vec<T>
118}
119
120impl<T> InternedItemHolder<T> {
121    fn new(capacity: usize) -> Self {
122        Self { items: Vec::with_capacity(capacity) }
123    }
124
125    /// Try to add an item to the holder.
126    ///
127    /// If there's enough space for the item, succeed and return Ok(())
128    /// If there's not enough space in the holder,
129    ///  it returns Err(the_item), to prevent dropping the value
130    fn try_push(&mut self, item: T) -> Result<(), T> {
131        if self.items.len() == self.items.capacity() {
132            Err(item)
133        } else {
134            self.items.push(item);
135            Ok(())
136        }
137    }
138}
139
140/// A reference to an interned item.
141///
142/// The main advantage of this type over just
143/// an immutable reference is that its pointer is guaranteed to be unique
144/// within a single [`Interner`]. Therefore comparisons
145/// are very cheap.
146///
147/// # Note about hashing
148///
149/// This wrapper implements [`PartialEq`] by comparing its inner pointer.
150/// In order to keep consistency, the [`Hash`] trait is also implemented
151/// by hashing the pointer, NOT the inner value. Therefore hashes
152/// of the `Intern<T>` type are different than hashes of the `T`.
153pub struct Intern<'a, T: 'a>(Pin<&'a T>);
154
155impl<'a, T> Clone for Intern<'a, T> {
156    fn clone(&self) -> Self {
157        Intern(self.0)
158    }
159}
160
161// We must hand implement Copy, because a T: Copy bound is added when using derive
162impl<'a, T> Copy for Intern<'a, T> {}
163
164// Get reference to the inner item
165impl<'a, T> AsRef<T> for Intern<'a, T> {
166    fn as_ref(&self) -> &T {
167        self.0.get_ref()
168    }
169}
170
171impl<'a, T> Borrow<T> for Intern<'a, T> {
172    fn borrow(&self) -> &T {
173        self.0.get_ref()
174    }
175}
176
177impl<'a, T> Deref for Intern<'a, T> {
178    type Target = T;
179
180    fn deref(&self) -> &Self::Target {
181        self.as_ref()
182    }
183}
184
185// Implement Debug if the item implements Debug
186impl<'a, T: Debug> Debug for Intern<'a, T> {
187    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
188        self.as_ref().fmt(f)
189    }
190}
191
192// Implement Display if the item implements Display
193impl<'a, T: Display> Display for Intern<'a, T> {
194    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195        self.as_ref().fmt(f)
196    }
197}
198
199// Implement PartialEq
200// 
201// Because we can guarantee that if the item is the same,
202// the item's place in memory, therefore the pointer is the same,
203// we can just compare values of the pointers, not the items themselves 
204impl<'a, T> PartialEq for Intern<'a, T> {
205    fn eq(&self, other: &Self) -> bool {
206        std::ptr::eq(self.as_ref() as *const _, other.as_ref() as *const _)
207    }
208}
209
210impl<'a, T> Eq for Intern<'a, T> {}
211
212// Implement Hash
213// 
214/// To keep consistency with [`PartialEq`], we hash the pointer, not the value
215impl<'a, T> Hash for Intern<'a, T> {
216    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
217        std::ptr::hash(self.0.get_ref(), state)
218    }
219}
220
221// Implement PartialOrd and Ord if the item implements it
222impl<'a, T: PartialOrd> PartialOrd for Intern<'a, T> {
223    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
224        self.0.partial_cmp(&other.0)
225    }
226}
227
228impl<'a, T: Ord> Ord for Intern<'a, T> {
229    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
230        self.0.cmp(&other.0)
231    }
232}
233
234impl<'a, T> Intern<'a, T> {
235    /// Create a new [`Intern`] type from a raw pointer.
236    ///
237    /// # Safety
238    /// The caller must guarantee that the pointed value is in fact
239    /// owned by an [`Interner`] which is still in scope (i.e. not dropped)
240    /// and that the pointer will not be mutated and/or moved.
241    pub unsafe fn from_raw(ptr: *const T) -> Self {
242        Intern(Pin::new_unchecked(ptr.as_ref().unwrap()))
243    }
244}
245
246pub struct Iter<'a, 'intern, T: std::cmp::Eq> {
247    interner: &'a Interner<'intern, T>,
248    holder_id: usize,
249    inside_holder_id: usize
250}
251
252impl<'a, 'intern, T: std::cmp::Eq> Iterator for Iter<'a, 'intern, T> {
253    type Item = Intern<'intern, T>;
254
255    fn next(&mut self) -> Option<Self::Item> {
256        let holder = self.interner.holders.get(self.holder_id)?;
257        let item = &holder.items[self.inside_holder_id];
258        if self.inside_holder_id == holder.items.len() - 1 {
259            //if this is the last item inside this holder
260            self.holder_id += 1;
261            self.inside_holder_id = 0;
262        } else {
263            self.inside_holder_id += 1;
264        }
265        Some(unsafe { self.interner.transmute_held_item(item) })
266    }
267}
268
269
270#[cfg(test)]
271mod tests {
272    use super::{InternedItemHolder, Interner};
273
274    #[test]
275    fn interned_item_holder_test() {
276        let mut holder = InternedItemHolder::new(4); // size four
277
278        // Add an item
279        assert!(holder.try_push('a').is_ok());
280        assert!(holder.items.len() == 1);
281        assert!(holder.items.capacity() == 4);
282        // Save the address of the item
283        let first_item_address = holder.items.get(0).unwrap() as *const _ as usize;
284
285        // Add another item
286        assert!(holder.try_push('b').is_ok());
287        assert!(holder.items.len() == 2);
288        assert!(holder.items.capacity() == 4);
289        // Make sure the address of the first one didn't change
290        assert_eq!(
291            holder.items.get(0).unwrap() as *const _ as usize,
292            first_item_address
293        );
294        let second_item_address = holder.items.get(1).unwrap() as *const _ as usize;
295
296        // Add two more items
297        assert!(holder.try_push('c').is_ok());
298        assert!(holder.try_push('d').is_ok());
299        assert!(holder.items.len() == 4);
300        assert!(holder.items.capacity() == 4);
301        // Make sure the addresses didn't change
302        assert_eq!(
303            holder.items.get(0).unwrap() as *const _ as usize,
304            first_item_address
305        );
306        assert_eq!(
307            holder.items.get(1).unwrap() as *const _ as usize,
308            second_item_address
309        );
310
311        // Try to add more items
312        assert_eq!(holder.try_push('e'), Err('e'));
313        assert_eq!(holder.try_push('f'), Err('f'));
314        assert!(holder.items.len() == 4);
315        assert!(holder.items.capacity() == 4);
316        // Make sure the addresses didn't change
317        assert_eq!(
318            holder.items.get(0).unwrap() as *const _ as usize,
319            first_item_address
320        );
321        assert_eq!(
322            holder.items.get(1).unwrap() as *const _ as usize,
323            second_item_address
324        );
325
326        // Try to dereference the addresses, just to be sure
327        assert_eq!(
328            unsafe { *(first_item_address as *const char) },
329            'a');
330        assert_eq!(
331            unsafe { *(second_item_address as *const char) },
332            'b');
333    }
334
335    #[test]
336    fn interner_test() {
337        let mut int = Interner::new();
338        // Intern some things
339        let ref_a1 = int.intern('a');
340        let ref_b = int.intern('b');
341        let ref_a2 = int.intern('a');
342        // After this, only TWO items should be interned 'a' and 'b'. The second 'a' should have been discarded
343        assert_eq!(int.holders.len(), 1);
344        assert_eq!(int.holders[0].items.len(), 2);
345        // Now check that the addresses of ref_a1 and ref_a2 are equal
346        assert!(std::ptr::eq(ref_a1.as_ref(), ref_a2.as_ref()));
347        assert!(!std::ptr::eq(ref_a1.as_ref(), ref_b.as_ref()));
348
349        let ref_b2 = int.intern('b');
350        let _ref_c = int.intern('c');
351        assert_eq!(ref_b, ref_b2);
352    }
353
354    #[test]
355    fn intern_impl_test() {
356        let mut int = Interner::new();
357        let a1 = int.intern('a');
358        let a2 = int.intern('a');
359        let x = int.intern('x');
360
361        // AsRef
362        assert_eq!(a1.as_ref(), &'a');
363        // Borrow
364        assert_eq!(<_ as std::borrow::Borrow<char>>::borrow(&a1), &'a');
365        // Deref
366        assert_eq!(*a1, 'a');
367        // TODO: Debug and Display test
368        // PartialEq
369        assert_eq!(a1, a2);
370        assert_ne!(a1, x);
371        // TODO: Hash test
372    }
373
374    #[test]
375    fn interner_iter_test() {
376        let mut int = Interner::new();
377        for i in 0..100 {
378            int.intern(i);
379        }
380
381        let collected: Vec<i32> = int.iter().map(|i| *i).collect();
382
383        assert_eq!(
384            collected,
385            (0..100).collect::<Vec<i32>>()
386        );
387    }
388}