symbol_map/
indexing.rs

1//! Indexing on top of a `Table`.
2//!
3//! It is anticipated that most uses cases will be covered by
4//! [HashIndexing](struct.HashIndexing.html), which owns a
5//! [Table](../struct.Table.html) and provides bidirectional mappings between
6//! data values and their symbols.
7//!
8//! The [Indexing](trait.Indexing.html) trait is provided in case another lookup
9//! method is needed.
10
11use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
12use std::collections::HashMap;
13use std::default::Default;
14use std::fmt;
15use std::hash::{Hash, Hasher};
16
17use super::{Symbol, SymbolId, Table};
18
19/// Indicates whether the result of a symbol lookup had to create a new table
20/// entry.
21#[derive(Clone, Eq, Ord, Hash, PartialEq, PartialOrd)]
22pub enum Insertion<T>  {
23    /// Result came from an item that was already present in table.
24    Present(T),
25    /// Result came from an item that was not present in table, and a new entry
26    /// was created.
27    New(T),
28}
29
30impl<T> Insertion<T> {
31    /// Maps over the type returned by an `Insertion` to produce a new value
32    /// that may be of a different type.
33    ///
34    /// # Example
35    /// ```
36    /// use symbol_map::indexing::{HashIndexing, Indexing, Insertion};
37    /// use std::str::FromStr;
38    ///
39    /// let mut index = HashIndexing::<String, usize>::default();
40    /// let s1 = String::from_str("value1").unwrap();
41    /// let s2 = String::from_str("value1").unwrap();
42    /// let s3 = String::from_str("value2").unwrap();
43    /// // get_or_insert normally returns an Insertion that borrows the
44    /// // structure on which it was invoked. We map the symbol reference
45    /// // returned after each insertion to a copy of the ID that was mapped to.
46    /// let id1: Insertion<usize> = index.get_or_insert(s1).map(|symbol| *symbol.id());
47    /// let id2: Insertion<usize> = index.get_or_insert(s2).map(|symbol| *symbol.id());
48    /// let id3: Insertion<usize> = index.get_or_insert(s3).map(|symbol| *symbol.id());
49    /// // The Insertion values are not the same because one was an insertion and
50    /// // the other a retrieval.
51    /// assert!(id1 != id2);
52    /// assert!(id1 != id3);
53    /// // But the symbol IDs for identical values are the same.
54    /// assert!(id1.unwrap() == id2.unwrap());
55    /// ```
56    pub fn map<F, X>(&self, f: F) -> Insertion<X> where F: FnOnce(&T) -> X {
57        match *self {
58            Insertion::Present(ref s) => Insertion::Present(f(s)),
59            Insertion::New(ref s) => Insertion::New(f(s)),
60        }
61    }
62
63    /// Unwraps an `Insertion` to produce the value which it wraps.
64    pub fn unwrap(self) -> T {
65        match self {
66            Insertion::Present(s) => s,
67            Insertion::New(s) => s,
68        }
69    }
70}
71
72/// Wrapper for a raw pointer which lets us treat it like a reference.
73///
74/// You are strongly discouraged from exposing this type directly in your data
75/// structures. This type is essentially a giant footgun. In particular:
76///
77/// - No safety checks or lifetimes protect this reference, so a `Ref<T>` may be
78/// invalidated without warning. (You may use a `Ref<T>` safely by ensuring that
79/// the references passed to `Ref<T>::new()` will never be dropped before the
80/// wrappers. A good example of when you'd be able to do this is in in a struct
81/// that has `Ref<T>` references into a data structure that it also owns.)
82///
83/// - The impls for `Debug`, `Eq`, `Hash`, `Ord`, `PartialEq`, and `PartialOrd`
84/// all dereference the raw pointer that this structure wraps. As a result, a
85/// `Ref<T>` must be removed from any data structures that make use of any of
86/// those interfaces *before* it is invalidated.
87///
88/// - `Ref<T>` wraps a value of type `*const T`, which is not usually `Send` or
89/// `Sync`. This restriction is overridden for a `Ref<T>` wrapper so that data
90/// structures which encapsulate it may themselves be `Send` or `Sync`. This
91/// makes it the responsibility of data structures using such wrappers to
92/// satisfy the contracts of those types.
93pub struct Ref<T> { ptr: *const T, }
94
95unsafe impl<T> Send for Ref<T> where T: Send { }
96
97unsafe impl<T> Sync for Ref<T> where T: Sync { }
98
99impl<T> Ref<T> {
100    /// Casts `data` to `*const T` and retains the pointer for dereferencing at
101    /// some point in the future.
102    fn new(data: &T) -> Self {
103        Ref { ptr: data as *const T, }
104    }
105
106    /// Dereferences the wrapped pointer. The explicit lifetime parameter should
107    /// match the lifetime of the parent `Table` that the wrapped pointer points
108    /// into. Care should be taken not to call this method if the integrity of
109    /// the reference passed to `new()` cannot be verified.
110    unsafe fn deref<'a>(&self) -> &'a T {
111        &*self.ptr
112    }
113}
114
115impl<T> Clone for Ref<T> {
116    fn clone(&self) -> Self {
117        Ref { ptr: self.ptr, }
118    }
119}
120
121impl<T> Copy for Ref<T> { }
122
123impl<T> fmt::Pointer for Ref<T> {
124    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125        fmt::Pointer::fmt(&self.ptr, f)
126    }
127}
128
129impl<T> fmt::Debug for Ref<T> where T: fmt::Debug {
130    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
131        write!(f, "Ref({:?})", unsafe { &(*self.ptr) })
132    }
133}
134
135impl<T> Eq for Ref<T> where T: Eq { }
136
137impl<T> Hash for Ref<T> where T: Hash {
138    fn hash<H>(&self, h: &mut H) where H: Hasher {
139        unsafe { (*self.ptr).hash(h) }
140    }
141}
142
143impl<T> Ord for Ref<T> where T: Ord {
144    fn cmp(&self, other: &Self) -> Ordering {
145        unsafe { (*self.ptr).cmp(&(*other.ptr)) }
146    }
147}
148
149impl<T> PartialEq for Ref<T> where T: PartialEq {
150    fn eq(&self, other: &Self) -> bool {
151        unsafe { (*self.ptr).eq(&(*other.ptr)) }
152    }
153}
154
155impl<T> PartialOrd for Ref<T> where T: PartialOrd {
156    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
157        unsafe { (*self.ptr).partial_cmp(&(*other.ptr)) }
158    }
159}
160
161/// Provides indexing for a `Table`, so that its elements may be retrieved
162/// efficiently. Most table lookups should go through an implementation of this
163/// trait structure instead of a `Table` directly.
164///
165/// An `Indexing` should own an underlying `Table<Indexing::Data>`. This table
166/// provides persistent storage for `Symbol<Indexing::Data>`s, which associate
167/// instances of `Data` with a `SymbolId`.
168///
169/// This trait is provided for extensibility. Realistically speaking, however,
170/// you should probably just use `HashIndexing`.
171pub trait Indexing: Default {
172    /// The type `T` of a `Table<T, D>`.
173    type Data;
174
175    /// The type `D` of a `Table<T, D>`.
176    type Id: SymbolId;
177
178    /// Returns a new indexing method that has already indexed the contents of
179    /// `table`.
180    fn from_table(table: Table<Self::Data, Self::Id>) -> Self;
181
182    /// Returns a read-only view of the underlying table.
183    fn table(&self) -> &Table<Self::Data, Self::Id>;
184
185    /// Extracts the underlying table from the index, discarding all pointers
186    /// into the table.
187    fn to_table(self) -> Table<Self::Data, Self::Id>;
188
189    /// Looks up `data` in the index. Returns `Some(&symbol)` if a symbol is
190    /// present, else `None`.
191    fn get(&self, data: &Self::Data) -> Option<&Symbol<Self::Data, Self::Id>>;
192
193    /// Looks up `data` in the index, inserting it into the index and `table` if
194    /// it isn't present. Returns the resulting `&Symbol<T>` wrapped in an
195    /// `Insertion` that indicates whether a new table entry had to be created.
196    fn get_or_insert<'s>(&'s mut self, data: Self::Data)
197                         -> Insertion<&'s Symbol<Self::Data, Self::Id>>;
198
199    /// Looks up the symbol with id `i` in the index. Returns `Some(symbol)` if
200    /// a symbol is present, else `None`.
201    fn get_symbol<'s>(&'s self, id: &Self::Id) -> Option<&'s Symbol<Self::Data, Self::Id>>;
202}
203
204/// HashMap-backed table indexing.
205#[derive(Debug)]
206pub struct HashIndexing<T, D> where T: Eq + Hash, D: SymbolId {
207    table: Table<T, D>,
208    by_symbol: HashMap<Ref<T>, Ref<Symbol<T, D>>>,
209    by_id: Vec<Ref<Symbol<T, D>>>,
210}
211
212impl<T, D> Default for HashIndexing<T, D> where T: Eq + Hash, D: SymbolId {
213    fn default() -> Self {
214        HashIndexing {
215            table: Table::new(),
216            by_symbol: HashMap::new(),
217            by_id: Vec::new(),
218        }
219    }
220}
221
222impl<T, D> Indexing for HashIndexing<T, D> where T: Eq + Hash, D: SymbolId {
223    type Data = T;
224    type Id = D;
225
226    fn from_table(table: Table<T, D>) -> Self {
227        let mut by_symbol = HashMap::with_capacity(table.len());
228        let mut by_id =
229            match table.iter().next() {
230                Some(symbol) => vec![Ref::new(symbol); table.len()],
231                None => Vec::new(),
232            };
233        for symbol in table.iter() {
234            by_symbol.insert(Ref::new(symbol.data()), Ref::new(symbol));
235            by_id[symbol.id().as_usize()] = Ref::new(symbol);
236        }
237        HashIndexing {
238            table: table,
239            by_symbol: by_symbol,
240            by_id: by_id,
241        }
242    }
243
244    fn table(&self) -> &Table<Self::Data, Self::Id> { &self.table }
245
246    fn to_table(self) -> Table<Self::Data, Self::Id> { self.table }
247
248    fn get<'s>(&'s self, data: &T) -> Option<&'s Symbol<T, D>> {
249        // Unsafe call to Ref::deref(): should be fine as because we own
250        // self.table and the ref refers into that.
251        self.by_symbol.get(&Ref::new(data)).map(|x| unsafe { x.deref() })
252    }
253
254    fn get_or_insert<'s>(&'s mut self, data: T) -> Insertion<&'s Symbol<T, D>> {
255        use std::collections::hash_map::Entry;
256        if let Entry::Occupied(e) = self.by_symbol.entry(Ref::new(&data)) {
257            // Unsafe call to Ref::deref(): should be fine as because we own
258            // self.table and the ref refers into that.
259            return Insertion::Present(unsafe { e.get().deref() })
260        }
261        // TODO: when the HashMap API gets revised, we may be able to do this
262        // without a second hashtable lookup.
263        let symbol = self.table.insert(data);
264        // The Ref that gets inserted has to be backed by data in the table, not
265        // data on the stack (which is how we did the previous lookup).
266        self.by_symbol.insert(Ref::new(symbol.data()), Ref::new(symbol));
267        self.by_id.push(Ref::new(symbol));
268        Insertion::New(symbol)
269    }
270
271    fn get_symbol<'s>(&'s self, id: &D) -> Option<&'s Symbol<T, D>> {
272        self.by_id.get(id.as_usize()).map(|x| unsafe { x.deref() })
273    }
274}
275
276#[cfg(test)]
277mod test {
278    use super::{HashIndexing, Indexing, Insertion, Ref};
279    use ::{SymbolId, Table};
280
281    use std::collections::hash_map::DefaultHasher;
282    use std::cmp::Ordering;
283    use std::hash::{Hash, Hasher};
284    use std::str::FromStr;
285
286    const VALUES: &'static [usize] = &[101, 203, 500, 30, 0, 1];
287
288    #[test]
289    fn ref_impls_ok() {
290        let x1 = String::from_str("foo").unwrap();
291        let x2 = String::from_str("foo").unwrap();
292        let x3 = String::from_str("fo").unwrap();
293        let x4 = String::from_str("fox").unwrap();
294        assert!(x1 == x2);
295        assert!(&x1 as *const String != &x2 as *const String);
296
297        let r1 = Ref::new(&x1);
298        let r2 = Ref::new(&x2);
299        let r3 = Ref::new(&x3);
300        let r4 = Ref::new(&x4);
301        // Eq, PartialEq.
302        assert_eq!(r1, r2);
303        assert!(r1 != r3);
304        assert!(r1 != r4);
305
306        // Ord (skip PartialOrd).
307        assert_eq!(r1.cmp(&r2), Ordering::Equal);
308        assert_eq!(r1.cmp(&r3), Ordering::Greater);
309        assert_eq!(r1.cmp(&r4), Ordering::Less);
310
311        // Hash.
312        let mut rh1 = DefaultHasher::new();
313        let mut rh2 = DefaultHasher::new();
314        let mut rh3 = DefaultHasher::new();
315        let mut rh4 = DefaultHasher::new();
316        r1.hash(&mut rh1);
317        r2.hash(&mut rh2);
318        r3.hash(&mut rh3);
319        r4.hash(&mut rh4);
320        let rh1 = rh1.finish();
321        let rh2 = rh2.finish();
322        let rh3 = rh3.finish();
323        let rh4 = rh4.finish();
324
325        let mut xh1 = DefaultHasher::new();
326        let mut xh2 = DefaultHasher::new();
327        let mut xh3 = DefaultHasher::new();
328        let mut xh4 = DefaultHasher::new();
329        x1.hash(&mut xh1);
330        x2.hash(&mut xh2);
331        x3.hash(&mut xh3);
332        x4.hash(&mut xh4);
333        let xh1 = xh1.finish();
334        let xh2 = xh2.finish();
335        let xh3 = xh3.finish();
336        let xh4 = xh4.finish();
337
338        assert_eq!(rh1, xh1);
339        assert_eq!(rh2, xh2);
340        assert_eq!(rh3, xh3);
341        assert_eq!(rh4, xh4);
342    }
343
344    #[test]
345    fn hash_indexing_empty_ok() {
346        let t = Table::<usize, usize>::new();
347        assert_eq!(t.len(), 0);
348        let i = HashIndexing::from_table(t);
349        assert!(i.by_symbol.is_empty());
350        assert!(i.by_id.is_empty());
351    }
352
353    #[test]
354    fn hash_indexing_from_table_ok() {
355        let mut t = Table::<usize, usize>::new();
356        for v in VALUES.iter() {
357            t.insert(*v);
358        }
359        let expected_len = t.len();
360        let expected_values: Vec<(usize, usize)> =
361            t.iter().map(|s| (*s.data(), *s.id())).collect();
362
363        let i = HashIndexing::from_table(t);
364        assert_eq!(i.by_symbol.len(), expected_len);
365        assert_eq!(i.by_id.len(), expected_len);
366        for (data, id) in expected_values.into_iter() {
367            let data_ref = Ref::new(&data);
368            unsafe {
369                assert_eq!(i.by_symbol.get(&data_ref).unwrap().deref().data(), &data);
370                assert_eq!(i.by_symbol.get(&data_ref).unwrap().deref().id(), &id);
371                assert_eq!(i.by_id[id.as_usize()].deref().data(), &data);
372            }
373        }
374    }
375
376    #[test]
377    fn hash_indexing_empty_insertion_ok() {
378        let mut i = HashIndexing::<usize, usize>::default();
379
380        for v in VALUES.iter() {
381            assert!(i.get(v).is_none());
382            let id = match i.get_or_insert(*v) {
383                Insertion::Present(_) => panic!(),
384                Insertion::New(symbol) => {
385                    assert_eq!(symbol.data(), v);
386                    *symbol.id()
387                },
388            };
389            assert_eq!(i.get_symbol(&id).unwrap().data(), v);
390        }
391    }
392
393    
394    #[test]
395    fn indexed_present_ok() {
396        let mut t = Table::<usize, usize>::new();
397        for v in VALUES.iter() {
398            t.insert(*v);
399        }
400
401        let mut i = HashIndexing::from_table(t);
402        for v in VALUES.iter() {
403            assert_eq!(i.get(v).unwrap().data(), v);
404            let id = match i.get_or_insert(*v) {
405                Insertion::New(_) => panic!(),
406                Insertion::Present(symbol) => {
407                    assert_eq!(symbol.data(), v);
408                    *symbol.id()
409                },
410            };
411            assert_eq!(i.get_symbol(&id).unwrap().data(), v);
412        }
413    }
414
415    #[test]
416    fn send_to_thread_safe_ok() {
417        use std::sync::Arc;
418        use std::thread;
419
420        let mut t = Table::<usize, usize>::new();
421        for v in VALUES.iter() {
422            t.insert(*v);
423        }
424        let index = Arc::new(HashIndexing::from_table(t));
425        {
426            let id1 = index.get(&VALUES[0]).unwrap().id().clone();
427            let id2 = index.get(&VALUES[1]).unwrap().id().clone();
428            let t1 = {
429                let index = index.clone();
430                thread::spawn(move || index.get_symbol(&id1).map(|x| (*x.data(), x.id().clone())))
431            };
432            let t2 = {
433                let index = index.clone();
434                thread::spawn(move || index.get_symbol(&id2).map(|x| (*x.data(), x.id().clone())))
435            };
436            let v1 = index.get(&VALUES[0]).unwrap();
437            let v2 = index.get(&VALUES[1]).unwrap();
438
439            match t1.join() {
440                Ok(Some((data, id))) => {
441                    assert_eq!(&id, v1.id());
442                    assert_eq!(data, *v1.data());
443                },
444                _ => panic!(),
445            }
446            match t2.join() {
447                Ok(Some((data, id))) => {
448                    assert_eq!(&id, v2.id());
449                    assert_eq!(data, *v2.data());
450                },
451                _ => panic!(),
452            }
453        }
454    }
455
456    #[test]
457    fn sync_to_thread_ok() {
458        use ::crossbeam;
459
460        let mut t = Table::<usize, usize>::new();
461        for v in VALUES.iter() {
462            t.insert(*v);
463        }
464        let index = HashIndexing::from_table(t);
465        let id1 = *index.get(&VALUES[0]).unwrap().id();
466        let id2 = *index.get(&VALUES[1]).unwrap().id();
467        let index = &index;
468        let t1 = 
469            crossbeam::scope(move |scope| scope.spawn(move || index.get_symbol(&id1).map(|x| (x.data(), x.id()))));
470        let t2 =
471            crossbeam::scope(move |scope| scope.spawn(move || index.get_symbol(&id2).map(|x| (x.data(), x.id()))));
472        let v1 = index.get(&VALUES[0]).unwrap();
473        let v2 = index.get(&VALUES[1]).unwrap();
474
475        match t1.join() {
476            Some((data, id)) => {
477                assert_eq!(id, v1.id());
478                assert_eq!(data, v1.data());
479            },
480            _ => panic!(),
481        }
482        match t2.join() {
483            Some((data, id)) => {
484                assert_eq!(id, v2.id());
485                assert_eq!(data, v2.data());
486            },
487            _ => panic!(),
488        }
489    }
490}