1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
//! Interner for (potentially large) integer values.
//!
//! We support matching on integers that can be represented by `u64`, but only
//! support automata results that fit in a `u32`. So we intern the (relatively
//! few compared to the full range of `u64`) integers we are matching against
//! here and then reference them by `IntegerId`.

use serde::{Deserialize, Serialize};
use std::collections::BTreeMap;
use std::num::{NonZeroU16, NonZeroU32};

/// An identifier for an interned integer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct IntegerId(#[doc(hidden)] pub NonZeroU16);

/// An interner for integer values.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct IntegerInterner {
    // Note: we use `BTreeMap`s for deterministic serialization.
    map: BTreeMap<u64, IntegerId>,
    values: Vec<u64>,
}

impl IntegerInterner {
    /// Construct a new `IntegerInterner`.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a value into this `IntegerInterner`, returning its canonical
    /// `IntegerId`.
    #[inline]
    pub fn intern(&mut self, value: impl Into<u64>) -> IntegerId {
        debug_assert_eq!(self.map.len(), self.values.len());

        let value = value.into();

        if let Some(id) = self.map.get(&value) {
            return *id;
        }

        assert!((self.values.len() as u64) < (std::u16::MAX as u64));
        let id = IntegerId(unsafe { NonZeroU16::new_unchecked(self.values.len() as u16 + 1) });

        self.values.push(value);
        self.map.insert(value, id);
        debug_assert_eq!(self.map.len(), self.values.len());

        id
    }

    /// Get the id of an already-interned integer, or `None` if it has not been
    /// interned.
    pub fn already_interned(&self, value: impl Into<u64>) -> Option<IntegerId> {
        let value = value.into();
        self.map.get(&value).copied()
    }

    /// Lookup a previously interned integer by id.
    #[inline]
    pub fn lookup(&self, id: IntegerId) -> u64 {
        let index = id.0.get() as usize - 1;
        self.values[index]
    }
}

impl From<IntegerId> for NonZeroU32 {
    #[inline]
    fn from(id: IntegerId) -> NonZeroU32 {
        id.0.into()
    }
}