1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
//! Representing paths through the dataflow graph.
//!
//! Paths are relative from a *root* instruction, which is the instruction we
//! are determining which, if any, optimizations apply.
//!
//! Paths are series of indices through each instruction's children as we
//! traverse down the graph from the root. Children are immediates followed by
//! arguments: `[imm0, imm1, ..., immN, arg0, arg1, ..., argN]`.
//!
//! ## Examples
//!
//! * `[0]` is the path to the root.
//! * `[0, 0]` is the path to the root's first child.
//! * `[0, 1]` is the path to the root's second child.
//! * `[0, 1, 0]` is the path to the root's second child's first child.
//!
//! ## Interning
//!
//! To avoid extra allocations, de-duplicate paths, and reference them via a
//! fixed-length value, we intern paths inside a `PathInterner` and then
//! reference them via `PathId`.

// TODO: Make `[]` the path to the root, and get rid of this redundant leading
// zero that is currently in every single path.

use serde::de::{Deserializer, SeqAccess, Visitor};
use serde::ser::{SerializeSeq, Serializer};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryInto;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;

/// A path through the data-flow graph from the root instruction.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct Path<'a>(pub &'a [u8]);

impl Path<'_> {
    /// Construct a new path through the data-flow graph from the root
    /// instruction.
    pub fn new(path: &impl AsRef<[u8]>) -> Path {
        Path(path.as_ref())
    }
}

/// An identifier for an interned path.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct PathId(u16);

/// An interner and de-duplicator for `Path`s.
///
/// Can be serialized and deserialized while maintaining the same id to interned
/// path mapping.
#[derive(Debug, Default)]
pub struct PathInterner {
    /// A map from a path (whose owned data is inside `arena`) to the canonical
    /// `PathId` we assigned it when interning it.
    map: HashMap<UnsafePath, PathId>,

    /// A map from a `PathId` index to an unsafe, self-borrowed path pointing
    /// into `arena`. It is safe to given these out as safe `Path`s, as long as
    /// the lifetime is not longer than this `PathInterner`'s lifetime.
    paths: Vec<UnsafePath>,

    /// Bump allocation arena for path data. The bump arena ensures that these
    /// allocations never move, and are therefore safe for self-references.
    arena: bumpalo::Bump,
}

impl PathInterner {
    /// Construct a new, empty `PathInterner`.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Intern a path into this `PathInterner`, returning its canonical
    /// `PathId`.
    ///
    /// If we've already interned this path before, then the existing id we
    /// already assigned to it is returned. If we've never seen this path
    /// before, then it is copied into this `PathInterner` and a new id is
    /// assigned to it.
    #[inline]
    pub fn intern<'a>(&mut self, path: Path<'a>) -> PathId {
        let unsafe_path = unsafe { UnsafePath::from_path(&path) };
        if let Some(id) = self.map.get(&unsafe_path) {
            return *id;
        }
        self.intern_new(path)
    }

    #[inline(never)]
    fn intern_new<'a>(&mut self, path: Path<'a>) -> PathId {
        let id: u16 = self
            .paths
            .len()
            .try_into()
            .expect("too many paths interned");
        let id = PathId(id);

        let our_path = self.arena.alloc_slice_copy(&path.0);
        let unsafe_path = unsafe { UnsafePath::from_slice(&our_path) };

        self.paths.push(unsafe_path.clone());
        let old = self.map.insert(unsafe_path, id);

        debug_assert!(old.is_none());
        debug_assert_eq!(self.lookup(id), path);
        debug_assert_eq!(self.intern(path), id);

        id
    }

    /// Lookup a previously interned path by id.
    #[inline]
    pub fn lookup<'a>(&'a self, id: PathId) -> Path<'a> {
        let unsafe_path = self
            .paths
            .get(id.0 as usize)
            .unwrap_or_else(|| Self::lookup_failure());
        unsafe { unsafe_path.as_path() }
    }

    #[inline(never)]
    fn lookup_failure() -> ! {
        panic!(
            "no path for the given id; this can only happen when mixing `PathId`s with different \
             `PathInterner`s"
        )
    }
}

impl Serialize for PathInterner {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut seq = serializer.serialize_seq(Some(self.paths.len()))?;
        for p in &self.paths {
            let p = unsafe { p.as_path() };
            seq.serialize_element(&p)?;
        }
        seq.end()
    }
}

impl<'de> Deserialize<'de> for PathInterner {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        deserializer.deserialize_seq(PathInternerVisitor {
            marker: PhantomData,
        })
    }
}

struct PathInternerVisitor {
    marker: PhantomData<fn() -> PathInterner>,
}

impl<'de> Visitor<'de> for PathInternerVisitor {
    type Value = PathInterner;

    fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(formatter, "a `peepmatic_runtime::paths::PathInterner`")
    }

    fn visit_seq<M>(self, mut access: M) -> Result<Self::Value, M::Error>
    where
        M: SeqAccess<'de>,
    {
        const DEFAULT_CAPACITY: usize = 16;
        let capacity = access.size_hint().unwrap_or(DEFAULT_CAPACITY);

        let mut interner = PathInterner {
            map: HashMap::with_capacity(capacity),
            paths: Vec::with_capacity(capacity),
            arena: bumpalo::Bump::new(),
        };

        while let Some(path) = access.next_element::<Path>()? {
            interner.intern(path);
        }

        Ok(interner)
    }
}

/// An unsafe, unchecked borrow of a path. Not for use outside of
/// `PathInterner`!
#[derive(Clone, Debug)]
struct UnsafePath {
    ptr: *const u8,
    len: usize,
}

impl PartialEq for UnsafePath {
    fn eq(&self, rhs: &UnsafePath) -> bool {
        unsafe { self.as_slice() == rhs.as_slice() }
    }
}

impl Eq for UnsafePath {}

impl Hash for UnsafePath {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: Hasher,
    {
        unsafe { self.as_slice().hash(hasher) }
    }
}

/// Safety: callers must ensure that the constructed values won't have unsafe
/// usages of `PartialEq`, `Eq`, or `Hash`.
impl UnsafePath {
    unsafe fn from_path(p: &Path) -> Self {
        Self::from_slice(&p.0)
    }

    unsafe fn from_slice(s: &[u8]) -> Self {
        UnsafePath {
            ptr: s.as_ptr(),
            len: s.len(),
        }
    }
}

/// Safety: callers must ensure that `'a` does not outlive the lifetime of the
/// underlying data.
impl UnsafePath {
    unsafe fn as_slice<'a>(&self) -> &'a [u8] {
        std::slice::from_raw_parts(self.ptr, self.len)
    }

    unsafe fn as_path<'a>(&self) -> Path<'a> {
        Path(self.as_slice())
    }
}