scryer_prolog/
atom_table.rs

use crate::parser::ast::MAX_ARITY;
use crate::raw_block::*;
use crate::rcu::{Rcu, RcuRef};
use crate::types::*;

use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::mem;
use std::ops::Deref;
use std::ptr;
use std::slice;
use std::str;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::RwLock;
use std::sync::Weak;

use indexmap::IndexSet;

use scryer_modular_bitfield::prelude::*;

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Atom {
    pub index: u64,
}

const_assert!(mem::size_of::<Atom>() == 8);

include!(concat!(env!("OUT_DIR"), "/static_atoms.rs"));

impl<'a> From<&'a Atom> for Atom {
    #[inline]
    fn from(atom: &'a Atom) -> Self {
        *atom
    }
}

impl From<bool> for Atom {
    #[inline]
    fn from(value: bool) -> Self {
        if value {
            atom!("true")
        } else {
            atom!("false")
        }
    }
}

impl indexmap::Equivalent<Atom> for str {
    fn equivalent(&self, key: &Atom) -> bool {
        &*key.as_str() == self
    }
}

const ATOM_TABLE_INIT_SIZE: usize = 1 << 16;
const ATOM_TABLE_ALIGN: usize = 8;

#[inline(always)]
fn global_atom_table() -> &'static RwLock<Weak<AtomTable>> {
    #[cfg(feature = "rust_beta_channel")]
    {
        // `Weak::new` as a `const fn` will be stabilized in 1.73, which is currently
        // in beta; until then we need a OnceLock for initialization
        static GLOBAL_ATOM_TABLE: RwLock<Weak<AtomTable>> = RwLock::const_new(Weak::new());
        &GLOBAL_ATOM_TABLE
    }
    #[cfg(not(feature = "rust_beta_channel"))]
    {
        use std::sync::OnceLock;
        static GLOBAL_ATOM_TABLE: OnceLock<RwLock<Weak<AtomTable>>> = OnceLock::new();
        GLOBAL_ATOM_TABLE.get_or_init(|| RwLock::new(Weak::new()))
    }
}

#[inline(always)]
fn arc_atom_table() -> Option<Arc<AtomTable>> {
    global_atom_table().read().unwrap().upgrade()
}

impl RawBlockTraits for AtomTable {
    #[inline]
    fn init_size() -> usize {
        ATOM_TABLE_INIT_SIZE
    }

    #[inline]
    fn align() -> usize {
        ATOM_TABLE_ALIGN
    }
}

#[bitfield]
#[derive(Copy, Clone, Debug)]
struct AtomHeader {
    #[allow(unused)]
    m: bool,
    len: B50,
    #[allow(unused)]
    padding: B13,
}

impl AtomHeader {
    fn build_with(len: u64) -> Self {
        AtomHeader::new().with_len(len).with_m(false)
    }
}
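
// Layout note: a dynamically allocated atom lives in the table's raw block as an
// 8-byte `AtomHeader` (a mark bit, a 50-bit length, and padding) followed directly
// by the atom's UTF-8 bytes; `AtomTable::build_with` below rounds each such
// allocation up to a multiple of 8 bytes.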

impl Hash for Atom {
    #[inline]
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        self.as_str().hash(hasher)
        // hasher.write_usize(self.index)
    }
}

#[macro_export]
macro_rules! is_char {
    ($s:expr) => {
        !$s.is_empty() && $s.chars().nth(1).is_none()
    };
}
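
// For illustration: `is_char!` holds exactly when the expression is a non-empty
// string containing a single `char`, e.g. `is_char!("a")` is true while
// `is_char!("")` and `is_char!("ab")` are false.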

pub enum AtomString<'a> {
    Static(&'a str),
    Dynamic(AtomTableRef<str>),
}

impl AtomString<'_> {
    pub fn map<F>(self, f: F) -> Self
    where
        for<'a> F: FnOnce(&'a str) -> &'a str,
    {
        match self {
            Self::Static(reference) => Self::Static(f(reference)),
            Self::Dynamic(guard) => Self::Dynamic(AtomTableRef::map(guard, f)),
        }
    }
}

impl std::fmt::Debug for AtomString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        std::fmt::Debug::fmt(self.deref(), f)
    }
}

impl std::fmt::Display for AtomString<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        std::fmt::Display::fmt(self.deref(), f)
    }
}

impl std::ops::Deref for AtomString<'_> {
    type Target = str;
    fn deref(&self) -> &Self::Target {
        match self {
            Self::Static(reference) => reference,
            Self::Dynamic(guard) => guard.deref(),
        }
    }
}
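
// Usage note: `AtomString` derefs to `str`, so string methods apply directly,
// e.g. `atom!("true").as_str().starts_with('t')`.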

#[cfg(feature = "repl")]
impl rustyline::completion::Candidate for AtomString<'_> {
    fn display(&self) -> &str {
        self.deref()
    }

    fn replacement(&self) -> &str {
        self.deref()
    }
}

impl Atom {
    #[inline(always)]
    pub fn is_static(self) -> bool {
        (self.index as usize) < STRINGS.len() << 3
    }

    #[inline(always)]
    pub fn as_ptr(self) -> Option<AtomTableRef<u8>> {
        if self.is_static() {
            None
        } else {
            let atom_table =
                arc_atom_table().expect("We should only have an Atom while there is an AtomTable");
            unsafe {
                AtomTableRef::try_map(atom_table.buf(), |buf| {
                    (buf as *const u8)
                        .add((self.index as usize) - (STRINGS.len() << 3))
                        .as_ref()
                })
            }
        }
    }

    #[inline(always)]
    pub fn from(index: u64) -> Self {
        Self { index }
    }

    #[inline(always)]
    pub fn len(self) -> usize {
        if self.is_static() {
            STRINGS[(self.index >> 3) as usize].len()
        } else {
            let ptr = self.as_ptr().unwrap();
            let ptr = ptr.deref() as *const u8 as *const AtomHeader;
            unsafe { ptr::read(ptr) }.len() as _
        }
    }

    pub fn is_empty(self) -> bool {
        self.len() == 0
    }

    #[inline(always)]
    pub fn flat_index(self) -> u64 {
        self.index >> 3
    }

    pub fn as_char(self) -> Option<char> {
        let s = self.as_str();
        let mut it = s.chars();

        let c1 = it.next();
        let c2 = it.next();

        if c2.is_none() {
            c1
        } else {
            None
        }
    }

    #[inline]
    pub fn as_str(&self) -> AtomString<'static> {
        if self.is_static() {
            AtomString::Static(STRINGS[(self.index >> 3) as usize])
        } else if let Some(ptr) = self.as_ptr() {
            AtomString::Dynamic(AtomTableRef::map(ptr, |ptr| {
                let header =
                    // Miri seems to hit this line a lot
                    unsafe { ptr::read::<AtomHeader>(ptr as *const u8 as *const AtomHeader) };
                let len = header.len() as usize;
                let buf = unsafe { (ptr as *const u8).add(mem::size_of::<AtomHeader>()) };

                unsafe { str::from_utf8_unchecked(slice::from_raw_parts(buf, len)) }
            }))
        } else {
            AtomString::Static(STRINGS[(self.index >> 3) as usize])
        }
    }

    pub fn defrock_brackets(&self, atom_tbl: &AtomTable) -> Self {
        let s = self.as_str();

        let sub_str = if s.starts_with('(') && s.ends_with(')') {
            &s['('.len_utf8()..s.len() - ')'.len_utf8()]
        } else {
            return *self;
        };

        AtomTable::build_with(atom_tbl, sub_str)
    }
}
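
// Illustrative sketch (not part of the original source): how atoms round-trip
// through the table; `AtomTable::new` and `build_with` are defined further below.
#[cfg(test)]
mod atom_usage_sketch {
    use super::*;

    #[test]
    fn build_and_inspect() {
        let table = AtomTable::new();

        // Interning a string yields an `Atom` whose accessors mirror the source text.
        let hello = AtomTable::build_with(&table, "hello");
        assert_eq!(&*hello.as_str(), "hello");
        assert_eq!(hello.len(), 5);
        assert_eq!(hello.as_char(), None);

        // A single-character atom can be recovered as a `char`.
        let a = AtomTable::build_with(&table, "a");
        assert_eq!(a.as_char(), Some('a'));

        // `defrock_brackets` strips one surrounding pair of parentheses.
        let wrapped = AtomTable::build_with(&table, "(foo)");
        assert_eq!(&*wrapped.defrock_brackets(&table).as_str(), "foo");
    }
}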

unsafe fn write_to_ptr(string: &str, ptr: *mut u8) {
    ptr::write(ptr as *mut _, AtomHeader::build_with(string.len() as u64));
    let str_ptr = ptr.add(mem::size_of::<AtomHeader>());
    ptr::copy_nonoverlapping(string.as_ptr(), str_ptr, string.len());
}

impl PartialOrd for Atom {
    #[inline]
    fn partial_cmp(&self, other: &Atom) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for Atom {
    #[inline]
    fn cmp(&self, other: &Atom) -> Ordering {
        self.as_str().cmp(&*other.as_str())
    }
}

#[derive(Debug)]
pub struct InnerAtomTable {
    block: RawBlock<AtomTable>,
    pub table: Rcu<IndexSet<Atom>>,
}

#[derive(Debug)]
pub struct AtomTable {
    inner: Rcu<InnerAtomTable>,
    // this lock is taken during resizing
    update: Mutex<()>,
}

pub type AtomTableRef<M> = RcuRef<InnerAtomTable, M>;

impl InnerAtomTable {
    #[inline(always)]
    fn lookup_str(self: &InnerAtomTable, string: &str) -> Option<Atom> {
        STATIC_ATOMS_MAP
            .get(string)
            .cloned()
            .or_else(|| self.table.active_epoch().get(string).cloned())
    }
}

impl AtomTable {
    #[inline]
    pub fn new() -> Arc<Self> {
        let upgraded = global_atom_table().read().unwrap().upgrade();
        // don't inline `upgraded`: the temporary read guard would otherwise be dropped
        // too late and still be held when we take the write lock in the `None` case
        if let Some(atom_table) = upgraded {
            atom_table
        } else {
            let mut guard = global_atom_table().write().unwrap();
            // try to upgrade again in case we lost the race on the write lock
            if let Some(atom_table) = guard.upgrade() {
                atom_table
            } else {
                let atom_table = Arc::new(Self {
                    inner: Rcu::new(InnerAtomTable {
                        block: RawBlock::new(),
                        table: Rcu::new(IndexSet::new()),
                    }),
                    update: Mutex::new(()),
                });
                *guard = Arc::downgrade(&atom_table);
                atom_table
            }
        }
    }

    #[inline]
    pub fn buf(&self) -> AtomTableRef<u8> {
        AtomTableRef::<InnerAtomTable>::map(self.inner.active_epoch(), |inner| {
            unsafe { inner.block.base.as_ref() }.unwrap()
        })
    }

    pub fn active_table(&self) -> RcuRef<IndexSet<Atom>, IndexSet<Atom>> {
        self.inner.active_epoch().table.active_epoch()
    }

    pub fn build_with(atom_table: &AtomTable, string: &str) -> Atom {
        loop {
            let mut block_epoch = atom_table.inner.active_epoch();
            let mut table_epoch = block_epoch.table.active_epoch();

            if let Some(atom) = block_epoch.lookup_str(string) {
                return atom;
            }

            // take a lock to prevent concurrent updates
            let update_guard = atom_table.update.lock().unwrap();

            let is_same_allocation =
                RcuRef::same_epoch(&block_epoch, &atom_table.inner.active_epoch());
            let is_same_atom_list =
                RcuRef::same_epoch(&table_epoch, &block_epoch.table.active_epoch());

            if !(is_same_allocation && is_same_atom_list) {
                // some other thread raced us between our lookup and
                // our acquiring the update lock,
                // try again
                continue;
            }

            let size = mem::size_of::<AtomHeader>() + string.len();
            let align_offset = 8 * mem::align_of::<AtomHeader>();
            let size = (size & !(align_offset - 1)) + align_offset;

            unsafe {
                let len_ptr = loop {
                    let ptr = block_epoch.block.alloc(size);

                    if ptr.is_null() {
                        // garbage collection would go here
                        let new_block = block_epoch.block.grow_new().unwrap();
                        let new_table = Rcu::new(table_epoch.clone());
                        let new_alloc = InnerAtomTable {
                            block: new_block,
                            table: new_table,
                        };
                        atom_table.inner.replace(new_alloc);
                        block_epoch = atom_table.inner.active_epoch();
                        table_epoch = block_epoch.table.active_epoch();
                    } else {
                        break ptr;
                    }
                };

                let ptr_base = block_epoch.block.base as usize;

                write_to_ptr(string, len_ptr);

                let atom = Atom {
                    index: ((STRINGS.len() << 3) + len_ptr as usize - ptr_base) as u64,
                };

                let mut table = table_epoch.clone();
                table.insert(atom);
                block_epoch.table.replace(table);

                // explicit drop to ensure we don't accidentally drop it early
                drop(update_guard);

                return atom;
            }
        }
    }
}
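
// Illustrative sketch (not part of the original source): interning is idempotent,
// so building the same string twice yields the same `Atom`.
#[cfg(test)]
mod interning_sketch {
    use super::*;

    #[test]
    fn build_with_deduplicates() {
        let table = AtomTable::new();

        let first = AtomTable::build_with(&table, "shared_name");
        let second = AtomTable::build_with(&table, "shared_name");

        // The second call finds the existing entry via `lookup_str` instead of
        // allocating again, so both calls return an equal atom.
        assert_eq!(first, second);

        // A string that is already a static atom (such as "true", used above via
        // `atom!`) resolves through `STATIC_ATOMS_MAP` and never touches the block.
        assert!(AtomTable::build_with(&table, "true").is_static());
    }
}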

unsafe impl Send for AtomTable {}
unsafe impl Sync for AtomTable {}

#[bitfield]
#[repr(u64)]
#[derive(Copy, Clone, Debug)]
pub struct AtomCell {
    name: B46,
    arity: B10,
    #[allow(unused)]
    f: bool,
    #[allow(unused)]
    m: bool,
    #[allow(unused)]
    tag: B6,
}

impl AtomCell {
    #[inline]
    pub fn build_with(name: u64, arity: u16, tag: HeapCellValueTag) -> Self {
        if arity > 0 {
            debug_assert!(arity as usize <= MAX_ARITY);

            AtomCell::new()
                .with_name(name)
                .with_arity(arity)
                .with_f(false)
                .with_tag(tag as u8)
        } else {
            AtomCell::new()
                .with_name(name)
                .with_f(false)
                .with_tag(tag as u8)
        }
    }

    #[inline]
    pub fn get_index(self) -> usize {
        self.name() as usize
    }

    #[inline]
    pub fn get_name(self) -> Atom {
        Atom::from((self.get_index() as u64) << 3)
    }

    #[inline]
    pub fn get_arity(self) -> usize {
        self.arity() as usize
    }

    #[inline]
    pub fn get_name_and_arity(self) -> (Atom, usize) {
        (Atom::from((self.get_index() as u64) << 3), self.get_arity())
    }
}
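
// Illustrative sketch (not part of the original source): packing an atom's flat
// index and an arity into an `AtomCell` and reading them back. The
// `HeapCellValueTag::Atom` variant is assumed to be provided by `crate::types`.
#[cfg(test)]
mod atom_cell_sketch {
    use super::*;

    #[test]
    fn name_and_arity_round_trip() {
        let name = atom!("true");

        // `build_with` stores the flat (right-shifted) index; `get_name` shifts it
        // back into a full atom index.
        let cell = AtomCell::build_with(name.flat_index(), 2, HeapCellValueTag::Atom);

        assert_eq!(cell.get_name_and_arity(), (name, 2));
    }
}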