Skip to main content

pipa/runtime/
atom.rs

1use std::sync::Arc;
2
/// Lightweight, copyable handle for a string stored in an [`AtomTable`].
///
/// The wrapped `u32` is the table index. Index `0` is the value returned by
/// [`Atom::empty`] and the only index for which [`Atom::is_valid`] is `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Atom(pub u32);

impl Atom {
    /// Returns the raw table index wrapped by this atom.
    pub fn index(&self) -> u32 {
        let Atom(raw) = *self;
        raw
    }

    /// Returns the atom whose index is `0`.
    pub fn empty() -> Self {
        Self(0)
    }

    /// Returns `true` for every atom except the index-`0` atom.
    pub fn is_valid(&self) -> bool {
        !matches!(self, Atom(0))
    }
}
19
/// A single table slot: the interned string together with its reference count.
#[derive(Debug, Clone)]
struct AtomEntry {
    /// The interned text; a clone of this same `Arc` is the key in `AtomTable::index_map`.
    string: Arc<str>,
    /// Outstanding references; `AtomTable::release` drops the entry when this reaches zero.
    ref_count: u32,
}
25
/// Interning table that maps strings to [`Atom`] indices and back.
pub struct AtomTable {
    /// Atom index -> interned string and its reference count.
    entries: FxHashMap<u32, AtomEntry>,
    /// Interned string -> atom index (the reverse direction of `entries`).
    index_map: FxHashMap<Arc<str>, u32>,
    /// Index that will be assigned to the next newly interned string.
    next_index: u32,
    /// Indices flagged through `mark_symbol_atom`; used as a set (unit values).
    symbol_atoms: FxHashMap<u32, ()>,

    /// Atom index -> numeric value, for interned strings recognised as array
    /// indices (value below 1_000_000); read by `get_array_index`.
    index_atoms: FxHashMap<u32, usize>,
}
34
35impl AtomTable {
36    pub fn new() -> Self {
37        let mut table = AtomTable {
38            entries: FxHashMap::with_capacity_and_hasher(4096, Default::default()),
39            index_map: FxHashMap::with_capacity_and_hasher(4096, Default::default()),
40            next_index: 0,
41            symbol_atoms: FxHashMap::default(),
42            index_atoms: FxHashMap::with_capacity_and_hasher(64, Default::default()),
43        };
44        table.intern("");
45        table
46    }
47
48    pub fn intern(&mut self, s: &str) -> Atom {
49        if let Some(&idx) = self.index_map.get(s) {
50            if let Some(entry) = self.entries.get_mut(&idx) {
51                entry.ref_count = entry.ref_count.saturating_add(1);
52            }
53            return Atom(idx);
54        }
55
56        let idx = self.next_index;
57        self.next_index += 1;
58
59        let arc: Arc<str> = Arc::from(s);
60        self.index_map.insert(Arc::clone(&arc), idx);
61        self.entries.insert(
62            idx,
63            AtomEntry {
64                string: arc,
65                ref_count: 1,
66            },
67        );
68
69        if let Ok(array_idx) = s.parse::<usize>() {
70            if array_idx < 1_000_000 {
71                self.index_atoms.insert(idx, array_idx);
72            }
73        }
74
75        Atom(idx)
76    }
77
78    #[inline]
79    pub fn intern_fast(&mut self, s: &str) -> Atom {
80        if let Some(&idx) = self.index_map.get(s) {
81            return Atom(idx);
82        }
83        self.intern(s)
84    }
85
86    pub fn lookup(&self, s: &str) -> Option<Atom> {
87        self.index_map.get(s).map(|&idx| Atom(idx))
88    }
89
90    pub fn intern_concat(&mut self, a: &str, b: &str) -> Atom {
91        let total_len = a.len() + b.len();
92
93        if total_len <= 64 {
94            let mut buf = [0u8; 64];
95            buf[..a.len()].copy_from_slice(a.as_bytes());
96            buf[a.len()..total_len].copy_from_slice(b.as_bytes());
97            let s = unsafe { std::str::from_utf8_unchecked(&buf[..total_len]) };
98            return self.intern(s);
99        }
100
101        let mut combined = String::with_capacity(total_len);
102        combined.push_str(a);
103        combined.push_str(b);
104        self.intern(&combined)
105    }
106
107    pub fn intern_concat_atoms(&mut self, a: Atom, b: Atom) -> Atom {
108        let a_str = self.get(a);
109        let b_str = self.get(b);
110        let a_len = a_str.len();
111        let b_len = b_str.len();
112        let total = a_len + b_len;
113
114        if total <= 128 {
115            let mut buf = [0u8; 128];
116            buf[..a_len].copy_from_slice(a_str.as_bytes());
117            buf[a_len..total].copy_from_slice(b_str.as_bytes());
118
119            let s = unsafe { std::str::from_utf8_unchecked(&buf[..total]) };
120            self.intern(s)
121        } else {
122            let a_owned = a_str.to_string();
123            let b_owned = b_str.to_string();
124            let mut combined = String::with_capacity(total);
125            combined.push_str(&a_owned);
126            combined.push_str(&b_owned);
127            self.intern(&combined)
128        }
129    }
130
131    pub fn get(&self, atom: Atom) -> &str {
132        if atom.0 == 0 {
133            return "";
134        }
135        self.entries
136            .get(&atom.0)
137            .map(|e| e.string.as_ref())
138            .unwrap_or("")
139    }
140
141    #[inline]
142    pub fn char_count(&self, atom: Atom) -> usize {
143        let s = self.get(atom);
144        if s.is_ascii() {
145            s.len()
146        } else {
147            s.chars().count()
148        }
149    }
150
151    #[inline]
152    pub fn char_code_at(&self, atom: Atom, index: usize) -> Option<u32> {
153        let s = self.get(atom);
154        if s.is_ascii() {
155            s.as_bytes().get(index).map(|&b| b as u32)
156        } else {
157            s.chars().nth(index).map(|c| c as u32)
158        }
159    }
160
161    pub fn retain(&mut self, atom: Atom) {
162        if let Some(entry) = self.entries.get_mut(&atom.0) {
163            entry.ref_count = entry.ref_count.saturating_add(1);
164        }
165    }
166
167    pub fn release(&mut self, atom: Atom) {
168        if let Some(entry) = self.entries.get_mut(&atom.0) {
169            entry.ref_count = entry.ref_count.saturating_sub(1);
170            if entry.ref_count == 0 {
171                let arc = Arc::clone(&entry.string);
172                self.index_map.remove(arc.as_ref());
173                self.entries.remove(&atom.0);
174            }
175        }
176    }
177
178    pub fn len(&self) -> usize {
179        self.entries.len()
180    }
181
182    pub fn is_empty(&self) -> bool {
183        self.entries.is_empty()
184    }
185
186    pub fn mark_symbol_atom(&mut self, idx: u32) {
187        self.symbol_atoms.insert(idx, ());
188    }
189
190    pub fn is_symbol_atom(&self, idx: u32) -> bool {
191        self.symbol_atoms.contains_key(&idx)
192    }
193
194    #[inline]
195    pub fn get_array_index(&self, atom: Atom) -> Option<usize> {
196        self.index_atoms.get(&atom.0).copied()
197    }
198}
199
200impl Default for AtomTable {
201    fn default() -> Self {
202        Self::new()
203    }
204}
205
206use crate::util::FxHashMap;