1#![doc(html_root_url = "https://docs.rs/crate/tiny-interner/0.1.0")]
2#![cfg_attr(not(feature = "std"), no_std)]
3#![deny(missing_docs)]
4#![warn(unsafe_op_in_unsafe_fn, clippy::redundant_closure_for_method_calls)]
5
6use core::{
44 hash::{BuildHasher, Hash, Hasher},
45 marker::PhantomData,
46 str::from_utf8_unchecked,
47};
48
49extern crate alloc;
50
51use self::alloc::{string::String, vec::Vec};
52use hashbrown::{
53 hash_map::{DefaultHashBuilder, RawEntryMut},
54 HashMap,
55};
56
57pub type Symbol = usize;
59
60#[derive(Debug)]
66pub struct Interner<H = DefaultHashBuilder>
67where
68 H: BuildHasher,
69{
70 dedup: HashMap<usize, (), ()>,
71 hasher: H,
72 backend: Backend,
73}
74
75#[derive(Debug)]
77pub struct Backend {
78 ends: Vec<usize>,
79 buffer: String,
80 marker: PhantomData<fn() -> usize>,
81}
82
83impl Default for Interner {
84 #[inline]
85 fn default() -> Self {
86 Self::new()
87 }
88}
89
90impl Default for Backend {
91 #[inline]
92 fn default() -> Self {
93 Self {
94 ends: Vec::default(),
95 buffer: String::default(),
96 marker: Default::default(),
97 }
98 }
99}
100
101fn hash_value<T>(hasher: &impl BuildHasher, value: &T) -> u64
102where
103 T: ?Sized + Hash,
104{
105 let state = &mut hasher.build_hasher();
106 value.hash(state);
107 state.finish()
108}
109
110impl Backend {
111 #[inline]
112 fn with_capacity(capacity: usize) -> Self {
113 Self {
114 ends: Vec::with_capacity(capacity),
115 buffer: String::default(),
116 marker: Default::default(),
117 }
118 }
119
120 #[inline]
122 fn intern(&mut self, string: &str) -> usize {
123 self.push(string)
124 }
125
126 #[inline]
128 fn intern_static(&mut self, string: &'static str) -> usize {
129 self.intern(string)
130 }
131
132 #[inline]
134 fn resolve(&self, symbol: usize) -> Option<&str> {
135 self.span_of(symbol).map(|span| self.str_at(span))
136 }
137
138 #[inline]
140 unsafe fn unchecked_resolve(&self, symbol: usize) -> &str {
141 unsafe { self.str_at(self.unchecked_span_of(symbol)) }
142 }
143
144 fn shrink_to_fit(&mut self) {
146 self.ends.shrink_to_fit();
147 self.buffer.shrink_to_fit();
148 }
149
150 fn next_symbol(&self) -> usize {
151 self.ends.len()
152 }
153
154 fn span_of(&self, symbol: usize) -> Option<Span> {
156 self.ends.get(symbol).copied().map(|end| Span {
157 start: self.ends.get(symbol.wrapping_sub(1)).copied().unwrap_or(0),
158 end,
159 })
160 }
161
162 unsafe fn unchecked_span_of(&self, symbol: usize) -> Span {
164 let end = unsafe { *self.ends.get_unchecked(symbol) };
165 let start = self.ends.get(symbol.wrapping_sub(1)).copied().unwrap_or(0);
166
167 Span { start, end }
168 }
169
170 fn str_at(&self, span: Span) -> &str {
171 unsafe {
172 from_utf8_unchecked(&self.buffer.as_bytes()[(span.start as usize)..(span.end as usize)])
173 }
174 }
175
176 fn push(&mut self, string: &str) -> usize {
178 self.buffer.push_str(string);
179
180 let end = self.buffer.as_bytes().len();
181 let symbol = self.next_symbol();
182
183 self.ends.push(end);
184
185 symbol
186 }
187}
188
189impl<H> Interner<H>
190where
191 H: BuildHasher + Default,
192{
193 #[inline]
195 pub fn new() -> Self {
196 Self {
197 dedup: HashMap::default(),
198 hasher: Default::default(),
199 backend: Backend::default(),
200 }
201 }
202
203 #[inline]
205 pub fn with_hasher(hasher: H) -> Self {
206 Self {
207 dedup: HashMap::default(),
208 hasher,
209 backend: Backend::default(),
210 }
211 }
212
213 #[inline]
215 pub fn with_capacity(capacity: usize) -> Self {
216 Self {
217 dedup: HashMap::with_capacity_and_hasher(capacity, ()),
218 hasher: Default::default(),
219 backend: Backend::with_capacity(capacity),
220 }
221 }
222
223 #[inline]
225 pub fn with_capacity_and_hasher(capacity: usize, hasher: H) -> Self {
226 Self {
227 dedup: HashMap::with_capacity_and_hasher(capacity, ()),
228 hasher,
229 backend: Backend::with_capacity(capacity),
230 }
231 }
232
233 #[inline]
235 pub fn len(&self) -> usize {
236 self.dedup.len()
237 }
238
239 #[inline]
241 pub fn is_empty(&self) -> bool {
242 self.len() == 0
243 }
244
245 #[inline]
247 pub fn get<T>(&self, string: T) -> Option<usize>
248 where
249 T: AsRef<str>,
250 {
251 let string_ref = string.as_ref();
252 let hasher = &self.hasher;
253 let hash = hash_value(hasher, string_ref);
254
255 self.dedup
256 .raw_entry()
257 .from_hash(hash, |symbol| {
258 string_ref == unsafe { self.backend.unchecked_resolve(*symbol) }
259 })
260 .map(|(&symbol, ())| symbol)
261 }
262
263 #[inline]
265 fn get_or_intern_using<T>(
266 &mut self,
267 string: T,
268 intern_fn: fn(&mut Backend, T) -> usize,
269 ) -> usize
270 where
271 T: AsRef<str> + Copy + Hash + for<'a> PartialEq<&'a str>,
272 {
273 let string_ref = string.as_ref();
274
275 let hasher = &self.hasher;
276 let hash = hash_value(hasher, string_ref);
277
278 let entry = self.dedup.raw_entry_mut().from_hash(hash, |symbol| {
279 string_ref == unsafe { self.backend.unchecked_resolve(*symbol) }
280 });
281
282 let (&mut symbol, &mut ()) = match entry {
283 RawEntryMut::Vacant(vacant) => {
284 let symbol = intern_fn(&mut self.backend, string);
285 vacant.insert_with_hasher(hash, symbol, (), |symbol| {
286 hash_value(hasher, unsafe { self.backend.unchecked_resolve(*symbol) })
287 })
288 }
289 RawEntryMut::Occupied(occupied) => occupied.into_key_value(),
290 };
291
292 symbol
293 }
294
295 #[inline]
297 pub fn get_or_intern<T>(&mut self, string: T) -> usize
298 where
299 T: AsRef<str>,
300 {
301 self.get_or_intern_using(string.as_ref(), Backend::intern)
302 }
303
304 pub fn get_or_intern_static(&mut self, string: &'static str) -> usize {
306 self.get_or_intern_using(string.as_ref(), Backend::intern_static)
307 }
308
309 pub fn shrink_to_fit(&mut self) {
311 self.backend.shrink_to_fit();
312 }
313
314 #[inline]
316 pub fn resolve(&self, symbol: usize) -> Option<&str> {
317 self.backend.resolve(symbol)
318 }
319}
320
321#[derive(Debug, Copy, Clone, PartialEq, Eq)]
323pub struct Span {
324 start: usize,
325 end: usize,
326}