1#![cfg_attr(all(feature = "bench", test), feature(test))]
2#![doc(html_root_url = "https://docs.rs/crate/arc-string-interner/0.1.0")]
3#![deny(missing_docs)]
4
5#[cfg(all(feature = "bench", test))]
55extern crate test;
56
57#[cfg(test)]
58mod tests;
59
60#[cfg(all(feature = "bench", test))]
61mod benches;
62
63#[cfg(feature = "serde_support")]
64mod serde_impl;
65
66use std::iter::FromIterator;
67use std::{
68 collections::{hash_map::RandomState, HashMap},
69 hash::{BuildHasher, Hash, Hasher},
70 iter, marker,
71 num::NonZeroU32,
72 slice,
73 sync::Arc,
74 u32, vec,
75};
76
77pub trait Symbol: Copy + Ord + Eq {
86 fn from_usize(val: usize) -> Self;
92
93 fn to_usize(self) -> usize;
95}
96
97#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
104pub struct Sym(NonZeroU32);
105
106impl Symbol for Sym {
107 fn from_usize(val: usize) -> Self {
113 assert!(
114 val < u32::MAX as usize,
115 "Symbol value {} is too large and not supported by `string_interner::Sym` type",
116 val
117 );
118 Sym(NonZeroU32::new((val + 1) as u32).unwrap_or_else(|| {
119 unreachable!("Should never fail because `val + 1` is nonzero and `<= u32::MAX`")
120 }))
121 }
122
123 fn to_usize(self) -> usize {
124 (self.0.get() as usize) - 1
125 }
126}
127
128impl Symbol for usize {
129 fn from_usize(val: usize) -> Self {
130 val
131 }
132
133 fn to_usize(self) -> usize {
134 self
135 }
136}
137
138#[derive(Debug, Copy, Clone, Eq)]
141struct InternalStrRef(*const str);
142
143impl InternalStrRef {
144 fn from_str(val: &str) -> Self {
148 InternalStrRef(val as *const str)
149 }
150
151 fn as_str(&self) -> &str {
160 unsafe { &*self.0 }
161 }
162}
163
164impl<T> From<T> for InternalStrRef
165where
166 T: AsRef<str>,
167{
168 fn from(val: T) -> Self {
169 InternalStrRef::from_str(val.as_ref())
170 }
171}
172
173impl Hash for InternalStrRef {
174 fn hash<H: Hasher>(&self, state: &mut H) {
175 self.as_str().hash(state)
176 }
177}
178
179impl PartialEq for InternalStrRef {
180 fn eq(&self, other: &InternalStrRef) -> bool {
181 self.as_str() == other.as_str()
182 }
183}
184
185pub type DefaultStringInterner = StringInterner<Sym>;
187
188#[derive(Debug, Eq)]
191pub struct StringInterner<S, H = RandomState>
192where
193 S: Symbol,
194 H: BuildHasher,
195{
196 map: HashMap<InternalStrRef, S, H>,
197 values: Vec<Arc<str>>,
198}
199
200impl<S, H> PartialEq for StringInterner<S, H>
201where
202 S: Symbol,
203 H: BuildHasher,
204{
205 fn eq(&self, rhs: &Self) -> bool {
206 self.len() == rhs.len() && self.values == rhs.values
207 }
208}
209
210impl Default for StringInterner<Sym, RandomState> {
211 #[inline]
212 fn default() -> Self {
213 StringInterner::new()
214 }
215}
216
217impl<S, H> Clone for StringInterner<S, H>
220where
221 S: Symbol,
222 H: Clone + BuildHasher,
223{
224 fn clone(&self) -> Self {
225 let values = self.values.clone();
226 let mut map = HashMap::with_capacity_and_hasher(values.len(), self.map.hasher().clone());
227 map.extend(
230 values
231 .iter()
232 .enumerate()
233 .map(|(i, s)| (InternalStrRef::from_str(s), S::from_usize(i))),
234 );
235 Self { values, map }
236 }
237}
238
239unsafe impl<S, H> Send for StringInterner<S, H>
251where
252 S: Symbol + Send,
253 H: BuildHasher,
254{
255}
256unsafe impl<S, H> Sync for StringInterner<S, H>
257where
258 S: Symbol + Sync,
259 H: BuildHasher,
260{
261}
262
263impl<S> StringInterner<S>
264where
265 S: Symbol,
266{
267 #[inline]
269 pub fn new() -> StringInterner<S, RandomState> {
270 StringInterner {
271 map: HashMap::new(),
272 values: Vec::new(),
273 }
274 }
275
276 #[inline]
278 pub fn with_capacity(cap: usize) -> Self {
279 StringInterner {
280 map: HashMap::with_capacity(cap),
281 values: Vec::with_capacity(cap),
282 }
283 }
284
285 #[inline]
287 pub fn capacity(&self) -> usize {
288 std::cmp::min(self.map.capacity(), self.values.capacity())
289 }
290
291 #[inline]
297 pub fn reserve(&mut self, additional: usize) {
298 self.map.reserve(additional);
299 self.values.reserve(additional);
300 }
301}
302
303impl<S, H> StringInterner<S, H>
304where
305 S: Symbol,
306 H: BuildHasher,
307{
308 #[inline]
310 pub fn with_hasher(hash_builder: H) -> StringInterner<S, H> {
311 StringInterner {
312 map: HashMap::with_hasher(hash_builder),
313 values: Vec::new(),
314 }
315 }
316
317 #[inline]
319 pub fn with_capacity_and_hasher(cap: usize, hash_builder: H) -> StringInterner<S, H> {
320 StringInterner {
321 map: HashMap::with_hasher(hash_builder),
322 values: Vec::with_capacity(cap),
323 }
324 }
325
326 #[inline]
333 pub fn get_or_intern<T>(&mut self, val: T) -> S
334 where
335 T: Into<String> + AsRef<str>,
336 {
337 match self.map.get(&val.as_ref().into()) {
338 Some(&sym) => sym,
339 None => self.intern(val),
340 }
341 }
342
343 fn intern<T>(&mut self, new_val: T) -> S
347 where
348 T: Into<String> + AsRef<str>,
349 {
350 let new_id: S = self.make_symbol();
351 let new_boxed_val: Arc<str> = new_val.into().into();
352 let new_ref: InternalStrRef = new_boxed_val.as_ref().into();
353 self.values.push(new_boxed_val);
354 self.map.insert(new_ref, new_id);
355 new_id
356 }
357
358 fn make_symbol(&self) -> S {
360 S::from_usize(self.len())
361 }
362
363 #[inline]
366 pub fn resolve(&self, symbol: S) -> Option<&Arc<str>> {
367 self.values.get(symbol.to_usize())
368 }
369
370 #[inline]
382 pub unsafe fn resolve_unchecked(&self, symbol: S) -> &Arc<str> {
383 self.values.get_unchecked(symbol.to_usize())
384 }
385
386 #[inline]
389 pub fn get<T>(&self, val: T) -> Option<S>
390 where
391 T: AsRef<str>,
392 {
393 self.map.get(&val.as_ref().into()).cloned()
394 }
395
396 #[inline]
398 pub fn len(&self) -> usize {
399 self.values.len()
400 }
401
402 #[inline]
404 pub fn is_empty(&self) -> bool {
405 self.len() == 0
406 }
407
408 #[inline]
410 pub fn iter(&self) -> Iter<S> {
411 Iter::new(self)
412 }
413
414 #[inline]
416 pub fn iter_values(&self) -> Values<S> {
417 Values::new(self)
418 }
419
420 pub fn shrink_to_fit(&mut self) {
422 self.map.shrink_to_fit();
423 self.values.shrink_to_fit();
424 }
425}
426
427impl<T, S> FromIterator<T> for StringInterner<S>
428where
429 S: Symbol,
430 T: Into<String> + AsRef<str>,
431{
432 fn from_iter<I>(iter: I) -> Self
433 where
434 I: IntoIterator<Item = T>,
435 {
436 let iter = iter.into_iter();
437 let mut interner = StringInterner::with_capacity(iter.size_hint().0);
438 interner.extend(iter);
439 interner
440 }
441}
442
443impl<T, S> std::iter::Extend<T> for StringInterner<S>
444where
445 S: Symbol,
446 T: Into<String> + AsRef<str>,
447{
448 fn extend<I>(&mut self, iter: I)
449 where
450 I: IntoIterator<Item = T>,
451 {
452 for s in iter {
453 self.get_or_intern(s);
454 }
455 }
456}
457
458pub struct Iter<'a, S> {
460 iter: iter::Enumerate<slice::Iter<'a, Arc<str>>>,
461 mark: marker::PhantomData<S>,
462}
463
464impl<'a, S> Iter<'a, S>
465where
466 S: Symbol + 'a,
467{
468 #[inline]
471 fn new<H>(interner: &'a StringInterner<S, H>) -> Self
472 where
473 H: BuildHasher,
474 {
475 Iter {
476 iter: interner.values.iter().enumerate(),
477 mark: marker::PhantomData,
478 }
479 }
480}
481
482impl<'a, S> Iterator for Iter<'a, S>
483where
484 S: Symbol + 'a,
485{
486 type Item = (S, &'a Arc<str>);
487
488 #[inline]
489 fn next(&mut self) -> Option<Self::Item> {
490 self.iter
491 .next()
492 .map(|(num, boxed_str)| (S::from_usize(num), boxed_str))
493 }
494
495 #[inline]
496 fn size_hint(&self) -> (usize, Option<usize>) {
497 self.iter.size_hint()
498 }
499}
500
501pub struct Values<'a, S>
503where
504 S: Symbol + 'a,
505{
506 iter: slice::Iter<'a, Arc<str>>,
507 mark: marker::PhantomData<S>,
508}
509
510impl<'a, S> Values<'a, S>
511where
512 S: Symbol + 'a,
513{
514 #[inline]
516 fn new<H>(interner: &'a StringInterner<S, H>) -> Self
517 where
518 H: BuildHasher,
519 {
520 Values {
521 iter: interner.values.iter(),
522 mark: marker::PhantomData,
523 }
524 }
525}
526
527impl<'a, S> Iterator for Values<'a, S>
528where
529 S: Symbol + 'a,
530{
531 type Item = &'a Arc<str>;
532
533 #[inline]
534 fn next(&mut self) -> Option<Self::Item> {
535 self.iter.next()
536 }
537
538 #[inline]
539 fn size_hint(&self) -> (usize, Option<usize>) {
540 self.iter.size_hint()
541 }
542}
543
544impl<S, H> iter::IntoIterator for StringInterner<S, H>
545where
546 S: Symbol,
547 H: BuildHasher,
548{
549 type Item = (S, Arc<str>);
550 type IntoIter = IntoIter<S>;
551
552 fn into_iter(self) -> Self::IntoIter {
553 IntoIter {
554 iter: self.values.into_iter().enumerate(),
555 mark: marker::PhantomData,
556 }
557 }
558}
559
560pub struct IntoIter<S>
564where
565 S: Symbol,
566{
567 iter: iter::Enumerate<vec::IntoIter<Arc<str>>>,
568 mark: marker::PhantomData<S>,
569}
570
571impl<S> Iterator for IntoIter<S>
572where
573 S: Symbol,
574{
575 type Item = (S, Arc<str>);
576
577 fn next(&mut self) -> Option<Self::Item> {
578 self.iter
579 .next()
580 .map(|(num, boxed_str)| (S::from_usize(num), boxed_str))
581 }
582
583 #[inline]
584 fn size_hint(&self) -> (usize, Option<usize>) {
585 self.iter.size_hint()
586 }
587}