intaglio/
cstr.rs

1//! Intern C strings.
2//!
3//! This module provides a nearly identical API to the one found in the
4//! top-level of this crate. There is one important difference:
5//!
6//! 1. Interned contents are [`&CStr`] instead of `&str`. Additionally,
7//!    [`CString`] is used where `String` would have been used.
8//!
9//! # Example: intern C string
10//!
11//! ```
12//! # use std::ffi::CString;
13//! # use intaglio::cstr::SymbolTable;
14//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
15//! let mut table = SymbolTable::new();
16//! let sym = table.intern(c"abc")?;
17//! assert_eq!(sym, table.intern(CString::from(c"abc"))?);
18//! assert_eq!(Some(c"abc"), table.get(sym));
19//! # Ok(())
20//! # }
21//! # example().unwrap();
22//! ```
23//!
24//! # Example: symbol iterators
25//!
26//! ```
27//! # use std::collections::HashMap;
28//! # use intaglio::cstr::SymbolTable;
29//! # use intaglio::Symbol;
30//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
31//! let mut table = SymbolTable::new();
32//! let sym = table.intern(c"abc")?;
33//! // Retrieve set of `Symbol`s.
34//! let all_symbols = table.all_symbols();
35//! assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
36//!
37//! table.intern(c"xyz")?;
38//! let mut map = HashMap::new();
39//! map.insert(Symbol::new(0), c"abc");
40//! map.insert(Symbol::new(1), c"xyz");
41//! // Retrieve symbol to C string content mappings.
42//! let iter = table.iter();
43//! assert_eq!(map, iter.collect::<HashMap<_, _>>());
44//! # Ok(())
45//! # }
46//! # example().unwrap();
47//! ```
48//!
49//! # Performance
50//!
51//! In general, one should expect this crate's performance on `&CStr` to be
52//! roughly similar to performance on `&str`.
53//!
54//! [`CString`]: std::ffi::CString
55//! [`&CStr`]: std::ffi::CStr
56
57use core::hash::BuildHasher;
58use core::iter::{FromIterator, FusedIterator, Zip};
59use core::marker::PhantomData;
60use core::mem::ManuallyDrop;
61use core::ops::Range;
62use core::slice;
63use std::borrow::Cow;
64use std::collections::{
65    hash_map::{HashMap, RandomState},
66    TryReserveError,
67};
68use std::ffi::CStr;
69
70use crate::internal::Interned;
71use crate::{Symbol, SymbolOverflowError, DEFAULT_SYMBOL_TABLE_CAPACITY};
72
73/// An iterator over all [`Symbol`]s in a [`SymbolTable`].
74///
75/// See the [`all_symbols`](SymbolTable::all_symbols) method in [`SymbolTable`].
76///
77/// # Usage
78///
79/// ```
80/// # use intaglio::cstr::SymbolTable;
81/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
82/// let mut table = SymbolTable::new();
83/// let sym = table.intern(c"abc")?;
84/// let all_symbols = table.all_symbols();
85/// assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
86/// # Ok(())
87/// # }
88/// # example().unwrap();
89/// ```
90#[derive(Debug, Clone, Hash, PartialEq, Eq)]
91#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
92pub struct AllSymbols<'a> {
93    range: Range<usize>,
94    // Hold a shared reference to the underlying `SymbolTable` to ensure the
95    // table is not modified while we are iterating which would make the results
96    // not match the real state.
97    phantom: PhantomData<&'a SymbolTable>,
98}
99
100impl Iterator for AllSymbols<'_> {
101    type Item = Symbol;
102
103    fn next(&mut self) -> Option<Self::Item> {
104        let next = self.range.next()?;
105        debug_assert!(u32::try_from(next).is_ok());
106        Some((next as u32).into())
107    }
108
109    fn size_hint(&self) -> (usize, Option<usize>) {
110        self.range.size_hint()
111    }
112
113    fn count(self) -> usize {
114        self.range.count()
115    }
116
117    fn last(self) -> Option<Self::Item> {
118        let last = self.range.last()?;
119        debug_assert!(u32::try_from(last).is_ok());
120        Some((last as u32).into())
121    }
122
123    fn nth(&mut self, n: usize) -> Option<Self::Item> {
124        let nth = self.range.nth(n)?;
125        debug_assert!(u32::try_from(nth).is_ok());
126        Some((nth as u32).into())
127    }
128
129    fn collect<B: FromIterator<Self::Item>>(self) -> B {
130        self.range.map(|sym| Symbol::from(sym as u32)).collect()
131    }
132}
133
134impl DoubleEndedIterator for AllSymbols<'_> {
135    fn next_back(&mut self) -> Option<Self::Item> {
136        let next = self.range.next_back()?;
137        debug_assert!(u32::try_from(next).is_ok());
138        Some((next as u32).into())
139    }
140
141    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
142        let nth = self.range.nth_back(n)?;
143        debug_assert!(u32::try_from(nth).is_ok());
144        Some((nth as u32).into())
145    }
146}
147
148impl FusedIterator for AllSymbols<'_> {}
149
150/// An iterator over all interned C strings in a [`SymbolTable`].
151///
152/// See the [`c_strings`](SymbolTable::c_strings) method in [`SymbolTable`].
153///
154/// # Usage
155///
156/// ```
157/// # use std::ffi::CString;
158/// # use intaglio::cstr::SymbolTable;
159/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
160/// let mut table = SymbolTable::new();
161/// let sym = table.intern(CString::from(c"abc"))?;
162/// let c_strings = table.c_strings();
163/// assert_eq!(vec![c"abc"], c_strings.collect::<Vec<_>>());
164/// # Ok(())
165/// # }
166/// # example().unwrap();
167/// ```
168#[derive(Debug, Clone)]
169#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
170pub struct CStrings<'a>(slice::Iter<'a, Interned<CStr>>);
171
172impl<'a> Iterator for CStrings<'a> {
173    type Item = &'a CStr;
174
175    fn next(&mut self) -> Option<Self::Item> {
176        self.0.next().map(Interned::as_slice)
177    }
178
179    fn size_hint(&self) -> (usize, Option<usize>) {
180        self.0.size_hint()
181    }
182
183    fn count(self) -> usize {
184        self.0.count()
185    }
186
187    fn last(self) -> Option<Self::Item> {
188        self.0.last().map(Interned::as_slice)
189    }
190
191    fn nth(&mut self, n: usize) -> Option<Self::Item> {
192        self.0.nth(n).map(Interned::as_slice)
193    }
194
195    fn collect<B: FromIterator<Self::Item>>(self) -> B {
196        self.0.map(Interned::as_slice).collect()
197    }
198}
199
200impl DoubleEndedIterator for CStrings<'_> {
201    fn next_back(&mut self) -> Option<Self::Item> {
202        self.0.next_back().map(Interned::as_slice)
203    }
204
205    fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
206        self.0.nth_back(n).map(Interned::as_slice)
207    }
208
209    fn rfold<B, F>(self, accum: B, f: F) -> B
210    where
211        F: FnMut(B, Self::Item) -> B,
212    {
213        self.0.map(Interned::as_slice).rfold(accum, f)
214    }
215}
216
217impl ExactSizeIterator for CStrings<'_> {
218    fn len(&self) -> usize {
219        self.0.len()
220    }
221}
222
223impl FusedIterator for CStrings<'_> {}
224
225/// An iterator over all symbols and interned C strings in a [`SymbolTable`].
226///
227/// See the [`iter`](SymbolTable::iter) method in [`SymbolTable`].
228///
229/// # Usage
230///
231/// ```
232/// # use std::collections::HashMap;
233/// # use intaglio::cstr::SymbolTable;
234/// # use intaglio::Symbol;
235/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
236/// let mut table = SymbolTable::new();
237/// let sym = table.intern(c"abc")?;
238/// let iter = table.iter();
239/// let mut map = HashMap::new();
240/// map.insert(Symbol::new(0), c"abc");
241/// assert_eq!(map, iter.collect::<HashMap<_, _>>());
242/// # Ok(())
243/// # }
244/// # example().unwrap();
245/// ```
246#[derive(Debug, Clone)]
247#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
248pub struct Iter<'a>(Zip<AllSymbols<'a>, CStrings<'a>>);
249
250impl<'a> Iterator for Iter<'a> {
251    type Item = (Symbol, &'a CStr);
252
253    fn next(&mut self) -> Option<Self::Item> {
254        self.0.next()
255    }
256
257    fn size_hint(&self) -> (usize, Option<usize>) {
258        self.0.size_hint()
259    }
260
261    fn count(self) -> usize {
262        self.0.count()
263    }
264
265    fn last(self) -> Option<Self::Item> {
266        self.0.last()
267    }
268
269    fn nth(&mut self, n: usize) -> Option<Self::Item> {
270        self.0.nth(n)
271    }
272
273    fn collect<B: FromIterator<Self::Item>>(self) -> B {
274        self.0.collect()
275    }
276}
277
278impl FusedIterator for Iter<'_> {}
279
280impl<'a, S> IntoIterator for &'a SymbolTable<S> {
281    type Item = (Symbol, &'a CStr);
282    type IntoIter = Iter<'a>;
283
284    fn into_iter(self) -> Self::IntoIter {
285        self.iter()
286    }
287}
288
289/// C string interner.
290///
291/// This symbol table is implemented by storing [`CString`]s with a fast path
292/// for [`&CStr`] that are already `'static`.
293///
294/// See module documentation for more.
295///
296/// # Usage
297///
298/// ```
299/// # use std::ffi::CString;
300/// # use intaglio::cstr::SymbolTable;
301/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
302/// let mut table = SymbolTable::new();
303/// let sym = table.intern(c"abc")?;
304/// assert_eq!(sym, table.intern(CString::from(c"abc"))?);
305/// assert!(table.contains(sym));
306/// assert!(table.is_interned(c"abc"));
307/// # Ok(())
308/// # }
309/// # example().unwrap();
310/// ```
311///
312/// [`CString`]: std::ffi::CString
313/// [`&CStr`]: std::ffi::CStr
314#[derive(Default, Debug)]
315#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
316pub struct SymbolTable<S = RandomState> {
317    map: ManuallyDrop<HashMap<&'static CStr, Symbol, S>>,
318    vec: ManuallyDrop<Vec<Interned<CStr>>>,
319}
320
321impl<S> Drop for SymbolTable<S> {
322    fn drop(&mut self) {
323        // SAFETY: No mutable references to `SymbolTable` internal fields are
324        // given out, which means `ManuallyDrop::drop` can only be invoked in
325        // this `Drop::drop` impl. Interal fields are guaranteed to be
326        // initialized by `SymbolTable` constructors.
327        unsafe {
328            // `Interned` requires that the `'static` references it gives out
329            // are dropped before the owning buffer stored in the `Interned`.
330            ManuallyDrop::drop(&mut self.map);
331            ManuallyDrop::drop(&mut self.vec);
332        }
333    }
334}
335
336impl SymbolTable<RandomState> {
337    /// Constructs a new, empty `SymbolTable` with [default capacity].
338    ///
339    /// This function will always allocate. To construct a symbol table without
340    /// allocating, call [`SymbolTable::with_capacity(0)`].
341    ///
342    /// # Examples
343    ///
344    /// ```
345    /// # use intaglio::cstr::SymbolTable;
346    /// let table = SymbolTable::new();
347    /// assert_eq!(0, table.len());
348    /// assert!(table.capacity() >= 4096);
349    /// ```
350    ///
351    /// [default capacity]: DEFAULT_SYMBOL_TABLE_CAPACITY
352    /// [`SymbolTable::with_capacity(0)`]: Self::with_capacity
353    #[must_use]
354    pub fn new() -> Self {
355        Self::with_capacity(DEFAULT_SYMBOL_TABLE_CAPACITY)
356    }
357
358    /// Constructs a new, empty `SymbolTable` with the specified capacity.
359    ///
360    /// The symbol table will be able to hold at least `capacity` C strings
361    /// without reallocating. If `capacity` is 0, the symbol table will not
362    /// allocate.
363    ///
364    /// # Examples
365    ///
366    /// ```
367    /// # use intaglio::cstr::SymbolTable;
368    /// let table = SymbolTable::with_capacity(10);
369    /// assert_eq!(0, table.len());
370    /// assert!(table.capacity() >= 10);
371    /// ```
372    #[must_use]
373    pub fn with_capacity(capacity: usize) -> Self {
374        let capacity = capacity.next_power_of_two();
375        Self {
376            map: ManuallyDrop::new(HashMap::with_capacity(capacity)),
377            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
378        }
379    }
380}
381
382impl<S> SymbolTable<S> {
383    /// Constructs a new, empty `SymbolTable` with
384    /// [default capacity](DEFAULT_SYMBOL_TABLE_CAPACITY) and the given hash
385    /// builder.
386    ///
387    /// # Examples
388    ///
389    /// ```
390    /// # use std::collections::hash_map::RandomState;
391    /// # use intaglio::cstr::SymbolTable;
392    /// let hash_builder = RandomState::new();
393    /// let table = SymbolTable::with_hasher(hash_builder);
394    /// assert_eq!(0, table.len());
395    /// assert!(table.capacity() >= 4096);
396    /// ```
397    pub fn with_hasher(hash_builder: S) -> Self {
398        Self::with_capacity_and_hasher(DEFAULT_SYMBOL_TABLE_CAPACITY, hash_builder)
399    }
400
401    /// Constructs a new, empty `SymbolTable` with the specified capacity and
402    /// the given hash builder.
403    ///
404    /// # Examples
405    ///
406    /// ```
407    /// # use std::collections::hash_map::RandomState;
408    /// # use intaglio::cstr::SymbolTable;
409    /// let hash_builder = RandomState::new();
410    /// let table = SymbolTable::with_capacity_and_hasher(10, hash_builder);
411    /// assert_eq!(0, table.len());
412    /// assert!(table.capacity() >= 10);
413    /// ```
414    pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self {
415        Self {
416            map: ManuallyDrop::new(HashMap::with_capacity_and_hasher(capacity, hash_builder)),
417            vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
418        }
419    }
420
421    /// Returns the number of C strings the table can hold without reallocating.
422    ///
423    /// # Examples
424    ///
425    /// ```
426    /// # use intaglio::cstr::SymbolTable;
427    /// let table = SymbolTable::with_capacity(10);
428    /// assert!(table.capacity() >= 10);
429    /// ```
430    pub fn capacity(&self) -> usize {
431        usize::min(self.vec.capacity(), self.map.capacity())
432    }
433
434    /// Returns the number of interned C strings in the table.
435    ///
436    /// # Examples
437    ///
438    /// ```
439    /// # use std::ffi::CString;
440    /// # use intaglio::cstr::SymbolTable;
441    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
442    /// let mut table = SymbolTable::new();
443    /// assert_eq!(0, table.len());
444    ///
445    /// table.intern(CString::new(*b"abc")?)?;
446    /// // only uniquely interned C strings grow the symbol table.
447    /// table.intern(CString::new(*b"abc")?)?;
448    /// table.intern(CString::new(*b"xyz")?)?;
449    /// assert_eq!(2, table.len());
450    /// # Ok(())
451    /// # }
452    /// # example().unwrap();
453    /// ```
454    pub fn len(&self) -> usize {
455        self.vec.len()
456    }
457
458    /// Returns `true` if the symbol table contains no interned C strings.
459    ///
460    /// # Examples
461    ///
462    /// ```
463    /// # use std::ffi::CString;
464    /// # use intaglio::cstr::SymbolTable;
465    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
466    /// let mut table = SymbolTable::new();
467    /// assert!(table.is_empty());
468    ///
469    /// table.intern(CString::new(*b"abc")?)?;
470    /// assert!(!table.is_empty());
471    /// # Ok(())
472    /// # }
473    /// # example().unwrap();
474    /// ```
475    pub fn is_empty(&self) -> bool {
476        self.vec.is_empty()
477    }
478
479    /// Returns `true` if the symbol table contains the given symbol.
480    ///
481    /// # Examples
482    ///
483    /// ```
484    /// # use std::ffi::CString;
485    /// # use intaglio::cstr::SymbolTable;
486    /// # use intaglio::Symbol;
487    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
488    /// let mut table = SymbolTable::new();
489    /// assert!(!table.contains(Symbol::new(0)));
490    ///
491    /// let sym = table.intern(CString::new(*b"abc")?)?;
492    /// assert!(table.contains(Symbol::new(0)));
493    /// assert!(table.contains(sym));
494    /// # Ok(())
495    /// # }
496    /// # example().unwrap();
497    /// ```
498    #[must_use]
499    pub fn contains(&self, id: Symbol) -> bool {
500        self.get(id).is_some()
501    }
502
503    /// Returns a reference to the C string associated with the given symbol.
504    ///
505    /// If the given symbol does not exist in the underlying symbol table,
506    /// `None` is returned.
507    ///
508    /// The lifetime of the returned reference is bound to the symbol table.
509    ///
510    /// # Examples
511    ///
512    /// ```
513    /// # use std::ffi::CString;
514    /// # use intaglio::cstr::SymbolTable;
515    /// # use intaglio::Symbol;
516    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
517    /// let mut table = SymbolTable::new();
518    /// assert!(table.get(Symbol::new(0)).is_none());
519    ///
520    /// let sym = table.intern(CString::from(c"abc"))?;
521    /// assert_eq!(Some(c"abc"), table.get(Symbol::new(0)));
522    /// assert_eq!(Some(c"abc"), table.get(sym));
523    /// # Ok(())
524    /// # }
525    /// # example().unwrap();
526    /// ```
527    #[must_use]
528    pub fn get(&self, id: Symbol) -> Option<&CStr> {
529        let cstr = self.vec.get(usize::from(id))?;
530        Some(cstr.as_slice())
531    }
532
533    /// Returns an iterator over all [`Symbol`]s and C strings in the
534    /// [`SymbolTable`].
535    ///
536    /// # Examples
537    ///
538    /// ```
539    /// # use std::collections::HashMap;
540    /// # use std::ffi::CString;
541    /// # use intaglio::cstr::SymbolTable;
542    /// # use intaglio::Symbol;
543    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
544    /// let mut table = SymbolTable::new();
545    /// table.intern(CString::from(c"abc"))?;
546    /// table.intern(CString::from(c"xyz"))?;
547    /// table.intern(CString::from(c"123"))?;
548    /// table.intern(CString::from(c"789"))?;
549    ///
550    /// let iter = table.iter();
551    /// let mut map = HashMap::new();
552    /// map.insert(Symbol::new(0), c"abc");
553    /// map.insert(Symbol::new(1), c"xyz");
554    /// map.insert(Symbol::new(2), c"123");
555    /// map.insert(Symbol::new(3), c"789");
556    /// assert_eq!(map, iter.collect::<HashMap<_, _>>());
557    /// # Ok(())
558    /// # }
559    /// # example().unwrap();
560    /// ```
561    ///
562    /// ```
563    /// # use std::ffi::CString;
564    /// # use intaglio::cstr::SymbolTable;
565    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
566    /// let mut table = SymbolTable::new();
567    /// table.intern(CString::from(c"abc"))?;
568    /// table.intern(CString::from(c"xyz"))?;
569    /// table.intern(CString::from(c"123"))?;
570    /// table.intern(CString::from(c"789"))?;
571    ///
572    /// let iter = table.iter();
573    /// assert_eq!(table.len(), iter.count());
574    /// # Ok(())
575    /// # }
576    /// # example().unwrap();
577    /// ```
578    pub fn iter(&self) -> Iter<'_> {
579        Iter(self.all_symbols().zip(self.c_strings()))
580    }
581
582    /// Returns an iterator over all [`Symbol`]s in the [`SymbolTable`].
583    ///
584    /// # Examples
585    ///
586    /// ```
587    /// # use std::ffi::CString;
588    /// # use intaglio::cstr::SymbolTable;
589    /// # use intaglio::Symbol;
590    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
591    /// let mut table = SymbolTable::new();
592    /// table.intern(CString::from(c"abc"))?;
593    /// table.intern(CString::from(c"xyz"))?;
594    /// table.intern(CString::from(c"123"))?;
595    /// table.intern(CString::from(c"789"))?;
596    ///
597    /// let mut all_symbols = table.all_symbols();
598    /// assert_eq!(Some(Symbol::new(0)), all_symbols.next());
599    /// assert_eq!(Some(Symbol::new(1)), all_symbols.nth_back(2));
600    /// assert_eq!(None, all_symbols.next());
601    /// # Ok(())
602    /// # }
603    /// # example().unwrap();
604    /// ```
605    ///
606    /// ```
607    /// # use std::ffi::CString;
608    /// # use intaglio::cstr::SymbolTable;
609    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
610    /// let mut table = SymbolTable::new();
611    /// table.intern(CString::from(c"abc"))?;
612    /// table.intern(CString::from(c"xyz"))?;
613    /// table.intern(CString::from(c"123"))?;
614    /// table.intern(CString::from(c"789"))?;
615    ///
616    /// let all_symbols = table.all_symbols();
617    /// assert_eq!(table.len(), all_symbols.count());
618    /// # Ok(())
619    /// # }
620    /// # example().unwrap();
621    /// ```
622    pub fn all_symbols(&self) -> AllSymbols<'_> {
623        AllSymbols {
624            range: 0..self.len(),
625            phantom: PhantomData,
626        }
627    }
628
629    /// Returns an iterator over all C strings in the [`SymbolTable`].
630    ///
631    /// # Examples
632    ///
633    /// ```
634    /// # use std::ffi::CString;
635    /// # use intaglio::cstr::SymbolTable;
636    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
637    /// let mut table = SymbolTable::new();
638    /// table.intern(CString::from(c"abc"))?;
639    /// table.intern(CString::from(c"xyz"))?;
640    /// table.intern(CString::from(c"123"))?;
641    /// table.intern(CString::from(c"789"))?;
642    ///
643    /// let mut c_strings = table.c_strings();
644    /// assert_eq!(Some(c"abc"), c_strings.next());
645    /// assert_eq!(Some(c"xyz"), c_strings.nth_back(2));
646    /// assert_eq!(None, c_strings.next());
647    /// # Ok(())
648    /// # }
649    /// # example().unwrap();
650    /// ```
651    ///
652    /// ```
653    /// # use std::ffi::CString;
654    /// # use intaglio::cstr::SymbolTable;
655    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
656    /// let mut table = SymbolTable::new();
657    /// table.intern(CString::from(c"abc"))?;
658    /// table.intern(CString::from(c"xyz"))?;
659    /// table.intern(CString::from(c"123"))?;
660    /// table.intern(CString::from(c"789"))?;
661    ///
662    /// let c_strings = table.c_strings();
663    /// assert_eq!(table.len(), c_strings.count());
664    /// # Ok(())
665    /// # }
666    /// # example().unwrap();
667    /// ```
668    pub fn c_strings(&self) -> CStrings<'_> {
669        CStrings(self.vec.iter())
670    }
671}
672
673impl<S> SymbolTable<S>
674where
675    S: BuildHasher,
676{
677    /// Intern a C string for the lifetime of the symbol table.
678    ///
679    /// The returned `Symbol` allows retrieving of the underlying [`CStr`].
680    /// Equal C strings will be inserted into the symbol table exactly once.
681    ///
682    /// This function only allocates if the underlying symbol table has no
683    /// remaining capacity.
684    ///
685    /// # Errors
686    ///
687    /// If the symbol table would grow larger than `u32::MAX` interned C
688    /// strings, the [`Symbol`] counter would overflow and a
689    /// [`SymbolOverflowError`] is returned.
690    ///
691    /// # Examples
692    ///
693    /// ```
694    /// # use std::ffi::CString;
695    /// # use intaglio::cstr::SymbolTable;
696    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
697    /// let mut table = SymbolTable::new();
698    /// let sym = table.intern(CString::from(c"abc"))?;
699    /// table.intern(CString::from(c"xyz"))?;
700    /// table.intern(c"123")?;
701    /// table.intern(c"789")?;
702    ///
703    /// assert_eq!(4, table.len());
704    /// assert_eq!(Some(c"abc"), table.get(sym));
705    /// # Ok(())
706    /// # }
707    /// # example().unwrap();
708    /// ```
709    pub fn intern<T>(&mut self, contents: T) -> Result<Symbol, SymbolOverflowError>
710    where
711        T: Into<Cow<'static, CStr>>,
712    {
713        let contents = contents.into();
714        if let Some(&id) = self.map.get(&*contents) {
715            return Ok(id);
716        }
717
718        // The `Interned::Owned` variant is derived from a `Box<T>`. When such
719        // a structure is moved or assigned, as it is below in the call to
720        // `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
721        //
722        // Retagging an allocation pops all of the borrows derived from it off
723        // of the stack. This means we need to move the `Interned` into the
724        // `Vec` before calling `Interned::as_static_slice` to ensure the
725        // reference does not get invalidated by retagging.
726        //
727        // However, that alone may be insufficient as the `Interened` may be
728        // moved when the symbol table grows.
729        //
730        // The `SymbolTable` API prevents shared references to the `Interned`
731        // being invalidated by a retag by tying resolved symbol contents,
732        // `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
733        // grow, shrink, or otherwise reallocate/move contents while a reference
734        // to the `Interned`'s inner `T` is alive.
735        //
736        // To protect against future updates to stacked borrows or the unsafe
737        // code operational semantics, we can address this additional invariant
738        // with updated `Interned` internals which store the `Box<T>` in a raw
739        // pointer form, which allows moves to be treated as untyped copies.
740        //
741        // See:
742        //
743        // - <https://github.com/artichoke/intaglio/issues/235>
744        // - <https://github.com/artichoke/intaglio/pull/236>
745        let name = Interned::from(contents);
746        let id = Symbol::try_from(self.map.len())?;
747
748        // Move the `Interned` into the `Vec`, causing it to be retagged under
749        // stacked borrows, before taking any references to its inner `T`.
750        self.vec.push(name);
751        // Ensure we grow the map before we take any shared references to the
752        // inner `T`.
753        self.map.reserve(1);
754
755        // SAFETY: `self.vec` is non-empty because the preceding line of code
756        // pushed an entry into it.
757        let name = unsafe { self.vec.last().unwrap_unchecked() };
758
759        // SAFETY: This expression creates a reference with a `'static` lifetime
760        // from an owned and interned buffer, which is permissible because:
761        //
762        // - `Interned` is an internal implementation detail of `SymbolTable`.
763        // - `SymbolTable` never gives out `'static` references to underlying
764        //   contents.
765        // - All slice references given out by the `SymbolTable` have the same
766        //   lifetime as the `SymbolTable`.
767        // - The `map` field of `SymbolTable`, which contains the `'static`
768        //   references, is dropped before the owned buffers stored in this
769        //   `Interned`.
770        // - The shared reference may be derived from a `PinBox` which prevents
771        //   moves from retagging the underlying boxed `T` under stacked borrows.
772        // - The symbol table cannot grow, shrink, or otherwise move its contents
773        //   while this reference is alive.
774        let slice = unsafe { name.as_static_slice() };
775
776        self.map.insert(slice, id);
777
778        debug_assert_eq!(self.get(id), Some(slice));
779        debug_assert_eq!(self.intern(slice), Ok(id));
780
781        Ok(id)
782    }
783
784    /// Returns the `Symbol` identifier for `contents` if it has been interned
785    /// before, `None` otherwise.
786    ///
787    /// This method does not modify the symbol table.
788    ///
789    /// # Examples
790    ///
791    /// ```
792    /// # use std::ffi::CString;
793    /// # use intaglio::cstr::SymbolTable;
794    /// # use intaglio::Symbol;
795    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
796    /// let mut table = SymbolTable::new();
797    /// assert!(!table.is_interned(c"abc"));
798    /// assert_eq!(None, table.check_interned(c"abc"));
799    ///
800    /// table.intern(CString::from(c"abc"))?;
801    /// assert!(table.is_interned(c"abc"));
802    /// assert_eq!(Some(Symbol::new(0)), table.check_interned(c"abc"));
803    /// # Ok(())
804    /// # }
805    /// # example().unwrap();
806    /// ```
807    #[must_use]
808    pub fn check_interned(&self, contents: &CStr) -> Option<Symbol> {
809        self.map.get(contents).copied()
810    }
811
812    /// Returns `true` if the given C string has been interned before.
813    ///
814    /// This method does not modify the symbol table.
815    ///
816    /// # Examples
817    ///
818    /// ```
819    /// # use std::ffi::CString;
820    /// # use intaglio::cstr::SymbolTable;
821    /// # use intaglio::Symbol;
822    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
823    /// let mut table = SymbolTable::new();
824    /// assert!(!table.is_interned(c"abc"));
825    /// assert_eq!(None, table.check_interned(c"abc"));
826    ///
827    /// table.intern(CString::from(c"abc"))?;
828    /// assert!(table.is_interned(c"abc"));
829    /// assert_eq!(Some(Symbol::new(0)), table.check_interned(c"abc"));
830    /// # Ok(())
831    /// # }
832    /// # example().unwrap();
833    /// ```
834    #[must_use]
835    pub fn is_interned(&self, contents: &CStr) -> bool {
836        self.map.contains_key(contents)
837    }
838
839    /// Reserves capacity for at least additional more elements to be inserted
840    /// in the given `SymbolTable`. The collection may reserve more space to
841    /// avoid frequent reallocations. After calling reserve, capacity will be
842    /// greater than or equal to `self.len() + additional`. Does nothing if
843    /// capacity is already sufficient.
844    ///
845    /// # Panics
846    ///
847    /// Panics if the new capacity overflows `usize`.
848    ///
849    /// # Examples
850    ///
851    /// ```
852    /// # use std::ffi::CString;
853    /// # use intaglio::cstr::SymbolTable;
854    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
855    /// let mut table = SymbolTable::with_capacity(1);
856    /// table.intern(CString::from(c"abc"))?;
857    /// table.reserve(10);
858    /// assert!(table.capacity() >= 11);
859    /// # Ok(())
860    /// # }
861    /// # example().unwrap();
862    /// ```
863    pub fn reserve(&mut self, additional: usize) {
864        self.map.reserve(additional);
865        self.vec.reserve(additional);
866    }
867
868    /// Tries to reserve capacity for at least `additional` more elements in
869    /// the `SymbolTable`, returning an error if the allocation fails.
870    ///
871    /// After calling `try_reserve`, capacity will be greater than or equal to
872    /// `self.len() + additional` on success. Does nothing if capacity is
873    /// already sufficient.
874    ///
875    /// # Errors
876    ///
877    /// Returns a [`TryReserveError`] if the allocation fails or if the new
878    /// capacity would overflow `usize`.
879    ///
880    /// # Examples
881    ///
882    /// ```
883    /// # use intaglio::cstr::SymbolTable;
884    /// # use std::ffi::CString;
885    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
886    /// let mut table = SymbolTable::with_capacity(1);
887    /// table.intern(CString::new("abc")?)?;
888    /// table.try_reserve(10)?;
889    /// assert!(table.capacity() >= 11);
890    /// # Ok(())
891    /// # }
892    /// # example().unwrap();
893    /// ```
894    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
895        self.map.try_reserve(additional)?;
896        self.vec.try_reserve(additional)
897    }
898
899    /// Shrinks the capacity of the symbol table as much as possible.
900    ///
901    /// It will drop down as close as possible to the length but the allocator
902    /// may still inform the symbol table that there is space for a few more
903    /// elements.
904    ///
905    /// # Examples
906    ///
907    /// ```
908    /// # use std::ffi::CString;
909    /// # use intaglio::cstr::SymbolTable;
910    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
911    /// let mut table = SymbolTable::with_capacity(10);
912    /// table.intern(CString::from(c"abc"));
913    /// table.intern(CString::from(c"xyz"));
914    /// table.intern(CString::from(c"123"));
915    /// table.shrink_to_fit();
916    /// assert!(table.capacity() >= 3);
917    /// # Ok(())
918    /// # }
919    /// # example().unwrap();
920    /// ```
921    pub fn shrink_to_fit(&mut self) {
922        self.map.shrink_to_fit();
923        self.vec.shrink_to_fit();
924    }
925
926    /// Shrinks the capacity of the symbol table with a lower bound.
927    ///
928    /// The capacity will remain at least as large as both the length and the
929    /// supplied value.
930    ///
931    /// If the current capacity is less than the lower limit, this is a no-op.
932    ///
933    /// # Examples
934    ///
935    /// ```
936    /// # use std::ffi::CString;
937    /// # use intaglio::cstr::SymbolTable;
938    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
939    /// let mut table = SymbolTable::with_capacity(10);
940    /// table.intern(CString::from(c"abc"));
941    /// table.intern(CString::from(c"xyz"));
942    /// table.intern(CString::from(c"123"));
943    /// table.shrink_to(5);
944    /// assert!(table.capacity() >= 5);
945    /// # Ok(())
946    /// # }
947    /// # example().unwrap();
948    /// ```
949    pub fn shrink_to(&mut self, min_capacity: usize) {
950        self.map.shrink_to(min_capacity);
951        self.vec.shrink_to(min_capacity);
952    }
953}
954
955#[cfg(test)]
956#[allow(clippy::needless_pass_by_value)]
957mod tests {
958    use std::ffi::CString;
959
960    use quickcheck::quickcheck;
961
962    use super::SymbolTable;
963
964    #[test]
965    fn alloc_drop_new() {
966        let table = SymbolTable::new();
967        drop(table);
968    }
969
970    #[test]
971    fn alloc_drop_with_capacity() {
972        let table = SymbolTable::with_capacity(1 << 14);
973        drop(table);
974    }
975
976    #[test]
977    fn drop_with_true_static_data() {
978        let mut table = SymbolTable::new();
979        table.intern(c"1").unwrap();
980        table.intern(c"2").unwrap();
981        table.intern(c"3").unwrap();
982        table.intern(c"4").unwrap();
983        table.intern(c"5").unwrap();
984        drop(table);
985    }
986
987    #[test]
988    fn drop_with_owned_data() {
989        let mut table = SymbolTable::new();
990        table.intern(c"1".to_owned()).unwrap();
991        table.intern(c"2".to_owned()).unwrap();
992        table.intern(c"3".to_owned()).unwrap();
993        table.intern(c"4".to_owned()).unwrap();
994        table.intern(c"5".to_owned()).unwrap();
995        drop(table);
996    }
997
998    #[test]
999    fn set_owned_value_and_get_with_owned_and_borrowed() {
1000        let mut table = SymbolTable::new();
1001        // intern an owned value
1002        let sym = table.intern(c"abc".to_owned()).unwrap();
1003        // retrieve C string bytes
1004        assert_eq!(&b"abc\0"[..], table.get(sym).unwrap().to_bytes_with_nul());
1005        // intern owned value again
1006        assert_eq!(sym, table.intern(c"abc".to_owned()).unwrap());
1007        // intern borrowed value
1008        assert_eq!(sym, table.intern(c"abc").unwrap());
1009    }
1010
1011    #[test]
1012    fn set_borrowed_value_and_get_with_owned_and_borrowed() {
1013        let mut table = SymbolTable::new();
1014        // intern a borrowed value
1015        let sym = table.intern(c"abc").unwrap();
1016        // retrieve C string bytes
1017        assert_eq!(&b"abc\0"[..], table.get(sym).unwrap().to_bytes_with_nul());
1018        // intern owned value
1019        assert_eq!(sym, table.intern(c"abc".to_owned()).unwrap());
1020        // intern borrowed value again
1021        assert_eq!(sym, table.intern(c"abc").unwrap());
1022    }
1023
1024    #[test]
1025    fn try_reserve_grows_capacity() {
1026        let mut table = SymbolTable::with_capacity(1);
1027        table.intern(c"abc").unwrap();
1028        let len = table.len();
1029
1030        table.try_reserve(10).unwrap();
1031
1032        assert!(table.capacity() >= len + 10);
1033    }
1034
1035    #[test]
1036    fn try_reserve_overflow_errors() {
1037        let mut table = SymbolTable::new();
1038
1039        let result = table.try_reserve(usize::MAX);
1040
1041        assert!(result.is_err());
1042    }
1043
1044    quickcheck! {
1045        fn intern_twice_symbol_equality(cstring: CString) -> bool {
1046            let mut table = SymbolTable::new();
1047            let sym = table.intern(cstring.clone()).unwrap();
1048            let sym_again = table.intern(cstring).unwrap();
1049            sym == sym_again
1050        }
1051
1052        fn intern_get_roundtrip(cstring: CString) -> bool {
1053            let mut table = SymbolTable::new();
1054            let sym = table.intern(cstring.clone()).unwrap();
1055            let retrieved_c_string = table.get(sym).unwrap();
1056            *cstring == *retrieved_c_string
1057        }
1058
1059        fn table_contains_sym(cstring: CString) -> bool {
1060            let mut table = SymbolTable::new();
1061            let sym = table.intern(cstring).unwrap();
1062            table.contains(sym)
1063        }
1064
1065        fn table_does_not_contain_missing_symbol_ids(sym: u32) -> bool {
1066            let table = SymbolTable::new();
1067            !table.contains(sym.into())
1068        }
1069
1070        fn empty_table_does_not_report_any_interned_c_strings(cstring: CString) -> bool {
1071            let table = SymbolTable::new();
1072            !table.is_interned(cstring.as_c_str())
1073        }
1074
1075        fn table_reports_interned_c_strings_as_interned(cstring: CString) -> bool {
1076            let mut table = SymbolTable::new();
1077            table.intern(cstring.clone()).unwrap();
1078            table.is_interned(cstring.as_c_str())
1079        }
1080    }
1081}