intaglio/cstr.rs
1//! Intern C strings.
2//!
3//! This module provides a nearly identical API to the one found in the
4//! top-level of this crate. There is one important difference:
5//!
6//! 1. Interned contents are [`&CStr`] instead of `&str`. Additionally,
7//! [`CString`] is used where `String` would have been used.
8//!
9//! # Example: intern C string
10//!
11//! ```
12//! # use std::ffi::CString;
13//! # use intaglio::cstr::SymbolTable;
14//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
15//! let mut table = SymbolTable::new();
16//! let sym = table.intern(c"abc")?;
17//! assert_eq!(sym, table.intern(CString::from(c"abc"))?);
18//! assert_eq!(Some(c"abc"), table.get(sym));
19//! # Ok(())
20//! # }
21//! # example().unwrap();
22//! ```
23//!
24//! # Example: symbol iterators
25//!
26//! ```
27//! # use std::collections::HashMap;
28//! # use intaglio::cstr::SymbolTable;
29//! # use intaglio::Symbol;
30//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
31//! let mut table = SymbolTable::new();
32//! let sym = table.intern(c"abc")?;
33//! // Retrieve set of `Symbol`s.
34//! let all_symbols = table.all_symbols();
35//! assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
36//!
37//! table.intern(c"xyz")?;
38//! let mut map = HashMap::new();
39//! map.insert(Symbol::new(0), c"abc");
40//! map.insert(Symbol::new(1), c"xyz");
41//! // Retrieve symbol to C string content mappings.
42//! let iter = table.iter();
43//! assert_eq!(map, iter.collect::<HashMap<_, _>>());
44//! # Ok(())
45//! # }
46//! # example().unwrap();
47//! ```
48//!
49//! # Performance
50//!
51//! In general, one should expect this crate's performance on `&CStr` to be
52//! roughly similar to performance on `&str`.
53//!
54//! [`CString`]: std::ffi::CString
55//! [`&CStr`]: std::ffi::CStr
56
57use core::hash::BuildHasher;
58use core::iter::{FromIterator, FusedIterator, Zip};
59use core::marker::PhantomData;
60use core::mem::ManuallyDrop;
61use core::ops::Range;
62use core::slice;
63use std::borrow::Cow;
64use std::collections::{
65 hash_map::{HashMap, RandomState},
66 TryReserveError,
67};
68use std::ffi::CStr;
69
70use crate::internal::Interned;
71use crate::{Symbol, SymbolOverflowError, DEFAULT_SYMBOL_TABLE_CAPACITY};
72
73/// An iterator over all [`Symbol`]s in a [`SymbolTable`].
74///
75/// See the [`all_symbols`](SymbolTable::all_symbols) method in [`SymbolTable`].
76///
77/// # Usage
78///
79/// ```
80/// # use intaglio::cstr::SymbolTable;
81/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
82/// let mut table = SymbolTable::new();
83/// let sym = table.intern(c"abc")?;
84/// let all_symbols = table.all_symbols();
85/// assert_eq!(vec![sym], all_symbols.collect::<Vec<_>>());
86/// # Ok(())
87/// # }
88/// # example().unwrap();
89/// ```
90#[derive(Debug, Clone, Hash, PartialEq, Eq)]
91#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
92pub struct AllSymbols<'a> {
93 range: Range<usize>,
94 // Hold a shared reference to the underlying `SymbolTable` to ensure the
95 // table is not modified while we are iterating which would make the results
96 // not match the real state.
97 phantom: PhantomData<&'a SymbolTable>,
98}
99
100impl Iterator for AllSymbols<'_> {
101 type Item = Symbol;
102
103 fn next(&mut self) -> Option<Self::Item> {
104 let next = self.range.next()?;
105 debug_assert!(u32::try_from(next).is_ok());
106 Some((next as u32).into())
107 }
108
109 fn size_hint(&self) -> (usize, Option<usize>) {
110 self.range.size_hint()
111 }
112
113 fn count(self) -> usize {
114 self.range.count()
115 }
116
117 fn last(self) -> Option<Self::Item> {
118 let last = self.range.last()?;
119 debug_assert!(u32::try_from(last).is_ok());
120 Some((last as u32).into())
121 }
122
123 fn nth(&mut self, n: usize) -> Option<Self::Item> {
124 let nth = self.range.nth(n)?;
125 debug_assert!(u32::try_from(nth).is_ok());
126 Some((nth as u32).into())
127 }
128
129 fn collect<B: FromIterator<Self::Item>>(self) -> B {
130 self.range.map(|sym| Symbol::from(sym as u32)).collect()
131 }
132}
133
134impl DoubleEndedIterator for AllSymbols<'_> {
135 fn next_back(&mut self) -> Option<Self::Item> {
136 let next = self.range.next_back()?;
137 debug_assert!(u32::try_from(next).is_ok());
138 Some((next as u32).into())
139 }
140
141 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
142 let nth = self.range.nth_back(n)?;
143 debug_assert!(u32::try_from(nth).is_ok());
144 Some((nth as u32).into())
145 }
146}
147
148impl FusedIterator for AllSymbols<'_> {}
149
150/// An iterator over all interned C strings in a [`SymbolTable`].
151///
152/// See the [`c_strings`](SymbolTable::c_strings) method in [`SymbolTable`].
153///
154/// # Usage
155///
156/// ```
157/// # use std::ffi::CString;
158/// # use intaglio::cstr::SymbolTable;
159/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
160/// let mut table = SymbolTable::new();
161/// let sym = table.intern(CString::from(c"abc"))?;
162/// let c_strings = table.c_strings();
163/// assert_eq!(vec![c"abc"], c_strings.collect::<Vec<_>>());
164/// # Ok(())
165/// # }
166/// # example().unwrap();
167/// ```
168#[derive(Debug, Clone)]
169#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
170pub struct CStrings<'a>(slice::Iter<'a, Interned<CStr>>);
171
172impl<'a> Iterator for CStrings<'a> {
173 type Item = &'a CStr;
174
175 fn next(&mut self) -> Option<Self::Item> {
176 self.0.next().map(Interned::as_slice)
177 }
178
179 fn size_hint(&self) -> (usize, Option<usize>) {
180 self.0.size_hint()
181 }
182
183 fn count(self) -> usize {
184 self.0.count()
185 }
186
187 fn last(self) -> Option<Self::Item> {
188 self.0.last().map(Interned::as_slice)
189 }
190
191 fn nth(&mut self, n: usize) -> Option<Self::Item> {
192 self.0.nth(n).map(Interned::as_slice)
193 }
194
195 fn collect<B: FromIterator<Self::Item>>(self) -> B {
196 self.0.map(Interned::as_slice).collect()
197 }
198}
199
200impl DoubleEndedIterator for CStrings<'_> {
201 fn next_back(&mut self) -> Option<Self::Item> {
202 self.0.next_back().map(Interned::as_slice)
203 }
204
205 fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
206 self.0.nth_back(n).map(Interned::as_slice)
207 }
208
209 fn rfold<B, F>(self, accum: B, f: F) -> B
210 where
211 F: FnMut(B, Self::Item) -> B,
212 {
213 self.0.map(Interned::as_slice).rfold(accum, f)
214 }
215}
216
217impl ExactSizeIterator for CStrings<'_> {
218 fn len(&self) -> usize {
219 self.0.len()
220 }
221}
222
223impl FusedIterator for CStrings<'_> {}
224
225/// An iterator over all symbols and interned C strings in a [`SymbolTable`].
226///
227/// See the [`iter`](SymbolTable::iter) method in [`SymbolTable`].
228///
229/// # Usage
230///
231/// ```
232/// # use std::collections::HashMap;
233/// # use intaglio::cstr::SymbolTable;
234/// # use intaglio::Symbol;
235/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
236/// let mut table = SymbolTable::new();
237/// let sym = table.intern(c"abc")?;
238/// let iter = table.iter();
239/// let mut map = HashMap::new();
240/// map.insert(Symbol::new(0), c"abc");
241/// assert_eq!(map, iter.collect::<HashMap<_, _>>());
242/// # Ok(())
243/// # }
244/// # example().unwrap();
245/// ```
246#[derive(Debug, Clone)]
247#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
248pub struct Iter<'a>(Zip<AllSymbols<'a>, CStrings<'a>>);
249
250impl<'a> Iterator for Iter<'a> {
251 type Item = (Symbol, &'a CStr);
252
253 fn next(&mut self) -> Option<Self::Item> {
254 self.0.next()
255 }
256
257 fn size_hint(&self) -> (usize, Option<usize>) {
258 self.0.size_hint()
259 }
260
261 fn count(self) -> usize {
262 self.0.count()
263 }
264
265 fn last(self) -> Option<Self::Item> {
266 self.0.last()
267 }
268
269 fn nth(&mut self, n: usize) -> Option<Self::Item> {
270 self.0.nth(n)
271 }
272
273 fn collect<B: FromIterator<Self::Item>>(self) -> B {
274 self.0.collect()
275 }
276}
277
278impl FusedIterator for Iter<'_> {}
279
280impl<'a, S> IntoIterator for &'a SymbolTable<S> {
281 type Item = (Symbol, &'a CStr);
282 type IntoIter = Iter<'a>;
283
284 fn into_iter(self) -> Self::IntoIter {
285 self.iter()
286 }
287}
288
289/// C string interner.
290///
291/// This symbol table is implemented by storing [`CString`]s with a fast path
292/// for [`&CStr`] that are already `'static`.
293///
294/// See module documentation for more.
295///
296/// # Usage
297///
298/// ```
299/// # use std::ffi::CString;
300/// # use intaglio::cstr::SymbolTable;
301/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
302/// let mut table = SymbolTable::new();
303/// let sym = table.intern(c"abc")?;
304/// assert_eq!(sym, table.intern(CString::from(c"abc"))?);
305/// assert!(table.contains(sym));
306/// assert!(table.is_interned(c"abc"));
307/// # Ok(())
308/// # }
309/// # example().unwrap();
310/// ```
311///
312/// [`CString`]: std::ffi::CString
313/// [`&CStr`]: std::ffi::CStr
314#[derive(Default, Debug)]
315#[cfg_attr(docsrs, doc(cfg(feature = "cstr")))]
316pub struct SymbolTable<S = RandomState> {
317 map: ManuallyDrop<HashMap<&'static CStr, Symbol, S>>,
318 vec: ManuallyDrop<Vec<Interned<CStr>>>,
319}
320
321impl<S> Drop for SymbolTable<S> {
322 fn drop(&mut self) {
323 // SAFETY: No mutable references to `SymbolTable` internal fields are
324 // given out, which means `ManuallyDrop::drop` can only be invoked in
325 // this `Drop::drop` impl. Interal fields are guaranteed to be
326 // initialized by `SymbolTable` constructors.
327 unsafe {
328 // `Interned` requires that the `'static` references it gives out
329 // are dropped before the owning buffer stored in the `Interned`.
330 ManuallyDrop::drop(&mut self.map);
331 ManuallyDrop::drop(&mut self.vec);
332 }
333 }
334}
335
336impl SymbolTable<RandomState> {
337 /// Constructs a new, empty `SymbolTable` with [default capacity].
338 ///
339 /// This function will always allocate. To construct a symbol table without
340 /// allocating, call [`SymbolTable::with_capacity(0)`].
341 ///
342 /// # Examples
343 ///
344 /// ```
345 /// # use intaglio::cstr::SymbolTable;
346 /// let table = SymbolTable::new();
347 /// assert_eq!(0, table.len());
348 /// assert!(table.capacity() >= 4096);
349 /// ```
350 ///
351 /// [default capacity]: DEFAULT_SYMBOL_TABLE_CAPACITY
352 /// [`SymbolTable::with_capacity(0)`]: Self::with_capacity
353 #[must_use]
354 pub fn new() -> Self {
355 Self::with_capacity(DEFAULT_SYMBOL_TABLE_CAPACITY)
356 }
357
358 /// Constructs a new, empty `SymbolTable` with the specified capacity.
359 ///
360 /// The symbol table will be able to hold at least `capacity` C strings
361 /// without reallocating. If `capacity` is 0, the symbol table will not
362 /// allocate.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// # use intaglio::cstr::SymbolTable;
368 /// let table = SymbolTable::with_capacity(10);
369 /// assert_eq!(0, table.len());
370 /// assert!(table.capacity() >= 10);
371 /// ```
372 #[must_use]
373 pub fn with_capacity(capacity: usize) -> Self {
374 let capacity = capacity.next_power_of_two();
375 Self {
376 map: ManuallyDrop::new(HashMap::with_capacity(capacity)),
377 vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
378 }
379 }
380}
381
382impl<S> SymbolTable<S> {
383 /// Constructs a new, empty `SymbolTable` with
384 /// [default capacity](DEFAULT_SYMBOL_TABLE_CAPACITY) and the given hash
385 /// builder.
386 ///
387 /// # Examples
388 ///
389 /// ```
390 /// # use std::collections::hash_map::RandomState;
391 /// # use intaglio::cstr::SymbolTable;
392 /// let hash_builder = RandomState::new();
393 /// let table = SymbolTable::with_hasher(hash_builder);
394 /// assert_eq!(0, table.len());
395 /// assert!(table.capacity() >= 4096);
396 /// ```
397 pub fn with_hasher(hash_builder: S) -> Self {
398 Self::with_capacity_and_hasher(DEFAULT_SYMBOL_TABLE_CAPACITY, hash_builder)
399 }
400
401 /// Constructs a new, empty `SymbolTable` with the specified capacity and
402 /// the given hash builder.
403 ///
404 /// # Examples
405 ///
406 /// ```
407 /// # use std::collections::hash_map::RandomState;
408 /// # use intaglio::cstr::SymbolTable;
409 /// let hash_builder = RandomState::new();
410 /// let table = SymbolTable::with_capacity_and_hasher(10, hash_builder);
411 /// assert_eq!(0, table.len());
412 /// assert!(table.capacity() >= 10);
413 /// ```
414 pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self {
415 Self {
416 map: ManuallyDrop::new(HashMap::with_capacity_and_hasher(capacity, hash_builder)),
417 vec: ManuallyDrop::new(Vec::with_capacity(capacity)),
418 }
419 }
420
421 /// Returns the number of C strings the table can hold without reallocating.
422 ///
423 /// # Examples
424 ///
425 /// ```
426 /// # use intaglio::cstr::SymbolTable;
427 /// let table = SymbolTable::with_capacity(10);
428 /// assert!(table.capacity() >= 10);
429 /// ```
430 pub fn capacity(&self) -> usize {
431 usize::min(self.vec.capacity(), self.map.capacity())
432 }
433
434 /// Returns the number of interned C strings in the table.
435 ///
436 /// # Examples
437 ///
438 /// ```
439 /// # use std::ffi::CString;
440 /// # use intaglio::cstr::SymbolTable;
441 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
442 /// let mut table = SymbolTable::new();
443 /// assert_eq!(0, table.len());
444 ///
445 /// table.intern(CString::new(*b"abc")?)?;
446 /// // only uniquely interned C strings grow the symbol table.
447 /// table.intern(CString::new(*b"abc")?)?;
448 /// table.intern(CString::new(*b"xyz")?)?;
449 /// assert_eq!(2, table.len());
450 /// # Ok(())
451 /// # }
452 /// # example().unwrap();
453 /// ```
454 pub fn len(&self) -> usize {
455 self.vec.len()
456 }
457
458 /// Returns `true` if the symbol table contains no interned C strings.
459 ///
460 /// # Examples
461 ///
462 /// ```
463 /// # use std::ffi::CString;
464 /// # use intaglio::cstr::SymbolTable;
465 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
466 /// let mut table = SymbolTable::new();
467 /// assert!(table.is_empty());
468 ///
469 /// table.intern(CString::new(*b"abc")?)?;
470 /// assert!(!table.is_empty());
471 /// # Ok(())
472 /// # }
473 /// # example().unwrap();
474 /// ```
475 pub fn is_empty(&self) -> bool {
476 self.vec.is_empty()
477 }
478
479 /// Returns `true` if the symbol table contains the given symbol.
480 ///
481 /// # Examples
482 ///
483 /// ```
484 /// # use std::ffi::CString;
485 /// # use intaglio::cstr::SymbolTable;
486 /// # use intaglio::Symbol;
487 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
488 /// let mut table = SymbolTable::new();
489 /// assert!(!table.contains(Symbol::new(0)));
490 ///
491 /// let sym = table.intern(CString::new(*b"abc")?)?;
492 /// assert!(table.contains(Symbol::new(0)));
493 /// assert!(table.contains(sym));
494 /// # Ok(())
495 /// # }
496 /// # example().unwrap();
497 /// ```
498 #[must_use]
499 pub fn contains(&self, id: Symbol) -> bool {
500 self.get(id).is_some()
501 }
502
503 /// Returns a reference to the C string associated with the given symbol.
504 ///
505 /// If the given symbol does not exist in the underlying symbol table,
506 /// `None` is returned.
507 ///
508 /// The lifetime of the returned reference is bound to the symbol table.
509 ///
510 /// # Examples
511 ///
512 /// ```
513 /// # use std::ffi::CString;
514 /// # use intaglio::cstr::SymbolTable;
515 /// # use intaglio::Symbol;
516 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
517 /// let mut table = SymbolTable::new();
518 /// assert!(table.get(Symbol::new(0)).is_none());
519 ///
520 /// let sym = table.intern(CString::from(c"abc"))?;
521 /// assert_eq!(Some(c"abc"), table.get(Symbol::new(0)));
522 /// assert_eq!(Some(c"abc"), table.get(sym));
523 /// # Ok(())
524 /// # }
525 /// # example().unwrap();
526 /// ```
527 #[must_use]
528 pub fn get(&self, id: Symbol) -> Option<&CStr> {
529 let cstr = self.vec.get(usize::from(id))?;
530 Some(cstr.as_slice())
531 }
532
533 /// Returns an iterator over all [`Symbol`]s and C strings in the
534 /// [`SymbolTable`].
535 ///
536 /// # Examples
537 ///
538 /// ```
539 /// # use std::collections::HashMap;
540 /// # use std::ffi::CString;
541 /// # use intaglio::cstr::SymbolTable;
542 /// # use intaglio::Symbol;
543 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
544 /// let mut table = SymbolTable::new();
545 /// table.intern(CString::from(c"abc"))?;
546 /// table.intern(CString::from(c"xyz"))?;
547 /// table.intern(CString::from(c"123"))?;
548 /// table.intern(CString::from(c"789"))?;
549 ///
550 /// let iter = table.iter();
551 /// let mut map = HashMap::new();
552 /// map.insert(Symbol::new(0), c"abc");
553 /// map.insert(Symbol::new(1), c"xyz");
554 /// map.insert(Symbol::new(2), c"123");
555 /// map.insert(Symbol::new(3), c"789");
556 /// assert_eq!(map, iter.collect::<HashMap<_, _>>());
557 /// # Ok(())
558 /// # }
559 /// # example().unwrap();
560 /// ```
561 ///
562 /// ```
563 /// # use std::ffi::CString;
564 /// # use intaglio::cstr::SymbolTable;
565 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
566 /// let mut table = SymbolTable::new();
567 /// table.intern(CString::from(c"abc"))?;
568 /// table.intern(CString::from(c"xyz"))?;
569 /// table.intern(CString::from(c"123"))?;
570 /// table.intern(CString::from(c"789"))?;
571 ///
572 /// let iter = table.iter();
573 /// assert_eq!(table.len(), iter.count());
574 /// # Ok(())
575 /// # }
576 /// # example().unwrap();
577 /// ```
578 pub fn iter(&self) -> Iter<'_> {
579 Iter(self.all_symbols().zip(self.c_strings()))
580 }
581
582 /// Returns an iterator over all [`Symbol`]s in the [`SymbolTable`].
583 ///
584 /// # Examples
585 ///
586 /// ```
587 /// # use std::ffi::CString;
588 /// # use intaglio::cstr::SymbolTable;
589 /// # use intaglio::Symbol;
590 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
591 /// let mut table = SymbolTable::new();
592 /// table.intern(CString::from(c"abc"))?;
593 /// table.intern(CString::from(c"xyz"))?;
594 /// table.intern(CString::from(c"123"))?;
595 /// table.intern(CString::from(c"789"))?;
596 ///
597 /// let mut all_symbols = table.all_symbols();
598 /// assert_eq!(Some(Symbol::new(0)), all_symbols.next());
599 /// assert_eq!(Some(Symbol::new(1)), all_symbols.nth_back(2));
600 /// assert_eq!(None, all_symbols.next());
601 /// # Ok(())
602 /// # }
603 /// # example().unwrap();
604 /// ```
605 ///
606 /// ```
607 /// # use std::ffi::CString;
608 /// # use intaglio::cstr::SymbolTable;
609 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
610 /// let mut table = SymbolTable::new();
611 /// table.intern(CString::from(c"abc"))?;
612 /// table.intern(CString::from(c"xyz"))?;
613 /// table.intern(CString::from(c"123"))?;
614 /// table.intern(CString::from(c"789"))?;
615 ///
616 /// let all_symbols = table.all_symbols();
617 /// assert_eq!(table.len(), all_symbols.count());
618 /// # Ok(())
619 /// # }
620 /// # example().unwrap();
621 /// ```
622 pub fn all_symbols(&self) -> AllSymbols<'_> {
623 AllSymbols {
624 range: 0..self.len(),
625 phantom: PhantomData,
626 }
627 }
628
629 /// Returns an iterator over all C strings in the [`SymbolTable`].
630 ///
631 /// # Examples
632 ///
633 /// ```
634 /// # use std::ffi::CString;
635 /// # use intaglio::cstr::SymbolTable;
636 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
637 /// let mut table = SymbolTable::new();
638 /// table.intern(CString::from(c"abc"))?;
639 /// table.intern(CString::from(c"xyz"))?;
640 /// table.intern(CString::from(c"123"))?;
641 /// table.intern(CString::from(c"789"))?;
642 ///
643 /// let mut c_strings = table.c_strings();
644 /// assert_eq!(Some(c"abc"), c_strings.next());
645 /// assert_eq!(Some(c"xyz"), c_strings.nth_back(2));
646 /// assert_eq!(None, c_strings.next());
647 /// # Ok(())
648 /// # }
649 /// # example().unwrap();
650 /// ```
651 ///
652 /// ```
653 /// # use std::ffi::CString;
654 /// # use intaglio::cstr::SymbolTable;
655 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
656 /// let mut table = SymbolTable::new();
657 /// table.intern(CString::from(c"abc"))?;
658 /// table.intern(CString::from(c"xyz"))?;
659 /// table.intern(CString::from(c"123"))?;
660 /// table.intern(CString::from(c"789"))?;
661 ///
662 /// let c_strings = table.c_strings();
663 /// assert_eq!(table.len(), c_strings.count());
664 /// # Ok(())
665 /// # }
666 /// # example().unwrap();
667 /// ```
668 pub fn c_strings(&self) -> CStrings<'_> {
669 CStrings(self.vec.iter())
670 }
671}
672
673impl<S> SymbolTable<S>
674where
675 S: BuildHasher,
676{
677 /// Intern a C string for the lifetime of the symbol table.
678 ///
679 /// The returned `Symbol` allows retrieving of the underlying [`CStr`].
680 /// Equal C strings will be inserted into the symbol table exactly once.
681 ///
682 /// This function only allocates if the underlying symbol table has no
683 /// remaining capacity.
684 ///
685 /// # Errors
686 ///
687 /// If the symbol table would grow larger than `u32::MAX` interned C
688 /// strings, the [`Symbol`] counter would overflow and a
689 /// [`SymbolOverflowError`] is returned.
690 ///
691 /// # Examples
692 ///
693 /// ```
694 /// # use std::ffi::CString;
695 /// # use intaglio::cstr::SymbolTable;
696 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
697 /// let mut table = SymbolTable::new();
698 /// let sym = table.intern(CString::from(c"abc"))?;
699 /// table.intern(CString::from(c"xyz"))?;
700 /// table.intern(c"123")?;
701 /// table.intern(c"789")?;
702 ///
703 /// assert_eq!(4, table.len());
704 /// assert_eq!(Some(c"abc"), table.get(sym));
705 /// # Ok(())
706 /// # }
707 /// # example().unwrap();
708 /// ```
709 pub fn intern<T>(&mut self, contents: T) -> Result<Symbol, SymbolOverflowError>
710 where
711 T: Into<Cow<'static, CStr>>,
712 {
713 let contents = contents.into();
714 if let Some(&id) = self.map.get(&*contents) {
715 return Ok(id);
716 }
717
718 // The `Interned::Owned` variant is derived from a `Box<T>`. When such
719 // a structure is moved or assigned, as it is below in the call to
720 // `self.vec.push`, the allocation is "retagged" in Miri/stacked borrows.
721 //
722 // Retagging an allocation pops all of the borrows derived from it off
723 // of the stack. This means we need to move the `Interned` into the
724 // `Vec` before calling `Interned::as_static_slice` to ensure the
725 // reference does not get invalidated by retagging.
726 //
727 // However, that alone may be insufficient as the `Interened` may be
728 // moved when the symbol table grows.
729 //
730 // The `SymbolTable` API prevents shared references to the `Interned`
731 // being invalidated by a retag by tying resolved symbol contents,
732 // `&'a T`, to `&'a SymbolTable`, which means the `SymbolTable` cannot
733 // grow, shrink, or otherwise reallocate/move contents while a reference
734 // to the `Interned`'s inner `T` is alive.
735 //
736 // To protect against future updates to stacked borrows or the unsafe
737 // code operational semantics, we can address this additional invariant
738 // with updated `Interned` internals which store the `Box<T>` in a raw
739 // pointer form, which allows moves to be treated as untyped copies.
740 //
741 // See:
742 //
743 // - <https://github.com/artichoke/intaglio/issues/235>
744 // - <https://github.com/artichoke/intaglio/pull/236>
745 let name = Interned::from(contents);
746 let id = Symbol::try_from(self.map.len())?;
747
748 // Move the `Interned` into the `Vec`, causing it to be retagged under
749 // stacked borrows, before taking any references to its inner `T`.
750 self.vec.push(name);
751 // Ensure we grow the map before we take any shared references to the
752 // inner `T`.
753 self.map.reserve(1);
754
755 // SAFETY: `self.vec` is non-empty because the preceding line of code
756 // pushed an entry into it.
757 let name = unsafe { self.vec.last().unwrap_unchecked() };
758
759 // SAFETY: This expression creates a reference with a `'static` lifetime
760 // from an owned and interned buffer, which is permissible because:
761 //
762 // - `Interned` is an internal implementation detail of `SymbolTable`.
763 // - `SymbolTable` never gives out `'static` references to underlying
764 // contents.
765 // - All slice references given out by the `SymbolTable` have the same
766 // lifetime as the `SymbolTable`.
767 // - The `map` field of `SymbolTable`, which contains the `'static`
768 // references, is dropped before the owned buffers stored in this
769 // `Interned`.
770 // - The shared reference may be derived from a `PinBox` which prevents
771 // moves from retagging the underlying boxed `T` under stacked borrows.
772 // - The symbol table cannot grow, shrink, or otherwise move its contents
773 // while this reference is alive.
774 let slice = unsafe { name.as_static_slice() };
775
776 self.map.insert(slice, id);
777
778 debug_assert_eq!(self.get(id), Some(slice));
779 debug_assert_eq!(self.intern(slice), Ok(id));
780
781 Ok(id)
782 }
783
784 /// Returns the `Symbol` identifier for `contents` if it has been interned
785 /// before, `None` otherwise.
786 ///
787 /// This method does not modify the symbol table.
788 ///
789 /// # Examples
790 ///
791 /// ```
792 /// # use std::ffi::CString;
793 /// # use intaglio::cstr::SymbolTable;
794 /// # use intaglio::Symbol;
795 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
796 /// let mut table = SymbolTable::new();
797 /// assert!(!table.is_interned(c"abc"));
798 /// assert_eq!(None, table.check_interned(c"abc"));
799 ///
800 /// table.intern(CString::from(c"abc"))?;
801 /// assert!(table.is_interned(c"abc"));
802 /// assert_eq!(Some(Symbol::new(0)), table.check_interned(c"abc"));
803 /// # Ok(())
804 /// # }
805 /// # example().unwrap();
806 /// ```
807 #[must_use]
808 pub fn check_interned(&self, contents: &CStr) -> Option<Symbol> {
809 self.map.get(contents).copied()
810 }
811
812 /// Returns `true` if the given C string has been interned before.
813 ///
814 /// This method does not modify the symbol table.
815 ///
816 /// # Examples
817 ///
818 /// ```
819 /// # use std::ffi::CString;
820 /// # use intaglio::cstr::SymbolTable;
821 /// # use intaglio::Symbol;
822 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
823 /// let mut table = SymbolTable::new();
824 /// assert!(!table.is_interned(c"abc"));
825 /// assert_eq!(None, table.check_interned(c"abc"));
826 ///
827 /// table.intern(CString::from(c"abc"))?;
828 /// assert!(table.is_interned(c"abc"));
829 /// assert_eq!(Some(Symbol::new(0)), table.check_interned(c"abc"));
830 /// # Ok(())
831 /// # }
832 /// # example().unwrap();
833 /// ```
834 #[must_use]
835 pub fn is_interned(&self, contents: &CStr) -> bool {
836 self.map.contains_key(contents)
837 }
838
839 /// Reserves capacity for at least additional more elements to be inserted
840 /// in the given `SymbolTable`. The collection may reserve more space to
841 /// avoid frequent reallocations. After calling reserve, capacity will be
842 /// greater than or equal to `self.len() + additional`. Does nothing if
843 /// capacity is already sufficient.
844 ///
845 /// # Panics
846 ///
847 /// Panics if the new capacity overflows `usize`.
848 ///
849 /// # Examples
850 ///
851 /// ```
852 /// # use std::ffi::CString;
853 /// # use intaglio::cstr::SymbolTable;
854 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
855 /// let mut table = SymbolTable::with_capacity(1);
856 /// table.intern(CString::from(c"abc"))?;
857 /// table.reserve(10);
858 /// assert!(table.capacity() >= 11);
859 /// # Ok(())
860 /// # }
861 /// # example().unwrap();
862 /// ```
863 pub fn reserve(&mut self, additional: usize) {
864 self.map.reserve(additional);
865 self.vec.reserve(additional);
866 }
867
868 /// Tries to reserve capacity for at least `additional` more elements in
869 /// the `SymbolTable`, returning an error if the allocation fails.
870 ///
871 /// After calling `try_reserve`, capacity will be greater than or equal to
872 /// `self.len() + additional` on success. Does nothing if capacity is
873 /// already sufficient.
874 ///
875 /// # Errors
876 ///
877 /// Returns a [`TryReserveError`] if the allocation fails or if the new
878 /// capacity would overflow `usize`.
879 ///
880 /// # Examples
881 ///
882 /// ```
883 /// # use intaglio::cstr::SymbolTable;
884 /// # use std::ffi::CString;
885 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
886 /// let mut table = SymbolTable::with_capacity(1);
887 /// table.intern(CString::new("abc")?)?;
888 /// table.try_reserve(10)?;
889 /// assert!(table.capacity() >= 11);
890 /// # Ok(())
891 /// # }
892 /// # example().unwrap();
893 /// ```
894 pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
895 self.map.try_reserve(additional)?;
896 self.vec.try_reserve(additional)
897 }
898
899 /// Shrinks the capacity of the symbol table as much as possible.
900 ///
901 /// It will drop down as close as possible to the length but the allocator
902 /// may still inform the symbol table that there is space for a few more
903 /// elements.
904 ///
905 /// # Examples
906 ///
907 /// ```
908 /// # use std::ffi::CString;
909 /// # use intaglio::cstr::SymbolTable;
910 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
911 /// let mut table = SymbolTable::with_capacity(10);
912 /// table.intern(CString::from(c"abc"));
913 /// table.intern(CString::from(c"xyz"));
914 /// table.intern(CString::from(c"123"));
915 /// table.shrink_to_fit();
916 /// assert!(table.capacity() >= 3);
917 /// # Ok(())
918 /// # }
919 /// # example().unwrap();
920 /// ```
921 pub fn shrink_to_fit(&mut self) {
922 self.map.shrink_to_fit();
923 self.vec.shrink_to_fit();
924 }
925
926 /// Shrinks the capacity of the symbol table with a lower bound.
927 ///
928 /// The capacity will remain at least as large as both the length and the
929 /// supplied value.
930 ///
931 /// If the current capacity is less than the lower limit, this is a no-op.
932 ///
933 /// # Examples
934 ///
935 /// ```
936 /// # use std::ffi::CString;
937 /// # use intaglio::cstr::SymbolTable;
938 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
939 /// let mut table = SymbolTable::with_capacity(10);
940 /// table.intern(CString::from(c"abc"));
941 /// table.intern(CString::from(c"xyz"));
942 /// table.intern(CString::from(c"123"));
943 /// table.shrink_to(5);
944 /// assert!(table.capacity() >= 5);
945 /// # Ok(())
946 /// # }
947 /// # example().unwrap();
948 /// ```
949 pub fn shrink_to(&mut self, min_capacity: usize) {
950 self.map.shrink_to(min_capacity);
951 self.vec.shrink_to(min_capacity);
952 }
953}
954
955#[cfg(test)]
956#[allow(clippy::needless_pass_by_value)]
957mod tests {
958 use std::ffi::CString;
959
960 use quickcheck::quickcheck;
961
962 use super::SymbolTable;
963
964 #[test]
965 fn alloc_drop_new() {
966 let table = SymbolTable::new();
967 drop(table);
968 }
969
970 #[test]
971 fn alloc_drop_with_capacity() {
972 let table = SymbolTable::with_capacity(1 << 14);
973 drop(table);
974 }
975
976 #[test]
977 fn drop_with_true_static_data() {
978 let mut table = SymbolTable::new();
979 table.intern(c"1").unwrap();
980 table.intern(c"2").unwrap();
981 table.intern(c"3").unwrap();
982 table.intern(c"4").unwrap();
983 table.intern(c"5").unwrap();
984 drop(table);
985 }
986
987 #[test]
988 fn drop_with_owned_data() {
989 let mut table = SymbolTable::new();
990 table.intern(c"1".to_owned()).unwrap();
991 table.intern(c"2".to_owned()).unwrap();
992 table.intern(c"3".to_owned()).unwrap();
993 table.intern(c"4".to_owned()).unwrap();
994 table.intern(c"5".to_owned()).unwrap();
995 drop(table);
996 }
997
998 #[test]
999 fn set_owned_value_and_get_with_owned_and_borrowed() {
1000 let mut table = SymbolTable::new();
1001 // intern an owned value
1002 let sym = table.intern(c"abc".to_owned()).unwrap();
1003 // retrieve C string bytes
1004 assert_eq!(&b"abc\0"[..], table.get(sym).unwrap().to_bytes_with_nul());
1005 // intern owned value again
1006 assert_eq!(sym, table.intern(c"abc".to_owned()).unwrap());
1007 // intern borrowed value
1008 assert_eq!(sym, table.intern(c"abc").unwrap());
1009 }
1010
1011 #[test]
1012 fn set_borrowed_value_and_get_with_owned_and_borrowed() {
1013 let mut table = SymbolTable::new();
1014 // intern a borrowed value
1015 let sym = table.intern(c"abc").unwrap();
1016 // retrieve C string bytes
1017 assert_eq!(&b"abc\0"[..], table.get(sym).unwrap().to_bytes_with_nul());
1018 // intern owned value
1019 assert_eq!(sym, table.intern(c"abc".to_owned()).unwrap());
1020 // intern borrowed value again
1021 assert_eq!(sym, table.intern(c"abc").unwrap());
1022 }
1023
1024 #[test]
1025 fn try_reserve_grows_capacity() {
1026 let mut table = SymbolTable::with_capacity(1);
1027 table.intern(c"abc").unwrap();
1028 let len = table.len();
1029
1030 table.try_reserve(10).unwrap();
1031
1032 assert!(table.capacity() >= len + 10);
1033 }
1034
1035 #[test]
1036 fn try_reserve_overflow_errors() {
1037 let mut table = SymbolTable::new();
1038
1039 let result = table.try_reserve(usize::MAX);
1040
1041 assert!(result.is_err());
1042 }
1043
1044 quickcheck! {
1045 fn intern_twice_symbol_equality(cstring: CString) -> bool {
1046 let mut table = SymbolTable::new();
1047 let sym = table.intern(cstring.clone()).unwrap();
1048 let sym_again = table.intern(cstring).unwrap();
1049 sym == sym_again
1050 }
1051
1052 fn intern_get_roundtrip(cstring: CString) -> bool {
1053 let mut table = SymbolTable::new();
1054 let sym = table.intern(cstring.clone()).unwrap();
1055 let retrieved_c_string = table.get(sym).unwrap();
1056 *cstring == *retrieved_c_string
1057 }
1058
1059 fn table_contains_sym(cstring: CString) -> bool {
1060 let mut table = SymbolTable::new();
1061 let sym = table.intern(cstring).unwrap();
1062 table.contains(sym)
1063 }
1064
1065 fn table_does_not_contain_missing_symbol_ids(sym: u32) -> bool {
1066 let table = SymbolTable::new();
1067 !table.contains(sym.into())
1068 }
1069
1070 fn empty_table_does_not_report_any_interned_c_strings(cstring: CString) -> bool {
1071 let table = SymbolTable::new();
1072 !table.is_interned(cstring.as_c_str())
1073 }
1074
1075 fn table_reports_interned_c_strings_as_interned(cstring: CString) -> bool {
1076 let mut table = SymbolTable::new();
1077 table.intern(cstring.clone()).unwrap();
1078 table.is_interned(cstring.as_c_str())
1079 }
1080 }
1081}