// stringleton_registry/site.rs
// `AtomicBool`, `AtomicPtr` and `Ordering` are only referenced from code that
// is conditionally compiled (Miri, wasm32, or the `debug-assertions` feature).
#[allow(unused_imports)]
use core::{
    cell::UnsafeCell,
    sync::atomic::{AtomicBool, AtomicPtr, Ordering},
};

use crate::Symbol;
8
/// Registration site for a static symbol created by the `sym!()` macro in
/// `stringleton`.
///
/// You should never need to construct this manually.
#[repr(C)]
pub struct Site {
    /// Before global symbol registration, this is the string that will be
    /// interned. After global symbol registration, this contains the value of
    /// the symbol directly.
    ///
    /// Safety: Access to this field is guarded in different ways at different
    /// points in time.
    ///
    /// - Static initializer functions are guaranteed to run in sequence (no
    ///   threads), so access is trivially synchronized.
    /// - After static initializers, this field is only ever read immutably.
    /// - On configurations without static constructor support (Miri and
    ///   `wasm32`), the field is instead filled in lazily on first access,
    ///   using atomic operations until `initialized` has been set.
    inner: UnsafeCell<&'static &'static str>,
    /// Set to `true` once `inner` holds the interned symbol. Only compiled in
    /// on configurations that check for, or lazily perform, initialization at
    /// runtime.
    #[cfg(any(miri, target_arch = "wasm32", feature = "debug-assertions"))]
    initialized: AtomicBool,
}
27
28// SAFETY: The contents of `SymbolRegistration` are synchronized by (a) static
29// constructors at upstart, or (b) a global rwlock at runtime.
30//
31// Note that `SymbolRegistrationSite` does not need to (and probably should not)
32// implement `Send`, only `Sync`.
33unsafe impl Sync for Site {}
34
35impl Site {
36 #[inline(always)]
37 #[must_use]
38 #[doc(hidden)]
39 pub const fn new(string: &'static &'static str) -> Self {
40 Self {
41 inner: UnsafeCell::new(string),
42 #[cfg(any(miri, target_arch = "wasm32", feature = "debug-assertions"))]
43 initialized: AtomicBool::new(false),
44 }
45 }
46
47 /// # Safety
48 ///
49 /// This must only be called from the registry's static ctor, or after
50 /// static ctors have finished running.
51 #[inline(always)]
52 pub unsafe fn get_string(&self) -> &'static &'static str {
53 unsafe {
54 // SAFETY: Preconditions of `initialize`.
55 *self.inner.get()
56 }
57 }
58
59 /// Initialize the symbol callsite.
60 ///
61 /// # Safety
62 ///
63 /// This must only be called from static constructors.
64 #[doc(hidden)]
65 #[inline(always)]
66 pub unsafe fn initialize(&self, interned: Symbol) {
67 #[cfg(any(miri, target_arch = "wasm32", feature = "debug-assertions"))]
68 {
69 self.initialized
70 .store(true, core::sync::atomic::Ordering::SeqCst);
71 }
72 unsafe {
73 *self.inner.get() = interned.inner();
74 }
75 }
76
77 /// Get a reference to the symbol contained in this site.
78 ///
79 /// # Safety
80 ///
81 /// This *MUST* only be called when `self` is part of the distributed slice
82 /// used by the ctor, and after static ctors have run. For example,
83 /// obtaining a `&'static self` via `Box::leak()` and calling this will not
84 /// work.
85 #[inline(always)]
86 #[must_use]
87 pub unsafe fn get_ref_after_ctor(&'static self) -> &'static Symbol {
88 #[cfg(any(miri, target_arch = "wasm32"))]
89 unsafe {
90 // Slow path.
91 return get_without_ctor_support(self);
92 }
93
94 #[cfg(not(any(miri, target_arch = "wasm32")))]
95 unsafe {
96 // Fast path.
97 get_with_ctor_support(self)
98 }
99 }
100
101 /// Get the deduplicated symbol value.
102 ///
103 /// # Safety
104 ///
105 /// This *MUST* only be called when `self` is part of the distributed slice
106 /// used by the ctor, and after static ctors have run. For example,
107 /// obtaining a `&'static self` via `Box::leak()` and calling this will not
108 /// work.
109 #[inline(always)]
110 #[must_use]
111 pub unsafe fn get_after_ctor(&'static self) -> Symbol {
112 unsafe { *self.get_ref_after_ctor() }
113 }
114}
115
116/// # Safety
117///
118/// Must be called after static ctors have run.
119#[inline(always)]
120#[allow(unused)] // unused under `cfg(any(miri, target_arch = "wasm32"))`
121unsafe fn get_with_ctor_support(site: &'static Site) -> &'static Symbol {
122 #[cfg(feature = "debug-assertions")]
123 {
124 assert!(
125 site.initialized.load(core::sync::atomic::Ordering::Relaxed),
126 "This `sym!()` call site has not been initialized by a static constructor. This can happen for the following reasons: \n
127 a) The current platform does not support static constructors (e.g., Miri)\n
128 b) The current crate is a dynamic library, but it reuses the registration from another crate, i.e., stringleton!(foreign_crate) is being used across a dynamic linking boundary\n
129 c) The call site is somehow reached without its containing binary having its static ctor functions called"
130 );
131 }
132
133 unsafe {
134 // SAFETY: The safety invariant is that this is only called after ctors
135 // have run, and only ctors write to this location, so we can do
136 // a non-atomic load.
137 let ptr: *const &'static &'static str = site.inner.get();
138 // SAFETY: Symbol is `#[repr(transparent)]`, so it is safe to cast
139 // `&&'static &'static str` to `&Symbol`.
140 let ptr: *const Symbol = ptr.cast();
141 &*ptr
142 }
143}
144
/// This is the "slow path" used under Miri and on `wasm32`, because `linkme`
/// and `ctor` are not supported there. It performs an atomic check on every
/// access, and is therefore a lot slower.
///
/// The first access to a site interns its string through the global registry
/// and stores the result in the site; later accesses only check the
/// `initialized` flag.
///
/// # Safety
///
/// `site` must only ever be accessed through this function (and never be
/// written by a static ctor concurrently), so that all accesses to its
/// storage are atomic until `initialized` has been set.
#[inline(always)]
#[cfg(any(miri, target_arch = "wasm32"))]
unsafe fn get_without_ctor_support(site: &'static Site) -> &'static Symbol {
    // CAUTION:
    //
    // Hold on for dear life, things are about to get nasty.

    // This performs no memory access, only pointer casts.
    //
    // We're casting a `&'static &'static str` to a `*mut &'static str`, and
    // it's fine because we are never actually writing through the second
    // indirection.
    let slot: *mut &'static &'static str = site.inner.get();
    let inner_ptr: *mut *mut &'static str = slot.cast();

    if !site.initialized.load(Ordering::SeqCst) {
        // NOTE(review): if two threads race through this branch, one may
        // reach the non-atomic read below while the other is still performing
        // its (idempotent) atomic store — confirm this mixed access is
        // acceptable on the supported targets.
        unsafe {
            // SAFETY: See `initialize_atomic`.
            initialize_atomic(inner_ptr, &site.initialized);
        }
    }

    unsafe {
        // SAFETY:
        // - `initialized` is set at this point, so it is safe to access
        //   `inner` non-atomically.
        // - Symbol is `repr(transparent)`, so it is safe to cast a
        //   `&&'static &'static str` to `&'static Symbol`.
        &*inner_ptr.cast::<Symbol>()
    }
}
185
/// Intern the string stored behind `inner_ptr` in the global registry, write
/// the interned pointer back atomically, and then set `initialized`.
///
/// # Safety
///
/// - `inner_ptr` must point to the `inner` storage of a live `Site`, and be
///   valid and suitably aligned for use as an `AtomicPtr<&'static str>`.
/// - Until `initialized` is observed to be `true`, that storage must only be
///   accessed atomically.
#[cfg(any(miri, target_arch = "wasm32"))]
unsafe fn initialize_atomic(inner_ptr: *mut *mut &'static str, initialized: &'static AtomicBool) {
    // View the storage as an atomic pointer.
    let atomic_inner: &AtomicPtr<&'static str> = unsafe {
        // SAFETY: Until we set `initialized = true`, the location is only
        // accessed atomically.
        AtomicPtr::from_ptr(inner_ptr)
    };

    // RELAXED: It doesn't matter if we read an outdated value here, because
    // `initialized` is what controls the order of operations: the caller
    // performs a `SeqCst` load of it before getting here, and it is only set
    // by the `SeqCst` store at the end of this function.
    let current = atomic_inner.load(Ordering::Relaxed);
    let stored_value: &'static &'static str = unsafe {
        // SAFETY: The pointer is valid: the slot holds either the original
        // string reference or an interned one written by a previous call.
        &*current
    };

    let interned = crate::Registry::global().get_or_insert_static(stored_value);

    // Store the value.
    //
    // Note: This is idempotent, because `Registry::get_or_insert_static()` is
    // guaranteed to return the same pointer for the same string value.
    let interned_ptr = core::ptr::from_ref(interned.inner()).cast_mut();
    atomic_inner.store(interned_ptr, Ordering::SeqCst);

    // Use the fast path for subsequent calls. Nobody takes the non-atomic route
    // until they see this store.
    initialized.store(true, Ordering::SeqCst);
}