stringleton/lib.rs
1#![doc = include_str!("README.md")]
2
3pub use stringleton_registry::{Registry, StaticSymbol, Symbol};
4
5/// Create a literal symbol from a literal identifier or string
6///
7/// Symbols created with the [`sym!(...)`](crate::sym) macro are statically
8/// allocated and deduplicated on program startup. This means that there is no
9/// discernible overhead at the point of use, making them suitable even in long
10/// chains of `if` statements and inner loops.
11///
12/// **IMPORTANT:** For this macro to work in a particular crate, the
13/// [`enable!()`](crate::enable) macro must appear exactly once in the crate's
14/// root. This creates the global registration table at link-time.
15///
16/// # Safety
17///
18/// This macro is safe (and performant) to use everywhere, with important
19/// caveats:
20///
21/// 1. If you are using "static initializers" (code that runs before `main()`,
22/// like through the `ctor` crate), this macro must **NOT** be called in such
23/// a static initializer function. See
24/// <https://github.com/mmastrac/rust-ctor/issues/159>. Using
25/// [`Symbol::new()`] in such a function is fine.
26///
27/// 2. If you are using C-style dynamic libraries (`cdylib` crate type), those
28/// libraries must use the `stringleton-dylib` crate instead of
29/// `stringleton`.
30///
31/// 3. If you are loading dynamic libraries at runtime (i.e., outside of Cargo's
32/// dependency graph), the host crate must also use the `stringleton-dylib`
33/// crate instead of `stringleton`.
34///
35/// # Low-level details
36///
37/// This macro creates an entry in a per-crate `linkme` "distributed slice", as
38/// well as a static initializer called by the OS when the current crate is
39/// loaded at runtime (before `main()`), either as part of an executable or as
40/// part of a dynamic library.
41///
42/// On x86-64 and ARM64, this macro is guaranteed to compile into a single
43/// relaxed atomic memory load instruction from an offset in the `.bss` segment.
44/// On x86, relaxed atomic load instructions have no additional overhead
45/// compared to non-atomic loads.
46///
47/// Internally, this uses the `linkme` and `ctor` crates to register this
48/// callsite in static binary memory and initialize it on startup. However, when
49/// running under Miri (or other platforms not supported by `linkme`), the
50/// implementation falls back on a slower implementation that effectively calls
51/// `Symbol::new()` every time, which takes a global read-lock.
52///
53/// When the `debug-assertions` feature is enabled, there is an additional check
54/// that panics if the call site has not been populated by a static ctor. This
55/// assertion will only be triggered if the current platform does not support
56/// static initializers.
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! sym {
    // Internal arm. `$text` is the expression that yields the symbol's string:
    // either a literal or the `stringify!(...)` produced by the arms below.
    (@impl $text:expr) => {{
        // The bare `crate::` path is intentional: it has to resolve inside the
        // *calling* crate, which is where `enable!()` declared the table.
        #[cfg_attr(
            not(any(miri, target_arch = "wasm32")),
            $crate::internal::linkme::distributed_slice(crate::_stringleton_enabled::TABLE)
        )]
        #[cfg_attr(
            not(any(miri, target_arch = "wasm32")),
            linkme(crate = $crate::internal::linkme)
        )]
        static SITE: $crate::internal::Site = $crate::internal::Site::new(&$text);

        // SAFETY: This site is a member of the distributed slice, so the
        // static ctor initializes it.
        unsafe { SITE.get_after_ctor() }
    }};
    // `sym!(name)`: the identifier is turned into its string form.
    ($name:ident) => {
        $crate::sym!(@impl stringify!($name))
    };
    // `sym!("name")`: the literal is used as-is.
    ($name:literal) => {
        $crate::sym!(@impl $name)
    };
}
78
79/// Create a static location for a literal symbol.
80///
81/// This macro works the same as [`sym!(...)`](crate::sym), except that it
82/// produces a [`StaticSymbol`] instead of a [`Symbol`]. [`StaticSymbol`]
83/// implements `Deref<Target = Symbol>`, so it can be used in most places where
84/// a `Symbol` is expected.
85///
86/// This macro also requires the presence of a call to the
87/// [`enable!()`](crate::enable) macro at the crate root.
88///
89/// This macro can be used in the initialization of a `static` or `const` variable:
90///
91/// ```rust,ignore
92/// static MY_SYMBOL: StaticSymbol = static_sym!("Hello, World!");
93/// const OTHER_SYMBOL: StaticSymbol = static_sym!(abc);
94///
95/// assert_eq!(MY_SYMBOL, sym!("Hello, World!"));
/// assert_eq!(OTHER_SYMBOL, sym!("abc"));
97/// ```
98///
99/// # Use case
100///
101/// Use this macro to avoid having too many "magic symbols" in your code
102/// (similar to "magic numbers"). Declare common symbol names centrally, and
103/// refer to them by their Rust names instead.
104///
105/// At runtime, using symbols declared as `static_sym!(...)` is actually very
106/// slightly less efficient than using `sym!(...)` directly, due to a necessary
107/// extra indirection. This is probably negligible in almost all cases, but it
108/// is counterintuitive nevertheless. _(This caveat may be lifted in future, but
109/// is due to a - potentially overzealous - check in the compiler which requires
110/// the indirection.)_
111///
112/// # Low-level details
113///
114/// Another (extremely niche) effect of using this macro over `sym!(...)` is
115/// that it can help reduce the link-time size of the symbol table. Each
116/// `sym!(...)` and `static_sym!(...)` call site adds 8 bytes to the `.bss`
117/// segment, so this can only matter when you have in the order of millions of
118/// symbols in your binary. Still, worth knowing if you are golfing binary size.
#[macro_export]
#[allow(clippy::crate_in_macro_def)]
macro_rules! static_sym {
    // Internal arm. `$text` is the expression that yields the symbol's string:
    // either a literal or the `stringify!(...)` produced by the arms below.
    (@impl $text:expr) => {{
        // Small accessor whose only job is to return this call site's `Site`.
        fn _stringleton_static_symbol_call_site() -> &'static $crate::internal::Site {
            // The bare `crate::` path is intentional: it has to resolve inside
            // the *calling* crate, which is where `enable!()` declared the table.
            #[cfg_attr(
                not(any(miri, target_arch = "wasm32")),
                $crate::internal::linkme::distributed_slice(crate::_stringleton_enabled::TABLE)
            )]
            #[cfg_attr(
                not(any(miri, target_arch = "wasm32")),
                linkme(crate = $crate::internal::linkme)
            )]
            static SITE: $crate::internal::Site = $crate::internal::Site::new(&$text);
            &SITE
        }

        // SAFETY: The accessor passed to `new_unchecked()` returns a `Site`
        // that participates in the crate's symbol table.
        unsafe { $crate::StaticSymbol::new_unchecked(_stringleton_static_symbol_call_site) }
    }};
    // `static_sym!(name)`: the identifier is turned into its string form.
    ($name:ident) => {
        $crate::static_sym!(@impl stringify!($name))
    };
    // `static_sym!("name")`: the literal is used as-is.
    ($name:literal) => {
        $crate::static_sym!(@impl $name)
    };
}
146
147/// Enable the [`sym!(...)`](crate::sym) macro in the calling crate.
148///
149/// Put a call to this macro somewhere in the root of each crate that uses the
150/// `sym!(...)` macro.
151///
152/// The second variant reuses the symbol table of another crate, and this is
153/// particularly needed due to the way external tests (in the `tests/`
/// subdirectory of the project) are compiled. Test files such as `tests/foo.rs` are
155/// compiled as "pseudo-crates", so they are semantically a separate crate from
156/// the main library crate. But they are compiled into the same binary, so just
157/// using `stringleton::enable!()` without arguments will cause linker errors
158/// (`duplicate #[distributed_slice] with name "TABLE"`).
159///
160/// In external tests using [`sym!()`], the test file should instead use
161/// `stringleton::enable!(main_library_crate)`.
162///
163/// ## Details
164///
165/// This creates a "distributed slice" containing all symbols in this crate, as
166/// well as a static constructor that deduplicates all symbols on startup, or
167/// when a dynamic library is loaded when the target binary is a `dylib` or a
168/// `cdylib`.
169///
170/// This macro may also be invoked with a module path to another crate, which
171/// causes symbols in this crate to be registered as part of symbols in the
172/// other crate.
173///
174/// **CAUTION:** Using the second variant is discouraged, because it will not
175/// work when the other crate is being loaded as a dynamic library. However, it
176/// is very slightly more efficient.
177///
178/// ## Why?
179///
180/// The reason that this macro is necessary is dynamic linking. Under "normal"
181/// circumstances where all dependencies are statically linked, all crates could
182/// share a single symbol table. But dynamic libraries are linked independently
183/// of their host binary, so they have no access to the host's symbol table, if
184/// it even has one.
185///
186/// On Unix-like platforms, there is likely a solution for this based on "weak"
187/// linkage, but:
188///
189/// 1. Weak linkage is not a thing in Windows (DLLs need to explicitly request
190/// functions from the host binary using `GetModuleHandle()`, which is more
191/// brittle).
192/// 2. The `#[linkage]` attribute is unstable in Rust.
#[macro_export]
macro_rules! enable {
    // Variant 1: declare this crate's own symbol table and the static
    // constructor that registers it.
    () => {
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub mod _stringleton_enabled {
            // The distributed slice that every `sym!(...)` / `static_sym!(...)`
            // call site in the calling crate adds its `Site` to.
            #[$crate::internal::linkme::distributed_slice]
            #[linkme(crate = $crate::internal::linkme)]
            #[doc(hidden)]
            pub static TABLE: [$crate::internal::Site] = [..];

            $crate::internal::ctor::declarative::ctor! {
                #[ctor]
                #[doc(hidden)]
                pub fn _stringleton_register_symbols() {
                    unsafe {
                        // SAFETY: This is a static ctor.
                        $crate::internal::Registry::register_sites(&TABLE);
                    }
                }
            }
        }

        // Re-export the constructor next to the module; `allow(unused)`
        // because nothing refers to it by name.
        #[allow(unused)]
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub use _stringleton_enabled::_stringleton_register_symbols;
    };
    // Variant 2: reuse the table of another crate (given as a path) by
    // re-exporting its `_stringleton_enabled` module under the same name, so
    // that `crate::_stringleton_enabled::TABLE` resolves in the calling crate.
    //
    // The transcriber repeats with `+`, matching the matcher's `+`.
    ($($krate:tt)+) => {
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub use $($krate)+::_stringleton_enabled;
    };
}
227
228#[doc(hidden)]
229pub mod internal {
230 pub use ctor;
231 pub use linkme;
232 pub use stringleton_registry::Registry;
233 pub use stringleton_registry::Site;
234}
235
// The tests below use `sym!(...)` and `static_sym!(...)`, which need the
// symbol table declared in the crate root.
#[cfg(test)]
enable!();

#[cfg(test)]
mod tests {
    #[cfg(target_arch = "wasm32")]
    wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    use hashbrown::HashMap;

    use super::{StaticSymbol, Symbol};

    /// Different names give different symbols; the same name gives equal ones.
    #[test]
    #[cfg(feature = "alloc")]
    fn basic() {
        let first = sym!(a);
        let second = sym!(b);
        let third = sym!(c);
        let first_again = sym!(a);

        assert_ne!(first, second);
        assert_ne!(first, third);
        assert_ne!(second, third);
        assert_eq!(first, first_again);
    }

    /// The identifier form, the literal form and `Symbol::new_static` agree.
    #[test]
    fn sym_macro() {
        let from_ident: Symbol = sym!(hello);
        let from_literal: Symbol = sym!("hello");
        let from_runtime = Symbol::new_static(&"hello");
        assert_eq!(from_ident, from_literal);
        assert_eq!(from_ident, from_runtime);

        // All three are the same key, so only the last inserted value remains.
        let mut seen = HashMap::new();
        for (key, value) in [(from_ident, 1), (from_literal, 2), (from_runtime, 3)] {
            seen.insert(key, value);
        }
        assert_eq!(seen.len(), 1);
        assert!(seen.into_iter().eq([(from_ident, 3)]));

        assert_eq!(from_ident.to_string(), "hello");
        assert_eq!(from_ident.as_str(), "hello");

        let upper_case = sym!(SYM_CACHE);
        assert_eq!(upper_case, "SYM_CACHE");
    }

    /// `static_sym!` works in `static` and `const` items and matches `sym!`.
    #[test]
    fn statics() {
        static STATIC_A: StaticSymbol = static_sym!(a);
        const CONST_A: StaticSymbol = static_sym!(a);
        const CONST_C: StaticSymbol = static_sym!(c);

        assert_eq!(STATIC_A, CONST_A);
        assert_eq!(STATIC_A, sym!(a));
        assert_ne!(CONST_A, sym!(b));
        assert_eq!(CONST_C, sym!(c));
    }
}