stringleton/
lib.rs

1#![doc = include_str!("README.md")]
2
3pub use stringleton_registry::{Registry, StaticSymbol, Symbol};
4
5/// Create a literal symbol from a literal identifier or string
6///
7/// Symbols created with the [`sym!(...)`](crate::sym) macro are statically
8/// allocated and deduplicated on program startup. This means that there is no
9/// discernible overhead at the point of use, making them suitable even in long
10/// chains of `if` statements and inner loops.
11///
12/// **IMPORTANT:** For this macro to work in a particular crate, the
13/// [`enable!()`](crate::enable) macro must appear exactly once in the crate's
14/// root. This creates the global registration table at link-time.
15///
16/// # Safety
17///
18/// This macro is safe (and performant) to use everywhere, with important
19/// caveats:
20///
21/// 1. If you are using "static initializers" (code that runs before `main()`,
22///    like through the `ctor` crate), this macro must **NOT** be called in such
23///    a static initializer function. See
24///    <https://github.com/mmastrac/rust-ctor/issues/159>. Using
25///    [`Symbol::new()`] in such a function is fine.
26///
27/// 2. If you are using C-style dynamic libraries (`cdylib` crate type), those
28///    libraries must use the `stringleton-dylib` crate instead of
29///    `stringleton`.
30///
31/// 3. If you are loading dynamic libraries at runtime (i.e., outside of Cargo's
32///    dependency graph), the host crate must also use the `stringleton-dylib`
33///    crate instead of `stringleton`.
34///
35/// # Low-level details
36///
37/// This macro creates an entry in a per-crate `linkme` "distributed slice", as
38/// well as a static initializer called by the OS when the current crate is
39/// loaded at runtime (before `main()`), either as part of an executable or as
40/// part of a dynamic library.
41///
42/// On x86-64 and ARM64, this macro is guaranteed to compile into a single
43/// relaxed atomic memory load instruction from an offset in the `.bss` segment.
44/// On x86, relaxed atomic load instructions have no additional overhead
45/// compared to non-atomic loads.
46///
47/// Internally, this uses the `linkme` and `ctor` crates to register this
48/// callsite in static binary memory and initialize it on startup. However, when
49/// running under Miri (or other platforms not supported by `linkme`), the
50/// implementation falls back on a slower implementation that effectively calls
51/// `Symbol::new()` every time, which takes a global read-lock.
52///
53/// When the `debug-assertions` feature is enabled, there is an additional check
54/// that panics if the call site has not been populated by a static ctor. This
55/// assertion will only be triggered if the current platform does not support
56/// static initializers.
#[macro_export]
// `crate` (rather than `$crate`) is used on purpose inside the expansion.
#[allow(clippy::crate_in_macro_def)]
macro_rules! sym {
    // `sym!(name)`: the identifier's text becomes the symbol's string.
    ($name:ident) => {
        $crate::sym!(@impl stringify!($name))
    };
    // `sym!("text")`: the literal is used as-is.
    ($name:literal) => {
        $crate::sym!(@impl $name)
    };
    // Internal rule shared by both public forms.
    (@impl $name:expr) => {{
        // Note: `crate` refers to the calling crate here - this is deliberate,
        // so that the site is registered in the caller's own table.
        #[cfg_attr(
            not(any(miri, target_arch = "wasm32")),
            $crate::internal::linkme::distributed_slice(crate::_stringleton_enabled::TABLE)
        )]
        #[cfg_attr(
            not(any(miri, target_arch = "wasm32")),
            linkme(crate = $crate::internal::linkme)
        )]
        static SITE: $crate::internal::Site = $crate::internal::Site::new(&$name);

        // SAFETY: This site will be initialized by the static ctor because
        // it participates in the distributed slice.
        unsafe { SITE.get_after_ctor() }
    }};
}
78
79/// Create a static location for a literal symbol.
80///
81/// This macro works the same as [`sym!(...)`](crate::sym), except that it
82/// produces a [`StaticSymbol`] instead of a [`Symbol`]. [`StaticSymbol`]
83/// implements `Deref<Target = Symbol>`, so it can be used in most places where
84/// a `Symbol` is expected.
85///
86/// This macro also requires the presence of a call to the
87/// [`enable!()`](crate::enable) macro at the crate root.
88///
89/// This macro can be used in the initialization of a `static` or `const` variable:
90///
91/// ```rust,ignore
92/// static MY_SYMBOL: StaticSymbol = static_sym!("Hello, World!");
93/// const OTHER_SYMBOL: StaticSymbol = static_sym!(abc);
94///
95/// assert_eq!(MY_SYMBOL, sym!("Hello, World!"));
/// assert_eq!(OTHER_SYMBOL, sym!("abc"));
97/// ```
98///
99/// # Use case
100///
101/// Use this macro to avoid having too many "magic symbols" in your code
102/// (similar to "magic numbers"). Declare common symbol names centrally, and
103/// refer to them by their Rust names instead.
104///
105/// At runtime, using symbols declared as `static_sym!(...)` is actually very
106/// slightly less efficient than using `sym!(...)` directly, due to a necessary
107/// extra indirection. This is probably negligible in almost all cases, but it
108/// is counterintuitive nevertheless. _(This caveat may be lifted in future, but
109/// is due to a - potentially overzealous - check in the compiler which requires
110/// the indirection.)_
111///
112/// # Low-level details
113///
114/// Another (extremely niche) effect of using this macro over `sym!(...)` is
115/// that it can help reduce the link-time size of the symbol table. Each
116/// `sym!(...)` and `static_sym!(...)` call site adds 8 bytes to the `.bss`
117/// segment, so this can only matter when you have in the order of millions of
118/// symbols in your binary. Still, worth knowing if you are golfing binary size.
#[macro_export]
// `crate` (rather than `$crate`) is used on purpose inside the expansion.
#[allow(clippy::crate_in_macro_def)]
macro_rules! static_sym {
    // `static_sym!(name)`: the identifier's text becomes the symbol's string.
    ($name:ident) => {
        $crate::static_sym!(@impl stringify!($name))
    };
    // `static_sym!("text")`: the literal is used as-is.
    ($name:literal) => {
        $crate::static_sym!(@impl $name)
    };
    // Internal rule shared by both public forms.
    (@impl $name:expr) => {{
        // Tiny accessor whose only job is to hand out the `Site` for this symbol.
        fn _stringleton_static_symbol_call_site() -> &'static $crate::internal::Site {
            // Note: `crate` refers to the calling crate here - this is deliberate.
            #[cfg_attr(
                not(any(miri, target_arch = "wasm32")),
                $crate::internal::linkme::distributed_slice(crate::_stringleton_enabled::TABLE)
            )]
            #[cfg_attr(
                not(any(miri, target_arch = "wasm32")),
                linkme(crate = $crate::internal::linkme)
            )]
            static SITE: $crate::internal::Site = $crate::internal::Site::new(&$name);
            &SITE
        }

        // SAFETY: `new_unchecked()` is called with a `Site` that
        // participates in the crate's symbol table.
        unsafe {
            $crate::StaticSymbol::new_unchecked(_stringleton_static_symbol_call_site)
        }
    }};
}
146
147/// Enable the [`sym!(...)`](crate::sym) macro in the calling crate.
148///
149/// Put a call to this macro somewhere in the root of each crate that uses the
150/// `sym!(...)` macro.
151///
152/// The second variant reuses the symbol table of another crate, and this is
153/// particularly needed due to the way external tests (in the `tests/`
/// subdirectory of the project) are compiled. Test files such as `tests/foo.rs` are
155/// compiled as "pseudo-crates", so they are semantically a separate crate from
156/// the main library crate. But they are compiled into the same binary, so just
157/// using `stringleton::enable!()` without arguments will cause linker errors
158/// (`duplicate #[distributed_slice] with name "TABLE"`).
159///
160/// In external tests using [`sym!()`], the test file should instead use
161/// `stringleton::enable!(main_library_crate)`.
162///
163/// ## Details
164///
165/// This creates a "distributed slice" containing all symbols in this crate, as
166/// well as a static constructor that deduplicates all symbols on startup, or
167/// when a dynamic library is loaded when the target binary is a `dylib` or a
168/// `cdylib`.
169///
170/// This macro may also be invoked with a module path to another crate, which
171/// causes symbols in this crate to be registered as part of symbols in the
172/// other crate.
173///
174/// **CAUTION:** Using the second variant is discouraged, because it will not
175/// work when the other crate is being loaded as a dynamic library. However, it
176/// is very slightly more efficient.
177///
178/// ## Why?
179///
180/// The reason that this macro is necessary is dynamic linking. Under "normal"
181/// circumstances where all dependencies are statically linked, all crates could
182/// share a single symbol table. But dynamic libraries are linked independently
183/// of their host binary, so they have no access to the host's symbol table, if
184/// it even has one.
185///
186/// On Unix-like platforms, there is likely a solution for this based on "weak"
187/// linkage, but:
188///
189/// 1. Weak linkage is not a thing in Windows (DLLs need to explicitly request
190///    functions from the host binary using `GetModuleHandle()`, which is more
191///    brittle).
192/// 2. The `#[linkage]` attribute is unstable in Rust.
#[macro_export]
macro_rules! enable {
    // First variant: give the calling crate its own symbol table.
    () => {
        // Hidden module holding the table and the constructor that registers it.
        // Omitted under Miri and on wasm32, where the `sym!` call sites are not
        // placed in a distributed slice either.
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub mod _stringleton_enabled {
            // Every `sym!(...)` / `static_sym!(...)` call site in the crate
            // contributes one `Site` entry to this slice.
            #[$crate::internal::linkme::distributed_slice]
            #[linkme(crate = $crate::internal::linkme)]
            #[doc(hidden)]
            pub static TABLE: [$crate::internal::Site] = [..];

            $crate::internal::ctor::declarative::ctor! {
                #[ctor]
                #[doc(hidden)]
                pub fn _stringleton_register_symbols() {
                    unsafe {
                        // SAFETY: This is a static ctor.
                        $crate::internal::Registry::register_sites(&TABLE);
                    }
                }
            }
        }

        // Re-export the constructor at the invocation site; `unused` is
        // allowed because nothing calls it by name.
        #[allow(unused)]
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub use _stringleton_enabled::_stringleton_register_symbols;
    };
    // Second variant: reuse the table of another crate (or module path) by
    // re-exporting its `_stringleton_enabled` module under the same name.
    // The transcriber uses the same `+` repetition as the matcher.
    ($($krate:tt)+) => {
        #[doc(hidden)]
        #[cfg(not(any(miri, target_arch = "wasm32")))]
        pub use $($krate)+::_stringleton_enabled;
    };
}
227
228#[doc(hidden)]
229pub mod internal {
230    pub use ctor;
231    pub use linkme;
232    pub use stringleton_registry::Registry;
233    pub use stringleton_registry::Site;
234}
235
// The unit tests below use `sym!` / `static_sym!`, so the crate needs its own
// symbol table when built for testing.
#[cfg(test)]
enable!();

#[cfg(test)]
mod tests {
    #[cfg(target_arch = "wasm32")]
    wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::wasm_bindgen_test as test;

    use hashbrown::HashMap;

    use super::{StaticSymbol, Symbol};

    /// Different names give different symbols; the same name gives equal ones.
    #[test]
    #[cfg(feature = "alloc")]
    fn basic() {
        let first = sym!(a);
        let second = sym!(b);
        let third = sym!(c);
        let first_again = sym!(a);

        assert_ne!(first, second);
        assert_ne!(first, third);
        assert_ne!(second, third);
        assert_eq!(first, first_again);
    }

    /// Identifier form, literal form and `Symbol::new_static` all agree.
    #[test]
    fn sym_macro() {
        let from_ident: Symbol = sym!(hello);
        let from_literal: Symbol = sym!("hello");
        let from_runtime = Symbol::new_static(&"hello");
        assert_eq!(from_ident, from_literal);
        assert_eq!(from_ident, from_runtime);

        // All three are the same key, so each insert overwrites the previous one.
        let mut by_symbol = HashMap::new();
        for (key, value) in [(from_ident, 1), (from_literal, 2), (from_runtime, 3)] {
            by_symbol.insert(key, value);
        }
        assert_eq!(by_symbol.len(), 1);
        assert!(by_symbol.into_iter().eq([(from_ident, 3)]));

        assert_eq!(from_ident.to_string(), "hello");
        assert_eq!(from_ident.as_str(), "hello");

        let upper_case = sym!(SYM_CACHE);
        assert_eq!(upper_case, "SYM_CACHE");
    }

    /// `static_sym!` works in both `static` and `const` items and compares
    /// equal to the matching `sym!`.
    #[test]
    fn statics() {
        static STATIC_A: StaticSymbol = static_sym!(a);
        const CONST_A: StaticSymbol = static_sym!(a);
        const CONST_C: StaticSymbol = static_sym!(c);

        assert_eq!(STATIC_A, CONST_A);
        assert_eq!(STATIC_A, sym!(a));
        assert_ne!(CONST_A, sym!(b));
        assert_eq!(CONST_C, sym!(c));
    }
}