flowlog_runtime/intern.rs
1//! Thread-safe string interning via `lasso::ThreadedRodeo`.
2
3use lasso::{Key, Spur, ThreadedRodeo};
4use rustc_hash::FxBuildHasher;
5use std::sync::{LazyLock, OnceLock};
6
7/// Global string interner shared across all FlowLog engines in the process.
8///
9/// Uses `FxBuildHasher` instead of lasso's default SipHash: interner keys are
10/// program-controlled (`.dl` literals + input facts), not adversarial, so
11/// SipHash's HashDoS resistance is pure per-byte overhead on every intern and
12/// resolve.
13///
14/// **Limitation**: this is a process-local pool. In a distributed DD
15/// deployment (multiple machines), each process gets its own independent
16/// `INTERNER`, so `Spur` values are NOT comparable across machines.
17/// Distributed support would require a global interning protocol or
18/// switching back to `String`-keyed collections.
19pub static INTERNER: LazyLock<ThreadedRodeo<Spur, FxBuildHasher>> =
20 LazyLock::new(|| ThreadedRodeo::with_hasher(FxBuildHasher));
21
/// Upper bound on the number of interning attempts [`intern`] makes for a
/// single string before it gives up and panics.
const MAX_RETRIES: usize = 1024;
23
24/// Intern a string, returning its [`Spur`] handle.
25#[inline(always)]
26pub fn intern(s: &str) -> Spur {
27 for _ in 0..MAX_RETRIES {
28 match INTERNER.try_get_or_intern(s) {
29 Ok(key) => return key,
30 Err(_) => std::thread::yield_now(),
31 }
32 }
33 panic!("string interner failed after {MAX_RETRIES} attempts for {s:?}");
34}
35
36/// Resolve a [`Spur`] handle back to a `&'static str`.
37#[inline(always)]
38pub fn resolve(key: Spur) -> &'static str {
39 INTERNER.resolve(&key)
40}
41
/// Flat snapshot of the interner, indexed by `Spur::into_usize()`, used for
/// O(1) resolution at output/drain time.
///
/// `Spur` keys are dense in `[0, len)`, so a plain slice index can stand in
/// for the concurrent [`ThreadedRodeo::resolve`] path (which hashes the key
/// and takes a `DashMap` read lock on every call).
static RESOLVED: OnceLock<Box<[&str]>> = OnceLock::new();
48
49/// Build the flat snapshot from the current interner contents.
50///
51/// `INTERNER` is borrowed from a `static`, so its strings are genuinely
52/// `'static`; the dense `Spur` keying lets us address them by index.
53fn build_snapshot() -> Box<[&'static str]> {
54 let mut table: Vec<&'static str> = vec![""; INTERNER.len()];
55 for (key, string) in INTERNER.iter() {
56 table[key.into_usize()] = string;
57 }
58 table.into_boxed_slice()
59}
60
61/// Resolve a [`Spur`] at output time via a flat index instead of the
62/// concurrent `DashMap` path taken by [`resolve`].
63///
64/// Built lazily on first use. Output runs after fixpoint, so the snapshot is
65/// complete in batch mode. Keys interned later (e.g. new epochs in incremental
66/// mode) fall outside its range and fall back to [`resolve`], staying correct
67/// without a rebuild.
68#[inline]
69pub fn resolve_out(key: Spur) -> &'static str {
70 let table = RESOLVED.get_or_init(build_snapshot);
71 match table.get(key.into_usize()) {
72 Some(&string) => string,
73 None => resolve(key),
74 }
75}