1use std::borrow::Borrow;
2use std::cmp::Ordering;
3use std::fmt::{self, Debug, Display, Formatter};
4use std::hash::{Hash, Hasher};
5use std::num::NonZeroU64;
6use std::ops::Deref;
7use std::sync::{LazyLock, RwLock};
8
9use rustc_hash::FxHashMap;
10
11const MARKER: u64 = 1 << 63;
13
14static INTERNER: LazyLock<RwLock<Interner>> = LazyLock::new(|| {
16 RwLock::new(Interner { seen: FxHashMap::default(), strings: Vec::new() })
17});
18
19struct Interner {
21 seen: FxHashMap<&'static str, PicoStr>,
22 strings: Vec<&'static str>,
23}
24
25#[derive(Copy, Clone, Eq, PartialEq, Hash)]
40pub struct PicoStr(NonZeroU64);
41
42impl PicoStr {
43 pub fn intern(string: &str) -> PicoStr {
45 if let Ok(value) = PicoStr::try_constant(string) {
47 return value;
48 }
49
50 let mut interner = INTERNER.write().unwrap();
56 if let Some(&id) = interner.seen.get(string) {
57 return id;
58 }
59
60 let num = exceptions::LIST.len() + interner.strings.len() + 1;
63 let id = Self(NonZeroU64::new(num as u64).unwrap());
64 let string = Box::leak(string.to_string().into_boxed_str());
65 interner.seen.insert(string, id);
66 interner.strings.push(string);
67 id
68 }
69
70 pub fn get(string: &str) -> Option<PicoStr> {
77 if let Ok(value) = PicoStr::try_constant(string) {
79 return Some(value);
80 }
81
82 let interner = INTERNER.read().unwrap();
84 interner.seen.get(string).copied()
85 }
86
87 #[track_caller]
91 pub const fn constant(string: &'static str) -> PicoStr {
92 match PicoStr::try_constant(string) {
93 Ok(value) => value,
94 Err(err) => failed_to_compile_time_intern(err, string),
95 }
96 }
97
98 pub const fn try_constant(string: &str) -> Result<PicoStr, bitcode::EncodingError> {
100 let value = match bitcode::encode(string) {
102 Ok(v) => v | MARKER,
105
106 Err(e) => {
108 if let Some(i) = exceptions::get(string) {
109 i as u64 + 1
111 } else {
112 return Err(e);
113 }
114 }
115 };
116
117 Ok(Self(NonZeroU64::new(value).unwrap()))
118 }
119
120 pub fn resolve(self) -> ResolvedPicoStr {
122 let value = self.0.get();
124 if value & MARKER != 0 {
125 return bitcode::decode(value & !MARKER);
126 }
127
128 let index = (value - 1) as usize;
129 let string = if let Some(runtime) = index.checked_sub(exceptions::LIST.len()) {
130 INTERNER.read().unwrap().strings[runtime]
131 } else {
132 exceptions::LIST[index]
133 };
134
135 ResolvedPicoStr(ResolvedPicoStrInner::Static(string))
136 }
137}
138
139impl Debug for PicoStr {
140 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
141 Debug::fmt(self.resolve().as_str(), f)
142 }
143}
144
145mod bitcode {
148 use super::{ResolvedPicoStr, ResolvedPicoStrInner};
149
150 const DECODE: &[u8; 32] = b"\0abcdefghijklmnopqrstuvwxyz-1234";
152
153 const ENCODE: &[u8; 256] = &{
155 let mut map = [0; 256];
156 let mut i = 0;
157 while i < DECODE.len() {
158 map[DECODE[i] as usize] = i as u8;
159 i += 1;
160 }
161 map
162 };
163
164 pub const fn encode(string: &str) -> Result<u64, EncodingError> {
166 let bytes = string.as_bytes();
167
168 if bytes.len() > 12 {
169 return Err(EncodingError::TooLong);
170 }
171
172 let mut num: u64 = 0;
173 let mut i = bytes.len();
174 while i > 0 {
175 i -= 1;
176 let b = bytes[i];
177 let v = ENCODE[b as usize];
178 if v == 0 {
179 return Err(EncodingError::BadChar);
180 }
181 num <<= 5;
182 num |= v as u64;
183 }
184
185 Ok(num)
186 }
187
188 pub const fn decode(mut value: u64) -> ResolvedPicoStr {
190 let mut buf = [0; 12];
191 let mut len = 0;
192
193 while value != 0 {
194 let v = value & 0b11111;
195 buf[len as usize] = DECODE[v as usize];
196 len += 1;
197 value >>= 5;
198 }
199
200 ResolvedPicoStr(ResolvedPicoStrInner::Inline(buf, len))
201 }
202
203 pub enum EncodingError {
205 TooLong,
206 BadChar,
207 }
208
209 impl EncodingError {
210 pub const fn message(&self) -> &'static str {
211 match self {
212 Self::TooLong => "the maximum auto-internible string length is 12",
213 Self::BadChar => {
214 "can only auto-intern the chars 'a'-'z', '1'-'4', and '-'"
215 }
216 }
217 }
218 }
219}
220
221mod exceptions {
223 use std::cmp::Ordering;
224
225 pub const LIST: &[&str] = &[
229 "accept-charset",
230 "allowfullscreen",
231 "annotation-xml",
232 "aria-activedescendant",
233 "aria-autocomplete",
234 "aria-colcount",
235 "aria-colindex",
236 "aria-controls",
237 "aria-describedby",
238 "aria-disabled",
239 "aria-dropeffect",
240 "aria-errormessage",
241 "aria-expanded",
242 "aria-haspopup",
243 "aria-keyshortcuts",
244 "aria-labelledby",
245 "aria-multiline",
246 "aria-multiselectable",
247 "aria-orientation",
248 "aria-placeholder",
249 "aria-posinset",
250 "aria-readonly",
251 "aria-relevant",
252 "aria-required",
253 "aria-roledescription",
254 "aria-rowcount",
255 "aria-rowindex",
256 "aria-selected",
257 "aria-valuemax",
258 "aria-valuemin",
259 "aria-valuenow",
260 "aria-valuetext",
261 "autocapitalize",
262 "cjk-latin-spacing",
263 "contenteditable",
264 "discretionary-ligatures",
265 "fetchpriority",
266 "formnovalidate",
267 "h5",
268 "h6",
269 "historical-ligatures",
270 "linethickness",
271 "mmultiscripts",
272 "movablelimits",
273 "number-clearance",
274 "number-margin",
275 "numbering-scope",
276 "onbeforeprint",
277 "onbeforeunload",
278 "onlanguagechange",
279 "onmessageerror",
280 "onrejectionhandled",
281 "onunhandledrejection",
282 "page-numbering",
283 "par-line-marker",
284 "popovertarget",
285 "popovertargetaction",
286 "referrerpolicy",
287 "shadowrootclonable",
288 "shadowrootcustomelementregistry",
289 "shadowrootdelegatesfocus",
290 "shadowrootmode",
291 "shadowrootserializable",
292 "transparentize",
293 "writingsuggestions",
294 ];
295
296 pub const fn get(string: &str) -> Option<usize> {
298 let mut lo = 0;
299 let mut hi = LIST.len();
300 while lo < hi {
301 let mid = (lo + hi) / 2;
302 match strcmp(string, LIST[mid]) {
303 Ordering::Less => hi = mid,
304 Ordering::Greater => lo = mid + 1,
305 Ordering::Equal => return Some(mid),
306 }
307 }
308 None
309 }
310
311 const fn strcmp(a: &str, b: &str) -> Ordering {
313 let a = a.as_bytes();
314 let b = b.as_bytes();
315 let l = min(a.len(), b.len());
316
317 let mut i = 0;
318 while i < l {
319 if a[i] == b[i] {
320 i += 1;
321 } else if a[i] < b[i] {
322 return Ordering::Less;
323 } else {
324 return Ordering::Greater;
325 }
326 }
327
328 if i < b.len() {
329 Ordering::Less
330 } else if i < a.len() {
331 Ordering::Greater
332 } else {
333 Ordering::Equal
334 }
335 }
336
337 const fn min(a: usize, b: usize) -> usize {
339 if a < b { a } else { b }
340 }
341}
342
343#[derive(Copy, Clone)]
347pub struct ResolvedPicoStr(ResolvedPicoStrInner);
348
349#[derive(Copy, Clone)]
351enum ResolvedPicoStrInner {
352 Inline([u8; 12], u8),
353 Static(&'static str),
354}
355
356impl ResolvedPicoStr {
357 pub fn as_str(&self) -> &str {
359 match &self.0 {
360 ResolvedPicoStrInner::Inline(buf, len) => unsafe {
361 std::str::from_utf8_unchecked(&buf[..*len as usize])
362 },
363 ResolvedPicoStrInner::Static(s) => s,
364 }
365 }
366}
367
368impl Debug for ResolvedPicoStr {
369 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
370 Debug::fmt(self.as_str(), f)
371 }
372}
373
374impl Display for ResolvedPicoStr {
375 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
376 Display::fmt(self.as_str(), f)
377 }
378}
379
380impl Deref for ResolvedPicoStr {
381 type Target = str;
382
383 fn deref(&self) -> &Self::Target {
384 self.as_str()
385 }
386}
387
388impl AsRef<str> for ResolvedPicoStr {
389 fn as_ref(&self) -> &str {
390 self.as_str()
391 }
392}
393
394impl Borrow<str> for ResolvedPicoStr {
395 fn borrow(&self) -> &str {
396 self.as_str()
397 }
398}
399
400impl Eq for ResolvedPicoStr {}
401
402impl PartialEq for ResolvedPicoStr {
403 fn eq(&self, other: &Self) -> bool {
404 self.as_str().eq(other.as_str())
405 }
406}
407
408impl Ord for ResolvedPicoStr {
409 fn cmp(&self, other: &Self) -> Ordering {
410 self.as_str().cmp(other.as_str())
411 }
412}
413
414impl PartialOrd for ResolvedPicoStr {
415 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
416 Some(self.cmp(other))
417 }
418}
419
420impl Hash for ResolvedPicoStr {
421 fn hash<H: Hasher>(&self, state: &mut H) {
422 self.as_str().hash(state);
423 }
424}
425
426#[track_caller]
430const fn failed_to_compile_time_intern(
431 error: bitcode::EncodingError,
432 string: &'static str,
433) -> ! {
434 const CAPACITY: usize = 512;
435 const fn push((buf, i): &mut ([u8; CAPACITY], usize), s: &str) {
436 let mut k = 0;
437 while k < s.len() && *i < buf.len() {
438 buf[*i] = s.as_bytes()[k];
439 k += 1;
440 *i += 1;
441 }
442 }
443
444 let mut dest = ([0; CAPACITY], 0);
445 push(&mut dest, "failed to compile-time intern string \"");
446 push(&mut dest, string);
447 push(&mut dest, "\". ");
448 push(&mut dest, error.message());
449 push(&mut dest, ". you can add an exception to ");
450 push(&mut dest, file!());
451 push(&mut dest, " to intern longer strings.");
452
453 let (slice, _) = dest.0.split_at(dest.1);
454 let Ok(message) = std::str::from_utf8(slice) else { panic!() };
455
456 panic!("{}", message);
457}
458
459#[cfg(test)]
460mod tests {
461 use super::*;
462
463 #[track_caller]
464 fn roundtrip(s: &str) {
465 assert_eq!(PicoStr::intern(s).resolve().as_str(), s);
466 }
467
468 #[test]
469 fn test_pico_str() {
470 const H1: PicoStr = PicoStr::constant("h1");
472 assert_eq!(H1, PicoStr::intern("h1"));
473 assert_eq!(H1.resolve().as_str(), "h1");
474
475 const DISC: PicoStr = PicoStr::constant("discretionary-ligatures");
477 assert_eq!(DISC, PicoStr::intern("discretionary-ligatures"));
478 assert_eq!(DISC.resolve().as_str(), "discretionary-ligatures");
479
480 roundtrip("");
482 roundtrip("hi");
483 roundtrip("∆@<hi-10_");
484 roundtrip("you");
485 roundtrip("discretionary-ligatures");
486 }
487
488 #[test]
490 fn test_exceptions_not_bitcode_encodable() {
491 for s in exceptions::LIST {
492 assert!(
493 bitcode::encode(s).is_err(),
494 "{s:?} can be encoded with bitcode and should not be an exception"
495 );
496 }
497 }
498
499 #[test]
501 fn test_exceptions_sorted() {
502 for group in exceptions::LIST.windows(2) {
503 assert!(group[0] < group[1], "{group:?} are out of order");
504 }
505 }
506
507 #[test]
509 fn test_exception_find() {
510 for (i, s) in exceptions::LIST.iter().enumerate() {
511 assert_eq!(exceptions::get(s), Some(i), "wrong index for {s:?}");
512 }
513 assert_eq!(exceptions::get("a"), None);
514 assert_eq!(exceptions::get("another-"), None);
515 assert_eq!(exceptions::get("z"), None);
516 }
517}