1use std::borrow::Borrow;
2use std::cmp::Ordering;
3use std::fmt::{self, Debug, Display, Formatter};
4use std::hash::{Hash, Hasher};
5use std::num::NonZeroU64;
6use std::ops::Deref;
7use std::sync::{LazyLock, RwLock};
8
9use rustc_hash::FxHashMap;
10
11const MARKER: u64 = 1 << 63;
13
14static INTERNER: LazyLock<RwLock<Interner>> = LazyLock::new(|| {
16 RwLock::new(Interner { seen: FxHashMap::default(), strings: Vec::new() })
17});
18
19struct Interner {
21 seen: FxHashMap<&'static str, PicoStr>,
22 strings: Vec<&'static str>,
23}
24
25#[derive(Copy, Clone, Eq, PartialEq, Hash)]
40pub struct PicoStr(NonZeroU64);
41
42impl PicoStr {
43 pub fn intern(string: &str) -> PicoStr {
45 if let Ok(value) = PicoStr::try_constant(string) {
47 return value;
48 }
49
50 let mut interner = INTERNER.write().unwrap();
56 if let Some(&id) = interner.seen.get(string) {
57 return id;
58 }
59
60 let num = exceptions::LIST.len() + interner.strings.len() + 1;
63 let id = Self(NonZeroU64::new(num as u64).unwrap());
64 let string = Box::leak(string.to_string().into_boxed_str());
65 interner.seen.insert(string, id);
66 interner.strings.push(string);
67 id
68 }
69
70 pub fn get(string: &str) -> Option<PicoStr> {
77 if let Ok(value) = PicoStr::try_constant(string) {
79 return Some(value);
80 }
81
82 let interner = INTERNER.read().unwrap();
84 interner.seen.get(string).copied()
85 }
86
87 #[track_caller]
91 pub const fn constant(string: &'static str) -> PicoStr {
92 match PicoStr::try_constant(string) {
93 Ok(value) => value,
94 Err(err) => failed_to_compile_time_intern(err, string),
95 }
96 }
97
98 pub const fn try_constant(string: &str) -> Result<PicoStr, bitcode::EncodingError> {
100 let value = match bitcode::encode(string) {
102 Ok(v) => v | MARKER,
105
106 Err(e) => {
108 if let Some(i) = exceptions::get(string) {
109 i as u64 + 1
111 } else {
112 return Err(e);
113 }
114 }
115 };
116
117 Ok(Self(NonZeroU64::new(value).unwrap()))
118 }
119
120 pub fn resolve(self) -> ResolvedPicoStr {
122 let value = self.0.get();
124 if value & MARKER != 0 {
125 return bitcode::decode(value & !MARKER);
126 }
127
128 let index = (value - 1) as usize;
129 let string = if let Some(runtime) = index.checked_sub(exceptions::LIST.len()) {
130 INTERNER.read().unwrap().strings[runtime]
131 } else {
132 exceptions::LIST[index]
133 };
134
135 ResolvedPicoStr(Repr::Static(string))
136 }
137}
138
139impl Debug for PicoStr {
140 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
141 Debug::fmt(self.resolve().as_str(), f)
142 }
143}
144
145mod bitcode {
148 use super::{Repr, ResolvedPicoStr};
149
150 const DECODE: &[u8; 32] = b"\0abcdefghijklmnopqrstuvwxyz-1234";
152
153 const ENCODE: &[u8; 256] = &{
155 let mut map = [0; 256];
156 let mut i = 0;
157 while i < DECODE.len() {
158 map[DECODE[i] as usize] = i as u8;
159 i += 1;
160 }
161 map
162 };
163
164 pub const fn encode(string: &str) -> Result<u64, EncodingError> {
166 let bytes = string.as_bytes();
167
168 if bytes.len() > 12 {
169 return Err(EncodingError::TooLong);
170 }
171
172 let mut num: u64 = 0;
173 let mut i = bytes.len();
174 while i > 0 {
175 i -= 1;
176 let b = bytes[i];
177 let v = ENCODE[b as usize];
178 if v == 0 {
179 return Err(EncodingError::BadChar);
180 }
181 num <<= 5;
182 num |= v as u64;
183 }
184
185 Ok(num)
186 }
187
188 pub const fn decode(mut value: u64) -> ResolvedPicoStr {
190 let mut buf = [0; 12];
191 let mut len = 0;
192
193 while value != 0 {
194 let v = value & 0b11111;
195 buf[len as usize] = DECODE[v as usize];
196 len += 1;
197 value >>= 5;
198 }
199
200 ResolvedPicoStr(Repr::Inline(buf, len))
201 }
202
203 pub enum EncodingError {
205 TooLong,
206 BadChar,
207 }
208
209 impl EncodingError {
210 pub const fn message(&self) -> &'static str {
211 match self {
212 Self::TooLong => "the maximum auto-internible string length is 12",
213 Self::BadChar => {
214 "can only auto-intern the chars 'a'-'z', '1'-'4', and '-'"
215 }
216 }
217 }
218 }
219}
220
221mod exceptions {
223 use std::cmp::Ordering;
224
225 pub const LIST: &[&str] = &[
229 "accept-charset",
230 "allowfullscreen",
231 "aria-activedescendant",
232 "aria-autocomplete",
233 "aria-colcount",
234 "aria-colindex",
235 "aria-controls",
236 "aria-describedby",
237 "aria-disabled",
238 "aria-dropeffect",
239 "aria-errormessage",
240 "aria-expanded",
241 "aria-haspopup",
242 "aria-keyshortcuts",
243 "aria-labelledby",
244 "aria-multiline",
245 "aria-multiselectable",
246 "aria-orientation",
247 "aria-placeholder",
248 "aria-posinset",
249 "aria-readonly",
250 "aria-relevant",
251 "aria-required",
252 "aria-roledescription",
253 "aria-rowcount",
254 "aria-rowindex",
255 "aria-selected",
256 "aria-valuemax",
257 "aria-valuemin",
258 "aria-valuenow",
259 "aria-valuetext",
260 "autocapitalize",
261 "cjk-latin-spacing",
262 "contenteditable",
263 "discretionary-ligatures",
264 "fetchpriority",
265 "formnovalidate",
266 "h5",
267 "h6",
268 "historical-ligatures",
269 "number-clearance",
270 "number-margin",
271 "numbering-scope",
272 "onbeforeprint",
273 "onbeforeunload",
274 "onlanguagechange",
275 "onmessageerror",
276 "onrejectionhandled",
277 "onunhandledrejection",
278 "page-numbering",
279 "par-line-marker",
280 "popovertarget",
281 "popovertargetaction",
282 "referrerpolicy",
283 "shadowrootclonable",
284 "shadowrootcustomelementregistry",
285 "shadowrootdelegatesfocus",
286 "shadowrootmode",
287 "shadowrootserializable",
288 "transparentize",
289 "writingsuggestions",
290 ];
291
292 pub const fn get(string: &str) -> Option<usize> {
294 let mut lo = 0;
295 let mut hi = LIST.len();
296 while lo < hi {
297 let mid = (lo + hi) / 2;
298 match strcmp(string, LIST[mid]) {
299 Ordering::Less => hi = mid,
300 Ordering::Greater => lo = mid + 1,
301 Ordering::Equal => return Some(mid),
302 }
303 }
304 None
305 }
306
307 const fn strcmp(a: &str, b: &str) -> Ordering {
309 let a = a.as_bytes();
310 let b = b.as_bytes();
311 let l = min(a.len(), b.len());
312
313 let mut i = 0;
314 while i < l {
315 if a[i] == b[i] {
316 i += 1;
317 } else if a[i] < b[i] {
318 return Ordering::Less;
319 } else {
320 return Ordering::Greater;
321 }
322 }
323
324 if i < b.len() {
325 Ordering::Less
326 } else if i < a.len() {
327 Ordering::Greater
328 } else {
329 Ordering::Equal
330 }
331 }
332
333 const fn min(a: usize, b: usize) -> usize {
335 if a < b { a } else { b }
336 }
337}
338
339pub struct ResolvedPicoStr(Repr);
343
344enum Repr {
346 Inline([u8; 12], u8),
347 Static(&'static str),
348}
349
350impl ResolvedPicoStr {
351 pub fn as_str(&self) -> &str {
353 match &self.0 {
354 Repr::Inline(buf, len) => unsafe {
355 std::str::from_utf8_unchecked(&buf[..*len as usize])
356 },
357 Repr::Static(s) => s,
358 }
359 }
360}
361
362impl Debug for ResolvedPicoStr {
363 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
364 Debug::fmt(self.as_str(), f)
365 }
366}
367
368impl Display for ResolvedPicoStr {
369 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
370 Display::fmt(self.as_str(), f)
371 }
372}
373
374impl Deref for ResolvedPicoStr {
375 type Target = str;
376
377 fn deref(&self) -> &Self::Target {
378 self.as_str()
379 }
380}
381
382impl AsRef<str> for ResolvedPicoStr {
383 fn as_ref(&self) -> &str {
384 self.as_str()
385 }
386}
387
388impl Borrow<str> for ResolvedPicoStr {
389 fn borrow(&self) -> &str {
390 self.as_str()
391 }
392}
393
394impl Eq for ResolvedPicoStr {}
395
396impl PartialEq for ResolvedPicoStr {
397 fn eq(&self, other: &Self) -> bool {
398 self.as_str().eq(other.as_str())
399 }
400}
401
402impl Ord for ResolvedPicoStr {
403 fn cmp(&self, other: &Self) -> Ordering {
404 self.as_str().cmp(other.as_str())
405 }
406}
407
408impl PartialOrd for ResolvedPicoStr {
409 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
410 Some(self.cmp(other))
411 }
412}
413
414impl Hash for ResolvedPicoStr {
415 fn hash<H: Hasher>(&self, state: &mut H) {
416 self.as_str().hash(state);
417 }
418}
419
420#[track_caller]
424const fn failed_to_compile_time_intern(
425 error: bitcode::EncodingError,
426 string: &'static str,
427) -> ! {
428 const CAPACITY: usize = 512;
429 const fn push((buf, i): &mut ([u8; CAPACITY], usize), s: &str) {
430 let mut k = 0;
431 while k < s.len() && *i < buf.len() {
432 buf[*i] = s.as_bytes()[k];
433 k += 1;
434 *i += 1;
435 }
436 }
437
438 let mut dest = ([0; CAPACITY], 0);
439 push(&mut dest, "failed to compile-time intern string \"");
440 push(&mut dest, string);
441 push(&mut dest, "\". ");
442 push(&mut dest, error.message());
443 push(&mut dest, ". you can add an exception to ");
444 push(&mut dest, file!());
445 push(&mut dest, " to intern longer strings.");
446
447 let (slice, _) = dest.0.split_at(dest.1);
448 let Ok(message) = std::str::from_utf8(slice) else { panic!() };
449
450 panic!("{}", message);
451}
452
453#[cfg(test)]
454mod tests {
455 use super::*;
456
457 #[track_caller]
458 fn roundtrip(s: &str) {
459 assert_eq!(PicoStr::intern(s).resolve().as_str(), s);
460 }
461
462 #[test]
463 fn test_pico_str() {
464 const H1: PicoStr = PicoStr::constant("h1");
466 assert_eq!(H1, PicoStr::intern("h1"));
467 assert_eq!(H1.resolve().as_str(), "h1");
468
469 const DISC: PicoStr = PicoStr::constant("discretionary-ligatures");
471 assert_eq!(DISC, PicoStr::intern("discretionary-ligatures"));
472 assert_eq!(DISC.resolve().as_str(), "discretionary-ligatures");
473
474 roundtrip("");
476 roundtrip("hi");
477 roundtrip("∆@<hi-10_");
478 roundtrip("you");
479 roundtrip("discretionary-ligatures");
480 }
481
482 #[test]
484 fn test_exceptions_not_bitcode_encodable() {
485 for s in exceptions::LIST {
486 assert!(
487 bitcode::encode(s).is_err(),
488 "{s:?} can be encoded with bitcode and should not be an exception"
489 );
490 }
491 }
492
493 #[test]
495 fn test_exceptions_sorted() {
496 for group in exceptions::LIST.windows(2) {
497 assert!(group[0] < group[1], "{group:?} are out of order");
498 }
499 }
500
501 #[test]
503 fn test_exception_find() {
504 for (i, s) in exceptions::LIST.iter().enumerate() {
505 assert_eq!(exceptions::get(s), Some(i), "wrong index for {s:?}");
506 }
507 assert_eq!(exceptions::get("a"), None);
508 assert_eq!(exceptions::get("another-"), None);
509 assert_eq!(exceptions::get("z"), None);
510 }
511}