1use std::borrow::Borrow;
2use std::cmp::Ordering;
3use std::collections::HashMap;
4use std::fmt::{self, Debug, Display, Formatter};
5use std::hash::{Hash, Hasher};
6use std::num::NonZeroU64;
7use std::ops::Deref;
8use std::sync::{LazyLock, RwLock};
9
10const MARKER: u64 = 1 << 63;
12
13static INTERNER: LazyLock<RwLock<Interner>> =
15 LazyLock::new(|| RwLock::new(Interner { seen: HashMap::new(), strings: Vec::new() }));
16
17struct Interner {
19 seen: HashMap<&'static str, PicoStr>,
20 strings: Vec<&'static str>,
21}
22
23#[derive(Copy, Clone, Eq, PartialEq, Hash)]
38pub struct PicoStr(NonZeroU64);
39
40impl PicoStr {
41 pub fn intern(string: &str) -> PicoStr {
43 if let Ok(value) = PicoStr::try_constant(string) {
45 return value;
46 }
47
48 let mut interner = INTERNER.write().unwrap();
54 if let Some(&id) = interner.seen.get(string) {
55 return id;
56 }
57
58 let num = exceptions::LIST.len() + interner.strings.len() + 1;
61 let id = Self(NonZeroU64::new(num as u64).unwrap());
62 let string = Box::leak(string.to_string().into_boxed_str());
63 interner.seen.insert(string, id);
64 interner.strings.push(string);
65 id
66 }
67
68 #[track_caller]
72 pub const fn constant(string: &'static str) -> PicoStr {
73 match PicoStr::try_constant(string) {
74 Ok(value) => value,
75 Err(err) => panic!("{}", err.message()),
76 }
77 }
78
79 pub const fn try_constant(string: &str) -> Result<PicoStr, bitcode::EncodingError> {
81 let value = match bitcode::encode(string) {
83 Ok(v) => v | MARKER,
86
87 Err(e) => {
89 if let Some(i) = exceptions::get(string) {
90 i as u64 + 1
92 } else {
93 return Err(e);
94 }
95 }
96 };
97
98 match NonZeroU64::new(value) {
99 Some(value) => Ok(Self(value)),
100 None => unreachable!(),
101 }
102 }
103
104 pub fn resolve(self) -> ResolvedPicoStr {
106 let value = self.0.get();
108 if value & MARKER != 0 {
109 return bitcode::decode(value & !MARKER);
110 }
111
112 let index = (value - 1) as usize;
113 let string = if let Some(runtime) = index.checked_sub(exceptions::LIST.len()) {
114 INTERNER.read().unwrap().strings[runtime]
115 } else {
116 exceptions::LIST[index]
117 };
118
119 ResolvedPicoStr(Repr::Static(string))
120 }
121}
122
123impl Debug for PicoStr {
124 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
125 Debug::fmt(self.resolve().as_str(), f)
126 }
127}
128
129mod bitcode {
132 use super::{Repr, ResolvedPicoStr};
133
134 const DECODE: &[u8; 32] = b"\0abcdefghijklmnopqrstuvwxyz-1234";
136
137 const ENCODE: &[u8; 256] = &{
139 let mut map = [0; 256];
140 let mut i = 0;
141 while i < DECODE.len() {
142 map[DECODE[i] as usize] = i as u8;
143 i += 1;
144 }
145 map
146 };
147
148 pub const fn encode(string: &str) -> Result<u64, EncodingError> {
150 let bytes = string.as_bytes();
151
152 if bytes.len() > 12 {
153 return Err(EncodingError::TooLong);
154 }
155
156 let mut num: u64 = 0;
157 let mut i = bytes.len();
158 while i > 0 {
159 i -= 1;
160 let b = bytes[i];
161 let v = ENCODE[b as usize];
162 if v == 0 {
163 return Err(EncodingError::BadChar);
164 }
165 num <<= 5;
166 num |= v as u64;
167 }
168
169 Ok(num)
170 }
171
172 pub const fn decode(mut value: u64) -> ResolvedPicoStr {
174 let mut buf = [0; 12];
175 let mut len = 0;
176
177 while value != 0 {
178 let v = value & 0b11111;
179 buf[len as usize] = DECODE[v as usize];
180 len += 1;
181 value >>= 5;
182 }
183
184 ResolvedPicoStr(Repr::Inline(buf, len))
185 }
186
187 pub enum EncodingError {
189 TooLong,
190 BadChar,
191 }
192
193 impl EncodingError {
194 pub const fn message(&self) -> &'static str {
195 match self {
196 Self::TooLong => {
197 "the maximum auto-internible string length is 12. \
198 you can add an exception to typst-utils/src/pico.rs \
199 to intern longer strings."
200 }
201 Self::BadChar => {
202 "can only auto-intern the chars 'a'-'z', '1'-'4', and '-'. \
203 you can add an exception to typst-utils/src/pico.rs \
204 to intern other strings."
205 }
206 }
207 }
208 }
209}
210
211mod exceptions {
213 use std::cmp::Ordering;
214
215 pub const LIST: &[&str] = &[
217 "cjk-latin-spacing",
218 "discretionary-ligatures",
219 "h5",
220 "h6",
221 "historical-ligatures",
222 "number-clearance",
223 "number-margin",
224 "numbering-scope",
225 "page-numbering",
226 "par-line-marker",
227 "transparentize",
228 ];
229
230 pub const fn get(string: &str) -> Option<usize> {
232 let mut lo = 0;
233 let mut hi = LIST.len();
234 while lo < hi {
235 let mid = (lo + hi) / 2;
236 match strcmp(string, LIST[mid]) {
237 Ordering::Less => hi = mid,
238 Ordering::Greater => lo = mid + 1,
239 Ordering::Equal => return Some(mid),
240 }
241 }
242 None
243 }
244
245 const fn strcmp(a: &str, b: &str) -> Ordering {
247 let a = a.as_bytes();
248 let b = b.as_bytes();
249 let l = min(a.len(), b.len());
250
251 let mut i = 0;
252 while i < l {
253 if a[i] == b[i] {
254 i += 1;
255 } else if a[i] < b[i] {
256 return Ordering::Less;
257 } else {
258 return Ordering::Greater;
259 }
260 }
261
262 if i < b.len() {
263 Ordering::Less
264 } else if i < a.len() {
265 Ordering::Greater
266 } else {
267 Ordering::Equal
268 }
269 }
270
271 const fn min(a: usize, b: usize) -> usize {
273 if a < b {
274 a
275 } else {
276 b
277 }
278 }
279}
280
281pub struct ResolvedPicoStr(Repr);
285
286enum Repr {
288 Inline([u8; 12], u8),
289 Static(&'static str),
290}
291
292impl ResolvedPicoStr {
293 pub fn as_str(&self) -> &str {
295 match &self.0 {
296 Repr::Inline(buf, len) => unsafe {
297 std::str::from_utf8_unchecked(&buf[..*len as usize])
298 },
299 Repr::Static(s) => s,
300 }
301 }
302}
303
304impl Debug for ResolvedPicoStr {
305 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
306 Debug::fmt(self.as_str(), f)
307 }
308}
309
310impl Display for ResolvedPicoStr {
311 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
312 Display::fmt(self.as_str(), f)
313 }
314}
315
316impl Deref for ResolvedPicoStr {
317 type Target = str;
318
319 fn deref(&self) -> &Self::Target {
320 self.as_str()
321 }
322}
323
324impl AsRef<str> for ResolvedPicoStr {
325 fn as_ref(&self) -> &str {
326 self.as_str()
327 }
328}
329
330impl Borrow<str> for ResolvedPicoStr {
331 fn borrow(&self) -> &str {
332 self.as_str()
333 }
334}
335
336impl Eq for ResolvedPicoStr {}
337
338impl PartialEq for ResolvedPicoStr {
339 fn eq(&self, other: &Self) -> bool {
340 self.as_str().eq(other.as_str())
341 }
342}
343
344impl Ord for ResolvedPicoStr {
345 fn cmp(&self, other: &Self) -> Ordering {
346 self.as_str().cmp(other.as_str())
347 }
348}
349
350impl PartialOrd for ResolvedPicoStr {
351 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
352 Some(self.cmp(other))
353 }
354}
355
356impl Hash for ResolvedPicoStr {
357 fn hash<H: Hasher>(&self, state: &mut H) {
358 self.as_str().hash(state);
359 }
360}
361
362#[cfg(test)]
363mod tests {
364 use super::*;
365
366 #[track_caller]
367 fn roundtrip(s: &str) {
368 assert_eq!(PicoStr::intern(s).resolve().as_str(), s);
369 }
370
371 #[test]
372 fn test_pico_str() {
373 const H1: PicoStr = PicoStr::constant("h1");
375 assert_eq!(H1, PicoStr::intern("h1"));
376 assert_eq!(H1.resolve().as_str(), "h1");
377
378 const DISC: PicoStr = PicoStr::constant("discretionary-ligatures");
380 assert_eq!(DISC, PicoStr::intern("discretionary-ligatures"));
381 assert_eq!(DISC.resolve().as_str(), "discretionary-ligatures");
382
383 roundtrip("");
385 roundtrip("hi");
386 roundtrip("∆@<hi-10_");
387 roundtrip("you");
388 roundtrip("discretionary-ligatures");
389 }
390
391 #[test]
393 fn test_exceptions_not_bitcode_encodable() {
394 for s in exceptions::LIST {
395 assert!(
396 bitcode::encode(s).is_err(),
397 "{s:?} can be encoded with bitcode and should not be an exception"
398 );
399 }
400 }
401
402 #[test]
404 fn test_exceptions_sorted() {
405 for group in exceptions::LIST.windows(2) {
406 assert!(group[0] < group[1], "{group:?} are out of order");
407 }
408 }
409
410 #[test]
412 fn test_exception_find() {
413 for (i, s) in exceptions::LIST.iter().enumerate() {
414 assert_eq!(exceptions::get(s), Some(i), "wrong index for {s:?}");
415 }
416 assert_eq!(exceptions::get("a"), None);
417 assert_eq!(exceptions::get("another-"), None);
418 assert_eq!(exceptions::get("z"), None);
419 }
420}