1use anyhow::bail;
13use fxhash::FxHashSet;
14use once_cell::sync::Lazy;
15use parking_lot::Mutex;
16use std::{borrow::Borrow, collections::HashSet, hash::Hash, ops::Deref, slice, str};
17
18const CHUNK_SIZE: usize = 1 * 1024 * 1024;
19
20struct Chunk {
21 data: Vec<u8>,
22 pos: usize,
23}
24
25impl Chunk {
26 fn new() -> &'static mut Self {
27 Box::leak(Box::new(Chunk {
28 data: vec![0; CHUNK_SIZE],
29 pos: 0
30 }))
31 }
32
33 fn insert(&mut self, str: &str) -> (*mut Chunk, Str) {
34 let str = str.as_bytes();
35 let mut t = self;
36 loop {
37 if CHUNK_SIZE - t.pos > str.len() {
38 t.data[t.pos] = str.len() as u8;
39 t.data[t.pos + 1..t.pos + 1 + str.len()].copy_from_slice(str);
40 let res = Str(t.data.as_ptr().wrapping_add(t.pos));
41 t.pos += 1 + str.len();
42 break (t, res);
43 } else {
44 t = Self::new();
45 }
46 }
47 }
48}
49
50struct Root {
51 all: FxHashSet<Str>,
52 root: *mut Chunk,
53}
54
55unsafe impl Send for Root {}
56unsafe impl Sync for Root {}
57
58static ROOT: Lazy<Mutex<Root>> = Lazy::new(|| {
59 Mutex::new(Root {
60 all: HashSet::default(),
61 root: Chunk::new(),
62 })
63});
64
65#[derive(Clone, Copy)]
71pub struct Str(*const u8);
72
73unsafe impl Send for Str {}
74unsafe impl Sync for Str {}
75
76impl Str {
77 fn get(&self) -> &'static str {
78 unsafe {
79 let len = *self.0 as usize;
80 let ptr = self.0.wrapping_add(1);
81 let slice = slice::from_raw_parts(ptr, len);
82 str::from_utf8_unchecked(slice)
83 }
84 }
85}
86
87impl Deref for Str {
88 type Target = str;
89
90 fn deref(&self) -> &'static Self::Target {
91 self.get()
92 }
93}
94
95impl Borrow<str> for Str {
96 fn borrow(&self) -> &'static str {
97 self.get()
98 }
99}
100
101impl AsRef<str> for Str {
102 fn as_ref(&self) -> &'static str {
103 self.get()
104 }
105}
106
107impl Hash for Str {
108 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
109 (&**self).hash(state)
110 }
111}
112
113impl PartialEq for Str {
114 fn eq(&self, other: &Self) -> bool {
115 &**self == &**other
116 }
117}
118
119impl Eq for Str {}
120
121impl PartialOrd for Str {
122 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
123 (&**self).partial_cmp(&**other)
124 }
125}
126
127impl Ord for Str {
128 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
129 (&**self).cmp(&**other)
130 }
131}
132
133impl TryFrom<&str> for Str {
134 type Error = anyhow::Error;
135
136 fn try_from(s: &str) -> Result<Self, Self::Error> {
137 if s.as_bytes().len() > u8::MAX as usize {
138 bail!("string is too long")
139 } else {
140 let mut root = ROOT.lock();
141 match root.all.get(s) {
142 Some(t) => Ok(*t),
143 None => unsafe {
144 let (r, t) = (*root.root).insert(s);
145 root.root = r;
146 root.all.insert(t);
147 Ok(t)
148 }
149 }
150 }
151 }
152}
153
154#[cfg(test)]
155mod test {
156 use super::*;
157 use rand::{thread_rng, Rng};
158
159 fn rand(size: usize) -> String {
160 let mut s = String::new();
161 for _ in 0..size {
162 s.push(thread_rng().gen())
163 }
164 s
165 }
166
167 #[test]
168 fn test_single() {
169 let s = rand(32);
170 let t0 = Str::try_from(s.as_str()).unwrap();
171 assert_eq!(&*t0, &*s);
172 let t1 = Str::try_from(s.as_str()).unwrap();
173 assert_eq!(t0.0, t1.0);
174 }
175
176 #[test]
177 fn test_lots() {
178 for _ in 0..1000000 {
179 let s = rand(32);
180 let t0 = Str::try_from(s.as_str()).unwrap();
181 assert_eq!(&*t0, &*s);
182 let t1 = Str::try_from(s.as_str()).unwrap();
183 assert_eq!(t0.0, t1.0)
184 }
185 }
186}