rbx_types/
shared_string.rs

1use std::{
2    cmp::Ordering,
3    collections::{hash_map::Entry, HashMap},
4    fmt,
5    hash::{Hash, Hasher},
6    sync::{Arc, Mutex, Weak},
7};
8
9use blake3::Hash as Blake3Hash;
10
11lazy_static::lazy_static! {
12    static ref STRING_CACHE: Arc<Mutex<HashMap<Blake3Hash, Weak<Vec<u8>>>>> = {
13        Arc::new(Mutex::new(HashMap::new()))
14    };
15}
16
17/// A version of `BinaryString` used for data that's commonly repeated.
18/// `rbx_types` automatically deduplicates data as it's loaded into
19/// `SharedString` values.
20#[derive(Debug, Clone)]
21pub struct SharedString {
22    data: Option<Arc<Vec<u8>>>,
23    hash: Blake3Hash,
24}
25
26impl SharedString {
27    /// Construct a SharedString from an owned buffer of data.
28    pub fn new(data: Vec<u8>) -> SharedString {
29        let hash = blake3::hash(&data);
30
31        let data = {
32            let mut cache = STRING_CACHE.lock().unwrap();
33
34            match cache.entry(hash) {
35                Entry::Occupied(mut occupied) => match occupied.get().upgrade() {
36                    Some(handle) => {
37                        // An existing entry that we can reference
38                        handle
39                    }
40                    None => {
41                        // An existing entry that's starting to be evicted from
42                        // the Drop of another SharedString instance.
43                        //
44                        // We can replace this handle with our copy of the data,
45                        // but re-use this spot in the map.
46
47                        let handle = Arc::from(data);
48                        occupied.insert(Arc::downgrade(&handle));
49                        handle
50                    }
51                },
52                Entry::Vacant(vacant) => {
53                    // This string didn't exist before, so we'll populate it.
54
55                    let handle = Arc::from(data);
56                    vacant.insert(Arc::downgrade(&handle));
57                    handle
58                }
59            }
60        };
61
62        SharedString {
63            data: Some(data),
64            hash,
65        }
66    }
67
68    #[inline]
69    pub fn data(&self) -> &[u8] {
70        self.data.as_ref().unwrap()
71    }
72
73    #[inline]
74    pub fn hash(&self) -> SharedStringHash {
75        SharedStringHash(self.hash)
76    }
77}
78
79impl Hash for SharedString {
80    fn hash<H>(&self, state: &mut H)
81    where
82        H: Hasher,
83    {
84        state.write(self.hash.as_bytes());
85    }
86}
87
88impl PartialEq for SharedString {
89    fn eq(&self, other: &Self) -> bool {
90        self.hash == other.hash
91    }
92}
93
94impl Eq for SharedString {}
95
96impl AsRef<[u8]> for SharedString {
97    fn as_ref(&self) -> &[u8] {
98        self.data()
99    }
100}
101
102impl Drop for SharedString {
103    fn drop(&mut self) {
104        // If the reference we're about to drop is the very last reference to
105        // the buffer, we'll be able to unwrap it and remove it from the
106        // SharedString cache.
107        if Arc::into_inner(self.data.take().unwrap()).is_some() {
108            let mut cache = match STRING_CACHE.lock() {
109                Ok(v) => v,
110                Err(_) => {
111                    // If the lock is poisoned, we should just leave it
112                    // alone so that we don't accidentally double-panic.
113                    return;
114                }
115            };
116
117            cache.remove(&self.hash);
118        }
119    }
120}
121
122#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
123pub struct SharedStringHash(Blake3Hash);
124
125impl SharedStringHash {
126    #[inline]
127    pub fn as_bytes(&self) -> &[u8] {
128        self.0.as_bytes().as_ref()
129    }
130}
131
132impl Ord for SharedStringHash {
133    fn cmp(&self, other: &Self) -> Ordering {
134        self.as_bytes().cmp(other.as_bytes())
135    }
136}
137
138impl PartialOrd for SharedStringHash {
139    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
140        Some(self.cmp(other))
141    }
142}
143
144impl fmt::Display for SharedStringHash {
145    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
146        f.write_str(self.0.to_hex().as_str())
147    }
148}
149
#[cfg(feature = "serde")]
pub(crate) mod serde_impl {
    use super::*;

    use serde::{de::Error as _, Deserialize, Deserializer, Serialize, Serializer};

    /// Human-readable formats get the buffer as a base64 string; other
    /// formats get the raw byte slice.
    impl Serialize for SharedString {
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            if serializer.is_human_readable() {
                let encoded = base64::encode(self.data());

                serializer.serialize_str(&encoded)
            } else {
                self.data().serialize(serializer)
            }
        }
    }

    /// Mirror image of the `Serialize` impl: a base64 string for
    /// human-readable formats, a byte sequence otherwise. The decoded buffer
    /// goes through `SharedString::new`, so it is deduplicated like any other.
    impl<'de> Deserialize<'de> for SharedString {
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let buffer = if deserializer.is_human_readable() {
                // Deserialize into an owned `String` rather than `&str`. A
                // borrowed `&str` can only be produced when the deserializer
                // is able to lend a slice of its input, which is not the case
                // for streaming readers, in-memory value trees, or strings
                // containing escape sequences; those inputs would be rejected.
                let encoded = String::deserialize(deserializer)?;
                base64::decode(&encoded).map_err(D::Error::custom)?
            } else {
                <Vec<u8>>::deserialize(deserializer)?
            };

            Ok(SharedString::new(buffer))
        }
    }
}
182
183/// A type used by Roblox for certain networking and memory guarantees.
184///
185/// This type is functionally identical to a `SharedString` when serialized.
186#[derive(Debug, Clone, Hash, PartialEq, Eq)]
187#[cfg_attr(
188    feature = "serde",
189    derive(serde::Serialize, serde::Deserialize),
190    serde(transparent)
191)]
192pub struct NetAssetRef(SharedString);
193
194impl NetAssetRef {
195    /// Construct a `NetAssetRef` from an owned buffer of data.
196    pub fn new(data: Vec<u8>) -> Self {
197        Self(SharedString::new(data))
198    }
199
200    #[inline]
201    pub fn data(&self) -> &[u8] {
202        self.0.data()
203    }
204
205    pub fn hash(&self) -> NetAssetRefHash {
206        NetAssetRefHash(self.0.hash)
207    }
208}
209
210impl AsRef<[u8]> for NetAssetRef {
211    fn as_ref(&self) -> &[u8] {
212        self.data()
213    }
214}
215
216impl From<SharedString> for NetAssetRef {
217    fn from(value: SharedString) -> Self {
218        Self(value)
219    }
220}
221
222impl From<NetAssetRef> for SharedString {
223    fn from(value: NetAssetRef) -> Self {
224        value.0
225    }
226}
227
228#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
229pub struct NetAssetRefHash(Blake3Hash);
230
231impl NetAssetRefHash {
232    #[inline]
233    pub fn as_bytes(&self) -> &[u8] {
234        self.0.as_bytes().as_ref()
235    }
236}
237
238impl Ord for NetAssetRefHash {
239    fn cmp(&self, other: &Self) -> Ordering {
240        self.as_bytes().cmp(other.as_bytes())
241    }
242}
243
244impl PartialOrd for NetAssetRefHash {
245    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
246        Some(self.cmp(other))
247    }
248}
249
250impl fmt::Display for NetAssetRefHash {
251    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252        f.write_str(self.0.to_hex().as_str())
253    }
254}
255
#[cfg(test)]
mod test {
    use super::*;

    /// Two strings built from equal bytes must share a single allocation.
    #[test]
    fn insert_twice() {
        let first = SharedString::new(vec![5, 4, 3]);
        let second = SharedString::new(vec![5, 4, 3]);

        assert!(Arc::ptr_eq(
            first.data.as_ref().unwrap(),
            second.data.as_ref().unwrap(),
        ));
    }

    /// Creating and immediately dropping values must not panic. Each value is
    /// dropped before the next one is created.
    #[test]
    fn drop() {
        for bytes in [vec![2], vec![5, 6, 7, 1]] {
            let _handle = SharedString::new(bytes);
        }
    }

    /// Human-readable formats carry the buffer as a base64 string.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_human() {
        let original = SharedString::new(b"a test string".to_vec());
        let json = serde_json::to_string(&original).unwrap();

        assert_eq!(json, r#""YSB0ZXN0IHN0cmluZw==""#);

        let round_tripped: SharedString = serde_json::from_str(&json).unwrap();

        assert_eq!(original, round_tripped);
    }

    /// Binary formats carry the raw bytes of the buffer.
    #[cfg(feature = "serde")]
    #[test]
    fn serde_non_human() {
        use std::mem;

        let original = SharedString::new(b"a test string".to_vec());
        let bytes = original.data();
        let encoded = bincode::serialize(&original).unwrap();

        // Write the length of the string as little-endian u64 followed by the
        // bytes of the string. This is analogous to what bincode does.
        let mut expected: Vec<u8> = Vec::with_capacity(mem::size_of::<u64>() + bytes.len());
        expected.extend_from_slice(&(bytes.len() as u64).to_le_bytes());
        expected.extend_from_slice(bytes);

        assert_eq!(encoded, expected);

        let round_tripped: SharedString = bincode::deserialize(&encoded).unwrap();

        assert_eq!(original, round_tripped);
    }

    /// `NetAssetRef` must serialize exactly like the `SharedString` it wraps,
    /// in both human-readable and binary formats, and round-trip cleanly.
    #[cfg(feature = "serde")]
    #[test]
    fn netassetref_serde() {
        let shared = SharedString::new(vec![13, 37]);
        let asset = NetAssetRef::new(vec![13, 37]);

        // Human-readable (JSON).
        let shared_json = serde_json::to_string(&shared).unwrap();
        let asset_json = serde_json::to_string(&asset).unwrap();

        assert_eq!(shared_json, asset_json);

        let asset_from_json: NetAssetRef = serde_json::from_str(&asset_json).unwrap();

        assert_eq!(asset, asset_from_json);

        // Binary (bincode).
        let shared_bin = bincode::serialize(&shared).unwrap();
        let asset_bin = bincode::serialize(&asset).unwrap();

        assert_eq!(shared_bin, asset_bin);

        let asset_from_bin: NetAssetRef = bincode::deserialize(&asset_bin).unwrap();

        assert_eq!(asset, asset_from_bin);
    }
}