1#![forbid(missing_docs, unsafe_code)]
29#![cfg_attr(docsrs, feature(doc_cfg))]
30
31#[cfg(feature = "delta")]
32mod delta;
33mod slice;
34
35use appendvec::AppendVec;
36use dashtable::DashTable;
37#[cfg(feature = "delta")]
38pub use delta::{Accumulator, DeltaEncoding};
39#[cfg(feature = "get-size2")]
40use get_size2::{GetSize, GetSizeTracker};
41use hashbrown::DefaultHashBuilder;
42#[cfg(feature = "serde")]
43use serde::de::{SeqAccess, Visitor};
44#[cfg(feature = "serde")]
45use serde::{Deserialize, Deserializer, Serialize, Serializer};
46pub use slice::{ArenaSlice, InternedSlice};
47use std::borrow::Borrow;
48use std::cmp::Ordering;
49use std::fmt::Debug;
50use std::hash::{BuildHasher, Hash, Hasher};
51use std::marker::PhantomData;
52#[cfg(feature = "get-size2")]
53use std::mem::size_of;
54#[cfg(feature = "debug")]
55use std::sync::atomic::{self, AtomicUsize};
56
57pub struct Interned<T: ?Sized, Storage = T> {
64 id: u32,
65 _phantom: PhantomData<fn() -> (*const T, *const Storage)>,
66}
67
68impl<T: ?Sized, Storage> Debug for Interned<T, Storage> {
69 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70 f.debug_tuple("I").field(&self.id).finish()
71 }
72}
73
74impl<T: ?Sized, Storage> Clone for Interned<T, Storage> {
75 fn clone(&self) -> Self {
76 *self
77 }
78}
79
80impl<T: ?Sized, Storage> Copy for Interned<T, Storage> {}
81
82impl<T: ?Sized, Storage> PartialEq for Interned<T, Storage> {
83 fn eq(&self, other: &Self) -> bool {
84 self.id.eq(&other.id)
85 }
86}
87
88impl<T: ?Sized, Storage> Eq for Interned<T, Storage> {}
89
90impl<T: ?Sized, Storage> PartialOrd for Interned<T, Storage> {
91 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
92 Some(self.cmp(other))
93 }
94}
95
96impl<T: ?Sized, Storage> Ord for Interned<T, Storage> {
97 fn cmp(&self, other: &Self) -> Ordering {
98 self.id.cmp(&other.id)
99 }
100}
101
102impl<T: ?Sized, Storage> Hash for Interned<T, Storage> {
103 fn hash<H>(&self, state: &mut H)
104 where
105 H: Hasher,
106 {
107 self.id.hash(state);
108 }
109}
110
111#[cfg(feature = "get-size2")]
112impl<T: ?Sized, Storage> GetSize for Interned<T, Storage> {
113 }
116
117#[cfg(feature = "raw")]
118impl<T: ?Sized, Storage> Interned<T, Storage> {
119 pub fn from_id(id: u32) -> Self {
125 Self {
126 id,
127 _phantom: PhantomData,
128 }
129 }
130
131 pub fn id(&self) -> u32 {
137 self.id
138 }
139}
140
141impl<T: ?Sized, Storage> Interned<T, Storage>
142where
143 T: Eq + Hash,
144 Storage: Borrow<T>,
145{
146 pub fn from(arena: &Arena<T, Storage>, value: impl Borrow<T> + Into<Storage>) -> Self {
152 let id = arena.intern(value);
153 Self {
154 id,
155 _phantom: PhantomData,
156 }
157 }
158}
159
160impl<T: ?Sized, Storage> Interned<T, Storage>
161where
162 Storage: Clone,
163{
164 pub fn lookup(&self, arena: &Arena<T, Storage>) -> Storage {
173 arena.lookup(self.id)
174 }
175}
176
177impl<T: ?Sized, Storage> Interned<T, Storage>
178where
179 Storage: Borrow<T>,
180{
181 pub fn lookup_ref<'a>(&self, arena: &'a Arena<T, Storage>) -> &'a T {
189 arena.lookup_ref(self.id)
190 }
191}
192
193#[cfg(feature = "serde")]
194impl<T: ?Sized, Storage> Serialize for Interned<T, Storage> {
195 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
196 where
197 S: Serializer,
198 {
199 serializer.serialize_u32(self.id)
200 }
201}
202
203#[cfg(feature = "serde")]
204impl<'de, T: ?Sized, Storage> Deserialize<'de> for Interned<T, Storage> {
205 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
206 where
207 D: Deserializer<'de>,
208 {
209 let id = deserializer.deserialize_u32(U32Visitor)?;
210 Ok(Self {
211 id,
212 _phantom: PhantomData,
213 })
214 }
215}
216
217#[cfg(feature = "serde")]
218struct U32Visitor;
219
220#[cfg(feature = "serde")]
221impl Visitor<'_> for U32Visitor {
222 type Value = u32;
223
224 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
225 formatter.write_str("an integer between 0 and 2^32")
226 }
227
228 fn visit_u8<E>(self, value: u8) -> Result<Self::Value, E>
229 where
230 E: serde::de::Error,
231 {
232 Ok(u32::from(value))
233 }
234
235 fn visit_u16<E>(self, value: u16) -> Result<Self::Value, E>
236 where
237 E: serde::de::Error,
238 {
239 Ok(u32::from(value))
240 }
241
242 fn visit_u32<E>(self, value: u32) -> Result<Self::Value, E>
243 where
244 E: serde::de::Error,
245 {
246 Ok(value)
247 }
248
249 fn visit_u64<E>(self, value: u64) -> Result<Self::Value, E>
250 where
251 E: serde::de::Error,
252 {
253 value
254 .try_into()
255 .map_err(|_| E::custom(format!("u32 out of range: {}", value)))
256 }
257
258 fn visit_i8<E>(self, value: i8) -> Result<Self::Value, E>
259 where
260 E: serde::de::Error,
261 {
262 value
263 .try_into()
264 .map_err(|_| E::custom(format!("u32 out of range: {}", value)))
265 }
266
267 fn visit_i16<E>(self, value: i16) -> Result<Self::Value, E>
268 where
269 E: serde::de::Error,
270 {
271 value
272 .try_into()
273 .map_err(|_| E::custom(format!("u32 out of range: {}", value)))
274 }
275
276 fn visit_i32<E>(self, value: i32) -> Result<Self::Value, E>
277 where
278 E: serde::de::Error,
279 {
280 value
281 .try_into()
282 .map_err(|_| E::custom(format!("u32 out of range: {}", value)))
283 }
284
285 fn visit_i64<E>(self, value: i64) -> Result<Self::Value, E>
286 where
287 E: serde::de::Error,
288 {
289 value
290 .try_into()
291 .map_err(|_| E::custom(format!("u32 out of range: {}", value)))
292 }
293}
294
295pub struct Arena<T: ?Sized, Storage = T> {
302 vec: AppendVec<Storage>,
303 map: DashTable<u32>,
304 hasher: DefaultHashBuilder,
305 #[cfg(feature = "debug")]
306 references: AtomicUsize,
307 _phantom: PhantomData<fn() -> *const T>,
308}
309
310impl<T: ?Sized, Storage> Default for Arena<T, Storage> {
311 fn default() -> Self {
312 Self {
313 vec: AppendVec::new(),
314 map: DashTable::new(),
315 hasher: DefaultHashBuilder::default(),
316 #[cfg(feature = "debug")]
317 references: AtomicUsize::new(0),
318 _phantom: PhantomData,
319 }
320 }
321}
322
323impl<T: ?Sized, Storage> Debug for Arena<T, Storage>
324where
325 T: Debug,
326 Storage: Borrow<T>,
327{
328 fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
329 fmt.debug_list()
330 .entries(self.vec.iter().map(|x| x.borrow()))
331 .finish()
332 }
333}
334
335impl<T: ?Sized, Storage> PartialEq for Arena<T, Storage>
336where
337 T: Eq,
338 Storage: Borrow<T>,
339{
340 fn eq(&self, other: &Self) -> bool {
341 self.vec
342 .iter()
343 .map(|x| x.borrow())
344 .eq(other.vec.iter().map(|x| x.borrow()))
345 }
346}
347
348impl<T: ?Sized, Storage> Eq for Arena<T, Storage>
349where
350 T: Eq,
351 Storage: Borrow<T>,
352{
353}
354
355#[cfg(feature = "get-size2")]
356impl<T: ?Sized, Storage> GetSize for Arena<T, Storage>
357where
358 Storage: GetSize,
359{
360 fn get_heap_size_with_tracker<Tr: GetSizeTracker>(&self, tracker: Tr) -> (usize, Tr) {
361 let heap_size = self.vec.iter().map(|x| x.get_size()).sum::<usize>()
362 + self.vec.len() * size_of::<u32>();
363 (heap_size, tracker)
364 }
365}
366
367#[cfg(feature = "debug")]
368impl<T: ?Sized, Storage> Arena<T, Storage>
369where
370 Storage: GetSize,
371{
372 pub fn print_summary(&self, prefix: &str, title: &str, total_bytes: usize) {
374 let len = self.len();
375 let references = self.references();
376 let estimated_bytes = self.get_size();
377 println!(
378 "{}[{:.02}%] {} interner: {} objects | {} bytes ({:.02} bytes/object) | {} references ({:.02} refs/object)",
379 prefix,
380 estimated_bytes as f64 * 100.0 / total_bytes as f64,
381 title,
382 len,
383 estimated_bytes,
384 estimated_bytes as f64 / len as f64,
385 references,
386 references as f64 / len as f64,
387 );
388 }
389}
390
391#[cfg(feature = "debug")]
392impl<T: ?Sized, Storage> Arena<T, Storage> {
393 fn len(&self) -> usize {
394 self.vec.len()
395 }
396
397 fn references(&self) -> usize {
398 self.references.load(atomic::Ordering::Relaxed)
399 }
400}
401
402impl<T: ?Sized, Storage> Arena<T, Storage>
403where
404 T: Eq + Hash,
405 Storage: Borrow<T>,
406{
407 fn intern(&self, value: impl Borrow<T> + Into<Storage>) -> u32 {
408 #[cfg(feature = "debug")]
409 self.references.fetch_add(1, atomic::Ordering::Relaxed);
410
411 let hash = self.hasher.hash_one(value.borrow());
412 *self
413 .map
414 .entry(
415 hash,
416 |&i| self.vec[i as usize].borrow() == value.borrow(),
417 |&i| self.hasher.hash_one(self.vec[i as usize].borrow()),
418 )
419 .or_insert_with(|| {
420 let x: Storage = value.into();
421 let id = self.vec.push(x);
422 assert!(id <= u32::MAX as usize);
423 id as u32
424 })
425 .get()
426 }
427
428 #[cfg(feature = "serde")]
431 fn push(&mut self, value: Storage) -> u32 {
432 #[cfg(feature = "debug")]
433 self.references.fetch_add(1, atomic::Ordering::Relaxed);
434
435 let hash = self.hasher.hash_one(value.borrow());
436
437 let id = self.vec.push_mut(value);
438 assert!(id <= u32::MAX as usize);
439 let id = id as u32;
440
441 self.map.insert_unique(hash, id, |&i| {
442 self.hasher.hash_one(self.vec[i as usize].borrow())
443 });
444
445 id
446 }
447}
448
449impl<T: ?Sized, Storage> Arena<T, Storage>
450where
451 Storage: Clone,
452{
453 fn lookup(&self, id: u32) -> Storage {
454 self.vec[id as usize].clone()
455 }
456}
457
458impl<T: ?Sized, Storage> Arena<T, Storage>
459where
460 Storage: Borrow<T>,
461{
462 fn lookup_ref(&self, id: u32) -> &T {
463 self.vec[id as usize].borrow()
464 }
465}
466
467#[cfg(feature = "serde")]
468impl<T: ?Sized, Storage> Serialize for Arena<T, Storage>
469where
470 T: Serialize,
471 Storage: Borrow<T>,
472{
473 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
474 where
475 S: Serializer,
476 {
477 serializer.collect_seq(self.vec.iter().map(|x| x.borrow()))
478 }
479}
480
481#[cfg(feature = "serde")]
482impl<'de, T: ?Sized, Storage> Deserialize<'de> for Arena<T, Storage>
483where
484 T: Eq + Hash,
485 Storage: Borrow<T> + Deserialize<'de>,
486{
487 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
488 where
489 D: Deserializer<'de>,
490 {
491 deserializer.deserialize_seq(ArenaVisitor::new())
492 }
493}
494
495#[cfg(feature = "serde")]
496struct ArenaVisitor<T: ?Sized, Storage> {
497 _phantom: PhantomData<fn() -> Arena<T, Storage>>,
498}
499
500#[cfg(feature = "serde")]
501impl<T: ?Sized, Storage> ArenaVisitor<T, Storage> {
502 fn new() -> Self {
503 Self {
504 _phantom: PhantomData,
505 }
506 }
507}
508
509#[cfg(feature = "serde")]
510impl<'de, T: ?Sized, Storage> Visitor<'de> for ArenaVisitor<T, Storage>
511where
512 T: Eq + Hash,
513 Storage: Borrow<T> + Deserialize<'de>,
514{
515 type Value = Arena<T, Storage>;
516
517 fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
518 formatter.write_str("a sequence of values")
519 }
520
521 fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
522 where
523 A: SeqAccess<'de>,
524 {
525 let mut arena = match seq.size_hint() {
526 None => Arena::default(),
527 Some(size_hint) => Arena {
528 vec: AppendVec::with_capacity(size_hint),
529 map: DashTable::with_capacity(size_hint),
530 hasher: DefaultHashBuilder::default(),
531 #[cfg(feature = "debug")]
532 references: AtomicUsize::new(0),
533 _phantom: PhantomData,
534 },
535 };
536
537 while let Some(t) = seq.next_element()? {
538 arena.push(t);
539 }
540
541 Ok(arena)
542 }
543}
544
545#[cfg(test)]
546mod test {
547 use super::*;
548 use std::borrow::Cow;
549
550 #[test]
551 fn test_str_interner() {
552 let arena: Arena<str, Box<str>> = Arena::default();
553
554 let key: &str = "Hello";
555 assert_eq!(arena.intern(key), 0);
556
557 let key: String = "world".into();
558 assert_eq!(arena.intern(key), 1);
559
560 let key: Box<str> = "Hello".into();
561 assert_eq!(arena.intern(key), 0);
562
563 let key: Box<str> = "world".into();
564 assert_eq!(arena.intern(key), 1);
565
566 let key: Cow<'_, str> = "Hello world".into();
567 assert_eq!(arena.intern(key), 2);
568 }
569}