1use std::collections::HashMap;
36use std::fmt;
37use std::sync::Arc;
38
39use serde::{Deserialize, Serialize};
40
41use super::super::string::id::StringId;
42
/// Error returned when a [`StringId`] cannot be resolved to a string.
///
/// Carries the offending id so callers can report which lookup failed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolveError {
    /// The id that had no live entry in the interner.
    pub id: StringId,
}
49
50impl fmt::Display for ResolveError {
51 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52 write!(f, "failed to resolve StringId {:?}", self.id)
53 }
54}
55
56impl std::error::Error for ResolveError {}
57
/// Errors that can occur while interning strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InternError {
    /// The interner cannot mint another id: the slot index space (bounded by
    /// `u32`, the reserved invalid id, and the local tag bit) or a configured
    /// `max_ids` budget is exhausted.
    CapacityExhausted,
}
64
65impl fmt::Display for InternError {
66 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67 match self {
68 Self::CapacityExhausted => {
69 write!(f, "string interner capacity exhausted (> 2^32 - 2 strings)")
70 }
71 }
72 }
73}
74
75impl std::error::Error for InternError {}
76
/// Refcounted string interner mapping strings to dense `u32`-indexed slots.
///
/// Slot 0 is a reserved sentinel (always `None`), so valid ids start at 1.
/// `strings`, `ref_counts` are parallel vectors; `lookup` is the reverse map.
/// Bulk-loading via `alloc_range`/`bulk_slices_mut` marks `lookup` stale until
/// `build_dedup_table()` rebuilds it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StringInterner {
    /// Slot storage; `None` marks the sentinel and freed slots.
    strings: Vec<Option<Arc<str>>>,
    /// Reverse map: string -> slot index. Serialized through a helper that
    /// presumably emits entries in sorted key order for deterministic bytes
    /// (see `test_interner_deterministic_serialization`).
    #[serde(with = "super::serde_helpers::sorted_arc_str_map")]
    lookup: HashMap<Arc<str>, u32>,
    /// Per-slot reference counts, parallel to `strings`.
    ref_counts: Vec<u32>,
    /// Slot indices available for reuse (freed by recycle/dedup).
    free_list: Vec<u32>,
    /// Optional cap on the number of distinct live strings; not serialized.
    #[serde(skip, default)]
    max_ids: Option<u32>,
    /// True after bulk slot writes until `build_dedup_table()`/`truncate_to()`
    /// restore `lookup` consistency; not serialized.
    #[serde(skip, default)]
    lookup_stale: bool,
}
134
135impl StringInterner {
136 #[must_use]
138 pub fn new() -> Self {
139 Self {
141 strings: vec![None],
142 lookup: HashMap::new(),
143 ref_counts: vec![0],
144 free_list: Vec::new(),
145 max_ids: None,
146 lookup_stale: false,
147 }
148 }
149
150 #[must_use]
152 pub fn with_capacity(capacity: usize) -> Self {
153 let mut strings = Vec::with_capacity(capacity + 1);
154 let mut ref_counts = Vec::with_capacity(capacity + 1);
155 strings.push(None); ref_counts.push(0);
157
158 Self {
159 strings,
160 lookup: HashMap::with_capacity(capacity),
161 ref_counts,
162 free_list: Vec::new(),
163 max_ids: None,
164 lookup_stale: false,
165 }
166 }
167
168 #[must_use]
192 pub fn with_max_ids(max_ids: u32) -> Self {
193 Self {
194 strings: vec![None],
195 lookup: HashMap::new(),
196 ref_counts: vec![0],
197 free_list: Vec::new(),
198 max_ids: Some(max_ids),
199 lookup_stale: false,
200 }
201 }
202
203 #[inline]
209 #[must_use]
210 pub fn len(&self) -> usize {
211 assert!(
212 !self.lookup_stale,
213 "StringInterner::len() called while lookup is stale \
214 (bulk slots written without rebuild). Call build_dedup_table() first."
215 );
216 self.lookup.len()
217 }
218
219 #[inline]
225 #[must_use]
226 pub fn is_empty(&self) -> bool {
227 assert!(
228 !self.lookup_stale,
229 "StringInterner::is_empty() called while lookup is stale \
230 (bulk slots written without rebuild). Call build_dedup_table() first."
231 );
232 self.lookup.is_empty()
233 }
234
235 pub fn intern(&mut self, s: &str) -> Result<StringId, InternError> {
251 assert!(
252 !self.lookup_stale,
253 "StringInterner::intern() called while lookup is stale \
254 (bulk slots written without rebuild). Call build_dedup_table() first."
255 );
256 if let Some(&index) = self.lookup.get(s) {
258 self.ref_counts[index as usize] = self.ref_counts[index as usize].saturating_add(1);
260 return Ok(StringId::new(index));
261 }
262
263 if let Some(max) = self.max_ids
265 && self.lookup.len() >= max as usize
266 {
267 return Err(InternError::CapacityExhausted);
268 }
269
270 let arc_str: Arc<str> = Arc::from(s);
272 let index = if let Some(free_idx) = self.free_list.pop() {
273 self.strings[free_idx as usize] = Some(Arc::clone(&arc_str));
275 self.ref_counts[free_idx as usize] = 1;
276 free_idx
277 } else {
278 let idx = self.strings.len();
280 let idx_u32 = u32::try_from(idx).map_err(|_| InternError::CapacityExhausted)?;
281 if idx_u32 == u32::MAX {
282 return Err(InternError::CapacityExhausted);
283 }
284 if idx_u32 & StringId::LOCAL_TAG_BIT != 0 {
286 return Err(InternError::CapacityExhausted);
287 }
288 self.strings.push(Some(Arc::clone(&arc_str)));
289 self.ref_counts.push(1);
290 idx_u32
291 };
292
293 self.lookup.insert(arc_str, index);
294 Ok(StringId::new(index))
295 }
296
297 #[allow(dead_code)] pub fn intern_without_ref(&mut self, s: &str) -> Result<StringId, InternError> {
314 assert!(
315 !self.lookup_stale,
316 "StringInterner::intern_without_ref() called while lookup is stale \
317 (bulk slots written without rebuild). Call build_dedup_table() first."
318 );
319 if let Some(&index) = self.lookup.get(s) {
321 return Ok(StringId::new(index));
322 }
323
324 if let Some(max) = self.max_ids
326 && self.lookup.len() >= max as usize
327 {
328 return Err(InternError::CapacityExhausted);
329 }
330
331 let arc_str: Arc<str> = Arc::from(s);
333 let index = if let Some(free_idx) = self.free_list.pop() {
334 self.strings[free_idx as usize] = Some(Arc::clone(&arc_str));
335 self.ref_counts[free_idx as usize] = 0;
336 free_idx
337 } else {
338 let idx = self.strings.len();
339 let idx_u32 = u32::try_from(idx).map_err(|_| InternError::CapacityExhausted)?;
340 if idx_u32 == u32::MAX {
341 return Err(InternError::CapacityExhausted);
342 }
343 if idx_u32 & StringId::LOCAL_TAG_BIT != 0 {
345 return Err(InternError::CapacityExhausted);
346 }
347 self.strings.push(Some(Arc::clone(&arc_str)));
348 self.ref_counts.push(0);
349 idx_u32
350 };
351
352 self.lookup.insert(arc_str, index);
353 Ok(StringId::new(index))
354 }
355
356 #[must_use]
360 pub fn resolve(&self, id: StringId) -> Option<Arc<str>> {
361 if id.is_invalid() {
362 return None;
363 }
364
365 let index = id.index() as usize;
366 self.strings.get(index).and_then(std::clone::Clone::clone)
367 }
368
369 #[must_use]
373 pub fn ref_count(&self, id: StringId) -> u32 {
374 if id.is_invalid() {
375 return 0;
376 }
377
378 let index = id.index() as usize;
379 self.ref_counts.get(index).copied().unwrap_or(0)
380 }
381
382 pub fn inc_ref(&mut self, id: StringId) -> Option<u32> {
386 if id.is_invalid() {
387 return None;
388 }
389
390 let index = id.index() as usize;
391 if index < self.ref_counts.len() && self.strings[index].is_some() {
392 self.ref_counts[index] = self.ref_counts[index].saturating_add(1);
393 Some(self.ref_counts[index])
394 } else {
395 None
396 }
397 }
398
399 pub fn dec_ref(&mut self, id: StringId) -> Option<u32> {
405 if id.is_invalid() {
406 return None;
407 }
408
409 let index = id.index() as usize;
410 if index < self.ref_counts.len() && self.strings[index].is_some() {
411 self.ref_counts[index] = self.ref_counts[index].saturating_sub(1);
412 Some(self.ref_counts[index])
413 } else {
414 None
415 }
416 }
417
418 #[allow(dead_code)] pub fn recycle_unreferenced(&mut self) -> usize {
428 assert!(
429 !self.lookup_stale,
430 "StringInterner::recycle_unreferenced() called while lookup is stale \
431 (bulk slots written without rebuild). Call build_dedup_table() first."
432 );
433 let mut recycled = 0;
434
435 for index in 1..self.strings.len() {
436 if self.ref_counts[index] == 0
437 && let Some(arc_str) = self.strings[index].take()
438 {
439 self.lookup.remove(&arc_str);
440 if let Ok(index_u32) = u32::try_from(index) {
441 self.free_list.push(index_u32);
442 }
443 recycled += 1;
444 }
445 }
446
447 recycled
448 }
449
450 #[must_use]
456 pub fn contains(&self, s: &str) -> bool {
457 assert!(
458 !self.lookup_stale,
459 "StringInterner::contains() called while lookup is stale \
460 (bulk slots written without rebuild). Call build_dedup_table() first."
461 );
462 self.lookup.contains_key(s)
463 }
464
465 #[must_use]
473 pub fn get(&self, s: &str) -> Option<StringId> {
474 assert!(
475 !self.lookup_stale,
476 "StringInterner::get() called while lookup is stale \
477 (bulk slots written without rebuild). Call build_dedup_table() first."
478 );
479 self.lookup.get(s).map(|&idx| StringId::new(idx))
480 }
481
482 pub fn iter(&self) -> impl Iterator<Item = (StringId, &Arc<str>)> {
484 self.strings
485 .iter()
486 .enumerate()
487 .skip(1) .filter_map(|(idx, opt)| {
489 let index_u32 = u32::try_from(idx).ok()?;
490 opt.as_ref().map(|s| (StringId::new(index_u32), s))
491 })
492 }
493
494 pub fn clear(&mut self) {
499 self.strings.truncate(1); self.strings[0] = None;
501 self.lookup.clear();
502 self.ref_counts.truncate(1);
503 self.ref_counts[0] = 0;
504 self.free_list.clear();
505 self.lookup_stale = false;
506 }
507
508 pub fn reserve(&mut self, additional: usize) {
510 self.strings.reserve(additional);
511 self.ref_counts.reserve(additional);
512 self.lookup.reserve(additional);
513 }
514
515 pub fn alloc_range(&mut self, count: u32) -> Result<u32, InternError> {
535 let start = self.strings.len();
536 let start_u32 = u32::try_from(start).map_err(|_| InternError::CapacityExhausted)?;
537
538 if count == 0 {
539 return Ok(start_u32);
540 }
541
542 let end_u32 = start_u32
544 .checked_add(count)
545 .ok_or(InternError::CapacityExhausted)?;
546 if (end_u32 - 1) & StringId::LOCAL_TAG_BIT != 0 {
548 return Err(InternError::CapacityExhausted);
549 }
550
551 self.strings.resize(end_u32 as usize, None);
553 self.ref_counts.resize(end_u32 as usize, 0);
554
555 self.lookup_stale = true;
559
560 Ok(start_u32)
561 }
562
563 pub fn bulk_slices_mut(
578 &mut self,
579 start: u32,
580 count: u32,
581 ) -> (&mut [Option<Arc<str>>], &mut [u32]) {
582 if count > 0 {
583 self.lookup_stale = true;
584 }
585 let s = start as usize;
586 let e = s + count as usize;
587 let strings_slice = &mut self.strings[s..e];
588 let ref_counts_slice = &mut self.ref_counts[s..e];
589 (strings_slice, ref_counts_slice)
590 }
591
    /// Rebuilds the lookup table after bulk slot writes, merging duplicate
    /// strings in place, and returns a remap of `duplicate id -> canonical id`
    /// for callers to rewrite their references.
    ///
    /// For each duplicate: its refcount is folded into the first (canonical)
    /// occurrence, its slot is cleared, and the index is pushed onto the free
    /// list. Clears the `lookup_stale` flag on completion.
    pub fn build_dedup_table(&mut self) -> HashMap<StringId, StringId> {
        let mut remap: HashMap<StringId, StringId> = HashMap::new();
        // First occurrence of each string wins the canonical slot.
        let mut canonical: HashMap<Arc<str>, u32> = HashMap::new();

        // Slot 0 is the sentinel and is deliberately skipped.
        for idx in 1..self.strings.len() {
            let Some(ref arc_str) = self.strings[idx] else {
                continue;
            };

            if let Some(&canon_idx) = canonical.get(arc_str) {
                // Duplicate: transfer its refcount to the canonical slot...
                let dup_rc = self.ref_counts[idx];
                self.ref_counts[canon_idx as usize] =
                    self.ref_counts[canon_idx as usize].saturating_add(dup_rc);
                // ...then free this slot for reuse.
                self.strings[idx] = None;
                self.ref_counts[idx] = 0;
                let idx_u32 = u32::try_from(idx)
                    .unwrap_or_else(|_| unreachable!("string slot index exceeds u32 invariant"));
                self.free_list.push(idx_u32);
                remap.insert(StringId::new(idx_u32), StringId::new(canon_idx));
            } else {
                let idx_u32 = u32::try_from(idx)
                    .unwrap_or_else(|_| unreachable!("string slot index exceeds u32 invariant"));
                canonical.insert(Arc::clone(arc_str), idx_u32);
            }
        }

        // Rebuild the reverse map from the canonical set.
        self.lookup.clear();
        self.lookup.reserve(canonical.len());
        for (arc_str, idx) in canonical {
            self.lookup.insert(arc_str, idx);
        }

        self.lookup_stale = false;

        remap
    }
649
650 pub fn truncate_to(&mut self, saved_len: usize) {
661 assert!(saved_len >= 1, "cannot truncate sentinel slot at index 0");
662 self.strings.truncate(saved_len);
663 self.ref_counts.truncate(saved_len);
664 self.lookup_stale = false;
668 }
669
    /// Raw slot-vector length, including the sentinel, freed slots, and any
    /// bulk-allocated-but-unfilled slots. Safe to call while lookup is stale.
    #[inline]
    #[must_use]
    pub fn string_count_raw(&self) -> usize {
        self.strings.len()
    }
679
    /// Whether bulk slot writes have left the lookup table stale (i.e.
    /// `build_dedup_table()` or `truncate_to()` is required before the
    /// lookup-based accessors may be used).
    #[inline]
    #[must_use]
    pub fn is_lookup_stale(&self) -> bool {
        self.lookup_stale
    }
689
690 #[must_use]
695 pub fn stats(&self) -> InternerStats {
696 let total_bytes: usize = self
697 .strings
698 .iter()
699 .filter_map(|opt| opt.as_ref())
700 .map(|s| s.len())
701 .sum();
702
703 let string_count = self
705 .strings
706 .iter()
707 .skip(1) .filter(|s| s.is_some())
709 .count();
710
711 InternerStats {
712 string_count,
713 total_bytes,
714 free_slots: self.free_list.len(),
715 capacity: self.strings.capacity(),
716 }
717 }
718}
719
720impl Default for StringInterner {
721 fn default() -> Self {
722 Self::new()
723 }
724}
725
726impl fmt::Display for StringInterner {
727 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
728 let count = self.stats().string_count;
730 write!(
731 f,
732 "StringInterner(strings={count}, free={}{})",
733 self.free_list.len(),
734 if self.lookup_stale {
735 ", lookup_stale"
736 } else {
737 ""
738 }
739 )
740 }
741}
742
/// Point-in-time occupancy statistics for a [`StringInterner`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct InternerStats {
    /// Number of live (non-sentinel, non-freed) strings.
    pub string_count: usize,
    /// Sum of the byte lengths of all live strings.
    pub total_bytes: usize,
    /// Number of freed slots awaiting reuse.
    pub free_slots: usize,
    /// Allocated capacity of the slot vector.
    pub capacity: usize,
}
755
756#[cfg(test)]
757mod tests {
758 use super::*;
759
760 #[test]
761 fn test_new() {
762 let interner = StringInterner::new();
763 assert_eq!(interner.len(), 0);
764 assert!(interner.is_empty());
765 }
766
767 #[test]
768 fn test_with_capacity() {
769 let interner = StringInterner::with_capacity(100);
770 assert_eq!(interner.len(), 0);
771 assert!(interner.strings.capacity() >= 101); }
773
774 #[test]
775 fn test_intern_single() {
776 let mut interner = StringInterner::new();
777 let id = interner.intern("hello").unwrap();
778
779 assert!(!id.is_invalid());
780 assert_eq!(interner.len(), 1);
781
782 let resolved = interner.resolve(id).unwrap();
783 assert_eq!(&*resolved, "hello");
784 }
785
786 #[test]
787 fn test_intern_duplicate() {
788 let mut interner = StringInterner::new();
789 let id1 = interner.intern("foo").unwrap();
790 let id2 = interner.intern("foo").unwrap();
791
792 assert_eq!(id1, id2);
793 assert_eq!(interner.len(), 1);
794 assert_eq!(interner.ref_count(id1), 2);
795 }
796
797 #[test]
798 fn test_intern_different() {
799 let mut interner = StringInterner::new();
800 let id1 = interner.intern("foo").unwrap();
801 let id2 = interner.intern("bar").unwrap();
802
803 assert_ne!(id1, id2);
804 assert_eq!(interner.len(), 2);
805 }
806
807 #[test]
808 fn test_resolve_invalid() {
809 let interner = StringInterner::new();
810 assert!(interner.resolve(StringId::INVALID).is_none());
811 }
812
813 #[test]
814 fn test_resolve_out_of_bounds() {
815 let interner = StringInterner::new();
816 assert!(interner.resolve(StringId::new(999)).is_none());
817 }
818
819 #[test]
820 fn test_ref_count() {
821 let mut interner = StringInterner::new();
822 let id = interner.intern("test").unwrap();
823
824 assert_eq!(interner.ref_count(id), 1);
825
826 interner.intern("test").unwrap(); assert_eq!(interner.ref_count(id), 2);
828
829 interner.dec_ref(id);
830 assert_eq!(interner.ref_count(id), 1);
831
832 interner.inc_ref(id);
833 assert_eq!(interner.ref_count(id), 2);
834 }
835
836 #[test]
837 fn test_inc_ref_invalid() {
838 let mut interner = StringInterner::new();
839 assert!(interner.inc_ref(StringId::INVALID).is_none());
840 }
841
842 #[test]
843 fn test_dec_ref_invalid() {
844 let mut interner = StringInterner::new();
845 assert!(interner.dec_ref(StringId::INVALID).is_none());
846 }
847
848 #[test]
849 fn test_dec_ref_saturating() {
850 let mut interner = StringInterner::new();
851 let id = interner.intern_without_ref("test").unwrap();
852
853 assert_eq!(interner.ref_count(id), 0);
854 interner.dec_ref(id);
855 assert_eq!(interner.ref_count(id), 0); }
857
858 #[test]
859 fn test_intern_without_ref() {
860 let mut interner = StringInterner::new();
861 let id = interner.intern_without_ref("test").unwrap();
862
863 assert_eq!(interner.ref_count(id), 0);
864 assert_eq!(interner.resolve(id).unwrap().as_ref(), "test");
865 }
866
867 #[test]
868 fn test_recycle_unreferenced() {
869 let mut interner = StringInterner::new();
870 let id1 = interner.intern_without_ref("foo").unwrap();
871 let id2 = interner.intern("bar").unwrap();
872
873 assert_eq!(interner.len(), 2);
874
875 let recycled = interner.recycle_unreferenced();
876 assert_eq!(recycled, 1); assert_eq!(interner.len(), 1);
878
879 assert!(interner.resolve(id1).is_none());
881 assert_eq!(interner.resolve(id2).unwrap().as_ref(), "bar");
883 }
884
885 #[test]
886 fn test_free_list_reuse() {
887 let mut interner = StringInterner::new();
888 let id1 = interner.intern_without_ref("foo").unwrap();
889 let _id2 = interner.intern("bar").unwrap();
890
891 interner.recycle_unreferenced();
893
894 let id3 = interner.intern("baz").unwrap();
896 assert_eq!(id3.index(), id1.index()); }
898
899 #[test]
900 fn test_contains() {
901 let mut interner = StringInterner::new();
902 interner.intern("hello").unwrap();
903
904 assert!(interner.contains("hello"));
905 assert!(!interner.contains("world"));
906 }
907
908 #[test]
909 fn test_get() {
910 let mut interner = StringInterner::new();
911 let id = interner.intern("hello").unwrap();
912
913 assert_eq!(interner.get("hello"), Some(id));
914 assert_eq!(interner.get("world"), None);
915 }
916
917 #[test]
918 fn test_iter() {
919 let mut interner = StringInterner::new();
920 interner.intern("foo").unwrap();
921 interner.intern("bar").unwrap();
922 interner.intern("baz").unwrap();
923
924 let strings: Vec<_> = interner.iter().map(|(_, s)| s.as_ref()).collect();
925 assert_eq!(strings.len(), 3);
926 assert!(strings.contains(&"foo"));
927 assert!(strings.contains(&"bar"));
928 assert!(strings.contains(&"baz"));
929 }
930
931 #[test]
932 fn test_clear() {
933 let mut interner = StringInterner::new();
934 interner.intern("foo").unwrap();
935 interner.intern("bar").unwrap();
936
937 assert_eq!(interner.len(), 2);
938 interner.clear();
939 assert_eq!(interner.len(), 0);
940 assert!(interner.is_empty());
941 }
942
943 #[test]
944 fn test_reserve() {
945 let mut interner = StringInterner::new();
946 interner.reserve(1000);
947 assert!(interner.strings.capacity() >= 1001);
948 }
949
950 #[test]
951 fn test_display() {
952 let mut interner = StringInterner::new();
953 interner.intern("test").unwrap();
954
955 let display = format!("{interner}");
956 assert!(display.contains("StringInterner"));
957 assert!(display.contains("strings=1"));
958 }
959
960 #[test]
961 fn test_stats() {
962 let mut interner = StringInterner::new();
963 interner.intern("hello").unwrap(); interner.intern("world").unwrap(); let stats = interner.stats();
967 assert_eq!(stats.string_count, 2);
968 assert_eq!(stats.total_bytes, 10);
969 assert_eq!(stats.free_slots, 0);
970 }
971
972 #[test]
973 fn test_empty_string() {
974 let mut interner = StringInterner::new();
975 let id = interner.intern("").unwrap();
976
977 assert!(!id.is_invalid());
978 assert_eq!(interner.resolve(id).unwrap().as_ref(), "");
979 }
980
981 #[test]
982 fn test_unicode() {
983 let mut interner = StringInterner::new();
984 let id = interner.intern("日本語").unwrap();
985
986 let resolved = interner.resolve(id).unwrap();
987 assert_eq!(&*resolved, "日本語");
988 }
989
990 #[test]
991 fn test_default() {
992 let interner: StringInterner = StringInterner::default();
993 assert_eq!(interner.len(), 0);
994 }
995
996 #[test]
997 fn test_resolve_error_display() {
998 let err = ResolveError {
999 id: StringId::new(42),
1000 };
1001 let display = format!("{err}");
1002 assert!(display.contains("StringId"));
1003 }
1004
1005 #[test]
1006 fn test_intern_error_display() {
1007 let err = InternError::CapacityExhausted;
1008 let display = format!("{err}");
1009 assert!(display.contains("capacity exhausted"));
1010 }
1011
1012 #[test]
1015 fn test_alloc_range_basic() {
1016 let mut interner = StringInterner::new();
1017 assert_eq!(interner.string_count_raw(), 1);
1019
1020 let start = interner.alloc_range(5).unwrap();
1021 assert!(start >= 1);
1023 assert_eq!(start, 1); assert_eq!(interner.string_count_raw(), 6); for i in start..start + 5 {
1028 let id = StringId::new(i);
1029 assert!(interner.resolve(id).is_none());
1030 assert_eq!(interner.ref_count(id), 0);
1031 }
1032 }
1033
1034 #[test]
1035 fn test_alloc_range_zero_noop() {
1036 let mut interner = StringInterner::new();
1037 let before = interner.string_count_raw();
1038 let start = interner.alloc_range(0).unwrap();
1039 assert_eq!(start as usize, before);
1040 assert_eq!(interner.string_count_raw(), before);
1041 }
1042
1043 #[test]
1044 fn test_alloc_range_after_existing_strings() {
1045 let mut interner = StringInterner::new();
1046 interner.intern("existing").unwrap();
1047 let start = interner.alloc_range(3).unwrap();
1049 assert_eq!(start, 2);
1050 assert_eq!(interner.string_count_raw(), 5);
1051 assert_eq!(
1053 interner.resolve(StringId::new(1)).unwrap().as_ref(),
1054 "existing"
1055 );
1056 }
1057
1058 #[test]
1059 fn test_bulk_slices_mut() {
1060 let mut interner = StringInterner::new();
1061 let start = interner.alloc_range(3).unwrap();
1062
1063 {
1065 let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
1066 strings[0] = Some(Arc::from("alpha"));
1067 strings[1] = Some(Arc::from("beta"));
1068 strings[2] = Some(Arc::from("gamma"));
1069 ref_counts[0] = 1;
1070 ref_counts[1] = 2;
1071 ref_counts[2] = 1;
1072 }
1073
1074 assert_eq!(
1076 interner.resolve(StringId::new(start)).unwrap().as_ref(),
1077 "alpha"
1078 );
1079 assert_eq!(
1080 interner.resolve(StringId::new(start + 1)).unwrap().as_ref(),
1081 "beta"
1082 );
1083 assert_eq!(
1084 interner.resolve(StringId::new(start + 2)).unwrap().as_ref(),
1085 "gamma"
1086 );
1087
1088 assert_eq!(interner.ref_count(StringId::new(start)), 1);
1090 assert_eq!(interner.ref_count(StringId::new(start + 1)), 2);
1091 assert_eq!(interner.ref_count(StringId::new(start + 2)), 1);
1092 }
1093
1094 #[test]
1095 fn test_build_dedup_table_no_duplicates() {
1096 let mut interner = StringInterner::new();
1097 let start = interner.alloc_range(3).unwrap();
1098
1099 {
1100 let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
1101 strings[0] = Some(Arc::from("one"));
1102 strings[1] = Some(Arc::from("two"));
1103 strings[2] = Some(Arc::from("three"));
1104 ref_counts[0] = 1;
1105 ref_counts[1] = 1;
1106 ref_counts[2] = 1;
1107 }
1108
1109 let remap = interner.build_dedup_table();
1110 assert!(remap.is_empty(), "no duplicates means empty remap");
1111
1112 assert_eq!(
1114 interner.resolve(StringId::new(start)).unwrap().as_ref(),
1115 "one"
1116 );
1117 assert_eq!(
1118 interner.resolve(StringId::new(start + 1)).unwrap().as_ref(),
1119 "two"
1120 );
1121 assert_eq!(
1122 interner.resolve(StringId::new(start + 2)).unwrap().as_ref(),
1123 "three"
1124 );
1125
1126 assert_eq!(interner.len(), 3);
1128 }
1129
1130 #[test]
1131 fn test_build_dedup_table_with_duplicates() {
1132 let mut interner = StringInterner::new();
1133 let start = interner.alloc_range(4).unwrap();
1134
1135 {
1136 let (strings, ref_counts) = interner.bulk_slices_mut(start, 4);
1137 strings[0] = Some(Arc::from("hello"));
1138 strings[1] = Some(Arc::from("world"));
1139 strings[2] = Some(Arc::from("hello")); strings[3] = Some(Arc::from("world")); ref_counts[0] = 1;
1142 ref_counts[1] = 3;
1143 ref_counts[2] = 2;
1144 ref_counts[3] = 1;
1145 }
1146
1147 let remap = interner.build_dedup_table();
1148
1149 assert_eq!(remap.len(), 2);
1151 assert_eq!(remap[&StringId::new(start + 2)], StringId::new(start)); assert_eq!(remap[&StringId::new(start + 3)], StringId::new(start + 1)); assert_eq!(interner.ref_count(StringId::new(start)), 3); assert_eq!(interner.ref_count(StringId::new(start + 1)), 4); assert!(interner.resolve(StringId::new(start + 2)).is_none());
1160 assert!(interner.resolve(StringId::new(start + 3)).is_none());
1161 assert_eq!(interner.ref_count(StringId::new(start + 2)), 0);
1162 assert_eq!(interner.ref_count(StringId::new(start + 3)), 0);
1163
1164 assert_eq!(interner.len(), 2);
1166 assert_eq!(interner.get("hello"), Some(StringId::new(start)));
1167 assert_eq!(interner.get("world"), Some(StringId::new(start + 1)));
1168 }
1169
1170 #[test]
1171 fn test_truncate_to() {
1172 let mut interner = StringInterner::new();
1173 interner.intern("keep").unwrap();
1174
1175 let saved = interner.string_count_raw(); let _start = interner.alloc_range(10).unwrap();
1177 assert_eq!(interner.string_count_raw(), 12);
1178
1179 interner.truncate_to(saved);
1180 assert_eq!(interner.string_count_raw(), 2);
1181
1182 assert_eq!(interner.resolve(StringId::new(1)).unwrap().as_ref(), "keep");
1184 }
1185
1186 #[test]
1187 #[should_panic(expected = "cannot truncate sentinel")]
1188 fn test_truncate_to_zero_panics() {
1189 let mut interner = StringInterner::new();
1190 interner.truncate_to(0);
1191 }
1192
1193 #[test]
1194 fn test_dedup_preserves_sentinel() {
1195 let mut interner = StringInterner::new();
1196 let start = interner.alloc_range(2).unwrap();
1197
1198 {
1199 let (strings, ref_counts) = interner.bulk_slices_mut(start, 2);
1200 strings[0] = Some(Arc::from("a"));
1201 strings[1] = Some(Arc::from("a")); ref_counts[0] = 1;
1203 ref_counts[1] = 1;
1204 }
1205
1206 let _remap = interner.build_dedup_table();
1207
1208 assert!(interner.resolve(StringId::new(0)).is_none());
1210 assert_eq!(interner.ref_counts[0], 0);
1213 assert!(interner.strings[0].is_none());
1214 }
1215
1216 #[test]
1217 fn test_string_count_raw() {
1218 let mut interner = StringInterner::new();
1219 assert_eq!(interner.string_count_raw(), 1); interner.intern("a").unwrap();
1222 assert_eq!(interner.string_count_raw(), 2);
1223
1224 interner.alloc_range(5).unwrap();
1225 assert_eq!(interner.string_count_raw(), 7);
1226 }
1227
1228 #[test]
1229 fn test_ref_count_accessor() {
1230 let mut interner = StringInterner::new();
1231 let id = interner.intern("test").unwrap();
1232 assert_eq!(interner.ref_count(id), 1);
1233
1234 interner.intern("test").unwrap(); assert_eq!(interner.ref_count(id), 2);
1236
1237 assert_eq!(interner.ref_count(StringId::INVALID), 0);
1239
1240 assert_eq!(interner.ref_count(StringId::new(999)), 0);
1242 }
1243
1244 #[test]
1245 fn test_alloc_range_basic_succeeds() {
1246 let mut interner = StringInterner::with_max_ids(u32::MAX);
1252 let start = interner.alloc_range(10).unwrap();
1253 assert_eq!(start, 1, "first allocation should start at index 1");
1254 assert_eq!(
1255 interner.string_count_raw(),
1256 11,
1257 "string count should advance by the allocated range size"
1258 );
1259 }
1260
1261 #[test]
1262 fn test_dedup_frees_slots_for_reuse() {
1263 let mut interner = StringInterner::new();
1266 let start = interner.alloc_range(3).unwrap();
1267
1268 {
1269 let (strings, ref_counts) = interner.bulk_slices_mut(start, 3);
1270 strings[0] = Some(Arc::from("dup"));
1271 strings[1] = Some(Arc::from("unique"));
1272 strings[2] = Some(Arc::from("dup")); ref_counts[0] = 1;
1274 ref_counts[1] = 1;
1275 ref_counts[2] = 1;
1276 }
1277
1278 let remap = interner.build_dedup_table();
1279 assert_eq!(remap.len(), 1);
1280
1281 let dup_slot = start + 2;
1283 assert!(interner.resolve(StringId::new(dup_slot)).is_none());
1284
1285 let new_id = interner.intern("fresh").unwrap();
1287 assert_eq!(
1288 new_id.index(),
1289 dup_slot,
1290 "new intern should reuse freed duplicate slot"
1291 );
1292 }
1293
1294 #[test]
1295 fn test_build_dedup_table_with_gaps() {
1296 let mut interner = StringInterner::new();
1298 let start = interner.alloc_range(4).unwrap();
1299
1300 {
1301 let (strings, ref_counts) = interner.bulk_slices_mut(start, 4);
1302 strings[0] = Some(Arc::from("x"));
1303 strings[2] = Some(Arc::from("y"));
1305 strings[3] = Some(Arc::from("x")); ref_counts[0] = 1;
1307 ref_counts[2] = 1;
1308 ref_counts[3] = 1;
1309 }
1310
1311 let remap = interner.build_dedup_table();
1312
1313 assert_eq!(remap.len(), 1);
1315 assert_eq!(remap[&StringId::new(start + 3)], StringId::new(start));
1316
1317 assert_eq!(interner.ref_count(StringId::new(start)), 2);
1319
1320 assert_eq!(interner.ref_count(StringId::new(start + 2)), 1);
1322
1323 assert!(interner.resolve(StringId::new(start + 1)).is_none());
1325 }
1326
1327 #[test]
1330 fn test_interner_deterministic_serialization() {
1331 let mut interner1 = StringInterner::new();
1335 interner1.intern("alpha").unwrap();
1336 interner1.intern("beta").unwrap();
1337 interner1.intern("gamma").unwrap();
1338
1339 let mut interner2 = StringInterner::new();
1340 interner2.intern("alpha").unwrap();
1341 interner2.intern("beta").unwrap();
1342 interner2.intern("gamma").unwrap();
1343
1344 let bytes1 = postcard::to_stdvec(&interner1).expect("serialize interner1");
1345 let bytes2 = postcard::to_stdvec(&interner2).expect("serialize interner2");
1346
1347 assert_eq!(
1348 bytes1, bytes2,
1349 "same insertion order must produce identical postcard bytes"
1350 );
1351 }
1352
1353 #[test]
1354 fn test_interner_serialization_roundtrip() {
1355 let mut interner = StringInterner::new();
1356 interner.intern("foo").unwrap();
1357 interner.intern("bar").unwrap();
1358 interner.intern("baz").unwrap();
1359 interner.intern("foo").unwrap();
1361
1362 let bytes = postcard::to_stdvec(&interner).expect("serialize");
1363 let deserialized: StringInterner = postcard::from_bytes(&bytes).expect("deserialize");
1364
1365 assert_eq!(deserialized.len(), 3);
1367 assert!(deserialized.contains("foo"));
1368 assert!(deserialized.contains("bar"));
1369 assert!(deserialized.contains("baz"));
1370
1371 let foo_id = deserialized.get("foo").unwrap();
1373 assert_eq!(deserialized.ref_count(foo_id), 2);
1374
1375 let bar_id = deserialized.get("bar").unwrap();
1376 assert_eq!(deserialized.ref_count(bar_id), 1);
1377 }
1378
1379 #[test]
1380 fn test_interner_sorted_lookup_produces_stable_bytes() {
1381 let mut interner = StringInterner::new();
1386 interner.intern("gamma").unwrap();
1387 interner.intern("alpha").unwrap();
1388 interner.intern("beta").unwrap();
1389
1390 let bytes = postcard::to_stdvec(&interner).expect("serialize");
1391
1392 let bytes_str = String::from_utf8_lossy(&bytes);
1394 let find_second = |needle: &str| {
1397 let first = bytes_str.find(needle).unwrap();
1398 bytes_str[first + needle.len()..]
1399 .find(needle)
1400 .map(|pos| pos + first + needle.len())
1401 };
1402
1403 let alpha_pos = find_second("alpha");
1406 let beta_pos = find_second("beta");
1407 let gamma_pos = find_second("gamma");
1408
1409 if let (Some(a), Some(b), Some(g)) = (alpha_pos, beta_pos, gamma_pos) {
1410 assert!(a < b, "alpha should appear before beta in sorted lookup");
1411 assert!(b < g, "beta should appear before gamma in sorted lookup");
1412 }
1413 }
1416
1417 #[test]
1420 fn test_alloc_range_sets_lookup_stale() {
1421 let mut interner = StringInterner::new();
1422 assert!(!interner.is_lookup_stale());
1423
1424 interner.alloc_range(5).unwrap();
1425 assert!(interner.is_lookup_stale());
1426 }
1427
1428 #[test]
1429 fn test_alloc_range_zero_does_not_set_stale() {
1430 let mut interner = StringInterner::new();
1431 interner.alloc_range(0).unwrap();
1432 assert!(!interner.is_lookup_stale());
1434 }
1435
1436 #[test]
1437 fn test_build_dedup_table_clears_lookup_stale() {
1438 let mut interner = StringInterner::new();
1439 let start = interner.alloc_range(2).unwrap();
1440 assert!(interner.is_lookup_stale());
1441
1442 interner.strings[start as usize] = Some(Arc::from("alpha"));
1444 interner.ref_counts[start as usize] = 1;
1445 interner.strings[(start + 1) as usize] = Some(Arc::from("beta"));
1446 interner.ref_counts[(start + 1) as usize] = 1;
1447
1448 let _remap = interner.build_dedup_table();
1449 assert!(!interner.is_lookup_stale());
1450
1451 assert!(interner.contains("alpha"));
1453 assert!(interner.contains("beta"));
1454 assert_eq!(interner.len(), 2);
1455 }
1456
1457 #[test]
1458 fn test_truncate_to_clears_lookup_stale() {
1459 let mut interner = StringInterner::new();
1460 let saved_len = interner.string_count_raw();
1461
1462 interner.alloc_range(10).unwrap();
1463 assert!(interner.is_lookup_stale());
1464
1465 interner.truncate_to(saved_len);
1466 assert!(!interner.is_lookup_stale());
1467
1468 let id = interner.intern("after_rollback").unwrap();
1470 assert!(interner.contains("after_rollback"));
1471 assert_eq!(interner.resolve(id).unwrap().as_ref(), "after_rollback");
1472 }
1473
1474 #[test]
1475 #[should_panic(expected = "lookup is stale")]
1476 fn test_intern_panics_when_lookup_stale() {
1477 let mut interner = StringInterner::new();
1478 interner.alloc_range(1).unwrap();
1479 let _ = interner.intern("should_panic");
1480 }
1481
1482 #[test]
1483 #[should_panic(expected = "lookup is stale")]
1484 fn test_intern_without_ref_panics_when_lookup_stale() {
1485 let mut interner = StringInterner::new();
1486 interner.alloc_range(1).unwrap();
1487 let _ = interner.intern_without_ref("should_panic");
1488 }
1489
1490 #[test]
1491 #[should_panic(expected = "lookup is stale")]
1492 fn test_get_panics_when_lookup_stale() {
1493 let mut interner = StringInterner::new();
1494 interner.alloc_range(1).unwrap();
1495 let _ = interner.get("should_panic");
1496 }
1497
1498 #[test]
1499 #[should_panic(expected = "lookup is stale")]
1500 fn test_contains_panics_when_lookup_stale() {
1501 let mut interner = StringInterner::new();
1502 interner.alloc_range(1).unwrap();
1503 let _ = interner.contains("should_panic");
1504 }
1505
1506 #[test]
1507 #[should_panic(expected = "lookup is stale")]
1508 fn test_len_panics_when_lookup_stale() {
1509 let mut interner = StringInterner::new();
1510 interner.alloc_range(1).unwrap();
1511 let _ = interner.len();
1512 }
1513
1514 #[test]
1515 #[should_panic(expected = "lookup is stale")]
1516 fn test_is_empty_panics_when_lookup_stale() {
1517 let mut interner = StringInterner::new();
1518 interner.alloc_range(1).unwrap();
1519 let _ = interner.is_empty();
1520 }
1521
1522 #[test]
1523 #[should_panic(expected = "lookup is stale")]
1524 fn test_recycle_unreferenced_panics_when_lookup_stale() {
1525 let mut interner = StringInterner::new();
1526 interner.alloc_range(1).unwrap();
1527 let _ = interner.recycle_unreferenced();
1528 }
1529
1530 #[test]
1531 fn test_resolve_works_when_lookup_stale() {
1532 let mut interner = StringInterner::new();
1535 let id = interner.intern("before_bulk").unwrap();
1536
1537 interner.alloc_range(5).unwrap();
1538 assert!(interner.is_lookup_stale());
1539
1540 assert_eq!(interner.resolve(id).unwrap().as_ref(), "before_bulk");
1542 }
1543
1544 #[test]
1545 fn test_stats_works_when_lookup_stale() {
1546 let mut interner = StringInterner::new();
1548 interner.intern("existing").unwrap();
1549
1550 let start = interner.alloc_range(2).unwrap();
1551 interner.strings[start as usize] = Some(Arc::from("bulk1"));
1552 interner.ref_counts[start as usize] = 1;
1553
1554 assert!(interner.is_lookup_stale());
1555
1556 let stats = interner.stats();
1557 assert_eq!(stats.string_count, 2);
1560 }
1561
    #[test]
    fn test_display_works_when_lookup_stale() {
        let mut interner = StringInterner::new();
        // Bulk allocation marks the lookup table stale.
        interner.alloc_range(3).unwrap();
        assert!(interner.is_lookup_stale());

        // Display must not panic while stale, and should surface the
        // inconsistency so debug output is not misleading.
        let display = format!("{interner}");
        assert!(
            display.contains("lookup_stale"),
            "Display should indicate stale state: {display}"
        );
    }
1574
    #[test]
    fn test_display_omits_stale_when_consistent() {
        let mut interner = StringInterner::new();
        // Plain interning keeps the lookup consistent.
        interner.intern("hello").unwrap();

        // A consistent interner must not advertise staleness in its
        // Display output.
        let display = format!("{interner}");
        assert!(
            !display.contains("lookup_stale"),
            "Display should not mention stale when consistent: {display}"
        );
    }
1586
    /// End-to-end exercise of the bulk (parallel-commit) workflow:
    /// intern normally, allocate a bulk range, fill it with duplicates,
    /// then dedup and verify ids, lookups, and ref counts.
    #[test]
    fn test_full_parallel_commit_lifecycle() {
        let mut interner = StringInterner::new();

        // Phase 1: a string interned before the bulk phase must survive it.
        let pre_id = interner.intern("pre_existing").unwrap();
        assert_eq!(interner.len(), 1);

        // Phase 2: reserve six slots; this marks the lookup stale.
        let start = interner.alloc_range(6).unwrap();
        assert!(interner.is_lookup_stale());

        // Phase 3: fill the slots as three "workers" would, with "alpha"
        // and "beta" each written twice (slots 0/2 and 1/4 respectively).
        interner.strings[start as usize] = Some(Arc::from("alpha"));
        interner.ref_counts[start as usize] = 1;
        interner.strings[(start + 1) as usize] = Some(Arc::from("beta"));
        interner.ref_counts[(start + 1) as usize] = 1;

        interner.strings[(start + 2) as usize] = Some(Arc::from("alpha"));
        interner.ref_counts[(start + 2) as usize] = 1;
        interner.strings[(start + 3) as usize] = Some(Arc::from("gamma"));
        interner.ref_counts[(start + 3) as usize] = 1;

        interner.strings[(start + 4) as usize] = Some(Arc::from("beta"));
        interner.ref_counts[(start + 4) as usize] = 1;
        interner.strings[(start + 5) as usize] = Some(Arc::from("delta"));
        interner.ref_counts[(start + 5) as usize] = 1;

        // Resolution of pre-bulk ids still works mid-commit.
        assert_eq!(interner.resolve(pre_id).unwrap().as_ref(), "pre_existing");

        // Phase 4: dedup collapses the duplicate slots and clears staleness.
        let remap = interner.build_dedup_table();
        assert!(!interner.is_lookup_stale());

        // Two duplicate slots (second "alpha", second "beta") were remapped.
        assert_eq!(remap.len(), 2);

        // Five distinct strings remain: pre_existing + alpha/beta/gamma/delta.
        assert_eq!(interner.len(), 5);
        assert!(interner.contains("pre_existing"));
        assert!(interner.contains("alpha"));
        assert!(interner.contains("beta"));
        assert!(interner.contains("gamma"));
        assert!(interner.contains("delta"));
        // The first occurrence of each duplicate wins the canonical id.
        assert_eq!(interner.get("alpha"), Some(StringId::new(start)));
        assert_eq!(interner.get("beta"), Some(StringId::new(start + 1)));

        // Dedup folded each duplicate's ref count into the canonical slot.
        assert_eq!(interner.ref_count(StringId::new(start)), 2);
        assert_eq!(interner.ref_count(StringId::new(start + 1)), 2);
    }
1647
1648 #[test]
1649 fn test_clear_resets_lookup_stale() {
1650 let mut interner = StringInterner::new();
1653 interner.alloc_range(5).unwrap();
1654 assert!(interner.is_lookup_stale());
1655
1656 interner.clear();
1657 assert!(!interner.is_lookup_stale());
1658
1659 assert_eq!(interner.len(), 0);
1661 assert!(interner.is_empty());
1662 assert!(!interner.contains("anything"));
1663 assert_eq!(interner.get("anything"), None);
1664
1665 let id = interner.intern("after_clear").unwrap();
1667 assert_eq!(interner.resolve(id).unwrap().as_ref(), "after_clear");
1668 assert_eq!(interner.len(), 1);
1669 }
1670
1671 #[test]
1672 fn test_bulk_slices_mut_sets_lookup_stale() {
1673 let mut interner = StringInterner::new();
1676
1677 interner.strings.resize(4, None);
1679 interner.ref_counts.resize(4, 0);
1680 assert!(!interner.is_lookup_stale());
1681
1682 let (str_slots, rc_slots) = interner.bulk_slices_mut(1, 3);
1684 str_slots[0] = Some(Arc::from("x"));
1685 rc_slots[0] = 1;
1686 assert!(interner.is_lookup_stale());
1687
1688 let _remap = interner.build_dedup_table();
1690 assert!(!interner.is_lookup_stale());
1691 assert!(interner.contains("x"));
1692 }
1693
1694 #[test]
1695 fn test_bulk_slices_mut_zero_does_not_set_stale() {
1696 let mut interner = StringInterner::new();
1697 interner.intern("existing").unwrap();
1698 assert!(!interner.is_lookup_stale());
1699
1700 let (_s, _r) = interner.bulk_slices_mut(1, 0);
1702 assert!(!interner.is_lookup_stale());
1703
1704 assert!(interner.contains("existing"));
1706 }
1707
1708 #[test]
1709 fn test_with_max_ids_capacity_exhaustion_via_intern() {
1710 let mut interner = StringInterner::with_max_ids(2);
1712
1713 interner.intern("a").unwrap();
1714 interner.intern("b").unwrap();
1715
1716 let err = interner.intern("c").unwrap_err();
1718 assert_eq!(err, InternError::CapacityExhausted);
1719
1720 let id = interner.intern("a").unwrap();
1722 assert!(interner.resolve(id).is_some());
1723 }
1724
1725 #[test]
1726 fn test_with_max_ids_capacity_exhaustion_via_intern_without_ref() {
1727 let mut interner = StringInterner::with_max_ids(1);
1728 interner.intern_without_ref("only").unwrap();
1729
1730 let err = interner.intern_without_ref("second").unwrap_err();
1732 assert_eq!(err, InternError::CapacityExhausted);
1733 }
1734
1735 #[test]
1736 fn test_intern_without_ref_already_interned_no_ref_count_change() {
1737 let mut interner = StringInterner::new();
1738 let id = interner.intern("hello").unwrap();
1739 assert_eq!(interner.ref_count(id), 1);
1740
1741 let id2 = interner.intern_without_ref("hello").unwrap();
1744 assert_eq!(id, id2);
1745 assert_eq!(interner.ref_count(id), 1); }
1747
1748 #[test]
1749 fn test_recycle_unreferenced_noop_when_all_referenced() {
1750 let mut interner = StringInterner::new();
1751 interner.intern("a").unwrap(); interner.intern("b").unwrap(); let recycled = interner.recycle_unreferenced();
1755 assert_eq!(recycled, 0); assert_eq!(interner.len(), 2); }
1758
1759 #[test]
1760 fn test_inc_ref_out_of_bounds_returns_none() {
1761 let mut interner = StringInterner::new();
1762 let result = interner.inc_ref(StringId::new(999));
1764 assert!(result.is_none());
1765 }
1766
1767 #[test]
1768 fn test_dec_ref_out_of_bounds_returns_none() {
1769 let mut interner = StringInterner::new();
1770 let result = interner.dec_ref(StringId::new(999));
1772 assert!(result.is_none());
1773 }
1774
1775 #[test]
1776 fn test_intern_without_ref_reuses_free_list_slot() {
1777 let mut interner = StringInterner::new();
1778 interner.intern_without_ref("recycle_me").unwrap(); interner.intern("anchor").unwrap(); let recycled = interner.recycle_unreferenced();
1782 assert_eq!(recycled, 1); let new_id = interner.intern_without_ref("fresh").unwrap();
1786 assert_eq!(interner.resolve(new_id).unwrap().as_ref(), "fresh");
1787 assert_eq!(interner.ref_count(new_id), 0);
1788 }
1789}