1use scirs2_core::metrics::{Counter, Histogram, Timer};
17use std::collections::HashMap;
18use std::hash::{Hash, Hasher};
19use std::sync::{Arc, RwLock, Weak};
20
21pub struct StringInterner {
29 strings: RwLock<HashMap<String, Weak<str>>>,
31 string_to_id: RwLock<HashMap<String, u32>>,
33 id_to_string: RwLock<HashMap<u32, Arc<str>>>,
35 next_id: AtomicU32,
37 stats: RwLock<InternerStats>,
39 cache_hit_counter: Arc<Counter>,
41 cache_miss_counter: Arc<Counter>,
42 intern_timer: Arc<Timer>,
43 string_length_histogram: Arc<Histogram>,
44 memory_usage_histogram: Arc<Histogram>,
45}
46
47use std::sync::atomic::AtomicU32;
49
/// Running usage counters kept by a `StringInterner`.
#[derive(Clone, Debug, Default)]
pub struct InternerStats {
    /// Number of intern requests recorded so far.
    pub total_requests: usize,
    /// Requests answered with an already-live string.
    pub cache_hits: usize,
    /// Requests that had to allocate a new string.
    pub cache_misses: usize,
    /// Count of strings stored in the interner's table.
    pub total_strings_stored: usize,
    /// Accumulated byte lengths used as the "memory saved" estimate.
    pub memory_saved_bytes: usize,
}
59
/// Snapshot of the interner's approximate memory footprint.
#[derive(Clone, Debug)]
pub struct MemoryUsage {
    /// Number of entries in the string table (live or not yet cleaned up).
    pub interned_strings: usize,
    /// Number of string-to-ID mappings.
    pub id_mappings: usize,
    /// Rough size estimate in bytes derived from the two entry counts.
    pub estimated_memory_bytes: usize,
    /// The interner's running "memory saved" figure, in bytes.
    pub memory_saved_bytes: usize,
    /// Saved bytes relative to saved bytes plus per-entry overhead; `0.0` when nothing was saved.
    pub compression_ratio: f64,
}
69
/// Aggregated view of the metric instruments attached to an interner.
#[derive(Clone, Debug)]
pub struct InternerMetrics {
    /// Value of the cache-hit counter.
    pub cache_hits: u64,
    /// Value of the cache-miss counter.
    pub cache_misses: u64,
    /// Sum of hits and misses.
    pub total_requests: u64,
    /// Hits divided by total requests; `0.0` when there were no requests.
    pub hit_ratio: f64,
    /// Mean reported by the intern timer.
    pub avg_intern_time_secs: f64,
    /// Number of observations reported by the intern timer.
    pub total_intern_observations: u64,
    /// Mean reported by the string-length histogram.
    pub avg_string_length: f64,
    /// Sum reported by the memory-usage histogram, truncated to an integer.
    pub total_memory_tracked_bytes: u64,
}
90
91impl InternerStats {
92 pub fn hit_ratio(&self) -> f64 {
93 if self.total_requests == 0 {
94 0.0
95 } else {
96 self.cache_hits as f64 / self.total_requests as f64
97 }
98 }
99}
100
101impl StringInterner {
102 pub fn new() -> Self {
110 Self::with_capacity(1024) }
112
113 pub fn with_capacity(capacity: usize) -> Self {
118 StringInterner {
119 strings: RwLock::new(HashMap::with_capacity(capacity)),
120 string_to_id: RwLock::new(HashMap::with_capacity(capacity)),
121 id_to_string: RwLock::new(HashMap::with_capacity(capacity)),
122 next_id: AtomicU32::new(0),
123 stats: RwLock::new(InternerStats::default()),
124 cache_hit_counter: Arc::new(Counter::new("interner.cache_hits".to_string())),
125 cache_miss_counter: Arc::new(Counter::new("interner.cache_misses".to_string())),
126 intern_timer: Arc::new(Timer::new("interner.intern_time".to_string())),
127 string_length_histogram: Arc::new(Histogram::new("interner.string_length".to_string())),
128 memory_usage_histogram: Arc::new(Histogram::new("interner.memory_usage".to_string())),
129 }
130 }
131
132 pub fn intern(&self, s: &str) -> Arc<str> {
139 let _guard = self.intern_timer.start();
140
141 self.string_length_histogram.observe(s.len() as f64);
143
144 {
146 let strings = self.strings.read().expect("strings lock poisoned");
147 if let Some(weak_ref) = strings.get(s) {
148 if let Some(arc_str) = weak_ref.upgrade() {
149 self.cache_hit_counter.inc();
151 {
152 let mut stats = self.stats.write().expect("stats lock poisoned");
153 stats.total_requests += 1;
154 stats.cache_hits += 1;
155 }
156 return arc_str;
157 }
158 }
159 }
160
161 let mut strings = self.strings.write().expect("strings lock poisoned");
163
164 if let Some(weak_ref) = strings.get(s) {
166 if let Some(arc_str) = weak_ref.upgrade() {
167 self.cache_hit_counter.inc();
169 drop(strings); {
171 let mut stats = self.stats.write().expect("stats lock poisoned");
172 stats.total_requests += 1;
173 stats.cache_hits += 1;
174 }
175 return arc_str;
176 }
177 }
178
179 let arc_str: Arc<str> = Arc::from(s);
181 let weak_ref = Arc::downgrade(&arc_str);
182 strings.insert(s.to_string(), weak_ref);
183
184 self.cache_miss_counter.inc();
186 drop(strings); {
188 let mut stats = self.stats.write().expect("stats lock poisoned");
189 stats.total_requests += 1;
190 stats.cache_misses += 1;
191 stats.total_strings_stored += 1;
192 stats.memory_saved_bytes += s.len(); }
194
195 arc_str
196 }
197
198 pub fn intern_with_id(&self, s: &str) -> (Arc<str>, u32) {
200 {
202 let string_to_id = self
203 .string_to_id
204 .read()
205 .expect("string_to_id lock poisoned");
206 if let Some(&id) = string_to_id.get(s) {
207 let id_to_string = self
209 .id_to_string
210 .read()
211 .expect("id_to_string lock poisoned");
212 if let Some(arc_str) = id_to_string.get(&id) {
213 {
215 let mut stats = self.stats.write().expect("stats lock poisoned");
216 stats.total_requests += 1;
217 stats.cache_hits += 1;
218 }
219 return (arc_str.clone(), id);
220 }
221 }
222 }
223
224 let arc_str = self.intern(s); let id = self
227 .next_id
228 .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
229
230 {
232 let mut string_to_id = self
233 .string_to_id
234 .write()
235 .expect("string_to_id lock poisoned");
236 string_to_id.insert(s.to_string(), id);
237 }
238 {
239 let mut id_to_string = self
240 .id_to_string
241 .write()
242 .expect("id_to_string lock poisoned");
243 id_to_string.insert(id, arc_str.clone());
244 }
245
246 (arc_str, id)
247 }
248
249 pub fn get_id(&self, s: &str) -> Option<u32> {
251 let string_to_id = self
252 .string_to_id
253 .read()
254 .expect("string_to_id lock poisoned");
255 string_to_id.get(s).copied()
256 }
257
258 pub fn get_string(&self, id: u32) -> Option<Arc<str>> {
260 let id_to_string = self
261 .id_to_string
262 .read()
263 .expect("id_to_string lock poisoned");
264 id_to_string.get(&id).cloned()
265 }
266
267 pub fn get_all_mappings(&self) -> Vec<(u32, Arc<str>)> {
269 let id_to_string = self
270 .id_to_string
271 .read()
272 .expect("id_to_string lock poisoned");
273 id_to_string
274 .iter()
275 .map(|(&id, s)| (id, s.clone()))
276 .collect()
277 }
278
279 pub fn cleanup(&self) -> usize {
283 let mut strings = self.strings.write().expect("strings lock poisoned");
284 let before = strings.len();
285 strings.retain(|_, weak_ref| weak_ref.strong_count() > 0);
286 let after = strings.len();
287 before - after
288 }
289
290 pub fn stats(&self) -> InternerStats {
292 self.stats.read().expect("stats lock poisoned").clone()
293 }
294
295 pub fn len(&self) -> usize {
297 let id_count = self
299 .string_to_id
300 .read()
301 .expect("string_to_id lock poisoned")
302 .len();
303 let string_count = self.strings.read().expect("strings lock poisoned").len();
304 std::cmp::max(id_count, string_count)
305 }
306
307 pub fn id_mapping_count(&self) -> usize {
309 self.string_to_id
310 .read()
311 .expect("string_to_id lock poisoned")
312 .len()
313 }
314
315 pub fn is_empty(&self) -> bool {
317 self.strings
318 .read()
319 .expect("strings lock poisoned")
320 .is_empty()
321 }
322
323 pub fn intern_batch(&self, strings: &[&str]) -> Vec<Arc<str>> {
326 let mut result = Vec::with_capacity(strings.len());
327 let mut to_create = Vec::new();
328
329 {
331 let string_map = self.strings.read().expect("strings lock poisoned");
332 for &s in strings {
333 if let Some(weak_ref) = string_map.get(s) {
334 if let Some(arc_str) = weak_ref.upgrade() {
335 result.push(arc_str);
336 continue;
337 }
338 }
339 to_create.push((result.len(), s));
340 result.push(Arc::from("")); }
342 }
343
344 if !to_create.is_empty() {
346 let mut string_map = self.strings.write().expect("strings lock poisoned");
347 let mut stats = self.stats.write().expect("stats lock poisoned");
348
349 for (index, s) in to_create {
350 if let Some(weak_ref) = string_map.get(s) {
352 if let Some(arc_str) = weak_ref.upgrade() {
353 result[index] = arc_str;
354 stats.cache_hits += 1;
355 continue;
356 }
357 }
358
359 let arc_str: Arc<str> = Arc::from(s);
361 let weak_ref = Arc::downgrade(&arc_str);
362 string_map.insert(s.to_string(), weak_ref);
363 result[index] = arc_str;
364
365 stats.cache_misses += 1;
366 stats.total_strings_stored += 1;
367 stats.memory_saved_bytes += s.len();
368 }
369
370 stats.total_requests += strings.len();
371 }
372
373 result
374 }
375
376 pub fn prefetch(&self, strings: &[&str]) {
379 let _ = self.intern_batch(strings);
380 }
381
382 pub fn memory_usage(&self) -> MemoryUsage {
384 let string_map_size = self.strings.read().expect("strings lock poisoned").len();
385 let id_map_size = self
386 .string_to_id
387 .read()
388 .expect("string_to_id lock poisoned")
389 .len();
390 let stats = self.stats.read().expect("stats lock poisoned");
391
392 MemoryUsage {
393 interned_strings: string_map_size,
394 id_mappings: id_map_size,
395 estimated_memory_bytes: string_map_size * 64 + id_map_size * 8, memory_saved_bytes: stats.memory_saved_bytes,
397 compression_ratio: if stats.memory_saved_bytes > 0 {
398 stats.memory_saved_bytes as f64
399 / (stats.memory_saved_bytes + string_map_size * 32) as f64
400 } else {
401 0.0
402 },
403 }
404 }
405
406 pub fn get_metrics(&self) -> InternerMetrics {
414 let cache_hits = self.cache_hit_counter.get();
415 let cache_misses = self.cache_miss_counter.get();
416 let total_requests = cache_hits + cache_misses;
417 let hit_ratio = if total_requests > 0 {
418 cache_hits as f64 / total_requests as f64
419 } else {
420 0.0
421 };
422
423 let timer_stats = self.intern_timer.get_stats();
424 let string_length_stats = self.string_length_histogram.get_stats();
425 let memory_stats = self.memory_usage_histogram.get_stats();
426
427 InternerMetrics {
428 cache_hits,
429 cache_misses,
430 total_requests,
431 hit_ratio,
432 avg_intern_time_secs: timer_stats.mean,
433 total_intern_observations: timer_stats.count,
434 avg_string_length: string_length_stats.mean,
435 total_memory_tracked_bytes: memory_stats.sum as u64,
436 }
437 }
438
439 pub fn optimize(&self) {
447 let start = std::time::Instant::now();
448
449 let cleaned_count = self.cleanup();
451
452 let current_size = {
454 let strings = self.strings.read().expect("strings lock poisoned");
455 strings.len()
456 };
457
458 let optimal_capacity = ((current_size as f64 * 1.3) as usize).max(1024);
460
461 {
462 let mut strings = self.strings.write().expect("strings lock poisoned");
463 let mut string_to_id = self
464 .string_to_id
465 .write()
466 .expect("string_to_id lock poisoned");
467 let mut id_to_string = self
468 .id_to_string
469 .write()
470 .expect("id_to_string lock poisoned");
471
472 let mut new_strings = HashMap::with_capacity(optimal_capacity);
474 let mut new_string_to_id = HashMap::with_capacity(optimal_capacity);
475 let mut new_id_to_string = HashMap::with_capacity(optimal_capacity);
476
477 for (key, value) in strings.drain() {
479 new_strings.insert(key, value);
480 }
481 for (key, value) in string_to_id.drain() {
482 new_string_to_id.insert(key, value);
483 }
484 for (key, value) in id_to_string.drain() {
485 new_id_to_string.insert(key, value);
486 }
487
488 *strings = new_strings;
490 *string_to_id = new_string_to_id;
491 *id_to_string = new_id_to_string;
492 }
493
494 let mem_usage = self.memory_usage();
496 self.memory_usage_histogram
497 .observe(mem_usage.estimated_memory_bytes as f64);
498
499 {
501 let mut stats = self.stats.write().expect("stats lock poisoned");
502 stats.total_strings_stored = current_size;
503 }
504
505 let duration = start.elapsed();
506 tracing::debug!(
507 "Interner optimized: cleaned {} entries, rehashed to capacity {}, took {:?}",
508 cleaned_count,
509 optimal_capacity,
510 duration
511 );
512 }
513}
514
515impl Default for StringInterner {
516 fn default() -> Self {
517 Self::new()
518 }
519}
520
521impl std::fmt::Debug for StringInterner {
522 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
523 f.debug_struct("StringInterner")
524 .field(
525 "strings_count",
526 &self.strings.read().expect("strings lock poisoned").len(),
527 )
528 .field(
529 "id_mappings_count",
530 &self
531 .string_to_id
532 .read()
533 .expect("string_to_id lock poisoned")
534 .len(),
535 )
536 .field(
537 "next_id",
538 &self.next_id.load(std::sync::atomic::Ordering::Relaxed),
539 )
540 .field("stats", &self.stats.read().expect("stats lock poisoned"))
541 .finish()
542 }
543}
544
lazy_static::lazy_static! {
    /// Process-wide interner used by `InternedString::new` (and therefore by
    /// the `From<&str>` / `From<String>` conversions).
    pub static ref IRI_INTERNER: StringInterner = StringInterner::new();

    /// Process-wide interner used by `InternedString::new_datatype`.
    pub static ref DATATYPE_INTERNER: StringInterner = StringInterner::new();

    /// Process-wide interner used by `InternedString::new_language`.
    pub static ref LANGUAGE_INTERNER: StringInterner = StringInterner::new();

    /// Process-wide general-purpose interner; not referenced elsewhere in this
    /// part of the file.
    pub static ref STRING_INTERNER: StringInterner = StringInterner::new();
}
559
/// A cheap-to-clone handle to an interned string.
///
/// Cloning only bumps the reference count of the shared allocation.
#[derive(Clone, Debug)]
pub struct InternedString {
    /// Shared text obtained from one of the interners.
    inner: Arc<str>,
}
565
566impl InternedString {
567 pub fn new(s: &str) -> Self {
569 InternedString {
570 inner: IRI_INTERNER.intern(s),
571 }
572 }
573
574 pub fn new_with_interner(s: &str, interner: &StringInterner) -> Self {
576 InternedString {
577 inner: interner.intern(s),
578 }
579 }
580
581 pub fn new_datatype(s: &str) -> Self {
583 InternedString {
584 inner: DATATYPE_INTERNER.intern(s),
585 }
586 }
587
588 pub fn new_language(s: &str) -> Self {
590 InternedString {
591 inner: LANGUAGE_INTERNER.intern(s),
592 }
593 }
594
595 pub fn as_str(&self) -> &str {
597 &self.inner
598 }
599
600 pub fn as_arc_str(&self) -> &Arc<str> {
602 &self.inner
603 }
604
605 pub fn into_arc_str(self) -> Arc<str> {
607 self.inner
608 }
609}
610
611impl std::fmt::Display for InternedString {
612 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
613 write!(f, "{}", self.inner)
614 }
615}
616
617impl std::ops::Deref for InternedString {
618 type Target = str;
619
620 fn deref(&self) -> &Self::Target {
621 &self.inner
622 }
623}
624
625impl AsRef<str> for InternedString {
626 fn as_ref(&self) -> &str {
627 &self.inner
628 }
629}
630
631impl PartialEq for InternedString {
632 fn eq(&self, other: &Self) -> bool {
633 Arc::ptr_eq(&self.inner, &other.inner) || self.inner == other.inner
635 }
636}
637
638impl Eq for InternedString {}
639
640impl Hash for InternedString {
641 fn hash<H: Hasher>(&self, state: &mut H) {
642 self.inner.hash(state);
644 }
645}
646
647impl PartialOrd for InternedString {
648 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
649 Some(self.cmp(other))
650 }
651}
652
653impl Ord for InternedString {
654 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
655 self.inner.cmp(&other.inner)
656 }
657}
658
659impl From<&str> for InternedString {
660 fn from(s: &str) -> Self {
661 InternedString::new(s)
662 }
663}
664
665impl From<String> for InternedString {
666 fn from(s: String) -> Self {
667 InternedString::new(&s)
668 }
669}
670
671pub trait RdfVocabulary {
673 const XSD_NS: &'static str = "http://www.w3.org/2001/XMLSchema#";
675 const RDF_NS: &'static str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
677 const RDFS_NS: &'static str = "http://www.w3.org/2000/01/rdf-schema#";
679 const OWL_NS: &'static str = "http://www.w3.org/2002/07/owl#";
681
682 fn xsd_string() -> InternedString {
683 InternedString::new_datatype(&format!("{}string", Self::XSD_NS))
684 }
685
686 fn xsd_integer() -> InternedString {
687 InternedString::new_datatype(&format!("{}integer", Self::XSD_NS))
688 }
689
690 fn xsd_decimal() -> InternedString {
691 InternedString::new_datatype(&format!("{}decimal", Self::XSD_NS))
692 }
693
694 fn xsd_boolean() -> InternedString {
695 InternedString::new_datatype(&format!("{}boolean", Self::XSD_NS))
696 }
697
698 fn xsd_double() -> InternedString {
699 InternedString::new_datatype(&format!("{}double", Self::XSD_NS))
700 }
701
702 fn xsd_float() -> InternedString {
703 InternedString::new_datatype(&format!("{}float", Self::XSD_NS))
704 }
705
706 fn xsd_date_time() -> InternedString {
707 InternedString::new_datatype(&format!("{}dateTime", Self::XSD_NS))
708 }
709
710 fn rdf_type() -> InternedString {
711 InternedString::new(&format!("{}type", Self::RDF_NS))
712 }
713
714 fn rdfs_label() -> InternedString {
715 InternedString::new(&format!("{}label", Self::RDFS_NS))
716 }
717
718 fn rdfs_comment() -> InternedString {
719 InternedString::new(&format!("{}comment", Self::RDFS_NS))
720 }
721}
722
723impl RdfVocabulary for InternedString {}
725
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal text shares one allocation; different text does not.
    #[test]
    fn test_string_interner_basic() {
        let interner = StringInterner::new();

        let first = interner.intern("http://example.org/test");
        let repeat = interner.intern("http://example.org/test");
        let other = interner.intern("http://example.org/different");

        assert!(Arc::ptr_eq(&first, &repeat));
        assert!(!Arc::ptr_eq(&first, &other));

        assert_eq!(&*first, "http://example.org/test");
        assert_eq!(&*repeat, "http://example.org/test");
        assert_eq!(&*other, "http://example.org/different");
    }

    /// A first request is a miss, a repeated one is a hit.
    #[test]
    fn test_string_interner_stats() {
        let interner = StringInterner::new();

        let _first = interner.intern("test");
        let after_first = interner.stats();
        assert_eq!(after_first.total_requests, 1);
        assert_eq!(after_first.cache_misses, 1);
        assert_eq!(after_first.cache_hits, 0);

        let _second = interner.intern("test");
        let after_second = interner.stats();
        assert_eq!(after_second.total_requests, 2);
        assert_eq!(after_second.cache_misses, 1);
        assert_eq!(after_second.cache_hits, 1);
        assert_eq!(after_second.hit_ratio(), 0.5);
    }

    /// Entries whose string has been dropped disappear after `cleanup`.
    #[test]
    fn test_string_interner_cleanup() {
        let interner = StringInterner::new();

        {
            let _short_lived = interner.intern("temporary");
            assert_eq!(interner.len(), 1);
        }

        interner.cleanup();
        assert_eq!(interner.len(), 0);
    }

    #[test]
    fn test_interned_string_creation() {
        let first = InternedString::new("http://example.org/test");
        let same = InternedString::new("http://example.org/test");
        let different = InternedString::new("http://example.org/different");

        assert_eq!(first, same);
        assert_ne!(first, different);
        assert_eq!(first.as_str(), "http://example.org/test");
    }

    #[test]
    fn test_interned_string_ordering() {
        let apple = InternedString::new("apple");
        let banana = InternedString::new("banana");
        let apple_again = InternedString::new("apple");

        assert!(apple < banana);
        assert!(banana > apple);
        assert_eq!(apple, apple_again);

        // Sorting must place both "apple" handles ahead of "banana".
        let mut strings = vec![banana.clone(), apple.clone(), apple_again.clone()];
        strings.sort();
        assert_eq!(strings, vec![apple, apple_again, banana]);
    }

    #[test]
    fn test_interned_string_hashing() {
        use std::collections::HashMap;

        let key = InternedString::new("test");
        let same_key = InternedString::new("test");
        let other_key = InternedString::new("different");

        let mut map = HashMap::new();
        map.insert(key.clone(), "value1");
        map.insert(other_key.clone(), "value2");

        // A separately created handle with equal text finds the same entry.
        assert_eq!(map.get(&same_key), Some(&"value1"));
        assert_eq!(map.get(&other_key), Some(&"value2"));
        assert_eq!(map.len(), 2);
    }

    #[test]
    fn test_global_interners() {
        let iri_a = InternedString::new("http://example.org/test");
        let iri_b = InternedString::new("http://example.org/test");

        let datatype_a = InternedString::new_datatype("http://www.w3.org/2001/XMLSchema#string");
        let datatype_b = InternedString::new_datatype("http://www.w3.org/2001/XMLSchema#string");

        let lang_a = InternedString::new_language("en");
        let lang_b = InternedString::new_language("en");

        assert_eq!(iri_a, iri_b);
        assert_eq!(datatype_a, datatype_b);
        assert_eq!(lang_a, lang_b);
    }

    #[test]
    fn test_rdf_vocabulary() {
        let string_type = InternedString::xsd_string();
        let integer_type = InternedString::xsd_integer();
        let rdf_type = InternedString::rdf_type();

        assert_eq!(
            string_type.as_str(),
            "http://www.w3.org/2001/XMLSchema#string"
        );
        assert_eq!(
            integer_type.as_str(),
            "http://www.w3.org/2001/XMLSchema#integer"
        );
        assert_eq!(
            rdf_type.as_str(),
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
        );

        // Asking twice yields an equal handle.
        let string_type_again = InternedString::xsd_string();
        assert_eq!(string_type, string_type_again);
    }

    #[test]
    fn test_interned_string_display() {
        let iri = InternedString::new("http://example.org/test");
        assert_eq!(format!("{iri}"), "http://example.org/test");
    }

    #[test]
    fn test_interned_string_deref() {
        let text = InternedString::new("test");
        assert_eq!(&*text, "test");
        assert_eq!(text.len(), 4);
        assert!(text.starts_with("te"));
    }

    #[test]
    fn test_interned_string_conversions() {
        let from_slice = InternedString::from("test");
        let from_owned = InternedString::from("test".to_string());

        assert_eq!(from_slice, from_owned);
        assert_eq!(from_slice.as_str(), "test");
    }

    /// Ten threads intern three distinct strings a hundred times each.
    #[test]
    fn test_concurrent_interning() {
        use std::sync::Arc;
        use std::thread;

        let interner = Arc::new(StringInterner::new());
        let mut handles = Vec::new();
        for i in 0..10 {
            let interner = Arc::clone(&interner);
            handles.push(thread::spawn(move || {
                let s = format!("http://example.org/test{}", i % 3);
                (0..100).map(|_| interner.intern(&s)).collect::<Vec<_>>()
            }));
        }

        let results: Vec<Vec<Arc<str>>> = handles
            .into_iter()
            .map(|handle| handle.join().unwrap())
            .collect();

        // Within each thread's results, equal text must share one allocation.
        for result_set in &results {
            for (i, left) in result_set.iter().enumerate() {
                for right in &result_set[i + 1..] {
                    if left.as_ref() == right.as_ref() {
                        assert!(Arc::ptr_eq(left, right));
                    }
                }
            }
        }

        assert!(interner.len() <= 3);
    }

    #[test]
    fn test_term_id_mapping() {
        let interner = StringInterner::new();

        let (first_arc, first_id) = interner.intern_with_id("test_string");
        let (repeat_arc, repeat_id) = interner.intern_with_id("test_string");

        // Same text: same ID and same allocation.
        assert_eq!(first_id, repeat_id);
        assert!(Arc::ptr_eq(&first_arc, &repeat_arc));

        // Different text: different ID and allocation.
        let (other_arc, other_id) = interner.intern_with_id("different_string");
        assert_ne!(first_id, other_id);
        assert!(!Arc::ptr_eq(&first_arc, &other_arc));

        // Forward lookups.
        assert_eq!(interner.get_id("test_string"), Some(first_id));
        assert_eq!(interner.get_id("different_string"), Some(other_id));
        assert_eq!(interner.get_id("nonexistent"), None);

        // Reverse lookups.
        assert_eq!(
            interner.get_string(first_id).unwrap().as_ref(),
            "test_string"
        );
        assert_eq!(
            interner.get_string(other_id).unwrap().as_ref(),
            "different_string"
        );
        assert_eq!(interner.get_string(999), None);
    }

    #[test]
    fn test_id_mapping_stats() {
        let interner = StringInterner::new();

        assert_eq!(interner.id_mapping_count(), 0);

        interner.intern_with_id("string1");
        assert_eq!(interner.id_mapping_count(), 1);

        interner.intern_with_id("string2");
        assert_eq!(interner.id_mapping_count(), 2);

        // Re-registering an existing string must not add a mapping.
        interner.intern_with_id("string1");
        assert_eq!(interner.id_mapping_count(), 2);
    }

    #[test]
    fn test_get_all_mappings() {
        let interner = StringInterner::new();

        let (_, first_id) = interner.intern_with_id("first");
        let (_, second_id) = interner.intern_with_id("second");
        let (_, third_id) = interner.intern_with_id("third");

        let mappings = interner.get_all_mappings();
        assert_eq!(mappings.len(), 3);

        // Every expected string must show up exactly under its own ID.
        let mut seen = [false; 3];
        for (id, string) in mappings {
            match string.as_ref() {
                "first" => {
                    assert_eq!(id, first_id);
                    seen[0] = true;
                }
                "second" => {
                    assert_eq!(id, second_id);
                    seen[1] = true;
                }
                "third" => {
                    assert_eq!(id, third_id);
                    seen[2] = true;
                }
                _ => panic!("Unexpected string in mappings"),
            }
        }
        assert!(seen.iter().all(|&found| found));
    }

    /// Plain interning and ID-based interning can be mixed on one interner.
    #[test]
    fn test_mixed_interning_modes() {
        let interner = StringInterner::new();

        let plain = interner.intern("regular");
        let (_with_id_arc, with_id) = interner.intern_with_id("with_id");
        let plain_again = interner.intern("regular");

        assert!(Arc::ptr_eq(&plain, &plain_again));

        assert_eq!(interner.get_string(with_id).unwrap().as_ref(), "with_id");

        assert!(interner.len() >= 2);
    }
}