1use parking_lot::{Mutex, RwLock};
35use smol_str::SmolStr;
36use std::borrow::Cow;
37use std::collections::{HashMap, HashSet};
38use std::hash::{Hash, Hasher};
39use std::sync::Arc;
40
41#[derive(Clone, Debug)]
50pub struct InternedStr(Arc<str>);
51
52impl InternedStr {
53 #[inline]
55 pub fn new(s: Arc<str>) -> Self {
56 Self(s)
57 }
58
59 #[inline]
61 pub fn as_str(&self) -> &str {
62 &self.0
63 }
64
65 #[inline]
67 pub fn ptr_eq(a: &Self, b: &Self) -> bool {
68 Arc::ptr_eq(&a.0, &b.0)
69 }
70
71 #[inline]
73 pub fn into_arc(self) -> Arc<str> {
74 self.0
75 }
76
77 #[inline]
79 pub fn to_smol(&self) -> SmolStr {
80 SmolStr::new(&*self.0)
81 }
82
83 #[inline]
87 pub fn to_cow(&self) -> Cow<'static, str> {
88 Cow::Owned(self.0.to_string())
89 }
90}
91
92impl AsRef<str> for InternedStr {
93 #[inline]
94 fn as_ref(&self) -> &str {
95 &self.0
96 }
97}
98
99impl std::ops::Deref for InternedStr {
100 type Target = str;
101
102 #[inline]
103 fn deref(&self) -> &Self::Target {
104 &self.0
105 }
106}
107
108impl PartialEq for InternedStr {
109 #[inline]
110 fn eq(&self, other: &Self) -> bool {
111 if Arc::ptr_eq(&self.0, &other.0) {
113 return true;
114 }
115 *self.0 == *other.0
117 }
118}
119
120impl Eq for InternedStr {}
121
122impl Hash for InternedStr {
123 #[inline]
124 fn hash<H: Hasher>(&self, state: &mut H) {
125 self.0.hash(state)
126 }
127}
128
129impl std::fmt::Display for InternedStr {
130 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131 self.0.fmt(f)
132 }
133}
134
135impl From<&str> for InternedStr {
136 fn from(s: &str) -> Self {
137 GlobalInterner::get().intern(s)
138 }
139}
140
141impl From<String> for InternedStr {
142 fn from(s: String) -> Self {
143 GlobalInterner::get().intern(&s)
144 }
145}
146
147pub struct GlobalInterner {
156 strings: RwLock<HashSet<Arc<str>>>,
157 stats: Mutex<InternerStats>,
158}
159
160impl GlobalInterner {
161 pub fn get() -> &'static Self {
163 static INSTANCE: std::sync::OnceLock<GlobalInterner> = std::sync::OnceLock::new();
164 INSTANCE.get_or_init(|| {
165 let interner = GlobalInterner {
166 strings: RwLock::new(HashSet::with_capacity(256)),
167 stats: Mutex::new(InternerStats::default()),
168 };
169 interner.prepopulate();
171 interner
172 })
173 }
174
175 fn prepopulate(&self) {
177 for name in COMMON_IDENTIFIERS {
178 self.intern(name);
179 }
180 }
181
182 #[inline]
187 pub fn intern(&self, s: &str) -> InternedStr {
188 {
190 let strings = self.strings.read();
191 if let Some(existing) = strings.get(s) {
192 let mut stats = self.stats.lock();
193 stats.hits += 1;
194 return InternedStr(Arc::clone(existing));
195 }
196 }
197
198 let mut strings = self.strings.write();
200
201 if let Some(existing) = strings.get(s) {
203 let mut stats = self.stats.lock();
204 stats.hits += 1;
205 return InternedStr(Arc::clone(existing));
206 }
207
208 let arc: Arc<str> = Arc::from(s);
210 strings.insert(Arc::clone(&arc));
211
212 let mut stats = self.stats.lock();
213 stats.misses += 1;
214 stats.total_bytes += s.len();
215
216 InternedStr(arc)
217 }
218
219 #[inline]
221 pub fn lookup(&self, s: &str) -> Option<InternedStr> {
222 let strings = self.strings.read();
223 strings.get(s).map(|arc| InternedStr(Arc::clone(arc)))
224 }
225
226 pub fn len(&self) -> usize {
228 self.strings.read().len()
229 }
230
231 pub fn is_empty(&self) -> bool {
233 self.strings.read().is_empty()
234 }
235
236 pub fn stats(&self) -> InternerStats {
238 self.stats.lock().clone()
239 }
240
241 pub fn clear(&self) {
246 self.strings.write().clear();
247 *self.stats.lock() = InternerStats::default();
248 }
249}
250
251#[derive(Default)]
260pub struct ScopedInterner {
261 strings: HashSet<Arc<str>>,
262 stats: InternerStats,
263}
264
265impl ScopedInterner {
266 pub fn new() -> Self {
268 Self::default()
269 }
270
271 pub fn with_capacity(capacity: usize) -> Self {
273 Self {
274 strings: HashSet::with_capacity(capacity),
275 stats: InternerStats::default(),
276 }
277 }
278
279 #[inline]
281 pub fn intern(&mut self, s: &str) -> InternedStr {
282 if let Some(existing) = self.strings.get(s) {
283 self.stats.hits += 1;
284 return InternedStr(Arc::clone(existing));
285 }
286
287 let arc: Arc<str> = Arc::from(s);
288 self.strings.insert(Arc::clone(&arc));
289 self.stats.misses += 1;
290 self.stats.total_bytes += s.len();
291
292 InternedStr(arc)
293 }
294
295 #[inline]
297 pub fn get(&self, s: &str) -> Option<InternedStr> {
298 self.strings.get(s).map(|arc| InternedStr(Arc::clone(arc)))
299 }
300
301 pub fn len(&self) -> usize {
303 self.strings.len()
304 }
305
306 pub fn is_empty(&self) -> bool {
308 self.strings.is_empty()
309 }
310
311 pub fn stats(&self) -> &InternerStats {
313 &self.stats
314 }
315
316 pub fn clear(&mut self) {
318 self.strings.clear();
319 self.stats = InternerStats::default();
320 }
321}
322
323pub struct IdentifierCache {
332 full: RwLock<HashMap<String, InternedStr>>,
334 components: RwLock<HashSet<Arc<str>>>,
336}
337
338impl IdentifierCache {
339 pub fn new() -> Self {
341 Self {
342 full: RwLock::new(HashMap::with_capacity(128)),
343 components: RwLock::new(HashSet::with_capacity(256)),
344 }
345 }
346
347 pub fn global() -> &'static Self {
349 static INSTANCE: std::sync::OnceLock<IdentifierCache> = std::sync::OnceLock::new();
350 INSTANCE.get_or_init(Self::new)
351 }
352
353 pub fn intern_qualified(&self, table: &str, column: &str) -> InternedStr {
357 let key = format!("{}.{}", table, column);
358
359 if let Some(cached) = self.full.read().get(&key) {
361 return cached.clone();
362 }
363
364 self.intern_component(table);
366 self.intern_component(column);
367
368 let interned = GlobalInterner::get().intern(&key);
370
371 self.full.write().insert(key, interned.clone());
373
374 interned
375 }
376
377 pub fn intern_component(&self, name: &str) -> InternedStr {
379 {
381 let components = self.components.read();
382 if let Some(existing) = components.get(name) {
383 return InternedStr(Arc::clone(existing));
384 }
385 }
386
387 let interned = GlobalInterner::get().intern(name);
389
390 self.components.write().insert(interned.0.clone());
392
393 interned
394 }
395
396 pub fn get_qualified(&self, table: &str, column: &str) -> Option<InternedStr> {
398 let key = format!("{}.{}", table, column);
399 self.full.read().get(&key).cloned()
400 }
401
402 pub fn component_count(&self) -> usize {
404 self.components.read().len()
405 }
406
407 pub fn qualified_count(&self) -> usize {
409 self.full.read().len()
410 }
411}
412
413impl Default for IdentifierCache {
414 fn default() -> Self {
415 Self::new()
416 }
417}
418
/// Usage counters kept by the interners in this module.
#[derive(Debug, Clone, Default)]
pub struct InternerStats {
    /// Number of `intern` calls that found the text already stored.
    pub hits: u64,
    /// Number of `intern` calls that had to store new text.
    pub misses: u64,
    /// Sum of the byte lengths of all text stored on a miss.
    pub total_bytes: usize,
}

impl InternerStats {
    /// Fraction of recorded `intern` calls that were hits, in `0.0..=1.0`.
    ///
    /// Returns `0.0` when nothing has been recorded yet.
    pub fn hit_ratio(&self) -> f64 {
        // Sum in `u128`: adding the two `u64` counters directly can
        // overflow (panic in debug builds, wrap in release builds).
        let total = u128::from(self.hits) + u128::from(self.misses);
        if total == 0 {
            0.0
        } else {
            self.hits as f64 / total as f64
        }
    }
}
445
/// Identifiers that the global interner stores eagerly when it is created.
///
/// The entries are grouped by theme for readability only; nothing depends on
/// the grouping.
#[rustfmt::skip]
const COMMON_IDENTIFIERS: &[&str] = &[
    // General-purpose column names and flags.
    "id", "uuid", "name", "email", "username", "password", "password_hash",
    "title", "description", "content", "body", "text", "status", "state",
    "type", "kind", "role", "active", "enabled", "deleted", "archived",
    "verified", "confirmed", "published", "visible", "public", "private",

    // Counts, ordering and measurements.
    "count", "total", "score", "rating", "priority", "order", "position",
    "rank", "level", "index", "sequence", "age", "amount", "price", "cost",
    "quantity", "weight", "height", "width", "length", "size",

    // `*_id` reference columns.
    "user_id", "account_id", "organization_id", "tenant_id", "post_id",
    "comment_id", "article_id", "product_id", "order_id", "item_id",
    "category_id", "tag_id", "parent_id", "author_id", "owner_id",
    "creator_id", "assignee_id", "reviewer_id",

    // `*_at` timestamp columns.
    "created_at", "updated_at", "deleted_at", "published_at", "expires_at",
    "starts_at", "ends_at", "last_login_at", "last_seen_at", "verified_at",
    "confirmed_at",

    // Locations and links.
    "slug", "url", "uri", "path", "permalink",
    "link", "href", "src", "source", "destination",

    // Keys, secrets and credentials.
    "key", "value", "token", "secret", "code",
    "pin", "otp", "api_key", "access_token", "refresh_token",

    // Versioning, integrity and free-form data.
    "version", "revision", "checksum", "hash", "signature", "fingerprint",
    "metadata", "data", "payload", "config", "settings", "options", "preferences",

    // Plural table names.
    "users", "accounts", "organizations", "tenants", "posts", "comments", "articles",
    "products", "orders", "items", "categories", "tags", "files", "images",
    "documents", "messages", "notifications", "events", "logs", "sessions", "tokens",

    // Upper-case SQL keywords.
    "SELECT", "FROM", "WHERE", "AND", "OR", "NOT", "IN", "IS", "NULL", "TRUE", "FALSE",
    "ASC", "DESC", "LIMIT", "OFFSET", "ORDER", "BY", "GROUP", "HAVING",
    "JOIN", "LEFT", "RIGHT", "INNER", "OUTER", "ON", "AS",
];
619
620#[inline]
626pub fn intern(s: &str) -> InternedStr {
627 GlobalInterner::get().intern(s)
628}
629
630#[inline]
632pub fn get_interned(s: &str) -> Option<InternedStr> {
633 GlobalInterner::get().lookup(s)
634}
635
636#[inline]
638pub fn intern_qualified(table: &str, column: &str) -> InternedStr {
639 IdentifierCache::global().intern_qualified(table, column)
640}
641
642#[inline]
644pub fn intern_component(name: &str) -> InternedStr {
645 IdentifierCache::global().intern_component(name)
646}
647
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text twice globally yields one shared allocation.
    #[test]
    fn test_global_interner_dedup() {
        let global = GlobalInterner::get();

        let first = global.intern("test_field");
        let second = global.intern("test_field");

        assert!(InternedStr::ptr_eq(&first, &second));
    }

    /// A scoped interner deduplicates and stores each text once.
    #[test]
    fn test_scoped_interner() {
        let mut scoped = ScopedInterner::new();

        let first = scoped.intern("scoped_field");
        let second = scoped.intern("scoped_field");

        assert!(InternedStr::ptr_eq(&first, &second));
        assert_eq!(scoped.len(), 1);
    }

    /// Qualified names are joined with a dot and cached.
    #[test]
    fn test_identifier_cache_qualified() {
        let cache = IdentifierCache::new();

        let first = cache.intern_qualified("users", "email");
        let second = cache.intern_qualified("users", "email");

        assert!(InternedStr::ptr_eq(&first, &second));
        assert_eq!(first.as_str(), "users.email");
    }

    /// Equality follows the text of the handles.
    #[test]
    fn test_interned_str_equality() {
        let global = GlobalInterner::get();

        let left = global.intern("equal_test");
        let right = global.intern("equal_test");
        let unrelated = global.intern("different");

        assert_eq!(left, right);
        assert_ne!(left, unrelated);
    }

    /// Handles with equal text land in the same hash bucket.
    #[test]
    fn test_interned_str_hash() {
        use std::collections::HashSet;

        let global = GlobalInterner::get();

        let stored = global.intern("hash_test");
        let probe = global.intern("hash_test");

        let mut seen = HashSet::new();
        seen.insert(stored.clone());

        assert!(seen.contains(&probe));
    }

    /// First intern is a miss, the repeat is a hit.
    #[test]
    fn test_interner_stats() {
        let mut scoped = ScopedInterner::new();

        let _ = scoped.intern("stats_test");
        assert_eq!(scoped.stats().misses, 1);
        assert_eq!(scoped.stats().hits, 0);

        let _ = scoped.intern("stats_test");
        assert_eq!(scoped.stats().misses, 1);
        assert_eq!(scoped.stats().hits, 1);

        assert!(scoped.stats().hit_ratio() > 0.4);
    }

    /// Common identifiers can be looked up in the global interner.
    #[test]
    fn test_common_identifiers_prepopulated() {
        let global = GlobalInterner::get();

        let _ = global.intern("id");
        let _ = global.intern("created_at");
        let _ = global.intern("user_id");

        assert!(global.lookup("id").is_some());
        assert!(global.lookup("email").is_some());
    }

    /// `From<&str>` and `From<String>` both produce handles with the same text.
    #[test]
    fn test_interned_str_from() {
        let from_borrowed: InternedStr = "from_str".into();
        let from_owned: InternedStr = String::from("from_string").into();

        assert_eq!(from_borrowed.as_str(), "from_str");
        assert_eq!(from_owned.as_str(), "from_string");
    }
}
752