prax_query/intern.rs
1//! String interning for efficient field name storage.
2//!
3//! This module provides string interning to reduce memory allocations when the same
4//! field names are used across many filters. Interned strings share memory, making
5//! cloning essentially free.
6//!
7//! # When to Use
8//!
9//! String interning is most beneficial when:
10//! - The same field names are used in many filters
11//! - Field names come from dynamic sources (not `&'static str`)
12//! - You're building complex query trees with repeated field references
13//!
14//! For static field names (`&'static str`), use them directly - they're already
15//! "interned" by the compiler with zero overhead.
16//!
17//! # Examples
18//!
19//! ## Using Pre-defined Field Names
20//!
21//! ```rust
22//! use prax_query::intern::fields;
23//! use prax_query::{Filter, FilterValue};
24//!
25//! // Common field names are pre-defined as constants
26//! let filter = Filter::Equals(fields::ID.into(), FilterValue::Int(42));
27//! let filter = Filter::Equals(fields::EMAIL.into(), FilterValue::String("test@example.com".into()));
28//! let filter = Filter::Gt(fields::CREATED_AT.into(), FilterValue::String("2024-01-01".into()));
29//! ```
30//!
31//! ## Interning Dynamic Strings
32//!
33//! ```rust
34//! use prax_query::intern::{intern, intern_cow};
35//! use prax_query::{Filter, FilterValue, FieldName};
36//!
37//! // Intern a dynamic string - subsequent calls return the same Arc<str>
38//! let field1 = intern("dynamic_field");
39//! let field2 = intern("dynamic_field");
40//! // field1 and field2 point to the same memory
41//!
42//! // Use interned string directly in filters
43//! let filter = Filter::Equals(intern_cow("user_id"), FilterValue::Int(1));
44//! ```
45
46use std::borrow::Cow;
47use std::cell::RefCell;
48use std::collections::HashSet;
49use std::sync::Arc;
50
/// Ready-made constants for frequently used column names.
///
/// Every item here is a plain `&'static str`, so referring to one never
/// allocates. Reach for them whenever a field in your schema carries one of
/// these common names.
pub mod fields {
    /// `"id"` — primary key.
    pub const ID: &str = "id";
    /// `"uuid"` — UUID column.
    pub const UUID: &str = "uuid";
    /// `"email"` — email address.
    pub const EMAIL: &str = "email";
    /// `"name"` — name column.
    pub const NAME: &str = "name";
    /// `"title"` — title column.
    pub const TITLE: &str = "title";
    /// `"description"` — description column.
    pub const DESCRIPTION: &str = "description";
    /// `"status"` — status column.
    pub const STATUS: &str = "status";
    /// `"active"` — active flag.
    pub const ACTIVE: &str = "active";
    /// `"enabled"` — enabled flag.
    pub const ENABLED: &str = "enabled";
    /// `"deleted"` — deleted flag.
    pub const DELETED: &str = "deleted";
    /// `"created_at"` — creation timestamp.
    pub const CREATED_AT: &str = "created_at";
    /// `"updated_at"` — last-update timestamp.
    pub const UPDATED_AT: &str = "updated_at";
    /// `"deleted_at"` — deletion timestamp.
    pub const DELETED_AT: &str = "deleted_at";
    /// `"user_id"` — foreign key to a user.
    pub const USER_ID: &str = "user_id";
    /// `"author_id"` — foreign key to an author.
    pub const AUTHOR_ID: &str = "author_id";
    /// `"parent_id"` — foreign key to a parent row.
    pub const PARENT_ID: &str = "parent_id";
    /// `"owner_id"` — foreign key to an owner.
    pub const OWNER_ID: &str = "owner_id";
    /// `"tenant_id"` — tenant identifier for multi-tenancy.
    pub const TENANT_ID: &str = "tenant_id";
    /// `"org_id"` — organization identifier.
    pub const ORG_ID: &str = "org_id";
    /// `"type"` — type discriminator.
    pub const TYPE: &str = "type";
    /// `"kind"` — kind discriminator.
    pub const KIND: &str = "kind";
    /// `"slug"` — slug column.
    pub const SLUG: &str = "slug";
    /// `"content"` — content column.
    pub const CONTENT: &str = "content";
    /// `"body"` — body column.
    pub const BODY: &str = "body";
    /// `"order"` — order/position column.
    pub const ORDER: &str = "order";
    /// `"position"` — position column.
    pub const POSITION: &str = "position";
    /// `"priority"` — priority column.
    pub const PRIORITY: &str = "priority";
    /// `"score"` — score column.
    pub const SCORE: &str = "score";
    /// `"count"` — count column.
    pub const COUNT: &str = "count";
    /// `"price"` — price column.
    pub const PRICE: &str = "price";
    /// `"amount"` — amount column.
    pub const AMOUNT: &str = "amount";
    /// `"quantity"` — quantity column.
    pub const QUANTITY: &str = "quantity";
    /// `"version"` — version column.
    pub const VERSION: &str = "version";
    /// `"age"` — age column.
    pub const AGE: &str = "age";
    /// `"role"` — role column.
    pub const ROLE: &str = "role";
    /// `"verified"` — verified column.
    pub const VERIFIED: &str = "verified";
    /// `"password"` — password column.
    pub const PASSWORD: &str = "password";
    /// `"first_name"` — first name.
    pub const FIRST_NAME: &str = "first_name";
    /// `"last_name"` — last name.
    pub const LAST_NAME: &str = "last_name";
    /// `"category"` — category column.
    pub const CATEGORY: &str = "category";
    /// `"tags"` — tags column.
    pub const TAGS: &str = "tags";
    /// `"published"` — published flag.
    pub const PUBLISHED: &str = "published";
    /// `"published_at"` — publication timestamp.
    pub const PUBLISHED_AT: &str = "published_at";
    /// `"expires_at"` — expiry timestamp.
    pub const EXPIRES_AT: &str = "expires_at";
    /// `"started_at"` — start timestamp.
    pub const STARTED_AT: &str = "started_at";
    /// `"completed_at"` — completion timestamp.
    pub const COMPLETED_AT: &str = "completed_at";
    /// `"archived"` — archived flag.
    pub const ARCHIVED: &str = "archived";
    /// `"flagged"` — flagged flag.
    pub const FLAGGED: &str = "flagged";
    /// `"data"` — data column.
    pub const DATA: &str = "data";
    /// `"metadata"` — metadata column.
    pub const METADATA: &str = "metadata";
    /// `"url"` — URL column.
    pub const URL: &str = "url";
    /// `"image_url"` — image URL.
    pub const IMAGE_URL: &str = "image_url";
    /// `"avatar_url"` — avatar URL.
    pub const AVATAR_URL: &str = "avatar_url";
    /// `"file"` — file column.
    pub const FILE: &str = "file";
    /// `"path"` — path column.
    pub const PATH: &str = "path";
    /// `"attempts"` — attempts column.
    pub const ATTEMPTS: &str = "attempts";
    /// `"max_attempts"` — maximum attempts.
    pub const MAX_ATTEMPTS: &str = "max_attempts";

    /// The complete registry of static field names, ordered by string value.
    ///
    /// `lookup()` binary-searches this slice, so the ordering is load-bearing:
    /// any new entry must be inserted at its sorted position.
    pub const ALL_SORTED: &[&str] = &[
        // a
        ACTIVE,
        AGE,
        AMOUNT,
        ARCHIVED,
        ATTEMPTS,
        AUTHOR_ID,
        AVATAR_URL,
        // b - c
        BODY,
        CATEGORY,
        COMPLETED_AT,
        CONTENT,
        COUNT,
        CREATED_AT,
        // d - f
        DATA,
        DELETED,
        DELETED_AT,
        DESCRIPTION,
        EMAIL,
        ENABLED,
        EXPIRES_AT,
        FILE,
        FIRST_NAME,
        FLAGGED,
        // i - o
        ID,
        IMAGE_URL,
        KIND,
        LAST_NAME,
        MAX_ATTEMPTS,
        METADATA,
        NAME,
        ORDER,
        ORG_ID,
        OWNER_ID,
        // p - s
        PARENT_ID,
        PASSWORD,
        PATH,
        POSITION,
        PRICE,
        PRIORITY,
        PUBLISHED,
        PUBLISHED_AT,
        QUANTITY,
        ROLE,
        SCORE,
        SLUG,
        STARTED_AT,
        STATUS,
        // t - v
        TAGS,
        TENANT_ID,
        TITLE,
        TYPE,
        UPDATED_AT,
        URL,
        USER_ID,
        UUID,
        VERIFIED,
        VERSION,
    ];

    /// Resolve `name` against the static registry.
    ///
    /// Yields the registry's own `&'static str` when `name` is registered and
    /// `None` when it is not. The match is exact (case-sensitive).
    ///
    /// # Performance
    ///
    /// A binary search over `ALL_SORTED` (~57 entries), i.e. O(log n).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    ///
    /// assert_eq!(fields::lookup("id"), Some("id"));
    /// assert_eq!(fields::lookup("email"), Some("email"));
    /// assert_eq!(fields::lookup("unknown"), None);
    /// ```
    #[inline]
    pub fn lookup(name: &str) -> Option<&'static str> {
        let index = ALL_SORTED.binary_search(&name).ok()?;
        // Hand back the registry entry rather than `name`, so the result is 'static.
        Some(ALL_SORTED[index])
    }

    /// Turn `name` into a `Cow<'static, str>`, preferring the static registry.
    ///
    /// A registered name comes back as `Cow::Borrowed` pointing at the
    /// registry entry; anything else is copied into a `Cow::Owned` string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    /// use std::borrow::Cow;
    ///
    /// // Registered name: borrowed, no allocation
    /// let name = fields::as_cow("id");
    /// assert!(matches!(name, Cow::Borrowed(_)));
    ///
    /// // Unregistered name: copied into an owned String
    /// let name = fields::as_cow("custom_field");
    /// assert!(matches!(name, Cow::Owned(_)));
    /// ```
    #[inline]
    pub fn as_cow(name: &str) -> std::borrow::Cow<'static, str> {
        use std::borrow::Cow;

        if let Some(registered) = lookup(name) {
            Cow::Borrowed(registered)
        } else {
            Cow::Owned(String::from(name))
        }
    }
}
281
// Per-thread pool of interned strings.
//
// Entries are `Arc<str>` values held in a `HashSet`, so each distinct string
// is stored at most once per thread. The `RefCell` supplies the mutable
// access needed to add entries to the thread-local.
thread_local! {
    static INTERNER: RefCell<HashSet<Arc<str>>> = RefCell::new(HashSet::new());
}

/// Return the shared `Arc<str>` for `s`, creating it on first use.
///
/// When `s` is already in the current thread's pool, a clone of the pooled
/// `Arc<str>` is returned. Otherwise a new `Arc<str>` is allocated, added to
/// the pool, and returned. The pool is thread-local, so pointer identity is
/// only shared between calls made on the same thread.
///
/// # Performance
///
/// - First call for a string: O(n) in the string length (hash + allocation)
/// - Later calls: O(n) for the hash lookup, with no allocation
/// - Cloning the returned value: O(1) (a reference-count increment)
///
/// # Examples
///
/// ```rust
/// use prax_query::intern::intern;
/// use std::sync::Arc;
///
/// let s1 = intern("field_name");
/// let s2 = intern("field_name");
///
/// // Both handles refer to one allocation
/// assert!(Arc::ptr_eq(&s1, &s2));
/// ```
#[inline]
pub fn intern(s: &str) -> Arc<str> {
    INTERNER.with(|cell| {
        let mut pool = cell.borrow_mut();

        match pool.get(s) {
            // Seen before: share the pooled allocation.
            Some(pooled) => Arc::clone(pooled),
            // New string: allocate once, keep one handle in the pool.
            None => {
                let fresh: Arc<str> = Arc::from(s);
                pool.insert(Arc::clone(&fresh));
                fresh
            }
        }
    })
}
329
330/// Intern a string and return it as a `Cow<'static, str>`.
331///
332/// This is a convenience function for use with filter APIs that expect `FieldName`.
333/// The returned `Cow` contains an owned `String` created from the interned `Arc<str>`.
334///
335/// # Note
336///
337/// For static strings, prefer using them directly (e.g., `"id".into()`) as that
338/// creates a `Cow::Borrowed` with zero allocation. Use this function only for
339/// dynamic strings that are repeated many times.
340///
341/// # Examples
342///
343/// ```rust
344/// use prax_query::intern::intern_cow;
345/// use prax_query::{Filter, FilterValue};
346///
347/// // Good: Interning a dynamic field name used in many filters
348/// let field_name = format!("field_{}", 42);
349/// let filter1 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(1));
350/// let filter2 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(2));
351/// ```
352#[inline]
353pub fn intern_cow(s: &str) -> Cow<'static, str> {
354 // Note: We convert Arc<str> to String here because Cow<'static, str>
355 // can't hold an Arc. The interning benefit comes from the HashSet
356 // deduplication during the intern() call.
357 Cow::Owned(intern(s).to_string())
358}
359
360/// Clear all interned strings from the thread-local cache.
361///
362/// This is primarily useful for testing or when you know interned strings
363/// will no longer be needed and want to free memory.
364///
365/// # Examples
366///
367/// ```rust
368/// use prax_query::intern::{intern, clear_interned};
369///
370/// let _ = intern("some_field");
371/// // ... use the interned string ...
372///
373/// // Later, free all interned strings
374/// clear_interned();
375/// ```
376pub fn clear_interned() {
377 INTERNER.with(|interner| {
378 interner.borrow_mut().clear();
379 });
380}
381
382/// Get the number of currently interned strings.
383///
384/// Useful for debugging and memory profiling.
385///
386/// # Examples
387///
388/// ```rust
389/// use prax_query::intern::{intern, interned_count, clear_interned};
390///
391/// clear_interned();
392/// assert_eq!(interned_count(), 0);
393///
394/// intern("field1");
395/// intern("field2");
396/// intern("field1"); // Already interned, doesn't increase count
397///
398/// assert_eq!(interned_count(), 2);
399/// ```
400pub fn interned_count() -> usize {
401 INTERNER.with(|interner| interner.borrow().len())
402}
403
#[cfg(test)]
mod tests {
    use super::*;

    // The intern pool is thread-local and the test harness may reuse a thread
    // for several tests, so every test touching the pool clears it first.

    /// Interning the same text twice yields one shared allocation.
    #[test]
    fn test_intern_returns_same_arc() {
        clear_interned();

        let s1 = intern("test_field");
        let s2 = intern("test_field");

        // Same Arc pointer
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "test_field");
    }

    /// Distinct strings get distinct allocations with the right contents.
    #[test]
    fn test_intern_different_strings() {
        clear_interned();

        let s1 = intern("field_a");
        let s2 = intern("field_b");

        // Different Arc pointers
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "field_a");
        assert_eq!(&*s2, "field_b");
    }

    /// The count grows only when a previously unseen string is interned.
    #[test]
    fn test_interned_count() {
        clear_interned();

        assert_eq!(interned_count(), 0);

        intern("a");
        assert_eq!(interned_count(), 1);

        intern("b");
        assert_eq!(interned_count(), 2);

        intern("a"); // Already interned
        assert_eq!(interned_count(), 2);
    }

    /// Clearing the pool brings the count back to zero.
    #[test]
    fn test_clear_interned() {
        clear_interned();

        intern("x");
        intern("y");
        assert_eq!(interned_count(), 2);

        clear_interned();
        assert_eq!(interned_count(), 0);
    }

    /// `intern_cow` always produces an owned `Cow` with the input's text.
    #[test]
    fn test_intern_cow() {
        clear_interned();

        let cow = intern_cow("field_name");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "field_name");
    }

    /// Spot-check a handful of the predefined constants.
    #[test]
    fn test_predefined_fields() {
        assert_eq!(fields::ID, "id");
        assert_eq!(fields::EMAIL, "email");
        assert_eq!(fields::CREATED_AT, "created_at");
        assert_eq!(fields::USER_ID, "user_id");
        assert_eq!(fields::TENANT_ID, "tenant_id");
    }

    /// The empty string is interned like any other string.
    #[test]
    fn test_intern_empty_string() {
        clear_interned();

        let s1 = intern("");
        let s2 = intern("");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "");
    }

    /// Multi-byte UTF-8 text is deduplicated and preserved intact.
    #[test]
    fn test_intern_unicode() {
        clear_interned();

        let s1 = intern("フィールド");
        let s2 = intern("フィールド");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "フィールド");
    }

    /// `fields::lookup` finds registered names and rejects everything else.
    #[test]
    fn test_fields_lookup() {
        // Registered names resolve to themselves
        assert_eq!(fields::lookup("id"), Some("id"));
        assert_eq!(fields::lookup("email"), Some("email"));
        assert_eq!(fields::lookup("created_at"), Some("created_at"));
        assert_eq!(fields::lookup("user_id"), Some("user_id"));
        assert_eq!(fields::lookup("status"), Some("status"));

        // Unregistered names are not found
        assert_eq!(fields::lookup("unknown_field"), None);
        assert_eq!(fields::lookup("custom_field_123"), None);
    }

    /// `fields::as_cow` borrows registered names and copies the rest.
    #[test]
    fn test_fields_as_cow() {
        // Known field - should be Borrowed
        let cow = fields::as_cow("id");
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow.as_ref(), "id");

        // Unknown field - should be Owned
        let cow = fields::as_cow("custom_field");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "custom_field");
    }

    /// `ALL_SORTED` must be strictly ascending for `fields::lookup`'s binary
    /// search to be reliable.
    ///
    /// The comparison is strict so that an accidentally duplicated entry is
    /// reported as well as an out-of-order one.
    #[test]
    fn test_fields_all_sorted() {
        for pair in fields::ALL_SORTED.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier < later,
                "ALL_SORTED must be strictly ascending, but {:?} is followed by {:?}",
                earlier,
                later
            );
        }
    }
}
543}