prax_query/intern.rs
1//! String interning for efficient field name storage.
2//!
3//! This module provides string interning to reduce memory allocations when the same
4//! field names are used across many filters. Interned strings share memory, making
5//! cloning essentially free.
6//!
7//! # When to Use
8//!
9//! String interning is most beneficial when:
10//! - The same field names are used in many filters
11//! - Field names come from dynamic sources (not `&'static str`)
12//! - You're building complex query trees with repeated field references
13//!
14//! For static field names (`&'static str`), use them directly - they're already
15//! "interned" by the compiler with zero overhead.
16//!
17//! # Examples
18//!
19//! ## Using Pre-defined Field Names
20//!
21//! ```rust
22//! use prax_query::intern::fields;
23//! use prax_query::{Filter, FilterValue};
24//!
25//! // Common field names are pre-defined as constants
26//! let filter = Filter::Equals(fields::ID.into(), FilterValue::Int(42));
27//! let filter = Filter::Equals(fields::EMAIL.into(), FilterValue::String("test@example.com".into()));
28//! let filter = Filter::Gt(fields::CREATED_AT.into(), FilterValue::String("2024-01-01".into()));
29//! ```
30//!
31//! ## Interning Dynamic Strings
32//!
33//! ```rust
34//! use prax_query::intern::{intern, intern_cow};
35//! use prax_query::{Filter, FilterValue, FieldName};
36//!
//! // Intern a dynamic string - later calls on the same thread return the same Arc<str>
38//! let field1 = intern("dynamic_field");
39//! let field2 = intern("dynamic_field");
40//! // field1 and field2 point to the same memory
41//!
//! // Build a filter field name through the interner (`intern_cow` returns an owned copy)
43//! let filter = Filter::Equals(intern_cow("user_id"), FilterValue::Int(1));
44//! ```
45
46use std::borrow::Cow;
47use std::cell::RefCell;
48use std::collections::HashSet;
49use std::sync::Arc;
50
/// Registry of frequently used field names.
///
/// Every constant is a plain `&'static str`, so using one never allocates.
/// Reach for these whenever a column name matches one of the entries below.
pub mod fields {
    use std::borrow::Cow;

    /// Primary key field: "id"
    pub const ID: &str = "id";
    /// UUID field: "uuid"
    pub const UUID: &str = "uuid";
    /// Email field: "email"
    pub const EMAIL: &str = "email";
    /// Name field: "name"
    pub const NAME: &str = "name";
    /// Title field: "title"
    pub const TITLE: &str = "title";
    /// Description field: "description"
    pub const DESCRIPTION: &str = "description";
    /// Status field: "status"
    pub const STATUS: &str = "status";
    /// Active flag: "active"
    pub const ACTIVE: &str = "active";
    /// Enabled flag: "enabled"
    pub const ENABLED: &str = "enabled";
    /// Deleted flag: "deleted"
    pub const DELETED: &str = "deleted";
    /// Created timestamp: "created_at"
    pub const CREATED_AT: &str = "created_at";
    /// Updated timestamp: "updated_at"
    pub const UPDATED_AT: &str = "updated_at";
    /// Deleted timestamp: "deleted_at"
    pub const DELETED_AT: &str = "deleted_at";
    /// User ID foreign key: "user_id"
    pub const USER_ID: &str = "user_id";
    /// Author ID foreign key: "author_id"
    pub const AUTHOR_ID: &str = "author_id";
    /// Parent ID foreign key: "parent_id"
    pub const PARENT_ID: &str = "parent_id";
    /// Owner ID foreign key: "owner_id"
    pub const OWNER_ID: &str = "owner_id";
    /// Tenant ID for multi-tenancy: "tenant_id"
    pub const TENANT_ID: &str = "tenant_id";
    /// Organization ID: "org_id"
    pub const ORG_ID: &str = "org_id";
    /// Type discriminator: "type"
    pub const TYPE: &str = "type";
    /// Kind discriminator: "kind"
    pub const KIND: &str = "kind";
    /// Slug field: "slug"
    pub const SLUG: &str = "slug";
    /// Content field: "content"
    pub const CONTENT: &str = "content";
    /// Body field: "body"
    pub const BODY: &str = "body";
    /// Order/position field: "order"
    pub const ORDER: &str = "order";
    /// Position field: "position"
    pub const POSITION: &str = "position";
    /// Priority field: "priority"
    pub const PRIORITY: &str = "priority";
    /// Score field: "score"
    pub const SCORE: &str = "score";
    /// Count field: "count"
    pub const COUNT: &str = "count";
    /// Price field: "price"
    pub const PRICE: &str = "price";
    /// Amount field: "amount"
    pub const AMOUNT: &str = "amount";
    /// Quantity field: "quantity"
    pub const QUANTITY: &str = "quantity";
    /// Version field: "version"
    pub const VERSION: &str = "version";
    /// Age field: "age"
    pub const AGE: &str = "age";
    /// Role field: "role"
    pub const ROLE: &str = "role";
    /// Verified field: "verified"
    pub const VERIFIED: &str = "verified";
    /// Password field: "password"
    pub const PASSWORD: &str = "password";
    /// First name: "first_name"
    pub const FIRST_NAME: &str = "first_name";
    /// Last name: "last_name"
    pub const LAST_NAME: &str = "last_name";
    /// Category field: "category"
    pub const CATEGORY: &str = "category";
    /// Tags field: "tags"
    pub const TAGS: &str = "tags";
    /// Published flag: "published"
    pub const PUBLISHED: &str = "published";
    /// Published timestamp: "published_at"
    pub const PUBLISHED_AT: &str = "published_at";
    /// Expires timestamp: "expires_at"
    pub const EXPIRES_AT: &str = "expires_at";
    /// Started timestamp: "started_at"
    pub const STARTED_AT: &str = "started_at";
    /// Completed timestamp: "completed_at"
    pub const COMPLETED_AT: &str = "completed_at";
    /// Archived flag: "archived"
    pub const ARCHIVED: &str = "archived";
    /// Flagged flag: "flagged"
    pub const FLAGGED: &str = "flagged";
    /// Data field: "data"
    pub const DATA: &str = "data";
    /// Metadata field: "metadata"
    pub const METADATA: &str = "metadata";
    /// URL field: "url"
    pub const URL: &str = "url";
    /// Image URL: "image_url"
    pub const IMAGE_URL: &str = "image_url";
    /// Avatar URL: "avatar_url"
    pub const AVATAR_URL: &str = "avatar_url";
    /// File field: "file"
    pub const FILE: &str = "file";
    /// Path field: "path"
    pub const PATH: &str = "path";
    /// Attempts field: "attempts"
    pub const ATTEMPTS: &str = "attempts";
    /// Max attempts: "max_attempts"
    pub const MAX_ATTEMPTS: &str = "max_attempts";

    /// Every registered field name, ordered by the *string value* (byte order).
    ///
    /// `lookup()` runs a binary search over this slice, so the ordering must be
    /// preserved whenever an entry is added.
    pub const ALL_SORTED: &[&str] = &[
        ACTIVE,
        AGE,
        AMOUNT,
        ARCHIVED,
        ATTEMPTS,
        AUTHOR_ID,
        AVATAR_URL,
        BODY,
        CATEGORY,
        COMPLETED_AT,
        CONTENT,
        COUNT,
        CREATED_AT,
        DATA,
        DELETED,
        DELETED_AT,
        DESCRIPTION,
        EMAIL,
        ENABLED,
        EXPIRES_AT,
        FILE,
        FIRST_NAME,
        FLAGGED,
        ID,
        IMAGE_URL,
        KIND,
        LAST_NAME,
        MAX_ATTEMPTS,
        METADATA,
        NAME,
        ORDER,
        ORG_ID,
        OWNER_ID,
        PARENT_ID,
        PASSWORD,
        PATH,
        POSITION,
        PRICE,
        PRIORITY,
        PUBLISHED,
        PUBLISHED_AT,
        QUANTITY,
        ROLE,
        SCORE,
        SLUG,
        STARTED_AT,
        STATUS,
        TAGS,
        TENANT_ID,
        TITLE,
        TYPE,
        UPDATED_AT,
        URL,
        USER_ID,
        UUID,
        VERIFIED,
        VERSION,
    ];

    /// Find `name` in the static registry.
    ///
    /// Returns the registry's own `&'static str` when `name` is registered and
    /// `None` when it is not.
    ///
    /// # Performance
    ///
    /// Binary search over `ALL_SORTED`: O(log n) comparisons for 57 entries.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    ///
    /// assert_eq!(fields::lookup("id"), Some("id"));
    /// assert_eq!(fields::lookup("email"), Some("email"));
    /// assert_eq!(fields::lookup("unknown"), None);
    /// ```
    #[inline]
    pub fn lookup(name: &str) -> Option<&'static str> {
        match ALL_SORTED.binary_search(&name) {
            Ok(index) => Some(ALL_SORTED[index]),
            Err(_) => None,
        }
    }

    /// Convert `name` into a `Cow<'static, str>`, preferring the static registry.
    ///
    /// A registered name yields `Cow::Borrowed` pointing at the registry entry;
    /// any other name is copied into a `Cow::Owned` string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    /// use std::borrow::Cow;
    ///
    /// // Static field - zero allocation
    /// let name = fields::as_cow("id");
    /// assert!(matches!(name, Cow::Borrowed(_)));
    ///
    /// // Unknown field - allocates
    /// let name = fields::as_cow("custom_field");
    /// assert!(matches!(name, Cow::Owned(_)));
    /// ```
    #[inline]
    pub fn as_cow(name: &str) -> Cow<'static, str> {
        if let Some(registered) = lookup(name) {
            Cow::Borrowed(registered)
        } else {
            Cow::Owned(String::from(name))
        }
    }
}
231
// The documentation lives on the static inside the macro: a doc comment placed
// on the `thread_local!` invocation itself is ignored by rustdoc and triggers
// the `unused_doc_comments` lint.
thread_local! {
    /// Thread-local string interner.
    ///
    /// Holds one `Arc<str>` per distinct string interned on the current
    /// thread. The `HashSet` deduplicates entries by string content and the
    /// `RefCell` provides the interior mutability needed to update the pool
    /// from within `LocalKey::with`.
    static INTERNER: RefCell<HashSet<Arc<str>>> = RefCell::new(HashSet::new());
}
239
240/// Intern a string, returning a reference-counted pointer.
241///
242/// If the string has been interned before, returns the existing `Arc<str>`.
243/// Otherwise, allocates a new `Arc<str>` and stores it for future lookups.
244///
245/// # Performance
246///
247/// - First call for a string: O(n) where n is string length (allocation + hash)
248/// - Subsequent calls: O(n) for hash lookup, but no allocation
249/// - Cloning the result: O(1) (just incrementing reference count)
250///
251/// # Examples
252///
253/// ```rust
254/// use prax_query::intern::intern;
255/// use std::sync::Arc;
256///
257/// let s1 = intern("field_name");
258/// let s2 = intern("field_name");
259///
260/// // Both point to the same allocation
261/// assert!(Arc::ptr_eq(&s1, &s2));
262/// ```
263#[inline]
264pub fn intern(s: &str) -> Arc<str> {
265 INTERNER.with(|interner| {
266 let mut set = interner.borrow_mut();
267
268 // Check if already interned
269 if let Some(existing) = set.get(s) {
270 return Arc::clone(existing);
271 }
272
273 // Intern the new string
274 let arc: Arc<str> = Arc::from(s);
275 set.insert(Arc::clone(&arc));
276 arc
277 })
278}
279
280/// Intern a string and return it as a `Cow<'static, str>`.
281///
282/// This is a convenience function for use with filter APIs that expect `FieldName`.
283/// The returned `Cow` contains an owned `String` created from the interned `Arc<str>`.
284///
285/// # Note
286///
287/// For static strings, prefer using them directly (e.g., `"id".into()`) as that
288/// creates a `Cow::Borrowed` with zero allocation. Use this function only for
289/// dynamic strings that are repeated many times.
290///
291/// # Examples
292///
293/// ```rust
294/// use prax_query::intern::intern_cow;
295/// use prax_query::{Filter, FilterValue};
296///
297/// // Good: Interning a dynamic field name used in many filters
298/// let field_name = format!("field_{}", 42);
299/// let filter1 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(1));
300/// let filter2 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(2));
301/// ```
302#[inline]
303pub fn intern_cow(s: &str) -> Cow<'static, str> {
304 // Note: We convert Arc<str> to String here because Cow<'static, str>
305 // can't hold an Arc. The interning benefit comes from the HashSet
306 // deduplication during the intern() call.
307 Cow::Owned(intern(s).to_string())
308}
309
310/// Clear all interned strings from the thread-local cache.
311///
312/// This is primarily useful for testing or when you know interned strings
313/// will no longer be needed and want to free memory.
314///
315/// # Examples
316///
317/// ```rust
318/// use prax_query::intern::{intern, clear_interned};
319///
320/// let _ = intern("some_field");
321/// // ... use the interned string ...
322///
323/// // Later, free all interned strings
324/// clear_interned();
325/// ```
326pub fn clear_interned() {
327 INTERNER.with(|interner| {
328 interner.borrow_mut().clear();
329 });
330}
331
332/// Get the number of currently interned strings.
333///
334/// Useful for debugging and memory profiling.
335///
336/// # Examples
337///
338/// ```rust
339/// use prax_query::intern::{intern, interned_count, clear_interned};
340///
341/// clear_interned();
342/// assert_eq!(interned_count(), 0);
343///
344/// intern("field1");
345/// intern("field2");
346/// intern("field1"); // Already interned, doesn't increase count
347///
348/// assert_eq!(interned_count(), 2);
349/// ```
350pub fn interned_count() -> usize {
351 INTERNER.with(|interner| interner.borrow().len())
352}
353
/// Unit tests for the interner and the static field registry.
///
/// Interner tests start with `clear_interned()` so each one begins from an
/// empty pool on whichever thread runs it.
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text twice yields the same allocation.
    #[test]
    fn test_intern_returns_same_arc() {
        clear_interned();

        let s1 = intern("test_field");
        let s2 = intern("test_field");

        // Same Arc pointer
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "test_field");
    }

    /// Distinct strings get distinct allocations.
    #[test]
    fn test_intern_different_strings() {
        clear_interned();

        let s1 = intern("field_a");
        let s2 = intern("field_b");

        // Different Arc pointers
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "field_a");
        assert_eq!(&*s2, "field_b");
    }

    /// The count grows only when a previously unseen string is interned.
    #[test]
    fn test_interned_count() {
        clear_interned();

        assert_eq!(interned_count(), 0);

        intern("a");
        assert_eq!(interned_count(), 1);

        intern("b");
        assert_eq!(interned_count(), 2);

        intern("a"); // Already interned
        assert_eq!(interned_count(), 2);
    }

    /// Clearing drops every entry from the pool.
    #[test]
    fn test_clear_interned() {
        clear_interned();

        intern("x");
        intern("y");
        assert_eq!(interned_count(), 2);

        clear_interned();
        assert_eq!(interned_count(), 0);
    }

    /// `intern_cow` always produces an owned value with the same contents.
    #[test]
    fn test_intern_cow() {
        clear_interned();

        let cow = intern_cow("field_name");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "field_name");
    }

    /// Spot-check that the constants carry the expected values.
    #[test]
    fn test_predefined_fields() {
        assert_eq!(fields::ID, "id");
        assert_eq!(fields::EMAIL, "email");
        assert_eq!(fields::CREATED_AT, "created_at");
        assert_eq!(fields::USER_ID, "user_id");
        assert_eq!(fields::TENANT_ID, "tenant_id");
    }

    /// The empty string is interned like any other string.
    #[test]
    fn test_intern_empty_string() {
        clear_interned();

        let s1 = intern("");
        let s2 = intern("");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "");
    }

    /// Non-ASCII text is interned by content as well.
    #[test]
    fn test_intern_unicode() {
        clear_interned();

        let s1 = intern("フィールド");
        let s2 = intern("フィールド");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "フィールド");
    }

    /// `lookup` finds registered names and rejects everything else.
    #[test]
    fn test_fields_lookup() {
        // Registered fields are found
        assert_eq!(fields::lookup("id"), Some("id"));
        assert_eq!(fields::lookup("email"), Some("email"));
        assert_eq!(fields::lookup("created_at"), Some("created_at"));
        assert_eq!(fields::lookup("user_id"), Some("user_id"));
        assert_eq!(fields::lookup("status"), Some("status"));

        // Unknown fields yield None
        assert_eq!(fields::lookup("unknown_field"), None);
        assert_eq!(fields::lookup("custom_field_123"), None);
    }

    /// Every registry entry must be reachable through `lookup`.
    #[test]
    fn test_fields_lookup_finds_every_entry() {
        for &field in fields::ALL_SORTED {
            assert_eq!(fields::lookup(field), Some(field));
        }
    }

    /// `as_cow` borrows registered names and copies unknown ones.
    #[test]
    fn test_fields_as_cow() {
        // Known field - should be Borrowed
        let cow = fields::as_cow("id");
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow.as_ref(), "id");

        // Unknown field - should be Owned
        let cow = fields::as_cow("custom_field");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "custom_field");
    }

    /// `ALL_SORTED` must be strictly ascending for the binary search in
    /// `lookup` to be correct; the strict comparison also rejects duplicates.
    #[test]
    fn test_fields_all_sorted() {
        for pair in fields::ALL_SORTED.windows(2) {
            assert!(
                pair[0] < pair[1],
                "ALL_SORTED must be strictly ascending, but {:?} is followed by {:?}",
                pair[0],
                pair[1]
            );
        }
    }
}
494