prax_query/
intern.rs

1//! String interning for efficient field name storage.
2//!
3//! This module provides string interning to reduce memory allocations when the same
4//! field names are used across many filters. Interned strings share memory, making
5//! cloning essentially free.
6//!
7//! # When to Use
8//!
9//! String interning is most beneficial when:
10//! - The same field names are used in many filters
11//! - Field names come from dynamic sources (not `&'static str`)
12//! - You're building complex query trees with repeated field references
13//!
14//! For static field names (`&'static str`), use them directly - they're already
15//! "interned" by the compiler with zero overhead.
16//!
17//! # Examples
18//!
19//! ## Using Pre-defined Field Names
20//!
21//! ```rust
22//! use prax_query::intern::fields;
23//! use prax_query::{Filter, FilterValue};
24//!
25//! // Common field names are pre-defined as constants
26//! let filter = Filter::Equals(fields::ID.into(), FilterValue::Int(42));
27//! let filter = Filter::Equals(fields::EMAIL.into(), FilterValue::String("test@example.com".into()));
28//! let filter = Filter::Gt(fields::CREATED_AT.into(), FilterValue::String("2024-01-01".into()));
29//! ```
30//!
31//! ## Interning Dynamic Strings
32//!
33//! ```rust
34//! use prax_query::intern::{intern, intern_cow};
35//! use prax_query::{Filter, FilterValue, FieldName};
36//!
37//! // Intern a dynamic string - subsequent calls return the same Arc<str>
38//! let field1 = intern("dynamic_field");
39//! let field2 = intern("dynamic_field");
40//! // field1 and field2 point to the same memory
41//!
42//! // Use interned string directly in filters
43//! let filter = Filter::Equals(intern_cow("user_id"), FilterValue::Int(1));
44//! ```
45
46use std::borrow::Cow;
47use std::cell::RefCell;
48use std::collections::HashSet;
49use std::sync::Arc;
50
/// Ready-made constants for frequently used field names.
///
/// Every constant is a plain `&'static str`, so using one never allocates.
/// Reach for these whenever a field name matches one of the common database
/// column names listed below.
pub mod fields {
    /// Primary key: `"id"`.
    pub const ID: &str = "id";
    /// UUID: `"uuid"`.
    pub const UUID: &str = "uuid";
    /// Email: `"email"`.
    pub const EMAIL: &str = "email";
    /// Name: `"name"`.
    pub const NAME: &str = "name";
    /// Title: `"title"`.
    pub const TITLE: &str = "title";
    /// Description: `"description"`.
    pub const DESCRIPTION: &str = "description";
    /// Status: `"status"`.
    pub const STATUS: &str = "status";
    /// Active flag: `"active"`.
    pub const ACTIVE: &str = "active";
    /// Enabled flag: `"enabled"`.
    pub const ENABLED: &str = "enabled";
    /// Deleted flag: `"deleted"`.
    pub const DELETED: &str = "deleted";
    /// Creation timestamp: `"created_at"`.
    pub const CREATED_AT: &str = "created_at";
    /// Update timestamp: `"updated_at"`.
    pub const UPDATED_AT: &str = "updated_at";
    /// Deletion timestamp: `"deleted_at"`.
    pub const DELETED_AT: &str = "deleted_at";
    /// User ID foreign key: `"user_id"`.
    pub const USER_ID: &str = "user_id";
    /// Author ID foreign key: `"author_id"`.
    pub const AUTHOR_ID: &str = "author_id";
    /// Parent ID foreign key: `"parent_id"`.
    pub const PARENT_ID: &str = "parent_id";
    /// Owner ID foreign key: `"owner_id"`.
    pub const OWNER_ID: &str = "owner_id";
    /// Tenant ID used for multi-tenancy: `"tenant_id"`.
    pub const TENANT_ID: &str = "tenant_id";
    /// Organization ID: `"org_id"`.
    pub const ORG_ID: &str = "org_id";
    /// Type discriminator: `"type"`.
    pub const TYPE: &str = "type";
    /// Kind discriminator: `"kind"`.
    pub const KIND: &str = "kind";
    /// Slug: `"slug"`.
    pub const SLUG: &str = "slug";
    /// Content: `"content"`.
    pub const CONTENT: &str = "content";
    /// Body: `"body"`.
    pub const BODY: &str = "body";
    /// Order/position: `"order"`.
    pub const ORDER: &str = "order";
    /// Position: `"position"`.
    pub const POSITION: &str = "position";
    /// Priority: `"priority"`.
    pub const PRIORITY: &str = "priority";
    /// Score: `"score"`.
    pub const SCORE: &str = "score";
    /// Count: `"count"`.
    pub const COUNT: &str = "count";
    /// Price: `"price"`.
    pub const PRICE: &str = "price";
    /// Amount: `"amount"`.
    pub const AMOUNT: &str = "amount";
    /// Quantity: `"quantity"`.
    pub const QUANTITY: &str = "quantity";
    /// Version: `"version"`.
    pub const VERSION: &str = "version";
    /// Age: `"age"`.
    pub const AGE: &str = "age";
    /// Role: `"role"`.
    pub const ROLE: &str = "role";
    /// Verified: `"verified"`.
    pub const VERIFIED: &str = "verified";
    /// Password: `"password"`.
    pub const PASSWORD: &str = "password";
    /// First name: `"first_name"`.
    pub const FIRST_NAME: &str = "first_name";
    /// Last name: `"last_name"`.
    pub const LAST_NAME: &str = "last_name";
    /// Category: `"category"`.
    pub const CATEGORY: &str = "category";
    /// Tags: `"tags"`.
    pub const TAGS: &str = "tags";
    /// Published flag: `"published"`.
    pub const PUBLISHED: &str = "published";
    /// Publication timestamp: `"published_at"`.
    pub const PUBLISHED_AT: &str = "published_at";
    /// Expiry timestamp: `"expires_at"`.
    pub const EXPIRES_AT: &str = "expires_at";
    /// Start timestamp: `"started_at"`.
    pub const STARTED_AT: &str = "started_at";
    /// Completion timestamp: `"completed_at"`.
    pub const COMPLETED_AT: &str = "completed_at";
    /// Archived flag: `"archived"`.
    pub const ARCHIVED: &str = "archived";
    /// Flagged flag: `"flagged"`.
    pub const FLAGGED: &str = "flagged";
    /// Data: `"data"`.
    pub const DATA: &str = "data";
    /// Metadata: `"metadata"`.
    pub const METADATA: &str = "metadata";
    /// URL: `"url"`.
    pub const URL: &str = "url";
    /// Image URL: `"image_url"`.
    pub const IMAGE_URL: &str = "image_url";
    /// Avatar URL: `"avatar_url"`.
    pub const AVATAR_URL: &str = "avatar_url";
    /// File: `"file"`.
    pub const FILE: &str = "file";
    /// Path: `"path"`.
    pub const PATH: &str = "path";
    /// Attempts: `"attempts"`.
    pub const ATTEMPTS: &str = "attempts";
    /// Maximum attempts: `"max_attempts"`.
    pub const MAX_ATTEMPTS: &str = "max_attempts";

    /// Every registered static field name, in ascending string order.
    ///
    /// The ordering is what makes the binary search in [`lookup`] valid, so a
    /// new constant must be inserted at its sorted position (by string value).
    pub const ALL_SORTED: &[&str] = &[
        ACTIVE,
        AGE,
        AMOUNT,
        ARCHIVED,
        ATTEMPTS,
        AUTHOR_ID,
        AVATAR_URL,
        BODY,
        CATEGORY,
        COMPLETED_AT,
        CONTENT,
        COUNT,
        CREATED_AT,
        DATA,
        DELETED,
        DELETED_AT,
        DESCRIPTION,
        EMAIL,
        ENABLED,
        EXPIRES_AT,
        FILE,
        FIRST_NAME,
        FLAGGED,
        ID,
        IMAGE_URL,
        KIND,
        LAST_NAME,
        MAX_ATTEMPTS,
        METADATA,
        NAME,
        ORDER,
        ORG_ID,
        OWNER_ID,
        PARENT_ID,
        PASSWORD,
        PATH,
        POSITION,
        PRICE,
        PRIORITY,
        PUBLISHED,
        PUBLISHED_AT,
        QUANTITY,
        ROLE,
        SCORE,
        SLUG,
        STARTED_AT,
        STATUS,
        TAGS,
        TENANT_ID,
        TITLE,
        TYPE,
        UPDATED_AT,
        URL,
        USER_ID,
        UUID,
        VERIFIED,
        VERSION,
    ];

    /// Find `name` in the static registry.
    ///
    /// Returns the registry's own `&'static str` when `name` is registered,
    /// and `None` when it is not.
    ///
    /// # Performance
    ///
    /// A binary search over [`ALL_SORTED`], i.e. O(log n) string comparisons.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    ///
    /// assert_eq!(fields::lookup("id"), Some("id"));
    /// assert_eq!(fields::lookup("email"), Some("email"));
    /// assert_eq!(fields::lookup("unknown"), None);
    /// ```
    #[inline]
    pub fn lookup(name: &str) -> Option<&'static str> {
        // `?` turns a miss into `None`; a hit yields the slot index.
        let index = ALL_SORTED.binary_search(&name).ok()?;
        Some(ALL_SORTED[index])
    }

    /// Convert `name` into a `Cow<'static, str>`, borrowing when possible.
    ///
    /// A registered name comes back as `Cow::Borrowed` pointing at the static
    /// constant; any other name is copied into a `Cow::Owned` string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    /// use std::borrow::Cow;
    ///
    /// // Static field - zero allocation
    /// let name = fields::as_cow("id");
    /// assert!(matches!(name, Cow::Borrowed(_)));
    ///
    /// // Unknown field - allocates
    /// let name = fields::as_cow("custom_field");
    /// assert!(matches!(name, Cow::Owned(_)));
    /// ```
    #[inline]
    pub fn as_cow(name: &str) -> std::borrow::Cow<'static, str> {
        use std::borrow::Cow;

        if let Some(registered) = lookup(name) {
            Cow::Borrowed(registered)
        } else {
            Cow::Owned(name.to_owned())
        }
    }
}
281
// Per-thread pool of interned strings.
//
// Each entry is an `Arc<str>`; the `HashSet` guarantees at most one entry per
// distinct string, and the `RefCell` provides the interior mutability needed
// to update the pool through the shared thread-local handle.
thread_local! {
    static INTERNER: RefCell<HashSet<Arc<str>>> = RefCell::default();
}
289
290/// Intern a string, returning a reference-counted pointer.
291///
292/// If the string has been interned before, returns the existing `Arc<str>`.
293/// Otherwise, allocates a new `Arc<str>` and stores it for future lookups.
294///
295/// # Performance
296///
297/// - First call for a string: O(n) where n is string length (allocation + hash)
298/// - Subsequent calls: O(n) for hash lookup, but no allocation
299/// - Cloning the result: O(1) (just incrementing reference count)
300///
301/// # Examples
302///
303/// ```rust
304/// use prax_query::intern::intern;
305/// use std::sync::Arc;
306///
307/// let s1 = intern("field_name");
308/// let s2 = intern("field_name");
309///
310/// // Both point to the same allocation
311/// assert!(Arc::ptr_eq(&s1, &s2));
312/// ```
313#[inline]
314pub fn intern(s: &str) -> Arc<str> {
315    INTERNER.with(|interner| {
316        let mut set = interner.borrow_mut();
317
318        // Check if already interned
319        if let Some(existing) = set.get(s) {
320            return Arc::clone(existing);
321        }
322
323        // Intern the new string
324        let arc: Arc<str> = Arc::from(s);
325        set.insert(Arc::clone(&arc));
326        arc
327    })
328}
329
330/// Intern a string and return it as a `Cow<'static, str>`.
331///
332/// This is a convenience function for use with filter APIs that expect `FieldName`.
333/// The returned `Cow` contains an owned `String` created from the interned `Arc<str>`.
334///
335/// # Note
336///
337/// For static strings, prefer using them directly (e.g., `"id".into()`) as that
338/// creates a `Cow::Borrowed` with zero allocation. Use this function only for
339/// dynamic strings that are repeated many times.
340///
341/// # Examples
342///
343/// ```rust
344/// use prax_query::intern::intern_cow;
345/// use prax_query::{Filter, FilterValue};
346///
347/// // Good: Interning a dynamic field name used in many filters
348/// let field_name = format!("field_{}", 42);
349/// let filter1 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(1));
350/// let filter2 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(2));
351/// ```
352#[inline]
353pub fn intern_cow(s: &str) -> Cow<'static, str> {
354    // Note: We convert Arc<str> to String here because Cow<'static, str>
355    // can't hold an Arc. The interning benefit comes from the HashSet
356    // deduplication during the intern() call.
357    Cow::Owned(intern(s).to_string())
358}
359
360/// Clear all interned strings from the thread-local cache.
361///
362/// This is primarily useful for testing or when you know interned strings
363/// will no longer be needed and want to free memory.
364///
365/// # Examples
366///
367/// ```rust
368/// use prax_query::intern::{intern, clear_interned};
369///
370/// let _ = intern("some_field");
371/// // ... use the interned string ...
372///
373/// // Later, free all interned strings
374/// clear_interned();
375/// ```
376pub fn clear_interned() {
377    INTERNER.with(|interner| {
378        interner.borrow_mut().clear();
379    });
380}
381
382/// Get the number of currently interned strings.
383///
384/// Useful for debugging and memory profiling.
385///
386/// # Examples
387///
388/// ```rust
389/// use prax_query::intern::{intern, interned_count, clear_interned};
390///
391/// clear_interned();
392/// assert_eq!(interned_count(), 0);
393///
394/// intern("field1");
395/// intern("field2");
396/// intern("field1"); // Already interned, doesn't increase count
397///
398/// assert_eq!(interned_count(), 2);
399/// ```
400pub fn interned_count() -> usize {
401    INTERNER.with(|interner| interner.borrow().len())
402}
403
/// Unit tests for the interner and the static field registry.
///
/// Tests that touch the interner call `clear_interned()` first so they start
/// from an empty thread-local pool.
#[cfg(test)]
mod tests {
    use super::*;

    /// Interning the same text twice yields the same allocation.
    #[test]
    fn test_intern_returns_same_arc() {
        clear_interned();

        let s1 = intern("test_field");
        let s2 = intern("test_field");

        // Same Arc pointer
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "test_field");
    }

    /// Distinct texts get distinct allocations.
    #[test]
    fn test_intern_different_strings() {
        clear_interned();

        let s1 = intern("field_a");
        let s2 = intern("field_b");

        // Different Arc pointers
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "field_a");
        assert_eq!(&*s2, "field_b");
    }

    /// The count grows only when a new string is interned.
    #[test]
    fn test_interned_count() {
        clear_interned();

        assert_eq!(interned_count(), 0);

        intern("a");
        assert_eq!(interned_count(), 1);

        intern("b");
        assert_eq!(interned_count(), 2);

        intern("a"); // Already interned
        assert_eq!(interned_count(), 2);
    }

    /// Clearing drops every pooled entry.
    #[test]
    fn test_clear_interned() {
        clear_interned();

        intern("x");
        intern("y");
        assert_eq!(interned_count(), 2);

        clear_interned();
        assert_eq!(interned_count(), 0);
    }

    /// A name outside the static registry comes back as an owned `Cow`.
    #[test]
    fn test_intern_cow() {
        clear_interned();

        let cow = intern_cow("field_name");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "field_name");
    }

    /// Spot-check that the constants exist and have the expected values.
    #[test]
    fn test_predefined_fields() {
        assert_eq!(fields::ID, "id");
        assert_eq!(fields::EMAIL, "email");
        assert_eq!(fields::CREATED_AT, "created_at");
        assert_eq!(fields::USER_ID, "user_id");
        assert_eq!(fields::TENANT_ID, "tenant_id");
    }

    /// The empty string can be interned like any other.
    #[test]
    fn test_intern_empty_string() {
        clear_interned();

        let s1 = intern("");
        let s2 = intern("");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "");
    }

    /// Non-ASCII text is interned correctly.
    #[test]
    fn test_intern_unicode() {
        clear_interned();

        let s1 = intern("フィールド");
        let s2 = intern("フィールド");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "フィールド");
    }

    /// `lookup` finds registered names and rejects everything else.
    #[test]
    fn test_fields_lookup() {
        // Test that lookup finds registered fields
        assert_eq!(fields::lookup("id"), Some("id"));
        assert_eq!(fields::lookup("email"), Some("email"));
        assert_eq!(fields::lookup("created_at"), Some("created_at"));
        assert_eq!(fields::lookup("user_id"), Some("user_id"));
        assert_eq!(fields::lookup("status"), Some("status"));

        // Every registry entry must be reachable through the binary search.
        for &field in fields::ALL_SORTED {
            assert_eq!(fields::lookup(field), Some(field));
        }

        // Test that lookup returns None for unknown fields
        assert_eq!(fields::lookup("unknown_field"), None);
        assert_eq!(fields::lookup("custom_field_123"), None);
    }

    /// `as_cow` borrows registered names and owns unknown ones.
    #[test]
    fn test_fields_as_cow() {
        // Known field - should be Borrowed
        let cow = fields::as_cow("id");
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow.as_ref(), "id");

        // Unknown field - should be Owned
        let cow = fields::as_cow("custom_field");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "custom_field");
    }

    /// `ALL_SORTED` must be strictly ascending for the binary search to work.
    #[test]
    fn test_fields_all_sorted() {
        // Strict `<` also rejects duplicate entries, which a `>=` check
        // would let through.
        for pair in fields::ALL_SORTED.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier < later,
                "ALL_SORTED is not strictly sorted: {:?} should come before {:?}",
                later,
                earlier
            );
        }
    }
}