prax_query/
intern.rs

//! String interning for efficient field name storage.
//!
//! This module provides string interning to reduce memory allocations when the same
//! field names are used across many filters. Interned strings share memory, making
//! cloning essentially free.
//!
//! # When to Use
//!
//! String interning is most beneficial when:
//! - The same field names are used in many filters
//! - Field names come from dynamic sources (not `&'static str`)
//! - You're building complex query trees with repeated field references
//!
//! For static field names (`&'static str`), use them directly - they're already
//! "interned" by the compiler with zero overhead.
//!
//! # Examples
//!
//! ## Using Pre-defined Field Names
//!
//! ```rust
//! use prax_query::intern::fields;
//! use prax_query::{Filter, FilterValue};
//!
//! // Common field names are pre-defined as constants
//! let filter = Filter::Equals(fields::ID.into(), FilterValue::Int(42));
//! let filter = Filter::Equals(fields::EMAIL.into(), FilterValue::String("test@example.com".into()));
//! let filter = Filter::Gt(fields::CREATED_AT.into(), FilterValue::String("2024-01-01".into()));
//! ```
//!
//! ## Interning Dynamic Strings
//!
//! ```rust
//! use prax_query::intern::{intern, intern_cow};
//! use prax_query::{Filter, FilterValue, FieldName};
//!
//! // Intern a dynamic string - subsequent calls on the same thread return the same Arc<str>
//! let field1 = intern("dynamic_field");
//! let field2 = intern("dynamic_field");
//! // field1 and field2 point to the same memory
//!
//! // Intern a name and get it back as the `Cow` that filters expect.
//! // Note: the returned `Cow` owns a `String` copied from the interned `Arc<str>`.
//! let filter = Filter::Equals(intern_cow("user_id"), FilterValue::Int(1));
//! ```
45
46use std::borrow::Cow;
47use std::cell::RefCell;
48use std::collections::HashSet;
49use std::sync::Arc;
50
/// Pre-defined common field name constants.
///
/// These are compile-time `&'static str` values that require zero allocation.
/// Use these when your field names match common database column names.
///
/// The module also exposes [`ALL_SORTED`](fields::ALL_SORTED), a sorted table of
/// every constant, plus [`lookup`](fields::lookup) and [`as_cow`](fields::as_cow)
/// for mapping a dynamic `&str` back onto its static counterpart.
pub mod fields {
    use std::borrow::Cow;

    /// Primary key field: "id"
    pub const ID: &str = "id";
    /// UUID field: "uuid"
    pub const UUID: &str = "uuid";
    /// Email field: "email"
    pub const EMAIL: &str = "email";
    /// Name field: "name"
    pub const NAME: &str = "name";
    /// Title field: "title"
    pub const TITLE: &str = "title";
    /// Description field: "description"
    pub const DESCRIPTION: &str = "description";
    /// Status field: "status"
    pub const STATUS: &str = "status";
    /// Active flag: "active"
    pub const ACTIVE: &str = "active";
    /// Enabled flag: "enabled"
    pub const ENABLED: &str = "enabled";
    /// Deleted flag: "deleted"
    pub const DELETED: &str = "deleted";
    /// Created timestamp: "created_at"
    pub const CREATED_AT: &str = "created_at";
    /// Updated timestamp: "updated_at"
    pub const UPDATED_AT: &str = "updated_at";
    /// Deleted timestamp: "deleted_at"
    pub const DELETED_AT: &str = "deleted_at";
    /// User ID foreign key: "user_id"
    pub const USER_ID: &str = "user_id";
    /// Author ID foreign key: "author_id"
    pub const AUTHOR_ID: &str = "author_id";
    /// Parent ID foreign key: "parent_id"
    pub const PARENT_ID: &str = "parent_id";
    /// Owner ID foreign key: "owner_id"
    pub const OWNER_ID: &str = "owner_id";
    /// Tenant ID for multi-tenancy: "tenant_id"
    pub const TENANT_ID: &str = "tenant_id";
    /// Organization ID: "org_id"
    pub const ORG_ID: &str = "org_id";
    /// Type discriminator: "type"
    pub const TYPE: &str = "type";
    /// Kind discriminator: "kind"
    pub const KIND: &str = "kind";
    /// Slug field: "slug"
    pub const SLUG: &str = "slug";
    /// Content field: "content"
    pub const CONTENT: &str = "content";
    /// Body field: "body"
    pub const BODY: &str = "body";
    /// Order/position field: "order"
    pub const ORDER: &str = "order";
    /// Position field: "position"
    pub const POSITION: &str = "position";
    /// Priority field: "priority"
    pub const PRIORITY: &str = "priority";
    /// Score field: "score"
    pub const SCORE: &str = "score";
    /// Count field: "count"
    pub const COUNT: &str = "count";
    /// Price field: "price"
    pub const PRICE: &str = "price";
    /// Amount field: "amount"
    pub const AMOUNT: &str = "amount";
    /// Quantity field: "quantity"
    pub const QUANTITY: &str = "quantity";
    /// Version field: "version"
    pub const VERSION: &str = "version";
    /// Age field: "age"
    pub const AGE: &str = "age";
    /// Role field: "role"
    pub const ROLE: &str = "role";
    /// Verified field: "verified"
    pub const VERIFIED: &str = "verified";
    /// Password field: "password"
    pub const PASSWORD: &str = "password";
    /// First name: "first_name"
    pub const FIRST_NAME: &str = "first_name";
    /// Last name: "last_name"
    pub const LAST_NAME: &str = "last_name";
    /// Category field: "category"
    pub const CATEGORY: &str = "category";
    /// Tags field: "tags"
    pub const TAGS: &str = "tags";
    /// Published flag: "published"
    pub const PUBLISHED: &str = "published";
    /// Published timestamp: "published_at"
    pub const PUBLISHED_AT: &str = "published_at";
    /// Expires timestamp: "expires_at"
    pub const EXPIRES_AT: &str = "expires_at";
    /// Started timestamp: "started_at"
    pub const STARTED_AT: &str = "started_at";
    /// Completed timestamp: "completed_at"
    pub const COMPLETED_AT: &str = "completed_at";
    /// Archived flag: "archived"
    pub const ARCHIVED: &str = "archived";
    /// Flagged flag: "flagged"
    pub const FLAGGED: &str = "flagged";
    /// Data field: "data"
    pub const DATA: &str = "data";
    /// Metadata field: "metadata"
    pub const METADATA: &str = "metadata";
    /// URL field: "url"
    pub const URL: &str = "url";
    /// Image URL: "image_url"
    pub const IMAGE_URL: &str = "image_url";
    /// Avatar URL: "avatar_url"
    pub const AVATAR_URL: &str = "avatar_url";
    /// File field: "file"
    pub const FILE: &str = "file";
    /// Path field: "path"
    pub const PATH: &str = "path";
    /// Attempts field: "attempts"
    pub const ATTEMPTS: &str = "attempts";
    /// Max attempts: "max_attempts"
    pub const MAX_ATTEMPTS: &str = "max_attempts";

    /// All registered static field names (sorted for binary search).
    /// Use `lookup()` to check if a field name is registered.
    ///
    /// The entries are ordered by their string *value* (not by constant name) and
    /// must stay strictly ascending; this is enforced at compile time.
    pub const ALL_SORTED: &[&str] = &[
        ACTIVE, AGE, AMOUNT, ARCHIVED, ATTEMPTS, AUTHOR_ID, AVATAR_URL, BODY, CATEGORY,
        COMPLETED_AT, CONTENT, COUNT, CREATED_AT, DATA, DELETED, DELETED_AT, DESCRIPTION,
        EMAIL, ENABLED, EXPIRES_AT, FILE, FIRST_NAME, FLAGGED, ID, IMAGE_URL, KIND,
        LAST_NAME, MAX_ATTEMPTS, METADATA, NAME, ORDER, ORG_ID, OWNER_ID, PARENT_ID,
        PASSWORD, PATH, POSITION, PRICE, PRIORITY, PUBLISHED, PUBLISHED_AT, QUANTITY,
        ROLE, SCORE, SLUG, STARTED_AT, STATUS, TAGS, TENANT_ID, TITLE, TYPE, UPDATED_AT,
        URL, USER_ID, UUID, VERIFIED, VERSION,
    ];

    /// Byte-wise `a < b`, usable in `const` context.
    ///
    /// `str`'s `Ord` is lexicographic over bytes, so this agrees with the ordering
    /// `binary_search` uses at runtime.
    const fn precedes(a: &str, b: &str) -> bool {
        let (a, b) = (a.as_bytes(), b.as_bytes());
        let mut i = 0;
        while i < a.len() && i < b.len() {
            if a[i] != b[i] {
                return a[i] < b[i];
            }
            i += 1;
        }
        // One is a prefix of the other (or they are equal): the shorter sorts first.
        a.len() < b.len()
    }

    /// Whether `names` is strictly ascending, i.e. sorted and free of duplicates.
    const fn is_strictly_ascending(names: &[&str]) -> bool {
        let mut i = 1;
        while i < names.len() {
            if !precedes(names[i - 1], names[i]) {
                return false;
            }
            i += 1;
        }
        true
    }

    // `lookup` binary-searches `ALL_SORTED`, which silently misbehaves on an
    // unsorted table. Reject a mis-ordered or duplicated entry at build time.
    const _: () = assert!(
        is_strictly_ascending(ALL_SORTED),
        "fields::ALL_SORTED must be strictly ascending for binary search"
    );

    /// Look up a field name in the static registry using binary search.
    /// Returns `Some(&'static str)` if found, `None` otherwise.
    ///
    /// The match is exact and case-sensitive.
    ///
    /// # Performance
    ///
    /// O(log n) binary search through ~57 entries.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    ///
    /// assert_eq!(fields::lookup("id"), Some("id"));
    /// assert_eq!(fields::lookup("email"), Some("email"));
    /// assert_eq!(fields::lookup("unknown"), None);
    /// ```
    #[inline]
    pub fn lookup(name: &str) -> Option<&'static str> {
        match ALL_SORTED.binary_search(&name) {
            // Return the table's own entry so the caller gets a `'static` borrow.
            Ok(index) => Some(ALL_SORTED[index]),
            Err(_) => None,
        }
    }

    /// Get a field name as `Cow<'static, str>`, using static lookup first.
    ///
    /// If the field name matches a registered static field, returns `Cow::Borrowed`.
    /// Otherwise, returns `Cow::Owned` with a copy of the input string.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use prax_query::intern::fields;
    /// use std::borrow::Cow;
    ///
    /// // Static field - zero allocation
    /// let name = fields::as_cow("id");
    /// assert!(matches!(name, Cow::Borrowed(_)));
    ///
    /// // Unknown field - allocates
    /// let name = fields::as_cow("custom_field");
    /// assert!(matches!(name, Cow::Owned(_)));
    /// ```
    #[inline]
    pub fn as_cow(name: &str) -> Cow<'static, str> {
        match lookup(name) {
            Some(registered) => Cow::Borrowed(registered),
            None => Cow::Owned(name.to_owned()),
        }
    }
}
231
thread_local! {
    /// Thread-local string interner.
    ///
    /// Uses `Arc<str>` for reference-counted string storage. Interned strings are
    /// stored in a thread-local `HashSet` for deduplication, so each thread keeps
    /// its own independent set. The `RefCell` provides the interior mutability
    /// needed to insert through the shared reference that `with` hands out.
    // The doc comment lives on the `static` itself: placed on the `thread_local!`
    // invocation it is ignored by rustdoc and triggers `unused_doc_comments`.
    static INTERNER: RefCell<HashSet<Arc<str>>> = RefCell::new(HashSet::new());
}
239
240/// Intern a string, returning a reference-counted pointer.
241///
242/// If the string has been interned before, returns the existing `Arc<str>`.
243/// Otherwise, allocates a new `Arc<str>` and stores it for future lookups.
244///
245/// # Performance
246///
247/// - First call for a string: O(n) where n is string length (allocation + hash)
248/// - Subsequent calls: O(n) for hash lookup, but no allocation
249/// - Cloning the result: O(1) (just incrementing reference count)
250///
251/// # Examples
252///
253/// ```rust
254/// use prax_query::intern::intern;
255/// use std::sync::Arc;
256///
257/// let s1 = intern("field_name");
258/// let s2 = intern("field_name");
259///
260/// // Both point to the same allocation
261/// assert!(Arc::ptr_eq(&s1, &s2));
262/// ```
263#[inline]
264pub fn intern(s: &str) -> Arc<str> {
265    INTERNER.with(|interner| {
266        let mut set = interner.borrow_mut();
267
268        // Check if already interned
269        if let Some(existing) = set.get(s) {
270            return Arc::clone(existing);
271        }
272
273        // Intern the new string
274        let arc: Arc<str> = Arc::from(s);
275        set.insert(Arc::clone(&arc));
276        arc
277    })
278}
279
280/// Intern a string and return it as a `Cow<'static, str>`.
281///
282/// This is a convenience function for use with filter APIs that expect `FieldName`.
283/// The returned `Cow` contains an owned `String` created from the interned `Arc<str>`.
284///
285/// # Note
286///
287/// For static strings, prefer using them directly (e.g., `"id".into()`) as that
288/// creates a `Cow::Borrowed` with zero allocation. Use this function only for
289/// dynamic strings that are repeated many times.
290///
291/// # Examples
292///
293/// ```rust
294/// use prax_query::intern::intern_cow;
295/// use prax_query::{Filter, FilterValue};
296///
297/// // Good: Interning a dynamic field name used in many filters
298/// let field_name = format!("field_{}", 42);
299/// let filter1 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(1));
300/// let filter2 = Filter::Equals(intern_cow(&field_name), FilterValue::Int(2));
301/// ```
302#[inline]
303pub fn intern_cow(s: &str) -> Cow<'static, str> {
304    // Note: We convert Arc<str> to String here because Cow<'static, str>
305    // can't hold an Arc. The interning benefit comes from the HashSet
306    // deduplication during the intern() call.
307    Cow::Owned(intern(s).to_string())
308}
309
310/// Clear all interned strings from the thread-local cache.
311///
312/// This is primarily useful for testing or when you know interned strings
313/// will no longer be needed and want to free memory.
314///
315/// # Examples
316///
317/// ```rust
318/// use prax_query::intern::{intern, clear_interned};
319///
320/// let _ = intern("some_field");
321/// // ... use the interned string ...
322///
323/// // Later, free all interned strings
324/// clear_interned();
325/// ```
326pub fn clear_interned() {
327    INTERNER.with(|interner| {
328        interner.borrow_mut().clear();
329    });
330}
331
332/// Get the number of currently interned strings.
333///
334/// Useful for debugging and memory profiling.
335///
336/// # Examples
337///
338/// ```rust
339/// use prax_query::intern::{intern, interned_count, clear_interned};
340///
341/// clear_interned();
342/// assert_eq!(interned_count(), 0);
343///
344/// intern("field1");
345/// intern("field2");
346/// intern("field1"); // Already interned, doesn't increase count
347///
348/// assert_eq!(interned_count(), 2);
349/// ```
350pub fn interned_count() -> usize {
351    INTERNER.with(|interner| interner.borrow().len())
352}
353
// Unit tests for the interner and the static field registry.
//
// The interner is thread-local, so each test starts by clearing it to avoid
// depending on whatever the current thread interned earlier.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_intern_returns_same_arc() {
        clear_interned();

        let s1 = intern("test_field");
        let s2 = intern("test_field");

        // Same Arc pointer
        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "test_field");
    }

    #[test]
    fn test_intern_different_strings() {
        clear_interned();

        let s1 = intern("field_a");
        let s2 = intern("field_b");

        // Different Arc pointers
        assert!(!Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "field_a");
        assert_eq!(&*s2, "field_b");
    }

    #[test]
    fn test_interned_count() {
        clear_interned();

        assert_eq!(interned_count(), 0);

        intern("a");
        assert_eq!(interned_count(), 1);

        intern("b");
        assert_eq!(interned_count(), 2);

        intern("a"); // Already interned
        assert_eq!(interned_count(), 2);
    }

    #[test]
    fn test_clear_interned() {
        clear_interned();

        intern("x");
        intern("y");
        assert_eq!(interned_count(), 2);

        clear_interned();
        assert_eq!(interned_count(), 0);
    }

    #[test]
    fn test_intern_cow() {
        clear_interned();

        let cow = intern_cow("field_name");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "field_name");
    }

    #[test]
    fn test_predefined_fields() {
        // Just verify the constants exist and have expected values
        assert_eq!(fields::ID, "id");
        assert_eq!(fields::EMAIL, "email");
        assert_eq!(fields::CREATED_AT, "created_at");
        assert_eq!(fields::USER_ID, "user_id");
        assert_eq!(fields::TENANT_ID, "tenant_id");
    }

    #[test]
    fn test_intern_empty_string() {
        clear_interned();

        let s1 = intern("");
        let s2 = intern("");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "");
    }

    #[test]
    fn test_intern_unicode() {
        clear_interned();

        let s1 = intern("フィールド");
        let s2 = intern("フィールド");

        assert!(Arc::ptr_eq(&s1, &s2));
        assert_eq!(&*s1, "フィールド");
    }

    #[test]
    fn test_fields_lookup() {
        // Test that lookup finds registered fields
        assert_eq!(fields::lookup("id"), Some("id"));
        assert_eq!(fields::lookup("email"), Some("email"));
        assert_eq!(fields::lookup("created_at"), Some("created_at"));
        assert_eq!(fields::lookup("user_id"), Some("user_id"));
        assert_eq!(fields::lookup("status"), Some("status"));

        // Test that lookup returns None for unknown fields
        assert_eq!(fields::lookup("unknown_field"), None);
        assert_eq!(fields::lookup("custom_field_123"), None);
    }

    #[test]
    fn test_fields_lookup_finds_every_entry() {
        // Every name in the table must be reachable through the binary search.
        for &field in fields::ALL_SORTED {
            assert_eq!(fields::lookup(field), Some(field));
        }
    }

    #[test]
    fn test_fields_as_cow() {
        // Known field - should be Borrowed
        let cow = fields::as_cow("id");
        assert!(matches!(cow, Cow::Borrowed(_)));
        assert_eq!(cow.as_ref(), "id");

        // Unknown field - should be Owned
        let cow = fields::as_cow("custom_field");
        assert!(matches!(cow, Cow::Owned(_)));
        assert_eq!(cow.as_ref(), "custom_field");
    }

    #[test]
    fn test_fields_all_sorted() {
        // `lookup` binary-searches this table, so it must be strictly ascending:
        // an out-of-order entry breaks the search and a duplicate is a table bug.
        for pair in fields::ALL_SORTED.windows(2) {
            let (earlier, later) = (pair[0], pair[1]);
            assert!(
                earlier < later,
                "ALL_SORTED is not strictly sorted: {:?} is listed before {:?}",
                earlier,
                later
            );
        }
    }
}
494