rustledger_core/
intern.rs

1//! String interning for accounts and currencies.
2//!
3//! String interning reduces memory usage by storing each unique string once
4//! and using references to that single copy. This is especially useful for
5//! account names and currencies which appear repeatedly throughout a ledger.
6//!
7//! # Example
8//!
9//! ```
10//! use rustledger_core::intern::StringInterner;
11//!
12//! let mut interner = StringInterner::new();
13//!
14//! let s1 = interner.intern("Expenses:Food");
15//! let s2 = interner.intern("Expenses:Food");
16//! let s3 = interner.intern("Assets:Bank");
17//!
18//! // s1 and s2 point to the same string
19//! assert!(std::ptr::eq(s1.as_str().as_ptr(), s2.as_str().as_ptr()));
20//!
21//! // s3 is different
22//! assert!(!std::ptr::eq(s1.as_str().as_ptr(), s3.as_str().as_ptr()));
23//! ```
24
25use std::collections::HashSet;
26use std::sync::Arc;
27
28use serde::{Deserialize, Deserializer, Serialize, Serializer};
29
/// An interned string reference.
///
/// This is a thin wrapper around `Arc<str>`: cloning only bumps a reference
/// count, and equality checks can short-circuit on pointer identity.
///
/// Handles returned by the same [`StringInterner`] for equal content share
/// one allocation. Handles built directly with [`InternedStr::new`] each own
/// a separate allocation, but still compare, order, and hash by content.
#[derive(Debug, Clone, Eq)]
pub struct InternedStr(Arc<str>);
37
// rkyv support: the `AsInternedStr` wrapper archives an `InternedStr` as a string
39#[cfg(feature = "rkyv")]
40pub use rkyv_impl::AsInternedStr;
41
/// Type alias for the rkyv wrapper to put on `Option<InternedStr>` fields.
///
/// Use: `#[rkyv(with = AsOptionInternedStr)]`, which is the same as writing
/// `#[rkyv(with = rkyv::with::Map<AsInternedStr>)]`.
#[cfg(feature = "rkyv")]
pub type AsOptionInternedStr = rkyv::with::Map<AsInternedStr>;
46
/// Type alias for the rkyv wrapper to put on `Vec<InternedStr>` fields.
///
/// Use: `#[rkyv(with = AsVecInternedStr)]`, which is the same as writing
/// `#[rkyv(with = rkyv::with::Map<AsInternedStr>)]`.
#[cfg(feature = "rkyv")]
pub type AsVecInternedStr = rkyv::with::Map<AsInternedStr>;
51
#[cfg(feature = "rkyv")]
mod rkyv_impl {
    use super::InternedStr;
    use rkyv::rancor::Fallible;
    use rkyv::string::{ArchivedString, StringResolver};
    use rkyv::with::{ArchiveWith, DeserializeWith, SerializeWith};
    use rkyv::Place;

    /// rkyv `with`-wrapper that archives an `InternedStr` as an `ArchivedString`.
    ///
    /// Attach it to `InternedStr` fields with `#[rkyv(with = AsInternedStr)]`.
    pub struct AsInternedStr;

    impl ArchiveWith<InternedStr> for AsInternedStr {
        type Archived = ArchivedString;
        type Resolver = StringResolver;

        /// Write the archived form of `value` into `out` from its string content.
        fn resolve_with(value: &InternedStr, resolver: Self::Resolver, out: Place<Self::Archived>) {
            let text: &str = value.as_str();
            ArchivedString::resolve_from_str(text, resolver, out);
        }
    }

    impl<S> SerializeWith<InternedStr, S> for AsInternedStr
    where
        S: Fallible + rkyv::ser::Writer + rkyv::ser::Allocator + ?Sized,
        S::Error: rkyv::rancor::Source,
    {
        /// Serialize the string content of `value` and hand back its resolver.
        fn serialize_with(
            value: &InternedStr,
            serializer: &mut S,
        ) -> Result<Self::Resolver, S::Error> {
            let text: &str = value.as_str();
            ArchivedString::serialize_from_str(text, serializer)
        }
    }

    impl<D> DeserializeWith<ArchivedString, InternedStr, D> for AsInternedStr
    where
        D: Fallible + ?Sized,
    {
        /// Rebuild an `InternedStr` from the archived text.
        ///
        /// The value is created with `InternedStr::new`, so it owns a fresh
        /// allocation and is not registered with any interner.
        fn deserialize_with(
            archived: &ArchivedString,
            _deserializer: &mut D,
        ) -> Result<InternedStr, D::Error> {
            let text: &str = archived.as_str();
            Ok(InternedStr::new(text))
        }
    }
}
98
99impl Serialize for InternedStr {
100    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
101        self.0.serialize(serializer)
102    }
103}
104
105impl<'de> Deserialize<'de> for InternedStr {
106    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
107        let s = String::deserialize(deserializer)?;
108        Ok(Self::new(s))
109    }
110}
111
112impl PartialOrd for InternedStr {
113    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
114        Some(self.cmp(other))
115    }
116}
117
118impl Ord for InternedStr {
119    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
120        self.0.cmp(&other.0)
121    }
122}
123
124impl InternedStr {
125    /// Create a new interned string (without using an interner).
126    /// Prefer using `StringInterner::intern` for deduplication.
127    pub fn new(s: impl Into<Arc<str>>) -> Self {
128        Self(s.into())
129    }
130
131    /// Get the string slice.
132    pub fn as_str(&self) -> &str {
133        &self.0
134    }
135
136    /// Check if two interned strings share the same allocation.
137    /// This is O(1) pointer comparison.
138    pub fn ptr_eq(&self, other: &Self) -> bool {
139        Arc::ptr_eq(&self.0, &other.0)
140    }
141}
142
143impl PartialEq for InternedStr {
144    fn eq(&self, other: &Self) -> bool {
145        // Fast path: pointer comparison
146        if Arc::ptr_eq(&self.0, &other.0) {
147            return true;
148        }
149        // Slow path: string comparison
150        self.0 == other.0
151    }
152}
153
154impl std::hash::Hash for InternedStr {
155    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
156        self.0.hash(state);
157    }
158}
159
160impl std::fmt::Display for InternedStr {
161    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
162        write!(f, "{}", self.0)
163    }
164}
165
166impl AsRef<str> for InternedStr {
167    fn as_ref(&self) -> &str {
168        &self.0
169    }
170}
171
172impl std::ops::Deref for InternedStr {
173    type Target = str;
174
175    fn deref(&self) -> &Self::Target {
176        &self.0
177    }
178}
179
180impl From<&str> for InternedStr {
181    fn from(s: &str) -> Self {
182        Self::new(s)
183    }
184}
185
186impl From<String> for InternedStr {
187    fn from(s: String) -> Self {
188        Self::new(s)
189    }
190}
191
192impl From<&String> for InternedStr {
193    fn from(s: &String) -> Self {
194        Self::new(s.as_str())
195    }
196}
197
198impl From<&Self> for InternedStr {
199    fn from(s: &Self) -> Self {
200        s.clone()
201    }
202}
203
204impl PartialEq<str> for InternedStr {
205    fn eq(&self, other: &str) -> bool {
206        self.as_str() == other
207    }
208}
209
210impl PartialEq<&str> for InternedStr {
211    fn eq(&self, other: &&str) -> bool {
212        self.as_str() == *other
213    }
214}
215
216impl PartialEq<String> for InternedStr {
217    fn eq(&self, other: &String) -> bool {
218        self.as_str() == other
219    }
220}
221
222impl Default for InternedStr {
223    fn default() -> Self {
224        Self::new("")
225    }
226}
227
228impl std::borrow::Borrow<str> for InternedStr {
229    fn borrow(&self) -> &str {
230        self.as_str()
231    }
232}
233
234/// A string interner that deduplicates strings.
235///
236/// This is useful for reducing memory usage when many strings with the
237/// same content are created, such as account names and currencies in
238/// a large ledger.
239#[derive(Debug, Default)]
240pub struct StringInterner {
241    /// Set of all interned strings.
242    strings: HashSet<Arc<str>>,
243}
244
245impl StringInterner {
246    /// Create a new empty interner.
247    pub fn new() -> Self {
248        Self {
249            strings: HashSet::new(),
250        }
251    }
252
253    /// Create an interner with pre-allocated capacity.
254    pub fn with_capacity(capacity: usize) -> Self {
255        Self {
256            strings: HashSet::with_capacity(capacity),
257        }
258    }
259
260    /// Intern a string.
261    ///
262    /// If the string already exists in the interner, returns a reference
263    /// to the existing copy. Otherwise, stores the string and returns
264    /// a reference to the new copy.
265    pub fn intern(&mut self, s: &str) -> InternedStr {
266        if let Some(existing) = self.strings.get(s) {
267            InternedStr(existing.clone())
268        } else {
269            let arc: Arc<str> = s.into();
270            self.strings.insert(arc.clone());
271            InternedStr(arc)
272        }
273    }
274
275    /// Intern a string, taking ownership.
276    pub fn intern_string(&mut self, s: String) -> InternedStr {
277        if let Some(existing) = self.strings.get(s.as_str()) {
278            InternedStr(existing.clone())
279        } else {
280            let arc: Arc<str> = s.into();
281            self.strings.insert(arc.clone());
282            InternedStr(arc)
283        }
284    }
285
286    /// Check if a string is already interned.
287    pub fn contains(&self, s: &str) -> bool {
288        self.strings.contains(s)
289    }
290
291    /// Get the number of unique strings.
292    pub fn len(&self) -> usize {
293        self.strings.len()
294    }
295
296    /// Check if the interner is empty.
297    pub fn is_empty(&self) -> bool {
298        self.strings.is_empty()
299    }
300
301    /// Get an iterator over all interned strings.
302    pub fn iter(&self) -> impl Iterator<Item = &str> {
303        self.strings.iter().map(std::convert::AsRef::as_ref)
304    }
305
306    /// Clear all interned strings.
307    pub fn clear(&mut self) {
308        self.strings.clear();
309    }
310}
311
312/// A specialized interner for account names.
313///
314/// Account names follow a specific pattern (Type:Component:Component)
315/// and this interner can provide additional functionality like
316/// extracting components.
317#[derive(Debug, Default)]
318pub struct AccountInterner {
319    interner: StringInterner,
320}
321
322impl AccountInterner {
323    /// Create a new account interner.
324    pub fn new() -> Self {
325        Self {
326            interner: StringInterner::new(),
327        }
328    }
329
330    /// Intern an account name.
331    pub fn intern(&mut self, account: &str) -> InternedStr {
332        self.interner.intern(account)
333    }
334
335    /// Get the number of unique accounts.
336    pub fn len(&self) -> usize {
337        self.interner.len()
338    }
339
340    /// Check if empty.
341    pub fn is_empty(&self) -> bool {
342        self.interner.is_empty()
343    }
344
345    /// Get all interned accounts.
346    pub fn accounts(&self) -> impl Iterator<Item = &str> {
347        self.interner.iter()
348    }
349
350    /// Get accounts matching a prefix.
351    pub fn accounts_with_prefix<'a>(&'a self, prefix: &'a str) -> impl Iterator<Item = &'a str> {
352        self.interner.iter().filter(move |s| s.starts_with(prefix))
353    }
354}
355
356/// A specialized interner for currency codes.
357///
358/// Currency codes are typically short (3-4 characters) and uppercase.
359#[derive(Debug, Default)]
360pub struct CurrencyInterner {
361    interner: StringInterner,
362}
363
364impl CurrencyInterner {
365    /// Create a new currency interner.
366    pub fn new() -> Self {
367        Self {
368            interner: StringInterner::new(),
369        }
370    }
371
372    /// Intern a currency code.
373    pub fn intern(&mut self, currency: &str) -> InternedStr {
374        self.interner.intern(currency)
375    }
376
377    /// Get the number of unique currencies.
378    pub fn len(&self) -> usize {
379        self.interner.len()
380    }
381
382    /// Check if empty.
383    pub fn is_empty(&self) -> bool {
384        self.interner.is_empty()
385    }
386
387    /// Get all interned currencies.
388    pub fn currencies(&self) -> impl Iterator<Item = &str> {
389        self.interner.iter()
390    }
391}
392
393/// Thread-safe string interner using a mutex.
394///
395/// Use this when interning strings from multiple threads.
396#[derive(Debug, Default)]
397pub struct SyncStringInterner {
398    inner: std::sync::Mutex<StringInterner>,
399}
400
401impl SyncStringInterner {
402    /// Create a new thread-safe interner.
403    pub fn new() -> Self {
404        Self {
405            inner: std::sync::Mutex::new(StringInterner::new()),
406        }
407    }
408
409    /// Intern a string (thread-safe).
410    pub fn intern(&self, s: &str) -> InternedStr {
411        self.inner.lock().unwrap().intern(s)
412    }
413
414    /// Get the number of unique strings.
415    pub fn len(&self) -> usize {
416        self.inner.lock().unwrap().len()
417    }
418
419    /// Check if empty.
420    pub fn is_empty(&self) -> bool {
421        self.inner.lock().unwrap().is_empty()
422    }
423}
424
#[cfg(test)]
mod tests {
    use super::*;

    /// Equality is by content, including against `&str` and `String`.
    #[test]
    fn test_interned_str_equality() {
        let first = InternedStr::new("hello");
        let twin = InternedStr::new("hello");
        let different = InternedStr::new("world");

        assert_eq!(first, twin);
        assert_ne!(first, different);
        assert_eq!(first, "hello");
        assert_eq!(first, "hello".to_string());
    }

    /// Interning the same text twice yields handles to one allocation.
    #[test]
    fn test_interner_deduplication() {
        let mut pool = StringInterner::new();

        let food_a = pool.intern("Expenses:Food");
        let food_b = pool.intern("Expenses:Food");
        let bank = pool.intern("Assets:Bank");

        // Same content from the same interner: one allocation.
        assert!(food_a.ptr_eq(&food_b));

        // Different content: separate allocation.
        assert!(!food_a.ptr_eq(&bank));

        // Two distinct strings were stored in total.
        assert_eq!(pool.len(), 2);
    }

    /// `contains` reflects exactly what has been interned.
    #[test]
    fn test_interner_contains() {
        let mut pool = StringInterner::new();

        pool.intern("hello");

        assert!(pool.contains("hello"));
        assert!(!pool.contains("world"));
    }

    /// Accounts are counted once each and can be filtered by prefix.
    #[test]
    fn test_account_interner() {
        let mut accounts = AccountInterner::new();

        for name in [
            "Expenses:Food:Coffee",
            "Expenses:Food:Groceries",
            "Assets:Bank:Checking",
        ] {
            accounts.intern(name);
        }

        assert_eq!(accounts.len(), 3);

        let expense_count = accounts.accounts_with_prefix("Expenses:").count();
        assert_eq!(expense_count, 2);
    }

    /// Currency codes are deduplicated like any other string.
    #[test]
    fn test_currency_interner() {
        let mut currencies = CurrencyInterner::new();

        let dollar_a = currencies.intern("USD");
        let dollar_b = currencies.intern("USD");
        let euro = currencies.intern("EUR");

        assert!(dollar_a.ptr_eq(&dollar_b));
        assert!(!dollar_a.ptr_eq(&euro));
        assert_eq!(currencies.len(), 2);
    }

    /// Concurrent interning of one string stores it only once.
    #[test]
    fn test_sync_interner() {
        use std::thread;

        let shared = std::sync::Arc::new(SyncStringInterner::new());

        let mut workers = Vec::with_capacity(4);
        for _ in 0..4 {
            let pool = shared.clone();
            workers.push(thread::spawn(move || {
                for _ in 0..100 {
                    pool.intern("shared-string");
                }
            }));
        }

        for worker in workers {
            worker.join().unwrap();
        }

        // 400 intern calls, but only one distinct string was stored.
        assert_eq!(shared.len(), 1);
    }

    /// Separately allocated but equal values hash to the same map entry.
    #[test]
    fn test_interned_str_hash() {
        use std::collections::HashMap;

        let stored_key = InternedStr::new("key");
        let lookup_key = InternedStr::new("key");

        let mut table = HashMap::new();
        table.insert(stored_key, 1);

        // The lookup key is a different allocation with the same content.
        assert_eq!(table.get(&lookup_key), Some(&1));
    }
}
534
535// rkyv wrapper for rust_decimal::Decimal - serialize as fixed 16 bytes
536#[cfg(feature = "rkyv")]
537pub use rkyv_decimal::AsDecimal;
538
#[cfg(feature = "rkyv")]
mod rkyv_decimal {
    use rkyv::rancor::Fallible;
    use rkyv::with::{ArchiveWith, DeserializeWith, SerializeWith};
    use rkyv::Place;
    use rust_decimal::Decimal;

    /// rkyv `with`-wrapper that archives a `Decimal` as a fixed 16-byte array.
    ///
    /// The bytes are whatever `Decimal::serialize` produces, and are turned
    /// back into a value with `Decimal::deserialize`.
    pub struct AsDecimal;

    impl ArchiveWith<Decimal> for AsDecimal {
        type Archived = [u8; 16];
        type Resolver = [(); 16];

        /// Write the 16-byte encoding of `value` into `out`.
        fn resolve_with(value: &Decimal, resolver: Self::Resolver, out: Place<Self::Archived>) {
            let encoded: [u8; 16] = value.serialize();
            // Delegate to rkyv's own `Archive` impl for `[u8; 16]` to fill the place.
            <[u8; 16] as rkyv::Archive>::resolve(&encoded, resolver, out);
        }
    }

    impl<S> SerializeWith<Decimal, S> for AsDecimal
    where
        S: Fallible + ?Sized,
    {
        /// Nothing is written out of line; the bytes are stored inline by `resolve_with`.
        fn serialize_with(
            _value: &Decimal,
            _serializer: &mut S,
        ) -> Result<Self::Resolver, S::Error> {
            Ok([(); 16])
        }
    }

    impl<D> DeserializeWith<[u8; 16], Decimal, D> for AsDecimal
    where
        D: Fallible + ?Sized,
    {
        /// Rebuild the `Decimal` from its archived 16 bytes.
        fn deserialize_with(
            archived: &[u8; 16],
            _deserializer: &mut D,
        ) -> Result<Decimal, D::Error> {
            let encoded: [u8; 16] = *archived;
            Ok(Decimal::deserialize(encoded))
        }
    }
}
583
584// rkyv wrapper for chrono::NaiveDate - serialize as i32 (days from CE)
585#[cfg(feature = "rkyv")]
586pub use rkyv_date::AsNaiveDate;
587
#[cfg(feature = "rkyv")]
mod rkyv_date {
    use chrono::{Datelike, NaiveDate};
    use rkyv::rancor::Fallible;
    use rkyv::with::{ArchiveWith, DeserializeWith, SerializeWith};
    use rkyv::Place;

    /// rkyv `with`-wrapper that archives a `NaiveDate` as an `i32` day count.
    ///
    /// The stored number is `NaiveDate::num_days_from_ce()`, i.e. days from
    /// the Common Era, kept as rkyv's archived `i32`.
    pub struct AsNaiveDate;

    impl ArchiveWith<NaiveDate> for AsNaiveDate {
        type Archived = rkyv::Archived<i32>;
        type Resolver = ();

        /// Write the day count of `date` into `out`.
        fn resolve_with(date: &NaiveDate, _resolver: Self::Resolver, out: Place<Self::Archived>) {
            let day_count: i32 = date.num_days_from_ce();
            // Delegate to rkyv's own `Archive` impl for `i32` to fill the place.
            <i32 as rkyv::Archive>::resolve(&day_count, (), out);
        }
    }

    impl<S> SerializeWith<NaiveDate, S> for AsNaiveDate
    where
        S: Fallible + ?Sized,
    {
        /// Nothing is written out of line; the day count is stored inline by `resolve_with`.
        fn serialize_with(
            _date: &NaiveDate,
            _serializer: &mut S,
        ) -> Result<Self::Resolver, S::Error> {
            Ok(())
        }
    }

    impl<D> DeserializeWith<rkyv::Archived<i32>, NaiveDate, D> for AsNaiveDate
    where
        D: Fallible + ?Sized,
    {
        /// Rebuild the `NaiveDate` from its archived day count.
        ///
        /// # Panics
        ///
        /// Panics with "valid date" when the archived number is not a day
        /// count that `NaiveDate::from_num_days_from_ce_opt` accepts.
        fn deserialize_with(
            archived: &rkyv::Archived<i32>,
            _deserializer: &mut D,
        ) -> Result<NaiveDate, D::Error> {
            let day_count = archived.to_native();
            let date = NaiveDate::from_num_days_from_ce_opt(day_count).expect("valid date");
            Ok(date)
        }
    }
}