Skip to main content

rustledger_core/
identifiers.rs

1//! Domain-typed identifiers: [`Account`], [`Currency`], [`Tag`], [`Link`].
2//!
3//! These newtype wrappers around [`InternedStr`] give the type system
4//! enough vocabulary to distinguish the different kinds of identifier
5//! the beancount AST carries. Pre-newtype, every identifier was just
6//! an `InternedStr` — passing an account where a currency was
7//! expected (or vice versa) compiled fine, and the bug surfaced
8//! only at runtime via wrong-but-validly-shaped string matching.
9//! Now the same mistake is a type error.
10//!
11//! # Design
12//!
13//! Each newtype is a transparent wrapper:
14//!
15//! - `Deref<Target = str>` so calls like `account.starts_with("Assets:")`
16//!   work without `.as_str()` everywhere.
17//! - `AsRef<str>` and [`Borrow<str>`](std::borrow::Borrow) so `HashMap` lookups by `&str`
18//!   keep working (`some_map.get("Assets:Bank")` where the map is
19//!   keyed by [`Account`]).
20//! - `PartialEq` against `str` / `&str` / `String` / `InternedStr` /
21//!   the newtype's own type, so `account == "Assets:Bank"` keeps
22//!   reading naturally without coercion.
23//! - `From<&str>`, `From<String>`, `From<InternedStr>` for
24//!   construction at call sites that have a string and need the
25//!   typed form.
26//! - `Hash` delegates to the inner `InternedStr`'s hash, so
27//!   `HashMap<Account, V>` and `HashMap<InternedStr, V>` produce
28//!   the same bucketing for the same underlying string.
29//!
30//! What you DON'T get for free is cross-newtype assignment:
31//!
32//! ```compile_fail
33//! # use rustledger_core::{Account, Currency};
34//! fn want_currency(_: Currency) {}
35//! let acct = Account::from("Assets:Bank");
36//! want_currency(acct); // ← type error
37//! ```
38//!
39//! Conversions between newtypes are deliberate (`Currency::from(account.into_interned())`)
40//! so the compiler can flag accidental crossings.
41//!
42//! # When to use which
43//!
44//! All four newtypes — [`Currency`], [`Account`], [`Tag`], and
45//! [`Link`] — are fully plumbed through the AST, including
46//! `MetaValue` variants:
47//!
48//! - [`Currency`]: `Commodity.currency`, `Open.currencies` entries,
49//!   `Amount.currency`, `CostSpec.currency`, `Price.currency`,
50//!   `IncompleteAmount::CurrencyOnly`, `MetaValue::Currency`.
51//! - [`Account`]: `Open.account`, `Close.account`, `Balance.account`,
52//!   `Pad.account` / `source_account`, `Note.account`,
53//!   `Document.account`, `Posting.account`, `MetaValue::Account`.
54//! - [`Tag`]: `Transaction.tags` entries, `pushtag`/`poptag` stack,
55//!   `Document.tags`, `MetaValue::Tag`.
56//! - [`Link`]: `Transaction.links` entries, `Document.links`,
57//!   `MetaValue::Link`.
58//!
59//! The plugin wire-format type `rustledger_plugin_types::MetaValueData`
60//! deliberately keeps `String` payloads — `plugin-types` is a minimal
61//! WASM-compatible crate that does not depend on `rustledger-core`,
62//! and plugins run without access to the workspace interner anyway.
63//! The convert boundary
64//! (`rustledger_plugin::convert::from_wrapper`) wraps the incoming
65//! strings in fresh `Arc<str>`s; the cross-file canonicalization to
66//! one `Arc<str>` per identifier string happens later in
67//! `rustledger_loader::dedup::reintern_directives`, which walks both
68//! AST identifier fields and `MetaValue::*` payloads inside metadata.
69
70use crate::InternedStr;
71#[cfg(feature = "rkyv")]
72use crate::intern::AsInternedStr;
73
// Generates one domain-typed identifier: a public tuple struct `$name`
// wrapping a single `InternedStr`, together with its whole trait surface
// (equality, hashing, ordering, string borrowing, conversions, serde).
//
// Arguments:
// - `$name`: identifier of the type to define.
// - `$kind`: string literal spliced into the generated type's rustdoc
//   summary ("Domain-typed identifier for a <kind>. ...").
//
// Every impl below forwards to the wrapped `InternedStr`; the newtype adds
// no state or behavior of its own, only a distinct type.
macro_rules! domain_newtype {
    ($name:ident, $kind:literal) => {
        #[doc = concat!("Domain-typed identifier for a ", $kind, ". See the [module docs](crate::identifiers) for rationale.")]
        // Only `Debug`, `Clone` and the `Eq` marker are derived;
        // `PartialEq`, `Hash`, `PartialOrd` and `Ord` are written by hand
        // further down, each delegating to the inner `InternedStr`.
        #[derive(Debug, Clone, Eq)]
        #[cfg_attr(
            feature = "rkyv",
            derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
        )]
        #[repr(transparent)]
        pub struct $name(
            #[cfg_attr(feature = "rkyv", rkyv(with = AsInternedStr))] InternedStr,
        );

        impl $name {
            /// Construct from anything that can become an `InternedStr`.
            #[must_use]
            pub fn new(s: impl Into<InternedStr>) -> Self {
                Self(s.into())
            }

            /// Borrow the underlying string slice.
            #[must_use]
            pub fn as_str(&self) -> &str {
                self.0.as_str()
            }

            /// Borrow the underlying `InternedStr`. Useful when interfacing
            /// with APIs that still take untyped interned strings.
            #[must_use]
            pub const fn as_interned(&self) -> &InternedStr {
                &self.0
            }

            /// Unwrap to the underlying `InternedStr`, discarding the
            /// domain tag. Use deliberately — this is the explicit
            /// "I'm crossing types on purpose" escape hatch.
            #[must_use]
            pub fn into_interned(self) -> InternedStr {
                self.0
            }

            /// Pointer-equality on the underlying `Arc<str>`.
            ///
            /// `true` iff both values point at the same interner allocation.
            /// Used by cross-file dedup tests to assert that the loader's
            /// re-interning pass canonicalized the storage; not a substitute
            /// for `==` (which is the byte-equality semantics callers want).
            #[must_use]
            pub fn ptr_eq(&self, other: &Self) -> bool {
                self.0.ptr_eq(&other.0)
            }

            /// Mutable access to the underlying `InternedStr`.
            /// Used by the loader's cross-file interning pass
            /// (`rustledger_loader::dedup`) to canonicalize the
            /// `Arc` after merging directives from multiple files —
            /// the value semantics don't change, but the storage is
            /// re-pointed at the workspace-wide interner's copy.
            pub const fn as_interned_mut(&mut self) -> &mut InternedStr {
                &mut self.0
            }
        }

        // Same-type equality: forwards to `InternedStr`'s `==`.
        impl PartialEq for $name {
            fn eq(&self, other: &Self) -> bool {
                self.0 == other.0
            }
        }

        // Forward-direction comparisons against plain string types, so
        // `name == "literal"`, `name == some_string` and
        // `name == interned` compile without an explicit conversion.
        // Each one relies on the matching `PartialEq<_>` impl on
        // `InternedStr`.
        impl PartialEq<str> for $name {
            fn eq(&self, other: &str) -> bool {
                self.0 == *other
            }
        }

        impl PartialEq<&str> for $name {
            fn eq(&self, other: &&str) -> bool {
                self.0 == **other
            }
        }

        impl PartialEq<String> for $name {
            fn eq(&self, other: &String) -> bool {
                self.0 == *other
            }
        }

        impl PartialEq<InternedStr> for $name {
            fn eq(&self, other: &InternedStr) -> bool {
                self.0 == *other
            }
        }

        // Reverse-direction comparisons (`"literal" == name`,
        // `interned == name`).
        // NOTE(review): there is no `impl PartialEq<$name> for String`, so
        // `some_string == name` is the one direction not covered — confirm
        // whether that omission is intentional.
        impl PartialEq<$name> for &str {
            fn eq(&self, other: &$name) -> bool {
                other.0 == **self
            }
        }

        impl PartialEq<$name> for str {
            fn eq(&self, other: &$name) -> bool {
                other.0 == *self
            }
        }

        impl PartialEq<$name> for InternedStr {
            fn eq(&self, other: &$name) -> bool {
                *self == other.0
            }
        }

        // Hashing forwards to the inner `InternedStr`.
        // NOTE(review): because `Borrow<str>` is implemented below, lookups
        // such as `HashMap<$name, V>::get("...")` are only correct if
        // `InternedStr` hashes exactly like `str`; that impl is not visible
        // in this file (the `test_hash_borrow_str` test exercises it).
        impl std::hash::Hash for $name {
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                self.0.hash(state);
            }
        }

        // `partial_cmp` is defined in terms of `cmp`, so the two orderings
        // cannot disagree.
        impl std::cmp::PartialOrd for $name {
            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }

        // Total order: whatever `InternedStr`'s `Ord` defines.
        impl std::cmp::Ord for $name {
            fn cmp(&self, other: &Self) -> std::cmp::Ordering {
                self.0.cmp(&other.0)
            }
        }

        // Lets `str` methods (`starts_with`, `contains`, `len`, ...) be
        // called directly on the newtype.
        impl std::ops::Deref for $name {
            type Target = str;
            fn deref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.0.as_str()
            }
        }

        // Enables `&str`-keyed lookups in collections keyed by `$name`.
        impl std::borrow::Borrow<str> for $name {
            fn borrow(&self) -> &str {
                self.0.as_str()
            }
        }

        // Displays exactly as the inner `InternedStr` does (formatter
        // flags such as width/padding are passed through unchanged).
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }

        // Construction from string-like inputs. Each goes through
        // `InternedStr::from`; the borrowed `&String` form converts via
        // `as_str()` rather than cloning the `String`.
        impl From<&str> for $name {
            fn from(s: &str) -> Self {
                Self(InternedStr::from(s))
            }
        }

        impl From<String> for $name {
            fn from(s: String) -> Self {
                Self(InternedStr::from(s))
            }
        }

        impl From<&String> for $name {
            fn from(s: &String) -> Self {
                Self(InternedStr::from(s.as_str()))
            }
        }

        // Wrapping an existing `InternedStr` moves it in as-is.
        impl From<InternedStr> for $name {
            fn from(s: InternedStr) -> Self {
                Self(s)
            }
        }

        // Borrowed forms clone the source value.
        impl From<&InternedStr> for $name {
            fn from(s: &InternedStr) -> Self {
                Self(s.clone())
            }
        }

        impl From<&$name> for $name {
            fn from(s: &$name) -> Self {
                s.clone()
            }
        }

        // Wraps `InternedStr::default()` — presumably the empty string;
        // confirm against `InternedStr`.
        impl Default for $name {
            fn default() -> Self {
                Self(InternedStr::default())
            }
        }

        // serde: the newtype is transparent on the wire — it serializes
        // and deserializes exactly as the inner `InternedStr` does, with
        // no extra wrapper layer.
        impl serde::Serialize for $name {
            fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
                self.0.serialize(serializer)
            }
        }

        impl<'de> serde::Deserialize<'de> for $name {
            fn deserialize<D: serde::Deserializer<'de>>(
                deserializer: D,
            ) -> Result<Self, D::Error> {
                Ok(Self(InternedStr::deserialize(deserializer)?))
            }
        }

        // rkyv archive is `#[derive]`'d above using the field
        // attribute `#[rkyv(with = AsInternedStr)]` — same wrapper
        // pattern `Posting.account` (and every other `InternedStr`
        // field) uses. That goes through `ArchivedString` via the
        // `AsInternedStr` adapter, picking up bytecheck/CheckBytes
        // for free.
    };
}
292
// The four identifier newtypes. The second argument is documentation only:
// `domain_newtype!` splices it into the generated type's rustdoc summary.
domain_newtype!(Account, "beancount account name (e.g. `Assets:Cash:USD`)");
domain_newtype!(Currency, "currency code (e.g. `USD`, `EUR`, `AAPL`)");
domain_newtype!(Tag, "beancount tag (e.g. `#travel`)");
domain_newtype!(Link, "beancount link (e.g. `^invoice-2024-01`)");
297
/// Reports whether `child` names `parent` itself or an account nested
/// somewhere beneath it.
///
/// Account-scoped operations such as Beancount's `balance Assets:Bank`
/// assertion cover `Assets:Bank` as well as `Assets:Bank:Checking`,
/// `Assets:Bank:Savings`, and so on. Nesting is decided on segment
/// boundaries: `child` must be exactly `parent`, or `parent` followed
/// immediately by `:`. Sharing leading characters is not enough —
/// `Assets:BankAlias` is NOT under `Assets:Bank`.
///
/// The comparison is case-sensitive. An empty `parent` matches an empty
/// `child`, and otherwise only a `child` that itself begins with `:`.
///
/// Taking `&str` for both arguments lets callers pass `Account`, `&str`,
/// or `String` values interchangeably. No allocation is performed.
///
/// This is the single shared definition of the rule previously
/// duplicated in
/// `rustledger-validate::validators::balance::sum_account_and_subaccounts`
/// and `rustledger-lsp::handlers::code_lens::is_account_or_subaccount`.
#[must_use]
pub fn is_subaccount_or_equal(child: &str, parent: &str) -> bool {
    match child.strip_prefix(parent) {
        // Nothing left over means the names are identical; otherwise the
        // remainder must open a new segment.
        Some(rest) => rest.is_empty() || rest.starts_with(':'),
        None => false,
    }
}
322
/// The five Beancount root account types, in declaration order.
///
/// The canonical root-type list for core consumers: the FFI surfaces
/// (`util.types`, `util.getAccountType`), the query account-type sort order, and
/// the LSP account-type check all reference this so they cannot drift.
/// (`rustledger-completion` keeps its own copy — it is a minimal crate that does
/// not depend on `rustledger-core`.)
///
/// These are the default English roots; the `name_*` loader options can rename
/// them per-ledger, which this constant does not model — do not use it to
/// classify accounts in a config-aware context.
///
/// [`account_type`] matches on these same five names as string literals rather
/// than reading this array, so any change here must be mirrored there.
pub const ACCOUNT_TYPES: [&str; 5] = ["Assets", "Liabilities", "Equity", "Income", "Expenses"];
335
/// Classifies `account` by its root segment, returning the root's name in
/// lowercase.
///
/// The root is everything before the first `:`; a name with no `:` is
/// treated as a bare root. Matching is case-sensitive. Anything that is
/// not one of [`ACCOUNT_TYPES`] — including the empty string — yields
/// `"unknown"`.
#[must_use]
pub fn account_type(account: &str) -> &'static str {
    let root = account.split_once(':').map_or(account, |(head, _)| head);
    match root {
        "Assets" => "assets",
        "Liabilities" => "liabilities",
        "Equity" => "equity",
        "Income" => "income",
        "Expenses" => "expenses",
        _ => "unknown",
    }
}
349
// Unit tests for the identifier newtypes (exercised through `Account` and
// `Currency`) and for the free helpers `account_type` and
// `is_subaccount_or_equal`.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn account_type_classifies_roots_and_unknown() {
        assert_eq!(account_type("Assets:Bank:Checking"), "assets");
        assert_eq!(account_type("Liabilities:CC"), "liabilities");
        assert_eq!(account_type("Equity:Opening"), "equity");
        assert_eq!(account_type("Income:Salary"), "income");
        assert_eq!(account_type("Expenses:Food"), "expenses");
        // Bare root (no colon) still classifies.
        assert_eq!(account_type("Assets"), "assets");
        // Non-root / empty → unknown.
        assert_eq!(account_type("Frobnicate:X"), "unknown");
        assert_eq!(account_type(""), "unknown");
        // Case-sensitive, like Beancount.
        assert_eq!(account_type("assets:bank"), "unknown");
    }

    // `From<&str>` builds a value that compares equal to the source text.
    #[test]
    fn test_construction_from_str() {
        let a = Account::from("Assets:Bank");
        let c = Currency::from("USD");
        assert_eq!(a, "Assets:Bank");
        assert_eq!(c, "USD");
    }

    // Covers both `Account == &str` and `&str == Account`.
    #[test]
    fn test_eq_against_str_in_both_directions() {
        let a = Account::from("Assets:Bank");
        assert_eq!(a, "Assets:Bank");
        assert_eq!("Assets:Bank", a);
        assert_ne!(a, "Assets:Other");
    }

    // Two separately constructed values with the same text are equal.
    #[test]
    fn test_eq_against_self_kind() {
        let a1 = Account::from("Assets:Bank");
        let a2 = Account::from("Assets:Bank");
        let a3 = Account::from("Assets:Other");
        assert_eq!(a1, a2);
        assert_ne!(a1, a3);
    }

    // A map keyed by `Account` can be queried with a plain `&str`; this
    // only works if `Hash` and `Borrow<str>` agree.
    #[test]
    fn test_hash_borrow_str() {
        use std::collections::HashMap;
        let mut m: HashMap<Account, u32> = HashMap::new();
        m.insert(Account::from("Assets:Bank"), 1);
        // Look up by &str via Borrow<str> impl.
        assert_eq!(m.get("Assets:Bank"), Some(&1));
        assert_eq!(m.get("Assets:Other"), None);
    }

    // `str` methods are reachable directly through `Deref<Target = str>`.
    #[test]
    fn test_deref_str_methods() {
        let a = Account::from("Assets:Bank:Checking");
        assert!(a.starts_with("Assets:"));
        assert!(a.contains(':'));
        // Byte length of "Assets:Bank:Checking".
        assert_eq!(a.len(), 20);
    }

    // Wrapping an `InternedStr` and unwrapping it again yields an equal
    // value, via both the borrowing and the consuming accessor.
    #[test]
    fn test_round_trip_interned() {
        let i = InternedStr::from("USD");
        let c = Currency::from(i.clone());
        assert_eq!(c.as_interned(), &i);
        assert_eq!(c.into_interned(), i);
    }

    #[test]
    fn test_different_newtypes_dont_cross() {
        // This test is structural: the real guarantee — that passing
        // an `Account` where a `Currency` is expected fails to
        // compile — is checked by the `compile_fail` doctest in the
        // module docs. Here we only confirm that each newtype is
        // accepted by a function taking its own type.
        fn want_account(_: Account) {}
        fn want_currency(_: Currency) {}
        want_account(Account::from("Assets:X"));
        want_currency(Currency::from("USD"));
    }

    // The newtype serializes as a bare JSON string and deserializes back
    // to an equal value.
    #[test]
    fn test_serde_roundtrip() {
        let a = Account::from("Assets:Bank");
        let json = serde_json::to_string(&a).unwrap();
        assert_eq!(json, "\"Assets:Bank\"");
        let back: Account = serde_json::from_str(&json).unwrap();
        assert_eq!(a, back);
    }

    #[test]
    fn is_subaccount_or_equal_exact_match() {
        assert!(is_subaccount_or_equal("Assets:Bank", "Assets:Bank"));
    }

    // Direct children and deeper descendants both count.
    #[test]
    fn is_subaccount_or_equal_proper_subaccount() {
        assert!(is_subaccount_or_equal(
            "Assets:Bank:Checking",
            "Assets:Bank"
        ));
        assert!(is_subaccount_or_equal(
            "Assets:Bank:Checking:Joint",
            "Assets:Bank"
        ));
    }

    #[test]
    fn is_subaccount_or_equal_prefix_without_segment_boundary_excluded() {
        // The whole point of the segment-boundary rule: a name that
        // starts with the parent's bytes but isn't followed by `:`
        // is NOT a sub-account. This is the case the validator and
        // the LSP both depend on; if the rule ever drifts, balance
        // assertions for `Assets:Bank` would silently include
        // `Assets:BankAlias` postings.
        assert!(!is_subaccount_or_equal("Assets:BankAlias", "Assets:Bank"));
        assert!(!is_subaccount_or_equal(
            "Assets:BankAlias:Checking",
            "Assets:Bank"
        ));
    }

    #[test]
    fn is_subaccount_or_equal_parent_is_prefix_substring_excluded() {
        // `Assets:Ban` is not a sub-account of `Assets:Bank` —
        // unrelated except for sharing a prefix.
        assert!(!is_subaccount_or_equal("Assets:Ban", "Assets:Bank"));
    }

    #[test]
    fn is_subaccount_or_equal_empty_inputs() {
        // Both empty: trivially equal, returns true.
        assert!(is_subaccount_or_equal("", ""));
        // Empty parent against a non-empty child: child does not
        // start with `:`, so excluded. (Beancount account names
        // never start with `:`; this is a defensive-by-construction
        // case for callers that pass garbage.)
        assert!(!is_subaccount_or_equal("Assets:Bank", ""));
        // Empty child against non-empty parent: child shorter, no
        // segment boundary possible.
        assert!(!is_subaccount_or_equal("", "Assets:Bank"));
    }

    #[test]
    fn is_subaccount_or_equal_case_sensitive() {
        // Beancount account names are case-sensitive; the helper
        // delegates to byte equality and starts_with, both of which
        // honor case.
        assert!(!is_subaccount_or_equal("Assets:bank", "Assets:Bank"));
        assert!(!is_subaccount_or_equal(
            "assets:Bank:Checking",
            "Assets:Bank"
        ));
    }
}