rustledger_core/identifiers.rs
1//! Domain-typed identifiers: [`Account`], [`Currency`], [`Tag`], [`Link`].
2//!
3//! These newtype wrappers around [`InternedStr`] give the type system
4//! enough vocabulary to distinguish the different kinds of identifier
5//! the beancount AST carries. Pre-newtype, every identifier was just
6//! an `InternedStr` — passing an account where a currency was
7//! expected (or vice versa) compiled fine, and the bug surfaced
8//! only at runtime via wrong-but-validly-shaped string matching.
9//! Now the same mistake is a type error.
10//!
11//! # Design
12//!
13//! Each newtype is a transparent wrapper:
14//!
15//! - `Deref<Target = str>` so calls like `account.starts_with("Assets:")`
16//! work without `.as_str()` everywhere.
17//! - `AsRef<str>` and [`Borrow<str>`](std::borrow::Borrow) so `HashMap` lookups by `&str`
18//! keep working (`some_map.get("Assets:Bank")` where the map is
19//! keyed by [`Account`]).
20//! - `PartialEq` against `str` / `&str` / `String` / `InternedStr` /
21//! the newtype's own type, so `account == "Assets:Bank"` keeps
22//! reading naturally without coercion.
23//! - `From<&str>`, `From<String>`, `From<InternedStr>` for
24//! construction at call sites that have a string and need the
25//! typed form.
26//! - `Hash` delegates to the inner `InternedStr`'s hash, so
27//! `HashMap<Account, V>` and `HashMap<InternedStr, V>` produce
28//! the same bucketing for the same underlying string.
29//!
30//! What you DON'T get for free is cross-newtype assignment:
31//!
32//! ```compile_fail
33//! # use rustledger_core::{Account, Currency};
34//! fn want_currency(_: Currency) {}
35//! let acct = Account::from("Assets:Bank");
36//! want_currency(acct); // ← type error
37//! ```
38//!
39//! Conversions between newtypes are deliberate (`Currency::from(account.into_interned())`)
40//! so the compiler can flag accidental crossings.
41//!
42//! # When to use which
43//!
44//! All four newtypes — [`Currency`], [`Account`], [`Tag`], and
45//! [`Link`] — are fully plumbed through the AST, including
46//! `MetaValue` variants:
47//!
48//! - [`Currency`]: `Commodity.currency`, `Open.currencies` entries,
49//! `Amount.currency`, `CostSpec.currency`, `Price.currency`,
50//! `IncompleteAmount::CurrencyOnly`, `MetaValue::Currency`.
51//! - [`Account`]: `Open.account`, `Close.account`, `Balance.account`,
52//! `Pad.account` / `source_account`, `Note.account`,
53//! `Document.account`, `Posting.account`, `MetaValue::Account`.
54//! - [`Tag`]: `Transaction.tags` entries, `pushtag`/`poptag` stack,
55//! `Document.tags`, `MetaValue::Tag`.
56//! - [`Link`]: `Transaction.links` entries, `Document.links`,
57//! `MetaValue::Link`.
58//!
59//! The plugin wire-format type `rustledger_plugin_types::MetaValueData`
60//! deliberately keeps `String` payloads — `plugin-types` is a minimal
61//! WASM-compatible crate that does not depend on `rustledger-core`,
62//! and plugins run without access to the workspace interner anyway.
63//! The convert boundary
64//! (`rustledger_plugin::convert::from_wrapper`) wraps the incoming
65//! strings in fresh `Arc<str>`s; the cross-file canonicalization to
66//! one `Arc<str>` per identifier string happens later in
67//! `rustledger_loader::dedup::reintern_directives`, which walks both
68//! AST identifier fields and `MetaValue::*` payloads inside metadata.
69
70use crate::InternedStr;
71#[cfg(feature = "rkyv")]
72use crate::intern::AsInternedStr;
73
// Generates one domain-typed identifier: a `#[repr(transparent)]` tuple
// struct around `InternedStr`, plus the comparison, hashing, ordering,
// borrowing, formatting, conversion and serde impls that let the wrapper
// be used wherever the bare string used to be.
//
// Arguments:
// - `$name`: identifier of the generated struct.
// - `$kind`: string literal spliced into the struct's rustdoc summary,
//   directly after "Domain-typed identifier for a ".
macro_rules! domain_newtype {
    ($name:ident, $kind:literal) => {
        #[doc = concat!("Domain-typed identifier for a ", $kind, ". See the [module docs](crate::identifiers) for rationale.")]
        #[derive(Debug, Clone, Eq)]
        #[cfg_attr(
            feature = "rkyv",
            derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)
        )]
        #[repr(transparent)]
        pub struct $name(
            #[cfg_attr(feature = "rkyv", rkyv(with = AsInternedStr))] InternedStr,
        );

        impl $name {
            /// Construct from anything that can become an `InternedStr`.
            #[must_use]
            pub fn new(s: impl Into<InternedStr>) -> Self {
                Self(s.into())
            }

            /// Borrow the underlying string slice.
            #[must_use]
            pub fn as_str(&self) -> &str {
                self.0.as_str()
            }

            /// Borrow the underlying `InternedStr`. Useful when interfacing
            /// with APIs that still take untyped interned strings.
            #[must_use]
            pub const fn as_interned(&self) -> &InternedStr {
                &self.0
            }

            /// Unwrap to the underlying `InternedStr`, discarding the
            /// domain tag. Use deliberately — this is the explicit
            /// "I'm crossing types on purpose" escape hatch.
            #[must_use]
            pub fn into_interned(self) -> InternedStr {
                self.0
            }

            /// Pointer-equality on the underlying `Arc<str>`.
            ///
            /// `true` iff both values point at the same interner allocation.
            /// Used by cross-file dedup tests to assert that the loader's
            /// re-interning pass canonicalized the storage; not a substitute
            /// for `==` (which is the byte-equality semantics callers want).
            #[must_use]
            pub fn ptr_eq(&self, other: &Self) -> bool {
                self.0.ptr_eq(&other.0)
            }

            /// Mutable access to the underlying `InternedStr`.
            /// Used by the loader's cross-file interning pass
            /// (`rustledger_loader::dedup`) to canonicalize the
            /// `Arc` after merging directives from multiple files —
            /// the value semantics don't change, but the storage is
            /// re-pointed at the workspace-wide interner's copy.
            pub const fn as_interned_mut(&mut self) -> &mut InternedStr {
                &mut self.0
            }
        }

        // ---- Equality: newtype on the left-hand side -------------------
        // Every comparison forwards to the wrapped `InternedStr`, so the
        // newtype never introduces equality semantics of its own.

        impl PartialEq for $name {
            fn eq(&self, other: &Self) -> bool {
                self.0 == other.0
            }
        }

        impl PartialEq<str> for $name {
            fn eq(&self, other: &str) -> bool {
                self.0 == *other
            }
        }

        impl PartialEq<&str> for $name {
            fn eq(&self, other: &&str) -> bool {
                self.0 == **other
            }
        }

        impl PartialEq<String> for $name {
            fn eq(&self, other: &String) -> bool {
                self.0 == *other
            }
        }

        impl PartialEq<InternedStr> for $name {
            fn eq(&self, other: &InternedStr) -> bool {
                self.0 == *other
            }
        }

        // ---- Equality: newtype on the right-hand side ------------------
        // Mirror impls so `"Assets:Bank" == account` reads as naturally
        // as `account == "Assets:Bank"`. Each one flips the operands and
        // reuses the same underlying `InternedStr` comparison.

        impl PartialEq<$name> for &str {
            fn eq(&self, other: &$name) -> bool {
                other.0 == **self
            }
        }

        impl PartialEq<$name> for str {
            fn eq(&self, other: &$name) -> bool {
                other.0 == *self
            }
        }

        // Counterpart of `PartialEq<String> for $name`: without it,
        // `name == string` compiles but `string == name` does not.
        impl PartialEq<$name> for String {
            fn eq(&self, other: &$name) -> bool {
                other.0 == *self
            }
        }

        impl PartialEq<$name> for InternedStr {
            fn eq(&self, other: &$name) -> bool {
                *self == other.0
            }
        }

        // Hashing and ordering are forwarded to the wrapped value as well,
        // keeping them in step with the `==` impls above.

        impl std::hash::Hash for $name {
            fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
                self.0.hash(state);
            }
        }

        impl std::cmp::PartialOrd for $name {
            fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
                Some(self.cmp(other))
            }
        }

        impl std::cmp::Ord for $name {
            fn cmp(&self, other: &Self) -> std::cmp::Ordering {
                self.0.cmp(&other.0)
            }
        }

        // ---- Borrowing as `str` -----------------------------------------
        // `Deref` exposes `str` methods directly on the newtype; `AsRef`
        // and `Borrow` let it stand in for `&str` in generic APIs and in
        // map lookups keyed by the newtype.

        impl std::ops::Deref for $name {
            type Target = str;
            fn deref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl AsRef<str> for $name {
            fn as_ref(&self) -> &str {
                self.0.as_str()
            }
        }

        impl std::borrow::Borrow<str> for $name {
            fn borrow(&self) -> &str {
                self.0.as_str()
            }
        }

        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }

        // ---- Construction ------------------------------------------------
        // Conversions from string-like and interned inputs. There is
        // deliberately no conversion from one generated newtype to
        // another; crossing kinds has to go through `into_interned()`.

        impl From<&str> for $name {
            fn from(s: &str) -> Self {
                Self(InternedStr::from(s))
            }
        }

        impl From<String> for $name {
            fn from(s: String) -> Self {
                Self(InternedStr::from(s))
            }
        }

        impl From<&String> for $name {
            fn from(s: &String) -> Self {
                Self(InternedStr::from(s.as_str()))
            }
        }

        impl From<InternedStr> for $name {
            fn from(s: InternedStr) -> Self {
                Self(s)
            }
        }

        impl From<&InternedStr> for $name {
            fn from(s: &InternedStr) -> Self {
                Self(s.clone())
            }
        }

        impl From<&$name> for $name {
            fn from(s: &$name) -> Self {
                s.clone()
            }
        }

        impl Default for $name {
            fn default() -> Self {
                Self(InternedStr::default())
            }
        }

        // ---- serde ---------------------------------------------------------
        // Both directions forward to `InternedStr`'s own impls, so the
        // wrapper adds no framing of its own to the serialized form.

        impl serde::Serialize for $name {
            fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
                self.0.serialize(serializer)
            }
        }

        impl<'de> serde::Deserialize<'de> for $name {
            fn deserialize<D: serde::Deserializer<'de>>(
                deserializer: D,
            ) -> Result<Self, D::Error> {
                Ok(Self(InternedStr::deserialize(deserializer)?))
            }
        }

        // rkyv archive is `#[derive]`'d above using the field
        // attribute `#[rkyv(with = AsInternedStr)]` — same wrapper
        // pattern `Posting.account` (and every other `InternedStr`
        // field) uses. That goes through `ArchivedString` via the
        // `AsInternedStr` adapter, picking up bytecheck/CheckBytes
        // for free.
    };
}
292
// Instantiate the four identifier newtypes. The string literal is spliced
// into each type's generated rustdoc summary, right after the words
// "Domain-typed identifier for a ", so it is phrased to read naturally
// in that position.
domain_newtype!(Account, "beancount account name (e.g. `Assets:Cash:USD`)");
domain_newtype!(Currency, "currency code (e.g. `USD`, `EUR`, `AAPL`)");
domain_newtype!(Tag, "beancount tag (e.g. `#travel`)");
domain_newtype!(Link, "beancount link (e.g. `^invoice-2024-01`)");
297
/// Reports whether `child` names `parent` itself or an account nested
/// beneath it.
///
/// Account-scoped operations such as beancount's `balance Assets:Bank`
/// assertion cover postings to `Assets:Bank` as well as to
/// `Assets:Bank:Checking`, `Assets:Bank:Savings`, and so on. A match
/// therefore requires either the identical name or the parent's name
/// followed immediately by a `:` segment separator. Merely sharing
/// leading characters is not enough: `Assets:BankAlias` is NOT a
/// sub-account of `Assets:Bank`.
///
/// Taking `&str` for both arguments lets callers pass `Account`, `&str`
/// or `String` values interchangeably without coercion. No allocation
/// is performed.
///
/// This is the single shared definition lifted from
/// `rustledger-validate::validators::balance::sum_account_and_subaccounts`
/// and `rustledger-lsp::handlers::code_lens::is_account_or_subaccount`,
/// so the two call sites cannot drift apart.
#[must_use]
pub fn is_subaccount_or_equal(child: &str, parent: &str) -> bool {
    // With `parent` removed from the front of `child`, an empty remainder
    // is the exact match and a remainder opening with `:` is a nested
    // account. Any other remainder only shares leading bytes.
    child
        .strip_prefix(parent)
        .is_some_and(|rest| rest.is_empty() || rest.starts_with(':'))
}
322
#[cfg(test)]
mod tests {
    //! Unit tests for the identifier newtypes generated by
    //! `domain_newtype!` and for [`is_subaccount_or_equal`].

    use super::*;

    #[test]
    fn test_construction_from_str() {
        let a = Account::from("Assets:Bank");
        let c = Currency::from("USD");
        assert_eq!(a, "Assets:Bank");
        assert_eq!(c, "USD");
    }

    #[test]
    fn test_construction_from_owned_and_borrowed_forms() {
        // Every `From` impl (and `new`) must yield a value equal to the
        // one built from the plain string slice.
        let owned = String::from("Assets:Bank");
        let interned = InternedStr::from("Assets:Bank");
        let expected = Account::from("Assets:Bank");
        assert_eq!(Account::new("Assets:Bank"), expected);
        assert_eq!(Account::from(&owned), expected);
        assert_eq!(Account::from(owned), expected);
        assert_eq!(Account::from(&interned), expected);
        assert_eq!(Account::from(interned), expected);
        assert_eq!(Account::from(&expected), expected);
    }

    #[test]
    fn test_eq_against_str_in_both_directions() {
        let a = Account::from("Assets:Bank");
        assert_eq!(a, "Assets:Bank");
        assert_eq!("Assets:Bank", a);
        assert_ne!(a, "Assets:Other");
    }

    #[test]
    fn test_eq_against_string_and_interned() {
        let a = Account::from("Assets:Bank");
        // Owned `String` on the right-hand side.
        assert_eq!(a, String::from("Assets:Bank"));
        assert_ne!(a, String::from("Assets:Other"));
        // Untyped `InternedStr`, in both operand orders.
        let same = InternedStr::from("Assets:Bank");
        let other = InternedStr::from("Assets:Other");
        assert_eq!(a, same);
        assert_eq!(same, a);
        assert_ne!(a, other);
        assert_ne!(other, a);
    }

    #[test]
    fn test_eq_against_self_kind() {
        let a1 = Account::from("Assets:Bank");
        let a2 = Account::from("Assets:Bank");
        let a3 = Account::from("Assets:Other");
        assert_eq!(a1, a2);
        assert_ne!(a1, a3);
    }

    #[test]
    fn test_hash_borrow_str() {
        use std::collections::HashMap;
        let mut m: HashMap<Account, u32> = HashMap::new();
        m.insert(Account::from("Assets:Bank"), 1);
        // Look up by &str via Borrow<str> impl.
        assert_eq!(m.get("Assets:Bank"), Some(&1));
        assert_eq!(m.get("Assets:Other"), None);
    }

    #[test]
    fn test_deref_str_methods() {
        let a = Account::from("Assets:Bank:Checking");
        assert!(a.starts_with("Assets:"));
        assert!(a.contains(':'));
        assert_eq!(a.len(), 20);
    }

    #[test]
    fn test_round_trip_interned() {
        let i = InternedStr::from("USD");
        let c = Currency::from(i.clone());
        assert_eq!(c.as_interned(), &i);
        assert_eq!(c.into_interned(), i);
    }

    #[test]
    fn test_different_newtypes_dont_cross() {
        // This test is structural. The negative half of the guarantee
        // (passing an `Account` where a `Currency` is expected is a
        // compile error) cannot be expressed in a unit test; it is
        // covered by the `compile_fail` doctest in the module docs.
        // Here we only confirm the positive half: each newtype
        // satisfies a signature that demands exactly that type.
        fn want_account(_: Account) {}
        fn want_currency(_: Currency) {}
        want_account(Account::from("Assets:X"));
        want_currency(Currency::from("USD"));
    }

    #[test]
    fn test_serde_roundtrip() {
        let a = Account::from("Assets:Bank");
        let json = serde_json::to_string(&a).unwrap();
        assert_eq!(json, "\"Assets:Bank\"");
        let back: Account = serde_json::from_str(&json).unwrap();
        assert_eq!(a, back);
    }

    #[test]
    fn is_subaccount_or_equal_exact_match() {
        assert!(is_subaccount_or_equal("Assets:Bank", "Assets:Bank"));
    }

    #[test]
    fn is_subaccount_or_equal_proper_subaccount() {
        assert!(is_subaccount_or_equal(
            "Assets:Bank:Checking",
            "Assets:Bank"
        ));
        assert!(is_subaccount_or_equal(
            "Assets:Bank:Checking:Joint",
            "Assets:Bank"
        ));
    }

    #[test]
    fn is_subaccount_or_equal_prefix_without_segment_boundary_excluded() {
        // The whole point of the segment-boundary rule: a name that
        // starts with the parent's bytes but isn't followed by `:`
        // is NOT a sub-account. This is the case the validator and
        // the LSP both depend on; if the rule ever drifts, balance
        // assertions for `Assets:Bank` would silently include
        // `Assets:BankAlias` postings.
        assert!(!is_subaccount_or_equal("Assets:BankAlias", "Assets:Bank"));
        assert!(!is_subaccount_or_equal(
            "Assets:BankAlias:Checking",
            "Assets:Bank"
        ));
    }

    #[test]
    fn is_subaccount_or_equal_parent_is_prefix_substring_excluded() {
        // `Assets:Ban` is not a sub-account of `Assets:Bank` —
        // unrelated except for sharing a prefix.
        assert!(!is_subaccount_or_equal("Assets:Ban", "Assets:Bank"));
    }

    #[test]
    fn is_subaccount_or_equal_empty_inputs() {
        // Both empty: trivially equal, returns true.
        assert!(is_subaccount_or_equal("", ""));
        // Empty parent against a non-empty child: child does not
        // start with `:`, so excluded. (Beancount account names
        // never start with `:`; this is a defensive-by-construction
        // case for callers that pass garbage.)
        assert!(!is_subaccount_or_equal("Assets:Bank", ""));
        // Empty child against non-empty parent: child shorter, no
        // segment boundary possible.
        assert!(!is_subaccount_or_equal("", "Assets:Bank"));
    }

    #[test]
    fn is_subaccount_or_equal_case_sensitive() {
        // Beancount account names are case-sensitive; the helper
        // compares bytes exactly, so a difference in case is a
        // different account.
        assert!(!is_subaccount_or_equal("Assets:bank", "Assets:Bank"));
        assert!(!is_subaccount_or_equal(
            "assets:Bank:Checking",
            "Assets:Bank"
        ));
    }
}