Skip to main content

rustledger_loader/
cache.rs

1//! Binary cache for parsed ledgers.
2//!
3//! This module provides a caching layer that can dramatically speed up
4//! subsequent loads of unchanged beancount files by serializing the parsed
5//! directives to a binary format using rkyv.
6//!
7//! # How it works
8//!
9//! 1. When loading a file, compute a hash of all source files
10//! 2. Check if a cache file exists with a matching hash
11//! 3. If yes, deserialize and return immediately (typically <1ms)
12//! 4. If no, parse normally, serialize to cache, and return
13//!
14//! # Cache location
15//!
16//! By default, cache files are stored alongside the main ledger as a hidden
17//! dotfile: `ledger.beancount` → `.ledger.beancount.cache`. This matches Python
18//! beancount's `.{filename}.picklecache` convention.
19//!
20//! Two environment variables control the location, both compatible with
21//! Python beancount and honored at the loader level (so any consumer of
22//! [`load_cache_entry`] / [`save_cache_entry`] gets the kill switch for free):
23//!
24//! - `BEANCOUNT_DISABLE_LOAD_CACHE`: when set (even to an empty value),
25//!   [`load_cache_entry`] returns `None` and [`save_cache_entry`] is a no-op.
26//! - `BEANCOUNT_LOAD_CACHE_FILENAME`: a path pattern that may contain
27//!   `{filename}` (replaced with the source basename). Relative paths resolve
28//!   against the source directory; absolute paths are used as-is. If the
29//!   target directory doesn't exist, [`save_cache_entry`] creates it.
30
31use crate::Options;
32use blake3::Hasher;
33use rust_decimal::Decimal;
34use rustledger_core::Directive;
35use rustledger_parser::Spanned;
36use std::fs;
37use std::io::{Read, Write};
38use std::path::{Path, PathBuf};
39use std::str::FromStr;
40
/// Cached plugin information.
///
/// One serializable (rkyv) record per plugin declaration; stored in the
/// `plugins` list of a [`CacheEntry`].
#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub struct CachedPlugin {
    /// Plugin module name.
    pub name: String,
    /// Optional configuration string.
    pub config: Option<String>,
    /// Whether the `python:` prefix was used to force Python execution.
    pub force_python: bool,
}
51
/// Cached options - a serializable subset of Options.
///
/// Excludes parsing-time fields like `set_options` and `warnings`.
/// These fields mirror the Options struct and inherit their meaning.
///
/// `Decimal` values are stored in string form (see the `From` conversions
/// in this module), so every field here is a plain string, bool, integer,
/// or a collection of those.
#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
#[allow(missing_docs)]
pub struct CachedOptions {
    pub title: Option<String>,
    pub filename: Option<String>,
    pub operating_currency: Vec<String>,
    pub name_assets: String,
    pub name_liabilities: String,
    pub name_equity: String,
    pub name_income: String,
    pub name_expenses: String,
    pub account_rounding: Option<String>,
    pub account_previous_balances: String,
    pub account_previous_earnings: String,
    pub account_previous_conversions: String,
    pub account_current_earnings: String,
    pub account_current_conversions: Option<String>,
    pub account_unrealized_gains: Option<String>,
    pub conversion_currency: Option<String>,
    /// Stored as (currency, `tolerance_string`) pairs since Decimal needs special handling
    pub inferred_tolerance_default: Vec<(String, String)>,
    /// `Decimal` rendered via `to_string()`; parsed back with `Decimal::from_str` on restore.
    pub inferred_tolerance_multiplier: String,
    pub infer_tolerance_from_cost: bool,
    pub use_legacy_fixed_tolerances: bool,
    pub experiment_explicit_tolerances: bool,
    pub booking_method: String,
    pub render_commas: bool,
    pub allow_pipe_separator: bool,
    pub long_string_maxlines: u32,
    pub documents: Vec<String>,
    /// (key, value) pairs copied from `Options::custom`.
    pub custom: Vec<(String, String)>,
}
88
89impl From<&Options> for CachedOptions {
90    fn from(opts: &Options) -> Self {
91        Self {
92            title: opts.title.clone(),
93            filename: opts.filename.clone(),
94            operating_currency: opts.operating_currency.clone(),
95            name_assets: opts.name_assets.clone(),
96            name_liabilities: opts.name_liabilities.clone(),
97            name_equity: opts.name_equity.clone(),
98            name_income: opts.name_income.clone(),
99            name_expenses: opts.name_expenses.clone(),
100            account_rounding: opts.account_rounding.clone(),
101            account_previous_balances: opts.account_previous_balances.clone(),
102            account_previous_earnings: opts.account_previous_earnings.clone(),
103            account_previous_conversions: opts.account_previous_conversions.clone(),
104            account_current_earnings: opts.account_current_earnings.clone(),
105            account_current_conversions: opts.account_current_conversions.clone(),
106            account_unrealized_gains: opts.account_unrealized_gains.clone(),
107            conversion_currency: opts.conversion_currency.clone(),
108            inferred_tolerance_default: opts
109                .inferred_tolerance_default
110                .iter()
111                .map(|(k, v)| (k.clone(), v.to_string()))
112                .collect(),
113            inferred_tolerance_multiplier: opts.inferred_tolerance_multiplier.to_string(),
114            infer_tolerance_from_cost: opts.infer_tolerance_from_cost,
115            use_legacy_fixed_tolerances: opts.use_legacy_fixed_tolerances,
116            experiment_explicit_tolerances: opts.experiment_explicit_tolerances,
117            booking_method: opts.booking_method.clone(),
118            render_commas: opts.render_commas,
119            allow_pipe_separator: opts.allow_pipe_separator,
120            long_string_maxlines: opts.long_string_maxlines,
121            documents: opts.documents.clone(),
122            custom: opts
123                .custom
124                .iter()
125                .map(|(k, v)| (k.clone(), v.clone()))
126                .collect(),
127        }
128    }
129}
130
131impl From<CachedOptions> for Options {
132    fn from(cached: CachedOptions) -> Self {
133        let mut opts = Self::new();
134        opts.title = cached.title;
135        opts.filename = cached.filename;
136        opts.operating_currency = cached.operating_currency;
137        opts.name_assets = cached.name_assets;
138        opts.name_liabilities = cached.name_liabilities;
139        opts.name_equity = cached.name_equity;
140        opts.name_income = cached.name_income;
141        opts.name_expenses = cached.name_expenses;
142        opts.account_rounding = cached.account_rounding;
143        opts.account_previous_balances = cached.account_previous_balances;
144        opts.account_previous_earnings = cached.account_previous_earnings;
145        opts.account_previous_conversions = cached.account_previous_conversions;
146        opts.account_current_earnings = cached.account_current_earnings;
147        opts.account_current_conversions = cached.account_current_conversions;
148        opts.account_unrealized_gains = cached.account_unrealized_gains;
149        opts.conversion_currency = cached.conversion_currency;
150        opts.inferred_tolerance_default = cached
151            .inferred_tolerance_default
152            .into_iter()
153            .filter_map(|(k, v)| Decimal::from_str(&v).ok().map(|d| (k, d)))
154            .collect();
155        opts.inferred_tolerance_multiplier =
156            Decimal::from_str(&cached.inferred_tolerance_multiplier)
157                .unwrap_or_else(|_| Decimal::new(5, 1));
158        opts.infer_tolerance_from_cost = cached.infer_tolerance_from_cost;
159        opts.use_legacy_fixed_tolerances = cached.use_legacy_fixed_tolerances;
160        opts.experiment_explicit_tolerances = cached.experiment_explicit_tolerances;
161        opts.booking_method = cached.booking_method;
162        opts.render_commas = cached.render_commas;
163        opts.allow_pipe_separator = cached.allow_pipe_separator;
164        opts.long_string_maxlines = cached.long_string_maxlines;
165        opts.documents = cached.documents;
166        opts.custom = cached.custom.into_iter().collect();
167        opts
168    }
169}
170
/// Complete cache entry containing all data needed to restore a `LoadResult`.
///
/// This is the payload that is serialized with rkyv and written after the
/// cache header. The `files` list is also what the header hash is computed
/// from when the entry is saved and re-validated when it is loaded.
#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
pub struct CacheEntry {
    /// All parsed directives.
    pub directives: Vec<Spanned<Directive>>,
    /// Parsed options.
    pub options: CachedOptions,
    /// Plugin declarations.
    pub plugins: Vec<CachedPlugin>,
    /// All files that were loaded (as strings, for serialization).
    pub files: Vec<String>,
}
183
184impl CacheEntry {
185    /// Get files as `PathBuf` references.
186    pub fn file_paths(&self) -> Vec<PathBuf> {
187        self.files.iter().map(PathBuf::from).collect()
188    }
189}
190
/// Magic bytes to identify cache files.
///
/// Stored in the first 8 bytes of the cache header; `load_cache_entry`
/// rejects any file whose header magic differs.
const CACHE_MAGIC: &[u8; 8] = b"RLEDGER\0";

/// Cache version - increment when format changes.
/// v1: Initial release with string-based Decimal/NaiveDate
/// v2: Binary Decimal (16 bytes) and `NaiveDate` (i32 days)
/// v3: Fixed account type defaults in `CachedOptions`
/// v4: Hash algorithm switched from SHA-256 to BLAKE3 — same 32-byte
///     output so the header layout is unchanged, but old hashes won't
///     match new files. Bumping the version short-circuits stale
///     caches at the header check instead of paying the rkyv
///     deserialize cost only to fail the hash compare.
const CACHE_VERSION: u32 = 4;
204
/// Fixed-size header written at the start of every cache file.
///
/// On-disk layout (all integers little-endian):
///
/// | bytes  | field      |
/// |--------|------------|
/// | 0..8   | `magic`    |
/// | 8..12  | `version`  |
/// | 12..44 | `hash`     |
/// | 44..52 | `data_len` |
#[derive(Debug, Clone)]
struct CacheHeader {
    /// Magic bytes for identification.
    magic: [u8; 8],
    /// Cache format version.
    version: u32,
    /// BLAKE3 hash of source files (path + mtime + size).
    hash: [u8; 32],
    /// Length of the serialized data.
    data_len: u64,
}

impl CacheHeader {
    /// Encoded size in bytes: magic (8) + version (4) + hash (32) + length (8).
    const SIZE: usize = 8 + 4 + 32 + 8;

    /// Encodes the header into its fixed-size on-disk form.
    fn to_bytes(&self) -> [u8; Self::SIZE] {
        let mut encoded = [0u8; Self::SIZE];
        {
            // Carve the buffer into the four consecutive fields.
            let (magic, rest) = encoded.split_at_mut(8);
            let (version, rest) = rest.split_at_mut(4);
            let (hash, data_len) = rest.split_at_mut(32);

            magic.copy_from_slice(&self.magic);
            version.copy_from_slice(&self.version.to_le_bytes());
            hash.copy_from_slice(&self.hash);
            data_len.copy_from_slice(&self.data_len.to_le_bytes());
        }
        encoded
    }

    /// Decodes a header from the first `SIZE` bytes of `bytes`.
    ///
    /// Returns `None` when fewer than `SIZE` bytes are supplied; any bytes
    /// beyond `SIZE` are ignored. No validation of magic or version is done
    /// here — callers compare those fields themselves.
    fn from_bytes(bytes: &[u8]) -> Option<Self> {
        let raw = bytes.get(..Self::SIZE)?;

        let (magic, rest) = raw.split_at(8);
        let (version, rest) = rest.split_at(4);
        let (hash, data_len) = rest.split_at(32);

        Some(Self {
            magic: magic.try_into().ok()?,
            version: u32::from_le_bytes(version.try_into().ok()?),
            hash: hash.try_into().ok()?,
            data_len: u64::from_le_bytes(data_len.try_into().ok()?),
        })
    }
}
253
254/// Compute a hash of the given files and their modification times.
255///
256/// Files whose metadata cannot be read (e.g., deleted between load and cache)
257/// contribute only their path to the hash. This is intentional — the resulting
258/// hash mismatch will cause a cache miss on next load.
259fn compute_hash(files: &[&Path]) -> [u8; 32] {
260    let mut hasher = Hasher::new();
261
262    for file in files {
263        // Hash the file path
264        hasher.update(file.to_string_lossy().as_bytes());
265
266        // Hash the modification time (skip silently if inaccessible)
267        if let Ok(metadata) = fs::metadata(file) {
268            if let Ok(mtime) = metadata.modified()
269                && let Ok(duration) = mtime.duration_since(std::time::UNIX_EPOCH)
270            {
271                hasher.update(&duration.as_secs().to_le_bytes());
272                hasher.update(&duration.subsec_nanos().to_le_bytes());
273            }
274            // Hash the file size
275            hasher.update(&metadata.len().to_le_bytes());
276        }
277    }
278
279    *hasher.finalize().as_bytes()
280}
281
/// Environment variable that overrides the default cache filename pattern.
///
/// The value is a path that may contain `{filename}` as a placeholder for the
/// source file's basename. Relative paths are resolved against the source
/// file's directory; absolute paths are used as-is. Mirrors Python beancount's
/// `BEANCOUNT_LOAD_CACHE_FILENAME`.
///
/// An empty value is treated as unset by [`cache_path`].
pub const CACHE_FILENAME_ENV: &str = "BEANCOUNT_LOAD_CACHE_FILENAME";

/// Environment variable that disables the binary cache entirely when set.
///
/// Only presence is checked (see [`cache_disabled_by_env`]); the value,
/// including an empty one, is ignored.
///
/// Mirrors Python beancount's `BEANCOUNT_DISABLE_LOAD_CACHE`.
pub const DISABLE_CACHE_ENV: &str = "BEANCOUNT_DISABLE_LOAD_CACHE";
294
295/// Returns the cache file path for a given source file.
296///
297/// Resolution order:
298/// 1. If `BEANCOUNT_LOAD_CACHE_FILENAME` is set, substitute `{filename}` with
299///    the source basename and resolve relative paths against the source dir.
300/// 2. Otherwise, default to a hidden dotfile alongside the source via
301///    [`default_cache_path`]: `path/to/main.beancount` →
302///    `path/to/.main.beancount.cache`.
303///
304/// The dotfile prefix matches Python beancount's `.{filename}.picklecache`
305/// convention, so the cache stays out of the way of `ls` and most file
306/// explorers without breaking from the established beancount ecosystem
307/// behavior. See issue #939.
308///
309/// This function reads process env. Tests that need a deterministic path
310/// regardless of the caller's environment should use [`default_cache_path`]
311/// directly.
312pub fn cache_path(source: &Path) -> PathBuf {
313    if let Ok(pattern) = std::env::var(CACHE_FILENAME_ENV)
314        && !pattern.is_empty()
315    {
316        return resolve_cache_pattern(source, &pattern);
317    }
318    default_cache_path(source)
319}
320
/// Returns the default cache file path (no env-var lookup).
///
/// The result sits next to `source` and is named `.<basename>.cache`. When
/// `source` has no final file-name component, `.ledger.cache` is used.
///
/// Use this when you need a path that is independent of process env, e.g.
/// in tests that mustn't be perturbed by a developer's
/// `BEANCOUNT_LOAD_CACHE_FILENAME`.
#[must_use]
pub fn default_cache_path(source: &Path) -> PathBuf {
    let cache_name = match source.file_name() {
        Some(base) => format!(".{}.cache", base.to_string_lossy()),
        None => ".ledger.cache".to_string(),
    };
    source.with_file_name(cache_name)
}
336
/// Resolve a `BEANCOUNT_LOAD_CACHE_FILENAME` pattern against a source path.
///
/// Every `{filename}` in `pattern` is replaced with the basename of `source`
/// (or `ledger` when `source` has no file-name component). An absolute
/// result is returned as-is; a relative one is joined onto the parent
/// directory of `source`, or returned unchanged when there is no parent.
///
/// The `"{filename}"` token below is a literal user-facing substitution
/// placeholder (matching Python beancount), not a `format!` argument — hence
/// the explicit allow.
#[allow(clippy::literal_string_with_formatting_args)]
fn resolve_cache_pattern(source: &Path, pattern: &str) -> PathBuf {
    let filename = source.file_name().map_or_else(
        || "ledger".to_string(),
        |n| n.to_string_lossy().into_owned(),
    );
    let resolved = PathBuf::from(pattern.replace("{filename}", &filename));
    if resolved.is_absolute() {
        return resolved;
    }
    // Move `resolved` into whichever arm applies instead of cloning it up
    // front for a fallback that is rarely taken.
    match source.parent() {
        Some(parent) => parent.join(resolved),
        None => resolved,
    }
}
355
/// Returns the legacy (pre-#939) cache path: `<source>.cache` alongside source.
///
/// When `source` has no file-name component the name `ledger.cache` is used.
///
/// Used by `save_cache_entry` to opportunistically clean up stale cache files
/// from earlier rustledger versions. Not part of the lookup path.
fn legacy_cache_path(source: &Path) -> PathBuf {
    let legacy_name = match source.file_name() {
        Some(base) => format!("{}.cache", base.to_string_lossy()),
        None => "ledger.cache".to_string(),
    };
    source.with_file_name(legacy_name)
}
369
370/// Returns true if `BEANCOUNT_DISABLE_LOAD_CACHE` is set in the environment.
371///
372/// Mere presence disables — value is ignored, including empty string. Matches
373/// Python beancount's `os.getenv("BEANCOUNT_DISABLE_LOAD_CACHE") is None`
374/// check.
375#[must_use]
376pub fn cache_disabled_by_env() -> bool {
377    std::env::var_os(DISABLE_CACHE_ENV).is_some()
378}
379
380/// Try to load a cache entry from disk.
381///
382/// Returns `Some(CacheEntry)` if cache is valid and file hashes match,
383/// `None` if cache is missing, invalid, outdated, or
384/// `BEANCOUNT_DISABLE_LOAD_CACHE` is set.
385pub fn load_cache_entry(main_file: &Path) -> Option<CacheEntry> {
386    if cache_disabled_by_env() {
387        return None;
388    }
389    let cache_file = cache_path(main_file);
390    let mut file = fs::File::open(&cache_file).ok()?;
391
392    // Read header
393    let mut header_bytes = [0u8; CacheHeader::SIZE];
394    file.read_exact(&mut header_bytes).ok()?;
395    let header = CacheHeader::from_bytes(&header_bytes)?;
396
397    // Validate magic and version
398    if header.magic != *CACHE_MAGIC {
399        return None;
400    }
401    if header.version != CACHE_VERSION {
402        return None;
403    }
404
405    // Read data
406    let mut data = vec![0u8; header.data_len as usize];
407    file.read_exact(&mut data).ok()?;
408
409    // Deserialize
410    let entry: CacheEntry = rkyv::from_bytes::<CacheEntry, rkyv::rancor::Error>(&data).ok()?;
411
412    // Validate hash against the files stored in the cache
413    let file_paths = entry.file_paths();
414    let file_refs: Vec<&Path> = file_paths.iter().map(PathBuf::as_path).collect();
415    let expected_hash = compute_hash(&file_refs);
416    if header.hash != expected_hash {
417        return None;
418    }
419
420    Some(entry)
421}
422
423/// Save a cache entry to disk.
424///
425/// No-op (returns Ok) when `BEANCOUNT_DISABLE_LOAD_CACHE` is set.
426pub fn save_cache_entry(main_file: &Path, entry: &CacheEntry) -> Result<(), std::io::Error> {
427    if cache_disabled_by_env() {
428        return Ok(());
429    }
430    let cache_file = cache_path(main_file);
431
432    // Compute hash from the files in the entry
433    let file_paths = entry.file_paths();
434    let file_refs: Vec<&Path> = file_paths.iter().map(PathBuf::as_path).collect();
435    let hash = compute_hash(&file_refs);
436
437    // Serialize
438    let data = rkyv::to_bytes::<rkyv::rancor::Error>(entry)
439        .map(|v| v.to_vec())
440        .map_err(|e| std::io::Error::other(e.to_string()))?;
441
442    // Write header + data
443    let header = CacheHeader {
444        magic: *CACHE_MAGIC,
445        version: CACHE_VERSION,
446        hash,
447        data_len: data.len() as u64,
448    };
449
450    // Custom BEANCOUNT_LOAD_CACHE_FILENAME patterns can point at a directory
451    // that doesn't exist yet (e.g. ~/.cache/rledger/foo.cache on a fresh
452    // install). Create the parent eagerly so caching isn't silently disabled.
453    if let Some(parent) = cache_file.parent()
454        && !parent.as_os_str().is_empty()
455    {
456        fs::create_dir_all(parent)?;
457    }
458
459    let mut file = fs::File::create(&cache_file)?;
460    file.write_all(&header.to_bytes())?;
461    file.write_all(&data)?;
462
463    // One-shot cleanup of pre-#939 visible cache files. Only attempt when the
464    // legacy path differs from the new path (i.e., we're not using a custom
465    // pattern that happens to land on the old name) and silently ignore
466    // failures — leaving the file is harmless, just untidy.
467    let legacy = legacy_cache_path(main_file);
468    if legacy != cache_file && legacy.exists() {
469        let _ = fs::remove_file(&legacy);
470    }
471
472    Ok(())
473}
474
/// Serialize directives to bytes using rkyv (test-only helper, used by the
/// roundtrip test and the manual benchmark).
///
/// Any rkyv error is converted to an `std::io::Error` carrying its message.
#[cfg(test)]
fn serialize_directives(directives: &Vec<Spanned<Directive>>) -> Result<Vec<u8>, std::io::Error> {
    match rkyv::to_bytes::<rkyv::rancor::Error>(directives) {
        Ok(aligned) => Ok(aligned.to_vec()),
        Err(err) => Err(std::io::Error::other(err.to_string())),
    }
}
482
/// Deserialize directives from bytes using rkyv (test-only helper, used by
/// the roundtrip test and the manual benchmark).
///
/// Returns `None` if rkyv rejects the input.
#[cfg(test)]
fn deserialize_directives(data: &[u8]) -> Option<Vec<Spanned<Directive>>> {
    let decoded = rkyv::from_bytes::<Vec<Spanned<Directive>>, rkyv::rancor::Error>(data);
    decoded.ok()
}
488
489/// Invalidate the cache for a file.
490///
491/// Removes both the current cache file and any legacy pre-#939
492/// `<file>.cache` sidecar so a subsequent load can't pick up stale data.
493pub fn invalidate_cache(main_file: &Path) {
494    let cache_file = cache_path(main_file);
495    let _ = fs::remove_file(&cache_file);
496
497    let legacy = legacy_cache_path(main_file);
498    if legacy != cache_file {
499        let _ = fs::remove_file(&legacy);
500    }
501}
502
503#[cfg(test)]
504mod tests {
505    use super::*;
506    use crate::dedup::reintern_directives;
507    use rust_decimal_macros::dec;
508    use rustledger_core::{Amount, Posting, Transaction};
509    use rustledger_parser::Span;
510
511    #[test]
512    fn test_cache_header_roundtrip() {
513        let header = CacheHeader {
514            magic: *CACHE_MAGIC,
515            version: CACHE_VERSION,
516            hash: [42u8; 32],
517            data_len: 12345,
518        };
519
520        let bytes = header.to_bytes();
521        let parsed = CacheHeader::from_bytes(&bytes).unwrap();
522
523        assert_eq!(parsed.magic, header.magic);
524        assert_eq!(parsed.version, header.version);
525        assert_eq!(parsed.hash, header.hash);
526        assert_eq!(parsed.data_len, header.data_len);
527    }
528
529    #[test]
530    fn test_compute_hash_deterministic() {
531        let files: Vec<&Path> = vec![];
532        let hash1 = compute_hash(&files);
533        let hash2 = compute_hash(&files);
534        assert_eq!(hash1, hash2);
535    }
536
537    #[test]
538    fn test_serialize_deserialize_roundtrip() {
539        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
540
541        let txn = Transaction::new(date, "Test transaction")
542            .with_payee("Test Payee")
543            .with_posting(Posting::new(
544                "Expenses:Test",
545                Amount::new(dec!(100.00), "USD"),
546            ))
547            .with_posting(Posting::auto("Assets:Checking"));
548
549        let directives = vec![Spanned::new(Directive::Transaction(txn), Span::new(0, 100))];
550
551        // Serialize
552        let serialized = serialize_directives(&directives).expect("serialization failed");
553
554        // Deserialize
555        let deserialized = deserialize_directives(&serialized).expect("deserialization failed");
556
557        // Verify roundtrip
558        assert_eq!(directives.len(), deserialized.len());
559        let orig_txn = directives[0].value.as_transaction().unwrap();
560        let deser_txn = deserialized[0].value.as_transaction().unwrap();
561
562        assert_eq!(orig_txn.date, deser_txn.date);
563        assert_eq!(orig_txn.payee, deser_txn.payee);
564        assert_eq!(orig_txn.narration, deser_txn.narration);
565        assert_eq!(orig_txn.postings.len(), deser_txn.postings.len());
566
567        // Check first posting
568        assert_eq!(orig_txn.postings[0].account, deser_txn.postings[0].account);
569        assert_eq!(orig_txn.postings[0].units, deser_txn.postings[0].units);
570    }
571
572    #[test]
573    #[ignore = "manual benchmark - run with: cargo test -p rustledger-loader --release -- --ignored --nocapture"]
574    fn bench_cache_performance() {
575        // Generate test directives
576        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
577        let mut directives = Vec::with_capacity(10000);
578
579        for i in 0..10000 {
580            let txn = Transaction::new(date, format!("Transaction {i}"))
581                .with_payee("Store")
582                .with_posting(Posting::new(
583                    "Expenses:Food",
584                    Amount::new(dec!(25.00), "USD"),
585                ))
586                .with_posting(Posting::auto("Assets:Checking"));
587
588            directives.push(Spanned::new(Directive::Transaction(txn), Span::new(0, 100)));
589        }
590
591        println!("\n=== Cache Benchmark (10,000 directives) ===");
592
593        // Benchmark serialization
594        let start = std::time::Instant::now();
595        let serialized = serialize_directives(&directives).unwrap();
596        let serialize_time = start.elapsed();
597        println!(
598            "Serialize: {:?} ({:.2} MB)",
599            serialize_time,
600            serialized.len() as f64 / 1_000_000.0
601        );
602
603        // Benchmark deserialization
604        let start = std::time::Instant::now();
605        let deserialized = deserialize_directives(&serialized).unwrap();
606        let deserialize_time = start.elapsed();
607        println!("Deserialize: {deserialize_time:?}");
608
609        assert_eq!(directives.len(), deserialized.len());
610
611        println!(
612            "\nSpeedup potential: If parsing takes 100ms, cache load would be {:.1}x faster",
613            100.0 / deserialize_time.as_millis() as f64
614        );
615    }
616
617    // Note: end-to-end coverage of `cache_path()` (including the
618    // `BEANCOUNT_LOAD_CACHE_FILENAME` env var) lives in
619    // `tests/cache_env_var_test.rs`, which can mutate process env without
620    // tripping the crate's `forbid(unsafe_code)`. The tests below cover the
621    // pure pattern-resolution logic and the legacy-path helper.
622
623    /// Fail fast if a developer has set the cache env vars locally — the
624    /// roundtrip tests in this module call `save_cache_entry`/`invalidate_cache`
625    /// which read process env, and a custom pattern would silently redirect
626    /// writes elsewhere (or fail in surprising ways). CI runs with a clean env.
627    fn assert_clean_cache_env() {
628        for var in [CACHE_FILENAME_ENV, DISABLE_CACHE_ENV] {
629            assert!(
630                std::env::var_os(var).is_none(),
631                "unset {var} before running this test"
632            );
633        }
634    }
635
636    #[test]
637    fn test_resolve_cache_pattern_relative_with_substitution() {
638        let source = Path::new("/home/user/finances/main.beancount");
639        let resolved = resolve_cache_pattern(source, ".cache/{filename}.bin");
640        assert_eq!(
641            resolved,
642            Path::new("/home/user/finances/.cache/main.beancount.bin")
643        );
644    }
645
646    #[test]
647    fn test_resolve_cache_pattern_absolute() {
648        let source = Path::new("/home/user/main.beancount");
649        let resolved = resolve_cache_pattern(source, "/var/cache/rledger/{filename}.cache");
650        assert_eq!(
651            resolved,
652            Path::new("/var/cache/rledger/main.beancount.cache")
653        );
654    }
655
656    #[test]
657    fn test_resolve_cache_pattern_no_substitution() {
658        // Pattern without {filename} is used verbatim.
659        let source = Path::new("/home/user/main.beancount");
660        let resolved = resolve_cache_pattern(source, "fixed.cache");
661        assert_eq!(resolved, Path::new("/home/user/fixed.cache"));
662    }
663
664    #[test]
665    fn test_legacy_cache_path() {
666        let source = Path::new("/tmp/ledger.beancount");
667        assert_eq!(
668            legacy_cache_path(source),
669            Path::new("/tmp/ledger.beancount.cache")
670        );
671    }
672
673    #[test]
674    fn test_save_load_cache_entry_roundtrip() {
675        use std::io::Write;
676
677        assert_clean_cache_env();
678
679        // Create a temp directory
680        let temp_dir = std::env::temp_dir().join("rustledger_cache_test");
681        let _ = fs::create_dir_all(&temp_dir);
682
683        // Create a temp beancount file
684        let beancount_file = temp_dir.join("test.beancount");
685        let mut f = fs::File::create(&beancount_file).unwrap();
686        writeln!(f, "2024-01-01 open Assets:Test").unwrap();
687        drop(f);
688
689        // Create a cache entry
690        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
691        let txn = Transaction::new(date, "Test").with_posting(Posting::auto("Assets:Test"));
692        let directives = vec![Spanned::new(Directive::Transaction(txn), Span::new(0, 50))];
693
694        let entry = CacheEntry {
695            directives,
696            options: CachedOptions::from(&Options::new()),
697            plugins: vec![CachedPlugin {
698                name: "test_plugin".to_string(),
699                config: Some("config".to_string()),
700                force_python: false,
701            }],
702            files: vec![beancount_file.to_string_lossy().to_string()],
703        };
704
705        // Save cache
706        save_cache_entry(&beancount_file, &entry).expect("save failed");
707
708        // Load cache
709        let loaded = load_cache_entry(&beancount_file).expect("load failed");
710
711        // Verify
712        assert_eq!(loaded.directives.len(), entry.directives.len());
713        assert_eq!(loaded.plugins.len(), 1);
714        assert_eq!(loaded.plugins[0].name, "test_plugin");
715        assert_eq!(loaded.plugins[0].config, Some("config".to_string()));
716        assert_eq!(loaded.files.len(), 1);
717
718        // Cleanup
719        let _ = fs::remove_file(&beancount_file);
720        let _ = fs::remove_file(cache_path(&beancount_file));
721        let _ = fs::remove_dir(&temp_dir);
722    }
723
724    #[test]
725    fn test_invalidate_cache() {
726        use std::io::Write;
727
728        assert_clean_cache_env();
729
730        let temp_dir = std::env::temp_dir().join("rustledger_invalidate_test");
731        let _ = fs::create_dir_all(&temp_dir);
732
733        let beancount_file = temp_dir.join("test.beancount");
734        let mut f = fs::File::create(&beancount_file).unwrap();
735        writeln!(f, "2024-01-01 open Assets:Test").unwrap();
736        drop(f);
737
738        // Create and save a cache
739        let entry = CacheEntry {
740            directives: vec![],
741            options: CachedOptions::from(&Options::new()),
742            plugins: vec![],
743            files: vec![beancount_file.to_string_lossy().to_string()],
744        };
745        save_cache_entry(&beancount_file, &entry).unwrap();
746
747        // Verify cache exists
748        assert!(cache_path(&beancount_file).exists());
749
750        // Invalidate
751        invalidate_cache(&beancount_file);
752
753        // Verify cache is gone
754        assert!(!cache_path(&beancount_file).exists());
755
756        // Cleanup
757        let _ = fs::remove_file(&beancount_file);
758        let _ = fs::remove_dir(&temp_dir);
759    }
760
761    #[test]
762    fn test_invalidate_cache_removes_legacy_sidecar() {
763        // invalidate_cache should remove both the new dotfile cache and any
764        // pre-#939 visible cache file alongside the source.
765        assert_clean_cache_env();
766
767        let temp_dir = std::env::temp_dir().join("rustledger_invalidate_legacy_test");
768        let _ = fs::create_dir_all(&temp_dir);
769
770        let beancount_file = temp_dir.join("legacy.beancount");
771        // Synthesize a leftover legacy cache file (no need to be valid — we're
772        // only testing that invalidate removes it).
773        let legacy = legacy_cache_path(&beancount_file);
774        fs::write(&legacy, b"stale").unwrap();
775        assert!(legacy.exists());
776
777        invalidate_cache(&beancount_file);
778        assert!(
779            !legacy.exists(),
780            "invalidate_cache should remove the legacy sidecar file"
781        );
782
783        let _ = fs::remove_dir(&temp_dir);
784    }
785
786    #[test]
787    fn test_load_cache_missing_file() {
788        let missing = Path::new("/nonexistent/path/to/file.beancount");
789        assert!(load_cache_entry(missing).is_none());
790    }
791
792    #[test]
793    fn test_load_cache_invalid_magic() {
794        use std::io::Write;
795
796        assert_clean_cache_env();
797
798        let temp_dir = std::env::temp_dir().join("rustledger_magic_test");
799        let _ = fs::create_dir_all(&temp_dir);
800
801        let beancount_file = temp_dir.join("test.beancount");
802        // Write a malformed cache file at the path load_cache_entry will look up.
803        let cache_file = cache_path(&beancount_file);
804        let mut f = fs::File::create(&cache_file).unwrap();
805        // Write invalid magic
806        f.write_all(b"INVALID\0").unwrap();
807        f.write_all(&[0u8; CacheHeader::SIZE - 8]).unwrap();
808        drop(f);
809
810        assert!(load_cache_entry(&beancount_file).is_none());
811
812        // Cleanup
813        let _ = fs::remove_file(&cache_file);
814        let _ = fs::remove_dir(&temp_dir);
815    }
816
817    #[test]
818    fn test_reintern_directives_deduplication() {
819        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
820
821        // Create multiple transactions with the same account
822        let mut directives = vec![];
823        for i in 0..5 {
824            let txn = Transaction::new(date, format!("Txn {i}"))
825                .with_posting(Posting::new(
826                    "Expenses:Food",
827                    Amount::new(dec!(10.00), "USD"),
828                ))
829                .with_posting(Posting::auto("Assets:Checking"));
830            directives.push(Spanned::new(Directive::Transaction(txn), Span::new(0, 50)));
831        }
832
833        // Re-intern should deduplicate the repeated account names and currencies
834        let dedup_count = reintern_directives(&mut directives);
835
836        // We should have deduplicated:
837        // - "Expenses:Food" appears 5 times but only first is new (4 dedup)
838        // - "USD" appears 5 times but only first is new (4 dedup)
839        // - "Assets:Checking" appears 5 times but only first is new (4 dedup)
840        // Total: 12 deduplications
841        assert_eq!(dedup_count, 12);
842    }
843
844    #[test]
845    fn test_cached_options_roundtrip() {
846        let mut opts = Options::new();
847        opts.title = Some("Test Ledger".to_string());
848        opts.operating_currency = vec!["USD".to_string(), "EUR".to_string()];
849        opts.render_commas = true;
850
851        let cached = CachedOptions::from(&opts);
852        let restored: Options = cached.into();
853
854        assert_eq!(restored.title, Some("Test Ledger".to_string()));
855        assert_eq!(restored.operating_currency, vec!["USD", "EUR"]);
856        assert!(restored.render_commas);
857    }
858
859    #[test]
860    fn test_cache_entry_file_paths() {
861        let entry = CacheEntry {
862            directives: vec![],
863            options: CachedOptions::from(&Options::new()),
864            plugins: vec![],
865            files: vec![
866                "/path/to/ledger.beancount".to_string(),
867                "/path/to/include.beancount".to_string(),
868            ],
869        };
870
871        let paths = entry.file_paths();
872        assert_eq!(paths.len(), 2);
873        assert_eq!(paths[0], PathBuf::from("/path/to/ledger.beancount"));
874        assert_eq!(paths[1], PathBuf::from("/path/to/include.beancount"));
875    }
876
877    #[test]
878    fn test_reintern_balance_directive() {
879        use rustledger_core::Balance;
880
881        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
882        let balance = Balance::new(date, "Assets:Checking", Amount::new(dec!(1000.00), "USD"));
883
884        let mut directives = vec![
885            Spanned::new(Directive::Balance(balance.clone()), Span::new(0, 50)),
886            Spanned::new(Directive::Balance(balance), Span::new(51, 100)),
887        ];
888
889        let dedup_count = reintern_directives(&mut directives);
890        // Second occurrence of "Assets:Checking" and "USD" should be deduplicated
891        assert_eq!(dedup_count, 2);
892    }
893
894    #[test]
895    fn test_reintern_open_close_directives() {
896        use rustledger_core::{Close, Open};
897
898        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
899        let open = Open::new(date, "Assets:Checking");
900        let close = Close::new(date, "Assets:Checking");
901
902        let mut directives = vec![
903            Spanned::new(Directive::Open(open), Span::new(0, 50)),
904            Spanned::new(Directive::Close(close), Span::new(51, 100)),
905        ];
906
907        let dedup_count = reintern_directives(&mut directives);
908        // Second "Assets:Checking" should be deduplicated
909        assert_eq!(dedup_count, 1);
910    }
911}