Skip to main content

rustledger_loader/
lib.rs

1//! Beancount file loader with include resolution.
2//!
3//! This crate handles loading beancount files, resolving includes,
4//! and collecting options. It builds on the parser to provide a
5//! complete loading pipeline.
6//!
7//! # Features
8//!
9//! - Recursive include resolution with cycle detection
10//! - Options collection and parsing
11//! - Plugin directive collection
12//! - Source map for error reporting
13//! - Push/pop tag and metadata handling
14//! - Automatic GPG decryption for encrypted files (`.gpg`, `.asc`)
15//!
16//! # Example
17//!
18//! ```ignore
19//! use rustledger_loader::Loader;
20//! use std::path::Path;
21//!
22//! let result = Loader::new().load(Path::new("ledger.beancount"))?;
23//! for directive in result.directives {
24//!     println!("{:?}", directive);
25//! }
26//! ```
27
28#![forbid(unsafe_code)]
29#![warn(missing_docs)]
30
31#[cfg(feature = "cache")]
32pub mod cache;
33mod dedup;
34mod options;
35mod phase;
36#[cfg(any(feature = "booking", feature = "plugins", feature = "validation"))]
37mod process;
38mod source_map;
39mod vfs;
40
41pub use phase::{
42    Booked, Directives, EarlyValidated, Finalized, LateValidated, Phase, Raw,
43    RegularPluginsApplied, Sorted, Synthed,
44};
45// Note: `FailedBookings` is NOT re-exported. It's internal to the
46// pipeline (flowing from `book` to `finalize`) and accessed via the
47// `crate::phase::FailedBookings` path within the crate.
48
49#[cfg(feature = "cache")]
50pub use cache::{
51    CACHE_FILENAME_ENV, CacheEntry, CachedOptions, CachedPlugin, DISABLE_CACHE_ENV,
52    cache_disabled_by_env, cache_path, default_cache_path, invalidate_cache, load_cache_entry,
53    save_cache_entry,
54};
55pub use dedup::{reintern_directives, reintern_plain_directives};
56pub use options::Options;
57pub use source_map::{SourceFile, SourceMap};
58pub use vfs::{DiskFileSystem, FileSystem, VirtualFileSystem};
59
60// Re-export processing API when features are enabled
61#[cfg(any(feature = "booking", feature = "plugins", feature = "validation"))]
62pub use process::{
63    ErrorLocation, ErrorSeverity, ExtraPlugin, Ledger, LedgerError, LoadOptions, ProcessError,
64    load, load_raw, process,
65};
66#[cfg(feature = "plugins")]
67pub use process::{PluginPass, run_plugins};
68
69use rustledger_core::{Directive, DisplayContext};
70use rustledger_parser::{ParseError, Span, Spanned};
71use std::collections::HashSet;
72use std::path::{Path, PathBuf};
73use std::process::Command;
74use thiserror::Error;
75
/// Resolve `path` to a normalized absolute path, preferring the operating
/// system's answer and falling back to purely lexical cleanup.
///
/// Resolution order:
/// 1. `Path::canonicalize()`, which resolves symlinks and returns an absolute
///    path. It fails when the path does not exist or the platform does not
///    support it (e.g., WASI).
/// 2. Lexical normalization: relative paths are anchored at the current
///    working directory, `.` components are dropped, and each `..` removes
///    the component before it. Symlinks are *not* resolved in this mode.
/// 3. If the path is relative and the current directory cannot be determined,
///    the input is returned unchanged.
///
/// Absolute paths are cleaned lexically as well. The include path-security
/// check compares the result against the root directory with
/// `Path::starts_with`, which works component by component; an absolute path
/// such as `<root>/../../etc/passwd` would otherwise still "start with"
/// `<root>` and slip through whenever canonicalization is unavailable.
fn normalize_path(path: &Path) -> PathBuf {
    // Preferred: let the OS resolve symlinks, `.` and `..`.
    if let Ok(canonical) = path.canonicalize() {
        return canonical;
    }

    // Lexical fallback (WASI-compatible). An absolute path is rebuilt from its
    // own prefix/root components; a relative one is anchored at the cwd.
    let mut result = if path.is_absolute() {
        PathBuf::new()
    } else if let Ok(cwd) = std::env::current_dir() {
        cwd
    } else {
        // Last resort: nothing to anchor against, return the path as-is.
        return path.to_path_buf();
    };

    for component in path.components() {
        match component {
            std::path::Component::Prefix(prefix) => {
                // A Windows drive/UNC prefix restarts the path.
                result = PathBuf::from(prefix.as_os_str());
            }
            std::path::Component::RootDir => {
                // Pushing the root separator resets to the root while keeping
                // any prefix already collected (`C:` + `\` => `C:\`).
                result.push(component.as_os_str());
            }
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` is a no-op at the root, so `/..` stays `/`.
                result.pop();
            }
            std::path::Component::Normal(name) => {
                result.push(name);
            }
        }
    }
    result
}
118
119/// Errors that can occur during loading.
120#[derive(Debug, Error)]
121pub enum LoadError {
122    /// IO error reading a file.
123    #[error("failed to read file {path}: {source}")]
124    Io {
125        /// The path that failed to read.
126        path: PathBuf,
127        /// The underlying IO error.
128        #[source]
129        source: std::io::Error,
130    },
131
132    /// Include cycle detected.
133    ///
134    /// The Display string intentionally begins with `Duplicate filename
135    /// parsed:` to match Python beancount's wording for the same
136    /// condition. The pta-standards `include-cycle-detection`
137    /// conformance test asserts on the substring `"Duplicate filename"`,
138    /// so this wording is load-bearing (#765). The full cycle path is
139    /// preserved in a trailing parenthetical for debuggability.
140    #[error(
141        "Duplicate filename parsed: \"{}\" (include cycle: {})",
142        .cycle.last().map_or("", String::as_str),
143        .cycle.join(" -> ")
144    )]
145    IncludeCycle {
146        /// The cycle of file paths. The last element is the
147        /// re-encountered filename (equal to one of the earlier
148        /// entries), and it's the one quoted in the `"Duplicate
149        /// filename parsed:"` prefix.
150        cycle: Vec<String>,
151    },
152
153    /// Parse errors occurred.
154    #[error("parse errors in {path}")]
155    ParseErrors {
156        /// The file with parse errors.
157        path: PathBuf,
158        /// The parse errors.
159        errors: Vec<ParseError>,
160    },
161
162    /// Path traversal attempt detected.
163    #[error("path traversal not allowed: {include_path} escapes base directory {base_dir}")]
164    PathTraversal {
165        /// The include path that attempted traversal.
166        include_path: String,
167        /// The base directory.
168        base_dir: PathBuf,
169    },
170
171    /// GPG decryption failed.
172    #[error("failed to decrypt {path}: {message}")]
173    Decryption {
174        /// The encrypted file path.
175        path: PathBuf,
176        /// Error message from GPG.
177        message: String,
178    },
179
180    /// Glob pattern did not match any files.
181    #[error("include pattern \"{pattern}\" does not match any files")]
182    GlobNoMatch {
183        /// The glob pattern that matched nothing.
184        pattern: String,
185    },
186
187    /// Glob pattern expansion failed.
188    #[error("failed to expand include pattern \"{pattern}\": {message}")]
189    GlobError {
190        /// The glob pattern that failed.
191        pattern: String,
192        /// The error message.
193        message: String,
194    },
195}
196
197/// Result of loading a beancount file.
198#[derive(Debug)]
199pub struct LoadResult {
200    /// All directives from all files, in order.
201    pub directives: Vec<Spanned<Directive>>,
202    /// Parsed options.
203    pub options: Options,
204    /// Plugins to load.
205    pub plugins: Vec<Plugin>,
206    /// Source map for error reporting.
207    pub source_map: SourceMap,
208    /// All errors encountered during loading.
209    pub errors: Vec<LoadError>,
210    /// Display context for formatting numbers (tracks precision per currency).
211    pub display_context: DisplayContext,
212}
213
214/// A plugin directive.
215#[derive(Debug, Clone)]
216pub struct Plugin {
217    /// Plugin module name (with any `python:` prefix stripped).
218    pub name: String,
219    /// Optional configuration string.
220    pub config: Option<String>,
221    /// Source location.
222    pub span: Span,
223    /// File this plugin was declared in.
224    pub file_id: usize,
225    /// Whether the `python:` prefix was used to force Python execution.
226    pub force_python: bool,
227}
228
229/// Decrypt a GPG-encrypted file using the system `gpg` command.
230///
231/// This uses `gpg --batch --decrypt` which will use the user's
232/// GPG keyring and gpg-agent for passphrase handling.
233fn decrypt_gpg_file(path: &Path) -> Result<String, LoadError> {
234    let output = Command::new("gpg")
235        .args(["--batch", "--decrypt"])
236        .arg(path)
237        .output()
238        .map_err(|e| LoadError::Decryption {
239            path: path.to_path_buf(),
240            message: format!("failed to run gpg: {e}"),
241        })?;
242
243    if !output.status.success() {
244        return Err(LoadError::Decryption {
245            path: path.to_path_buf(),
246            message: String::from_utf8_lossy(&output.stderr).trim().to_string(),
247        });
248    }
249
250    String::from_utf8(output.stdout).map_err(|e| LoadError::Decryption {
251        path: path.to_path_buf(),
252        message: format!("decrypted content is not valid UTF-8: {e}"),
253    })
254}
255
256/// Beancount file loader.
257#[derive(Debug)]
258pub struct Loader {
259    /// Files that have been loaded (for cycle detection).
260    loaded_files: HashSet<PathBuf>,
261    /// Stack for cycle detection during loading (maintains order for error messages).
262    include_stack: Vec<PathBuf>,
263    /// Set for O(1) cycle detection (mirrors `include_stack`).
264    include_stack_set: HashSet<PathBuf>,
265    /// Root directory for path traversal protection.
266    /// If set, includes must resolve to paths within this directory.
267    root_dir: Option<PathBuf>,
268    /// Whether to enforce path traversal protection.
269    enforce_path_security: bool,
270    /// Filesystem abstraction for reading files.
271    fs: Box<dyn FileSystem>,
272}
273
274impl Default for Loader {
275    fn default() -> Self {
276        Self {
277            loaded_files: HashSet::new(),
278            include_stack: Vec::new(),
279            include_stack_set: HashSet::new(),
280            root_dir: None,
281            enforce_path_security: false,
282            fs: Box::new(DiskFileSystem),
283        }
284    }
285}
286
287impl Loader {
288    /// Create a new loader.
289    #[must_use]
290    pub fn new() -> Self {
291        Self::default()
292    }
293
294    /// Enable path traversal protection.
295    ///
296    /// When enabled, include directives cannot escape the root directory
297    /// of the main beancount file. This prevents malicious ledger files
298    /// from accessing sensitive files outside the ledger directory.
299    ///
300    /// # Example
301    ///
302    /// ```ignore
303    /// let result = Loader::new()
304    ///     .with_path_security(true)
305    ///     .load(Path::new("ledger.beancount"))?;
306    /// ```
307    #[must_use]
308    pub const fn with_path_security(mut self, enabled: bool) -> Self {
309        self.enforce_path_security = enabled;
310        self
311    }
312
313    /// Set a custom root directory for path security.
314    ///
315    /// By default, the root directory is the parent directory of the main file.
316    /// This method allows overriding that to a custom directory.
317    #[must_use]
318    pub fn with_root_dir(mut self, root: PathBuf) -> Self {
319        self.root_dir = Some(root);
320        self.enforce_path_security = true;
321        self
322    }
323
324    /// Set a custom filesystem for file loading.
325    ///
326    /// This allows using a virtual filesystem (e.g., for WASM) instead of
327    /// the default disk filesystem.
328    ///
329    /// # Example
330    ///
331    /// ```
332    /// use rustledger_loader::{Loader, VirtualFileSystem};
333    ///
334    /// let mut vfs = VirtualFileSystem::new();
335    /// vfs.add_file("main.beancount", "2024-01-01 open Assets:Bank USD");
336    ///
337    /// let loader = Loader::new().with_filesystem(Box::new(vfs));
338    /// ```
339    #[must_use]
340    pub fn with_filesystem(mut self, fs: Box<dyn FileSystem>) -> Self {
341        self.fs = fs;
342        self
343    }
344
345    /// Load a beancount file and all its includes.
346    ///
347    /// Uses parallel file parsing when multiple files are discovered via
348    /// include directives. The root file is parsed first to resolve the
349    /// include tree, then all included files are read and parsed in
350    /// parallel using rayon.
351    ///
352    /// # Errors
353    ///
354    /// Returns [`LoadError`] in the following cases:
355    ///
356    /// - [`LoadError::Io`] - Failed to read the file or an included file
357    /// - [`LoadError::IncludeCycle`] - Circular include detected
358    ///
359    /// Note: Parse errors and path traversal errors are collected in
360    /// [`LoadResult::errors`] rather than returned directly, allowing
361    /// partial results to be returned.
362    pub fn load(&mut self, path: &Path) -> Result<LoadResult, LoadError> {
363        let mut directives = Vec::new();
364        let mut options = Options::default();
365        let mut plugins = Vec::new();
366        let mut source_map = SourceMap::new();
367        let mut errors = Vec::new();
368
369        // Get normalized path (uses filesystem-specific normalization)
370        let canonical = self.fs.normalize(path);
371
372        // Set root directory for path security if enabled but not explicitly set
373        if self.enforce_path_security && self.root_dir.is_none() {
374            self.root_dir = canonical.parent().map(Path::to_path_buf);
375        }
376
377        // Phase 1: Parse the root file to discover includes.
378        // The root file is typically small (just includes + options).
379        self.load_recursive(
380            &canonical,
381            None,
382            &mut directives,
383            &mut options,
384            &mut plugins,
385            &mut source_map,
386            &mut errors,
387        )?;
388
389        // Deduplicate every `InternedStr` reachable from a directive
390        // across files. Each file parses with its own per-file
391        // `StringInterner`, so identical strings — accounts,
392        // currencies, tags, links, payees, narrations — appearing in
393        // two included files land in two different `Arc<str>`
394        // allocations, defeating the `Arc::ptr_eq` fast path in
395        // `InternedStr`'s `PartialEq` and forcing all cross-file
396        // equality through byte comparison.
397        //
398        // The cache-hit path already runs `reintern_directives` to fix
399        // this (see `crates/rustledger/src/cmd/check.rs`). Doing the
400        // same here aligns the fresh-parse path with the cache path:
401        // every consumer of `LoadResult` sees a deduplicated directive
402        // list regardless of how it was produced. Closes #1071.
403        dedup::reintern_directives(&mut directives);
404
405        // Build display context from directives and options
406        let display_context = build_display_context(&directives, &options);
407
408        Ok(LoadResult {
409            directives,
410            options,
411            plugins,
412            source_map,
413            errors,
414            display_context,
415        })
416    }
417
418    #[allow(clippy::too_many_arguments)]
419    fn load_recursive(
420        &mut self,
421        path: &Path,
422        pre_parsed: Option<(std::sync::Arc<str>, rustledger_parser::ParseResult)>,
423        directives: &mut Vec<Spanned<Directive>>,
424        options: &mut Options,
425        plugins: &mut Vec<Plugin>,
426        source_map: &mut SourceMap,
427        errors: &mut Vec<LoadError>,
428    ) -> Result<(), LoadError> {
429        // Allocate path once for reuse
430        let path_buf = path.to_path_buf();
431
432        // Check for cycles using O(1) HashSet lookup
433        if self.include_stack_set.contains(&path_buf) {
434            // `collect::<Vec<_>>()` on a chain of two `ExactSizeIterator`s
435            // preallocates the exact capacity via `size_hint`, so an
436            // explicit `Vec::with_capacity(...)` + `extend` + `push` is
437            // equivalent and noisier. This is the cycle-error cold path
438            // anyway — readability wins over micro-optimization.
439            let cycle: Vec<String> = self
440                .include_stack
441                .iter()
442                .map(|p| p.display().to_string())
443                .chain(std::iter::once(path.display().to_string()))
444                .collect();
445            return Err(LoadError::IncludeCycle { cycle });
446        }
447
448        // Check if already loaded
449        if self.loaded_files.contains(&path_buf) {
450            return Ok(());
451        }
452
453        // Use pre-parsed data if available (from parallel loading path),
454        // otherwise read and parse the file.
455        let (source, result) = if let Some(pre) = pre_parsed {
456            pre
457        } else {
458            let src: std::sync::Arc<str> = if self.fs.is_encrypted(path) {
459                decrypt_gpg_file(path)?.into()
460            } else {
461                self.fs.read(path)?
462            };
463            let parsed = rustledger_parser::parse(&src);
464            (src, parsed)
465        };
466
467        // Add to source map (Arc::clone is cheap - just increments refcount)
468        let file_id = source_map.add_file(path_buf.clone(), std::sync::Arc::clone(&source));
469
470        // Mark as loading (update both stack and set)
471        self.include_stack_set.insert(path_buf.clone());
472        self.include_stack.push(path_buf.clone());
473        self.loaded_files.insert(path_buf);
474
475        // Collect parse errors
476        if !result.errors.is_empty() {
477            errors.push(LoadError::ParseErrors {
478                path: path.to_path_buf(),
479                errors: result.errors,
480            });
481        }
482
483        // Process options
484        for (key, value, _span) in result.options {
485            options.set(&key, &value);
486        }
487
488        // Process plugins
489        for (name, config, span) in result.plugins {
490            // Check for "python:" prefix to force Python execution
491            let (actual_name, force_python) = if let Some(stripped) = name.strip_prefix("python:") {
492                (stripped.to_string(), true)
493            } else {
494                (name, false)
495            };
496            plugins.push(Plugin {
497                name: actual_name,
498                config,
499                span,
500                file_id,
501                force_python,
502            });
503        }
504
505        // Process includes (with glob pattern support)
506        let base_dir = path.parent().unwrap_or(Path::new("."));
507        for (include_path, _span) in &result.includes {
508            // Check if the include path contains glob metacharacters
509            // (check on include_path, not full_path, to avoid false positives from directory names)
510            let has_glob = include_path.contains('*')
511                || include_path.contains('?')
512                || include_path.contains('[');
513
514            let full_path = base_dir.join(include_path);
515
516            // Path traversal protection: check BEFORE glob expansion to avoid
517            // enumerating files outside the allowed root directory
518            if self.enforce_path_security
519                && let Some(ref root) = self.root_dir
520            {
521                // For glob patterns, extract and check the non-glob prefix
522                let path_to_check = if has_glob {
523                    // Find where the first glob metacharacter is
524                    let glob_start = include_path
525                        .find(['*', '?', '['])
526                        .unwrap_or(include_path.len());
527                    // Get the directory prefix before the glob
528                    let prefix = &include_path[..glob_start];
529                    let prefix_path = if let Some(last_sep) = prefix.rfind('/') {
530                        base_dir.join(&include_path[..=last_sep])
531                    } else {
532                        base_dir.to_path_buf()
533                    };
534                    normalize_path(&prefix_path)
535                } else {
536                    normalize_path(&full_path)
537                };
538
539                if !path_to_check.starts_with(root) {
540                    errors.push(LoadError::PathTraversal {
541                        include_path: include_path.clone(),
542                        base_dir: root.clone(),
543                    });
544                    continue;
545                }
546            }
547
548            let full_path_str = full_path.to_string_lossy();
549
550            // Expand glob patterns or use literal path
551            let paths_to_load: Vec<PathBuf> = if has_glob {
552                match self.fs.glob(&full_path_str) {
553                    Ok(matched) => matched,
554                    Err(e) => {
555                        errors.push(LoadError::GlobError {
556                            pattern: include_path.clone(),
557                            message: e,
558                        });
559                        continue;
560                    }
561                }
562            } else {
563                vec![full_path.clone()]
564            };
565
566            // Check if glob matched nothing
567            if has_glob && paths_to_load.is_empty() {
568                errors.push(LoadError::GlobNoMatch {
569                    pattern: include_path.clone(),
570                });
571                continue;
572            }
573
574            // Normalize and security-check all matched paths first.
575            let mut valid_paths = Vec::with_capacity(paths_to_load.len());
576            for matched_path in paths_to_load {
577                let canonical = self.fs.normalize(&matched_path);
578
579                // Security check: glob could match files outside root via symlinks
580                if self.enforce_path_security
581                    && let Some(ref root) = self.root_dir
582                    && !canonical.starts_with(root)
583                {
584                    errors.push(LoadError::PathTraversal {
585                        include_path: matched_path.to_string_lossy().into_owned(),
586                        base_dir: root.clone(),
587                    });
588                    continue;
589                }
590
591                valid_paths.push(canonical);
592            }
593
594            // Parallel optimization: when loading multiple sibling includes
595            // from disk, read and parse them in parallel. The expensive work
596            // (I/O + tokenize + parse) runs on rayon's thread pool while the
597            // main thread coordinates the include tree walk.
598            //
599            // Each file is read and parsed independently. Results are then
600            // merged sequentially to preserve include order and process any
601            // nested includes via recursive calls.
602            if valid_paths.len() > 1 && self.fs.supports_parallel_read() {
603                use rayon::prelude::*;
604
605                // Read + parse non-encrypted files in parallel, preserving
606                // original include order. Each entry becomes either
607                // Some((source, parsed)) for successful reads, or None for
608                // encrypted/failed files (which fall back to sequential).
609                //
610                // We keep the original index to merge results in order,
611                // ensuring option/directive precedence matches the declared
612                // include sequence.
613                let fs = &*self.fs;
614                let pre_parsed: Vec<Option<(std::sync::Arc<str>, rustledger_parser::ParseResult)>> =
615                    valid_paths
616                        .par_iter()
617                        .map(|p| {
618                            // Skip encrypted files — they need sequential GPG decryption
619                            if fs.is_encrypted(p) {
620                                return None;
621                            }
622                            // Read through the FileSystem trait so all I/O goes
623                            // through one code path (UTF-8 handling, error types, etc.)
624                            let source = fs.read(p).ok()?;
625                            let parsed = rustledger_parser::parse(&source);
626                            Some((source, parsed))
627                        })
628                        .collect();
629
630                // Merge in original include order. Files that were
631                // pre-parsed pass their data to load_recursive; files
632                // that weren't (encrypted or I/O error) are loaded
633                // sequentially as a fallback.
634                for (canonical, pre) in valid_paths.iter().zip(pre_parsed) {
635                    if let Err(e) = self.load_recursive(
636                        canonical, pre, directives, options, plugins, source_map, errors,
637                    ) {
638                        errors.push(e);
639                    }
640                }
641            } else {
642                // Sequential fallback: single file or VFS.
643                for canonical in valid_paths {
644                    if let Err(e) = self.load_recursive(
645                        &canonical, None, directives, options, plugins, source_map, errors,
646                    ) {
647                        errors.push(e);
648                    }
649                }
650            }
651        }
652
653        // Add directives from this file, setting the file_id on the outer
654        // Spanned<Directive> and on each inner Spanned<Posting> inside
655        // transactions. Postings inside an included file share that file's
656        // ID; this keeps inner spans consistent with their containing
657        // directive so consumers don't need to traverse parent pointers.
658        //
659        // file_id is `u16` everywhere (see `Spanned::file_id` rustdoc).
660        // `with_file_id` debug-asserts on overflow; we use the same
661        // expect here so release builds also fail loudly instead of
662        // silently mapping the 65,537th file onto `SYNTHESIZED_FILE_ID`.
663        let fid_u16 = u16::try_from(file_id)
664            .expect("file_id exceeds u16::MAX; SourceMap supports at most 65,535 files");
665        directives.extend(result.directives.into_iter().map(|d| {
666            let mut d = d.with_file_id(file_id);
667            if let rustledger_core::Directive::Transaction(ref mut txn) = d.value {
668                for p in &mut txn.postings {
669                    p.file_id = fid_u16;
670                }
671            }
672            d
673        }));
674
675        // Pop from stack and set
676        if let Some(popped) = self.include_stack.pop() {
677            self.include_stack_set.remove(&popped);
678        }
679
680        Ok(())
681    }
682}
683
684/// Build a display context from loaded directives and options.
685///
686/// This scans all directives for amounts and tracks the maximum precision seen
687/// for each currency. Fixed precisions from `option "display_precision"` override
688/// the inferred values.
689fn build_display_context(directives: &[Spanned<Directive>], options: &Options) -> DisplayContext {
690    let mut ctx = DisplayContext::new();
691
692    // Set render_commas from options
693    ctx.set_render_commas(options.render_commas);
694
695    // Scan directives for amounts to infer precision
696    for spanned in directives {
697        match &spanned.value {
698            Directive::Transaction(txn) => {
699                for posting in &txn.postings {
700                    // Units (IncompleteAmount)
701                    if let Some(ref units) = posting.units
702                        && let (Some(number), Some(currency)) = (units.number(), units.currency())
703                    {
704                        ctx.update(number, currency);
705                    }
706                    // Cost (CostSpec) — feed the user-written amount to
707                    // the display-context inference. Prefer `total()`
708                    // over `per_unit()` so that for `PerUnitFromTotal`
709                    // we sample the user's literal `{{ total }}` rather
710                    // than the booker-derived per-unit (which has been
711                    // divided by |units| and typically carries far more
712                    // trailing precision than the source spec).
713                    if let Some(ref cost) = posting.cost
714                        && let (Some(number), Some(currency)) = (
715                            cost.number
716                                .map(|cn| cn.total().or_else(|| cn.per_unit()).unwrap_or_default()),
717                            &cost.currency,
718                        )
719                    {
720                        ctx.update(number, currency.as_str());
721                    }
722                    // Price annotations: included so the per-currency dist
723                    // sees them, matching Python beancount's DisplayContext
724                    // population. With the default `Precision::MostCommon`
725                    // policy (introduced for bean-query parity), high-
726                    // precision computed exchange rates are naturally
727                    // ignored by the mode — they're a small minority next
728                    // to mainstream postings. Pre-fix (under MAX policy)
729                    // they were excluded to avoid inflating display
730                    // precision; that exclusion is no longer needed.
731                    if let Some(ref price) = posting.price
732                        && let Some(amount) = price.amount()
733                    {
734                        ctx.update(amount.number, amount.currency.as_str());
735                    }
736                }
737            }
738            Directive::Balance(bal) => {
739                ctx.update(bal.amount.number, bal.amount.currency.as_str());
740                if let Some(tol) = bal.tolerance {
741                    ctx.update(tol, bal.amount.currency.as_str());
742                }
743            }
744            Directive::Price(p) => {
745                // Same rationale as posting price annotations above —
746                // included now that MostCommon is the default. The single
747                // 28dp computed-rate price won't shift the mode for a
748                // currency with hundreds of mainstream postings.
749                ctx.update(p.amount.number, p.amount.currency.as_str());
750            }
751            Directive::Pad(_)
752            | Directive::Open(_)
753            | Directive::Close(_)
754            | Directive::Commodity(_)
755            | Directive::Event(_)
756            | Directive::Query(_)
757            | Directive::Note(_)
758            | Directive::Document(_)
759            | Directive::Custom(_) => {}
760        }
761    }
762
763    // Apply fixed precisions from options (these override inferred values)
764    for (currency, precision) in &options.display_precision {
765        ctx.set_fixed_precision(currency, *precision);
766    }
767
768    // Apply per-commodity `precision: N` metadata (issue #991), AFTER the
769    // options loop so a commodity-level declaration wins over the global
770    // option. Multi-declaration of the same currency is last-wins (matches
771    // typical option-stacking semantics). Invalid values are silently
772    // skipped here — `rustledger-validate` surfaces them as
773    // `InvalidPrecisionMetadata` warnings (E5003) so users see the problem
774    // without breaking loading.
775    for spanned in directives {
776        if let Directive::Commodity(comm) = &spanned.value
777            && let Some(value) = comm.meta.get("precision")
778            && let Ok(precision) = rustledger_core::parse_precision_meta(value)
779        {
780            ctx.set_fixed_precision(comm.currency.as_str(), precision);
781        }
782    }
783
784    ctx
785}
786
/// Load a beancount file without processing.
///
/// This is a convenience function that creates a loader and loads a single file.
/// For fully processed results (booking, plugins, validation), use the
/// [`load`] function with [`LoadOptions`] instead.
///
/// This definition is only compiled when none of the `booking`, `plugins`,
/// or `validation` features is enabled.
///
/// # Errors
///
/// Returns the [`LoadError`] produced by `Loader::load` for `path`, unchanged.
#[cfg(not(any(feature = "booking", feature = "plugins", feature = "validation")))]
pub fn load(path: &Path) -> Result<LoadResult, LoadError> {
    // A freshly constructed loader is used once and dropped.
    Loader::new().load(path)
}
796
797#[cfg(test)]
798mod tests {
799    use super::*;
800    use std::io::Write;
801    use tempfile::NamedTempFile;
802
803    #[test]
804    fn test_is_encrypted_file_gpg_extension() {
805        let fs = DiskFileSystem;
806        let path = Path::new("test.beancount.gpg");
807        assert!(fs.is_encrypted(path));
808    }
809
810    #[test]
811    fn test_is_encrypted_file_plain_beancount() {
812        let fs = DiskFileSystem;
813        let path = Path::new("test.beancount");
814        assert!(!fs.is_encrypted(path));
815    }
816
817    #[test]
818    fn test_is_encrypted_file_asc_with_pgp_header() {
819        let fs = DiskFileSystem;
820        let mut file = NamedTempFile::with_suffix(".asc").unwrap();
821        writeln!(file, "-----BEGIN PGP MESSAGE-----").unwrap();
822        writeln!(file, "some encrypted content").unwrap();
823        writeln!(file, "-----END PGP MESSAGE-----").unwrap();
824        file.flush().unwrap();
825
826        assert!(fs.is_encrypted(file.path()));
827    }
828
829    #[test]
830    fn test_is_encrypted_file_asc_without_pgp_header() {
831        let fs = DiskFileSystem;
832        let mut file = NamedTempFile::with_suffix(".asc").unwrap();
833        writeln!(file, "This is just a plain text file").unwrap();
834        writeln!(file, "with .asc extension but no PGP content").unwrap();
835        file.flush().unwrap();
836
837        assert!(!fs.is_encrypted(file.path()));
838    }
839
840    #[test]
841    fn test_decrypt_gpg_file_missing_gpg() {
842        // Create a fake .gpg file
843        let mut file = NamedTempFile::with_suffix(".gpg").unwrap();
844        writeln!(file, "fake encrypted content").unwrap();
845        file.flush().unwrap();
846
847        // This will fail because the content isn't actually GPG-encrypted
848        // (or gpg isn't installed, or there's no matching key)
849        let result = decrypt_gpg_file(file.path());
850        assert!(result.is_err());
851
852        if let Err(LoadError::Decryption { path, message }) = result {
853            assert_eq!(path, file.path().to_path_buf());
854            assert!(!message.is_empty());
855        } else {
856            panic!("Expected Decryption error");
857        }
858    }
859
860    #[test]
861    fn test_plugin_force_python_prefix() {
862        let mut file = NamedTempFile::with_suffix(".beancount").unwrap();
863        writeln!(file, r#"plugin "python:my_plugin""#).unwrap();
864        writeln!(file, r#"plugin "regular_plugin""#).unwrap();
865        file.flush().unwrap();
866
867        let result = Loader::new().load(file.path()).unwrap();
868
869        assert_eq!(result.plugins.len(), 2);
870
871        // First plugin should have force_python = true and name without prefix
872        assert_eq!(result.plugins[0].name, "my_plugin");
873        assert!(result.plugins[0].force_python);
874
875        // Second plugin should have force_python = false
876        assert_eq!(result.plugins[1].name, "regular_plugin");
877        assert!(!result.plugins[1].force_python);
878    }
879
880    #[test]
881    fn test_plugin_force_python_with_config() {
882        let mut file = NamedTempFile::with_suffix(".beancount").unwrap();
883        writeln!(file, r#"plugin "python:my_plugin" "config_value""#).unwrap();
884        file.flush().unwrap();
885
886        let result = Loader::new().load(file.path()).unwrap();
887
888        assert_eq!(result.plugins.len(), 1);
889        assert_eq!(result.plugins[0].name, "my_plugin");
890        assert!(result.plugins[0].force_python);
891        assert_eq!(result.plugins[0].config, Some("config_value".to_string()));
892    }
893
894    #[test]
895    fn test_virtual_filesystem_include_resolution() {
896        // Create a virtual filesystem with multiple files
897        let mut vfs = VirtualFileSystem::new();
898        vfs.add_file(
899            "main.beancount",
900            r#"
901include "accounts.beancount"
902
9032024-01-15 * "Coffee"
904  Expenses:Food  5.00 USD
905  Assets:Bank   -5.00 USD
906"#,
907        );
908        vfs.add_file(
909            "accounts.beancount",
910            r"
9112024-01-01 open Assets:Bank USD
9122024-01-01 open Expenses:Food USD
913",
914        );
915
916        // Load with virtual filesystem
917        let result = Loader::new()
918            .with_filesystem(Box::new(vfs))
919            .load(Path::new("main.beancount"))
920            .unwrap();
921
922        // Should have 3 directives: 2 opens + 1 transaction
923        assert_eq!(result.directives.len(), 3);
924        assert!(result.errors.is_empty());
925
926        // Verify directive types
927        let directive_types: Vec<_> = result
928            .directives
929            .iter()
930            .map(|d| match &d.value {
931                rustledger_core::Directive::Open(_) => "open",
932                rustledger_core::Directive::Transaction(_) => "txn",
933                _ => "other",
934            })
935            .collect();
936        assert_eq!(directive_types, vec!["open", "open", "txn"]);
937    }
938
939    #[test]
940    fn test_virtual_filesystem_nested_includes() {
941        // Test deeply nested includes
942        let mut vfs = VirtualFileSystem::new();
943        vfs.add_file("main.beancount", r#"include "level1.beancount""#);
944        vfs.add_file(
945            "level1.beancount",
946            r#"
947include "level2.beancount"
9482024-01-01 open Assets:Level1 USD
949"#,
950        );
951        vfs.add_file("level2.beancount", "2024-01-01 open Assets:Level2 USD");
952
953        let result = Loader::new()
954            .with_filesystem(Box::new(vfs))
955            .load(Path::new("main.beancount"))
956            .unwrap();
957
958        // Should have 2 open directives from nested includes
959        assert_eq!(result.directives.len(), 2);
960        assert!(result.errors.is_empty());
961    }
962
963    #[test]
964    fn test_virtual_filesystem_missing_include() {
965        let mut vfs = VirtualFileSystem::new();
966        vfs.add_file("main.beancount", r#"include "nonexistent.beancount""#);
967
968        let result = Loader::new()
969            .with_filesystem(Box::new(vfs))
970            .load(Path::new("main.beancount"))
971            .unwrap();
972
973        // Should have an error for missing file
974        assert!(!result.errors.is_empty());
975        let error_msg = result.errors[0].to_string();
976        assert!(error_msg.contains("not found") || error_msg.contains("Io"));
977    }
978
979    #[test]
980    fn test_virtual_filesystem_glob_include() {
981        let mut vfs = VirtualFileSystem::new();
982        vfs.add_file(
983            "main.beancount",
984            r#"
985include "transactions/*.beancount"
986
9872024-01-01 open Assets:Bank USD
988"#,
989        );
990        vfs.add_file(
991            "transactions/2024.beancount",
992            r#"
9932024-01-01 open Expenses:Food USD
994
9952024-06-15 * "Groceries"
996  Expenses:Food  50.00 USD
997  Assets:Bank   -50.00 USD
998"#,
999        );
1000        vfs.add_file(
1001            "transactions/2025.beancount",
1002            r#"
10032025-01-01 open Expenses:Rent USD
1004
10052025-02-01 * "Rent"
1006  Expenses:Rent  1000.00 USD
1007  Assets:Bank   -1000.00 USD
1008"#,
1009        );
1010        // This file should NOT be matched by the glob
1011        vfs.add_file(
1012            "other/ignored.beancount",
1013            "2024-01-01 open Expenses:Other USD",
1014        );
1015
1016        let result = Loader::new()
1017            .with_filesystem(Box::new(vfs))
1018            .load(Path::new("main.beancount"))
1019            .unwrap();
1020
1021        // Should have: 1 open from main + 2 opens from transactions + 2 txns
1022        let opens = result
1023            .directives
1024            .iter()
1025            .filter(|d| matches!(d.value, rustledger_core::Directive::Open(_)))
1026            .count();
1027        assert_eq!(
1028            opens, 3,
1029            "expected 3 open directives (1 main + 2 transactions)"
1030        );
1031
1032        let txns = result
1033            .directives
1034            .iter()
1035            .filter(|d| matches!(d.value, rustledger_core::Directive::Transaction(_)))
1036            .count();
1037        assert_eq!(txns, 2, "expected 2 transactions from glob-matched files");
1038
1039        assert!(
1040            result.errors.is_empty(),
1041            "expected no errors, got: {:?}",
1042            result.errors
1043        );
1044    }
1045
1046    #[test]
1047    fn test_virtual_filesystem_glob_dot_slash_prefix() {
1048        let mut vfs = VirtualFileSystem::new();
1049        vfs.add_file(
1050            "main.beancount",
1051            r#"
1052include "./transactions/*.beancount"
1053
10542024-01-01 open Assets:Bank USD
1055"#,
1056        );
1057        vfs.add_file(
1058            "transactions/2024.beancount",
1059            r#"
10602024-01-01 open Expenses:Food USD
1061
10622024-06-15 * "Groceries"
1063  Expenses:Food  50.00 USD
1064  Assets:Bank   -50.00 USD
1065"#,
1066        );
1067        vfs.add_file(
1068            "transactions/2025.beancount",
1069            r#"
10702025-01-01 open Expenses:Rent USD
1071
10722025-02-01 * "Rent"
1073  Expenses:Rent  1000.00 USD
1074  Assets:Bank   -1000.00 USD
1075"#,
1076        );
1077
1078        let result = Loader::new()
1079            .with_filesystem(Box::new(vfs))
1080            .load(Path::new("main.beancount"))
1081            .unwrap();
1082
1083        // Should have: 1 open from main + 2 opens from transactions + 2 txns
1084        let opens = result
1085            .directives
1086            .iter()
1087            .filter(|d| matches!(d.value, rustledger_core::Directive::Open(_)))
1088            .count();
1089        assert_eq!(
1090            opens, 3,
1091            "expected 3 open directives (1 main + 2 transactions), ./ prefix should be normalized"
1092        );
1093
1094        let txns = result
1095            .directives
1096            .iter()
1097            .filter(|d| matches!(d.value, rustledger_core::Directive::Transaction(_)))
1098            .count();
1099        assert_eq!(
1100            txns, 2,
1101            "expected 2 transactions from glob-matched files despite ./ prefix"
1102        );
1103
1104        assert!(
1105            result.errors.is_empty(),
1106            "expected no errors, got: {:?}",
1107            result.errors
1108        );
1109    }
1110
1111    #[test]
1112    fn test_virtual_filesystem_glob_no_match() {
1113        let mut vfs = VirtualFileSystem::new();
1114        vfs.add_file("main.beancount", r#"include "nonexistent/*.beancount""#);
1115
1116        let result = Loader::new()
1117            .with_filesystem(Box::new(vfs))
1118            .load(Path::new("main.beancount"))
1119            .unwrap();
1120
1121        // Should have a GlobNoMatch error
1122        let has_glob_error = result
1123            .errors
1124            .iter()
1125            .any(|e| matches!(e, LoadError::GlobNoMatch { .. }));
1126        assert!(
1127            has_glob_error,
1128            "expected GlobNoMatch error, got: {:?}",
1129            result.errors
1130        );
1131    }
1132
1133    /// Regression test for #1071: a fresh multi-file parse must produce
1134    /// deduplicated `InternedStr` values, so two `Posting`s referencing
1135    /// the same account from different files share one `Arc<str>`.
1136    /// Pre-fix the per-file `StringInterner` kept the two `Arc`s
1137    /// distinct and `Arc::ptr_eq` fell through to byte comparison.
1138    #[test]
1139    fn test_fresh_parse_deduplicates_internedstr_across_files() {
1140        let mut vfs = VirtualFileSystem::new();
1141        vfs.add_file(
1142            "main.beancount",
1143            r#"
11442024-01-01 open Assets:Bank USD
1145include "transactions.beancount"
1146"#,
1147        );
1148        vfs.add_file(
1149            "transactions.beancount",
1150            r#"
11512024-01-15 * "Coffee"
1152  Assets:Bank   -5.00 USD
1153  Expenses:Coffee  5.00 USD
1154
11552024-01-16 open Expenses:Coffee
1156"#,
1157        );
1158
1159        let result = Loader::new()
1160            .with_filesystem(Box::new(vfs))
1161            .load(Path::new("main.beancount"))
1162            .unwrap();
1163
1164        // Collect every `Assets:Bank` `Account` (one from `open`, one
1165        // from the posting). They originate in different files, so
1166        // pre-fix they had distinct `Arc<str>` allocations.
1167        let bank_accounts: Vec<&rustledger_core::Account> = result
1168            .directives
1169            .iter()
1170            .filter_map(|s| match &s.value {
1171                rustledger_core::Directive::Open(o) if o.account.as_str() == "Assets:Bank" => {
1172                    Some(&o.account)
1173                }
1174                rustledger_core::Directive::Transaction(t) => t
1175                    .postings
1176                    .iter()
1177                    .find(|p| p.account.as_str() == "Assets:Bank")
1178                    .map(|p| &p.account),
1179                _ => None,
1180            })
1181            .collect();
1182
1183        assert_eq!(
1184            bank_accounts.len(),
1185            2,
1186            "expected one Open and one posting for Assets:Bank"
1187        );
1188        assert!(
1189            bank_accounts[0]
1190                .as_interned()
1191                .ptr_eq(bank_accounts[1].as_interned()),
1192            "Assets:Bank from cross-file open/posting must share the same Arc<str> \
1193             after Loader::load runs reintern_directives"
1194        );
1195    }
1196
1197    /// Companion to the previous test — covers the Transaction-level
1198    /// `InternedStr` fields (payee, narration, tags, links) that the
1199    /// pre-Copilot version of `reintern_directive` silently skipped
1200    /// (Copilot review on PR #1081). Two transactions in different
1201    /// files share the same payee + tag; after `Loader::load` they
1202    /// must share one `Arc<str>` per string.
1203    #[test]
1204    fn test_fresh_parse_deduplicates_transaction_fields_across_files() {
1205        let mut vfs = VirtualFileSystem::new();
1206        vfs.add_file(
1207            "main.beancount",
1208            r#"
12092024-01-01 open Assets:Bank USD
12102024-01-01 open Expenses:Coffee
1211
12122024-01-15 * "Cafe Bench" "Latte" #morning
1213  Assets:Bank   -5.00 USD
1214  Expenses:Coffee  5.00 USD
1215
1216include "more.beancount"
1217"#,
1218        );
1219        vfs.add_file(
1220            "more.beancount",
1221            r#"
12222024-01-16 * "Cafe Bench" "Espresso" #morning
1223  Assets:Bank   -3.00 USD
1224  Expenses:Coffee  3.00 USD
1225"#,
1226        );
1227
1228        let result = Loader::new()
1229            .with_filesystem(Box::new(vfs))
1230            .load(Path::new("main.beancount"))
1231            .unwrap();
1232
1233        let txns: Vec<&rustledger_core::Transaction> = result
1234            .directives
1235            .iter()
1236            .filter_map(|s| match &s.value {
1237                rustledger_core::Directive::Transaction(t) => Some(t),
1238                _ => None,
1239            })
1240            .collect();
1241
1242        assert_eq!(txns.len(), 2, "expected the two transactions");
1243        let p1 = txns[0].payee.as_ref().expect("first txn has payee");
1244        let p2 = txns[1].payee.as_ref().expect("second txn has payee");
1245        assert!(
1246            p1.ptr_eq(p2),
1247            "Identical payee \"Cafe Bench\" across files must share one Arc<str>"
1248        );
1249
1250        assert!(!txns[0].tags.is_empty() && !txns[1].tags.is_empty());
1251        assert!(
1252            txns[0].tags[0].ptr_eq(&txns[1].tags[0]),
1253            "Identical tag #morning across files must share one Arc<str>"
1254        );
1255    }
1256
1257    /// Regression test responding to Copilot review on PR #1174: the
1258    /// dedup pass must walk every interned payload type inside
1259    /// `Metadata` maps — `MetaValue::{Account, Currency, Tag, Link,
1260    /// Amount.currency}` — at both the transaction level and the
1261    /// posting level. Before the meta walk was added, cross-file
1262    /// metadata values held distinct `Arc<str>` allocations even when
1263    /// they referenced identical strings.
1264    ///
1265    /// One multi-file fixture exercises all five variants in a single
1266    /// load to keep the test focused on the dedup invariant rather
1267    /// than the parse machinery.
1268    #[test]
1269    fn test_fresh_parse_deduplicates_metavalue_across_files() {
1270        use rustledger_core::MetaValue;
1271
1272        let mut vfs = VirtualFileSystem::new();
1273        vfs.add_file(
1274            "main.beancount",
1275            r#"
12762024-01-01 open Assets:Bank USD
12772024-01-01 open Expenses:Coffee
1278
12792024-01-15 * "Latte"
1280  counterparty_account: Assets:Bank
1281  preferred_currency: USD
1282  category_tag: #coffee
1283  receipt_link: ^receipt-2024
1284  fee_amount: 0.50 USD
1285  Assets:Bank   -5.00 USD
1286    settled_with: Assets:Bank
1287  Expenses:Coffee  5.00 USD
1288
1289include "more.beancount"
1290"#,
1291        );
1292        vfs.add_file(
1293            "more.beancount",
1294            r#"
12952024-01-16 * "Espresso"
1296  counterparty_account: Assets:Bank
1297  preferred_currency: USD
1298  category_tag: #coffee
1299  receipt_link: ^receipt-2024
1300  fee_amount: 0.50 USD
1301  Assets:Bank   -3.00 USD
1302    settled_with: Assets:Bank
1303  Expenses:Coffee  3.00 USD
1304"#,
1305        );
1306
1307        let result = Loader::new()
1308            .with_filesystem(Box::new(vfs))
1309            .load(Path::new("main.beancount"))
1310            .unwrap();
1311
1312        let txns: Vec<&rustledger_core::Transaction> = result
1313            .directives
1314            .iter()
1315            .filter_map(|s| match &s.value {
1316                rustledger_core::Directive::Transaction(t) => Some(t),
1317                _ => None,
1318            })
1319            .collect();
1320        assert_eq!(txns.len(), 2);
1321
1322        // --- Transaction-level meta: all four typed variants + Amount.currency ---
1323
1324        let MetaValue::Account(a1) = &txns[0].meta["counterparty_account"] else {
1325            panic!("expected MetaValue::Account");
1326        };
1327        let MetaValue::Account(a2) = &txns[1].meta["counterparty_account"] else {
1328            panic!("expected MetaValue::Account");
1329        };
1330        assert!(
1331            a1.ptr_eq(a2),
1332            "MetaValue::Account in cross-file meta must share Arc<str>"
1333        );
1334
1335        let MetaValue::Currency(c1) = &txns[0].meta["preferred_currency"] else {
1336            panic!("expected MetaValue::Currency");
1337        };
1338        let MetaValue::Currency(c2) = &txns[1].meta["preferred_currency"] else {
1339            panic!("expected MetaValue::Currency");
1340        };
1341        assert!(
1342            c1.ptr_eq(c2),
1343            "MetaValue::Currency in cross-file meta must share Arc<str>"
1344        );
1345
1346        let MetaValue::Tag(t1) = &txns[0].meta["category_tag"] else {
1347            panic!("expected MetaValue::Tag");
1348        };
1349        let MetaValue::Tag(t2) = &txns[1].meta["category_tag"] else {
1350            panic!("expected MetaValue::Tag");
1351        };
1352        assert!(
1353            t1.ptr_eq(t2),
1354            "MetaValue::Tag in cross-file meta must share Arc<str>"
1355        );
1356
1357        let MetaValue::Link(l1) = &txns[0].meta["receipt_link"] else {
1358            panic!("expected MetaValue::Link");
1359        };
1360        let MetaValue::Link(l2) = &txns[1].meta["receipt_link"] else {
1361            panic!("expected MetaValue::Link");
1362        };
1363        assert!(
1364            l1.ptr_eq(l2),
1365            "MetaValue::Link in cross-file meta must share Arc<str>"
1366        );
1367
1368        let MetaValue::Amount(am1) = &txns[0].meta["fee_amount"] else {
1369            panic!("expected MetaValue::Amount");
1370        };
1371        let MetaValue::Amount(am2) = &txns[1].meta["fee_amount"] else {
1372            panic!("expected MetaValue::Amount");
1373        };
1374        assert!(
1375            am1.currency.ptr_eq(&am2.currency),
1376            "MetaValue::Amount.currency in cross-file meta must share Arc<str>"
1377        );
1378
1379        // --- Posting-level meta: the per-posting `intern_meta` call ---
1380
1381        let first_posting_0 = &txns[0].postings[0].value;
1382        let first_posting_1 = &txns[1].postings[0].value;
1383        let MetaValue::Account(p1) = &first_posting_0.meta["settled_with"] else {
1384            panic!("expected MetaValue::Account in posting meta");
1385        };
1386        let MetaValue::Account(p2) = &first_posting_1.meta["settled_with"] else {
1387            panic!("expected MetaValue::Account in posting meta");
1388        };
1389        assert!(
1390            p1.ptr_eq(p2),
1391            "Posting-level MetaValue::Account in cross-file meta must share Arc<str> \
1392             (verifies the per-posting `intern_meta` call, not just the directive-level one)"
1393        );
1394    }
1395}