Skip to main content

rustledger_loader/
lib.rs

1//! Beancount file loader with include resolution.
2//!
3//! This crate handles loading beancount files, resolving includes,
4//! and collecting options. It builds on the parser to provide a
5//! complete loading pipeline.
6//!
7//! # Features
8//!
9//! - Recursive include resolution with cycle detection
10//! - Options collection and parsing
11//! - Plugin directive collection
12//! - Source map for error reporting
13//! - Push/pop tag and metadata handling
14//! - Automatic GPG decryption for encrypted files (`.gpg`, `.asc`)
15//!
16//! # Example
17//!
18//! ```ignore
19//! use rustledger_loader::Loader;
20//! use std::path::Path;
21//!
22//! let result = Loader::new().load(Path::new("ledger.beancount"))?;
23//! for directive in result.directives {
24//!     println!("{:?}", directive);
25//! }
26//! ```
27
28#![forbid(unsafe_code)]
29#![warn(missing_docs)]
30
31#[cfg(feature = "cache")]
32pub mod cache;
33mod dedup;
34mod options;
35#[cfg(any(feature = "booking", feature = "plugins", feature = "validation"))]
36mod process;
37mod source_map;
38mod vfs;
39
40#[cfg(feature = "cache")]
41pub use cache::{
42    CACHE_FILENAME_ENV, CacheEntry, CachedOptions, CachedPlugin, DISABLE_CACHE_ENV,
43    cache_disabled_by_env, cache_path, default_cache_path, invalidate_cache, load_cache_entry,
44    save_cache_entry,
45};
46pub use dedup::{reintern_directives, reintern_plain_directives};
47pub use options::Options;
48pub use source_map::{SourceFile, SourceMap};
49pub use vfs::{DiskFileSystem, FileSystem, VirtualFileSystem};
50
51// Re-export processing API when features are enabled
52#[cfg(any(feature = "booking", feature = "plugins", feature = "validation"))]
53pub use process::{
54    ErrorLocation, ErrorSeverity, Ledger, LedgerError, LoadOptions, ProcessError, load, load_raw,
55    process,
56};
57#[cfg(feature = "plugins")]
58pub use process::{PluginPass, run_plugins};
59
60use rustledger_core::{Directive, DisplayContext};
61use rustledger_parser::{ParseError, Span, Spanned};
62use std::collections::HashSet;
63use std::path::{Path, PathBuf};
64use std::process::Command;
65use thiserror::Error;
66
/// Try to canonicalize a path, falling back to a lexical normalization when
/// canonicalize fails (e.g., on WASI, or when the path does not exist).
///
/// This function:
/// 1. First tries `fs::canonicalize()` which resolves symlinks and returns an absolute path
/// 2. If that fails, builds an absolute path manually: relative paths are anchored
///    at the current directory, and `.` / `..` components are resolved lexically
///    (without consulting the filesystem, so symlinks are not followed)
/// 3. As a last resort (relative path and no current directory), returns the original path
///
/// The lexical cleanup is applied to absolute inputs as well. The include
/// security check compares the result against the root directory with
/// `Path::starts_with`, which is purely component-wise; a leftover `..`
/// (as in `/root/../secret`) would otherwise be accepted as "inside" `/root`.
fn normalize_path(path: &Path) -> PathBuf {
    // Try canonicalize first (works on most platforms, resolves symlinks)
    if let Ok(canonical) = path.canonicalize() {
        return canonical;
    }

    // Fallback: make absolute without resolving symlinks (WASI-compatible).
    let mut result = if path.is_absolute() {
        // The path's own prefix/root components seed `result` in the loop below.
        PathBuf::new()
    } else if let Ok(cwd) = std::env::current_dir() {
        cwd
    } else {
        // Last resort: just return the path as-is
        return path.to_path_buf();
    };

    for component in path.components() {
        match component {
            std::path::Component::Prefix(p) => {
                result = PathBuf::from(p.as_os_str());
            }
            std::path::Component::RootDir => {
                // Pushing a root replaces everything except an existing
                // prefix, so this yields `/` on Unix and keeps `C:` on Windows.
                result.push(component.as_os_str());
            }
            std::path::Component::CurDir => {}
            std::path::Component::ParentDir => {
                // `pop` is a no-op at the root, so `..` cannot climb above it.
                result.pop();
            }
            std::path::Component::Normal(s) => {
                result.push(s);
            }
        }
    }
    result
}
109
/// Errors that can occur during loading.
///
/// A failure to read or decrypt the root file is returned from
/// [`Loader::load`] as `Err`. Parse errors, and every problem hit while
/// following `include` directives, are instead collected in
/// [`LoadResult::errors`] so the rest of the ledger can still be loaded.
#[derive(Debug, Error)]
pub enum LoadError {
    /// IO error reading a file.
    #[error("failed to read file {path}: {source}")]
    Io {
        /// The path that failed to read.
        path: PathBuf,
        /// The underlying IO error.
        #[source]
        source: std::io::Error,
    },

    /// Include cycle detected.
    ///
    /// The Display string intentionally begins with `Duplicate filename
    /// parsed:` to match Python beancount's wording for the same
    /// condition. The pta-standards `include-cycle-detection`
    /// conformance test asserts on the substring `"Duplicate filename"`,
    /// so this wording is load-bearing (#765). The full cycle path is
    /// preserved in a trailing parenthetical for debuggability.
    #[error(
        "Duplicate filename parsed: \"{}\" (include cycle: {})",
        .cycle.last().map_or("", String::as_str),
        .cycle.join(" -> ")
    )]
    IncludeCycle {
        /// The cycle of file paths. The last element is the
        /// re-encountered filename (equal to one of the earlier
        /// entries), and it's the one quoted in the `"Duplicate
        /// filename parsed:"` prefix.
        cycle: Vec<String>,
    },

    /// Parse errors occurred.
    ///
    /// The file's successfully parsed content is still loaded; this
    /// variant only records what the parser rejected.
    #[error("parse errors in {path}")]
    ParseErrors {
        /// The file with parse errors.
        path: PathBuf,
        /// The parse errors.
        errors: Vec<ParseError>,
    },

    /// Path traversal attempt detected.
    ///
    /// Reported when path security is enabled and an include (or a file
    /// matched by an include glob) normalizes to a location outside the
    /// root directory. The offending include is skipped.
    #[error("path traversal not allowed: {include_path} escapes base directory {base_dir}")]
    PathTraversal {
        /// The include path that attempted traversal: the path as written
        /// in the directive, or the matched file path when an individual
        /// glob match was rejected.
        include_path: String,
        /// The root directory that includes must stay within.
        base_dir: PathBuf,
    },

    /// GPG decryption failed.
    #[error("failed to decrypt {path}: {message}")]
    Decryption {
        /// The encrypted file path.
        path: PathBuf,
        /// Description of the failure: gpg's stderr output, or the reason
        /// gpg could not be run or its output could not be decoded.
        message: String,
    },

    /// Glob pattern did not match any files.
    #[error("include pattern \"{pattern}\" does not match any files")]
    GlobNoMatch {
        /// The glob pattern that matched nothing, as written in the
        /// include directive.
        pattern: String,
    },

    /// Glob pattern expansion failed.
    #[error("failed to expand include pattern \"{pattern}\": {message}")]
    GlobError {
        /// The glob pattern that failed, as written in the include
        /// directive.
        pattern: String,
        /// The error message reported by the filesystem's glob expansion.
        message: String,
    },
}
187
/// Result of loading a beancount file.
///
/// Produced by [`Loader::load`]. A result may be partial: check
/// [`LoadResult::errors`] for problems that did not abort the load.
#[derive(Debug)]
pub struct LoadResult {
    /// All directives from all files, in order.
    ///
    /// For each file, the directives of its includes (in the order the
    /// `include` directives appear) come before the file's own directives.
    /// Every directive is tagged with the id of the file it came from.
    pub directives: Vec<Spanned<Directive>>,
    /// Parsed options.
    pub options: Options,
    /// Plugins to load, in the order their directives were encountered.
    pub plugins: Vec<Plugin>,
    /// Source map for error reporting.
    pub source_map: SourceMap,
    /// All errors encountered during loading that did not abort it
    /// (parse errors and problems with included files).
    pub errors: Vec<LoadError>,
    /// Display context for formatting numbers (tracks precision per currency).
    pub display_context: DisplayContext,
}
204
/// A plugin directive.
///
/// One entry is recorded per `plugin` directive found in the loaded files.
#[derive(Debug, Clone)]
pub struct Plugin {
    /// Plugin module name (with any `python:` prefix stripped).
    pub name: String,
    /// Optional configuration string.
    pub config: Option<String>,
    /// Source location of the directive, as reported by the parser.
    pub span: Span,
    /// File this plugin was declared in (the id assigned to that file in
    /// the [`SourceMap`]).
    pub file_id: usize,
    /// Whether the `python:` prefix was used to force Python execution.
    pub force_python: bool,
}
219
220/// Decrypt a GPG-encrypted file using the system `gpg` command.
221///
222/// This uses `gpg --batch --decrypt` which will use the user's
223/// GPG keyring and gpg-agent for passphrase handling.
224fn decrypt_gpg_file(path: &Path) -> Result<String, LoadError> {
225    let output = Command::new("gpg")
226        .args(["--batch", "--decrypt"])
227        .arg(path)
228        .output()
229        .map_err(|e| LoadError::Decryption {
230            path: path.to_path_buf(),
231            message: format!("failed to run gpg: {e}"),
232        })?;
233
234    if !output.status.success() {
235        return Err(LoadError::Decryption {
236            path: path.to_path_buf(),
237            message: String::from_utf8_lossy(&output.stderr).trim().to_string(),
238        });
239    }
240
241    String::from_utf8(output.stdout).map_err(|e| LoadError::Decryption {
242        path: path.to_path_buf(),
243        message: format!("decrypted content is not valid UTF-8: {e}"),
244    })
245}
246
/// Beancount file loader.
///
/// Configure with the `with_*` builder methods, then call
/// [`Loader::load`] on the root file.
#[derive(Debug)]
pub struct Loader {
    /// Files that have already been loaded. A file reached again through
    /// a different include is skipped instead of being loaded twice.
    loaded_files: HashSet<PathBuf>,
    /// Stack for cycle detection during loading (maintains order for error messages).
    include_stack: Vec<PathBuf>,
    /// Set for O(1) cycle detection (mirrors `include_stack`).
    include_stack_set: HashSet<PathBuf>,
    /// Root directory for path traversal protection.
    /// If set, includes must resolve to paths within this directory.
    root_dir: Option<PathBuf>,
    /// Whether to enforce path traversal protection.
    enforce_path_security: bool,
    /// Filesystem abstraction for reading files.
    fs: Box<dyn FileSystem>,
}
264
265impl Default for Loader {
266    fn default() -> Self {
267        Self {
268            loaded_files: HashSet::new(),
269            include_stack: Vec::new(),
270            include_stack_set: HashSet::new(),
271            root_dir: None,
272            enforce_path_security: false,
273            fs: Box::new(DiskFileSystem),
274        }
275    }
276}
277
278impl Loader {
    /// Create a new loader.
    ///
    /// Equivalent to [`Loader::default`]: reads from disk, with path
    /// traversal protection disabled and no root directory set.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }
284
    /// Enable path traversal protection.
    ///
    /// When enabled, include directives cannot escape the root directory
    /// of the main beancount file. This prevents malicious ledger files
    /// from accessing sensitive files outside the ledger directory.
    ///
    /// Unless a root was set with [`Loader::with_root_dir`], the root
    /// directory is taken from the parent of the main file when
    /// [`Loader::load`] runs. Offending includes are skipped and reported
    /// as [`LoadError::PathTraversal`] in [`LoadResult::errors`].
    ///
    /// # Example
    ///
    /// ```ignore
    /// let result = Loader::new()
    ///     .with_path_security(true)
    ///     .load(Path::new("ledger.beancount"))?;
    /// ```
    #[must_use]
    pub const fn with_path_security(mut self, enabled: bool) -> Self {
        self.enforce_path_security = enabled;
        self
    }
303
    /// Set a custom root directory for path security.
    ///
    /// By default, the root directory is the parent directory of the main file.
    /// This method allows overriding that to a custom directory.
    ///
    /// Calling this also turns path traversal protection on, as if
    /// `with_path_security(true)` had been called.
    ///
    /// NOTE(review): `root` is stored exactly as given, while include paths
    /// are normalized before being compared against it — presumably callers
    /// should pass an absolute, canonical directory; confirm.
    #[must_use]
    pub fn with_root_dir(mut self, root: PathBuf) -> Self {
        self.root_dir = Some(root);
        // Setting a root only makes sense with enforcement enabled.
        self.enforce_path_security = true;
        self
    }
314
    /// Set a custom filesystem for file loading.
    ///
    /// This allows using a virtual filesystem (e.g., for WASM) instead of
    /// the default disk filesystem.
    ///
    /// The loader goes through this filesystem to normalize paths, read
    /// files, expand include globs, detect encrypted files, and decide
    /// whether sibling includes may be read in parallel.
    ///
    /// # Example
    ///
    /// ```
    /// use rustledger_loader::{Loader, VirtualFileSystem};
    ///
    /// let mut vfs = VirtualFileSystem::new();
    /// vfs.add_file("main.beancount", "2024-01-01 open Assets:Bank USD");
    ///
    /// let loader = Loader::new().with_filesystem(Box::new(vfs));
    /// ```
    #[must_use]
    pub fn with_filesystem(mut self, fs: Box<dyn FileSystem>) -> Self {
        self.fs = fs;
        self
    }
335
336    /// Load a beancount file and all its includes.
337    ///
338    /// Uses parallel file parsing when multiple files are discovered via
339    /// include directives. The root file is parsed first to resolve the
340    /// include tree, then all included files are read and parsed in
341    /// parallel using rayon.
342    ///
343    /// # Errors
344    ///
345    /// Returns [`LoadError`] in the following cases:
346    ///
347    /// - [`LoadError::Io`] - Failed to read the file or an included file
348    /// - [`LoadError::IncludeCycle`] - Circular include detected
349    ///
350    /// Note: Parse errors and path traversal errors are collected in
351    /// [`LoadResult::errors`] rather than returned directly, allowing
352    /// partial results to be returned.
353    pub fn load(&mut self, path: &Path) -> Result<LoadResult, LoadError> {
354        let mut directives = Vec::new();
355        let mut options = Options::default();
356        let mut plugins = Vec::new();
357        let mut source_map = SourceMap::new();
358        let mut errors = Vec::new();
359
360        // Get normalized path (uses filesystem-specific normalization)
361        let canonical = self.fs.normalize(path);
362
363        // Set root directory for path security if enabled but not explicitly set
364        if self.enforce_path_security && self.root_dir.is_none() {
365            self.root_dir = canonical.parent().map(Path::to_path_buf);
366        }
367
368        // Phase 1: Parse the root file to discover includes.
369        // The root file is typically small (just includes + options).
370        self.load_recursive(
371            &canonical,
372            None,
373            &mut directives,
374            &mut options,
375            &mut plugins,
376            &mut source_map,
377            &mut errors,
378        )?;
379
380        // Deduplicate every `InternedStr` reachable from a directive
381        // across files. Each file parses with its own per-file
382        // `StringInterner`, so identical strings — accounts,
383        // currencies, tags, links, payees, narrations — appearing in
384        // two included files land in two different `Arc<str>`
385        // allocations, defeating the `Arc::ptr_eq` fast path in
386        // `InternedStr`'s `PartialEq` and forcing all cross-file
387        // equality through byte comparison.
388        //
389        // The cache-hit path already runs `reintern_directives` to fix
390        // this (see `crates/rustledger/src/cmd/check.rs`). Doing the
391        // same here aligns the fresh-parse path with the cache path:
392        // every consumer of `LoadResult` sees a deduplicated directive
393        // list regardless of how it was produced. Closes #1071.
394        dedup::reintern_directives(&mut directives);
395
396        // Build display context from directives and options
397        let display_context = build_display_context(&directives, &options);
398
399        Ok(LoadResult {
400            directives,
401            options,
402            plugins,
403            source_map,
404            errors,
405            display_context,
406        })
407    }
408
    /// Load `path` and, depth-first, every file it includes, appending the
    /// results to the shared accumulators.
    ///
    /// `path` is used as-is as the key for cycle detection and for the
    /// already-loaded check, so callers pass it already normalized.
    /// `pre_parsed` carries the source text and parse result when the
    /// caller already produced them (the parallel sibling path); `None`
    /// means the file is read (or decrypted) and parsed here.
    ///
    /// Ordering within one file: its options and plugins are recorded
    /// first, then its includes are processed in declaration order, and
    /// only then are its own directives appended — so directives of
    /// included files precede those of the including file.
    ///
    /// # Errors
    ///
    /// Returns `Err` only for problems with `path` itself: it is already on
    /// the include stack ([`LoadError::IncludeCycle`]), or it cannot be read
    /// or decrypted. Errors returned by nested calls for included files are
    /// pushed onto `errors` instead, as are parse errors, path traversal
    /// rejections, and glob failures.
    // The accumulators are threaded through as separate `&mut` parameters,
    // hence the long argument list.
    #[allow(clippy::too_many_arguments)]
    fn load_recursive(
        &mut self,
        path: &Path,
        pre_parsed: Option<(std::sync::Arc<str>, rustledger_parser::ParseResult)>,
        directives: &mut Vec<Spanned<Directive>>,
        options: &mut Options,
        plugins: &mut Vec<Plugin>,
        source_map: &mut SourceMap,
        errors: &mut Vec<LoadError>,
    ) -> Result<(), LoadError> {
        // Allocate path once for reuse
        let path_buf = path.to_path_buf();

        // Check for cycles using O(1) HashSet lookup
        if self.include_stack_set.contains(&path_buf) {
            // `collect::<Vec<_>>()` on a chain of two `ExactSizeIterator`s
            // preallocates the exact capacity via `size_hint`, so an
            // explicit `Vec::with_capacity(...)` + `extend` + `push` is
            // equivalent and noisier. This is the cycle-error cold path
            // anyway — readability wins over micro-optimization.
            let cycle: Vec<String> = self
                .include_stack
                .iter()
                .map(|p| p.display().to_string())
                .chain(std::iter::once(path.display().to_string()))
                .collect();
            return Err(LoadError::IncludeCycle { cycle });
        }

        // Check if already loaded (reached before via another include, but
        // not currently on the stack): skip silently, discarding any
        // `pre_parsed` data.
        if self.loaded_files.contains(&path_buf) {
            return Ok(());
        }

        // Use pre-parsed data if available (from parallel loading path),
        // otherwise read and parse the file.
        let (source, result) = if let Some(pre) = pre_parsed {
            pre
        } else {
            let src: std::sync::Arc<str> = if self.fs.is_encrypted(path) {
                decrypt_gpg_file(path)?.into()
            } else {
                self.fs.read(path)?
            };
            let parsed = rustledger_parser::parse(&src);
            (src, parsed)
        };

        // Add to source map (Arc::clone is cheap - just increments refcount)
        let file_id = source_map.add_file(path_buf.clone(), std::sync::Arc::clone(&source));

        // Mark as loading (update both stack and set).
        // From here on there is no early return, so the matching pop at the
        // end of the function always runs.
        self.include_stack_set.insert(path_buf.clone());
        self.include_stack.push(path_buf.clone());
        self.loaded_files.insert(path_buf);

        // Collect parse errors
        if !result.errors.is_empty() {
            errors.push(LoadError::ParseErrors {
                path: path.to_path_buf(),
                errors: result.errors,
            });
        }

        // Process options (before includes, so options set by included
        // files are applied after this file's own)
        for (key, value, _span) in result.options {
            options.set(&key, &value);
        }

        // Process plugins
        for (name, config, span) in result.plugins {
            // Check for "python:" prefix to force Python execution
            let (actual_name, force_python) = if let Some(stripped) = name.strip_prefix("python:") {
                (stripped.to_string(), true)
            } else {
                (name, false)
            };
            plugins.push(Plugin {
                name: actual_name,
                config,
                span,
                file_id,
                force_python,
            });
        }

        // Process includes (with glob pattern support).
        // Include paths are resolved relative to the directory of this file.
        let base_dir = path.parent().unwrap_or(Path::new("."));
        for (include_path, _span) in &result.includes {
            // Check if the include path contains glob metacharacters
            // (check on include_path, not full_path, to avoid false positives from directory names)
            let has_glob = include_path.contains('*')
                || include_path.contains('?')
                || include_path.contains('[');

            let full_path = base_dir.join(include_path);

            // Path traversal protection: check BEFORE glob expansion to avoid
            // enumerating files outside the allowed root directory
            //
            // NOTE(review): this pre-check uses the disk-based
            // `normalize_path`, while the per-match check further down uses
            // `self.fs.normalize` — confirm the two agree for non-disk
            // filesystems.
            if self.enforce_path_security
                && let Some(ref root) = self.root_dir
            {
                // For glob patterns, extract and check the non-glob prefix
                let path_to_check = if has_glob {
                    // Find where the first glob metacharacter is
                    let glob_start = include_path
                        .find(['*', '?', '['])
                        .unwrap_or(include_path.len());
                    // Get the directory prefix before the glob
                    let prefix = &include_path[..glob_start];
                    let prefix_path = if let Some(last_sep) = prefix.rfind('/') {
                        base_dir.join(&include_path[..=last_sep])
                    } else {
                        // No directory part before the glob: the pattern
                        // applies directly inside `base_dir`.
                        base_dir.to_path_buf()
                    };
                    normalize_path(&prefix_path)
                } else {
                    normalize_path(&full_path)
                };

                if !path_to_check.starts_with(root) {
                    errors.push(LoadError::PathTraversal {
                        include_path: include_path.clone(),
                        base_dir: root.clone(),
                    });
                    continue;
                }
            }

            let full_path_str = full_path.to_string_lossy();

            // Expand glob patterns or use literal path
            let paths_to_load: Vec<PathBuf> = if has_glob {
                match self.fs.glob(&full_path_str) {
                    Ok(matched) => matched,
                    Err(e) => {
                        errors.push(LoadError::GlobError {
                            pattern: include_path.clone(),
                            message: e,
                        });
                        continue;
                    }
                }
            } else {
                vec![full_path.clone()]
            };

            // Check if glob matched nothing
            if has_glob && paths_to_load.is_empty() {
                errors.push(LoadError::GlobNoMatch {
                    pattern: include_path.clone(),
                });
                continue;
            }

            // Normalize and security-check all matched paths first.
            let mut valid_paths = Vec::with_capacity(paths_to_load.len());
            for matched_path in paths_to_load {
                let canonical = self.fs.normalize(&matched_path);

                // Security check: glob could match files outside root via symlinks
                if self.enforce_path_security
                    && let Some(ref root) = self.root_dir
                    && !canonical.starts_with(root)
                {
                    errors.push(LoadError::PathTraversal {
                        include_path: matched_path.to_string_lossy().into_owned(),
                        base_dir: root.clone(),
                    });
                    continue;
                }

                valid_paths.push(canonical);
            }

            // Parallel optimization: when loading multiple sibling includes
            // from disk, read and parse them in parallel. The expensive work
            // (I/O + tokenize + parse) runs on rayon's thread pool while the
            // main thread coordinates the include tree walk.
            //
            // Each file is read and parsed independently. Results are then
            // merged sequentially to preserve include order and process any
            // nested includes via recursive calls.
            if valid_paths.len() > 1 && self.fs.supports_parallel_read() {
                use rayon::prelude::*;

                // Read + parse non-encrypted files in parallel, preserving
                // original include order. Each entry becomes either
                // Some((source, parsed)) for successful reads, or None for
                // encrypted/failed files (which fall back to sequential).
                //
                // We keep the original index to merge results in order,
                // ensuring option/directive precedence matches the declared
                // include sequence.
                let fs = &*self.fs;
                let pre_parsed: Vec<Option<(std::sync::Arc<str>, rustledger_parser::ParseResult)>> =
                    valid_paths
                        .par_iter()
                        .map(|p| {
                            // Skip encrypted files — they need sequential GPG decryption
                            if fs.is_encrypted(p) {
                                return None;
                            }
                            // Read through the FileSystem trait so all I/O goes
                            // through one code path (UTF-8 handling, error types, etc.)
                            // A read failure yields `None` here; the sequential
                            // retry below reads again and reports the error.
                            let source = fs.read(p).ok()?;
                            let parsed = rustledger_parser::parse(&source);
                            Some((source, parsed))
                        })
                        .collect();

                // Merge in original include order. Files that were
                // pre-parsed pass their data to load_recursive; files
                // that weren't (encrypted or I/O error) are loaded
                // sequentially as a fallback.
                for (canonical, pre) in valid_paths.iter().zip(pre_parsed) {
                    if let Err(e) = self.load_recursive(
                        canonical, pre, directives, options, plugins, source_map, errors,
                    ) {
                        // An include that fails is recorded, not fatal.
                        errors.push(e);
                    }
                }
            } else {
                // Sequential fallback: single file or VFS.
                for canonical in valid_paths {
                    if let Err(e) = self.load_recursive(
                        &canonical, None, directives, options, plugins, source_map, errors,
                    ) {
                        // An include that fails is recorded, not fatal.
                        errors.push(e);
                    }
                }
            }
        }

        // Add directives from this file, setting the file_id
        directives.extend(
            result
                .directives
                .into_iter()
                .map(|d| d.with_file_id(file_id)),
        );

        // Pop from stack and set. The file stays in `loaded_files`, so a
        // later include of it is skipped rather than reported as a cycle.
        if let Some(popped) = self.include_stack.pop() {
            self.include_stack_set.remove(&popped);
        }

        Ok(())
    }
659}
660
661/// Build a display context from loaded directives and options.
662///
663/// This scans all directives for amounts and tracks the maximum precision seen
664/// for each currency. Fixed precisions from `option "display_precision"` override
665/// the inferred values.
666fn build_display_context(directives: &[Spanned<Directive>], options: &Options) -> DisplayContext {
667    let mut ctx = DisplayContext::new();
668
669    // Set render_commas from options
670    ctx.set_render_commas(options.render_commas);
671
672    // Scan directives for amounts to infer precision
673    for spanned in directives {
674        match &spanned.value {
675            Directive::Transaction(txn) => {
676                for posting in &txn.postings {
677                    // Units (IncompleteAmount)
678                    if let Some(ref units) = posting.units
679                        && let (Some(number), Some(currency)) = (units.number(), units.currency())
680                    {
681                        ctx.update(number, currency);
682                    }
683                    // Cost (CostSpec)
684                    if let Some(ref cost) = posting.cost
685                        && let (Some(number), Some(currency)) =
686                            (cost.number_per.or(cost.number_total), &cost.currency)
687                    {
688                        ctx.update(number, currency.as_str());
689                    }
690                    // Price annotations: included so the per-currency dist
691                    // sees them, matching Python beancount's DisplayContext
692                    // population. With the default `Precision::MostCommon`
693                    // policy (introduced for bean-query parity), high-
694                    // precision computed exchange rates are naturally
695                    // ignored by the mode — they're a small minority next
696                    // to mainstream postings. Pre-fix (under MAX policy)
697                    // they were excluded to avoid inflating display
698                    // precision; that exclusion is no longer needed.
699                    if let Some(ref price) = posting.price
700                        && let Some(amount) = price.amount()
701                    {
702                        ctx.update(amount.number, amount.currency.as_str());
703                    }
704                }
705            }
706            Directive::Balance(bal) => {
707                ctx.update(bal.amount.number, bal.amount.currency.as_str());
708                if let Some(tol) = bal.tolerance {
709                    ctx.update(tol, bal.amount.currency.as_str());
710                }
711            }
712            Directive::Price(p) => {
713                // Same rationale as posting price annotations above —
714                // included now that MostCommon is the default. The single
715                // 28dp computed-rate price won't shift the mode for a
716                // currency with hundreds of mainstream postings.
717                ctx.update(p.amount.number, p.amount.currency.as_str());
718            }
719            Directive::Pad(_)
720            | Directive::Open(_)
721            | Directive::Close(_)
722            | Directive::Commodity(_)
723            | Directive::Event(_)
724            | Directive::Query(_)
725            | Directive::Note(_)
726            | Directive::Document(_)
727            | Directive::Custom(_) => {}
728        }
729    }
730
731    // Apply fixed precisions from options (these override inferred values)
732    for (currency, precision) in &options.display_precision {
733        ctx.set_fixed_precision(currency, *precision);
734    }
735
736    // Apply per-commodity `precision: N` metadata (issue #991), AFTER the
737    // options loop so a commodity-level declaration wins over the global
738    // option. Multi-declaration of the same currency is last-wins (matches
739    // typical option-stacking semantics). Invalid values are silently
740    // skipped here — `rustledger-validate` surfaces them as
741    // `InvalidPrecisionMetadata` warnings (E5003) so users see the problem
742    // without breaking loading.
743    for spanned in directives {
744        if let Directive::Commodity(comm) = &spanned.value
745            && let Some(value) = comm.meta.get("precision")
746            && let Ok(precision) = rustledger_core::parse_precision_meta(value)
747        {
748            ctx.set_fixed_precision(comm.currency.as_str(), precision);
749        }
750    }
751
752    ctx
753}
754
/// Load a beancount file without processing.
///
/// This is a convenience function that creates a loader and loads a single file
/// (together with its includes) through [`Loader::load`].
///
/// It is only compiled when none of the `booking`, `plugins`, or
/// `validation` features is enabled. With any of them, the `load` function
/// re-exported from the `process` module takes this name instead; use that
/// one, with `LoadOptions`, for fully processed results (booking, plugins,
/// validation).
///
/// # Errors
///
/// Returns the error from [`Loader::load`] unchanged.
#[cfg(not(any(feature = "booking", feature = "plugins", feature = "validation")))]
pub fn load(path: &Path) -> Result<LoadResult, LoadError> {
    Loader::new().load(path)
}
764
765#[cfg(test)]
766mod tests {
767    use super::*;
768    use std::io::Write;
769    use tempfile::NamedTempFile;
770
771    #[test]
772    fn test_is_encrypted_file_gpg_extension() {
773        let fs = DiskFileSystem;
774        let path = Path::new("test.beancount.gpg");
775        assert!(fs.is_encrypted(path));
776    }
777
778    #[test]
779    fn test_is_encrypted_file_plain_beancount() {
780        let fs = DiskFileSystem;
781        let path = Path::new("test.beancount");
782        assert!(!fs.is_encrypted(path));
783    }
784
785    #[test]
786    fn test_is_encrypted_file_asc_with_pgp_header() {
787        let fs = DiskFileSystem;
788        let mut file = NamedTempFile::with_suffix(".asc").unwrap();
789        writeln!(file, "-----BEGIN PGP MESSAGE-----").unwrap();
790        writeln!(file, "some encrypted content").unwrap();
791        writeln!(file, "-----END PGP MESSAGE-----").unwrap();
792        file.flush().unwrap();
793
794        assert!(fs.is_encrypted(file.path()));
795    }
796
797    #[test]
798    fn test_is_encrypted_file_asc_without_pgp_header() {
799        let fs = DiskFileSystem;
800        let mut file = NamedTempFile::with_suffix(".asc").unwrap();
801        writeln!(file, "This is just a plain text file").unwrap();
802        writeln!(file, "with .asc extension but no PGP content").unwrap();
803        file.flush().unwrap();
804
805        assert!(!fs.is_encrypted(file.path()));
806    }
807
808    #[test]
809    fn test_decrypt_gpg_file_missing_gpg() {
810        // Create a fake .gpg file
811        let mut file = NamedTempFile::with_suffix(".gpg").unwrap();
812        writeln!(file, "fake encrypted content").unwrap();
813        file.flush().unwrap();
814
815        // This will fail because the content isn't actually GPG-encrypted
816        // (or gpg isn't installed, or there's no matching key)
817        let result = decrypt_gpg_file(file.path());
818        assert!(result.is_err());
819
820        if let Err(LoadError::Decryption { path, message }) = result {
821            assert_eq!(path, file.path().to_path_buf());
822            assert!(!message.is_empty());
823        } else {
824            panic!("Expected Decryption error");
825        }
826    }
827
828    #[test]
829    fn test_plugin_force_python_prefix() {
830        let mut file = NamedTempFile::with_suffix(".beancount").unwrap();
831        writeln!(file, r#"plugin "python:my_plugin""#).unwrap();
832        writeln!(file, r#"plugin "regular_plugin""#).unwrap();
833        file.flush().unwrap();
834
835        let result = Loader::new().load(file.path()).unwrap();
836
837        assert_eq!(result.plugins.len(), 2);
838
839        // First plugin should have force_python = true and name without prefix
840        assert_eq!(result.plugins[0].name, "my_plugin");
841        assert!(result.plugins[0].force_python);
842
843        // Second plugin should have force_python = false
844        assert_eq!(result.plugins[1].name, "regular_plugin");
845        assert!(!result.plugins[1].force_python);
846    }
847
848    #[test]
849    fn test_plugin_force_python_with_config() {
850        let mut file = NamedTempFile::with_suffix(".beancount").unwrap();
851        writeln!(file, r#"plugin "python:my_plugin" "config_value""#).unwrap();
852        file.flush().unwrap();
853
854        let result = Loader::new().load(file.path()).unwrap();
855
856        assert_eq!(result.plugins.len(), 1);
857        assert_eq!(result.plugins[0].name, "my_plugin");
858        assert!(result.plugins[0].force_python);
859        assert_eq!(result.plugins[0].config, Some("config_value".to_string()));
860    }
861
862    #[test]
863    fn test_virtual_filesystem_include_resolution() {
864        // Create a virtual filesystem with multiple files
865        let mut vfs = VirtualFileSystem::new();
866        vfs.add_file(
867            "main.beancount",
868            r#"
869include "accounts.beancount"
870
8712024-01-15 * "Coffee"
872  Expenses:Food  5.00 USD
873  Assets:Bank   -5.00 USD
874"#,
875        );
876        vfs.add_file(
877            "accounts.beancount",
878            r"
8792024-01-01 open Assets:Bank USD
8802024-01-01 open Expenses:Food USD
881",
882        );
883
884        // Load with virtual filesystem
885        let result = Loader::new()
886            .with_filesystem(Box::new(vfs))
887            .load(Path::new("main.beancount"))
888            .unwrap();
889
890        // Should have 3 directives: 2 opens + 1 transaction
891        assert_eq!(result.directives.len(), 3);
892        assert!(result.errors.is_empty());
893
894        // Verify directive types
895        let directive_types: Vec<_> = result
896            .directives
897            .iter()
898            .map(|d| match &d.value {
899                rustledger_core::Directive::Open(_) => "open",
900                rustledger_core::Directive::Transaction(_) => "txn",
901                _ => "other",
902            })
903            .collect();
904        assert_eq!(directive_types, vec!["open", "open", "txn"]);
905    }
906
907    #[test]
908    fn test_virtual_filesystem_nested_includes() {
909        // Test deeply nested includes
910        let mut vfs = VirtualFileSystem::new();
911        vfs.add_file("main.beancount", r#"include "level1.beancount""#);
912        vfs.add_file(
913            "level1.beancount",
914            r#"
915include "level2.beancount"
9162024-01-01 open Assets:Level1 USD
917"#,
918        );
919        vfs.add_file("level2.beancount", "2024-01-01 open Assets:Level2 USD");
920
921        let result = Loader::new()
922            .with_filesystem(Box::new(vfs))
923            .load(Path::new("main.beancount"))
924            .unwrap();
925
926        // Should have 2 open directives from nested includes
927        assert_eq!(result.directives.len(), 2);
928        assert!(result.errors.is_empty());
929    }
930
931    #[test]
932    fn test_virtual_filesystem_missing_include() {
933        let mut vfs = VirtualFileSystem::new();
934        vfs.add_file("main.beancount", r#"include "nonexistent.beancount""#);
935
936        let result = Loader::new()
937            .with_filesystem(Box::new(vfs))
938            .load(Path::new("main.beancount"))
939            .unwrap();
940
941        // Should have an error for missing file
942        assert!(!result.errors.is_empty());
943        let error_msg = result.errors[0].to_string();
944        assert!(error_msg.contains("not found") || error_msg.contains("Io"));
945    }
946
947    #[test]
948    fn test_virtual_filesystem_glob_include() {
949        let mut vfs = VirtualFileSystem::new();
950        vfs.add_file(
951            "main.beancount",
952            r#"
953include "transactions/*.beancount"
954
9552024-01-01 open Assets:Bank USD
956"#,
957        );
958        vfs.add_file(
959            "transactions/2024.beancount",
960            r#"
9612024-01-01 open Expenses:Food USD
962
9632024-06-15 * "Groceries"
964  Expenses:Food  50.00 USD
965  Assets:Bank   -50.00 USD
966"#,
967        );
968        vfs.add_file(
969            "transactions/2025.beancount",
970            r#"
9712025-01-01 open Expenses:Rent USD
972
9732025-02-01 * "Rent"
974  Expenses:Rent  1000.00 USD
975  Assets:Bank   -1000.00 USD
976"#,
977        );
978        // This file should NOT be matched by the glob
979        vfs.add_file(
980            "other/ignored.beancount",
981            "2024-01-01 open Expenses:Other USD",
982        );
983
984        let result = Loader::new()
985            .with_filesystem(Box::new(vfs))
986            .load(Path::new("main.beancount"))
987            .unwrap();
988
989        // Should have: 1 open from main + 2 opens from transactions + 2 txns
990        let opens = result
991            .directives
992            .iter()
993            .filter(|d| matches!(d.value, rustledger_core::Directive::Open(_)))
994            .count();
995        assert_eq!(
996            opens, 3,
997            "expected 3 open directives (1 main + 2 transactions)"
998        );
999
1000        let txns = result
1001            .directives
1002            .iter()
1003            .filter(|d| matches!(d.value, rustledger_core::Directive::Transaction(_)))
1004            .count();
1005        assert_eq!(txns, 2, "expected 2 transactions from glob-matched files");
1006
1007        assert!(
1008            result.errors.is_empty(),
1009            "expected no errors, got: {:?}",
1010            result.errors
1011        );
1012    }
1013
1014    #[test]
1015    fn test_virtual_filesystem_glob_dot_slash_prefix() {
1016        let mut vfs = VirtualFileSystem::new();
1017        vfs.add_file(
1018            "main.beancount",
1019            r#"
1020include "./transactions/*.beancount"
1021
10222024-01-01 open Assets:Bank USD
1023"#,
1024        );
1025        vfs.add_file(
1026            "transactions/2024.beancount",
1027            r#"
10282024-01-01 open Expenses:Food USD
1029
10302024-06-15 * "Groceries"
1031  Expenses:Food  50.00 USD
1032  Assets:Bank   -50.00 USD
1033"#,
1034        );
1035        vfs.add_file(
1036            "transactions/2025.beancount",
1037            r#"
10382025-01-01 open Expenses:Rent USD
1039
10402025-02-01 * "Rent"
1041  Expenses:Rent  1000.00 USD
1042  Assets:Bank   -1000.00 USD
1043"#,
1044        );
1045
1046        let result = Loader::new()
1047            .with_filesystem(Box::new(vfs))
1048            .load(Path::new("main.beancount"))
1049            .unwrap();
1050
1051        // Should have: 1 open from main + 2 opens from transactions + 2 txns
1052        let opens = result
1053            .directives
1054            .iter()
1055            .filter(|d| matches!(d.value, rustledger_core::Directive::Open(_)))
1056            .count();
1057        assert_eq!(
1058            opens, 3,
1059            "expected 3 open directives (1 main + 2 transactions), ./ prefix should be normalized"
1060        );
1061
1062        let txns = result
1063            .directives
1064            .iter()
1065            .filter(|d| matches!(d.value, rustledger_core::Directive::Transaction(_)))
1066            .count();
1067        assert_eq!(
1068            txns, 2,
1069            "expected 2 transactions from glob-matched files despite ./ prefix"
1070        );
1071
1072        assert!(
1073            result.errors.is_empty(),
1074            "expected no errors, got: {:?}",
1075            result.errors
1076        );
1077    }
1078
1079    #[test]
1080    fn test_virtual_filesystem_glob_no_match() {
1081        let mut vfs = VirtualFileSystem::new();
1082        vfs.add_file("main.beancount", r#"include "nonexistent/*.beancount""#);
1083
1084        let result = Loader::new()
1085            .with_filesystem(Box::new(vfs))
1086            .load(Path::new("main.beancount"))
1087            .unwrap();
1088
1089        // Should have a GlobNoMatch error
1090        let has_glob_error = result
1091            .errors
1092            .iter()
1093            .any(|e| matches!(e, LoadError::GlobNoMatch { .. }));
1094        assert!(
1095            has_glob_error,
1096            "expected GlobNoMatch error, got: {:?}",
1097            result.errors
1098        );
1099    }
1100
1101    /// Regression test for #1071: a fresh multi-file parse must produce
1102    /// deduplicated `InternedStr` values, so two `Posting`s referencing
1103    /// the same account from different files share one `Arc<str>`.
1104    /// Pre-fix the per-file `StringInterner` kept the two `Arc`s
1105    /// distinct and `Arc::ptr_eq` fell through to byte comparison.
1106    #[test]
1107    fn test_fresh_parse_deduplicates_internedstr_across_files() {
1108        let mut vfs = VirtualFileSystem::new();
1109        vfs.add_file(
1110            "main.beancount",
1111            r#"
11122024-01-01 open Assets:Bank USD
1113include "transactions.beancount"
1114"#,
1115        );
1116        vfs.add_file(
1117            "transactions.beancount",
1118            r#"
11192024-01-15 * "Coffee"
1120  Assets:Bank   -5.00 USD
1121  Expenses:Coffee  5.00 USD
1122
11232024-01-16 open Expenses:Coffee
1124"#,
1125        );
1126
1127        let result = Loader::new()
1128            .with_filesystem(Box::new(vfs))
1129            .load(Path::new("main.beancount"))
1130            .unwrap();
1131
1132        // Collect every `Assets:Bank` `InternedStr` (one from `open`,
1133        // one from the posting). They originate in different files, so
1134        // pre-fix they had distinct `Arc<str>` allocations.
1135        let bank_accounts: Vec<&rustledger_core::InternedStr> = result
1136            .directives
1137            .iter()
1138            .filter_map(|s| match &s.value {
1139                rustledger_core::Directive::Open(o) if o.account.as_str() == "Assets:Bank" => {
1140                    Some(&o.account)
1141                }
1142                rustledger_core::Directive::Transaction(t) => t
1143                    .postings
1144                    .iter()
1145                    .find(|p| p.account.as_str() == "Assets:Bank")
1146                    .map(|p| &p.account),
1147                _ => None,
1148            })
1149            .collect();
1150
1151        assert_eq!(
1152            bank_accounts.len(),
1153            2,
1154            "expected one Open and one posting for Assets:Bank"
1155        );
1156        assert!(
1157            bank_accounts[0].ptr_eq(bank_accounts[1]),
1158            "Assets:Bank from cross-file open/posting must share the same Arc<str> \
1159             after Loader::load runs reintern_directives"
1160        );
1161    }
1162
1163    /// Companion to the previous test — covers the Transaction-level
1164    /// `InternedStr` fields (payee, narration, tags, links) that the
1165    /// pre-Copilot version of `reintern_directive` silently skipped
1166    /// (Copilot review on PR #1081). Two transactions in different
1167    /// files share the same payee + tag; after `Loader::load` they
1168    /// must share one `Arc<str>` per string.
1169    #[test]
1170    fn test_fresh_parse_deduplicates_transaction_fields_across_files() {
1171        let mut vfs = VirtualFileSystem::new();
1172        vfs.add_file(
1173            "main.beancount",
1174            r#"
11752024-01-01 open Assets:Bank USD
11762024-01-01 open Expenses:Coffee
1177
11782024-01-15 * "Cafe Bench" "Latte" #morning
1179  Assets:Bank   -5.00 USD
1180  Expenses:Coffee  5.00 USD
1181
1182include "more.beancount"
1183"#,
1184        );
1185        vfs.add_file(
1186            "more.beancount",
1187            r#"
11882024-01-16 * "Cafe Bench" "Espresso" #morning
1189  Assets:Bank   -3.00 USD
1190  Expenses:Coffee  3.00 USD
1191"#,
1192        );
1193
1194        let result = Loader::new()
1195            .with_filesystem(Box::new(vfs))
1196            .load(Path::new("main.beancount"))
1197            .unwrap();
1198
1199        let txns: Vec<&rustledger_core::Transaction> = result
1200            .directives
1201            .iter()
1202            .filter_map(|s| match &s.value {
1203                rustledger_core::Directive::Transaction(t) => Some(t),
1204                _ => None,
1205            })
1206            .collect();
1207
1208        assert_eq!(txns.len(), 2, "expected the two transactions");
1209        let p1 = txns[0].payee.as_ref().expect("first txn has payee");
1210        let p2 = txns[1].payee.as_ref().expect("second txn has payee");
1211        assert!(
1212            p1.ptr_eq(p2),
1213            "Identical payee \"Cafe Bench\" across files must share one Arc<str>"
1214        );
1215
1216        assert!(!txns[0].tags.is_empty() && !txns[1].tags.is_empty());
1217        assert!(
1218            txns[0].tags[0].ptr_eq(&txns[1].tags[0]),
1219            "Identical tag #morning across files must share one Arc<str>"
1220        );
1221    }
1222}