Skip to main content

rustledger_loader/
process.rs

1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
7use rustledger_core::{BookingMethod, Directive, DisplayContext};
8use rustledger_parser::Spanned;
9use std::path::Path;
10use thiserror::Error;
11
/// Options for loading and processing a ledger.
///
/// These flags select which stages of the `process` pipeline run and how
/// plugins are resolved. `LoadOptions::default()` enables file-declared
/// plugins and validation; `LoadOptions::raw()` disables both.
#[derive(Debug, Clone)]
pub struct LoadOptions {
    /// Booking method for lot matching (default: Strict).
    ///
    /// `process` uses this value whenever the file does not set
    /// `option "booking_method"` itself.
    pub booking_method: BookingMethod,
    /// Run plugins declared in the file (default: true).
    ///
    /// Document discovery in `run_plugins` is also gated on this flag, so
    /// turning it off suppresses both.
    pub run_plugins: bool,
    /// Run `auto_accounts` plugin (default: false).
    ///
    /// Checked independently of `run_plugins`.
    pub auto_accounts: bool,
    /// Additional native plugins to run (by name).
    pub extra_plugins: Vec<String>,
    /// Plugin configurations for extra plugins.
    ///
    /// Parallel to `extra_plugins`: entry `i` configures `extra_plugins[i]`.
    /// A missing or `None` entry means the plugin gets no configuration.
    pub extra_plugin_configs: Vec<Option<String>>,
    /// Run validation after processing (default: true).
    pub validate: bool,
    /// Enable path security (prevent include traversal) (default: false).
    ///
    /// When set, `run_plugins` also rejects non-native plugin paths that
    /// resolve outside the ledger's directory.
    pub path_security: bool,
}
30
31impl Default for LoadOptions {
32    fn default() -> Self {
33        Self {
34            booking_method: BookingMethod::Strict,
35            run_plugins: true,
36            auto_accounts: false,
37            extra_plugins: Vec::new(),
38            extra_plugin_configs: Vec::new(),
39            validate: true,
40            path_security: false,
41        }
42    }
43}
44
45impl LoadOptions {
46    /// Create options for raw loading (no booking, no plugins, no validation).
47    #[must_use]
48    pub const fn raw() -> Self {
49        Self {
50            booking_method: BookingMethod::Strict,
51            run_plugins: false,
52            auto_accounts: false,
53            extra_plugins: Vec::new(),
54            extra_plugin_configs: Vec::new(),
55            validate: false,
56            path_security: false,
57        }
58    }
59}
60
/// Errors that can occur during ledger processing.
///
/// This is the hard-failure type returned through `Result`. Diagnostics
/// that `process` can recover from (load, booking and validation problems)
/// are collected as `LedgerError` values in `Ledger::errors` instead.
///
/// Most variants only exist when the matching cargo feature is enabled.
#[derive(Debug, Error)]
pub enum ProcessError {
    /// Loading failed.
    ///
    /// Converts automatically from `LoadError` via `?` (`#[from]`).
    #[error("loading failed: {0}")]
    Load(#[from] LoadError),

    /// Booking/interpolation error.
    ///
    /// Only available with the `booking` feature.
    #[cfg(feature = "booking")]
    #[error("booking error: {message}")]
    Booking {
        /// Error message.
        message: String,
        /// Date of the transaction.
        date: rustledger_core::NaiveDate,
        /// Narration of the transaction.
        narration: String,
    },

    /// Plugin execution error.
    ///
    /// Only available with the `plugins` feature.
    #[cfg(feature = "plugins")]
    #[error("plugin error: {0}")]
    Plugin(String),

    /// Validation error.
    ///
    /// Only available with the `validation` feature.
    #[cfg(feature = "validation")]
    #[error("validation error: {0}")]
    Validation(String),

    /// Plugin output conversion error.
    ///
    /// Only available with the `plugins` feature.
    #[cfg(feature = "plugins")]
    #[error("failed to convert plugin output: {0}")]
    PluginConversion(String),
}
95
/// A fully processed ledger.
///
/// This is the result of loading and processing a beancount file,
/// equivalent to the tuple returned by Python's `loader.load_file()`.
/// It is produced by `process`, which moves the options, plugins, source
/// map and display context over from the raw load result unchanged.
#[derive(Debug)]
pub struct Ledger {
    /// Processed directives (sorted, booked, plugins applied).
    ///
    /// Transactions that failed booking are merged back in as well, so the
    /// list is sorted by `(date, priority, file_id, span.start)` and still
    /// contains everything the user wrote.
    pub directives: Vec<Spanned<Directive>>,
    /// Options parsed from the file.
    pub options: Options,
    /// Plugins declared in the file.
    pub plugins: Vec<Plugin>,
    /// Source map for error reporting.
    pub source_map: SourceMap,
    /// Errors encountered during processing.
    ///
    /// Holds both errors and warnings; check `LedgerError::severity` to
    /// tell them apart.
    pub errors: Vec<LedgerError>,
    /// Display context for formatting numbers.
    pub display_context: DisplayContext,
}
115
/// Unified error type for ledger processing.
///
/// This encompasses all error types that can occur during loading,
/// booking, plugin execution, and validation.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal; use `LedgerError::error` or
/// `LedgerError::warning` and the `with_*` builders instead.
#[derive(Debug)]
#[non_exhaustive]
pub struct LedgerError {
    /// Error severity.
    pub severity: ErrorSeverity,
    /// Error code (e.g., "E0001", "W8002").
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Source location, if available.
    pub location: Option<ErrorLocation>,
    /// Byte span (inclusive start, exclusive end) in the source file,
    /// used by rich renderers (e.g. miette) to draw a snippet around
    /// the offending directive. Consumers that only need `file:line:col`
    /// should use `location`; those that want to show the surrounding
    /// source text want this.
    pub source_span: Option<(usize, usize)>,
    /// Source file ID — index into the ledger's [`SourceMap`]. Used
    /// alongside `source_span` for snippet rendering.
    pub file_id: Option<u16>,
    /// Processing phase that produced this error: "parse", "validate", or "plugin".
    ///
    /// The constructors default this to "validate"; other phases are set
    /// explicitly through `with_phase`.
    pub phase: String,
}
143
/// Error severity level.
///
/// Stored on every `LedgerError`, so a single error list can carry both
/// errors and warnings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// Error - indicates a problem that should be fixed.
    Error,
    /// Warning - indicates a potential issue.
    Warning,
}
152
/// Source location for an error.
///
/// A human-oriented `file:line:column` position. For byte-precise snippet
/// rendering, `LedgerError` carries a separate `source_span` / `file_id`.
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// File path.
    pub file: std::path::PathBuf,
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}
163
164impl LedgerError {
165    /// Create a new error with the given phase.
166    pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
167        Self {
168            severity: ErrorSeverity::Error,
169            code: code.into(),
170            message: message.into(),
171            location: None,
172            source_span: None,
173            file_id: None,
174            phase: "validate".to_string(),
175        }
176    }
177
178    /// Create a new warning.
179    pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
180        Self {
181            severity: ErrorSeverity::Warning,
182            code: code.into(),
183            message: message.into(),
184            location: None,
185            source_span: None,
186            file_id: None,
187            phase: "validate".to_string(),
188        }
189    }
190
191    /// Attach a source span and file ID so rich renderers can draw a snippet.
192    #[must_use]
193    pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
194        self.source_span = Some(span);
195        self.file_id = Some(file_id);
196        self
197    }
198
199    /// Set the processing phase for this error.
200    #[must_use]
201    pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
202        self.phase = phase.into();
203        self
204    }
205
206    /// Add a location to this error.
207    #[must_use]
208    pub fn with_location(mut self, location: ErrorLocation) -> Self {
209        self.location = Some(location);
210        self
211    }
212}
213
214/// Process a raw load result into a fully processed ledger.
215///
216/// Pipeline (see numbered comments below for the rationale of each step):
217///
218/// ```text
219///   1. sort                         (canonical display order)
220///   2. synth plugins                (auto_accounts, document_discovery)
221///   3. Early validation             (account presence, structural, lifecycle)
222///   4. booking                      (cost spec resolution, interpolation)
223///   5. partition                    (set aside failed-booking txns)
224///   6. regular plugins              (file plugins + extras, on booked only)
225///   7. Late validation              (balance, currency, inventory, on booked only)
226///   8. finalize                     (unused-pad warnings)
227///   9. re-merge                     (booked + failed → final Ledger.directives)
228/// ```
229pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
230    let mut directives = raw.directives;
231    let mut errors: Vec<LedgerError> = Vec::new();
232
233    // Convert load errors to ledger errors (parse phase)
234    for load_err in raw.errors {
235        errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
236    }
237
238    // 1. Sort once into canonical display order: `(date, priority, file_id,
239    //    span.start)`. This is what BQL / JSON / format output expect and
240    //    what Python beancount produces via `(date, type_priority, lineno)`.
241    //    `span.start` is a byte offset that orders within a file the same
242    //    way line numbers would; `file_id` preserves include order across
243    //    files (issue #1049 — same rows, different tie-break would diverge
244    //    BQL output on same-date augmentation+reduction fixtures).
245    //
246    //    Booking needs a different iteration order — augmentations BEFORE
247    //    reductions on the same `(date, priority)` so lots exist when
248    //    matched — but it doesn't need the underlying vec reordered.
249    //    `run_booking` walks the vec via a transient `Vec<usize>` index
250    //    that adds `has_cost_reduction` as an extra tiebreaker; this
251    //    avoids a second full sort of `Vec<Spanned<Directive>>` (large
252    //    structs) after booking just to put display order back.
253    directives.sort_by_key(|d| (d.value.date(), d.value.priority(), d.file_id, d.span.start));
254
255    // 2. Synth-only plugins — run BEFORE early validation so the
256    // synthesizers (`auto_accounts` and `document_discovery`) inject
257    // Opens / Documents that Early checks depend on (E1001 account
258    // presence, E5001 missing-document file). Only this narrow synth
259    // subset runs here; everything else waits until after booking
260    // (step 5) so cost-spec-reading plugins see filled-in
261    // `cost.number_per` values. See `PluginPass` rustdoc for the
262    // detailed split rationale.
263    #[cfg(feature = "plugins")]
264    if options.run_plugins || options.auto_accounts {
265        run_plugins(
266            &mut directives,
267            &raw.plugins,
268            &raw.options,
269            options,
270            &raw.source_map,
271            &mut errors,
272            PluginPass::PreBookingSynth,
273        )?;
274    }
275
276    // 3. Validation (early phase) — runs on pre-booking directives,
277    // AFTER plugins so account-presence checks (E1001) see any Opens
278    // that plugins like `auto_accounts` injected.
279    //
280    // This is what lets booking match Python's "prune zero-interp
281    // postings" behavior in step 4 without losing E1001 on the
282    // elided-zero-to-unopened-account case (rustledger#877).
283    //
284    // The `ValidationSession` carries state (open accounts,
285    // commodities, pending pads, accumulated tolerances) into the late
286    // phase at step 5 so balance assertions and inventory updates see
287    // everything the early phase recorded.
288    #[cfg(feature = "validation")]
289    let mut validation_session = if options.validate {
290        Some(rustledger_validate::ValidationSession::new(
291            build_validation_options(&raw.options, &raw.source_map),
292        ))
293    } else {
294        None
295    };
296
297    // Compute `today` once for both phases — avoids a midnight-crossing
298    // race where Early and Late could disagree on what day it is, and
299    // gives `FutureDate` warnings a single coherent reference point.
300    #[cfg(feature = "validation")]
301    let today = jiff::Zoned::now().date();
302
303    #[cfg(feature = "validation")]
304    if let Some(session) = validation_session.as_mut() {
305        let phase_errors =
306            session.run_phase_spanned(&directives, rustledger_validate::Phase::Early, today);
307        ledger_errors_extend(&mut errors, phase_errors, &raw.source_map);
308    }
309
310    // 4. Booking/interpolation
311    //
312    // The booking method comes from two sources: the API-level
313    // `LoadOptions.booking_method` and the file-level `option
314    // "booking_method"`. The file-level option takes precedence only
315    // when the file explicitly set it AND the caller hasn't overridden
316    // the API-level default. This matches Python beancount, where
317    // `option "booking_method" "FIFO"` sets the default for all accounts
318    // without an explicit method on their `open` directive.
319    //
320    // We check `set_options` (not `booking_method.is_empty()`) because
321    // `Options::new()` defaults `booking_method` to "STRICT", so the
322    // string is never empty.
323    //
324    // Booking drops zero-value interpolated postings as part of
325    // `interpolate()` — see the comment in
326    // `rustledger-booking/src/interpolate.rs`. The early validation
327    // pass above already caught E1001 on any unopened-account
328    // references, so it's safe to prune now (the now-removed
329    // `INTERPOLATED_MARKER` workaround in #1114 is obsolete).
330    // Run booking and receive the directives partitioned into
331    // `(booked, failed)`. Failed transactions are in pre-booking shape
332    // (unresolved cost specs, unfilled elided slots, possibly
333    // unbalanced); they don't flow into regular plugins or Late
334    // validation — booking already reported the root cause and the
335    // downstream checks would cascade misleading errors. They get
336    // re-merged for the final `Ledger.directives` so the user still
337    // sees their original input.
338    #[cfg(feature = "booking")]
339    let (mut booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) = {
340        let file_set_booking = raw.options.set_options.contains("booking_method");
341        let effective_method = if file_set_booking {
342            raw.options
343                .booking_method
344                .parse()
345                .unwrap_or(options.booking_method)
346        } else {
347            options.booking_method
348        };
349        run_booking(directives, effective_method, &mut errors)
350    };
351    #[cfg(not(feature = "booking"))]
352    let (mut booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
353        (directives, Vec::new());
354
355    // 5. Post-booking plugins — file-declared plugins + CLI extras.
356    // Runs AFTER booking so cost-spec-reading plugins
357    // (`implicit_prices`, `capital_gains_classifier`,
358    // `check_average_cost`, `sell_gains`, `unrealized`, `valuation`)
359    // see filled-in `cost.number_per` values. This matches Python
360    // beancount's plugins-after-booking ordering and closes
361    // rustledger#1117. Failed transactions were partitioned out
362    // above; plugins only see successfully-booked input.
363    #[cfg(feature = "plugins")]
364    if options.run_plugins || !options.extra_plugins.is_empty() {
365        run_plugins(
366            &mut booked,
367            &raw.plugins,
368            &raw.options,
369            options,
370            &raw.source_map,
371            &mut errors,
372            PluginPass::PostBooking,
373        )?;
374    }
375
376    // 6. Validation (late phase) — runs on booked + plugin-processed
377    // directives. Reuses the `ValidationSession` from step 2 so
378    // account/commodity/pad bookkeeping carries forward.
379    #[cfg(feature = "validation")]
380    if let Some(mut session) = validation_session {
381        let phase_errors =
382            session.run_phase_spanned(&booked, rustledger_validate::Phase::Late, today);
383        ledger_errors_extend(&mut errors, phase_errors, &raw.source_map);
384        let finalize_errors = session.finalize();
385        ledger_errors_extend(&mut errors, finalize_errors, &raw.source_map);
386    }
387
388    // 7. Re-merge failed transactions back into the directive list
389    // for output. The user wrote them and expects to see them in the
390    // resulting `Ledger.directives`; we just kept them isolated from
391    // post-booking processing. Re-sort to restore canonical display
392    // order (booked retained order during plugin transformation; the
393    // sort restores the failed entries' positions).
394    let mut directives = booked;
395    directives.extend(failed);
396    directives.sort_by_key(|d| (d.value.date(), d.value.priority(), d.file_id, d.span.start));
397
398    Ok(Ledger {
399        directives,
400        options: raw.options,
401        plugins: raw.plugins,
402        source_map: raw.source_map,
403        errors,
404        display_context: raw.display_context,
405    })
406}
407
/// Book and interpolate every transaction, then split the directives
/// into `(booked, failed)`.
///
/// `directives` must already be in canonical display order
/// `(date, priority, file_id, span.start)`. Booking additionally needs
/// cost-reduction transactions to be processed AFTER augmentations that
/// share their `(date, priority)`, so the lots exist when they are
/// matched. Instead of reordering the (large) directives themselves, the
/// function walks them through a temporary index vector sorted by
/// `(date, priority, has_cost_reduction)`; the sort is stable, so entries
/// with equal keys keep their display order.
///
/// A transaction whose booking fails is left untouched (pre-booking
/// shape: unresolved cost specs, unfilled elided slots), gets a `"BOOK"`
/// error appended to `errors`, and is returned in the second vec so it
/// stays out of regular plugins and Late validation. The caller is
/// responsible for merging `failed` back into the final directive list.
/// Both returned vecs keep the relative order of the input.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|s| &s.value));

    // Visit order for booking. The key repeats `(date, priority)` — which
    // the input is already sorted by — and appends `has_cost_reduction`,
    // so within each `(date, priority)` group reductions move behind
    // augmentations. Stability keeps `(file_id, span.start)` order
    // among entries whose keys are equal.
    let total = directives.len();
    let mut booking_order: Vec<usize> = (0..total).collect();
    booking_order.sort_by_key(|&idx| {
        let directive = &directives[idx].value;
        (
            directive.date(),
            directive.priority(),
            directive.has_cost_reduction(),
        )
    });

    // `is_failed[i]` marks position `i` as a transaction that failed to
    // book. Every index is visited exactly once, so `failed_count` equals
    // the number of `true` entries.
    let mut is_failed = vec![false; total];
    let mut failed_count = 0usize;
    for idx in booking_order {
        // Only transactions are booked; other directives pass through.
        let Directive::Transaction(txn) = &mut directives[idx].value else {
            continue;
        };
        match engine.book_and_interpolate(txn) {
            Ok(result) => {
                // Feed the booked transaction into the engine's state
                // before storing it back in place.
                engine.apply(&result.transaction);
                *txn = result.transaction;
            }
            Err(e) => {
                errors.push(LedgerError::error(
                    "BOOK",
                    format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                ));
                is_failed[idx] = true;
                failed_count += 1;
            }
        }
    }

    // Consume the vec and route each directive by its flag. Positions in
    // `is_failed` line up with `directives` because nothing was inserted
    // or removed while booking.
    let mut booked = Vec::with_capacity(total - failed_count);
    let mut failed = Vec::with_capacity(failed_count);
    for (directive, did_fail) in directives.into_iter().zip(is_failed) {
        if did_fail {
            failed.push(directive);
        } else {
            booked.push(directive);
        }
    }
    (booked, failed)
}
485
/// Which subset of plugins to run.
///
/// The loader pipeline calls `run_plugins` twice: once with
/// [`PluginPass::PreBookingSynth`] before the Early validation phase
/// (so synthesizers can inject Opens / Documents that early checks
/// depend on), and once with [`PluginPass::PostBooking`] after booking
/// (so cost-spec-reading plugins like `implicit_prices`,
/// `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
/// `unrealized`, and `valuation` see filled-in `cost.number_per`
/// values).
///
/// Standalone callers (LSP, FFI, tests) that operate on already-booked
/// input should pass [`PluginPass::All`] for the historical single-pass
/// behavior.
///
/// `run_plugins` decides membership per plugin by asking the native
/// registry whether the plugin is a synthesizer (`NativePlugin::is_synth`);
/// plugins the registry does not know are treated as non-synth.
#[cfg(feature = "plugins")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PluginPass {
    /// Only plugins that synthesize directives the Early validator
    /// depends on: `auto_accounts` (synthesizes Open directives) and
    /// the built-in document discovery walker (synthesizes Document
    /// directives the early phase checks for missing files).
    PreBookingSynth,
    /// All file-declared plugins and CLI `extra_plugins`, EXCLUDING
    /// `auto_accounts` and `document_discovery` (those ran pre-booking).
    /// Includes the 28 plugins that don't depend on synth state but
    /// may depend on booked cost specs.
    PostBooking,
    /// Every plugin — historical single-pass behavior. Used by callers
    /// (LSP, FFI, standalone tests) that don't run booking themselves
    /// or that work on already-booked input.
    All,
}
518
519/// Run plugins on directives.
520///
521/// Executes native plugins (and document discovery) on the given directives,
522/// modifying them in-place. Plugin errors are appended to `errors`.
523///
524/// `pass` selects which subset of plugins to run — see [`PluginPass`].
525/// The loader pipeline calls this twice (synth pass before Early,
526/// regular pass after booking). LSP / FFI / standalone callers pass
527/// `PluginPass::All` for the historical behavior.
528#[cfg(feature = "plugins")]
529pub fn run_plugins(
530    directives: &mut Vec<Spanned<Directive>>,
531    file_plugins: &[Plugin],
532    file_options: &Options,
533    options: &LoadOptions,
534    source_map: &SourceMap,
535    errors: &mut Vec<LedgerError>,
536    pass: PluginPass,
537) -> Result<(), ProcessError> {
538    use rustledger_plugin::{
539        DocumentDiscoveryPlugin, NativePlugin, NativePluginRegistry, PluginInput, PluginOptions,
540    };
541
542    // Resolve document directories relative to the main file's directory
543    // Document discovery only runs when run_plugins is true (respects raw mode)
544    let base_dir = source_map
545        .files()
546        .first()
547        .and_then(|f| f.path.parent())
548        .unwrap_or_else(|| std::path::Path::new("."));
549
550    // `document_discovery` is a synthesizer — runs in PreBookingSynth
551    // and All, skipped in PostBooking (it already injected directives
552    // during the synth pass).
553    let run_doc_discovery = matches!(pass, PluginPass::PreBookingSynth | PluginPass::All)
554        && options.run_plugins
555        && !file_options.documents.is_empty();
556    let has_document_dirs = run_doc_discovery;
557    let resolved_documents: Vec<String> = if has_document_dirs {
558        file_options
559            .documents
560            .iter()
561            .map(|d| {
562                let path = std::path::Path::new(d);
563                if path.is_absolute() {
564                    d.clone()
565                } else {
566                    base_dir.join(path).to_string_lossy().to_string()
567                }
568            })
569            .collect()
570    } else {
571        Vec::new()
572    };
573
574    // Build the native plugin registry up front so we can ask each
575    // plugin whether it's a synthesizer (via `NativePlugin::is_synth`)
576    // during the classification step below. Constructing the registry
577    // is O(n_plugins) and just instantiates the plugin structs; it's
578    // cheap to do before we know whether any plugins will actually
579    // run.
580    let registry = NativePluginRegistry::new();
581
582    // Collect raw plugin names first (we'll resolve them with the registry later)
583    // Tuple: (name, config, force_python)
584    let mut raw_plugins: Vec<(String, Option<String>, bool)> = Vec::new();
585
586    // Classify a plugin by name. Self-classification lives on the
587    // `NativePlugin::is_synth` trait method (see
588    // `rustledger-plugin/src/native/mod.rs`). Plugins not in the
589    // native registry (WASM, Python) default to non-synth — they
590    // run post-booking like file-authored beancount plugins.
591    let is_synth = |name: &str| -> bool { registry.find(name).is_some_and(NativePlugin::is_synth) };
592
593    // The API-level `options.auto_accounts` flag is a synth source.
594    if options.auto_accounts && matches!(pass, PluginPass::PreBookingSynth | PluginPass::All) {
595        raw_plugins.push(("auto_accounts".to_string(), None, false));
596    }
597
598    // File-declared plugins: synth plugins go in PreBookingSynth,
599    // everything else (including the 6 cost-spec-reading ones) goes in
600    // PostBooking. `PluginPass::All` runs everything for standalone
601    // callers (LSP / FFI / tests on already-booked input).
602    if options.run_plugins {
603        for plugin in file_plugins {
604            let synth = is_synth(&plugin.name);
605            let in_pass = match pass {
606                PluginPass::PreBookingSynth => synth,
607                PluginPass::PostBooking => !synth,
608                PluginPass::All => true,
609            };
610            if in_pass {
611                raw_plugins.push((
612                    plugin.name.clone(),
613                    plugin.config.clone(),
614                    plugin.force_python,
615                ));
616            }
617        }
618    }
619
620    // CLI extras: same synth/regular split as file plugins.
621    for (i, plugin_name) in options.extra_plugins.iter().enumerate() {
622        let synth = is_synth(plugin_name);
623        let in_pass = match pass {
624            PluginPass::PreBookingSynth => synth,
625            PluginPass::PostBooking => !synth,
626            PluginPass::All => true,
627        };
628        if in_pass {
629            let config = options.extra_plugin_configs.get(i).cloned().flatten();
630            raw_plugins.push((plugin_name.clone(), config, false));
631        }
632    }
633
634    // Check if we have any work to do - early return before creating registry
635    if raw_plugins.is_empty() && !has_document_dirs {
636        return Ok(());
637    }
638
639    let plugin_options = PluginOptions {
640        operating_currencies: file_options.operating_currency.clone(),
641        title: file_options.title.clone(),
642    };
643
644    // Run document discovery plugin if documents directories are configured.
645    // Each plugin call builds wrappers freshly from the current `directives`,
646    // sends them to the plugin, receives `PluginOp`s, and applies the ops
647    // to update `directives` — spans on `Keep` / `Modify` ops are inherited
648    // from the original `directives` entry by index, so plugin-transformed
649    // directives retain byte-precise source locations.
650    if has_document_dirs {
651        let doc_plugin = DocumentDiscoveryPlugin::new(resolved_documents, base_dir.to_path_buf());
652        let wrappers = build_wrappers(directives, source_map);
653        let input = PluginInput {
654            directives: wrappers,
655            options: plugin_options.clone(),
656            config: None,
657        };
658        let output = doc_plugin.process(input);
659        record_plugin_errors(errors, output.errors, source_map);
660        apply_plugin_ops(directives, output.ops, errors, source_map)?;
661    }
662
663    // Run each plugin (registry was constructed earlier for the
664    // synth classification step).
665    if !raw_plugins.is_empty() {
666        for (raw_name, plugin_config, force_python) in &raw_plugins {
667            // Resolve the plugin name - try direct match first, then prefixed variants.
668            // Skip native resolution when force_python is set (plugin "python:..." prefix).
669            let resolved_name = if *force_python {
670                None
671            } else if registry.find(raw_name).is_some() {
672                Some(raw_name.as_str())
673            } else if let Some(short_name) = raw_name.strip_prefix("beancount.plugins.") {
674                registry.find(short_name).is_some().then_some(short_name)
675            } else if let Some(short_name) = raw_name.strip_prefix("beancount_reds_plugins.") {
676                registry.find(short_name).is_some().then_some(short_name)
677            } else if let Some(short_name) = raw_name.strip_prefix("beancount_lazy_plugins.") {
678                registry.find(short_name).is_some().then_some(short_name)
679            } else {
680                None
681            };
682
683            if let Some(name) = resolved_name
684                && let Some(plugin) = registry.find(name)
685            {
686                let wrappers = build_wrappers(directives, source_map);
687                let input = PluginInput {
688                    directives: wrappers,
689                    options: plugin_options.clone(),
690                    config: plugin_config.clone(),
691                };
692                let output = plugin.process(input);
693                record_plugin_errors(errors, output.errors, source_map);
694                apply_plugin_ops(directives, output.ops, errors, source_map)?;
695            } else {
696                // Not a native plugin — categorize and handle
697                let plugin_path = std::path::Path::new(raw_name);
698                let ext = plugin_path
699                    .extension()
700                    .and_then(|e| e.to_str())
701                    .unwrap_or("")
702                    .to_lowercase();
703
704                // The closure is only invoked from inside the wasm-plugins /
705                // python-plugins cfg blocks below. The whole function is
706                // already `#[cfg(feature = "plugins")]`, so this only matters
707                // when `plugins` is enabled but neither child feature is
708                // (e.g. `--features native-plugins`). Allow `unused_variables`
709                // for exactly that configuration. Underscore-prefixing the
710                // binding would have been the wrong fix because we DO call
711                // the closure in builds with one of the features enabled,
712                // which would trip `no_effect_underscore_binding` instead.
713                #[cfg_attr(
714                    not(any(feature = "wasm-plugins", feature = "python-plugins")),
715                    allow(unused_variables)
716                )]
717                let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
718                    let p = std::path::Path::new(name);
719                    let resolved = if p.is_absolute() {
720                        p.to_path_buf()
721                    } else {
722                        base_dir.join(name)
723                    };
724
725                    // Path security: prevent plugins from outside the ledger directory
726                    if options.path_security
727                        && let (Ok(canon_base), Ok(canon_plugin)) =
728                            (base_dir.canonicalize(), resolved.canonicalize())
729                        && !canon_plugin.starts_with(&canon_base)
730                    {
731                        return Err(format!(
732                            "plugin path '{name}' is outside the ledger directory"
733                        ));
734                    }
735
736                    Ok(resolved)
737                };
738
739                if ext == "wasm" {
740                    // WASM plugin
741                    #[cfg(feature = "wasm-plugins")]
742                    {
743                        let wasm_path = match resolve_path(raw_name) {
744                            Ok(p) => p,
745                            Err(e) => {
746                                errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
747                                continue;
748                            }
749                        };
750                        let wrappers = build_wrappers(directives, source_map);
751                        match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config)
752                        {
753                            Ok((ops, plugin_errors)) => {
754                                for err in plugin_errors {
755                                    errors.push(err);
756                                }
757                                apply_plugin_ops(directives, ops, errors, source_map)?;
758                            }
759                            Err(e) => {
760                                errors.push(
761                                    LedgerError::error(
762                                        "PLUGIN",
763                                        format!("WASM plugin {} failed: {e}", wasm_path.display()),
764                                    )
765                                    .with_phase("plugin"),
766                                );
767                            }
768                        }
769                    }
770                    #[cfg(not(feature = "wasm-plugins"))]
771                    {
772                        errors.push(
773                            LedgerError::error(
774                                "PLUGIN",
775                                format!(
776                                    "WASM plugin '{raw_name}' requires the wasm-plugins feature",
777                                ),
778                            )
779                            .with_phase("plugin"),
780                        );
781                    }
782                } else if *force_python
783                    || ext == "py"
784                    || raw_name.contains(std::path::MAIN_SEPARATOR)
785                    || raw_name.contains('.')
786                {
787                    // Python module or file-based plugin (or force_python via "python:" prefix)
788                    #[cfg(feature = "python-plugins")]
789                    {
790                        let resolved = match resolve_path(raw_name) {
791                            Ok(p) => p,
792                            Err(e) => {
793                                errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
794                                continue;
795                            }
796                        };
797                        let wrappers = build_wrappers(directives, source_map);
798                        match run_python_plugin(
799                            raw_name,
800                            &resolved,
801                            base_dir,
802                            &wrappers,
803                            &plugin_options,
804                            plugin_config,
805                        ) {
806                            Ok((ops, plugin_errors)) => {
807                                for err in plugin_errors {
808                                    errors.push(err);
809                                }
810                                apply_plugin_ops(directives, ops, errors, source_map)?;
811                            }
812                            Err(e) => {
813                                errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
814                            }
815                        }
816                    }
817                    #[cfg(not(feature = "python-plugins"))]
818                    {
819                        errors.push(
820                            LedgerError::error(
821                                "E8005",
822                                format!(
823                                    "Python plugin \"{raw_name}\" requires the python-plugins feature",
824                                ),
825                            )
826                            .with_phase("plugin"),
827                        );
828                    }
829                } else {
830                    // Completely unknown plugin name — try to suggest a module path
831                    #[cfg(feature = "python-plugins")]
832                    {
833                        use rustledger_plugin::python::{is_python_available, suggest_module_path};
834                        let suggestion = if is_python_available() {
835                            suggest_module_path(raw_name)
836                        } else {
837                            None
838                        };
839                        if let Some(module_path) = suggestion {
840                            errors.push(
841                                LedgerError::error(
842                                    "E8004",
843                                    format!(
844                                        "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
845                                    ),
846                                )
847                                .with_phase("plugin"),
848                            );
849                        } else {
850                            errors.push(
851                                LedgerError::error(
852                                    "E8001",
853                                    format!("Plugin not found: \"{raw_name}\""),
854                                )
855                                .with_phase("plugin"),
856                            );
857                        }
858                    }
859                    #[cfg(not(feature = "python-plugins"))]
860                    {
861                        errors.push(
862                            LedgerError::error(
863                                "E8001",
864                                format!("Plugin not found: \"{raw_name}\""),
865                            )
866                            .with_phase("plugin"),
867                        );
868                    }
869                }
870            }
871        }
872    }
873
874    // No final wrapper→directive conversion needed: `apply_plugin_ops`
875    // updates `directives` in place after each plugin call, preserving
876    // original spans on Keep/Modify ops. Plugin-synthesized directives
877    // (Insert ops) get `SYNTHESIZED_FILE_ID` and a zero span.
878    Ok(())
879}
880
/// Snapshot the current directives as plugin-facing `DirectiveWrapper`s.
///
/// Each wrapper is tagged with the originating file's path and the line of
/// the directive's span start whenever `source_map` knows the directive's
/// `file_id`; otherwise both location fields are `None`. Spans themselves
/// are not carried on the wrappers — `apply_plugin_ops` restores them by
/// input index when the plugin's ops are applied.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    let mut wrappers = Vec::with_capacity(directives.len());
    for entry in directives {
        // `(path, line)` when the file is known, split into two `Option`s.
        let (filename, lineno) = source_map
            .get(entry.file_id as usize)
            .map(|file| {
                let (line, _) = file.line_col(entry.span.start);
                (file.path.display().to_string(), line as u32)
            })
            .unzip();
        wrappers.push(directive_to_wrapper_with_location(
            &entry.value,
            filename,
            lineno,
        ));
    }
    wrappers
}
905
/// Append a plugin's reported errors to the ledger error list.
///
/// Every entry is emitted under code `PLUGIN` with phase `"plugin"`, as an
/// error or a warning according to the plugin-reported severity.
///
/// When the plugin supplied both `source_file` and `line_number`, an
/// `ErrorLocation` is attached so renderers can point at the offending
/// line. The file path comes from the matching `source_map` entry when
/// there is one; otherwise the plugin's own name (e.g. a synthetic
/// `"<auto_accounts>"`) is used verbatim so attribution is not lost.
/// The column is always 1 because the plugin error carries no column.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    use rustledger_plugin::PluginErrorSeverity;

    for plugin_err in plugin_errors {
        // Work out the location first; it only borrows the error.
        let location = match (&plugin_err.source_file, plugin_err.line_number) {
            (Some(name), Some(line)) => {
                let file = match source_map.get_by_path(Path::new(name)) {
                    Some(known) => known.path.clone(),
                    None => std::path::PathBuf::from(name),
                };
                Some(ErrorLocation {
                    file,
                    line: line as usize,
                    column: 1,
                })
            }
            _ => None,
        };

        let base = match plugin_err.severity {
            PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
            PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", plugin_err.message),
        }
        .with_phase("plugin");

        errors.push(match location {
            Some(loc) => base.with_location(loc),
            None => base,
        });
    }
}
950
/// Replace `directives` with the result of applying a plugin's ops.
///
/// The ops must partition the input: every input index has to be named by
/// exactly one `Keep`, `Modify`, or `Delete`. An out-of-range index, a
/// repeated index, or a missing index is reported as a `PLUGIN` error
/// (phase `"plugin"`) in `errors`; the function then returns `Ok(())`
/// with `directives` left exactly as it was.
///
/// Output is built in op order:
/// - `Keep(i)` copies `directives[i]` unchanged.
/// - `Modify(i, w)` converts `w` and reuses `directives[i]`'s span and
///   `file_id`, so transformed directives keep their source identity.
/// - `Insert(w)` converts `w`; if the wrapper names a file known to
///   `source_map` together with a line number, it gets that file's id and
///   an empty span at the start of that line (offset 0 if the line cannot
///   be resolved). Otherwise it gets a zero span and
///   `SYNTHESIZED_FILE_ID`.
/// - `Delete(_)` contributes nothing.
///
/// # Errors
///
/// Returns `ProcessError::PluginConversion` if a `Modify` or `Insert`
/// wrapper cannot be converted back into a directive; `directives` is not
/// modified in that case.
#[cfg(feature = "plugins")]
fn apply_plugin_ops(
    directives: &mut Vec<Spanned<Directive>>,
    ops: Vec<rustledger_plugin::PluginOp>,
    errors: &mut Vec<LedgerError>,
    source_map: &SourceMap,
) -> Result<(), ProcessError> {
    use rustledger_parser::{SYNTHESIZED_FILE_ID, Span};
    use rustledger_plugin::{PluginOp, wrapper_to_directive};

    let n = directives.len();
    let violation = |message: String| LedgerError::error("PLUGIN", message).with_phase("plugin");

    // Pass 1: check that the index-bearing ops cover 0..n exactly once.
    let mut claimed = vec![false; n];
    for op in &ops {
        let i = match op {
            PluginOp::Insert(_) => continue,
            PluginOp::Keep(i) | PluginOp::Modify(i, _) | PluginOp::Delete(i) => *i,
        };
        let Some(slot) = claimed.get_mut(i) else {
            errors.push(violation(format!(
                "plugin op references out-of-bounds input index {i} (input has {n} directives)"
            )));
            return Ok(());
        };
        if *slot {
            errors.push(violation(format!(
                "plugin op references input index {i} more than once"
            )));
            return Ok(());
        }
        *slot = true;
    }
    // Only the first unclaimed index is reported.
    if let Some(i) = claimed.iter().position(|&taken| !taken) {
        errors.push(violation(format!(
            "plugin omitted input directive {i} (must appear in exactly one of Keep/Modify/Delete)"
        )));
        return Ok(());
    }

    // Pass 2: build the replacement list; `directives` is only overwritten
    // once every conversion has succeeded.
    let mut rebuilt = Vec::with_capacity(ops.len());
    for op in ops {
        match op {
            PluginOp::Delete(_) => {}
            PluginOp::Keep(i) => rebuilt.push(directives[i].clone()),
            PluginOp::Modify(i, wrapper) => {
                let value = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                let source = &directives[i];
                rebuilt.push(Spanned {
                    value,
                    span: source.span,
                    file_id: source.file_id,
                });
            }
            PluginOp::Insert(wrapper) => {
                // A real location needs both a filename that matches a
                // loaded file and a line number; anything else (including
                // plugin-only names such as `"<auto_accounts>"`) falls back
                // to the synthesized marker.
                let (span, file_id) = wrapper
                    .filename
                    .as_ref()
                    .zip(wrapper.lineno)
                    .and_then(|(filename, lineno)| {
                        let file = source_map.get_by_path(Path::new(filename))?;
                        let offset = file.line_start(lineno as usize).unwrap_or(0);
                        Some((Span::new(offset, offset), file.id as u16))
                    })
                    .unwrap_or_else(|| (Span::new(0, 0), SYNTHESIZED_FILE_ID));
                let value = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                rebuilt.push(Spanned::new(value, span).with_file_id(file_id as usize));
            }
        }
    }

    *directives = rebuilt;
    Ok(())
}
1077
/// Derive the validator's [`ValidationOptions`] from the ledger's file-level
/// options.
///
/// Shared by the validation phases so they run with one configuration.
/// Relative `documents` directories are anchored at the parent directory
/// of the first file in `source_map` (falling back to `"."` when there is
/// no such file or it has no parent); absolute ones are used as given.
/// Account types and the tolerance-related settings are copied across
/// from `file_options`.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
) -> rustledger_validate::ValidationOptions {
    use rustledger_validate::ValidationOptions;

    // Anchor for relative document directories: the main file's directory.
    let main_file_dir = source_map
        .files()
        .first()
        .and_then(|main| main.path.parent());
    let base_dir = main_file_dir.unwrap_or_else(|| Path::new("."));

    let document_dirs: Vec<std::path::PathBuf> = file_options
        .documents
        .iter()
        .map(|dir| {
            let dir = Path::new(dir);
            if dir.is_relative() {
                base_dir.join(dir)
            } else {
                dir.to_path_buf()
            }
        })
        .collect();

    let type_names = file_options.account_types();
    let account_types: Vec<String> = type_names
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    let tolerance_default = file_options.inferred_tolerance_default.clone();

    ValidationOptions::default()
        .with_account_types(account_types)
        .with_document_dirs(document_dirs)
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(tolerance_default)
}
1127
/// Translate validator errors into loader-level [`LedgerError`]s and append
/// them to `errors`.
///
/// Shared by the validation phases so both use one conversion path. For
/// each validation error:
/// - the phase is `"parse"` when the code is a parse-phase code, else
///   `"validate"`;
/// - the severity is a warning when the code says so, else an error;
/// - any advisory note is appended to the message so it reaches every
///   output format without needing its own field;
/// - span + file id are resolved through `source_map` into a
///   file/line/column location when both are present and the file is
///   known (issue #901).
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        // A location needs a span, a file id, and a matching source file.
        let location = err.span.zip(err.file_id).and_then(|(span, fid)| {
            let file = source_map.get(fid as usize)?;
            let (line, column) = file.line_col(span.start);
            Some(ErrorLocation {
                file: file.path.clone(),
                line,
                column,
            })
        });

        let message = err.note.as_ref().map_or_else(
            || err.to_string(),
            |note| format!("{err}\n  note: {note}"),
        );

        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1182
1183/// Load and fully process a beancount file.
1184///
1185/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1186/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1187///
1188/// # Example
1189///
1190/// ```ignore
1191/// use rustledger_loader::{load, LoadOptions};
1192/// use std::path::Path;
1193///
1194/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1195/// for error in &ledger.errors {
1196///     eprintln!("{}: {}", error.code, error.message);
1197/// }
1198/// ```
1199pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1200    let mut loader = crate::Loader::new();
1201
1202    if options.path_security {
1203        loader = loader.with_path_security(true);
1204    }
1205
1206    let raw = loader.load(path)?;
1207    process(raw, options)
1208}
1209
1210/// Load a beancount file without processing.
1211///
1212/// This returns raw directives without sorting, booking, or plugins.
1213/// Use this when you need the original parse output.
1214pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
1215    crate::Loader::new().load(path)
1216}
1217
/// Load the WASM plugin at `wasm_path`, execute it once, and return its ops
/// together with its reported errors.
///
/// The plugin receives a copy of `directives`, `options`, and `config`.
/// Reported plugin errors are converted to `PLUGIN` ledger errors or
/// warnings (phase `"plugin"`) according to their severity.
///
/// # Errors
///
/// Returns a message prefixed with `failed to load:` when the plugin
/// cannot be loaded, or `execution failed:` when running it fails.
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let handle = manager
        .load(wasm_path)
        .map_err(|e| format!("failed to load: {e}"))?;

    let request = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };
    let response = manager
        .execute(handle, &request)
        .map_err(|e| format!("execution failed: {e}"))?;

    let reported: Vec<LedgerError> = response
        .errors
        .into_iter()
        .map(|err| {
            match err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
                PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
            }
            .with_phase("plugin")
        })
        .collect();

    Ok((response.ops, reported))
}
1258
/// Execute a Python plugin through `PythonRuntime` and return its ops
/// together with its reported errors.
///
/// The plugin is always invoked via `execute_module(module_name, …,
/// Some(base_dir))`. Whether the plugin looks file-based (the resolved
/// path exists, the name ends in `.py`, or the name contains a path
/// separator) only selects which error message is produced on failure.
/// Reported plugin errors are converted to `PLUGIN` ledger errors or
/// warnings (phase `"plugin"`) according to their severity.
///
/// # Errors
///
/// Returns a message when the Python runtime cannot be created or when
/// executing the plugin fails.
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, python::PythonRuntime};

    let runtime = PythonRuntime::new().map_err(|e| format!("Python runtime unavailable: {e}"))?;

    let request = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // Classify the plugin before running it; this only affects the wording
    // of the failure message below.
    let has_py_extension = || {
        std::path::Path::new(module_name)
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
    };
    let is_file = resolved_path.exists()
        || has_py_extension()
        || module_name.contains(std::path::MAIN_SEPARATOR);

    let response = runtime
        .execute_module(module_name, &request, Some(base_dir))
        .map_err(|e| {
            if is_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    let reported: Vec<LedgerError> = response
        .errors
        .into_iter()
        .map(|err| {
            match err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
                PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
            }
            .with_phase("plugin")
        })
        .collect();

    Ok((response.ops, reported))
}