Skip to main content

rustledger_loader/
process.rs

1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
7use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
8use rustledger_core::{BookingMethod, Directive, DisplayContext};
9use rustledger_parser::Spanned;
10use std::path::Path;
11use thiserror::Error;
12
/// One additional plugin invocation requested outside the ledger file
/// (CLI `--plugin` or a programmatic caller).
///
/// The name and its optional config travel together in one value so
/// they cannot fall out of step; the earlier parallel-`Vec`
/// representation could silently pair a config with the wrong plugin.
#[derive(Clone, Debug)]
pub struct ExtraPlugin {
    /// Name of the plugin: the short form or a fully-qualified module path.
    pub name: String,
    /// Config string for this plugin; `None` when no config was supplied.
    pub config: Option<String>,
}
25
26/// Options for loading and processing a ledger.
27#[derive(Debug, Clone)]
28pub struct LoadOptions {
29    /// Booking method for lot matching (default: Strict).
30    pub booking_method: BookingMethod,
31    /// Run plugins declared in the file (default: true).
32    pub run_plugins: bool,
33    /// Run `auto_accounts` plugin (default: false).
34    pub auto_accounts: bool,
35    /// Additional plugins to run (CLI `--plugin` or programmatic API),
36    /// each with an optional config string.
37    pub extra_plugins: Vec<ExtraPlugin>,
38    /// Run validation after processing (default: true).
39    pub validate: bool,
40    /// Enable path security (prevent include traversal).
41    pub path_security: bool,
42}
43
44impl Default for LoadOptions {
45    fn default() -> Self {
46        Self {
47            booking_method: BookingMethod::Strict,
48            run_plugins: true,
49            auto_accounts: false,
50            extra_plugins: Vec::new(),
51            validate: true,
52            path_security: false,
53        }
54    }
55}
56
57impl LoadOptions {
58    /// Create options for raw loading (no booking, no plugins, no validation).
59    #[must_use]
60    pub const fn raw() -> Self {
61        Self {
62            booking_method: BookingMethod::Strict,
63            run_plugins: false,
64            auto_accounts: false,
65            extra_plugins: Vec::new(),
66            validate: false,
67            path_security: false,
68        }
69    }
70}
71
72/// Errors that can occur during ledger processing.
73#[derive(Debug, Error)]
74pub enum ProcessError {
75    /// Loading failed.
76    #[error("loading failed: {0}")]
77    Load(#[from] LoadError),
78
79    /// Booking/interpolation error.
80    #[cfg(feature = "booking")]
81    #[error("booking error: {message}")]
82    Booking {
83        /// Error message.
84        message: String,
85        /// Date of the transaction.
86        date: rustledger_core::NaiveDate,
87        /// Narration of the transaction.
88        narration: String,
89    },
90
91    /// Plugin execution error.
92    #[cfg(feature = "plugins")]
93    #[error("plugin error: {0}")]
94    Plugin(String),
95
96    /// Validation error.
97    #[cfg(feature = "validation")]
98    #[error("validation error: {0}")]
99    Validation(String),
100
101    /// Plugin output conversion error.
102    #[cfg(feature = "plugins")]
103    #[error("failed to convert plugin output: {0}")]
104    PluginConversion(String),
105}
106
107/// A fully processed ledger.
108///
109/// This is the result of loading and processing a beancount file,
110/// equivalent to the tuple returned by Python's `loader.load_file()`.
111#[derive(Debug)]
112pub struct Ledger {
113    /// Processed directives in source-faithful form: sorted by date,
114    /// booked (cost specs resolved, interpolations applied), and
115    /// plugin-rewritten. **`Pad` directives remain as `Pad`**; they
116    /// are not pre-expanded into synthesized transactions.
117    ///
118    /// Consumers split into two groups:
119    ///
120    /// - **Source-faithful consumers** (stats, journal, formatter,
121    ///   LSP, BQL `FROM #entries WHERE type = 'pad'` audits,
122    ///   source-mapped diagnostics) iterate this field directly.
123    ///   Pads count as Pads.
124    /// - **Balance-computing consumers** (holdings, balances,
125    ///   balsheet, networth, income, FFI `query.execute`/`batch`,
126    ///   WASM `expandPads`/`query`) call [`Ledger::balance_view`]
127    ///   to get the directive stream MERGED with synthesized P-flag
128    ///   transactions for each pad-balance pair. This is the only
129    ///   way to get pad effects into per-account inventory math.
130    ///
131    /// The two views are derived from the same source; there is no
132    /// drift possible because [`Ledger::balance_view`] is a pure
133    /// function of `self.directives`.
134    pub directives: Vec<Spanned<Directive>>,
135    /// Options parsed from the file.
136    pub options: Options,
137    /// Plugins declared in the file.
138    pub plugins: Vec<Plugin>,
139    /// Source map for error reporting.
140    pub source_map: SourceMap,
141    /// Errors encountered during processing.
142    pub errors: Vec<LedgerError>,
143    /// Display context for formatting numbers.
144    pub display_context: DisplayContext,
145}
146
147impl Ledger {
148    /// Return the directive stream merged with synthesized
149    /// pad-equivalent transactions, suitable for inventory /
150    /// balance math.
151    ///
152    /// For each `Pad` directive followed (in date order) by a
153    /// `Balance` assertion on the same account, a `Transaction`
154    /// with `flag = 'P'` is added to the view carrying the
155    /// postings needed to make the balance match. A multi-currency
156    /// pad produces one synth transaction per currency.
157    ///
158    /// **Original `Pad` directives are preserved in the view.**
159    /// Synth transactions are added alongside, not in place of.
160    /// This matters for two reasons:
161    ///
162    /// 1. BQL queries against the `#entries` table
163    ///    (`SELECT * FROM #entries WHERE type = 'pad'`) can still
164    ///    enumerate the pad directives the user authored. A
165    ///    REPLACE-style expansion would silently zero those out.
166    ///    (BQL's default SELECT path operates on postings; pads
167    ///    have no postings, so a default SELECT never matches them
168    ///    regardless of this view shape.)
169    /// 2. Multi-pad cases (issue #1300) produce exactly one synth
170    ///    per pad-balance pair:
171    ///    `rustledger_booking::process_pads` (which
172    ///    `merge_with_padding` delegates to) only retains the most
173    ///    recent same-account pad in its pending-pads map, so
174    ///    earlier same-account pads are silently shadowed and
175    ///    their `source_account` does NOT contribute to the synth.
176    ///    The validator emits `E2003` for shadowed pads
177    ///    independently; this view reflects only the effective pad.
178    ///
179    /// Inventory-walking consumers iterate `Directive::Transaction`
180    /// and ignore `Pad` directives, so the preserved Pads are
181    /// invisible to them.
182    ///
183    /// **When to use this vs. [`Ledger.directives`](Self::directives):**
184    /// any consumer that maintains running per-account inventory
185    /// state and asks "what is the balance" needs this view. Any
186    /// consumer that asks "what did the user write" wants the raw
187    /// `directives` field.
188    ///
189    /// # Performance
190    ///
191    /// Each call clones every source directive once (`O(n)`).
192    /// Inlines the merge logic from
193    /// [`rustledger_booking::merge_with_padding`] so the already-
194    /// owned `booked` vector can be moved into the merged output
195    /// instead of cloned a second time. For short-lived CLI
196    /// invocations the single clone is negligible. Long-lived
197    /// processes (FFI servers, LSPs) that query the same ledger
198    /// repeatedly should hoist the result above their loop.
199    /// `TODO(perf):` memoize internally once a benchmark shows it
200    /// matters.
201    #[cfg(feature = "booking")]
202    #[must_use]
203    pub fn balance_view(&self) -> Vec<Directive> {
204        let mut booked: Vec<Directive> = self.directives.iter().map(|s| s.value.clone()).collect();
205
206        // Inlined from `rustledger_booking::merge_with_padding` so
207        // `booked` is moved (not re-cloned via `to_vec()`).
208        // Algorithmically identical: prepend synth transactions, then
209        // stable-sort by date. Same-date pad+balance pairs land as
210        // `[synth, pad, balance]` because synths sit at the front of
211        // their date-group pre-sort.
212        debug_assert!(
213            !booked.iter().any(|d| matches!(d, Directive::Transaction(t) if rustledger_booking::is_synthesized_pad(t))),
214            "balance_view called on a Ledger whose directives already contain synth pad transactions",
215        );
216        let pad_result = rustledger_booking::process_pads(&booked);
217        let mut merged: Vec<Directive> =
218            Vec::with_capacity(booked.len() + pad_result.padding_transactions.len());
219        for txn in pad_result.padding_transactions {
220            merged.push(Directive::Transaction(txn));
221        }
222        merged.append(&mut booked);
223        merged.sort_by_key(rustledger_core::Directive::date);
224        merged
225    }
226}
227
/// Unified error type for ledger processing.
///
/// Covers every kind of problem reported while loading, booking,
/// running plugins and validating.
#[derive(Debug)]
#[non_exhaustive]
pub struct LedgerError {
    /// Error severity.
    pub severity: ErrorSeverity,
    /// Error code (e.g., "E0001", "W8002").
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Source location, if available.
    pub location: Option<ErrorLocation>,
    /// Byte span (inclusive start, exclusive end) in the source file,
    /// used by rich renderers (e.g. miette) to draw a snippet around
    /// the offending directive. Consumers that only need `file:line:col`
    /// should use `location`; those that want to show the surrounding
    /// source text want this.
    pub source_span: Option<(usize, usize)>,
    /// Source file ID — index into the ledger's [`SourceMap`]. Used
    /// alongside `source_span` for snippet rendering.
    pub file_id: Option<u16>,
    /// Processing phase that produced this error: "parse", "validate", or "plugin".
    pub phase: String,
}

/// Error severity level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// Error - indicates a problem that should be fixed.
    Error,
    /// Warning - indicates a potential issue.
    Warning,
}

/// Source location for an error.
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// File path.
    pub file: std::path::PathBuf,
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}

impl LedgerError {
    /// Shared constructor behind [`LedgerError::error`] and
    /// [`LedgerError::warning`]: no location, span or file ID, and the
    /// phase preset to `"validate"`.
    fn in_validate_phase(severity: ErrorSeverity, code: String, message: String) -> Self {
        Self {
            severity,
            code,
            message,
            location: None,
            source_span: None,
            file_id: None,
            phase: "validate".to_string(),
        }
    }

    /// Create an entry with [`ErrorSeverity::Error`].
    ///
    /// The phase starts out as `"validate"` and no location, span or
    /// file ID is attached; chain [`LedgerError::with_phase`],
    /// [`LedgerError::with_location`] or
    /// [`LedgerError::with_source_span`] to fill those in.
    pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self::in_validate_phase(ErrorSeverity::Error, code.into(), message.into())
    }

    /// Create an entry with [`ErrorSeverity::Warning`].
    ///
    /// Uses the same defaults as [`LedgerError::error`]: phase
    /// `"validate"`, no location, span or file ID.
    pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self::in_validate_phase(ErrorSeverity::Warning, code.into(), message.into())
    }

    /// Attach a source span and file ID so rich renderers can draw a snippet.
    #[must_use]
    pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
        self.source_span = Some(span);
        self.file_id = Some(file_id);
        self
    }

    /// Replace the processing phase recorded on this error.
    #[must_use]
    pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
        self.phase = phase.into();
        self
    }

    /// Attach a `file:line:column` location to this error.
    #[must_use]
    pub fn with_location(mut self, location: ErrorLocation) -> Self {
        self.location = Some(location);
        self
    }
}
325
326/// Process a raw load result into a fully processed ledger.
327///
328/// Pipeline (see numbered comments below for the rationale of each step):
329///
330/// ```text
331///   1. sort                         (canonical display order)
332///   2. synth plugins                (auto_accounts, document_discovery)
333///   3. Early validation             (account presence, structural, lifecycle)
334///   4. booking                      (cost spec resolution, interpolation)
335///   5. partition                    (set aside failed-booking txns)
336///   6. regular plugins              (file plugins + extras, on booked only)
337///   7. Late validation              (balance, currency, inventory, on booked only)
338///   8. finalize                     (unused-pad warnings)
339///   9. re-merge                     (booked + failed → final Ledger.directives)
340/// ```
341pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
342    let mut errors: Vec<LedgerError> = Vec::new();
343
344    // Convert load errors to ledger errors (parse phase). Iterate by
345    // reference so `raw` stays borrowable for the rest of the pipeline
346    // (the phase transitions and validator setup below borrow it).
347    for load_err in &raw.errors {
348        errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
349    }
350
351    // Phase-typed pipeline (issue #1166). The phantom-typed
352    // `Directives<P>` wrapper makes the sequence
353    //
354    //     Raw → Sorted → Synthed → EarlyValidated → Booked
355    //         → RegularPluginsApplied → LateValidated → Finalized
356    //
357    // a compile-time property of the type system. Each transition
358    // method consumes one phase and produces the next; the compiler
359    // rejects any call-site that drops a phase, swaps two, or invokes
360    // a later phase on raw input. See `crates/rustledger-loader/src/phase.rs`.
361    //
362    // The transitions themselves wrap the existing subsystem entry
363    // points (`run_booking`, `run_plugins`, validators) without
364    // changing their semantics — this PR is the structural refactor
365    // only; behavior is bit-identical to the pre-#1166 pipeline.
366
367    // Resolve the effective booking method once, before the pipeline
368    // starts, so both the validator (early/late phases — needs it to
369    // seed each opened account's per-account booking method, see
370    // issue #1182) and the booking engine see the same value. File-
371    // level `option "booking_method"` wins when explicitly set;
372    // otherwise the API-level `LoadOptions.booking_method` is used.
373    #[cfg(any(feature = "validation", feature = "booking"))]
374    let effective_booking_method = resolve_effective_booking_method(&raw, options);
375
376    #[cfg(feature = "validation")]
377    let validation_session = if options.validate {
378        Some(rustledger_validate::ValidationSession::new(
379            build_validation_options(&raw.options, &raw.source_map, effective_booking_method),
380        ))
381    } else {
382        None
383    };
384
385    // Compute `today` once for both phases — avoids a midnight-crossing
386    // race where Early and Late could disagree on what day it is, and
387    // gives `FutureDate` warnings a single coherent reference point.
388    #[cfg(feature = "validation")]
389    let today = jiff::Zoned::now().date();
390
391    let synthed = crate::Directives::<crate::Raw>::from_parser(raw.directives)
392        .sort()
393        .apply_synth_plugins(
394            &raw.plugins,
395            &raw.options,
396            options,
397            &raw.source_map,
398            &mut errors,
399        )?;
400
401    // The validation feature changes `early_validate`'s shape: with
402    // it on we thread the `Option<ValidationSession<Pending>>` in and
403    // catch the returned `Option<ValidationSession<EarlyDone>>` for
404    // `late_validate` (typestate-moved per #1236); without it we just
405    // get the next-phase `Directives` back. Branching here keeps each
406    // cfg's signature small and prevents the call site from having to
407    // know the typestate phase parameters in the disabled case.
408    #[cfg(feature = "validation")]
409    let (directives, validation_session) =
410        synthed.early_validate(validation_session, today, &raw.source_map, &mut errors);
411    #[cfg(not(feature = "validation"))]
412    let directives = synthed.early_validate(&raw.source_map, &mut errors);
413
414    let (booked, failed) = directives.book(
415        #[cfg(feature = "booking")]
416        effective_booking_method,
417        #[cfg(feature = "booking")]
418        &mut errors,
419    );
420
421    let regular_applied = booked.apply_regular_plugins(
422        &raw.plugins,
423        &raw.options,
424        options,
425        &raw.source_map,
426        &mut errors,
427    )?;
428
429    #[cfg(feature = "validation")]
430    let late_validated =
431        regular_applied.late_validate(validation_session, today, &raw.source_map, &mut errors);
432    #[cfg(not(feature = "validation"))]
433    let late_validated = regular_applied.late_validate(&raw.source_map, &mut errors);
434
435    let finalized = late_validated.finalize(failed);
436
437    Ok(Ledger {
438        directives: finalized.into_inner(),
439        options: raw.options,
440        plugins: raw.plugins,
441        source_map: raw.source_map,
442        errors,
443        display_context: raw.display_context,
444    })
445}
446
/// Pick the booking method the whole pipeline should use.
///
/// Lives outside `process()` so the validator session (which seeds
/// per-account booking from it) and the booking engine are handed the
/// same value.
///
/// A file-level `option "booking_method"` takes precedence when it was
/// explicitly set; if it was not set — or its value does not parse —
/// the API-level `LoadOptions.booking_method` is returned.
#[cfg(any(feature = "validation", feature = "booking"))]
fn resolve_effective_booking_method(
    raw: &LoadResult,
    options: &LoadOptions,
) -> rustledger_core::BookingMethod {
    let api_method = options.booking_method;
    if !raw.options.set_options.contains("booking_method") {
        return api_method;
    }
    // Explicitly set in the file: use it, falling back on a parse failure.
    raw.options.booking_method.parse().unwrap_or(api_method)
}
468
469// ============================================================================
470// Phase transitions
471// ============================================================================
472//
473// Each transition consumes a `Directives<P>` of one phase and
474// produces a `Directives<NextP>` of the next phase. Bodies wrap the
475// existing subsystem calls (`run_booking`, `run_plugins`, validators)
476// without changing their semantics — only the type-level sequencing
477// is new. See `phase.rs` for the phase markers and overall rationale.
478
479/// Canonical display-order sort key: `(date, priority, file_id, span.start)`.
480/// What BQL / JSON / format output expects and what Python beancount
481/// produces. Used by `sort` (initial ordering) and `finalize` (re-sort
482/// after merging failed bookings back in).
483type CanonicalSortKey = (
484    rustledger_core::NaiveDate,
485    rustledger_core::DirectivePriority,
486    u16,
487    usize,
488);
489
490#[inline]
491const fn canonical_sort_key(d: &Spanned<Directive>) -> CanonicalSortKey {
492    (d.value.date(), d.value.priority(), d.file_id, d.span.start)
493}
494
495impl crate::Directives<crate::Raw> {
496    /// Sort directives into canonical display order — see
497    /// [`canonical_sort_key`].
498    ///
499    /// Booking needs a different iteration order (augmentations
500    /// BEFORE reductions on the same `(date, priority)`) but doesn't
501    /// need the underlying vec reordered — `run_booking` walks via
502    /// a transient `Vec<usize>` index. This sort goes once, here,
503    /// and the display order survives the rest of the pipeline.
504    #[must_use]
505    pub(crate) fn sort(mut self) -> crate::Directives<crate::Sorted> {
506        self.as_vec_mut().sort_by_key(canonical_sort_key);
507        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
508    }
509}
510
511impl crate::Directives<crate::Sorted> {
512    /// Run synth-only plugins (`auto_accounts`, `document_discovery`)
513    /// BEFORE early validation so the synthesizers inject Opens /
514    /// Documents that Early checks depend on (E1001 account
515    /// presence, E5001 missing-document file).
516    ///
517    /// Only this narrow synth subset runs here; everything else
518    /// waits until after booking (post-booking plugin pass) so
519    /// cost-spec-reading plugins see filled-in per-unit values on
520    /// `CostNumber::PerUnitFromTotal`. See `PluginPass` rustdoc for
521    /// the detailed split rationale.
522    pub(crate) fn apply_synth_plugins(
523        mut self,
524        plugins: &[crate::Plugin],
525        file_options: &crate::Options,
526        options: &LoadOptions,
527        source_map: &SourceMap,
528        errors: &mut Vec<LedgerError>,
529    ) -> Result<crate::Directives<crate::Synthed>, ProcessError> {
530        // `run_plugins` early-returns when no plugin entry matches the
531        // pass; no outer gate needed (and any outer gate risked
532        // missing one of the implicit-synth triggers — auto_accounts,
533        // document_discovery via `option "documents"`, file-declared
534        // synth plugins).
535        #[cfg(feature = "plugins")]
536        run_plugins(
537            self.as_vec_mut(),
538            plugins,
539            file_options,
540            options,
541            source_map,
542            errors,
543            PluginPass::PreBookingSynth,
544        )?;
545        // Suppress unused-arg warnings when `plugins` feature is off.
546        #[cfg(not(feature = "plugins"))]
547        {
548            let _ = (plugins, file_options, options, source_map, errors);
549        }
550        Ok(crate::Directives::new_unchecked(std::mem::take(
551            self.as_vec_mut(),
552        )))
553    }
554}
555
556impl crate::Directives<crate::Synthed> {
557    /// Run the early-phase validators. Account-presence /
558    /// lifecycle / structural errors are collected into `errors`
559    /// (via the `LedgerError` stream); the directive list itself is
560    /// unchanged by validation.
561    ///
562    /// Runs on pre-booking directives, AFTER synth plugins so
563    /// account-presence checks (E1001) see any Opens that plugins
564    /// like `auto_accounts` injected. This is what lets booking
565    /// match Python's "prune zero-interp postings" behavior without
566    /// losing E1001 on the elided-zero-to-unopened-account case
567    /// (rustledger#877).
568    #[cfg(feature = "validation")]
569    pub(crate) fn early_validate(
570        mut self,
571        validation_session: Option<
572            rustledger_validate::ValidationSession<rustledger_validate::Pending>,
573        >,
574        today: rustledger_core::NaiveDate,
575        source_map: &SourceMap,
576        errors: &mut Vec<LedgerError>,
577    ) -> (
578        crate::Directives<crate::EarlyValidated>,
579        Option<rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>>,
580    ) {
581        // Typestate move: consume `Pending`, return `EarlyDone`. The
582        // session must be threaded by value rather than `&mut`-borrowed
583        // because the phase parameter on `ValidationSession<P>` changes
584        // as a result of the call (#1236). The caller in `process()`
585        // captures the returned session and passes it to
586        // `late_validate`.
587        let session_out = validation_session.map(|session| {
588            let (session, phase_errors) = session.run_early_spanned(self.as_slice(), today);
589            ledger_errors_extend(errors, phase_errors, source_map);
590            session
591        });
592        (
593            crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut())),
594            session_out,
595        )
596    }
597
598    #[cfg(not(feature = "validation"))]
599    pub(crate) fn early_validate(
600        mut self,
601        source_map: &SourceMap,
602        errors: &mut Vec<LedgerError>,
603    ) -> crate::Directives<crate::EarlyValidated> {
604        let _ = (source_map, errors);
605        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
606    }
607}
608
609impl crate::Directives<crate::EarlyValidated> {
610    /// Run booking/interpolation. Returns the successfully-booked
611    /// directives plus a typed wrapper holding failed transactions.
612    ///
613    /// Failed transactions are in pre-booking shape (unresolved cost
614    /// specs, unfilled elided slots, possibly unbalanced); they
615    /// don't flow into regular plugins or Late validation — booking
616    /// already reported the root cause and the downstream checks
617    /// would cascade misleading errors. They get re-merged at
618    /// [`crate::Directives::<crate::LateValidated>::finalize`].
619    ///
620    /// When the `booking` feature is disabled this is an identity
621    /// transition: directives pass through unchanged and the failed
622    /// set is always empty. The same method exists in both feature
623    /// configurations so the caller in `process()` doesn't need a
624    /// `#[cfg]` match — the booking-specific arguments appear or
625    /// disappear via per-parameter `#[cfg]` attributes, mirroring
626    /// `early_validate` / `late_validate`.
627    pub(crate) fn book(
628        mut self,
629        #[cfg(feature = "booking")] effective_method: rustledger_core::BookingMethod,
630        #[cfg(feature = "booking")] errors: &mut Vec<LedgerError>,
631    ) -> (
632        crate::Directives<crate::Booked>,
633        crate::phase::FailedBookings,
634    ) {
635        #[cfg(feature = "booking")]
636        let (booked, failed) =
637            run_booking(std::mem::take(self.as_vec_mut()), effective_method, errors);
638        #[cfg(not(feature = "booking"))]
639        let (booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
640            (std::mem::take(self.as_vec_mut()), Vec::new());
641        (
642            crate::Directives::new_unchecked(booked),
643            crate::phase::FailedBookings::new(failed),
644        )
645    }
646}
647
648impl crate::Directives<crate::Booked> {
649    /// Run post-booking plugins — file-declared + CLI extras.
650    /// Cost-spec-reading plugins (`implicit_prices`,
651    /// `capital_gains_classifier`, `check_average_cost`,
652    /// `sell_gains`, `unrealized`, `valuation`) see filled-in
653    /// per-unit values on `CostNumber::PerUnitFromTotal` because
654    /// booking has run.
655    ///
656    /// Matches Python beancount's plugins-after-booking ordering
657    /// and closes rustledger#1117. Failed transactions were
658    /// partitioned out by `book`; plugins only see
659    /// successfully-booked input.
660    pub(crate) fn apply_regular_plugins(
661        mut self,
662        plugins: &[crate::Plugin],
663        file_options: &crate::Options,
664        options: &LoadOptions,
665        source_map: &SourceMap,
666        errors: &mut Vec<LedgerError>,
667    ) -> Result<crate::Directives<crate::RegularPluginsApplied>, ProcessError> {
668        // `run_plugins` early-returns when no plugin entry matches
669        // the pass; no outer gate needed.
670        #[cfg(feature = "plugins")]
671        run_plugins(
672            self.as_vec_mut(),
673            plugins,
674            file_options,
675            options,
676            source_map,
677            errors,
678            PluginPass::PostBooking,
679        )?;
680        #[cfg(not(feature = "plugins"))]
681        {
682            let _ = (plugins, file_options, options, source_map, errors);
683        }
684        Ok(crate::Directives::new_unchecked(std::mem::take(
685            self.as_vec_mut(),
686        )))
687    }
688}
689
690impl crate::Directives<crate::RegularPluginsApplied> {
691    /// Run the late-phase validators on booked + plugin-processed
692    /// directives. Reuses the `ValidationSession` from
693    /// `early_validate` so account / commodity / pad bookkeeping
694    /// carries forward.
695    #[cfg(feature = "validation")]
696    pub(crate) fn late_validate(
697        mut self,
698        validation_session: Option<
699            rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>,
700        >,
701        today: rustledger_core::NaiveDate,
702        source_map: &SourceMap,
703        errors: &mut Vec<LedgerError>,
704    ) -> crate::Directives<crate::LateValidated> {
705        // Typestate move: consume `EarlyDone`, drive through `LateDone`
706        // to `finalize()`. The compile-time enforcement here is that
707        // we cannot call `late_validate` with a fresh `Pending` session
708        // (no `From<Pending>` to `EarlyDone`), so the loader caller
709        // must have routed the session through `early_validate` first
710        // (#1236).
711        if let Some(session) = validation_session {
712            let (session, phase_errors) = session.run_late_spanned(self.as_slice(), today);
713            ledger_errors_extend(errors, phase_errors, source_map);
714            let finalize_errors = session.finalize();
715            ledger_errors_extend(errors, finalize_errors, source_map);
716        }
717        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
718    }
719
720    #[cfg(not(feature = "validation"))]
721    pub(crate) fn late_validate(
722        mut self,
723        source_map: &SourceMap,
724        errors: &mut Vec<LedgerError>,
725    ) -> crate::Directives<crate::LateValidated> {
726        let _ = (source_map, errors);
727        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
728    }
729}
730
731impl crate::Directives<crate::LateValidated> {
732    /// Re-merge failed (un-booked) transactions back into the
733    /// directive list for output. The user wrote them and expects
734    /// to see them in `Ledger.directives`; we kept them isolated
735    /// from post-booking processing.
736    ///
737    /// Re-sorts to restore canonical display order — `booked`
738    /// retained order during plugin transformation; the sort
739    /// restores the failed entries' positions.
740    pub(crate) fn finalize(
741        mut self,
742        failed: crate::phase::FailedBookings,
743    ) -> crate::Directives<crate::Finalized> {
744        let mut v = std::mem::take(self.as_vec_mut());
745        v.extend(failed.into_inner());
746        v.sort_by_key(canonical_sort_key);
747        crate::Directives::new_unchecked(v)
748    }
749}
750
/// Book and interpolate every transaction, then split the directives
/// into `(booked, failed)`.
///
/// `directives` is expected to arrive in canonical display order
/// `(date, priority, file_id, span.start)`. Booking additionally needs
/// cost-reduction transactions to be processed AFTER augmentations
/// that share the same `(date, priority)`, so the lots they match
/// against already exist. Instead of re-sorting the vec itself, a
/// temporary list of indices is sorted into booking order and used to
/// drive the walk; the sort is stable, so display order remains the
/// tiebreak among entries with equal keys.
///
/// A transaction that fails to book is reported through `errors` (code
/// `"BOOK"`) and ends up in the second element of the returned tuple.
/// Such transactions are still in pre-booking shape (unresolved cost
/// specs, unfilled elided slots), so they are kept away from regular
/// plugins and Late validation, where they would only cascade
/// misleading errors. The caller is responsible for re-merging
/// `failed` into the final `Ledger.directives` so the user still sees
/// their original input.
///
/// Both returned vectors preserve the relative order of `directives`.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|entry| &entry.value));

    // Booking order: indices sorted by `(date, priority,
    // has_cost_reduction)`. The input is already in display order, so
    // the stable sort only moves cost reductions behind augmentations
    // within one `(date, priority)` group.
    let mut booking_order: Vec<usize> = (0..directives.len()).collect();
    booking_order.sort_by_key(|&idx| {
        let directive = &directives[idx].value;
        (
            directive.date(),
            directive.priority(),
            directive.has_cost_reduction(),
        )
    });

    // Positions (in `directives`) of transactions that failed to book.
    let mut failed_set: rustc_hash::FxHashSet<usize> = rustc_hash::FxHashSet::default();
    for idx in booking_order {
        // Only transactions are booked; everything else is left as-is.
        let Directive::Transaction(txn) = &mut directives[idx].value else {
            continue;
        };
        match engine.book_and_interpolate(txn) {
            Ok(result) => {
                engine.apply(&result.transaction);
                *txn = result.transaction;
            }
            Err(e) => {
                errors.push(LedgerError::error(
                    "BOOK",
                    format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                ));
                failed_set.insert(idx);
            }
        }
    }

    // Consume `directives` and route each entry to its bucket. The
    // recorded positions are still valid because nothing has been
    // added to or removed from the vec since they were collected.
    let mut booked: Vec<Spanned<Directive>> =
        Vec::with_capacity(directives.len() - failed_set.len());
    let mut failed: Vec<Spanned<Directive>> = Vec::with_capacity(failed_set.len());
    for (idx, directive) in directives.into_iter().enumerate() {
        let bucket = if failed_set.contains(&idx) {
            &mut failed
        } else {
            &mut booked
        };
        bucket.push(directive);
    }
    (booked, failed)
}
828
/// Selects the subset of plugins a `run_plugins` call executes.
///
/// The loader pipeline invokes `run_plugins` twice:
///
/// 1. with [`PluginPass::PreBookingSynth`], before the Early
///    validation phase, so synthesizers can inject the Opens /
///    Documents that early checks depend on;
/// 2. with [`PluginPass::PostBooking`], after booking, so
///    cost-spec-reading plugins (`implicit_prices`,
///    `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
///    `unrealized`, `valuation`) see filled-in per-unit values on the
///    `CostNumber::PerUnitFromTotal` variant.
///
/// Standalone callers (LSP / FFI / tests on already-booked input)
/// should pass [`PluginPass::PostBooking`]: synth plugins are a
/// loader-internal concern and would re-Open already-opened accounts
/// if they ran a second time.
#[cfg(feature = "plugins")]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum PluginPass {
    /// Run only the plugins that synthesize directives the Early
    /// validator depends on: `auto_accounts` (Open directives) and the
    /// built-in document discovery walker (Document directives that
    /// the early phase checks for missing files).
    PreBookingSynth,
    /// Run all file-declared plugins and CLI `extra_plugins` EXCEPT
    /// `auto_accounts` and `document_discovery`, which already ran
    /// pre-booking. This covers the 28 plugins that don't depend on
    /// synth state but may depend on booked cost specs.
    PostBooking,
}
858
859/// Run plugins on directives.
860///
861/// Executes native plugins (and document discovery) on the given directives,
862/// modifying them in-place. Plugin errors are appended to `errors`.
863///
864/// A single plugin invocation in `run_plugins`'s unified dispatch
865/// list. `force_python` ("python:..." prefix) overrides native
866/// resolution; `config` is the plugin-specific string passed to
867/// `PluginInput.config`.
868#[cfg(feature = "plugins")]
869struct PluginInvocation {
870    name: String,
871    config: Option<String>,
872    force_python: bool,
873}
874
875/// `pass` selects which subset of plugins to run — see [`PluginPass`].
876/// The loader pipeline calls this twice (synth pass before Early,
877/// regular pass after booking).
878#[cfg(feature = "plugins")]
879pub fn run_plugins(
880    directives: &mut Vec<Spanned<Directive>>,
881    file_plugins: &[Plugin],
882    file_options: &Options,
883    options: &LoadOptions,
884    source_map: &SourceMap,
885    errors: &mut Vec<LedgerError>,
886    pass: PluginPass,
887) -> Result<(), ProcessError> {
888    use rustledger_plugin::{NativePlugin, NativePluginRegistry, PluginInput, PluginOptions};
889
890    // Resolve document directories relative to the main file's directory.
891    // Used to build doc_discovery's per-call config in the synth pass.
892    let base_dir = source_map
893        .files()
894        .first()
895        .and_then(|f| f.path.parent())
896        .unwrap_or_else(|| std::path::Path::new("."));
897
898    // Access the process-wide registry singleton. The registry is
899    // immutable and stateless, so the same instance services every
900    // call.
901    let registry = NativePluginRegistry::global();
902
903    // Build the unified list of plugins to invoke for this pass:
904    //   1. Implicit synth plugins triggered by `LoadOptions` /
905    //      `file_options` (auto_accounts via `options.auto_accounts`;
906    //      document_discovery via non-empty `file_options.documents`).
907    //   2. File-declared plugins from `plugin "..."` directives.
908    //   3. CLI `--plugin` extras.
909    // Pass classification happens here — once — via `registry.find_synth`.
910    // A plugin enters the list iff its pass matches the requested `pass`.
911    let mut entries: Vec<PluginInvocation> = Vec::new();
912
913    if matches!(pass, PluginPass::PreBookingSynth) {
914        // Implicit synth: API-level auto_accounts flag.
915        if options.auto_accounts {
916            entries.push(PluginInvocation {
917                name: rustledger_plugin::AUTO_ACCOUNTS_NAME.to_string(),
918                config: None,
919                force_python: false,
920            });
921        }
922        // Implicit synth: document_discovery, driven by `option "documents"`.
923        // The plugin sits in the registry as a ZST; we hand it the
924        // resolved directories + base_dir via its config JSON.
925        if options.run_plugins && !file_options.documents.is_empty() {
926            let resolved: Vec<String> = file_options
927                .documents
928                .iter()
929                .map(|d| {
930                    let path = std::path::Path::new(d);
931                    if path.is_absolute() {
932                        d.clone()
933                    } else {
934                        base_dir.join(path).to_string_lossy().to_string()
935                    }
936                })
937                .collect();
938            entries.push(PluginInvocation {
939                name: rustledger_plugin::DOCUMENT_DISCOVERY_NAME.to_string(),
940                config: Some(rustledger_plugin::document_discovery_config(
941                    base_dir, &resolved,
942                )),
943                force_python: false,
944            });
945        }
946    }
947
948    // A plugin name belongs in the current pass iff its synth-marker
949    // membership matches `pass`. Non-native plugins (WASM/Python) are
950    // never in the synth registry and therefore always fall into the
951    // PostBooking pass.
952    let want_synth = matches!(pass, PluginPass::PreBookingSynth);
953
954    // File-declared plugins.
955    if options.run_plugins {
956        for plugin in file_plugins {
957            if registry.find_synth(&plugin.name).is_some() == want_synth {
958                entries.push(PluginInvocation {
959                    name: plugin.name.clone(),
960                    config: plugin.config.clone(),
961                    force_python: plugin.force_python,
962                });
963            }
964        }
965    }
966
967    // CLI extra plugins.
968    for extra in &options.extra_plugins {
969        if registry.find_synth(&extra.name).is_some() == want_synth {
970            entries.push(PluginInvocation {
971                name: extra.name.clone(),
972                config: extra.config.clone(),
973                force_python: false,
974            });
975        }
976    }
977
978    if entries.is_empty() {
979        return Ok(());
980    }
981
982    let plugin_options = PluginOptions {
983        operating_currencies: file_options.operating_currency.clone(),
984        title: file_options.title.clone(),
985    };
986
987    // Dispatch each entry. Native plugins resolve through the typed
988    // registry (`find_synth` / `find_regular`) keyed on the pass — the
989    // returned reference type reflects the pass. Anything that doesn't
990    // resolve falls through to the WASM/Python branches.
991    for invocation in &entries {
992        let PluginInvocation {
993            name: raw_name,
994            config: plugin_config,
995            force_python,
996        } = invocation;
997
998        // Dispatch via the typed registry. `find_synth`/`find_regular`
999        // internally take the short name (last `.`-separated segment),
1000        // so prefixed names like `"beancount.plugins.implicit_prices"`
1001        // resolve through the same call — no explicit prefix-stripping
1002        // needed. Returns `Some` only if the plugin exists AND its
1003        // marker trait matches the requested pass: a `RegularPlugin`
1004        // won't be returned from `find_synth` (and vice versa), even
1005        // on a name collision. Anything that returns `None` (WASM,
1006        // Python, unknown names, wrong-pass natives) falls through
1007        // to the WASM/Python branches below.
1008        let native_plugin: Option<&dyn NativePlugin> = if *force_python {
1009            None
1010        } else {
1011            match pass {
1012                PluginPass::PreBookingSynth => registry
1013                    .find_synth(raw_name)
1014                    .map(|p| p as &dyn NativePlugin),
1015                PluginPass::PostBooking => registry
1016                    .find_regular(raw_name)
1017                    .map(|p| p as &dyn NativePlugin),
1018            }
1019        };
1020
1021        if let Some(plugin) = native_plugin {
1022            let wrappers = build_wrappers(directives, source_map);
1023            let input = PluginInput {
1024                directives: wrappers,
1025                options: plugin_options.clone(),
1026                config: plugin_config.clone(),
1027            };
1028            let output = plugin.process(input);
1029            record_plugin_errors(errors, output.errors, source_map);
1030            apply_plugin_ops(directives, output.ops, errors, source_map)?;
1031        } else {
1032            // Not a native plugin — categorize and handle
1033            let plugin_path = std::path::Path::new(raw_name);
1034            let ext = plugin_path
1035                .extension()
1036                .and_then(|e| e.to_str())
1037                .unwrap_or("")
1038                .to_lowercase();
1039
1040            // The closure is only invoked from inside the wasm-plugins /
1041            // python-plugins cfg blocks below. The whole function is
1042            // already `#[cfg(feature = "plugins")]`, so this only matters
1043            // when `plugins` is enabled but neither child feature is
1044            // (e.g. `--features native-plugins`). Allow `unused_variables`
1045            // for exactly that configuration. Underscore-prefixing the
1046            // binding would have been the wrong fix because we DO call
1047            // the closure in builds with one of the features enabled,
1048            // which would trip `no_effect_underscore_binding` instead.
1049            #[cfg_attr(
1050                not(any(feature = "wasm-plugins", feature = "python-plugins")),
1051                allow(unused_variables)
1052            )]
1053            let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
1054                let p = std::path::Path::new(name);
1055                let resolved = if p.is_absolute() {
1056                    p.to_path_buf()
1057                } else {
1058                    base_dir.join(name)
1059                };
1060
1061                // Path security: prevent plugins from outside the ledger directory
1062                if options.path_security
1063                    && let (Ok(canon_base), Ok(canon_plugin)) =
1064                        (base_dir.canonicalize(), resolved.canonicalize())
1065                    && !canon_plugin.starts_with(&canon_base)
1066                {
1067                    return Err(format!(
1068                        "plugin path '{name}' is outside the ledger directory"
1069                    ));
1070                }
1071
1072                Ok(resolved)
1073            };
1074
1075            if ext == "wasm" {
1076                // WASM plugin
1077                #[cfg(feature = "wasm-plugins")]
1078                {
1079                    let wasm_path = match resolve_path(raw_name) {
1080                        Ok(p) => p,
1081                        Err(e) => {
1082                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
1083                            continue;
1084                        }
1085                    };
1086                    let wrappers = build_wrappers(directives, source_map);
1087                    match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config) {
1088                        Ok((ops, plugin_errors)) => {
1089                            for err in plugin_errors {
1090                                errors.push(err);
1091                            }
1092                            apply_plugin_ops(directives, ops, errors, source_map)?;
1093                        }
1094                        Err(e) => {
1095                            errors.push(
1096                                LedgerError::error(
1097                                    "PLUGIN",
1098                                    format!("WASM plugin {} failed: {e}", wasm_path.display()),
1099                                )
1100                                .with_phase("plugin"),
1101                            );
1102                        }
1103                    }
1104                }
1105                #[cfg(not(feature = "wasm-plugins"))]
1106                {
1107                    errors.push(
1108                        LedgerError::error(
1109                            "PLUGIN",
1110                            format!("WASM plugin '{raw_name}' requires the wasm-plugins feature"),
1111                        )
1112                        .with_phase("plugin"),
1113                    );
1114                }
1115            } else if *force_python
1116                || ext == "py"
1117                || raw_name.contains(std::path::MAIN_SEPARATOR)
1118                || raw_name.contains('.')
1119            {
1120                // Python module or file-based plugin (or force_python via "python:" prefix)
1121                #[cfg(feature = "python-plugins")]
1122                {
1123                    let resolved = match resolve_path(raw_name) {
1124                        Ok(p) => p,
1125                        Err(e) => {
1126                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
1127                            continue;
1128                        }
1129                    };
1130                    let wrappers = build_wrappers(directives, source_map);
1131                    match run_python_plugin(
1132                        raw_name,
1133                        &resolved,
1134                        base_dir,
1135                        &wrappers,
1136                        &plugin_options,
1137                        plugin_config,
1138                    ) {
1139                        Ok((ops, plugin_errors)) => {
1140                            for err in plugin_errors {
1141                                errors.push(err);
1142                            }
1143                            apply_plugin_ops(directives, ops, errors, source_map)?;
1144                        }
1145                        Err(e) => {
1146                            errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
1147                        }
1148                    }
1149                }
1150                #[cfg(not(feature = "python-plugins"))]
1151                {
1152                    errors.push(
1153                        LedgerError::error(
1154                            "E8005",
1155                            format!(
1156                                "Python plugin \"{raw_name}\" requires the python-plugins feature",
1157                            ),
1158                        )
1159                        .with_phase("plugin"),
1160                    );
1161                }
1162            } else {
1163                // Completely unknown plugin name — try to suggest a module path
1164                #[cfg(feature = "python-plugins")]
1165                {
1166                    use rustledger_plugin::python::{is_python_available, suggest_module_path};
1167                    let suggestion = if is_python_available() {
1168                        suggest_module_path(raw_name)
1169                    } else {
1170                        None
1171                    };
1172                    if let Some(module_path) = suggestion {
1173                        errors.push(
1174                                LedgerError::error(
1175                                    "E8004",
1176                                    format!(
1177                                        "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
1178                                    ),
1179                                )
1180                                .with_phase("plugin"),
1181                            );
1182                    } else {
1183                        errors.push(
1184                            LedgerError::error(
1185                                "E8001",
1186                                format!("Plugin not found: \"{raw_name}\""),
1187                            )
1188                            .with_phase("plugin"),
1189                        );
1190                    }
1191                }
1192                #[cfg(not(feature = "python-plugins"))]
1193                {
1194                    errors.push(
1195                        LedgerError::error("E8001", format!("Plugin not found: \"{raw_name}\""))
1196                            .with_phase("plugin"),
1197                    );
1198                }
1199            }
1200        }
1201    }
1202    // No final wrapper→directive conversion needed: `apply_plugin_ops`
1203    // updates `directives` in place after each plugin call, preserving
1204    // original spans on Keep/Modify ops. Plugin-synthesized directives
1205    // (Insert ops) get `SYNTHESIZED_FILE_ID` and a zero span.
1206    Ok(())
1207}
1208
/// Convert the current directives into a fresh
/// `Vec<DirectiveWrapper>` for a plugin call.
///
/// Each wrapper is tagged with the directive's filename and line
/// number (looked up from its `file_id` and span start) so plugins can
/// report errors against source locations. When the `file_id` is not
/// in `source_map`, both location fields are `None`. Spans themselves
/// do not need to round-trip through the wrappers — the loader
/// preserves them via `apply_plugin_ops` matching on op index.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    let mut wrappers = Vec::with_capacity(directives.len());
    for entry in directives {
        // `Some((filename, line))` only when the file is known.
        let location = source_map.get(entry.file_id as usize).map(|file| {
            let (line, _col) = file.line_col(entry.span.start);
            (file.path.display().to_string(), line as u32)
        });
        let (filename, lineno) = location.unzip();
        wrappers.push(directive_to_wrapper_with_location(
            &entry.value,
            filename,
            lineno,
        ));
    }
    wrappers
}
1233
/// Append a plugin's errors to the ledger error stream.
///
/// Every entry is emitted with code `"PLUGIN"` and `phase: "plugin"`,
/// as an error or a warning according to the plugin-reported severity.
/// When the plugin set both `source_file` and `line_number`, an
/// `ErrorLocation` is attached so downstream renderers (CLI, LSP, JSON
/// output) can pinpoint where the plugin objected.
///
/// Location resolution: if `source_file` matches a file in the source
/// map, that file's stored path is used. Names that match no loaded
/// file (e.g. plugin-synthesized ones such as `"<auto_accounts>"`) are
/// passed through verbatim as a `PathBuf`, so the error is still
/// attributed to its origin instead of losing the field. The column is
/// always 1 — plugin errors don't carry column info through the
/// wrapper protocol.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    use rustledger_plugin::PluginErrorSeverity;

    errors.extend(plugin_errors.into_iter().map(|plugin_err| {
        // Work out the optional location before `message` is moved out.
        let location = match (&plugin_err.source_file, plugin_err.line_number) {
            (Some(name), Some(line)) => {
                let file = match source_map.get_by_path(std::path::Path::new(name)) {
                    Some(known) => known.path.clone(),
                    None => std::path::PathBuf::from(name),
                };
                Some(ErrorLocation {
                    file,
                    line: line as usize,
                    column: 1,
                })
            }
            _ => None,
        };

        let base = match plugin_err.severity {
            PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
            PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", plugin_err.message),
        }
        .with_phase("plugin");

        match location {
            Some(loc) => base.with_location(loc),
            None => base,
        }
    }));
}
1278
/// Apply a plugin's `Vec<PluginOp>` to `directives` in place.
///
/// The op set is first checked with
/// `rustledger_plugin::validate_op_coverage`. On a protocol violation
/// a `PLUGIN` error is pushed to `errors`, `directives` is left
/// untouched, and `Ok(())` is returned.
///
/// How each op maps to the output, in op order:
/// - `Keep(i)` — a clone of `directives[i]`.
/// - `Modify(i, w)` — the directive converted from `w`, carrying
///   `directives[i]`'s span and `file_id`. This is the core of the ops
///   protocol's correctness guarantee: plugin-transformed directives
///   keep their original source identity for error reporting.
/// - `Insert(w)` — the directive converted from `w`. If `w` carries
///   both a filename and a line number and the filename matches a
///   loaded source file, the directive gets that file's id and an
///   empty span at the start of that line (offset 0 if the line cannot
///   be resolved). Otherwise it gets
///   `(Span::ZERO, SYNTHESIZED_FILE_ID)`.
/// - `Delete(_)` — nothing is emitted.
///
/// Inner posting spans returned by plugins are sanitized against the
/// host's `SourceMap` (see [`sanitize_inner_posting_spans`]) so a
/// misbehaving plugin cannot smuggle out-of-bounds spans into the LSP.
///
/// # Errors
///
/// Returns `ProcessError::PluginConversion` when a `Modify` or
/// `Insert` wrapper cannot be converted back into a directive. In that
/// case `directives` has not been modified.
#[cfg(feature = "plugins")]
fn apply_plugin_ops(
    directives: &mut Vec<Spanned<Directive>>,
    ops: Vec<rustledger_plugin::PluginOp>,
    errors: &mut Vec<LedgerError>,
    source_map: &SourceMap,
) -> Result<(), ProcessError> {
    use rustledger_plugin::PluginOp;
    use rustledger_plugin::wrapper_to_directive;

    // Validate the op set forms a complete cover of the input — the contract is
    // single-sourced in `rustledger-plugin` so the loader and FFI surfaces stay
    // in lock-step. On violation, surface the error and leave directives as-is.
    if let Err(msg) = rustledger_plugin::validate_op_coverage(directives.len(), &ops) {
        errors.push(LedgerError::error("PLUGIN", msg).with_phase("plugin"));
        return Ok(());
    }

    // Materialize the new directive list; the old one stays intact
    // until the very end so an early `?` return leaves it unchanged.
    let mut rebuilt = Vec::with_capacity(ops.len());
    for op in ops {
        match op {
            PluginOp::Delete(_) => {}
            PluginOp::Keep(i) => rebuilt.push(directives[i].clone()),
            PluginOp::Modify(i, wrapper) => {
                let mut directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                // Plugins are not trusted to return well-formed inner
                // posting spans — a misbehaving plugin can synthesize a
                // file_id pointing at a nonexistent source or a span
                // that runs past EOF. The LSP later builds TextEdits
                // from these spans, so an out-of-bounds posting span
                // would produce a corrupt edit.
                sanitize_inner_posting_spans(&mut directive, source_map);
                let original = &directives[i];
                rebuilt.push(Spanned {
                    value: directive,
                    span: original.span,
                    file_id: original.file_id,
                });
            }
            PluginOp::Insert(wrapper) => {
                // Location used when the wrapper cannot be tied to a
                // loaded file — including plugin-only attribution like
                // `"<auto_accounts>"`, which never matches one.
                let synthesized = (
                    rustledger_parser::Span::ZERO,
                    rustledger_parser::SYNTHESIZED_FILE_ID,
                );
                // Resolve the wrapper's filename + line number, if both
                // are set, into a real (span, file_id). The span comes
                // from the source map's own line table, not from the
                // plugin.
                let (span, file_id) = match (&wrapper.filename, wrapper.lineno) {
                    (Some(filename), Some(lineno)) => source_map
                        .get_by_path(std::path::Path::new(filename))
                        .map_or(synthesized, |file| {
                            let span_start = file.line_start(lineno as usize).unwrap_or(0);
                            (
                                rustledger_parser::Span::new(span_start, span_start),
                                file.id as u16,
                            )
                        }),
                    _ => synthesized,
                };
                let mut directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                // Same trust caveat as Modify: don't let an Insert
                // smuggle bogus inner-posting spans through.
                sanitize_inner_posting_spans(&mut directive, source_map);
                rebuilt.push(Spanned::new(directive, span).with_file_id(file_id as usize));
            }
        }
    }

    *directives = rebuilt;
    Ok(())
}
1380
/// Reset any inner `Spanned<Posting>` whose location does not refer to a
/// real loaded source range to the synthesized location. Plugins are not
/// trusted to return well-formed `file_id` + byte ranges; without this,
/// a misbehaving plugin could induce out-of-bounds LSP text edits.
///
/// Only `Directive::Transaction` is inspected; every other directive
/// kind is left untouched.
///
/// A posting location is considered valid when:
/// - `file_id == SYNTHESIZED_FILE_ID` (genuine synthesis), OR
/// - the `file_id` resolves in `SourceMap` AND `start <= end <= len`
///   for that file's source.
///
/// Everything else is re-tagged as synthesized. Every synthesized
/// posting — whether it arrived that way or was just re-tagged — ends
/// up with `Span::ZERO`, so the in-memory state matches the
/// `Spanned::synthesized` constructor's contract (`file_id` +
/// `Span::ZERO`). The posting value itself is never modified.
#[cfg(feature = "plugins")]
fn sanitize_inner_posting_spans(directive: &mut Directive, source_map: &SourceMap) {
    use rustledger_core::Span;
    use rustledger_parser::SYNTHESIZED_FILE_ID;

    let Directive::Transaction(txn) = directive else {
        return;
    };
    for posting in &mut txn.postings {
        let trusted = posting.file_id == SYNTHESIZED_FILE_ID
            || source_map.get(posting.file_id as usize).is_some_and(|file| {
                posting.span.start <= posting.span.end && posting.span.end <= file.source.len()
            });
        if !trusted {
            // Re-tag in place. Together with the span reset below this
            // yields the state `Spanned::synthesized(value)` is
            // documented to produce, without building a placeholder
            // `Posting` just to move the value out and back in.
            posting.file_id = SYNTHESIZED_FILE_ID;
        }
        if posting.file_id == SYNTHESIZED_FILE_ID {
            // Synthesized → span is meaningless; normalize it.
            posting.span = Span::ZERO;
        }
    }
}
1423
/// Translate loader-level file options into a [`ValidationOptions`].
///
/// Both validation phases in `process()` are meant to share the
/// configuration produced here, which is why it lives in its own
/// function rather than inline in either phase.
///
/// Entries of `file_options.documents` that are relative paths are
/// joined onto the parent directory of the first file in `source_map`
/// (falling back to `"."` when no such directory is available);
/// absolute entries are used verbatim.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
    default_booking_method: BookingMethod,
) -> rustledger_validate::ValidationOptions {
    use rustledger_validate::ValidationOptions;
    use std::path::PathBuf;

    // The first file in the source map is treated as the main file; its
    // parent directory anchors every relative document directory.
    let main_file = source_map.files().first();
    let anchor_dir = main_file
        .and_then(|file| file.path.parent())
        .unwrap_or_else(|| std::path::Path::new("."));

    let document_dirs: Vec<PathBuf> = file_options
        .documents
        .iter()
        .map(|entry| {
            let dir = std::path::Path::new(entry);
            if dir.is_relative() {
                anchor_dir.join(dir)
            } else {
                dir.to_path_buf()
            }
        })
        .collect();

    // The validator wants owned account-type names.
    let account_type_names: Vec<String> = file_options
        .account_types()
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    let tolerance_default = file_options.inferred_tolerance_default.clone();

    ValidationOptions::default()
        .with_account_types(account_type_names)
        .with_document_dirs(document_dirs)
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(tolerance_default)
        .with_default_booking_method(default_booking_method)
}
1475
/// Append a batch of [`rustledger_validate::ValidationError`]s to
/// `errors`, converting each into a loader-level [`LedgerError`].
///
/// Each converted error carries:
/// - a phase label (`"parse"` or `"validate"`) and a severity, both
///   derived from the error code;
/// - a message with the advisory note (when present) folded in;
/// - a resolved `file:line:column` location when the error has both a
///   span and a `file_id` that `source_map` can resolve.
///
/// Shared by both validation phases in `process()` so they convert
/// errors identically.
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };
        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };

        // The note rides along inside the message so that every downstream
        // renderer (LedgerError, JSON diagnostic, CLI report, LSP
        // diagnostic) shows it without needing a dedicated field.
        let message = err.note.as_ref().map_or_else(
            || err.to_string(),
            |note| format!("{err}\n  note: {note}"),
        );

        // Pre-resolve span + file_id into file/line/column here so CLI and
        // LSP consumers don't have to repeat the lookup (issue #901).
        let location = err.span.zip(err.file_id).and_then(|(span, fid)| {
            let file = source_map.get(fid as usize)?;
            let (line, column) = file.line_col(span.start);
            Some(ErrorLocation {
                file: file.path.clone(),
                line,
                column,
            })
        });

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1530
1531/// Load and fully process a beancount file.
1532///
1533/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1534/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1535///
1536/// # Example
1537///
1538/// ```ignore
1539/// use rustledger_loader::{load, LoadOptions};
1540/// use std::path::Path;
1541///
1542/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1543/// for error in &ledger.errors {
1544///     eprintln!("{}: {}", error.code, error.message);
1545/// }
1546/// ```
1547pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1548    let mut loader = crate::Loader::new();
1549
1550    if options.path_security {
1551        loader = loader.with_path_security(true);
1552    }
1553
1554    let raw = loader.load(path)?;
1555    process(raw, options)
1556}
1557
1558/// Load a beancount file without processing.
1559///
1560/// This returns raw directives without sorting, booking, or plugins.
1561/// Use this when you need the original parse output.
1562pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
1563    crate::Loader::new().load(path)
1564}
1565
/// Load the WASM plugin at `wasm_path`, execute it once, and return the
/// ops it produced together with its diagnostics.
///
/// The plugin receives owned copies of `directives`, `options`, and
/// `config`. Every diagnostic it reports is converted into a
/// [`LedgerError`] with code `"PLUGIN"` and phase `"plugin"`, keeping
/// the plugin-reported severity.
///
/// # Errors
///
/// Returns a human-readable `String` when the plugin cannot be loaded
/// or when its execution fails.
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let loaded = manager.load(wasm_path);
    let plugin_idx = loaded.map_err(|e| format!("failed to load: {e}"))?;

    // The plugin input owns its data, so everything is copied in.
    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    let executed = manager.execute(plugin_idx, &input);
    let output = executed.map_err(|e| format!("execution failed: {e}"))?;

    // Re-tag plugin diagnostics as ledger errors, preserving severity.
    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|plugin_err| {
            let converted = match plugin_err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
                PluginErrorSeverity::Warning => {
                    LedgerError::warning("PLUGIN", plugin_err.message)
                }
            };
            converted.with_phase("plugin")
        })
        .collect();

    Ok((output.ops, errors))
}
1606
/// Execute a Python plugin through the WASI-based Python runtime and
/// return the ops it produced together with its diagnostics.
///
/// The plugin receives owned copies of `directives`, `options`, and
/// `config`; `base_dir` is passed to the runtime alongside
/// `module_name`. Every diagnostic the plugin reports is converted into
/// a [`LedgerError`] with code `"PLUGIN"` and phase `"plugin"`, keeping
/// the plugin-reported severity.
///
/// # Errors
///
/// Returns a human-readable `String` when the Python runtime cannot be
/// created or when plugin execution fails.
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, python::PythonRuntime};

    let runtime = PythonRuntime::new().map_err(|e| format!("Python runtime unavailable: {e}"))?;

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // A plugin counts as "a file" when the resolved path exists on disk,
    // when the name ends in `.py` (case-insensitive), or when the name
    // contains the platform path separator.
    let is_file = resolved_path.exists()
        || std::path::Path::new(module_name)
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
        || module_name.contains(std::path::MAIN_SEPARATOR);

    // NOTE(review): file-style and module-style plugins both go through
    // `execute_module`; the classification above only selects the wording
    // of the failure message. Confirm whether a file-specific entry point
    // was intended for the `is_file` case.
    let output = runtime
        .execute_module(module_name, &input, Some(base_dir))
        .map_err(|e| {
            if is_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    // Re-tag plugin diagnostics as ledger errors, preserving severity.
    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|plugin_err| {
            let converted = match plugin_err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", plugin_err.message),
                PluginErrorSeverity::Warning => {
                    LedgerError::warning("PLUGIN", plugin_err.message)
                }
            };
            converted.with_phase("plugin")
        })
        .collect();

    Ok((output.ops, errors))
}
1659
#[cfg(all(test, feature = "plugins"))]
mod sanitize_tests {
    //! Unit tests for `sanitize_inner_posting_spans`: which posting
    //! locations survive sanitization and which collapse to synthesized.

    use super::sanitize_inner_posting_spans;
    use crate::source_map::SourceMap;
    use rust_decimal_macros::dec;
    use rustledger_core::{
        Amount, Directive, IncompleteAmount, Posting, SYNTHESIZED_FILE_ID, Span, Spanned,
        Transaction,
    };
    use std::path::PathBuf;
    use std::sync::Arc;

    /// Wrap `postings` in a minimal transaction directive.
    fn txn_with_postings(postings: Vec<Spanned<Posting>>) -> Directive {
        let mut txn = Transaction::new(rustledger_core::naive_date(2024, 1, 15).unwrap(), "x");
        txn.postings = postings;
        Directive::Transaction(txn)
    }

    /// A `1 USD` posting to `Assets:Cash` claiming the given location.
    fn posting_at(file_id: u16, span: Span) -> Spanned<Posting> {
        let amount = IncompleteAmount::Complete(Amount::new(dec!(1), "USD"));
        let posting = Posting::with_incomplete("Assets:Cash", amount);
        Spanned::new(posting, span).with_file_id(file_id as usize)
    }

    /// A source map holding a single file with the given contents, plus
    /// the id that file was registered under.
    fn source_map_with_one_file(source: &str) -> (SourceMap, u16) {
        let mut map = SourceMap::new();
        let file_id = map.add_file(PathBuf::from("test.bean"), Arc::from(source));
        (map, file_id as u16)
    }

    /// Sanitize a transaction containing just `posting` and return the
    /// resulting directive.
    fn sanitized(map: &SourceMap, posting: Spanned<Posting>) -> Directive {
        let mut directive = txn_with_postings(vec![posting]);
        sanitize_inner_posting_spans(&mut directive, map);
        directive
    }

    /// Borrow the first posting of a transaction directive.
    fn first_posting(directive: &Directive) -> &Spanned<Posting> {
        let Directive::Transaction(txn) = directive else {
            unreachable!()
        };
        &txn.postings[0]
    }

    /// Assert that `posting` ended up in the canonical synthesized state.
    fn assert_reset_to_synthesized(posting: &Spanned<Posting>) {
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO);
    }

    #[test]
    fn span_within_real_file_is_preserved() {
        let (map, fid) = source_map_with_one_file("0123456789");
        let directive = sanitized(&map, posting_at(fid, Span::new(2, 6)));
        let posting = first_posting(&directive);
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(2, 6));
    }

    #[test]
    fn span_past_eof_is_reset_to_synthesized() {
        // A misbehaving plugin claims the posting runs far past the end of
        // the 10-byte file. The sanitizer has to reject that so the LSP
        // can't be tricked into producing an out-of-bounds TextEdit.
        let (map, fid) = source_map_with_one_file("0123456789");
        let directive = sanitized(&map, posting_at(fid, Span::new(0, 9999)));
        assert_reset_to_synthesized(first_posting(&directive));
    }

    #[test]
    fn unknown_file_id_is_reset_to_synthesized() {
        // The plugin names a file_id the host's SourceMap has never seen.
        let (map, _real) = source_map_with_one_file("hello");
        let directive = sanitized(&map, posting_at(123, Span::new(0, 5)));
        assert_reset_to_synthesized(first_posting(&directive));
    }

    #[test]
    fn start_after_end_is_reset_to_synthesized() {
        let (map, fid) = source_map_with_one_file("abcdef");
        let directive = sanitized(&map, posting_at(fid, Span::new(5, 2)));
        assert_reset_to_synthesized(first_posting(&directive));
    }

    #[test]
    fn synthesized_file_id_is_left_alone_but_span_normalized() {
        // file_id == SYNTHESIZED_FILE_ID with a non-zero span: the posting
        // stays synthesized (a span means nothing for synth postings), but
        // the span is tidied to Span::ZERO.
        let (map, _fid) = source_map_with_one_file("x");
        let directive = sanitized(&map, posting_at(SYNTHESIZED_FILE_ID, Span::new(100, 200)));
        let posting = first_posting(&directive);
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO, "synth span normalized");
    }

    #[test]
    fn boundary_span_eq_source_len_is_valid() {
        // end == source.len() is the canonical "to-end-of-file" span and
        // must be accepted.
        let (map, fid) = source_map_with_one_file("abcd");
        let directive = sanitized(&map, posting_at(fid, Span::new(0, 4)));
        let posting = first_posting(&directive);
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(0, 4));
    }

    #[test]
    fn non_transaction_directive_is_left_alone() {
        // Only transactions are walked; other directive kinds have no
        // inner posting spans to sanitize.
        let (map, _fid) = source_map_with_one_file("x");
        let mut directive = Directive::Open(rustledger_core::Open {
            date: rustledger_core::naive_date(2024, 1, 1).unwrap(),
            account: "Assets:Bank".into(),
            currencies: vec![],
            booking: None,
            meta: Default::default(),
        });
        sanitize_inner_posting_spans(&mut directive, &map); // no panic, no change
        assert!(matches!(directive, Directive::Open(_)));
    }
}