Skip to main content

rustledger_loader/
process.rs

1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
7use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
8use rustledger_core::{BookingMethod, Directive, DisplayContext};
9use rustledger_parser::Spanned;
10use std::path::Path;
11use thiserror::Error;
12
/// One additional plugin invocation, supplied from the CLI or through
/// the programmatic API.
///
/// The plugin name and its optional config string live in a single
/// value so they cannot drift apart; the earlier representation as two
/// parallel `Vec`s could silently pair a config with the wrong plugin.
#[derive(Debug, Clone)]
pub struct ExtraPlugin {
    /// Name of the plugin: either the short form or a fully-qualified
    /// module path.
    pub name: String,
    /// Config string handed to this plugin; `None` when none was given.
    pub config: Option<String>,
}
25
26/// Options for loading and processing a ledger.
27#[derive(Debug, Clone)]
28pub struct LoadOptions {
29    /// Booking method for lot matching (default: Strict).
30    pub booking_method: BookingMethod,
31    /// Run plugins declared in the file (default: true).
32    pub run_plugins: bool,
33    /// Run `auto_accounts` plugin (default: false).
34    pub auto_accounts: bool,
35    /// Additional plugins to run (CLI `--plugin` or programmatic API),
36    /// each with an optional config string.
37    pub extra_plugins: Vec<ExtraPlugin>,
38    /// Run validation after processing (default: true).
39    pub validate: bool,
40    /// Enable path security (prevent include traversal).
41    pub path_security: bool,
42}
43
44impl Default for LoadOptions {
45    fn default() -> Self {
46        Self {
47            booking_method: BookingMethod::Strict,
48            run_plugins: true,
49            auto_accounts: false,
50            extra_plugins: Vec::new(),
51            validate: true,
52            path_security: false,
53        }
54    }
55}
56
57impl LoadOptions {
58    /// Create options for raw loading (no booking, no plugins, no validation).
59    #[must_use]
60    pub const fn raw() -> Self {
61        Self {
62            booking_method: BookingMethod::Strict,
63            run_plugins: false,
64            auto_accounts: false,
65            extra_plugins: Vec::new(),
66            validate: false,
67            path_security: false,
68        }
69    }
70}
71
72/// Errors that can occur during ledger processing.
73#[derive(Debug, Error)]
74pub enum ProcessError {
75    /// Loading failed.
76    #[error("loading failed: {0}")]
77    Load(#[from] LoadError),
78
79    /// Booking/interpolation error.
80    #[cfg(feature = "booking")]
81    #[error("booking error: {message}")]
82    Booking {
83        /// Error message.
84        message: String,
85        /// Date of the transaction.
86        date: rustledger_core::NaiveDate,
87        /// Narration of the transaction.
88        narration: String,
89    },
90
91    /// Plugin execution error.
92    #[cfg(feature = "plugins")]
93    #[error("plugin error: {0}")]
94    Plugin(String),
95
96    /// Validation error.
97    #[cfg(feature = "validation")]
98    #[error("validation error: {0}")]
99    Validation(String),
100
101    /// Plugin output conversion error.
102    #[cfg(feature = "plugins")]
103    #[error("failed to convert plugin output: {0}")]
104    PluginConversion(String),
105}
106
107/// A fully processed ledger.
108///
109/// This is the result of loading and processing a beancount file,
110/// equivalent to the tuple returned by Python's `loader.load_file()`.
111#[derive(Debug)]
112pub struct Ledger {
113    /// Processed directives in source-faithful form: sorted by date,
114    /// booked (cost specs resolved, interpolations applied), and
115    /// plugin-rewritten. **`Pad` directives remain as `Pad`**; they
116    /// are not pre-expanded into synthesized transactions.
117    ///
118    /// Consumers split into two groups:
119    ///
120    /// - **Source-faithful consumers** (stats, journal, formatter,
121    ///   LSP, BQL `FROM #entries WHERE type = 'pad'` audits,
122    ///   source-mapped diagnostics) iterate this field directly.
123    ///   Pads count as Pads.
124    /// - **Balance-computing consumers** (holdings, balances,
125    ///   balsheet, networth, income, FFI `query.execute`/`batch`,
126    ///   WASM `expandPads`/`query`) call [`Ledger::balance_view`]
127    ///   to get the directive stream MERGED with synthesized P-flag
128    ///   transactions for each pad-balance pair. This is the only
129    ///   way to get pad effects into per-account inventory math.
130    ///
131    /// The two views are derived from the same source; there is no
132    /// drift possible because [`Ledger::balance_view`] is a pure
133    /// function of `self.directives`.
134    pub directives: Vec<Spanned<Directive>>,
135    /// Options parsed from the file.
136    pub options: Options,
137    /// Plugins declared in the file.
138    pub plugins: Vec<Plugin>,
139    /// Source map for error reporting.
140    pub source_map: SourceMap,
141    /// Errors encountered during processing.
142    pub errors: Vec<LedgerError>,
143    /// Display context for formatting numbers.
144    pub display_context: DisplayContext,
145}
146
147impl Ledger {
148    /// Return the directive stream merged with synthesized
149    /// pad-equivalent transactions, suitable for inventory /
150    /// balance math.
151    ///
152    /// For each `Pad` directive followed (in date order) by a
153    /// `Balance` assertion on the same account, a `Transaction`
154    /// with `flag = 'P'` is added to the view carrying the
155    /// postings needed to make the balance match. A multi-currency
156    /// pad produces one synth transaction per currency.
157    ///
158    /// **Original `Pad` directives are preserved in the view.**
159    /// Synth transactions are added alongside, not in place of.
160    /// This matters for two reasons:
161    ///
162    /// 1. BQL queries against the `#entries` table
163    ///    (`SELECT * FROM #entries WHERE type = 'pad'`) can still
164    ///    enumerate the pad directives the user authored. A
165    ///    REPLACE-style expansion would silently zero those out.
166    ///    (BQL's default SELECT path operates on postings; pads
167    ///    have no postings, so a default SELECT never matches them
168    ///    regardless of this view shape.)
169    /// 2. Multi-pad cases (issue #1300) produce exactly one synth
170    ///    per pad-balance pair:
171    ///    `rustledger_booking::process_pads` (which
172    ///    `merge_with_padding` delegates to) only retains the most
173    ///    recent same-account pad in its pending-pads map, so
174    ///    earlier same-account pads are silently shadowed and
175    ///    their `source_account` does NOT contribute to the synth.
176    ///    The validator emits `E2003` for shadowed pads
177    ///    independently; this view reflects only the effective pad.
178    ///
179    /// Inventory-walking consumers iterate `Directive::Transaction`
180    /// and ignore `Pad` directives, so the preserved Pads are
181    /// invisible to them.
182    ///
183    /// **When to use this vs. [`Ledger.directives`](Self::directives):**
184    /// any consumer that maintains running per-account inventory
185    /// state and asks "what is the balance" needs this view. Any
186    /// consumer that asks "what did the user write" wants the raw
187    /// `directives` field.
188    ///
189    /// # Performance
190    ///
191    /// Each call clones every source directive once (`O(n)`).
192    /// Inlines the merge logic from
193    /// [`rustledger_booking::merge_with_padding`] so the already-
194    /// owned `booked` vector can be moved into the merged output
195    /// instead of cloned a second time. For short-lived CLI
196    /// invocations the single clone is negligible. Long-lived
197    /// processes (FFI servers, LSPs) that query the same ledger
198    /// repeatedly should hoist the result above their loop.
199    /// `TODO(perf):` memoize internally once a benchmark shows it
200    /// matters.
201    #[cfg(feature = "booking")]
202    #[must_use]
203    pub fn balance_view(&self) -> Vec<Directive> {
204        let mut booked: Vec<Directive> = self.directives.iter().map(|s| s.value.clone()).collect();
205
206        // Inlined from `rustledger_booking::merge_with_padding` so
207        // `booked` is moved (not re-cloned via `to_vec()`).
208        // Algorithmically identical: prepend synth transactions, then
209        // stable-sort by date. Same-date pad+balance pairs land as
210        // `[synth, pad, balance]` because synths sit at the front of
211        // their date-group pre-sort.
212        debug_assert!(
213            !booked.iter().any(|d| matches!(d, Directive::Transaction(t) if rustledger_booking::is_synthesized_pad(t))),
214            "balance_view called on a Ledger whose directives already contain synth pad transactions",
215        );
216        let pad_result = rustledger_booking::process_pads(&booked);
217        let mut merged: Vec<Directive> =
218            Vec::with_capacity(booked.len() + pad_result.padding_transactions.len());
219        for txn in pad_result.padding_transactions {
220            merged.push(Directive::Transaction(txn));
221        }
222        merged.append(&mut booked);
223        merged.sort_by_key(rustledger_core::Directive::date);
224        merged
225    }
226}
227
/// Unified error type for ledger processing.
///
/// Covers every kind of error produced while loading, booking,
/// running plugins, and validating.
#[derive(Debug)]
#[non_exhaustive]
pub struct LedgerError {
    /// Error severity.
    pub severity: ErrorSeverity,
    /// Error code (e.g., "E0001", "W8002").
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Source location, if available.
    pub location: Option<ErrorLocation>,
    /// Byte span (inclusive start, exclusive end) in the source file,
    /// used by rich renderers (e.g. miette) to draw a snippet around
    /// the offending directive. Consumers that only need `file:line:col`
    /// should use `location`; those that want to show the surrounding
    /// source text want this.
    pub source_span: Option<(usize, usize)>,
    /// Source file ID — index into the ledger's [`SourceMap`]. Used
    /// alongside `source_span` for snippet rendering.
    pub file_id: Option<u16>,
    /// Processing phase that produced this error: "parse", "validate", or "plugin".
    pub phase: String,
}

/// Error severity level.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// Error - indicates a problem that should be fixed.
    Error,
    /// Warning - indicates a potential issue.
    Warning,
}

/// Source location for an error.
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// File path.
    pub file: std::path::PathBuf,
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}

impl LedgerError {
    /// Phase recorded by [`LedgerError::error`] and
    /// [`LedgerError::warning`] until overridden with
    /// [`LedgerError::with_phase`].
    const DEFAULT_PHASE: &'static str = "validate";

    /// Shared constructor: the given severity, code and message, no
    /// location / span / file ID, and the default phase.
    fn new(severity: ErrorSeverity, code: String, message: String) -> Self {
        Self {
            severity,
            code,
            message,
            location: None,
            source_span: None,
            file_id: None,
            phase: Self::DEFAULT_PHASE.to_string(),
        }
    }

    /// Create a new error-severity entry.
    ///
    /// The phase defaults to `"validate"`; use
    /// [`LedgerError::with_phase`] to record a different one.
    /// Location, source span and file ID start out unset.
    pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self::new(ErrorSeverity::Error, code.into(), message.into())
    }

    /// Create a new warning-severity entry.
    ///
    /// The phase defaults to `"validate"`; use
    /// [`LedgerError::with_phase`] to record a different one.
    /// Location, source span and file ID start out unset.
    pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
        Self::new(ErrorSeverity::Warning, code.into(), message.into())
    }

    /// Attach a source span and file ID so rich renderers can draw a snippet.
    #[must_use]
    pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
        self.source_span = Some(span);
        self.file_id = Some(file_id);
        self
    }

    /// Set the processing phase for this error, replacing the
    /// `"validate"` default.
    #[must_use]
    pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
        self.phase = phase.into();
        self
    }

    /// Add a location to this error.
    #[must_use]
    pub fn with_location(mut self, location: ErrorLocation) -> Self {
        self.location = Some(location);
        self
    }
}
325
326/// Process a raw load result into a fully processed ledger.
327///
328/// Pipeline (see numbered comments below for the rationale of each step):
329///
330/// ```text
331///   1. sort                         (canonical display order)
332///   2. synth plugins                (auto_accounts, document_discovery)
333///   3. Early validation             (account presence, structural, lifecycle)
334///   4. booking                      (cost spec resolution, interpolation)
335///   5. partition                    (set aside failed-booking txns)
336///   6. regular plugins              (file plugins + extras, on booked only)
337///   7. Late validation              (balance, currency, inventory, on booked only)
338///   8. finalize                     (unused-pad warnings)
339///   9. re-merge                     (booked + failed → final Ledger.directives)
340/// ```
341pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
342    let mut errors: Vec<LedgerError> = Vec::new();
343
344    // Convert load errors to ledger errors (parse phase). Iterate by
345    // reference so `raw` stays borrowable for the rest of the pipeline
346    // (the phase transitions and validator setup below borrow it).
347    for load_err in &raw.errors {
348        errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
349    }
350
351    // Phase-typed pipeline (issue #1166). The phantom-typed
352    // `Directives<P>` wrapper makes the sequence
353    //
354    //     Raw → Sorted → Synthed → EarlyValidated → Booked
355    //         → RegularPluginsApplied → LateValidated → Finalized
356    //
357    // a compile-time property of the type system. Each transition
358    // method consumes one phase and produces the next; the compiler
359    // rejects any call-site that drops a phase, swaps two, or invokes
360    // a later phase on raw input. See `crates/rustledger-loader/src/phase.rs`.
361    //
362    // The transitions themselves wrap the existing subsystem entry
363    // points (`run_booking`, `run_plugins`, validators) without
364    // changing their semantics — this PR is the structural refactor
365    // only; behavior is bit-identical to the pre-#1166 pipeline.
366
367    // Resolve the effective booking method once, before the pipeline
368    // starts, so both the validator (early/late phases — needs it to
369    // seed each opened account's per-account booking method, see
370    // issue #1182) and the booking engine see the same value. File-
371    // level `option "booking_method"` wins when explicitly set;
372    // otherwise the API-level `LoadOptions.booking_method` is used.
373    #[cfg(any(feature = "validation", feature = "booking"))]
374    let effective_booking_method = resolve_effective_booking_method(&raw, options);
375
376    #[cfg(feature = "validation")]
377    let validation_session = if options.validate {
378        Some(rustledger_validate::ValidationSession::new(
379            build_validation_options(&raw.options, &raw.source_map, effective_booking_method),
380        ))
381    } else {
382        None
383    };
384
385    // Compute `today` once for both phases — avoids a midnight-crossing
386    // race where Early and Late could disagree on what day it is, and
387    // gives `FutureDate` warnings a single coherent reference point.
388    #[cfg(feature = "validation")]
389    let today = jiff::Zoned::now().date();
390
391    let synthed = crate::Directives::<crate::Raw>::from_parser(raw.directives)
392        .sort()
393        .apply_synth_plugins(
394            &raw.plugins,
395            &raw.options,
396            options,
397            &raw.source_map,
398            &mut errors,
399        )?;
400
401    // The validation feature changes `early_validate`'s shape: with
402    // it on we thread the `Option<ValidationSession<Pending>>` in and
403    // catch the returned `Option<ValidationSession<EarlyDone>>` for
404    // `late_validate` (typestate-moved per #1236); without it we just
405    // get the next-phase `Directives` back. Branching here keeps each
406    // cfg's signature small and prevents the call site from having to
407    // know the typestate phase parameters in the disabled case.
408    #[cfg(feature = "validation")]
409    let (directives, validation_session) =
410        synthed.early_validate(validation_session, today, &raw.source_map, &mut errors);
411    #[cfg(not(feature = "validation"))]
412    let directives = synthed.early_validate(&raw.source_map, &mut errors);
413
414    let (booked, failed) = directives.book(
415        #[cfg(feature = "booking")]
416        effective_booking_method,
417        #[cfg(feature = "booking")]
418        &mut errors,
419    );
420
421    let regular_applied = booked.apply_regular_plugins(
422        &raw.plugins,
423        &raw.options,
424        options,
425        &raw.source_map,
426        &mut errors,
427    )?;
428
429    #[cfg(feature = "validation")]
430    let late_validated =
431        regular_applied.late_validate(validation_session, today, &raw.source_map, &mut errors);
432    #[cfg(not(feature = "validation"))]
433    let late_validated = regular_applied.late_validate(&raw.source_map, &mut errors);
434
435    let finalized = late_validated.finalize(failed);
436
437    Ok(Ledger {
438        directives: finalized.into_inner(),
439        options: raw.options,
440        plugins: raw.plugins,
441        source_map: raw.source_map,
442        errors,
443        display_context: raw.display_context,
444    })
445}
446
/// Pick the booking method to use, combining `LoadOptions` with the
/// file-level option.
///
/// Lives outside `process()` so that the validator session (which
/// seeds per-account booking from it) and the booking engine are
/// handed the same value.
///
/// An explicit file-level `option "booking_method"` takes precedence.
/// When the file did not set it — or its value does not parse as a
/// booking method — `options.booking_method` is returned.
#[cfg(any(feature = "validation", feature = "booking"))]
fn resolve_effective_booking_method(
    raw: &LoadResult,
    options: &LoadOptions,
) -> rustledger_core::BookingMethod {
    let api_level = options.booking_method;

    // Not set in the file: the API-level value applies.
    if !raw.options.set_options.contains("booking_method") {
        return api_level;
    }

    // Set in the file: use it, falling back to the API-level value
    // when the string cannot be parsed.
    raw.options.booking_method.parse().unwrap_or(api_level)
}
468
469// ============================================================================
470// Phase transitions
471// ============================================================================
472//
473// Each transition consumes a `Directives<P>` of one phase and
474// produces a `Directives<NextP>` of the next phase. Bodies wrap the
475// existing subsystem calls (`run_booking`, `run_plugins`, validators)
476// without changing their semantics — only the type-level sequencing
477// is new. See `phase.rs` for the phase markers and overall rationale.
478
479/// Canonical display-order sort key: `(date, priority, file_id, span.start)`.
480/// What BQL / JSON / format output expects and what Python beancount
481/// produces. Used by `sort` (initial ordering) and `finalize` (re-sort
482/// after merging failed bookings back in).
483type CanonicalSortKey = (
484    rustledger_core::NaiveDate,
485    rustledger_core::DirectivePriority,
486    u16,
487    usize,
488);
489
490#[inline]
491const fn canonical_sort_key(d: &Spanned<Directive>) -> CanonicalSortKey {
492    (d.value.date(), d.value.priority(), d.file_id, d.span.start)
493}
494
495impl crate::Directives<crate::Raw> {
496    /// Sort directives into canonical display order — see
497    /// [`canonical_sort_key`].
498    ///
499    /// Booking needs a different iteration order (augmentations
500    /// BEFORE reductions on the same `(date, priority)`) but doesn't
501    /// need the underlying vec reordered — `run_booking` walks via
502    /// a transient `Vec<usize>` index. This sort goes once, here,
503    /// and the display order survives the rest of the pipeline.
504    #[must_use]
505    pub(crate) fn sort(mut self) -> crate::Directives<crate::Sorted> {
506        self.as_vec_mut().sort_by_key(canonical_sort_key);
507        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
508    }
509}
510
511impl crate::Directives<crate::Sorted> {
512    /// Run synth-only plugins (`auto_accounts`, `document_discovery`)
513    /// BEFORE early validation so the synthesizers inject Opens /
514    /// Documents that Early checks depend on (E1001 account
515    /// presence, E5001 missing-document file).
516    ///
517    /// Only this narrow synth subset runs here; everything else
518    /// waits until after booking (post-booking plugin pass) so
519    /// cost-spec-reading plugins see filled-in per-unit values on
520    /// `CostNumber::PerUnitFromTotal`. See `PluginPass` rustdoc for
521    /// the detailed split rationale.
522    pub(crate) fn apply_synth_plugins(
523        mut self,
524        plugins: &[crate::Plugin],
525        file_options: &crate::Options,
526        options: &LoadOptions,
527        source_map: &SourceMap,
528        errors: &mut Vec<LedgerError>,
529    ) -> Result<crate::Directives<crate::Synthed>, ProcessError> {
530        // `run_plugins` early-returns when no plugin entry matches the
531        // pass; no outer gate needed (and any outer gate risked
532        // missing one of the implicit-synth triggers — auto_accounts,
533        // document_discovery via `option "documents"`, file-declared
534        // synth plugins).
535        #[cfg(feature = "plugins")]
536        run_plugins(
537            self.as_vec_mut(),
538            plugins,
539            file_options,
540            options,
541            source_map,
542            errors,
543            PluginPass::PreBookingSynth,
544        )?;
545        // Suppress unused-arg warnings when `plugins` feature is off.
546        #[cfg(not(feature = "plugins"))]
547        {
548            let _ = (plugins, file_options, options, source_map, errors);
549        }
550        Ok(crate::Directives::new_unchecked(std::mem::take(
551            self.as_vec_mut(),
552        )))
553    }
554}
555
556impl crate::Directives<crate::Synthed> {
557    /// Run the early-phase validators. Account-presence /
558    /// lifecycle / structural errors are collected into `errors`
559    /// (via the `LedgerError` stream); the directive list itself is
560    /// unchanged by validation.
561    ///
562    /// Runs on pre-booking directives, AFTER synth plugins so
563    /// account-presence checks (E1001) see any Opens that plugins
564    /// like `auto_accounts` injected. This is what lets booking
565    /// match Python's "prune zero-interp postings" behavior without
566    /// losing E1001 on the elided-zero-to-unopened-account case
567    /// (rustledger#877).
568    #[cfg(feature = "validation")]
569    pub(crate) fn early_validate(
570        mut self,
571        validation_session: Option<
572            rustledger_validate::ValidationSession<rustledger_validate::Pending>,
573        >,
574        today: rustledger_core::NaiveDate,
575        source_map: &SourceMap,
576        errors: &mut Vec<LedgerError>,
577    ) -> (
578        crate::Directives<crate::EarlyValidated>,
579        Option<rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>>,
580    ) {
581        // Typestate move: consume `Pending`, return `EarlyDone`. The
582        // session must be threaded by value rather than `&mut`-borrowed
583        // because the phase parameter on `ValidationSession<P>` changes
584        // as a result of the call (#1236). The caller in `process()`
585        // captures the returned session and passes it to
586        // `late_validate`.
587        let session_out = validation_session.map(|session| {
588            let (session, phase_errors) = session.run_early_spanned(self.as_slice(), today);
589            ledger_errors_extend(errors, phase_errors, source_map);
590            session
591        });
592        (
593            crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut())),
594            session_out,
595        )
596    }
597
598    #[cfg(not(feature = "validation"))]
599    pub(crate) fn early_validate(
600        mut self,
601        source_map: &SourceMap,
602        errors: &mut Vec<LedgerError>,
603    ) -> crate::Directives<crate::EarlyValidated> {
604        let _ = (source_map, errors);
605        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
606    }
607}
608
609impl crate::Directives<crate::EarlyValidated> {
610    /// Run booking/interpolation. Returns the successfully-booked
611    /// directives plus a typed wrapper holding failed transactions.
612    ///
613    /// Failed transactions are in pre-booking shape (unresolved cost
614    /// specs, unfilled elided slots, possibly unbalanced); they
615    /// don't flow into regular plugins or Late validation — booking
616    /// already reported the root cause and the downstream checks
617    /// would cascade misleading errors. They get re-merged at
618    /// [`crate::Directives::<crate::LateValidated>::finalize`].
619    ///
620    /// When the `booking` feature is disabled this is an identity
621    /// transition: directives pass through unchanged and the failed
622    /// set is always empty. The same method exists in both feature
623    /// configurations so the caller in `process()` doesn't need a
624    /// `#[cfg]` match — the booking-specific arguments appear or
625    /// disappear via per-parameter `#[cfg]` attributes, mirroring
626    /// `early_validate` / `late_validate`.
627    pub(crate) fn book(
628        mut self,
629        #[cfg(feature = "booking")] effective_method: rustledger_core::BookingMethod,
630        #[cfg(feature = "booking")] errors: &mut Vec<LedgerError>,
631    ) -> (
632        crate::Directives<crate::Booked>,
633        crate::phase::FailedBookings,
634    ) {
635        #[cfg(feature = "booking")]
636        let (booked, failed) =
637            run_booking(std::mem::take(self.as_vec_mut()), effective_method, errors);
638        #[cfg(not(feature = "booking"))]
639        let (booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
640            (std::mem::take(self.as_vec_mut()), Vec::new());
641        (
642            crate::Directives::new_unchecked(booked),
643            crate::phase::FailedBookings::new(failed),
644        )
645    }
646}
647
648impl crate::Directives<crate::Booked> {
649    /// Run post-booking plugins — file-declared + CLI extras.
650    /// Cost-spec-reading plugins (`implicit_prices`,
651    /// `capital_gains_classifier`, `check_average_cost`,
652    /// `sell_gains`, `unrealized`, `valuation`) see filled-in
653    /// per-unit values on `CostNumber::PerUnitFromTotal` because
654    /// booking has run.
655    ///
656    /// Matches Python beancount's plugins-after-booking ordering
657    /// and closes rustledger#1117. Failed transactions were
658    /// partitioned out by `book`; plugins only see
659    /// successfully-booked input.
660    pub(crate) fn apply_regular_plugins(
661        mut self,
662        plugins: &[crate::Plugin],
663        file_options: &crate::Options,
664        options: &LoadOptions,
665        source_map: &SourceMap,
666        errors: &mut Vec<LedgerError>,
667    ) -> Result<crate::Directives<crate::RegularPluginsApplied>, ProcessError> {
668        // `run_plugins` early-returns when no plugin entry matches
669        // the pass; no outer gate needed.
670        #[cfg(feature = "plugins")]
671        run_plugins(
672            self.as_vec_mut(),
673            plugins,
674            file_options,
675            options,
676            source_map,
677            errors,
678            PluginPass::PostBooking,
679        )?;
680        #[cfg(not(feature = "plugins"))]
681        {
682            let _ = (plugins, file_options, options, source_map, errors);
683        }
684        Ok(crate::Directives::new_unchecked(std::mem::take(
685            self.as_vec_mut(),
686        )))
687    }
688}
689
690impl crate::Directives<crate::RegularPluginsApplied> {
691    /// Run the late-phase validators on booked + plugin-processed
692    /// directives. Reuses the `ValidationSession` from
693    /// `early_validate` so account / commodity / pad bookkeeping
694    /// carries forward.
695    #[cfg(feature = "validation")]
696    pub(crate) fn late_validate(
697        mut self,
698        validation_session: Option<
699            rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>,
700        >,
701        today: rustledger_core::NaiveDate,
702        source_map: &SourceMap,
703        errors: &mut Vec<LedgerError>,
704    ) -> crate::Directives<crate::LateValidated> {
705        // Typestate move: consume `EarlyDone`, drive through `LateDone`
706        // to `finalize()`. The compile-time enforcement here is that
707        // we cannot call `late_validate` with a fresh `Pending` session
708        // (no `From<Pending>` to `EarlyDone`), so the loader caller
709        // must have routed the session through `early_validate` first
710        // (#1236).
711        if let Some(session) = validation_session {
712            let (session, phase_errors) = session.run_late_spanned(self.as_slice(), today);
713            ledger_errors_extend(errors, phase_errors, source_map);
714            let finalize_errors = session.finalize();
715            ledger_errors_extend(errors, finalize_errors, source_map);
716        }
717        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
718    }
719
720    #[cfg(not(feature = "validation"))]
721    pub(crate) fn late_validate(
722        mut self,
723        source_map: &SourceMap,
724        errors: &mut Vec<LedgerError>,
725    ) -> crate::Directives<crate::LateValidated> {
726        let _ = (source_map, errors);
727        crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
728    }
729}
730
731impl crate::Directives<crate::LateValidated> {
732    /// Re-merge failed (un-booked) transactions back into the
733    /// directive list for output. The user wrote them and expects
734    /// to see them in `Ledger.directives`; we kept them isolated
735    /// from post-booking processing.
736    ///
737    /// Re-sorts to restore canonical display order — `booked`
738    /// retained order during plugin transformation; the sort
739    /// restores the failed entries' positions.
740    pub(crate) fn finalize(
741        mut self,
742        failed: crate::phase::FailedBookings,
743    ) -> crate::Directives<crate::Finalized> {
744        let mut v = std::mem::take(self.as_vec_mut());
745        v.extend(failed.into_inner());
746        v.sort_by_key(canonical_sort_key);
747        crate::Directives::new_unchecked(v)
748    }
749}
750
/// Book and interpolate every transaction, splitting the input into
/// `(booked, failed)`.
///
/// `directives` must already be in canonical display order
/// `(date, priority, file_id, span.start)`. Booking additionally needs
/// cost-reduction transactions to be processed AFTER augmentations
/// that share the same `(date, priority)`, so the lots exist by the
/// time they are matched. Instead of re-sorting the directives
/// themselves, a temporary list of indices is sorted into booking
/// order and used to drive the walk; the directives never move.
///
/// Transactions that fail to book are returned separately (second
/// tuple element) and an error is appended to `errors` for each. They
/// are still in pre-booking shape — unresolved cost specs, unfilled
/// elided slots — so letting them reach regular plugins or Late
/// validation would only cascade misleading errors. The caller is
/// expected to merge `failed` back into the final `Ledger.directives`
/// so the user still sees what they wrote.
///
/// Both returned vectors keep the relative order of the input.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|s| &s.value));

    // Booking order: `(date, priority, has_cost_reduction)`. The sort
    // is stable, so entries with equal keys keep their incoming
    // display order `(file_id, span.start)` as the tiebreak.
    let booking_key = |idx: usize| {
        let d = &directives[idx].value;
        (d.date(), d.priority(), d.has_cost_reduction())
    };
    let mut visit_order: Vec<usize> = (0..directives.len()).collect();
    visit_order.sort_by(|&a, &b| booking_key(a).cmp(&booking_key(b)));

    // One flag per input position, set when booking that position
    // fails. Every index is visited exactly once, so `failure_count`
    // equals the number of set flags.
    let mut booking_failed = vec![false; directives.len()];
    let mut failure_count = 0usize;

    for &idx in &visit_order {
        let Directive::Transaction(txn) = &mut directives[idx].value else {
            continue;
        };
        match engine.book_and_interpolate(txn) {
            Ok(result) => {
                engine.apply(&result.transaction);
                *txn = result.transaction;
            }
            Err(e) => {
                errors.push(LedgerError::error(
                    "BOOK",
                    format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                ));
                booking_failed[idx] = true;
                failure_count += 1;
            }
        }
    }

    // Consume the vec while partitioning: the flags line up with the
    // positions one-to-one, and nothing can mutate `directives`
    // between flagging and splitting.
    let mut booked = Vec::with_capacity(directives.len() - failure_count);
    let mut failed = Vec::with_capacity(failure_count);
    for (directive, did_fail) in directives.into_iter().zip(booking_failed) {
        if did_fail {
            failed.push(directive);
        } else {
            booked.push(directive);
        }
    }
    (booked, failed)
}
828
/// Which subset of plugins to run.
///
/// The loader pipeline calls `run_plugins` twice: once with
/// [`PluginPass::PreBookingSynth`] before the Early validation phase
/// (so synthesizers can inject Opens / Documents that early checks
/// depend on), and once with [`PluginPass::PostBooking`] after booking
/// (so cost-spec-reading plugins like `implicit_prices`,
/// `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
/// `unrealized`, and `valuation` see filled-in per-unit values on the
/// `CostNumber::PerUnitFromTotal` variant).
///
/// `run_plugins` assigns each plugin name to a pass by asking the
/// native registry whether the name resolves as a synth plugin: names
/// that do belong to the synth pass, every other name belongs to the
/// post-booking pass.
///
/// Standalone callers (LSP / FFI / tests on already-booked input) pass
/// [`PluginPass::PostBooking`] — synth plugins are a loader-internal
/// concern and would re-Open already-opened accounts if run a second
/// time.
#[cfg(feature = "plugins")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PluginPass {
    /// Only plugins that synthesize directives the Early validator
    /// depends on: `auto_accounts` (synthesizes Open directives) and
    /// the built-in document discovery walker (synthesizes Document
    /// directives the early phase checks for missing files).
    PreBookingSynth,
    /// All file-declared plugins and CLI `extra_plugins`, EXCLUDING
    /// `auto_accounts` and `document_discovery` (those ran pre-booking).
    /// Includes the 28 plugins that don't depend on synth state but
    /// may depend on booked cost specs, as well as any name that does
    /// not resolve in the synth registry.
    PostBooking,
}
858
/// A single plugin invocation in `run_plugins`'s unified dispatch
/// list.
///
/// Entries are built from three sources: implicit synth plugins
/// triggered by loader/file options, file-declared `plugin "..."`
/// directives, and CLI `--plugin` extras.
#[cfg(feature = "plugins")]
struct PluginInvocation {
    /// Plugin name exactly as supplied: a short native name, a dotted
    /// module path, or a path to a plugin file.
    name: String,
    /// Plugin-specific string passed to `PluginInput.config`.
    config: Option<String>,
    /// Set for the "python:..." prefix; overrides native resolution.
    force_python: bool,
}
874
875/// `pass` selects which subset of plugins to run — see [`PluginPass`].
876/// The loader pipeline calls this twice (synth pass before Early,
877/// regular pass after booking).
878#[cfg(feature = "plugins")]
879pub fn run_plugins(
880    directives: &mut Vec<Spanned<Directive>>,
881    file_plugins: &[Plugin],
882    file_options: &Options,
883    options: &LoadOptions,
884    source_map: &SourceMap,
885    errors: &mut Vec<LedgerError>,
886    pass: PluginPass,
887) -> Result<(), ProcessError> {
888    use rustledger_plugin::{NativePlugin, NativePluginRegistry, PluginInput, PluginOptions};
889
890    // Resolve document directories relative to the main file's directory.
891    // Used to build doc_discovery's per-call config in the synth pass.
892    let base_dir = source_map
893        .files()
894        .first()
895        .and_then(|f| f.path.parent())
896        .unwrap_or_else(|| std::path::Path::new("."));
897
898    // Access the process-wide registry singleton. The registry is
899    // immutable and stateless, so the same instance services every
900    // call.
901    let registry = NativePluginRegistry::global();
902
903    // Build the unified list of plugins to invoke for this pass:
904    //   1. Implicit synth plugins triggered by `LoadOptions` /
905    //      `file_options` (auto_accounts via `options.auto_accounts`;
906    //      document_discovery via non-empty `file_options.documents`).
907    //   2. File-declared plugins from `plugin "..."` directives.
908    //   3. CLI `--plugin` extras.
909    // Pass classification happens here — once — via `registry.find_synth`.
910    // A plugin enters the list iff its pass matches the requested `pass`.
911    let mut entries: Vec<PluginInvocation> = Vec::new();
912
913    if matches!(pass, PluginPass::PreBookingSynth) {
914        // Implicit synth: API-level auto_accounts flag.
915        if options.auto_accounts {
916            entries.push(PluginInvocation {
917                name: rustledger_plugin::AUTO_ACCOUNTS_NAME.to_string(),
918                config: None,
919                force_python: false,
920            });
921        }
922        // Implicit synth: document_discovery, driven by `option "documents"`.
923        // The plugin sits in the registry as a ZST; we hand it the
924        // resolved directories + base_dir via its config JSON.
925        if options.run_plugins && !file_options.documents.is_empty() {
926            let resolved: Vec<String> = file_options
927                .documents
928                .iter()
929                .map(|d| {
930                    let path = std::path::Path::new(d);
931                    if path.is_absolute() {
932                        d.clone()
933                    } else {
934                        base_dir.join(path).to_string_lossy().to_string()
935                    }
936                })
937                .collect();
938            entries.push(PluginInvocation {
939                name: rustledger_plugin::DOCUMENT_DISCOVERY_NAME.to_string(),
940                config: Some(rustledger_plugin::document_discovery_config(
941                    base_dir, &resolved,
942                )),
943                force_python: false,
944            });
945        }
946    }
947
948    // A plugin name belongs in the current pass iff its synth-marker
949    // membership matches `pass`. Non-native plugins (WASM/Python) are
950    // never in the synth registry and therefore always fall into the
951    // PostBooking pass.
952    let want_synth = matches!(pass, PluginPass::PreBookingSynth);
953
954    // File-declared plugins.
955    if options.run_plugins {
956        for plugin in file_plugins {
957            if registry.find_synth(&plugin.name).is_some() == want_synth {
958                entries.push(PluginInvocation {
959                    name: plugin.name.clone(),
960                    config: plugin.config.clone(),
961                    force_python: plugin.force_python,
962                });
963            }
964        }
965    }
966
967    // CLI extra plugins.
968    for extra in &options.extra_plugins {
969        if registry.find_synth(&extra.name).is_some() == want_synth {
970            entries.push(PluginInvocation {
971                name: extra.name.clone(),
972                config: extra.config.clone(),
973                force_python: false,
974            });
975        }
976    }
977
978    if entries.is_empty() {
979        return Ok(());
980    }
981
982    let plugin_options = PluginOptions {
983        operating_currencies: file_options.operating_currency.clone(),
984        title: file_options.title.clone(),
985    };
986
987    // Dispatch each entry. Native plugins resolve through the typed
988    // registry (`find_synth` / `find_regular`) keyed on the pass — the
989    // returned reference type reflects the pass. Anything that doesn't
990    // resolve falls through to the WASM/Python branches.
991    for invocation in &entries {
992        let PluginInvocation {
993            name: raw_name,
994            config: plugin_config,
995            force_python,
996        } = invocation;
997
998        // Dispatch via the typed registry. `find_synth`/`find_regular`
999        // internally take the short name (last `.`-separated segment),
1000        // so prefixed names like `"beancount.plugins.implicit_prices"`
1001        // resolve through the same call — no explicit prefix-stripping
1002        // needed. Returns `Some` only if the plugin exists AND its
1003        // marker trait matches the requested pass: a `RegularPlugin`
1004        // won't be returned from `find_synth` (and vice versa), even
1005        // on a name collision. Anything that returns `None` (WASM,
1006        // Python, unknown names, wrong-pass natives) falls through
1007        // to the WASM/Python branches below.
1008        let native_plugin: Option<&dyn NativePlugin> = if *force_python {
1009            None
1010        } else {
1011            match pass {
1012                PluginPass::PreBookingSynth => registry
1013                    .find_synth(raw_name)
1014                    .map(|p| p as &dyn NativePlugin),
1015                PluginPass::PostBooking => registry
1016                    .find_regular(raw_name)
1017                    .map(|p| p as &dyn NativePlugin),
1018            }
1019        };
1020
1021        if let Some(plugin) = native_plugin {
1022            let wrappers = build_wrappers(directives, source_map);
1023            let input = PluginInput {
1024                directives: wrappers,
1025                options: plugin_options.clone(),
1026                config: plugin_config.clone(),
1027            };
1028            let output = plugin.process(input);
1029            record_plugin_errors(errors, output.errors, source_map);
1030            apply_plugin_ops(directives, output.ops, errors, source_map)?;
1031        } else {
1032            // Not a native plugin — categorize and handle
1033            let plugin_path = std::path::Path::new(raw_name);
1034            let ext = plugin_path
1035                .extension()
1036                .and_then(|e| e.to_str())
1037                .unwrap_or("")
1038                .to_lowercase();
1039
1040            // The closure is only invoked from inside the wasm-plugins /
1041            // python-plugins cfg blocks below. The whole function is
1042            // already `#[cfg(feature = "plugins")]`, so this only matters
1043            // when `plugins` is enabled but neither child feature is
1044            // (e.g. `--features native-plugins`). Allow `unused_variables`
1045            // for exactly that configuration. Underscore-prefixing the
1046            // binding would have been the wrong fix because we DO call
1047            // the closure in builds with one of the features enabled,
1048            // which would trip `no_effect_underscore_binding` instead.
1049            #[cfg_attr(
1050                not(any(feature = "wasm-plugins", feature = "python-plugins")),
1051                allow(unused_variables)
1052            )]
1053            let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
1054                let p = std::path::Path::new(name);
1055                let resolved = if p.is_absolute() {
1056                    p.to_path_buf()
1057                } else {
1058                    base_dir.join(name)
1059                };
1060
1061                // Path security: prevent plugins from outside the ledger directory
1062                if options.path_security
1063                    && let (Ok(canon_base), Ok(canon_plugin)) =
1064                        (base_dir.canonicalize(), resolved.canonicalize())
1065                    && !canon_plugin.starts_with(&canon_base)
1066                {
1067                    return Err(format!(
1068                        "plugin path '{name}' is outside the ledger directory"
1069                    ));
1070                }
1071
1072                Ok(resolved)
1073            };
1074
1075            if ext == "wasm" {
1076                // WASM plugin
1077                #[cfg(feature = "wasm-plugins")]
1078                {
1079                    let wasm_path = match resolve_path(raw_name) {
1080                        Ok(p) => p,
1081                        Err(e) => {
1082                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
1083                            continue;
1084                        }
1085                    };
1086                    let wrappers = build_wrappers(directives, source_map);
1087                    match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config) {
1088                        Ok((ops, plugin_errors)) => {
1089                            for err in plugin_errors {
1090                                errors.push(err);
1091                            }
1092                            apply_plugin_ops(directives, ops, errors, source_map)?;
1093                        }
1094                        Err(e) => {
1095                            errors.push(
1096                                LedgerError::error(
1097                                    "PLUGIN",
1098                                    format!("WASM plugin {} failed: {e}", wasm_path.display()),
1099                                )
1100                                .with_phase("plugin"),
1101                            );
1102                        }
1103                    }
1104                }
1105                #[cfg(not(feature = "wasm-plugins"))]
1106                {
1107                    errors.push(
1108                        LedgerError::error(
1109                            "PLUGIN",
1110                            format!("WASM plugin '{raw_name}' requires the wasm-plugins feature"),
1111                        )
1112                        .with_phase("plugin"),
1113                    );
1114                }
1115            } else if *force_python
1116                || ext == "py"
1117                || raw_name.contains(std::path::MAIN_SEPARATOR)
1118                || raw_name.contains('.')
1119            {
1120                // Python module or file-based plugin (or force_python via "python:" prefix)
1121                #[cfg(feature = "python-plugins")]
1122                {
1123                    let resolved = match resolve_path(raw_name) {
1124                        Ok(p) => p,
1125                        Err(e) => {
1126                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
1127                            continue;
1128                        }
1129                    };
1130                    let wrappers = build_wrappers(directives, source_map);
1131                    match run_python_plugin(
1132                        raw_name,
1133                        &resolved,
1134                        base_dir,
1135                        &wrappers,
1136                        &plugin_options,
1137                        plugin_config,
1138                    ) {
1139                        Ok((ops, plugin_errors)) => {
1140                            for err in plugin_errors {
1141                                errors.push(err);
1142                            }
1143                            apply_plugin_ops(directives, ops, errors, source_map)?;
1144                        }
1145                        Err(e) => {
1146                            errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
1147                        }
1148                    }
1149                }
1150                #[cfg(not(feature = "python-plugins"))]
1151                {
1152                    errors.push(
1153                        LedgerError::error(
1154                            "E8005",
1155                            format!(
1156                                "Python plugin \"{raw_name}\" requires the python-plugins feature",
1157                            ),
1158                        )
1159                        .with_phase("plugin"),
1160                    );
1161                }
1162            } else {
1163                // Completely unknown plugin name — try to suggest a module path
1164                #[cfg(feature = "python-plugins")]
1165                {
1166                    use rustledger_plugin::python::{is_python_available, suggest_module_path};
1167                    let suggestion = if is_python_available() {
1168                        suggest_module_path(raw_name)
1169                    } else {
1170                        None
1171                    };
1172                    if let Some(module_path) = suggestion {
1173                        errors.push(
1174                                LedgerError::error(
1175                                    "E8004",
1176                                    format!(
1177                                        "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
1178                                    ),
1179                                )
1180                                .with_phase("plugin"),
1181                            );
1182                    } else {
1183                        errors.push(
1184                            LedgerError::error(
1185                                "E8001",
1186                                format!("Plugin not found: \"{raw_name}\""),
1187                            )
1188                            .with_phase("plugin"),
1189                        );
1190                    }
1191                }
1192                #[cfg(not(feature = "python-plugins"))]
1193                {
1194                    errors.push(
1195                        LedgerError::error("E8001", format!("Plugin not found: \"{raw_name}\""))
1196                            .with_phase("plugin"),
1197                    );
1198                }
1199            }
1200        }
1201    }
1202    // No final wrapper→directive conversion needed: `apply_plugin_ops`
1203    // updates `directives` in place after each plugin call, preserving
1204    // original spans on Keep/Modify ops. Plugin-synthesized directives
1205    // (Insert ops) get `SYNTHESIZED_FILE_ID` and a zero span.
1206    Ok(())
1207}
1208
/// Convert the current directives into a fresh
/// `Vec<DirectiveWrapper>` for handing to a plugin.
///
/// Each wrapper carries the filename and line number of its directive
/// when the directive's `file_id` resolves in `source_map`, so plugins
/// can report located errors; otherwise both are `None`. Spans are not
/// carried through the wrappers — the loader restores them in
/// `apply_plugin_ops` by matching on op index.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    let mut wrappers = Vec::with_capacity(directives.len());
    for spanned in directives {
        let (filename, lineno) = match source_map.get(spanned.file_id as usize) {
            Some(file) => {
                // Only the line is forwarded; the column is dropped.
                let (line, _col) = file.line_col(spanned.span.start);
                (Some(file.path.display().to_string()), Some(line as u32))
            }
            None => (None, None),
        };
        wrappers.push(directive_to_wrapper_with_location(
            &spanned.value,
            filename,
            lineno,
        ));
    }
    wrappers
}
1233
/// Append a plugin's errors to the ledger error stream.
///
/// Every entry is emitted under the `"PLUGIN"` code with
/// `phase: "plugin"`, as an error or a warning according to the
/// plugin-reported severity. When the plugin set both `source_file`
/// and `line_number`, an `ErrorLocation` is attached so downstream
/// renderers (CLI, LSP, JSON output) can point at the offending line.
///
/// Location resolution: a `source_file` that matches a loaded file in
/// the source map uses that file's stored path. A name that matches
/// nothing — e.g. a plugin-synthesized `"<auto_accounts>"` — is passed
/// through verbatim as a `PathBuf`, so the error is still attributed
/// to the originating plugin rather than losing the field. The column
/// is always 1 because the wrapper protocol carries no column.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    use rustledger_plugin::PluginErrorSeverity;

    for err in plugin_errors {
        let base = match err.severity {
            PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
            PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
        };
        let tagged = base.with_phase("plugin");

        let finished = match (&err.source_file, err.line_number) {
            (Some(file), Some(line)) => {
                let path = match source_map.get_by_path(std::path::Path::new(file)) {
                    Some(loaded) => loaded.path.clone(),
                    None => std::path::PathBuf::from(file),
                };
                tagged.with_location(ErrorLocation {
                    file: path,
                    line: line as usize,
                    column: 1,
                })
            }
            // Without both a file and a line there is nothing to attach.
            _ => tagged,
        };
        errors.push(finished);
    }
}
1278
/// Apply a plugin's `Vec<PluginOp>` to `directives` in place.
///
/// Validates that the op set forms a complete partition of the input
/// indices (each input index appears in exactly one `Keep` / `Modify` /
/// `Delete` op). Protocol violations produce a `PLUGIN` error in
/// `errors`, leave `directives` untouched and return `Ok(())`.
///
/// For `Keep(i)` / `Modify(i, w)`, the resulting `Spanned<Directive>`
/// inherits `directives[i]`'s span and `file_id` — this is the core of
/// the ops protocol's correctness guarantee (plugin-transformed
/// directives keep their original source identity for error reporting).
/// `Insert(w)` directives are attributed to the start of line
/// `w.lineno` in `w.filename` when that filename matches a loaded
/// source file; otherwise they get `(Span::ZERO, SYNTHESIZED_FILE_ID)`.
///
/// Inner posting spans returned by plugins are sanitized against the
/// host's `SourceMap` (see [`sanitize_inner_posting_spans`]) so a
/// misbehaving plugin cannot smuggle out-of-bounds spans into the LSP.
///
/// Kept directives are moved, not cloned: validation guarantees each
/// input index is consumed at most once.
///
/// # Errors
///
/// Returns [`ProcessError::PluginConversion`] when a `Modify` / `Insert`
/// wrapper cannot be converted back into a directive. All wrappers are
/// converted before `directives` is modified, so it is left untouched
/// in that case.
#[cfg(feature = "plugins")]
fn apply_plugin_ops(
    directives: &mut Vec<Spanned<Directive>>,
    ops: Vec<rustledger_plugin::PluginOp>,
    errors: &mut Vec<LedgerError>,
    source_map: &SourceMap,
) -> Result<(), ProcessError> {
    use rustledger_plugin::PluginOp;
    use rustledger_plugin::wrapper_to_directive;

    /// An op whose wrapper (if any) has already been converted, so
    /// applying it can no longer fail.
    enum Resolved {
        Keep(usize),
        Modify(usize, Directive),
        Insert(Spanned<Directive>),
    }

    let n = directives.len();

    // Validate: every input index in {Keep, Modify, Delete} exactly once.
    let mut seen = vec![false; n];
    for op in &ops {
        let idx = match op {
            PluginOp::Keep(i) | PluginOp::Modify(i, _) | PluginOp::Delete(i) => Some(*i),
            PluginOp::Insert(_) => None,
        };
        if let Some(i) = idx {
            if i >= n {
                errors.push(
                    LedgerError::error(
                        "PLUGIN",
                        format!(
                            "plugin op references out-of-bounds input index {i} (input has {n} directives)"
                        ),
                    )
                    .with_phase("plugin"),
                );
                return Ok(());
            }
            if seen[i] {
                errors.push(
                    LedgerError::error(
                        "PLUGIN",
                        format!("plugin op references input index {i} more than once"),
                    )
                    .with_phase("plugin"),
                );
                return Ok(());
            }
            seen[i] = true;
        }
    }
    // Report the first input index no op accounted for.
    if let Some(i) = seen.iter().position(|&was_seen| !was_seen) {
        errors.push(
            LedgerError::error(
                "PLUGIN",
                format!(
                    "plugin omitted input directive {i} (must appear in exactly one of Keep/Modify/Delete)"
                ),
            )
            .with_phase("plugin"),
        );
        return Ok(());
    }

    // Phase 1: convert every wrapper up front. A conversion failure
    // returns here, before `directives` has been touched.
    let mut resolved = Vec::with_capacity(ops.len());
    for op in ops {
        match op {
            PluginOp::Keep(i) => resolved.push(Resolved::Keep(i)),
            PluginOp::Modify(i, wrapper) => {
                let mut directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                // Plugins are not trusted to return well-formed inner
                // posting spans — a misbehaving plugin can synthesize a
                // file_id pointing at a nonexistent source or a span
                // that runs past EOF. The LSP later builds TextEdits
                // from these spans, so an out-of-bounds posting span
                // would produce a corrupt edit. Reset any inner posting
                // span that doesn't refer to a real loaded file or that
                // exceeds the file's length to `Spanned::synthesized`.
                sanitize_inner_posting_spans(&mut directive, source_map);
                resolved.push(Resolved::Modify(i, directive));
            }
            PluginOp::Insert(wrapper) => {
                // Resolve the wrapper's filename + line number, if set,
                // into a real (file_id, span) when the filename
                // corresponds to a loaded source file: an empty span at
                // the start of that line, or at offset 0 when the line
                // cannot be located. Falls back to SYNTHESIZED_FILE_ID +
                // zero span otherwise — including for plugin-only
                // attribution like `"<auto_accounts>"` (which never
                // matches a loaded file).
                let (span, file_id) = match (&wrapper.filename, wrapper.lineno) {
                    (Some(filename), Some(lineno)) => {
                        if let Some(file) = source_map.get_by_path(std::path::Path::new(filename)) {
                            let span_start = file.line_start(lineno as usize).unwrap_or(0);
                            (
                                rustledger_parser::Span::new(span_start, span_start),
                                file.id as u16,
                            )
                        } else {
                            (
                                rustledger_parser::Span::ZERO,
                                rustledger_parser::SYNTHESIZED_FILE_ID,
                            )
                        }
                    }
                    _ => (
                        rustledger_parser::Span::ZERO,
                        rustledger_parser::SYNTHESIZED_FILE_ID,
                    ),
                };
                let mut directive = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                // Same trust caveat as Modify: don't let an Insert smuggle
                // bogus inner-posting spans through.
                sanitize_inner_posting_spans(&mut directive, source_map);
                resolved.push(Resolved::Insert(
                    Spanned::new(directive, span).with_file_id(file_id as usize),
                ));
            }
            PluginOp::Delete(_) => {}
        }
    }

    // Phase 2: infallible. Move the originals out so `Keep` transfers
    // ownership instead of deep-cloning every surviving directive.
    let mut originals: Vec<Option<Spanned<Directive>>> =
        std::mem::take(directives).into_iter().map(Some).collect();
    directives.reserve(resolved.len());
    for entry in resolved {
        match entry {
            Resolved::Keep(i) => {
                let original = originals[i]
                    .take()
                    .expect("validated above: each input index is consumed at most once");
                directives.push(original);
            }
            Resolved::Modify(i, directive) => {
                let original = originals[i]
                    .take()
                    .expect("validated above: each input index is consumed at most once");
                directives.push(Spanned {
                    value: directive,
                    span: original.span,
                    file_id: original.file_id,
                });
            }
            Resolved::Insert(spanned) => directives.push(spanned),
        }
    }

    Ok(())
}
1422
/// Collapse every inner `Spanned<Posting>` that does not point at a
/// real loaded source range to [`Spanned::synthesized`].
///
/// Plugins are not trusted to hand back well-formed `file_id` + byte
/// ranges; left unchecked, a misbehaving plugin could induce
/// out-of-bounds LSP text edits. Only `Directive::Transaction` carries
/// postings, so every other directive is left alone.
///
/// A posting's location is accepted when:
/// - `file_id == SYNTHESIZED_FILE_ID` (genuine synthesis), OR
/// - the `file_id` resolves in `SourceMap` AND `start <= end <= len`
///   for that file's source.
///
/// Anything else becomes `Spanned::synthesized(posting)`. Postings that
/// are already synthesized have their span normalized to `Span::ZERO`
/// so the in-memory state matches the `Spanned::synthesized`
/// constructor's contract (`file_id` + `Span::ZERO`).
#[cfg(feature = "plugins")]
fn sanitize_inner_posting_spans(directive: &mut Directive, source_map: &SourceMap) {
    use rustledger_core::Span;
    use rustledger_parser::SYNTHESIZED_FILE_ID;

    let Directive::Transaction(txn) = directive else {
        return;
    };

    for posting in &mut txn.postings {
        if posting.file_id == SYNTHESIZED_FILE_ID {
            // Synthesized → the span is meaningless; force the
            // canonical zero span.
            posting.span = Span::ZERO;
            continue;
        }

        let in_bounds = match source_map.get(posting.file_id as usize) {
            Some(file) => {
                posting.span.start <= posting.span.end && posting.span.end <= file.source.len()
            }
            None => false,
        };
        if in_bounds {
            continue;
        }

        // Swap a throwaway posting in so the real one can be moved out
        // and re-wrapped as synthesized.
        let placeholder = rustledger_core::Posting::auto(rustledger_core::InternedStr::from(""));
        let inner = std::mem::replace(&mut posting.value, placeholder);
        *posting = rustledger_core::Spanned::synthesized(inner);
    }
}
1465
/// Translate loader-level file options into a
/// [`rustledger_validate::ValidationOptions`].
///
/// Kept as a standalone helper so the early and late validation phases
/// in `process()` configure their `ValidationSession` identically.
///
/// Document directories are resolved against the parent directory of
/// the first file in `source_map` (falling back to `"."` when there is
/// no such file or it has no parent); absolute entries are passed
/// through unchanged.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
    default_booking_method: BookingMethod,
) -> rustledger_validate::ValidationOptions {
    // Anchor for relative `documents` entries: the main file's directory.
    let base_dir = match source_map
        .files()
        .first()
        .and_then(|main_file| main_file.path.parent())
    {
        Some(parent) => parent,
        None => std::path::Path::new("."),
    };

    let resolved_document_dirs: Vec<std::path::PathBuf> = file_options
        .documents
        .iter()
        .map(std::path::Path::new)
        .map(|dir| {
            if dir.is_absolute() {
                dir.to_path_buf()
            } else {
                base_dir.join(dir)
            }
        })
        .collect();

    let account_types: Vec<String> = file_options
        .account_types()
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    let defaults = rustledger_validate::ValidationOptions::default();
    defaults
        .with_account_types(account_types)
        .with_document_dirs(resolved_document_dirs)
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(file_options.inferred_tolerance_default.clone())
        .with_default_booking_method(default_booking_method)
}
1517
/// Append a batch of [`rustledger_validate::ValidationError`]s to
/// `errors`, converting each into a loader-level [`LedgerError`] with a
/// resolved `file:line:column` location where one can be determined.
///
/// Shared by both validation phases in `process()` so they use one
/// conversion path.
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        // The advisory note (if any) is folded into the message so every
        // downstream format (LedgerError, JSON diagnostic, CLI report,
        // LSP diagnostic) carries it without needing a dedicated field.
        let message = if let Some(note) = &err.note {
            format!("{err}\n  note: {note}")
        } else {
            err.to_string()
        };

        // Resolve span + file_id into file/line/column up front so CLI
        // and LSP consumers can print `file:line:col` headers without
        // doing the lookup themselves (issue #901).
        let location = match (err.span, err.file_id) {
            (Some(span), Some(fid)) => source_map.get(fid as usize).map(|file| {
                let (line, column) = file.line_col(span.start);
                ErrorLocation {
                    file: file.path.clone(),
                    line,
                    column,
                }
            }),
            _ => None,
        };

        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1572
1573/// Load and fully process a beancount file.
1574///
1575/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1576/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1577///
1578/// # Example
1579///
1580/// ```ignore
1581/// use rustledger_loader::{load, LoadOptions};
1582/// use std::path::Path;
1583///
1584/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1585/// for error in &ledger.errors {
1586///     eprintln!("{}: {}", error.code, error.message);
1587/// }
1588/// ```
1589pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1590    let mut loader = crate::Loader::new();
1591
1592    if options.path_security {
1593        loader = loader.with_path_security(true);
1594    }
1595
1596    let raw = loader.load(path)?;
1597    process(raw, options)
1598}
1599
1600/// Load a beancount file without processing.
1601///
1602/// This returns raw directives without sorting, booking, or plugins.
1603/// Use this when you need the original parse output.
1604pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
1605    crate::Loader::new().load(path)
1606}
1607
/// Execute the WASM plugin at `wasm_path` against `directives`.
///
/// On success returns the plugin's output ops together with its reported
/// diagnostics, converted to [`LedgerError`]s tagged with code `"PLUGIN"`
/// and phase `"plugin"`. Load and execution failures are returned as a
/// formatted `String`.
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let handle = manager
        .load(wasm_path)
        .map_err(|e| format!("failed to load: {e}"))?;

    // The plugin input owns its data, so everything is copied in.
    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    let output = manager
        .execute(handle, &input)
        .map_err(|e| format!("execution failed: {e}"))?;

    // Map plugin severities onto the matching LedgerError constructor.
    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| {
            let converted = match err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
                PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
            };
            converted.with_phase("plugin")
        })
        .collect();

    Ok((output.ops, errors))
}
1648
/// Execute a Python plugin through the WASI-based Python runtime.
///
/// On success returns the plugin's output ops together with its reported
/// diagnostics, converted to [`LedgerError`]s tagged with code `"PLUGIN"`
/// and phase `"plugin"`. Runtime start-up and execution failures are
/// returned as a formatted `String`.
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, python::PythonRuntime};

    let runtime = PythonRuntime::new().map_err(|e| format!("Python runtime unavailable: {e}"))?;

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // A reference is treated as a file when the resolved path exists, the
    // name ends in `.py` (any case), or the name contains the platform
    // path separator.
    let has_py_extension = std::path::Path::new(module_name)
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("py"));
    let is_file = resolved_path.exists()
        || has_py_extension
        || module_name.contains(std::path::MAIN_SEPARATOR);

    // File and module references both go through `execute_module` with the
    // same arguments; the classification only selects the error message.
    let output = runtime
        .execute_module(module_name, &input, Some(base_dir))
        .map_err(|e| {
            if is_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    // Map plugin severities onto the matching LedgerError constructor.
    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| {
            let converted = match err.severity {
                PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
                PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
            };
            converted.with_phase("plugin")
        })
        .collect();

    Ok((output.ops, errors))
}
1701
/// Unit tests for `sanitize_inner_posting_spans`.
#[cfg(all(test, feature = "plugins"))]
mod sanitize_tests {
    use super::sanitize_inner_posting_spans;
    use crate::source_map::SourceMap;
    use rust_decimal_macros::dec;
    use rustledger_core::{
        Amount, Directive, IncompleteAmount, Posting, SYNTHESIZED_FILE_ID, Span, Spanned,
        Transaction,
    };
    use std::path::PathBuf;
    use std::sync::Arc;

    /// Build a `SourceMap` holding a single file with the given contents
    /// and return it together with that file's id.
    fn source_map_with_one_file(source: &str) -> (SourceMap, u16) {
        let mut map = SourceMap::new();
        let file_id = map.add_file(PathBuf::from("test.bean"), Arc::from(source));
        (map, file_id as u16)
    }

    /// A fixed `Assets:Cash 1 USD` posting tagged with the given location.
    fn posting_at(file_id: u16, span: Span) -> Spanned<Posting> {
        let amount = IncompleteAmount::Complete(Amount::new(dec!(1), "USD"));
        let posting = Posting::with_incomplete("Assets:Cash", amount);
        Spanned::new(posting, span).with_file_id(file_id as usize)
    }

    /// Wrap `postings` in a transaction directive dated 2024-01-15.
    fn txn_with_postings(postings: Vec<Spanned<Posting>>) -> Directive {
        let mut txn = Transaction::new(rustledger_core::naive_date(2024, 1, 15).unwrap(), "x");
        txn.postings = postings;
        Directive::Transaction(txn)
    }

    /// Run the sanitizer over a transaction containing only `posting`
    /// and hand back that posting as it looks afterwards.
    fn sanitize_one(source_map: &SourceMap, posting: Spanned<Posting>) -> Spanned<Posting> {
        let mut directive = txn_with_postings(vec![posting]);
        sanitize_inner_posting_spans(&mut directive, source_map);
        let Directive::Transaction(txn) = directive else {
            unreachable!()
        };
        txn.postings
            .into_iter()
            .next()
            .expect("transaction was built with exactly one posting")
    }

    #[test]
    fn span_within_real_file_is_preserved() {
        let (sm, fid) = source_map_with_one_file("0123456789");
        let posting = sanitize_one(&sm, posting_at(fid, Span::new(2, 6)));
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(2, 6));
    }

    #[test]
    fn span_past_eof_is_reset_to_synthesized() {
        // A misbehaving plugin claims the posting runs past the end of
        // the file. The sanitizer has to reject that, otherwise the LSP
        // could be tricked into emitting an out-of-bounds TextEdit.
        let (sm, fid) = source_map_with_one_file("0123456789"); // 10 bytes
        let posting = sanitize_one(&sm, posting_at(fid, Span::new(0, 9999)));
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO);
    }

    #[test]
    fn unknown_file_id_is_reset_to_synthesized() {
        // The plugin names a file_id the host's SourceMap has never seen.
        let (sm, _real) = source_map_with_one_file("hello");
        let posting = sanitize_one(&sm, posting_at(123, Span::new(0, 5)));
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO);
    }

    #[test]
    fn start_after_end_is_reset_to_synthesized() {
        let (sm, fid) = source_map_with_one_file("abcdef");
        let posting = sanitize_one(&sm, posting_at(fid, Span::new(5, 2)));
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO);
    }

    #[test]
    fn synthesized_file_id_is_left_alone_but_span_normalized() {
        // SYNTHESIZED_FILE_ID paired with a non-zero span: the posting
        // stays synthesized (the span means nothing there) and the span
        // is normalized to Span::ZERO to keep the state tidy.
        let (sm, _fid) = source_map_with_one_file("x");
        let posting = sanitize_one(&sm, posting_at(SYNTHESIZED_FILE_ID, Span::new(100, 200)));
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO, "synth span normalized");
    }

    #[test]
    fn boundary_span_eq_source_len_is_valid() {
        // end == source.len() is the canonical "to-end-of-file" span and
        // must be accepted.
        let (sm, fid) = source_map_with_one_file("abcd");
        let posting = sanitize_one(&sm, posting_at(fid, Span::new(0, 4)));
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(0, 4));
    }

    #[test]
    fn non_transaction_directive_is_left_alone() {
        // Only transactions are walked; other directive kinds carry no
        // inner posting spans.
        let (sm, _fid) = source_map_with_one_file("x");
        let mut directive = Directive::Open(rustledger_core::Open {
            date: rustledger_core::naive_date(2024, 1, 1).unwrap(),
            account: "Assets:Bank".into(),
            currencies: vec![],
            booking: None,
            meta: Default::default(),
        });
        sanitize_inner_posting_spans(&mut directive, &sm); // no panic, no change
        assert!(matches!(directive, Directive::Open(_)));
    }
}