rustledger_loader/process.rs
1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
7use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
8use rustledger_core::{BookingMethod, Directive, DisplayContext};
9use rustledger_parser::Spanned;
10use std::path::Path;
11use thiserror::Error;
12
/// A CLI-supplied (or programmatic) extra plugin invocation.
///
/// Bundles the plugin name with its optional config string so the two
/// can't drift apart — the previous parallel-Vec representation could
/// silently misalign a config with the wrong plugin.
///
/// Carried in [`LoadOptions::extra_plugins`].
#[derive(Debug, Clone)]
pub struct ExtraPlugin {
    /// Plugin name (short or fully-qualified module path).
    pub name: String,
    /// Plugin-specific config string; `None` when the invocation
    /// supplies no config.
    pub config: Option<String>,
}
25
/// Options for loading and processing a ledger.
///
/// [`LoadOptions::default`] yields the defaults noted on each field;
/// [`LoadOptions::raw`] additionally switches `run_plugins` and
/// `validate` off.
#[derive(Debug, Clone)]
pub struct LoadOptions {
    /// Booking method for lot matching (default: Strict).
    ///
    /// This is the API-level value: when the file explicitly sets
    /// `option "booking_method"` and that value parses, the file-level
    /// setting is used instead.
    pub booking_method: BookingMethod,
    /// Run plugins declared in the file (default: true).
    pub run_plugins: bool,
    /// Run `auto_accounts` plugin (default: false).
    pub auto_accounts: bool,
    /// Additional plugins to run (CLI `--plugin` or programmatic API),
    /// each with an optional config string (default: empty).
    pub extra_plugins: Vec<ExtraPlugin>,
    /// Run validation after processing (default: true).
    pub validate: bool,
    /// Enable path security (prevent include traversal)
    /// (default: false).
    pub path_security: bool,
}
43
44impl Default for LoadOptions {
45 fn default() -> Self {
46 Self {
47 booking_method: BookingMethod::Strict,
48 run_plugins: true,
49 auto_accounts: false,
50 extra_plugins: Vec::new(),
51 validate: true,
52 path_security: false,
53 }
54 }
55}
56
57impl LoadOptions {
58 /// Create options for raw loading (no booking, no plugins, no validation).
59 #[must_use]
60 pub const fn raw() -> Self {
61 Self {
62 booking_method: BookingMethod::Strict,
63 run_plugins: false,
64 auto_accounts: false,
65 extra_plugins: Vec::new(),
66 validate: false,
67 path_security: false,
68 }
69 }
70}
71
/// Errors that can occur during ledger processing.
///
/// Every variant except [`ProcessError::Load`] is compiled in only
/// when the matching cargo feature (`booking`, `plugins`,
/// `validation`) is enabled.
#[derive(Debug, Error)]
pub enum ProcessError {
    /// Loading failed. Converts from [`LoadError`] via `From`, so `?`
    /// on a `LoadError` result produces this variant.
    #[error("loading failed: {0}")]
    Load(#[from] LoadError),

    /// Booking/interpolation error.
    #[cfg(feature = "booking")]
    #[error("booking error: {message}")]
    Booking {
        /// Error message.
        message: String,
        /// Date of the transaction.
        date: rustledger_core::NaiveDate,
        /// Narration of the transaction.
        narration: String,
    },

    /// Plugin execution error.
    #[cfg(feature = "plugins")]
    #[error("plugin error: {0}")]
    Plugin(String),

    /// Validation error.
    #[cfg(feature = "validation")]
    #[error("validation error: {0}")]
    Validation(String),

    /// Plugin output conversion error.
    #[cfg(feature = "plugins")]
    #[error("failed to convert plugin output: {0}")]
    PluginConversion(String),
}
106
/// A fully processed ledger.
///
/// This is the result of loading and processing a beancount file,
/// equivalent to the tuple returned by Python's `loader.load_file()`.
#[derive(Debug)]
pub struct Ledger {
    /// Processed directives in source-faithful form: sorted by date,
    /// booked (cost specs resolved, interpolations applied), and
    /// plugin-rewritten. **`Pad` directives remain as `Pad`**; they
    /// are not pre-expanded into synthesized transactions.
    ///
    /// Transactions that failed booking are merged back in here in
    /// their pre-booking shape, so the user still sees what they wrote.
    ///
    /// Consumers split into two groups:
    ///
    /// - **Source-faithful consumers** (stats, journal, formatter,
    ///   LSP, BQL `FROM #entries WHERE type = 'pad'` audits,
    ///   source-mapped diagnostics) iterate this field directly.
    ///   Pads count as Pads.
    /// - **Balance-computing consumers** (holdings, balances,
    ///   balsheet, networth, income, FFI `query.execute`/`batch`,
    ///   WASM `expandPads`/`query`) call [`Ledger::balance_view`]
    ///   to get the directive stream MERGED with synthesized P-flag
    ///   transactions for each pad-balance pair. This is the only
    ///   way to get pad effects into per-account inventory math.
    ///
    /// The two views are derived from the same source; there is no
    /// drift possible because [`Ledger::balance_view`] is a pure
    /// function of `self.directives`.
    pub directives: Vec<Spanned<Directive>>,
    /// Options parsed from the file.
    pub options: Options,
    /// Plugins declared in the file.
    pub plugins: Vec<Plugin>,
    /// Source map for error reporting.
    pub source_map: SourceMap,
    /// Errors encountered during processing. Load, booking and
    /// validation problems are collected here rather than aborting,
    /// so a returned `Ledger` may still carry errors.
    pub errors: Vec<LedgerError>,
    /// Display context for formatting numbers.
    pub display_context: DisplayContext,
}
146
147impl Ledger {
148 /// Return the directive stream merged with synthesized
149 /// pad-equivalent transactions, suitable for inventory /
150 /// balance math.
151 ///
152 /// For each `Pad` directive followed (in date order) by a
153 /// `Balance` assertion on the same account, a `Transaction`
154 /// with `flag = 'P'` is added to the view carrying the
155 /// postings needed to make the balance match. A multi-currency
156 /// pad produces one synth transaction per currency.
157 ///
158 /// **Original `Pad` directives are preserved in the view.**
159 /// Synth transactions are added alongside, not in place of.
160 /// This matters for two reasons:
161 ///
162 /// 1. BQL queries against the `#entries` table
163 /// (`SELECT * FROM #entries WHERE type = 'pad'`) can still
164 /// enumerate the pad directives the user authored. A
165 /// REPLACE-style expansion would silently zero those out.
166 /// (BQL's default SELECT path operates on postings; pads
167 /// have no postings, so a default SELECT never matches them
168 /// regardless of this view shape.)
169 /// 2. Multi-pad cases (issue #1300) produce exactly one synth
170 /// per pad-balance pair:
171 /// `rustledger_booking::process_pads` (which
172 /// `merge_with_padding` delegates to) only retains the most
173 /// recent same-account pad in its pending-pads map, so
174 /// earlier same-account pads are silently shadowed and
175 /// their `source_account` does NOT contribute to the synth.
176 /// The validator emits `E2003` for shadowed pads
177 /// independently; this view reflects only the effective pad.
178 ///
179 /// Inventory-walking consumers iterate `Directive::Transaction`
180 /// and ignore `Pad` directives, so the preserved Pads are
181 /// invisible to them.
182 ///
183 /// **When to use this vs. [`Ledger.directives`](Self::directives):**
184 /// any consumer that maintains running per-account inventory
185 /// state and asks "what is the balance" needs this view. Any
186 /// consumer that asks "what did the user write" wants the raw
187 /// `directives` field.
188 ///
189 /// # Performance
190 ///
191 /// Each call clones every source directive once (`O(n)`).
192 /// Inlines the merge logic from
193 /// [`rustledger_booking::merge_with_padding`] so the already-
194 /// owned `booked` vector can be moved into the merged output
195 /// instead of cloned a second time. For short-lived CLI
196 /// invocations the single clone is negligible. Long-lived
197 /// processes (FFI servers, LSPs) that query the same ledger
198 /// repeatedly should hoist the result above their loop.
199 /// `TODO(perf):` memoize internally once a benchmark shows it
200 /// matters.
201 #[cfg(feature = "booking")]
202 #[must_use]
203 pub fn balance_view(&self) -> Vec<Directive> {
204 let mut booked: Vec<Directive> = self.directives.iter().map(|s| s.value.clone()).collect();
205
206 // Inlined from `rustledger_booking::merge_with_padding` so
207 // `booked` is moved (not re-cloned via `to_vec()`).
208 // Algorithmically identical: prepend synth transactions, then
209 // stable-sort by date. Same-date pad+balance pairs land as
210 // `[synth, pad, balance]` because synths sit at the front of
211 // their date-group pre-sort.
212 debug_assert!(
213 !booked.iter().any(|d| matches!(d, Directive::Transaction(t) if rustledger_booking::is_synthesized_pad(t))),
214 "balance_view called on a Ledger whose directives already contain synth pad transactions",
215 );
216 let pad_result = rustledger_booking::process_pads(&booked);
217 let mut merged: Vec<Directive> =
218 Vec::with_capacity(booked.len() + pad_result.padding_transactions.len());
219 for txn in pad_result.padding_transactions {
220 merged.push(Directive::Transaction(txn));
221 }
222 merged.append(&mut booked);
223 merged.sort_by_key(rustledger_core::Directive::date);
224 merged
225 }
226}
227
/// Unified error type for ledger processing.
///
/// This encompasses all error types that can occur during loading,
/// booking, plugin execution, and validation.
///
/// Build one with [`LedgerError::error`] or [`LedgerError::warning`]
/// and refine it with the `with_*` builder methods.
#[derive(Debug)]
#[non_exhaustive]
pub struct LedgerError {
    /// Error severity.
    pub severity: ErrorSeverity,
    /// Error code (e.g., "E0001", "W8002").
    pub code: String,
    /// Human-readable error message.
    pub message: String,
    /// Source location, if available.
    pub location: Option<ErrorLocation>,
    /// Byte span (inclusive start, exclusive end) in the source file,
    /// used by rich renderers (e.g. miette) to draw a snippet around
    /// the offending directive. Consumers that only need `file:line:col`
    /// should use `location`; those that want to show the surrounding
    /// source text want this.
    pub source_span: Option<(usize, usize)>,
    /// Source file ID — index into the ledger's [`SourceMap`]. Used
    /// alongside `source_span` for snippet rendering.
    pub file_id: Option<u16>,
    /// Processing phase that produced this error: "parse", "validate", or "plugin".
    /// The constructors initialise it to "validate"; override it with
    /// [`LedgerError::with_phase`].
    pub phase: String,
}
255
/// Error severity level attached to every [`LedgerError`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// Error - indicates a problem that should be fixed.
    Error,
    /// Warning - indicates a potential issue.
    Warning,
}
264
/// Source location for an error, in `file:line:column` form.
///
/// Attached to a [`LedgerError`] via [`LedgerError::with_location`].
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// File path.
    pub file: std::path::PathBuf,
    /// Line number (1-indexed).
    pub line: usize,
    /// Column number (1-indexed).
    pub column: usize,
}
275
276impl LedgerError {
277 /// Create a new error with the given phase.
278 pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
279 Self {
280 severity: ErrorSeverity::Error,
281 code: code.into(),
282 message: message.into(),
283 location: None,
284 source_span: None,
285 file_id: None,
286 phase: "validate".to_string(),
287 }
288 }
289
290 /// Create a new warning.
291 pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
292 Self {
293 severity: ErrorSeverity::Warning,
294 code: code.into(),
295 message: message.into(),
296 location: None,
297 source_span: None,
298 file_id: None,
299 phase: "validate".to_string(),
300 }
301 }
302
303 /// Attach a source span and file ID so rich renderers can draw a snippet.
304 #[must_use]
305 pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
306 self.source_span = Some(span);
307 self.file_id = Some(file_id);
308 self
309 }
310
311 /// Set the processing phase for this error.
312 #[must_use]
313 pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
314 self.phase = phase.into();
315 self
316 }
317
318 /// Add a location to this error.
319 #[must_use]
320 pub fn with_location(mut self, location: ErrorLocation) -> Self {
321 self.location = Some(location);
322 self
323 }
324}
325
326/// Process a raw load result into a fully processed ledger.
327///
328/// Pipeline (see numbered comments below for the rationale of each step):
329///
330/// ```text
331/// 1. sort (canonical display order)
332/// 2. synth plugins (auto_accounts, document_discovery)
333/// 3. Early validation (account presence, structural, lifecycle)
334/// 4. booking (cost spec resolution, interpolation)
335/// 5. partition (set aside failed-booking txns)
336/// 6. regular plugins (file plugins + extras, on booked only)
337/// 7. Late validation (balance, currency, inventory, on booked only)
338/// 8. finalize (unused-pad warnings)
339/// 9. re-merge (booked + failed → final Ledger.directives)
340/// ```
341pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
342 let mut errors: Vec<LedgerError> = Vec::new();
343
344 // Convert load errors to ledger errors (parse phase). Iterate by
345 // reference so `raw` stays borrowable for the rest of the pipeline
346 // (the phase transitions and validator setup below borrow it).
347 for load_err in &raw.errors {
348 errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
349 }
350
351 // Phase-typed pipeline (issue #1166). The phantom-typed
352 // `Directives<P>` wrapper makes the sequence
353 //
354 // Raw → Sorted → Synthed → EarlyValidated → Booked
355 // → RegularPluginsApplied → LateValidated → Finalized
356 //
357 // a compile-time property of the type system. Each transition
358 // method consumes one phase and produces the next; the compiler
359 // rejects any call-site that drops a phase, swaps two, or invokes
360 // a later phase on raw input. See `crates/rustledger-loader/src/phase.rs`.
361 //
362 // The transitions themselves wrap the existing subsystem entry
363 // points (`run_booking`, `run_plugins`, validators) without
364 // changing their semantics — this PR is the structural refactor
365 // only; behavior is bit-identical to the pre-#1166 pipeline.
366
367 // Resolve the effective booking method once, before the pipeline
368 // starts, so both the validator (early/late phases — needs it to
369 // seed each opened account's per-account booking method, see
370 // issue #1182) and the booking engine see the same value. File-
371 // level `option "booking_method"` wins when explicitly set;
372 // otherwise the API-level `LoadOptions.booking_method` is used.
373 #[cfg(any(feature = "validation", feature = "booking"))]
374 let effective_booking_method = resolve_effective_booking_method(&raw, options);
375
376 #[cfg(feature = "validation")]
377 let validation_session = if options.validate {
378 Some(rustledger_validate::ValidationSession::new(
379 build_validation_options(&raw.options, &raw.source_map, effective_booking_method),
380 ))
381 } else {
382 None
383 };
384
385 // Compute `today` once for both phases — avoids a midnight-crossing
386 // race where Early and Late could disagree on what day it is, and
387 // gives `FutureDate` warnings a single coherent reference point.
388 #[cfg(feature = "validation")]
389 let today = jiff::Zoned::now().date();
390
391 let synthed = crate::Directives::<crate::Raw>::from_parser(raw.directives)
392 .sort()
393 .apply_synth_plugins(
394 &raw.plugins,
395 &raw.options,
396 options,
397 &raw.source_map,
398 &mut errors,
399 )?;
400
401 // The validation feature changes `early_validate`'s shape: with
402 // it on we thread the `Option<ValidationSession<Pending>>` in and
403 // catch the returned `Option<ValidationSession<EarlyDone>>` for
404 // `late_validate` (typestate-moved per #1236); without it we just
405 // get the next-phase `Directives` back. Branching here keeps each
406 // cfg's signature small and prevents the call site from having to
407 // know the typestate phase parameters in the disabled case.
408 #[cfg(feature = "validation")]
409 let (directives, validation_session) =
410 synthed.early_validate(validation_session, today, &raw.source_map, &mut errors);
411 #[cfg(not(feature = "validation"))]
412 let directives = synthed.early_validate(&raw.source_map, &mut errors);
413
414 let (booked, failed) = directives.book(
415 #[cfg(feature = "booking")]
416 effective_booking_method,
417 #[cfg(feature = "booking")]
418 &mut errors,
419 );
420
421 let regular_applied = booked.apply_regular_plugins(
422 &raw.plugins,
423 &raw.options,
424 options,
425 &raw.source_map,
426 &mut errors,
427 )?;
428
429 #[cfg(feature = "validation")]
430 let late_validated =
431 regular_applied.late_validate(validation_session, today, &raw.source_map, &mut errors);
432 #[cfg(not(feature = "validation"))]
433 let late_validated = regular_applied.late_validate(&raw.source_map, &mut errors);
434
435 let finalized = late_validated.finalize(failed);
436
437 Ok(Ledger {
438 directives: finalized.into_inner(),
439 options: raw.options,
440 plugins: raw.plugins,
441 source_map: raw.source_map,
442 errors,
443 display_context: raw.display_context,
444 })
445}
446
/// Pick the booking method to use from the file-level option and the
/// API-level [`LoadOptions`].
///
/// Kept separate from `process()` so the validator session (which
/// seeds per-account booking from it) and the booking engine are fed
/// the same value.
///
/// Resolution order:
/// 1. If the file explicitly set `option "booking_method"` and the
///    value parses, use it.
/// 2. Otherwise — option not set, or its value does not parse — use
///    `options.booking_method`.
#[cfg(any(feature = "validation", feature = "booking"))]
fn resolve_effective_booking_method(
    raw: &LoadResult,
    options: &LoadOptions,
) -> rustledger_core::BookingMethod {
    let api_level = options.booking_method;

    // Guard: nothing set in the file, so the API-level value stands.
    if !raw.options.set_options.contains("booking_method") {
        return api_level;
    }

    // An unparsable file-level value falls back to the API-level one.
    raw.options.booking_method.parse().unwrap_or(api_level)
}
468
469// ============================================================================
470// Phase transitions
471// ============================================================================
472//
473// Each transition consumes a `Directives<P>` of one phase and
474// produces a `Directives<NextP>` of the next phase. Bodies wrap the
475// existing subsystem calls (`run_booking`, `run_plugins`, validators)
476// without changing their semantics — only the type-level sequencing
477// is new. See `phase.rs` for the phase markers and overall rationale.
478
/// Canonical display-order sort key: `(date, priority, file_id, span.start)`.
/// What BQL / JSON / format output expects and what Python beancount
/// produces. Used by `sort` (initial ordering) and `finalize` (re-sort
/// after merging failed bookings back in).
///
/// Tuples compare lexicographically, so each later component only
/// breaks ties left by the earlier ones.
type CanonicalSortKey = (
    rustledger_core::NaiveDate,
    rustledger_core::DirectivePriority,
    u16,
    usize,
);
489
490#[inline]
491const fn canonical_sort_key(d: &Spanned<Directive>) -> CanonicalSortKey {
492 (d.value.date(), d.value.priority(), d.file_id, d.span.start)
493}
494
495impl crate::Directives<crate::Raw> {
496 /// Sort directives into canonical display order — see
497 /// [`canonical_sort_key`].
498 ///
499 /// Booking needs a different iteration order (augmentations
500 /// BEFORE reductions on the same `(date, priority)`) but doesn't
501 /// need the underlying vec reordered — `run_booking` walks via
502 /// a transient `Vec<usize>` index. This sort goes once, here,
503 /// and the display order survives the rest of the pipeline.
504 #[must_use]
505 pub(crate) fn sort(mut self) -> crate::Directives<crate::Sorted> {
506 self.as_vec_mut().sort_by_key(canonical_sort_key);
507 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
508 }
509}
510
511impl crate::Directives<crate::Sorted> {
512 /// Run synth-only plugins (`auto_accounts`, `document_discovery`)
513 /// BEFORE early validation so the synthesizers inject Opens /
514 /// Documents that Early checks depend on (E1001 account
515 /// presence, E5001 missing-document file).
516 ///
517 /// Only this narrow synth subset runs here; everything else
518 /// waits until after booking (post-booking plugin pass) so
519 /// cost-spec-reading plugins see filled-in per-unit values on
520 /// `CostNumber::PerUnitFromTotal`. See `PluginPass` rustdoc for
521 /// the detailed split rationale.
522 pub(crate) fn apply_synth_plugins(
523 mut self,
524 plugins: &[crate::Plugin],
525 file_options: &crate::Options,
526 options: &LoadOptions,
527 source_map: &SourceMap,
528 errors: &mut Vec<LedgerError>,
529 ) -> Result<crate::Directives<crate::Synthed>, ProcessError> {
530 // `run_plugins` early-returns when no plugin entry matches the
531 // pass; no outer gate needed (and any outer gate risked
532 // missing one of the implicit-synth triggers — auto_accounts,
533 // document_discovery via `option "documents"`, file-declared
534 // synth plugins).
535 #[cfg(feature = "plugins")]
536 run_plugins(
537 self.as_vec_mut(),
538 plugins,
539 file_options,
540 options,
541 source_map,
542 errors,
543 PluginPass::PreBookingSynth,
544 )?;
545 // Suppress unused-arg warnings when `plugins` feature is off.
546 #[cfg(not(feature = "plugins"))]
547 {
548 let _ = (plugins, file_options, options, source_map, errors);
549 }
550 Ok(crate::Directives::new_unchecked(std::mem::take(
551 self.as_vec_mut(),
552 )))
553 }
554}
555
556impl crate::Directives<crate::Synthed> {
557 /// Run the early-phase validators. Account-presence /
558 /// lifecycle / structural errors are collected into `errors`
559 /// (via the `LedgerError` stream); the directive list itself is
560 /// unchanged by validation.
561 ///
562 /// Runs on pre-booking directives, AFTER synth plugins so
563 /// account-presence checks (E1001) see any Opens that plugins
564 /// like `auto_accounts` injected. This is what lets booking
565 /// match Python's "prune zero-interp postings" behavior without
566 /// losing E1001 on the elided-zero-to-unopened-account case
567 /// (rustledger#877).
568 #[cfg(feature = "validation")]
569 pub(crate) fn early_validate(
570 mut self,
571 validation_session: Option<
572 rustledger_validate::ValidationSession<rustledger_validate::Pending>,
573 >,
574 today: rustledger_core::NaiveDate,
575 source_map: &SourceMap,
576 errors: &mut Vec<LedgerError>,
577 ) -> (
578 crate::Directives<crate::EarlyValidated>,
579 Option<rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>>,
580 ) {
581 // Typestate move: consume `Pending`, return `EarlyDone`. The
582 // session must be threaded by value rather than `&mut`-borrowed
583 // because the phase parameter on `ValidationSession<P>` changes
584 // as a result of the call (#1236). The caller in `process()`
585 // captures the returned session and passes it to
586 // `late_validate`.
587 let session_out = validation_session.map(|session| {
588 let (session, phase_errors) = session.run_early_spanned(self.as_slice(), today);
589 ledger_errors_extend(errors, phase_errors, source_map);
590 session
591 });
592 (
593 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut())),
594 session_out,
595 )
596 }
597
598 #[cfg(not(feature = "validation"))]
599 pub(crate) fn early_validate(
600 mut self,
601 source_map: &SourceMap,
602 errors: &mut Vec<LedgerError>,
603 ) -> crate::Directives<crate::EarlyValidated> {
604 let _ = (source_map, errors);
605 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
606 }
607}
608
609impl crate::Directives<crate::EarlyValidated> {
610 /// Run booking/interpolation. Returns the successfully-booked
611 /// directives plus a typed wrapper holding failed transactions.
612 ///
613 /// Failed transactions are in pre-booking shape (unresolved cost
614 /// specs, unfilled elided slots, possibly unbalanced); they
615 /// don't flow into regular plugins or Late validation — booking
616 /// already reported the root cause and the downstream checks
617 /// would cascade misleading errors. They get re-merged at
618 /// [`crate::Directives::<crate::LateValidated>::finalize`].
619 ///
620 /// When the `booking` feature is disabled this is an identity
621 /// transition: directives pass through unchanged and the failed
622 /// set is always empty. The same method exists in both feature
623 /// configurations so the caller in `process()` doesn't need a
624 /// `#[cfg]` match — the booking-specific arguments appear or
625 /// disappear via per-parameter `#[cfg]` attributes, mirroring
626 /// `early_validate` / `late_validate`.
627 pub(crate) fn book(
628 mut self,
629 #[cfg(feature = "booking")] effective_method: rustledger_core::BookingMethod,
630 #[cfg(feature = "booking")] errors: &mut Vec<LedgerError>,
631 ) -> (
632 crate::Directives<crate::Booked>,
633 crate::phase::FailedBookings,
634 ) {
635 #[cfg(feature = "booking")]
636 let (booked, failed) =
637 run_booking(std::mem::take(self.as_vec_mut()), effective_method, errors);
638 #[cfg(not(feature = "booking"))]
639 let (booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
640 (std::mem::take(self.as_vec_mut()), Vec::new());
641 (
642 crate::Directives::new_unchecked(booked),
643 crate::phase::FailedBookings::new(failed),
644 )
645 }
646}
647
648impl crate::Directives<crate::Booked> {
649 /// Run post-booking plugins — file-declared + CLI extras.
650 /// Cost-spec-reading plugins (`implicit_prices`,
651 /// `capital_gains_classifier`, `check_average_cost`,
652 /// `sell_gains`, `unrealized`, `valuation`) see filled-in
653 /// per-unit values on `CostNumber::PerUnitFromTotal` because
654 /// booking has run.
655 ///
656 /// Matches Python beancount's plugins-after-booking ordering
657 /// and closes rustledger#1117. Failed transactions were
658 /// partitioned out by `book`; plugins only see
659 /// successfully-booked input.
660 pub(crate) fn apply_regular_plugins(
661 mut self,
662 plugins: &[crate::Plugin],
663 file_options: &crate::Options,
664 options: &LoadOptions,
665 source_map: &SourceMap,
666 errors: &mut Vec<LedgerError>,
667 ) -> Result<crate::Directives<crate::RegularPluginsApplied>, ProcessError> {
668 // `run_plugins` early-returns when no plugin entry matches
669 // the pass; no outer gate needed.
670 #[cfg(feature = "plugins")]
671 run_plugins(
672 self.as_vec_mut(),
673 plugins,
674 file_options,
675 options,
676 source_map,
677 errors,
678 PluginPass::PostBooking,
679 )?;
680 #[cfg(not(feature = "plugins"))]
681 {
682 let _ = (plugins, file_options, options, source_map, errors);
683 }
684 Ok(crate::Directives::new_unchecked(std::mem::take(
685 self.as_vec_mut(),
686 )))
687 }
688}
689
690impl crate::Directives<crate::RegularPluginsApplied> {
691 /// Run the late-phase validators on booked + plugin-processed
692 /// directives. Reuses the `ValidationSession` from
693 /// `early_validate` so account / commodity / pad bookkeeping
694 /// carries forward.
695 #[cfg(feature = "validation")]
696 pub(crate) fn late_validate(
697 mut self,
698 validation_session: Option<
699 rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>,
700 >,
701 today: rustledger_core::NaiveDate,
702 source_map: &SourceMap,
703 errors: &mut Vec<LedgerError>,
704 ) -> crate::Directives<crate::LateValidated> {
705 // Typestate move: consume `EarlyDone`, drive through `LateDone`
706 // to `finalize()`. The compile-time enforcement here is that
707 // we cannot call `late_validate` with a fresh `Pending` session
708 // (no `From<Pending>` to `EarlyDone`), so the loader caller
709 // must have routed the session through `early_validate` first
710 // (#1236).
711 if let Some(session) = validation_session {
712 let (session, phase_errors) = session.run_late_spanned(self.as_slice(), today);
713 ledger_errors_extend(errors, phase_errors, source_map);
714 let finalize_errors = session.finalize();
715 ledger_errors_extend(errors, finalize_errors, source_map);
716 }
717 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
718 }
719
720 #[cfg(not(feature = "validation"))]
721 pub(crate) fn late_validate(
722 mut self,
723 source_map: &SourceMap,
724 errors: &mut Vec<LedgerError>,
725 ) -> crate::Directives<crate::LateValidated> {
726 let _ = (source_map, errors);
727 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
728 }
729}
730
731impl crate::Directives<crate::LateValidated> {
732 /// Re-merge failed (un-booked) transactions back into the
733 /// directive list for output. The user wrote them and expects
734 /// to see them in `Ledger.directives`; we kept them isolated
735 /// from post-booking processing.
736 ///
737 /// Re-sorts to restore canonical display order — `booked`
738 /// retained order during plugin transformation; the sort
739 /// restores the failed entries' positions.
740 pub(crate) fn finalize(
741 mut self,
742 failed: crate::phase::FailedBookings,
743 ) -> crate::Directives<crate::Finalized> {
744 let mut v = std::mem::take(self.as_vec_mut());
745 v.extend(failed.into_inner());
746 v.sort_by_key(canonical_sort_key);
747 crate::Directives::new_unchecked(v)
748 }
749}
750
/// Run booking and interpolation on transactions, returning the
/// directives partitioned into `(booked, failed)`.
///
/// The caller has already sorted `directives` into canonical display
/// order `(date, priority, file_id, span.start)`. Booking needs the
/// extra constraint that cost-reduction transactions process AFTER
/// augmentations on the same `(date, priority)` so lots exist when
/// matched. Rather than re-sorting the whole vec, we walk it via a
/// transient `Vec<usize>` of indices sorted by booking order. Stable
/// sort preserves display-order tiebreaks between transactions with
/// the same `has_cost_reduction` flag.
///
/// Failed transactions are partitioned out into the second return
/// value so they don't flow into regular plugins or Late validation
/// (they're in pre-booking shape — postings have unresolved cost
/// specs and unfilled elided slots, so downstream processing would
/// cascade misleading errors). The caller is responsible for
/// re-merging `failed` into the final `Ledger.directives` for output
/// so the user still sees their original input.
///
/// Each failure also appends a `"BOOK"` error to `errors`, carrying
/// the engine's message plus the transaction's date and narration.
/// Both returned vecs keep the relative order of the input.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|s| &s.value));

    // Build an index ordered for booking. The key is
    // `(date, priority, has_cost_reduction)`: within one
    // `(date, priority)` group, directives without a cost reduction
    // come first. The sort is stable, so the remaining display-order
    // tiebreak (`file_id`, `span.start`) is inherited from the
    // existing positional order.
    let mut order: Vec<usize> = (0..directives.len()).collect();
    order.sort_by_key(|&i| {
        let d = &directives[i].value;
        (d.date(), d.priority(), d.has_cost_reduction())
    });

    // `failed_mask[i]` is set when `directives[i]` failed booking. A
    // positional mask lets the partition below run without a second
    // lookup structure.
    let mut failed_mask = vec![false; directives.len()];
    let mut failed_count = 0usize;
    for &i in &order {
        let spanned = &mut directives[i];
        if let Directive::Transaction(txn) = &mut spanned.value {
            match engine.book_and_interpolate(txn) {
                Ok(result) => {
                    // Feed the booked transaction to the engine, then
                    // store it in place of the original.
                    engine.apply(&result.transaction);
                    *txn = result.transaction;
                }
                Err(e) => {
                    errors.push(LedgerError::error(
                        "BOOK",
                        format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                    ));
                    // Each index is visited once, so the count stays
                    // equal to the number of set mask entries.
                    failed_mask[i] = true;
                    failed_count += 1;
                }
            }
        }
    }

    // Partition into (booked, failed) in original positional order.
    // The vec is consumed here, so there is no window in which it
    // could be mutated between marking failures and partitioning.
    let mut booked = Vec::with_capacity(directives.len() - failed_count);
    let mut failed = Vec::with_capacity(failed_count);
    for (directive, is_failed) in directives.into_iter().zip(failed_mask) {
        if is_failed {
            failed.push(directive);
        } else {
            booked.push(directive);
        }
    }
    (booked, failed)
}
828
/// Which subset of plugins a single `run_plugins` call should execute.
///
/// The loader pipeline calls `run_plugins` twice: once with
/// [`PluginPass::PreBookingSynth`] before the Early validation phase
/// (so synthesizers can inject Opens / Documents that early checks
/// depend on), and once with [`PluginPass::PostBooking`] after booking
/// (so cost-spec-reading plugins like `implicit_prices`,
/// `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
/// `unrealized`, and `valuation` see filled-in per-unit values on the
/// `CostNumber::PerUnitFromTotal` variant).
///
/// Classification is by name: `run_plugins` places a plugin in the
/// synth pass iff the native registry's `find_synth` lookup returns
/// `Some` for its name; every other name lands in the post-booking
/// pass.
///
/// Standalone callers (LSP / FFI / tests on already-booked input) pass
/// [`PluginPass::PostBooking`] — synth plugins are a loader-internal
/// concern and would re-Open already-opened accounts if run a second
/// time.
#[cfg(feature = "plugins")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PluginPass {
    /// Only plugins that synthesize directives the Early validator
    /// depends on: `auto_accounts` (synthesizes Open directives) and
    /// the built-in document discovery walker (synthesizes Document
    /// directives the early phase checks for missing files).
    PreBookingSynth,
    /// All file-declared plugins and CLI `extra_plugins`, EXCLUDING
    /// `auto_accounts` and `document_discovery` (those ran pre-booking).
    /// Includes the 28 plugins that don't depend on synth state but
    /// may depend on booked cost specs.
    PostBooking,
}
858
/// A single plugin invocation in `run_plugins`'s unified dispatch
/// list.
///
/// Entries are built from three sources — implicit synth plugins,
/// file-declared plugins, and CLI extras — and are then dispatched in
/// list order.
#[cfg(feature = "plugins")]
struct PluginInvocation {
    /// Plugin name as declared (short name, dotted module path, or a
    /// file path); used both for registry lookup and for path/extension
    /// based categorization of non-native plugins.
    name: String,
    /// Plugin-specific string passed to `PluginInput.config`.
    config: Option<String>,
    /// When set ("python:..." prefix), native registry resolution is
    /// skipped and the plugin is routed to the Python branch.
    force_python: bool,
}
874
/// Run the plugins selected by `pass` on `directives`.
///
/// Executes native plugins (including the implicit `auto_accounts` and
/// document-discovery synthesizers) and, depending on the enabled
/// features, WASM and Python plugins. `directives` is updated in place
/// after each plugin call; plugin-reported errors and plugin
/// resolution failures are appended to `errors`.
///
/// `pass` selects which subset of plugins to run — see [`PluginPass`].
/// The loader pipeline calls this twice (synth pass before Early,
/// regular pass after booking).
///
/// Which entries are considered:
/// - `auto_accounts` is added when `options.auto_accounts` is set
///   (synth pass only), independent of `options.run_plugins`.
/// - document discovery is added when `options.run_plugins` is set and
///   `file_options.documents` is non-empty (synth pass only).
/// - file-declared plugins are added only when `options.run_plugins`
///   is set.
/// - `options.extra_plugins` are always considered.
///
/// # Errors
///
/// Propagates the `ProcessError` returned by `apply_plugin_ops` (a
/// plugin-returned wrapper that cannot be converted back into a
/// directive). Everything else is reported through `errors` and the
/// function still returns `Ok(())`.
#[cfg(feature = "plugins")]
pub fn run_plugins(
    directives: &mut Vec<Spanned<Directive>>,
    file_plugins: &[Plugin],
    file_options: &Options,
    options: &LoadOptions,
    source_map: &SourceMap,
    errors: &mut Vec<LedgerError>,
    pass: PluginPass,
) -> Result<(), ProcessError> {
    use rustledger_plugin::{NativePlugin, NativePluginRegistry, PluginInput, PluginOptions};

    // Resolve document directories relative to the main file's directory.
    // Used to build doc_discovery's per-call config in the synth pass,
    // and as the base for relative WASM/Python plugin paths below.
    let base_dir = source_map
        .files()
        .first()
        .and_then(|f| f.path.parent())
        .unwrap_or_else(|| std::path::Path::new("."));

    // Access the process-wide registry singleton. The registry is
    // immutable and stateless, so the same instance services every
    // call.
    let registry = NativePluginRegistry::global();

    // Build the unified list of plugins to invoke for this pass:
    //   1. Implicit synth plugins triggered by `LoadOptions` /
    //      `file_options` (auto_accounts via `options.auto_accounts`;
    //      document_discovery via non-empty `file_options.documents`).
    //   2. File-declared plugins from `plugin "..."` directives.
    //   3. CLI `--plugin` extras.
    // Pass classification happens here — once — via `registry.find_synth`.
    // A plugin enters the list iff its pass matches the requested `pass`.
    let mut entries: Vec<PluginInvocation> = Vec::new();

    if matches!(pass, PluginPass::PreBookingSynth) {
        // Implicit synth: API-level auto_accounts flag.
        if options.auto_accounts {
            entries.push(PluginInvocation {
                name: rustledger_plugin::AUTO_ACCOUNTS_NAME.to_string(),
                config: None,
                force_python: false,
            });
        }
        // Implicit synth: document_discovery, driven by `option "documents"`.
        // The plugin sits in the registry as a ZST; we hand it the
        // resolved directories + base_dir via its config JSON.
        if options.run_plugins && !file_options.documents.is_empty() {
            let resolved: Vec<String> = file_options
                .documents
                .iter()
                .map(|d| {
                    let path = std::path::Path::new(d);
                    if path.is_absolute() {
                        d.clone()
                    } else {
                        base_dir.join(path).to_string_lossy().to_string()
                    }
                })
                .collect();
            entries.push(PluginInvocation {
                name: rustledger_plugin::DOCUMENT_DISCOVERY_NAME.to_string(),
                config: Some(rustledger_plugin::document_discovery_config(
                    base_dir, &resolved,
                )),
                force_python: false,
            });
        }
    }

    // A plugin name belongs in the current pass iff its synth-marker
    // membership matches `pass`. Non-native plugins (WASM/Python) are
    // never in the synth registry and therefore always fall into the
    // PostBooking pass.
    // NOTE(review): classification looks only at the name, not at
    // `force_python` — confirm a "python:"-forced plugin whose name
    // matches a synth plugin is meant to run in the synth pass.
    let want_synth = matches!(pass, PluginPass::PreBookingSynth);

    // File-declared plugins.
    if options.run_plugins {
        for plugin in file_plugins {
            if registry.find_synth(&plugin.name).is_some() == want_synth {
                entries.push(PluginInvocation {
                    name: plugin.name.clone(),
                    config: plugin.config.clone(),
                    force_python: plugin.force_python,
                });
            }
        }
    }

    // CLI extra plugins.
    for extra in &options.extra_plugins {
        if registry.find_synth(&extra.name).is_some() == want_synth {
            entries.push(PluginInvocation {
                name: extra.name.clone(),
                config: extra.config.clone(),
                force_python: false,
            });
        }
    }

    // Nothing selected for this pass: skip building plugin options.
    if entries.is_empty() {
        return Ok(());
    }

    let plugin_options = PluginOptions {
        operating_currencies: file_options.operating_currency.clone(),
        title: file_options.title.clone(),
    };

    // Dispatch each entry. Native plugins resolve through the typed
    // registry (`find_synth` / `find_regular`) keyed on the pass — the
    // returned reference type reflects the pass. Anything that doesn't
    // resolve falls through to the WASM/Python branches.
    for invocation in &entries {
        let PluginInvocation {
            name: raw_name,
            config: plugin_config,
            force_python,
        } = invocation;

        // Dispatch via the typed registry. `find_synth`/`find_regular`
        // internally take the short name (last `.`-separated segment),
        // so prefixed names like `"beancount.plugins.implicit_prices"`
        // resolve through the same call — no explicit prefix-stripping
        // needed. Returns `Some` only if the plugin exists AND its
        // marker trait matches the requested pass: a `RegularPlugin`
        // won't be returned from `find_synth` (and vice versa), even
        // on a name collision. Anything that returns `None` (WASM,
        // Python, unknown names, wrong-pass natives) falls through
        // to the WASM/Python branches below.
        let native_plugin: Option<&dyn NativePlugin> = if *force_python {
            None
        } else {
            match pass {
                PluginPass::PreBookingSynth => registry
                    .find_synth(raw_name)
                    .map(|p| p as &dyn NativePlugin),
                PluginPass::PostBooking => registry
                    .find_regular(raw_name)
                    .map(|p| p as &dyn NativePlugin),
            }
        };

        if let Some(plugin) = native_plugin {
            // Wrappers are rebuilt per plugin because the previous
            // plugin's ops may have changed `directives`.
            let wrappers = build_wrappers(directives, source_map);
            let input = PluginInput {
                directives: wrappers,
                options: plugin_options.clone(),
                config: plugin_config.clone(),
            };
            let output = plugin.process(input);
            record_plugin_errors(errors, output.errors, source_map);
            apply_plugin_ops(directives, output.ops, errors, source_map)?;
        } else {
            // Not a native plugin — categorize and handle
            let plugin_path = std::path::Path::new(raw_name);
            let ext = plugin_path
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("")
                .to_lowercase();

            // The closure is only invoked from inside the wasm-plugins /
            // python-plugins cfg blocks below. The whole function is
            // already `#[cfg(feature = "plugins")]`, so this only matters
            // when `plugins` is enabled but neither child feature is
            // (e.g. `--features native-plugins`). Allow `unused_variables`
            // for exactly that configuration. Underscore-prefixing the
            // binding would have been the wrong fix because we DO call
            // the closure in builds with one of the features enabled,
            // which would trip `no_effect_underscore_binding` instead.
            #[cfg_attr(
                not(any(feature = "wasm-plugins", feature = "python-plugins")),
                allow(unused_variables)
            )]
            let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
                let p = std::path::Path::new(name);
                let resolved = if p.is_absolute() {
                    p.to_path_buf()
                } else {
                    base_dir.join(name)
                };

                // Path security: prevent plugins from outside the ledger directory.
                // The check only fires when BOTH paths canonicalize; if either
                // canonicalization fails the path is passed through unchecked.
                if options.path_security
                    && let (Ok(canon_base), Ok(canon_plugin)) =
                        (base_dir.canonicalize(), resolved.canonicalize())
                    && !canon_plugin.starts_with(&canon_base)
                {
                    return Err(format!(
                        "plugin path '{name}' is outside the ledger directory"
                    ));
                }

                Ok(resolved)
            };

            if ext == "wasm" {
                // WASM plugin
                #[cfg(feature = "wasm-plugins")]
                {
                    let wasm_path = match resolve_path(raw_name) {
                        Ok(p) => p,
                        Err(e) => {
                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
                            continue;
                        }
                    };
                    let wrappers = build_wrappers(directives, source_map);
                    match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config) {
                        Ok((ops, plugin_errors)) => {
                            for err in plugin_errors {
                                errors.push(err);
                            }
                            apply_plugin_ops(directives, ops, errors, source_map)?;
                        }
                        Err(e) => {
                            errors.push(
                                LedgerError::error(
                                    "PLUGIN",
                                    format!("WASM plugin {} failed: {e}", wasm_path.display()),
                                )
                                .with_phase("plugin"),
                            );
                        }
                    }
                }
                #[cfg(not(feature = "wasm-plugins"))]
                {
                    errors.push(
                        LedgerError::error(
                            "PLUGIN",
                            format!("WASM plugin '{raw_name}' requires the wasm-plugins feature"),
                        )
                        .with_phase("plugin"),
                    );
                }
            } else if *force_python
                || ext == "py"
                || raw_name.contains(std::path::MAIN_SEPARATOR)
                || raw_name.contains('.')
            {
                // Python module or file-based plugin (or force_python via "python:" prefix)
                #[cfg(feature = "python-plugins")]
                {
                    let resolved = match resolve_path(raw_name) {
                        Ok(p) => p,
                        Err(e) => {
                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
                            continue;
                        }
                    };
                    let wrappers = build_wrappers(directives, source_map);
                    match run_python_plugin(
                        raw_name,
                        &resolved,
                        base_dir,
                        &wrappers,
                        &plugin_options,
                        plugin_config,
                    ) {
                        Ok((ops, plugin_errors)) => {
                            for err in plugin_errors {
                                errors.push(err);
                            }
                            apply_plugin_ops(directives, ops, errors, source_map)?;
                        }
                        Err(e) => {
                            errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
                        }
                    }
                }
                #[cfg(not(feature = "python-plugins"))]
                {
                    errors.push(
                        LedgerError::error(
                            "E8005",
                            format!(
                                "Python plugin \"{raw_name}\" requires the python-plugins feature",
                            ),
                        )
                        .with_phase("plugin"),
                    );
                }
            } else {
                // Completely unknown plugin name — try to suggest a module path
                #[cfg(feature = "python-plugins")]
                {
                    use rustledger_plugin::python::{is_python_available, suggest_module_path};
                    let suggestion = if is_python_available() {
                        suggest_module_path(raw_name)
                    } else {
                        None
                    };
                    if let Some(module_path) = suggestion {
                        errors.push(
                            LedgerError::error(
                                "E8004",
                                format!(
                                    "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
                                ),
                            )
                            .with_phase("plugin"),
                        );
                    } else {
                        errors.push(
                            LedgerError::error(
                                "E8001",
                                format!("Plugin not found: \"{raw_name}\""),
                            )
                            .with_phase("plugin"),
                        );
                    }
                }
                #[cfg(not(feature = "python-plugins"))]
                {
                    errors.push(
                        LedgerError::error("E8001", format!("Plugin not found: \"{raw_name}\""))
                            .with_phase("plugin"),
                    );
                }
            }
        }
    }
    // No final wrapper→directive conversion needed: `apply_plugin_ops`
    // updates `directives` in place after each plugin call, preserving
    // original spans on Keep/Modify ops. Plugin-synthesized directives
    // (Insert ops) are attributed to a loaded file when the wrapper's
    // filename + line number resolve to one; otherwise they get
    // `SYNTHESIZED_FILE_ID` and a zero span.
    Ok(())
}
1208
/// Snapshot the current directives as plugin-facing wrappers.
///
/// Every wrapper is tagged with the filename and line number of its
/// directive whenever the directive's `file_id` resolves in
/// `source_map`; directives without a resolvable file get `None` for
/// both. Spans are deliberately not carried through the wrappers —
/// `apply_plugin_ops` restores them by op index.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    let mut wrappers = Vec::with_capacity(directives.len());
    for entry in directives {
        // Resolve (path, line) together so the two are either both
        // present or both absent.
        let location = source_map.get(entry.file_id as usize).map(|file| {
            let (line, _col) = file.line_col(entry.span.start);
            (file.path.display().to_string(), line as u32)
        });
        let (filename, lineno) = match location {
            Some((name, line)) => (Some(name), Some(line)),
            None => (None, None),
        };
        wrappers.push(directive_to_wrapper_with_location(
            &entry.value,
            filename,
            lineno,
        ));
    }
    wrappers
}
1233
/// Convert plugin-reported errors into [`LedgerError`]s and append
/// them to `errors`.
///
/// Each error is tagged with `phase: "plugin"` and code `"PLUGIN"`,
/// keeping the plugin's severity (error vs. warning). When the plugin
/// supplied both `source_file` and `line_number`, an `ErrorLocation`
/// is attached so downstream renderers can point at the offending
/// line.
///
/// Location resolution: a `source_file` that matches a file in the
/// source map uses that file's stored path. Anything else (e.g. a
/// plugin-synthesized name such as `"<auto_accounts>"`) is passed
/// through verbatim as a `PathBuf`, so the error stays attributed to
/// its origin instead of losing the field. The column is always 1 —
/// plugin errors carry no column information.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    errors.extend(plugin_errors.into_iter().map(|err| {
        // Work out the optional location first; it only borrows `err`.
        let location = match (&err.source_file, err.line_number) {
            (Some(file), Some(line)) => {
                let known = source_map.get_by_path(std::path::Path::new(file));
                let resolved_path = match known {
                    Some(source_file) => source_file.path.clone(),
                    None => std::path::PathBuf::from(file),
                };
                Some(ErrorLocation {
                    file: resolved_path,
                    line: line as usize,
                    column: 1,
                })
            }
            _ => None,
        };

        let base = match err.severity {
            rustledger_plugin::PluginErrorSeverity::Error => {
                LedgerError::error("PLUGIN", err.message).with_phase("plugin")
            }
            rustledger_plugin::PluginErrorSeverity::Warning => {
                LedgerError::warning("PLUGIN", err.message).with_phase("plugin")
            }
        };

        match location {
            Some(loc) => base.with_location(loc),
            None => base,
        }
    }));
}
1278
/// Replace `directives` with the result of applying a plugin's ops.
///
/// The op set is first checked with
/// `rustledger_plugin::validate_op_coverage`. On a protocol violation
/// a `PLUGIN` error is pushed to `errors`, `directives` is left
/// untouched, and `Ok(())` is returned.
///
/// Per-op behavior:
/// - `Keep(i)` clones `directives[i]` unchanged.
/// - `Modify(i, w)` converts `w` and gives it `directives[i]`'s span
///   and `file_id`, so a plugin-transformed directive keeps its
///   original source identity for error reporting.
/// - `Insert(w)` converts `w`. If the wrapper carries a filename and
///   line number and the filename matches a loaded file, the directive
///   gets that file's id and an empty span at the start of that line
///   (offset 0 if the line can't be located). Otherwise it gets
///   `(Span::ZERO, SYNTHESIZED_FILE_ID)`.
/// - `Delete(_)` emits nothing.
///
/// Inner posting spans of converted directives are sanitized against
/// `source_map` (see [`sanitize_inner_posting_spans`]) because plugins
/// are not trusted to return in-bounds spans.
///
/// # Errors
///
/// Returns `ProcessError::PluginConversion` when a `Modify` or
/// `Insert` wrapper cannot be converted into a directive; in that case
/// `directives` has not been modified.
#[cfg(feature = "plugins")]
fn apply_plugin_ops(
    directives: &mut Vec<Spanned<Directive>>,
    ops: Vec<rustledger_plugin::PluginOp>,
    errors: &mut Vec<LedgerError>,
    source_map: &SourceMap,
) -> Result<(), ProcessError> {
    use rustledger_plugin::PluginOp;
    use rustledger_plugin::wrapper_to_directive;

    // The coverage contract lives in `rustledger-plugin`; a violation is
    // reported, not raised, and the input is kept as-is.
    let coverage = rustledger_plugin::validate_op_coverage(directives.len(), &ops);
    if let Err(violation) = coverage {
        errors.push(LedgerError::error("PLUGIN", violation).with_phase("plugin"));
        return Ok(());
    }

    // Build the replacement list on the side so an early `?` return
    // leaves `directives` intact.
    let mut rebuilt = Vec::with_capacity(ops.len());
    for op in ops {
        match op {
            PluginOp::Delete(_) => {}
            PluginOp::Keep(index) => rebuilt.push(directives[index].clone()),
            PluginOp::Modify(index, wrapper) => {
                let mut value = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                // Untrusted inner posting spans are reset before the
                // directive re-enters the host's state.
                sanitize_inner_posting_spans(&mut value, source_map);
                let origin = &directives[index];
                rebuilt.push(Spanned {
                    value,
                    span: origin.span,
                    file_id: origin.file_id,
                });
            }
            PluginOp::Insert(wrapper) => {
                let mut value = wrapper_to_directive(&wrapper)
                    .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
                sanitize_inner_posting_spans(&mut value, source_map);

                // Attribute the insert to a real file only when the
                // wrapper names one that is actually loaded.
                let located = match (&wrapper.filename, wrapper.lineno) {
                    (Some(filename), Some(lineno)) => source_map
                        .get_by_path(std::path::Path::new(filename))
                        .map(|file| {
                            let offset = file.line_start(lineno as usize).unwrap_or(0);
                            (
                                rustledger_parser::Span::new(offset, offset),
                                file.id as u16,
                            )
                        }),
                    _ => None,
                };
                let (span, file_id) = located.unwrap_or((
                    rustledger_parser::Span::ZERO,
                    rustledger_parser::SYNTHESIZED_FILE_ID,
                ));
                rebuilt.push(Spanned::new(value, span).with_file_id(file_id as usize));
            }
        }
    }

    *directives = rebuilt;
    Ok(())
}
1380
/// Neutralize untrusted posting locations inside a plugin-returned
/// directive.
///
/// Only `Directive::Transaction` is affected; every other directive is
/// left as-is. For each posting:
/// - `file_id == SYNTHESIZED_FILE_ID`: the posting stays synthesized
///   and its span is normalized to `Span::ZERO`, matching what
///   [`Spanned::synthesized`] produces.
/// - `file_id` resolves in `source_map` and `start <= end <= len` of
///   that file's source: the location is kept.
/// - anything else (unknown file, inverted span, span past EOF): the
///   posting is re-wrapped with `Spanned::synthesized`.
///
/// Without this, a misbehaving plugin could induce out-of-bounds LSP
/// text edits.
#[cfg(feature = "plugins")]
fn sanitize_inner_posting_spans(directive: &mut Directive, source_map: &SourceMap) {
    use rustledger_core::Span;
    use rustledger_parser::SYNTHESIZED_FILE_ID;

    let Directive::Transaction(txn) = directive else {
        return;
    };

    for posting in &mut txn.postings {
        if posting.file_id == SYNTHESIZED_FILE_ID {
            // Synthesized → span is meaningless; keep the state tidy.
            if posting.span != Span::ZERO {
                posting.span = Span::ZERO;
            }
            continue;
        }

        let in_bounds = source_map.get(posting.file_id as usize).is_some_and(|file| {
            posting.span.start <= posting.span.end && posting.span.end <= file.source.len()
        });
        if in_bounds {
            continue;
        }

        // Swap the posting out through a throwaway placeholder so it can
        // be re-wrapped by value without cloning.
        let placeholder = rustledger_core::Posting::auto(rustledger_core::InternedStr::from(""));
        let original = std::mem::replace(&mut posting.value, placeholder);
        *posting = rustledger_core::Spanned::synthesized(original);
    }
}
1423
/// Derive the validator's [`ValidationOptions`] from loader-level file
/// options.
///
/// Shared by the early and late validation phases in `process()` so
/// both run with the same configuration.
///
/// Document directories are resolved against the parent directory of
/// the first file in `source_map` (falling back to `"."` when there is
/// none): absolute entries pass through unchanged, relative entries
/// are joined onto that directory.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
    default_booking_method: BookingMethod,
) -> rustledger_validate::ValidationOptions {
    use rustledger_validate::ValidationOptions;

    // Directory of the main (first-loaded) file.
    let main_file = source_map.files().first();
    let main_parent = main_file.and_then(|f| f.path.parent());
    let base_dir = main_parent.unwrap_or_else(|| std::path::Path::new("."));

    let resolved_document_dirs: Vec<std::path::PathBuf> = file_options
        .documents
        .iter()
        .map(|raw| {
            let dir = std::path::Path::new(raw);
            if dir.is_absolute() {
                dir.to_path_buf()
            } else {
                base_dir.join(dir)
            }
        })
        .collect();

    let type_names = file_options.account_types();
    let account_types: Vec<String> = type_names
        .iter()
        .map(|name| (*name).to_string())
        .collect();

    // Structural settings first, then the tolerance knobs, then booking.
    let structural = ValidationOptions::default()
        .with_account_types(account_types)
        .with_document_dirs(resolved_document_dirs);
    let with_tolerances = structural
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(file_options.inferred_tolerance_default.clone());
    with_tolerances.with_default_booking_method(default_booking_method)
}
1475
/// Append validation errors to `errors` as loader-level
/// [`LedgerError`]s.
///
/// For each [`rustledger_validate::ValidationError`]:
/// - the phase is `"parse"` when the error code is a parse-phase code,
///   otherwise `"validate"`;
/// - the severity is a warning when the code says so, otherwise an
///   error;
/// - an advisory note, if present, is folded into the message so it
///   reaches every downstream format without a dedicated field;
/// - a `file:line:column` location is attached when the error has both
///   a span and a `file_id` that resolves in `source_map` (issue #901).
///
/// Shared by both validation phases in `process()`.
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };
        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };
        let message = err
            .note
            .as_ref()
            .map_or_else(|| err.to_string(), |note| format!("{err}\n note: {note}"));

        // A location needs a span, a file id, and a file the source map
        // actually knows about.
        let location = match (err.span, err.file_id) {
            (Some(span), Some(fid)) => source_map.get(fid as usize).map(|file| {
                let (line, column) = file.line_col(span.start);
                ErrorLocation {
                    file: file.path.clone(),
                    line,
                    column,
                }
            }),
            _ => None,
        };

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1530
1531/// Load and fully process a beancount file.
1532///
1533/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1534/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1535///
1536/// # Example
1537///
1538/// ```ignore
1539/// use rustledger_loader::{load, LoadOptions};
1540/// use std::path::Path;
1541///
1542/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1543/// for error in &ledger.errors {
1544/// eprintln!("{}: {}", error.code, error.message);
1545/// }
1546/// ```
1547pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1548 let mut loader = crate::Loader::new();
1549
1550 if options.path_security {
1551 loader = loader.with_path_security(true);
1552 }
1553
1554 let raw = loader.load(path)?;
1555 process(raw, options)
1556}
1557
/// Load a beancount file without processing.
///
/// This returns raw directives without sorting, booking, or plugins.
/// Use this when you need the original parse output. The file is read
/// with a freshly constructed default `Loader` (no options applied).
///
/// # Errors
///
/// Returns the [`LoadError`] produced by `Loader::load` for `path`.
pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
    crate::Loader::new().load(path)
}
1565
/// Load and execute a single WASM plugin.
///
/// A fresh `PluginManager` is created for the call, the module at
/// `wasm_path` is loaded into it, and the plugin is executed against a
/// copy of `directives` together with `options` and `config`.
///
/// On success returns the plugin's ops plus its reported errors,
/// converted to `LedgerError`s with code `"PLUGIN"` and phase
/// `"plugin"` (severity preserved). No source location is attached to
/// these errors.
///
/// # Errors
///
/// Returns a message prefixed with `"failed to load: "` when the
/// module can't be loaded, or `"execution failed: "` when running it
/// fails.
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let handle = manager
        .load(wasm_path)
        .map_err(|e| format!("failed to load: {e}"))?;

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };
    let output = manager
        .execute(handle, &input)
        .map_err(|e| format!("execution failed: {e}"))?;

    // Translate plugin-side severities into ledger errors.
    let reported: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| match err.severity {
            rustledger_plugin::PluginErrorSeverity::Error => {
                LedgerError::error("PLUGIN", err.message).with_phase("plugin")
            }
            rustledger_plugin::PluginErrorSeverity::Warning => {
                LedgerError::warning("PLUGIN", err.message).with_phase("plugin")
            }
        })
        .collect();

    Ok((output.ops, reported))
}
1606
/// Run a Python plugin through `PythonRuntime`.
///
/// The plugin is always executed via `execute_module(module_name, …,
/// Some(base_dir))` against a copy of `directives`. Whether the plugin
/// "looks like a file" — `resolved_path` exists, `module_name` ends in
/// `.py` (case-insensitive), or `module_name` contains the platform
/// path separator — only selects the wording of the failure message.
///
/// On success returns the plugin's ops plus its reported errors,
/// converted to `LedgerError`s with code `"PLUGIN"` and phase
/// `"plugin"` (severity preserved).
///
/// # Errors
///
/// Returns a message when the Python runtime cannot be created
/// (`"Python runtime unavailable: …"`) or when execution fails.
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginInput, python::PythonRuntime};

    let runtime = PythonRuntime::new().map_err(|e| format!("Python runtime unavailable: {e}"))?;

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // Decide up front (before running anything) whether this reads as a
    // file-based plugin; it only affects the error text below.
    let has_py_extension = std::path::Path::new(module_name)
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case("py"));
    let looks_like_file = resolved_path.exists()
        || has_py_extension
        || module_name.contains(std::path::MAIN_SEPARATOR);

    let output = runtime
        .execute_module(module_name, &input, Some(base_dir))
        .map_err(|e| {
            if looks_like_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    // Translate plugin-side severities into ledger errors.
    let reported: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| match err.severity {
            rustledger_plugin::PluginErrorSeverity::Error => {
                LedgerError::error("PLUGIN", err.message).with_phase("plugin")
            }
            rustledger_plugin::PluginErrorSeverity::Warning => {
                LedgerError::warning("PLUGIN", err.message).with_phase("plugin")
            }
        })
        .collect();

    Ok((output.ops, reported))
}
1659
/// Tests for `sanitize_inner_posting_spans`.
///
/// Each test builds a single-posting directive, runs the sanitizer against a
/// one-file `SourceMap`, and checks whether the posting's `(file_id, span)`
/// pair was kept or reset to `(SYNTHESIZED_FILE_ID, Span::ZERO)`.
#[cfg(all(test, feature = "plugins"))]
mod sanitize_tests {
    use super::sanitize_inner_posting_spans;
    use crate::source_map::SourceMap;
    use rust_decimal_macros::dec;
    use rustledger_core::{
        Amount, Directive, IncompleteAmount, Posting, SYNTHESIZED_FILE_ID, Span, Spanned,
        Transaction,
    };
    use std::path::PathBuf;
    use std::sync::Arc;

    /// Wraps `postings` in a transaction directive dated 2024-01-15.
    fn txn_with_postings(postings: Vec<Spanned<Posting>>) -> Directive {
        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
        let mut txn = Transaction::new(date, "x");
        txn.postings = postings;
        Directive::Transaction(txn)
    }

    /// Builds a `1 USD` posting to `Assets:Cash` that claims to live at
    /// `span` inside the file identified by `file_id`.
    fn posting_at(file_id: u16, span: Span) -> Spanned<Posting> {
        let posting = Posting::with_incomplete(
            "Assets:Cash",
            IncompleteAmount::Complete(Amount::new(dec!(1), "USD")),
        );
        // u16 -> usize is lossless; `From` states that where `as` would not.
        Spanned::new(posting, span).with_file_id(usize::from(file_id))
    }

    /// Creates a `SourceMap` holding one file (`test.bean`) with the given
    /// contents and returns it together with that file's id.
    ///
    /// # Panics
    ///
    /// Panics if the id handed out by the map does not fit in `u16`, rather
    /// than silently truncating it and making later assertions misleading.
    fn source_map_with_one_file(source: &str) -> (SourceMap, u16) {
        let mut sm = SourceMap::new();
        let id = sm.add_file(PathBuf::from("test.bean"), Arc::from(source));
        let id = u16::try_from(id).expect("test file id must fit in u16");
        (sm, id)
    }

    /// Returns the first posting of `directive`, which must be a transaction.
    fn first_posting(directive: &Directive) -> &Spanned<Posting> {
        let Directive::Transaction(txn) = directive else {
            unreachable!("fixture is always built by txn_with_postings")
        };
        &txn.postings[0]
    }

    /// Asserts that the first posting now carries the synthesized file id
    /// and a zero span.
    #[track_caller]
    fn assert_reset_to_synthesized(directive: &Directive) {
        let posting = first_posting(directive);
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO);
    }

    #[test]
    fn span_within_real_file_is_preserved() {
        let (sm, fid) = source_map_with_one_file("0123456789");
        let mut d = txn_with_postings(vec![posting_at(fid, Span::new(2, 6))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        let posting = first_posting(&d);
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(2, 6));
    }

    #[test]
    fn span_past_eof_is_reset_to_synthesized() {
        // Bug case: a misbehaving plugin claims the posting extends past
        // the file's actual length. The sanitizer must reject it so the
        // LSP can't be tricked into producing an out-of-bounds TextEdit.
        let (sm, fid) = source_map_with_one_file("0123456789"); // 10 bytes
        let mut d = txn_with_postings(vec![posting_at(fid, Span::new(0, 9999))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        assert_reset_to_synthesized(&d);
    }

    #[test]
    fn unknown_file_id_is_reset_to_synthesized() {
        // Plugin claims a file_id that the host's SourceMap doesn't know.
        let (sm, _real) = source_map_with_one_file("hello");
        let mut d = txn_with_postings(vec![posting_at(123, Span::new(0, 5))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        assert_reset_to_synthesized(&d);
    }

    #[test]
    fn start_after_end_is_reset_to_synthesized() {
        // Inverted span (start > end) inside an otherwise valid file.
        let (sm, fid) = source_map_with_one_file("abcdef");
        let mut d = txn_with_postings(vec![posting_at(fid, Span::new(5, 2))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        assert_reset_to_synthesized(&d);
    }

    #[test]
    fn synthesized_file_id_is_left_alone_but_span_normalized() {
        // file_id == SYNTHESIZED_FILE_ID with a non-zero span: the
        // sanitizer leaves it synthesized (span is meaningless for
        // synth postings) but normalizes to Span::ZERO for tidy state.
        let (sm, _fid) = source_map_with_one_file("x");
        let mut d = txn_with_postings(vec![posting_at(SYNTHESIZED_FILE_ID, Span::new(100, 200))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        let posting = first_posting(&d);
        assert_eq!(posting.file_id, SYNTHESIZED_FILE_ID);
        assert_eq!(posting.span, Span::ZERO, "synth span normalized");
    }

    #[test]
    fn boundary_span_eq_source_len_is_valid() {
        // end == source.len() is the canonical "to-end-of-file" span;
        // must not be rejected.
        let (sm, fid) = source_map_with_one_file("abcd");
        let mut d = txn_with_postings(vec![posting_at(fid, Span::new(0, 4))]);
        sanitize_inner_posting_spans(&mut d, &sm);
        let posting = first_posting(&d);
        assert_eq!(posting.file_id, fid);
        assert_eq!(posting.span, Span::new(0, 4));
    }

    #[test]
    fn non_transaction_directive_is_left_alone() {
        // Sanitizer only walks transactions; other directive types have
        // no inner posting spans.
        let (sm, _fid) = source_map_with_one_file("x");
        let mut d = Directive::Open(rustledger_core::Open {
            date: rustledger_core::naive_date(2024, 1, 1).unwrap(),
            account: "Assets:Bank".into(),
            currencies: vec![],
            booking: None,
            meta: Default::default(),
        });
        sanitize_inner_posting_spans(&mut d, &sm); // no panic, no change
        assert!(matches!(d, Directive::Open(_)));
    }
}