rustledger_loader/process.rs
1//! Processing pipeline: sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
2//!
3//! This module orchestrates the full processing pipeline for a beancount ledger,
4//! equivalent to Python's `loader.load_file()` function.
5
6// ratchet: fxhash-only — hot path; use FxHashMap/FxHashSet, not std SipHash collections (#1237).
7use crate::{LoadError, LoadResult, Options, Plugin, SourceMap};
8use rustledger_core::{BookingMethod, Directive, DisplayContext};
9use rustledger_parser::Spanned;
10use std::path::Path;
11use thiserror::Error;
12
/// One additional plugin invocation requested from outside the ledger
/// file (CLI `--plugin` flag or programmatic API).
///
/// The plugin name and its optional config string travel together in a
/// single value, so a config can never end up attached to the wrong
/// plugin — something the earlier parallel-`Vec` representation
/// allowed to happen silently.
#[derive(Debug, Clone)]
pub struct ExtraPlugin {
    /// Name of the plugin: either the short form or a fully-qualified
    /// module path.
    pub name: String,
    /// Optional plugin-specific configuration string.
    pub config: Option<String>,
}
25
26/// Options for loading and processing a ledger.
27#[derive(Debug, Clone)]
28pub struct LoadOptions {
29 /// Booking method for lot matching (default: Strict).
30 pub booking_method: BookingMethod,
31 /// Run plugins declared in the file (default: true).
32 pub run_plugins: bool,
33 /// Run `auto_accounts` plugin (default: false).
34 pub auto_accounts: bool,
35 /// Additional plugins to run (CLI `--plugin` or programmatic API),
36 /// each with an optional config string.
37 pub extra_plugins: Vec<ExtraPlugin>,
38 /// Run validation after processing (default: true).
39 pub validate: bool,
40 /// Enable path security (prevent include traversal).
41 pub path_security: bool,
42}
43
44impl Default for LoadOptions {
45 fn default() -> Self {
46 Self {
47 booking_method: BookingMethod::Strict,
48 run_plugins: true,
49 auto_accounts: false,
50 extra_plugins: Vec::new(),
51 validate: true,
52 path_security: false,
53 }
54 }
55}
56
57impl LoadOptions {
58 /// Create options for raw loading (no booking, no plugins, no validation).
59 #[must_use]
60 pub const fn raw() -> Self {
61 Self {
62 booking_method: BookingMethod::Strict,
63 run_plugins: false,
64 auto_accounts: false,
65 extra_plugins: Vec::new(),
66 validate: false,
67 path_security: false,
68 }
69 }
70}
71
72/// Errors that can occur during ledger processing.
73#[derive(Debug, Error)]
74pub enum ProcessError {
75 /// Loading failed.
76 #[error("loading failed: {0}")]
77 Load(#[from] LoadError),
78
79 /// Booking/interpolation error.
80 #[cfg(feature = "booking")]
81 #[error("booking error: {message}")]
82 Booking {
83 /// Error message.
84 message: String,
85 /// Date of the transaction.
86 date: rustledger_core::NaiveDate,
87 /// Narration of the transaction.
88 narration: String,
89 },
90
91 /// Plugin execution error.
92 #[cfg(feature = "plugins")]
93 #[error("plugin error: {0}")]
94 Plugin(String),
95
96 /// Validation error.
97 #[cfg(feature = "validation")]
98 #[error("validation error: {0}")]
99 Validation(String),
100
101 /// Plugin output conversion error.
102 #[cfg(feature = "plugins")]
103 #[error("failed to convert plugin output: {0}")]
104 PluginConversion(String),
105}
106
107/// A fully processed ledger.
108///
109/// This is the result of loading and processing a beancount file,
110/// equivalent to the tuple returned by Python's `loader.load_file()`.
111#[derive(Debug)]
112pub struct Ledger {
113 /// Processed directives in source-faithful form: sorted by date,
114 /// booked (cost specs resolved, interpolations applied), and
115 /// plugin-rewritten. **`Pad` directives remain as `Pad`**; they
116 /// are not pre-expanded into synthesized transactions.
117 ///
118 /// Consumers split into two groups:
119 ///
120 /// - **Source-faithful consumers** (stats, journal, formatter,
121 /// LSP, BQL `FROM #entries WHERE type = 'pad'` audits,
122 /// source-mapped diagnostics) iterate this field directly.
123 /// Pads count as Pads.
124 /// - **Balance-computing consumers** (holdings, balances,
125 /// balsheet, networth, income, FFI `query.execute`/`batch`,
126 /// WASM `expandPads`/`query`) call [`Ledger::balance_view`]
127 /// to get the directive stream MERGED with synthesized P-flag
128 /// transactions for each pad-balance pair. This is the only
129 /// way to get pad effects into per-account inventory math.
130 ///
131 /// The two views are derived from the same source; there is no
132 /// drift possible because [`Ledger::balance_view`] is a pure
133 /// function of `self.directives`.
134 pub directives: Vec<Spanned<Directive>>,
135 /// Options parsed from the file.
136 pub options: Options,
137 /// Plugins declared in the file.
138 pub plugins: Vec<Plugin>,
139 /// Source map for error reporting.
140 pub source_map: SourceMap,
141 /// Errors encountered during processing.
142 pub errors: Vec<LedgerError>,
143 /// Display context for formatting numbers.
144 pub display_context: DisplayContext,
145}
146
147impl Ledger {
148 /// Return the directive stream merged with synthesized
149 /// pad-equivalent transactions, suitable for inventory /
150 /// balance math.
151 ///
152 /// For each `Pad` directive followed (in date order) by a
153 /// `Balance` assertion on the same account, a `Transaction`
154 /// with `flag = 'P'` is added to the view carrying the
155 /// postings needed to make the balance match. A multi-currency
156 /// pad produces one synth transaction per currency.
157 ///
158 /// **Original `Pad` directives are preserved in the view.**
159 /// Synth transactions are added alongside, not in place of.
160 /// This matters for two reasons:
161 ///
162 /// 1. BQL queries against the `#entries` table
163 /// (`SELECT * FROM #entries WHERE type = 'pad'`) can still
164 /// enumerate the pad directives the user authored. A
165 /// REPLACE-style expansion would silently zero those out.
166 /// (BQL's default SELECT path operates on postings; pads
167 /// have no postings, so a default SELECT never matches them
168 /// regardless of this view shape.)
169 /// 2. Multi-pad cases (issue #1300) produce exactly one synth
170 /// per pad-balance pair:
171 /// `rustledger_booking::process_pads` (which
172 /// `merge_with_padding` delegates to) only retains the most
173 /// recent same-account pad in its pending-pads map, so
174 /// earlier same-account pads are silently shadowed and
175 /// their `source_account` does NOT contribute to the synth.
176 /// The validator emits `E2003` for shadowed pads
177 /// independently; this view reflects only the effective pad.
178 ///
179 /// Inventory-walking consumers iterate `Directive::Transaction`
180 /// and ignore `Pad` directives, so the preserved Pads are
181 /// invisible to them.
182 ///
183 /// **When to use this vs. [`Ledger.directives`](Self::directives):**
184 /// any consumer that maintains running per-account inventory
185 /// state and asks "what is the balance" needs this view. Any
186 /// consumer that asks "what did the user write" wants the raw
187 /// `directives` field.
188 ///
189 /// # Performance
190 ///
191 /// Each call clones every source directive once (`O(n)`).
192 /// Inlines the merge logic from
193 /// [`rustledger_booking::merge_with_padding`] so the already-
194 /// owned `booked` vector can be moved into the merged output
195 /// instead of cloned a second time. For short-lived CLI
196 /// invocations the single clone is negligible. Long-lived
197 /// processes (FFI servers, LSPs) that query the same ledger
198 /// repeatedly should hoist the result above their loop.
199 /// `TODO(perf):` memoize internally once a benchmark shows it
200 /// matters.
201 #[cfg(feature = "booking")]
202 #[must_use]
203 pub fn balance_view(&self) -> Vec<Directive> {
204 let mut booked: Vec<Directive> = self.directives.iter().map(|s| s.value.clone()).collect();
205
206 // Inlined from `rustledger_booking::merge_with_padding` so
207 // `booked` is moved (not re-cloned via `to_vec()`).
208 // Algorithmically identical: prepend synth transactions, then
209 // stable-sort by date. Same-date pad+balance pairs land as
210 // `[synth, pad, balance]` because synths sit at the front of
211 // their date-group pre-sort.
212 debug_assert!(
213 !booked.iter().any(|d| matches!(d, Directive::Transaction(t) if rustledger_booking::is_synthesized_pad(t))),
214 "balance_view called on a Ledger whose directives already contain synth pad transactions",
215 );
216 let pad_result = rustledger_booking::process_pads(&booked);
217 let mut merged: Vec<Directive> =
218 Vec::with_capacity(booked.len() + pad_result.padding_transactions.len());
219 for txn in pad_result.padding_transactions {
220 merged.push(Directive::Transaction(txn));
221 }
222 merged.append(&mut booked);
223 merged.sort_by_key(rustledger_core::Directive::date);
224 merged
225 }
226}
227
228/// Unified error type for ledger processing.
229///
230/// This encompasses all error types that can occur during loading,
231/// booking, plugin execution, and validation.
232#[derive(Debug)]
233#[non_exhaustive]
234pub struct LedgerError {
235 /// Error severity.
236 pub severity: ErrorSeverity,
237 /// Error code (e.g., "E0001", "W8002").
238 pub code: String,
239 /// Human-readable error message.
240 pub message: String,
241 /// Source location, if available.
242 pub location: Option<ErrorLocation>,
243 /// Byte span (inclusive start, exclusive end) in the source file,
244 /// used by rich renderers (e.g. miette) to draw a snippet around
245 /// the offending directive. Consumers that only need `file:line:col`
246 /// should use `location`; those that want to show the surrounding
247 /// source text want this.
248 pub source_span: Option<(usize, usize)>,
249 /// Source file ID — index into the ledger's [`SourceMap`]. Used
250 /// alongside `source_span` for snippet rendering.
251 pub file_id: Option<u16>,
252 /// Processing phase that produced this error: "parse", "validate", or "plugin".
253 pub phase: String,
254}
255
/// How serious a [`LedgerError`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ErrorSeverity {
    /// A real problem that should be fixed.
    Error,
    /// A potential issue worth looking at.
    Warning,
}
264
/// Position in a source file that an error refers to.
#[derive(Debug, Clone)]
pub struct ErrorLocation {
    /// Path of the file.
    pub file: std::path::PathBuf,
    /// Line number, counted from 1.
    pub line: usize,
    /// Column number, counted from 1.
    pub column: usize,
}
275
276impl LedgerError {
277 /// Create a new error with the given phase.
278 pub fn error(code: impl Into<String>, message: impl Into<String>) -> Self {
279 Self {
280 severity: ErrorSeverity::Error,
281 code: code.into(),
282 message: message.into(),
283 location: None,
284 source_span: None,
285 file_id: None,
286 phase: "validate".to_string(),
287 }
288 }
289
290 /// Create a new warning.
291 pub fn warning(code: impl Into<String>, message: impl Into<String>) -> Self {
292 Self {
293 severity: ErrorSeverity::Warning,
294 code: code.into(),
295 message: message.into(),
296 location: None,
297 source_span: None,
298 file_id: None,
299 phase: "validate".to_string(),
300 }
301 }
302
303 /// Attach a source span and file ID so rich renderers can draw a snippet.
304 #[must_use]
305 pub const fn with_source_span(mut self, span: (usize, usize), file_id: u16) -> Self {
306 self.source_span = Some(span);
307 self.file_id = Some(file_id);
308 self
309 }
310
311 /// Set the processing phase for this error.
312 #[must_use]
313 pub fn with_phase(mut self, phase: impl Into<String>) -> Self {
314 self.phase = phase.into();
315 self
316 }
317
318 /// Add a location to this error.
319 #[must_use]
320 pub fn with_location(mut self, location: ErrorLocation) -> Self {
321 self.location = Some(location);
322 self
323 }
324}
325
326/// Process a raw load result into a fully processed ledger.
327///
328/// Pipeline (see numbered comments below for the rationale of each step):
329///
330/// ```text
331/// 1. sort (canonical display order)
332/// 2. synth plugins (auto_accounts, document_discovery)
333/// 3. Early validation (account presence, structural, lifecycle)
334/// 4. booking (cost spec resolution, interpolation)
335/// 5. partition (set aside failed-booking txns)
336/// 6. regular plugins (file plugins + extras, on booked only)
337/// 7. Late validation (balance, currency, inventory, on booked only)
338/// 8. finalize (unused-pad warnings)
339/// 9. re-merge (booked + failed → final Ledger.directives)
340/// ```
341pub fn process(raw: LoadResult, options: &LoadOptions) -> Result<Ledger, ProcessError> {
342 let mut errors: Vec<LedgerError> = Vec::new();
343
344 // Convert load errors to ledger errors (parse phase). Iterate by
345 // reference so `raw` stays borrowable for the rest of the pipeline
346 // (the phase transitions and validator setup below borrow it).
347 for load_err in &raw.errors {
348 errors.push(LedgerError::error("LOAD", load_err.to_string()).with_phase("parse"));
349 }
350
351 // Phase-typed pipeline (issue #1166). The phantom-typed
352 // `Directives<P>` wrapper makes the sequence
353 //
354 // Raw → Sorted → Synthed → EarlyValidated → Booked
355 // → RegularPluginsApplied → LateValidated → Finalized
356 //
357 // a compile-time property of the type system. Each transition
358 // method consumes one phase and produces the next; the compiler
359 // rejects any call-site that drops a phase, swaps two, or invokes
360 // a later phase on raw input. See `crates/rustledger-loader/src/phase.rs`.
361 //
362 // The transitions themselves wrap the existing subsystem entry
363 // points (`run_booking`, `run_plugins`, validators) without
364 // changing their semantics — this PR is the structural refactor
365 // only; behavior is bit-identical to the pre-#1166 pipeline.
366
367 // Resolve the effective booking method once, before the pipeline
368 // starts, so both the validator (early/late phases — needs it to
369 // seed each opened account's per-account booking method, see
370 // issue #1182) and the booking engine see the same value. File-
371 // level `option "booking_method"` wins when explicitly set;
372 // otherwise the API-level `LoadOptions.booking_method` is used.
373 #[cfg(any(feature = "validation", feature = "booking"))]
374 let effective_booking_method = resolve_effective_booking_method(&raw, options);
375
376 #[cfg(feature = "validation")]
377 let validation_session = if options.validate {
378 Some(rustledger_validate::ValidationSession::new(
379 build_validation_options(&raw.options, &raw.source_map, effective_booking_method),
380 ))
381 } else {
382 None
383 };
384
385 // Compute `today` once for both phases — avoids a midnight-crossing
386 // race where Early and Late could disagree on what day it is, and
387 // gives `FutureDate` warnings a single coherent reference point.
388 #[cfg(feature = "validation")]
389 let today = jiff::Zoned::now().date();
390
391 let synthed = crate::Directives::<crate::Raw>::from_parser(raw.directives)
392 .sort()
393 .apply_synth_plugins(
394 &raw.plugins,
395 &raw.options,
396 options,
397 &raw.source_map,
398 &mut errors,
399 )?;
400
401 // The validation feature changes `early_validate`'s shape: with
402 // it on we thread the `Option<ValidationSession<Pending>>` in and
403 // catch the returned `Option<ValidationSession<EarlyDone>>` for
404 // `late_validate` (typestate-moved per #1236); without it we just
405 // get the next-phase `Directives` back. Branching here keeps each
406 // cfg's signature small and prevents the call site from having to
407 // know the typestate phase parameters in the disabled case.
408 #[cfg(feature = "validation")]
409 let (directives, validation_session) =
410 synthed.early_validate(validation_session, today, &raw.source_map, &mut errors);
411 #[cfg(not(feature = "validation"))]
412 let directives = synthed.early_validate(&raw.source_map, &mut errors);
413
414 let (booked, failed) = directives.book(
415 #[cfg(feature = "booking")]
416 effective_booking_method,
417 #[cfg(feature = "booking")]
418 &mut errors,
419 );
420
421 let regular_applied = booked.apply_regular_plugins(
422 &raw.plugins,
423 &raw.options,
424 options,
425 &raw.source_map,
426 &mut errors,
427 )?;
428
429 #[cfg(feature = "validation")]
430 let late_validated =
431 regular_applied.late_validate(validation_session, today, &raw.source_map, &mut errors);
432 #[cfg(not(feature = "validation"))]
433 let late_validated = regular_applied.late_validate(&raw.source_map, &mut errors);
434
435 let finalized = late_validated.finalize(failed);
436
437 Ok(Ledger {
438 directives: finalized.into_inner(),
439 options: raw.options,
440 plugins: raw.plugins,
441 source_map: raw.source_map,
442 errors,
443 display_context: raw.display_context,
444 })
445}
446
/// Pick the booking method that the whole pipeline will use.
///
/// Lives outside `process()` so the validator session (which seeds
/// per-account booking from it) and the booking engine are handed the
/// same value. A file-level `option "booking_method"` takes precedence
/// when it was explicitly set; otherwise the API-level
/// `LoadOptions.booking_method` is returned. A file-level value that
/// does not parse also falls back to the API-level method.
#[cfg(any(feature = "validation", feature = "booking"))]
fn resolve_effective_booking_method(
    raw: &LoadResult,
    options: &LoadOptions,
) -> rustledger_core::BookingMethod {
    // Nothing set in the file: the caller's choice stands.
    if !raw.options.set_options.contains("booking_method") {
        return options.booking_method;
    }
    raw.options
        .booking_method
        .parse()
        .unwrap_or(options.booking_method)
}
468
469// ============================================================================
470// Phase transitions
471// ============================================================================
472//
473// Each transition consumes a `Directives<P>` of one phase and
474// produces a `Directives<NextP>` of the next phase. Bodies wrap the
475// existing subsystem calls (`run_booking`, `run_plugins`, validators)
476// without changing their semantics — only the type-level sequencing
477// is new. See `phase.rs` for the phase markers and overall rationale.
478
479/// Canonical display-order sort key: `(date, priority, file_id, span.start)`.
480/// What BQL / JSON / format output expects and what Python beancount
481/// produces. Used by `sort` (initial ordering) and `finalize` (re-sort
482/// after merging failed bookings back in).
483type CanonicalSortKey = (
484 rustledger_core::NaiveDate,
485 rustledger_core::DirectivePriority,
486 u16,
487 usize,
488);
489
490#[inline]
491const fn canonical_sort_key(d: &Spanned<Directive>) -> CanonicalSortKey {
492 (d.value.date(), d.value.priority(), d.file_id, d.span.start)
493}
494
495impl crate::Directives<crate::Raw> {
496 /// Sort directives into canonical display order — see
497 /// [`canonical_sort_key`].
498 ///
499 /// Booking needs a different iteration order (augmentations
500 /// BEFORE reductions on the same `(date, priority)`) but doesn't
501 /// need the underlying vec reordered — `run_booking` walks via
502 /// a transient `Vec<usize>` index. This sort goes once, here,
503 /// and the display order survives the rest of the pipeline.
504 #[must_use]
505 pub(crate) fn sort(mut self) -> crate::Directives<crate::Sorted> {
506 self.as_vec_mut().sort_by_key(canonical_sort_key);
507 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
508 }
509}
510
511impl crate::Directives<crate::Sorted> {
512 /// Run synth-only plugins (`auto_accounts`, `document_discovery`)
513 /// BEFORE early validation so the synthesizers inject Opens /
514 /// Documents that Early checks depend on (E1001 account
515 /// presence, E5001 missing-document file).
516 ///
517 /// Only this narrow synth subset runs here; everything else
518 /// waits until after booking (post-booking plugin pass) so
519 /// cost-spec-reading plugins see filled-in per-unit values on
520 /// `CostNumber::PerUnitFromTotal`. See `PluginPass` rustdoc for
521 /// the detailed split rationale.
522 pub(crate) fn apply_synth_plugins(
523 mut self,
524 plugins: &[crate::Plugin],
525 file_options: &crate::Options,
526 options: &LoadOptions,
527 source_map: &SourceMap,
528 errors: &mut Vec<LedgerError>,
529 ) -> Result<crate::Directives<crate::Synthed>, ProcessError> {
530 // `run_plugins` early-returns when no plugin entry matches the
531 // pass; no outer gate needed (and any outer gate risked
532 // missing one of the implicit-synth triggers — auto_accounts,
533 // document_discovery via `option "documents"`, file-declared
534 // synth plugins).
535 #[cfg(feature = "plugins")]
536 run_plugins(
537 self.as_vec_mut(),
538 plugins,
539 file_options,
540 options,
541 source_map,
542 errors,
543 PluginPass::PreBookingSynth,
544 )?;
545 // Suppress unused-arg warnings when `plugins` feature is off.
546 #[cfg(not(feature = "plugins"))]
547 {
548 let _ = (plugins, file_options, options, source_map, errors);
549 }
550 Ok(crate::Directives::new_unchecked(std::mem::take(
551 self.as_vec_mut(),
552 )))
553 }
554}
555
556impl crate::Directives<crate::Synthed> {
557 /// Run the early-phase validators. Account-presence /
558 /// lifecycle / structural errors are collected into `errors`
559 /// (via the `LedgerError` stream); the directive list itself is
560 /// unchanged by validation.
561 ///
562 /// Runs on pre-booking directives, AFTER synth plugins so
563 /// account-presence checks (E1001) see any Opens that plugins
564 /// like `auto_accounts` injected. This is what lets booking
565 /// match Python's "prune zero-interp postings" behavior without
566 /// losing E1001 on the elided-zero-to-unopened-account case
567 /// (rustledger#877).
568 #[cfg(feature = "validation")]
569 pub(crate) fn early_validate(
570 mut self,
571 validation_session: Option<
572 rustledger_validate::ValidationSession<rustledger_validate::Pending>,
573 >,
574 today: rustledger_core::NaiveDate,
575 source_map: &SourceMap,
576 errors: &mut Vec<LedgerError>,
577 ) -> (
578 crate::Directives<crate::EarlyValidated>,
579 Option<rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>>,
580 ) {
581 // Typestate move: consume `Pending`, return `EarlyDone`. The
582 // session must be threaded by value rather than `&mut`-borrowed
583 // because the phase parameter on `ValidationSession<P>` changes
584 // as a result of the call (#1236). The caller in `process()`
585 // captures the returned session and passes it to
586 // `late_validate`.
587 let session_out = validation_session.map(|session| {
588 let (session, phase_errors) = session.run_early_spanned(self.as_slice(), today);
589 ledger_errors_extend(errors, phase_errors, source_map);
590 session
591 });
592 (
593 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut())),
594 session_out,
595 )
596 }
597
598 #[cfg(not(feature = "validation"))]
599 pub(crate) fn early_validate(
600 mut self,
601 source_map: &SourceMap,
602 errors: &mut Vec<LedgerError>,
603 ) -> crate::Directives<crate::EarlyValidated> {
604 let _ = (source_map, errors);
605 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
606 }
607}
608
609impl crate::Directives<crate::EarlyValidated> {
610 /// Run booking/interpolation. Returns the successfully-booked
611 /// directives plus a typed wrapper holding failed transactions.
612 ///
613 /// Failed transactions are in pre-booking shape (unresolved cost
614 /// specs, unfilled elided slots, possibly unbalanced); they
615 /// don't flow into regular plugins or Late validation — booking
616 /// already reported the root cause and the downstream checks
617 /// would cascade misleading errors. They get re-merged at
618 /// [`crate::Directives::<crate::LateValidated>::finalize`].
619 ///
620 /// When the `booking` feature is disabled this is an identity
621 /// transition: directives pass through unchanged and the failed
622 /// set is always empty. The same method exists in both feature
623 /// configurations so the caller in `process()` doesn't need a
624 /// `#[cfg]` match — the booking-specific arguments appear or
625 /// disappear via per-parameter `#[cfg]` attributes, mirroring
626 /// `early_validate` / `late_validate`.
627 pub(crate) fn book(
628 mut self,
629 #[cfg(feature = "booking")] effective_method: rustledger_core::BookingMethod,
630 #[cfg(feature = "booking")] errors: &mut Vec<LedgerError>,
631 ) -> (
632 crate::Directives<crate::Booked>,
633 crate::phase::FailedBookings,
634 ) {
635 #[cfg(feature = "booking")]
636 let (booked, failed) =
637 run_booking(std::mem::take(self.as_vec_mut()), effective_method, errors);
638 #[cfg(not(feature = "booking"))]
639 let (booked, failed): (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) =
640 (std::mem::take(self.as_vec_mut()), Vec::new());
641 (
642 crate::Directives::new_unchecked(booked),
643 crate::phase::FailedBookings::new(failed),
644 )
645 }
646}
647
648impl crate::Directives<crate::Booked> {
649 /// Run post-booking plugins — file-declared + CLI extras.
650 /// Cost-spec-reading plugins (`implicit_prices`,
651 /// `capital_gains_classifier`, `check_average_cost`,
652 /// `sell_gains`, `unrealized`, `valuation`) see filled-in
653 /// per-unit values on `CostNumber::PerUnitFromTotal` because
654 /// booking has run.
655 ///
656 /// Matches Python beancount's plugins-after-booking ordering
657 /// and closes rustledger#1117. Failed transactions were
658 /// partitioned out by `book`; plugins only see
659 /// successfully-booked input.
660 pub(crate) fn apply_regular_plugins(
661 mut self,
662 plugins: &[crate::Plugin],
663 file_options: &crate::Options,
664 options: &LoadOptions,
665 source_map: &SourceMap,
666 errors: &mut Vec<LedgerError>,
667 ) -> Result<crate::Directives<crate::RegularPluginsApplied>, ProcessError> {
668 // `run_plugins` early-returns when no plugin entry matches
669 // the pass; no outer gate needed.
670 #[cfg(feature = "plugins")]
671 run_plugins(
672 self.as_vec_mut(),
673 plugins,
674 file_options,
675 options,
676 source_map,
677 errors,
678 PluginPass::PostBooking,
679 )?;
680 #[cfg(not(feature = "plugins"))]
681 {
682 let _ = (plugins, file_options, options, source_map, errors);
683 }
684 Ok(crate::Directives::new_unchecked(std::mem::take(
685 self.as_vec_mut(),
686 )))
687 }
688}
689
690impl crate::Directives<crate::RegularPluginsApplied> {
691 /// Run the late-phase validators on booked + plugin-processed
692 /// directives. Reuses the `ValidationSession` from
693 /// `early_validate` so account / commodity / pad bookkeeping
694 /// carries forward.
695 #[cfg(feature = "validation")]
696 pub(crate) fn late_validate(
697 mut self,
698 validation_session: Option<
699 rustledger_validate::ValidationSession<rustledger_validate::EarlyDone>,
700 >,
701 today: rustledger_core::NaiveDate,
702 source_map: &SourceMap,
703 errors: &mut Vec<LedgerError>,
704 ) -> crate::Directives<crate::LateValidated> {
705 // Typestate move: consume `EarlyDone`, drive through `LateDone`
706 // to `finalize()`. The compile-time enforcement here is that
707 // we cannot call `late_validate` with a fresh `Pending` session
708 // (no `From<Pending>` to `EarlyDone`), so the loader caller
709 // must have routed the session through `early_validate` first
710 // (#1236).
711 if let Some(session) = validation_session {
712 let (session, phase_errors) = session.run_late_spanned(self.as_slice(), today);
713 ledger_errors_extend(errors, phase_errors, source_map);
714 let finalize_errors = session.finalize();
715 ledger_errors_extend(errors, finalize_errors, source_map);
716 }
717 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
718 }
719
720 #[cfg(not(feature = "validation"))]
721 pub(crate) fn late_validate(
722 mut self,
723 source_map: &SourceMap,
724 errors: &mut Vec<LedgerError>,
725 ) -> crate::Directives<crate::LateValidated> {
726 let _ = (source_map, errors);
727 crate::Directives::new_unchecked(std::mem::take(self.as_vec_mut()))
728 }
729}
730
731impl crate::Directives<crate::LateValidated> {
732 /// Re-merge failed (un-booked) transactions back into the
733 /// directive list for output. The user wrote them and expects
734 /// to see them in `Ledger.directives`; we kept them isolated
735 /// from post-booking processing.
736 ///
737 /// Re-sorts to restore canonical display order — `booked`
738 /// retained order during plugin transformation; the sort
739 /// restores the failed entries' positions.
740 pub(crate) fn finalize(
741 mut self,
742 failed: crate::phase::FailedBookings,
743 ) -> crate::Directives<crate::Finalized> {
744 let mut v = std::mem::take(self.as_vec_mut());
745 v.extend(failed.into_inner());
746 v.sort_by_key(canonical_sort_key);
747 crate::Directives::new_unchecked(v)
748 }
749}
750
/// Run booking and interpolation on transactions, returning the
/// directives partitioned into `(booked, failed)`.
///
/// The caller has already sorted `directives` into canonical display
/// order `(date, priority, file_id, span.start)`. Booking adds one
/// constraint: within the same `(date, priority)`, cost-reduction
/// transactions must be processed AFTER augmentations so the lots
/// exist when they are matched. Rather than re-sorting the whole vec,
/// it is walked through a transient `Vec<usize>` of indices sorted
/// into booking order. The sort is stable, so display order remains
/// the tiebreak between transactions with the same
/// `has_cost_reduction` flag.
///
/// Only `Directive::Transaction` entries are booked; every other
/// directive passes straight into `booked`. On success the
/// transaction is replaced in place by its booked form. On failure a
/// `"BOOK"` [`LedgerError`] is appended to `errors` and the
/// transaction is moved, unmodified, into `failed`.
///
/// Failed transactions are kept out of `booked` so they do not reach
/// regular plugins or Late validation: they are still in pre-booking
/// shape (unresolved cost specs, unfilled elided slots), so
/// downstream processing would cascade misleading errors. The caller
/// re-merges `failed` into the final `Ledger.directives` so the user
/// still sees their original input.
///
/// Both returned vectors preserve the relative order of `directives`.
#[cfg(feature = "booking")]
fn run_booking(
    mut directives: Vec<Spanned<Directive>>,
    booking_method: BookingMethod,
    errors: &mut Vec<LedgerError>,
) -> (Vec<Spanned<Directive>>, Vec<Spanned<Directive>>) {
    use rustledger_booking::BookingEngine;

    let mut engine = BookingEngine::with_method(booking_method);
    engine.register_account_methods(directives.iter().map(|s| &s.value));

    // Index in booking order. The key is `(date, priority,
    // has_cost_reduction)`: the input is already ordered by
    // `(date, priority, ...)`, so the only effective reordering is
    // augmentations-before-reductions inside each `(date, priority)`
    // group. The stable sort keeps the existing positional (display)
    // order as the final tiebreak.
    let mut order: Vec<usize> = (0..directives.len()).collect();
    order.sort_by_key(|&i| {
        let d = &directives[i].value;
        (d.date(), d.priority(), d.has_cost_reduction())
    });

    // One flag per directive, set when its booking failed. `order` is
    // a permutation, so each index is visited exactly once and the
    // count never double-counts.
    let mut booking_failed = vec![false; directives.len()];
    let mut failed_count = 0usize;
    for &i in &order {
        let spanned = &mut directives[i];
        if let Directive::Transaction(txn) = &mut spanned.value {
            match engine.book_and_interpolate(txn) {
                Ok(result) => {
                    // Feed the booked transaction back into the
                    // engine's state before storing it.
                    engine.apply(&result.transaction);
                    *txn = result.transaction;
                }
                Err(e) => {
                    errors.push(LedgerError::error(
                        "BOOK",
                        format!("{} ({}, \"{}\")", e, txn.date, txn.narration),
                    ));
                    booking_failed[i] = true;
                    failed_count += 1;
                }
            }
        }
    }

    // Partition into (booked, failed) in one consuming pass. The
    // flags line up positionally with `directives`, which has not
    // been reordered since they were recorded.
    let mut booked = Vec::with_capacity(directives.len() - failed_count);
    let mut failed = Vec::with_capacity(failed_count);
    for (directive, did_fail) in directives.into_iter().zip(booking_failed) {
        if did_fail {
            failed.push(directive);
        } else {
            booked.push(directive);
        }
    }
    (booked, failed)
}
828
/// Which subset of plugins a single `run_plugins` call executes.
///
/// The loader pipeline calls `run_plugins` twice: once with
/// [`PluginPass::PreBookingSynth`] before the Early validation phase
/// (so synthesizers can inject Opens / Documents that early checks
/// depend on), and once with [`PluginPass::PostBooking`] after booking
/// (so cost-spec-reading plugins like `implicit_prices`,
/// `capital_gains_classifier`, `check_average_cost`, `sell_gains`,
/// `unrealized`, and `valuation` see filled-in per-unit values on the
/// `CostNumber::PerUnitFromTotal` variant).
///
/// `run_plugins` decides which pass a named plugin belongs to by
/// asking the native registry's `find_synth`: a name that resolves
/// there is a synth plugin, everything else is post-booking.
///
/// Standalone callers (LSP / FFI / tests on already-booked input) pass
/// [`PluginPass::PostBooking`] — synth plugins are a loader-internal
/// concern and would re-Open already-opened accounts if run a second
/// time.
#[cfg(feature = "plugins")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PluginPass {
    /// Only plugins that synthesize directives the Early validator
    /// depends on: `auto_accounts` (synthesizes Open directives) and
    /// the built-in document discovery walker (synthesizes Document
    /// directives the early phase checks for missing files).
    PreBookingSynth,
    /// All file-declared plugins and CLI `extra_plugins`, EXCLUDING
    /// `auto_accounts` and `document_discovery` (those ran pre-booking).
    /// These are the regular plugins: they don't depend on synth state
    /// but may depend on booked cost specs.
    PostBooking,
}
858
/// A single plugin invocation in `run_plugins`'s unified dispatch
/// list.
///
/// Implicit plugins, file-declared plugins, and CLI extras are all
/// normalized into this shape before dispatch so the dispatch loop has
/// one code path.
#[cfg(feature = "plugins")]
struct PluginInvocation {
    /// Plugin name as supplied by the ledger file, the CLI, or one of
    /// the built-in name constants (short name, dotted module path, or
    /// file path).
    name: String,
    /// Plugin-specific config string passed to `PluginInput.config`.
    config: Option<String>,
    /// Set by the "python:..." prefix; when true, native resolution is
    /// skipped and the plugin is routed to the non-native branches.
    force_python: bool,
}
874
/// Run plugins on directives.
///
/// Executes native plugins (and document discovery) on the given
/// directives, modifying them in-place; names that do not resolve to a
/// native plugin for this pass are routed to the WASM / Python
/// branches. Plugin errors are appended to `errors`.
///
/// `pass` selects which subset of plugins to run — see [`PluginPass`].
/// The loader pipeline calls this twice (synth pass before Early,
/// regular pass after booking).
///
/// # Errors
///
/// Returns the [`ProcessError`] propagated from `apply_plugin_ops`
/// (a plugin-returned wrapper that cannot be converted back into a
/// directive). Every other failure — unknown plugin, missing feature,
/// plugin runtime failure, path-security rejection — is reported as a
/// `LedgerError` in `errors` and processing continues with the next
/// plugin.
#[cfg(feature = "plugins")]
pub fn run_plugins(
    directives: &mut Vec<Spanned<Directive>>,
    file_plugins: &[Plugin],
    file_options: &Options,
    options: &LoadOptions,
    source_map: &SourceMap,
    errors: &mut Vec<LedgerError>,
    pass: PluginPass,
) -> Result<(), ProcessError> {
    use rustledger_plugin::{NativePlugin, NativePluginRegistry, PluginInput, PluginOptions};

    // Resolve document directories relative to the main file's directory.
    // Used to build doc_discovery's per-call config in the synth pass,
    // and as the base for relative WASM / Python plugin paths.
    let base_dir = source_map
        .files()
        .first()
        .and_then(|f| f.path.parent())
        .unwrap_or_else(|| std::path::Path::new("."));

    // Access the process-wide registry singleton. The registry is
    // immutable and stateless, so the same instance services every
    // call.
    let registry = NativePluginRegistry::global();

    // Build the unified list of plugins to invoke for this pass:
    //   1. Implicit synth plugins triggered by `LoadOptions` /
    //      `file_options` (auto_accounts via `options.auto_accounts`;
    //      document_discovery via non-empty `file_options.documents`).
    //   2. File-declared plugins from `plugin "..."` directives.
    //   3. CLI `--plugin` extras.
    // Pass classification happens here — once — via `registry.find_synth`.
    // A plugin enters the list iff its pass matches the requested `pass`.
    let mut entries: Vec<PluginInvocation> = Vec::new();

    if matches!(pass, PluginPass::PreBookingSynth) {
        // Implicit synth: API-level auto_accounts flag.
        if options.auto_accounts {
            entries.push(PluginInvocation {
                name: rustledger_plugin::AUTO_ACCOUNTS_NAME.to_string(),
                config: None,
                force_python: false,
            });
        }
        // Implicit synth: document_discovery, driven by `option "documents"`.
        // The plugin sits in the registry as a ZST; we hand it the
        // resolved directories + base_dir via its config JSON.
        // Unlike auto_accounts above, this one is also gated on
        // `options.run_plugins`.
        if options.run_plugins && !file_options.documents.is_empty() {
            let resolved: Vec<String> = file_options
                .documents
                .iter()
                .map(|d| {
                    let path = std::path::Path::new(d);
                    if path.is_absolute() {
                        d.clone()
                    } else {
                        base_dir.join(path).to_string_lossy().to_string()
                    }
                })
                .collect();
            entries.push(PluginInvocation {
                name: rustledger_plugin::DOCUMENT_DISCOVERY_NAME.to_string(),
                config: Some(rustledger_plugin::document_discovery_config(
                    base_dir, &resolved,
                )),
                force_python: false,
            });
        }
    }

    // A plugin name belongs in the current pass iff its synth-marker
    // membership matches `pass`. Non-native plugins (WASM/Python) are
    // never in the synth registry and therefore always fall into the
    // PostBooking pass.
    // NOTE(review): classification below looks only at the name, not at
    // `force_python`, so a "python:"-forced plugin whose name resolves
    // via `find_synth` is scheduled in the synth pass — confirm that is
    // intended.
    let want_synth = matches!(pass, PluginPass::PreBookingSynth);

    // File-declared plugins.
    if options.run_plugins {
        for plugin in file_plugins {
            if registry.find_synth(&plugin.name).is_some() == want_synth {
                entries.push(PluginInvocation {
                    name: plugin.name.clone(),
                    config: plugin.config.clone(),
                    force_python: plugin.force_python,
                });
            }
        }
    }

    // CLI extra plugins. These are not gated on `options.run_plugins`.
    for extra in &options.extra_plugins {
        if registry.find_synth(&extra.name).is_some() == want_synth {
            entries.push(PluginInvocation {
                name: extra.name.clone(),
                config: extra.config.clone(),
                force_python: false,
            });
        }
    }

    // Nothing scheduled for this pass: skip building plugin options.
    if entries.is_empty() {
        return Ok(());
    }

    let plugin_options = PluginOptions {
        operating_currencies: file_options.operating_currency.clone(),
        title: file_options.title.clone(),
    };

    // Dispatch each entry. Native plugins resolve through the typed
    // registry (`find_synth` / `find_regular`) keyed on the pass — the
    // returned reference type reflects the pass. Anything that doesn't
    // resolve falls through to the WASM/Python branches.
    for invocation in &entries {
        let PluginInvocation {
            name: raw_name,
            config: plugin_config,
            force_python,
        } = invocation;

        // Dispatch via the typed registry. `find_synth`/`find_regular`
        // internally take the short name (last `.`-separated segment),
        // so prefixed names like `"beancount.plugins.implicit_prices"`
        // resolve through the same call — no explicit prefix-stripping
        // needed. Returns `Some` only if the plugin exists AND its
        // marker trait matches the requested pass: a `RegularPlugin`
        // won't be returned from `find_synth` (and vice versa), even
        // on a name collision. Anything that returns `None` (WASM,
        // Python, unknown names, wrong-pass natives) falls through
        // to the WASM/Python branches below.
        let native_plugin: Option<&dyn NativePlugin> = if *force_python {
            None
        } else {
            match pass {
                PluginPass::PreBookingSynth => registry
                    .find_synth(raw_name)
                    .map(|p| p as &dyn NativePlugin),
                PluginPass::PostBooking => registry
                    .find_regular(raw_name)
                    .map(|p| p as &dyn NativePlugin),
            }
        };

        if let Some(plugin) = native_plugin {
            // Wrappers are rebuilt per plugin so each one sees the
            // directives as left by the previous plugin.
            let wrappers = build_wrappers(directives, source_map);
            let input = PluginInput {
                directives: wrappers,
                options: plugin_options.clone(),
                config: plugin_config.clone(),
            };
            let output = plugin.process(input);
            record_plugin_errors(errors, output.errors, source_map);
            apply_plugin_ops(directives, output.ops, errors, source_map)?;
        } else {
            // Not a native plugin — categorize by file extension / name
            // shape and handle accordingly.
            let plugin_path = std::path::Path::new(raw_name);
            let ext = plugin_path
                .extension()
                .and_then(|e| e.to_str())
                .unwrap_or("")
                .to_lowercase();

            // The closure is only invoked from inside the wasm-plugins /
            // python-plugins cfg blocks below. The whole function is
            // already `#[cfg(feature = "plugins")]`, so this only matters
            // when `plugins` is enabled but neither child feature is
            // (e.g. `--features native-plugins`). Allow `unused_variables`
            // for exactly that configuration. Underscore-prefixing the
            // binding would have been the wrong fix because we DO call
            // the closure in builds with one of the features enabled,
            // which would trip `no_effect_underscore_binding` instead.
            #[cfg_attr(
                not(any(feature = "wasm-plugins", feature = "python-plugins")),
                allow(unused_variables)
            )]
            let resolve_path = |name: &str| -> Result<std::path::PathBuf, String> {
                let p = std::path::Path::new(name);
                let resolved = if p.is_absolute() {
                    p.to_path_buf()
                } else {
                    base_dir.join(name)
                };

                // Path security: prevent plugins from outside the ledger directory.
                // NOTE(review): if either `canonicalize()` call fails
                // (e.g. the path does not exist) the check is skipped and
                // the path is accepted — confirm fail-open is intended.
                if options.path_security
                    && let (Ok(canon_base), Ok(canon_plugin)) =
                        (base_dir.canonicalize(), resolved.canonicalize())
                    && !canon_plugin.starts_with(&canon_base)
                {
                    return Err(format!(
                        "plugin path '{name}' is outside the ledger directory"
                    ));
                }

                Ok(resolved)
            };

            if ext == "wasm" {
                // WASM plugin
                #[cfg(feature = "wasm-plugins")]
                {
                    let wasm_path = match resolve_path(raw_name) {
                        Ok(p) => p,
                        Err(e) => {
                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
                            continue;
                        }
                    };
                    let wrappers = build_wrappers(directives, source_map);
                    match run_wasm_plugin(&wasm_path, &wrappers, &plugin_options, plugin_config) {
                        Ok((ops, plugin_errors)) => {
                            for err in plugin_errors {
                                errors.push(err);
                            }
                            apply_plugin_ops(directives, ops, errors, source_map)?;
                        }
                        Err(e) => {
                            errors.push(
                                LedgerError::error(
                                    "PLUGIN",
                                    format!("WASM plugin {} failed: {e}", wasm_path.display()),
                                )
                                .with_phase("plugin"),
                            );
                        }
                    }
                }
                #[cfg(not(feature = "wasm-plugins"))]
                {
                    errors.push(
                        LedgerError::error(
                            "PLUGIN",
                            format!("WASM plugin '{raw_name}' requires the wasm-plugins feature"),
                        )
                        .with_phase("plugin"),
                    );
                }
            } else if *force_python
                || ext == "py"
                || raw_name.contains(std::path::MAIN_SEPARATOR)
                || raw_name.contains('.')
            {
                // Python module or file-based plugin (or force_python via "python:" prefix)
                #[cfg(feature = "python-plugins")]
                {
                    let resolved = match resolve_path(raw_name) {
                        Ok(p) => p,
                        Err(e) => {
                            errors.push(LedgerError::error("PLUGIN", e).with_phase("plugin"));
                            continue;
                        }
                    };
                    let wrappers = build_wrappers(directives, source_map);
                    match run_python_plugin(
                        raw_name,
                        &resolved,
                        base_dir,
                        &wrappers,
                        &plugin_options,
                        plugin_config,
                    ) {
                        Ok((ops, plugin_errors)) => {
                            for err in plugin_errors {
                                errors.push(err);
                            }
                            apply_plugin_ops(directives, ops, errors, source_map)?;
                        }
                        Err(e) => {
                            errors.push(LedgerError::error("E8002", e).with_phase("plugin"));
                        }
                    }
                }
                #[cfg(not(feature = "python-plugins"))]
                {
                    errors.push(
                        LedgerError::error(
                            "E8005",
                            format!(
                                "Python plugin \"{raw_name}\" requires the python-plugins feature",
                            ),
                        )
                        .with_phase("plugin"),
                    );
                }
            } else {
                // Completely unknown plugin name — try to suggest a module path
                #[cfg(feature = "python-plugins")]
                {
                    use rustledger_plugin::python::{is_python_available, suggest_module_path};
                    let suggestion = if is_python_available() {
                        suggest_module_path(raw_name)
                    } else {
                        None
                    };
                    if let Some(module_path) = suggestion {
                        errors.push(
                            LedgerError::error(
                                "E8004",
                                format!(
                                    "Cannot resolve Python module '{raw_name}'. Replace with: plugin \"{module_path}\""
                                ),
                            )
                            .with_phase("plugin"),
                        );
                    } else {
                        errors.push(
                            LedgerError::error(
                                "E8001",
                                format!("Plugin not found: \"{raw_name}\""),
                            )
                            .with_phase("plugin"),
                        );
                    }
                }
                #[cfg(not(feature = "python-plugins"))]
                {
                    errors.push(
                        LedgerError::error("E8001", format!("Plugin not found: \"{raw_name}\""))
                            .with_phase("plugin"),
                    );
                }
            }
        }
    }
    // No final wrapper→directive conversion needed: `apply_plugin_ops`
    // updates `directives` in place after each plugin call, preserving
    // original spans on Keep/Modify ops. Plugin-synthesized directives
    // (Insert ops) get `SYNTHESIZED_FILE_ID` and a zero span unless the
    // wrapper's filename + line number resolve to a loaded source file.
    Ok(())
}
1208
/// Convert the current directives into a fresh
/// `Vec<DirectiveWrapper>` for handing to a plugin.
///
/// Each wrapper carries the source filename and line number of its
/// directive (when the directive's `file_id` resolves in `source_map`)
/// so plugins can report located errors; directives whose `file_id` is
/// unknown to the source map get neither. Spans are not round-tripped
/// through the wrappers — `apply_plugin_ops` restores them by op index.
#[cfg(feature = "plugins")]
fn build_wrappers(
    directives: &[Spanned<Directive>],
    source_map: &SourceMap,
) -> Vec<rustledger_plugin::DirectiveWrapper> {
    use rustledger_plugin::directive_to_wrapper_with_location;

    let mut wrappers = Vec::with_capacity(directives.len());
    for entry in directives {
        let (filename, lineno) = match source_map.get(entry.file_id as usize) {
            Some(file) => {
                // Only the line is forwarded; the column is discarded.
                let (line, _) = file.line_col(entry.span.start);
                (Some(file.path.display().to_string()), Some(line as u32))
            }
            None => (None, None),
        };
        wrappers.push(directive_to_wrapper_with_location(
            &entry.value,
            filename,
            lineno,
        ));
    }
    wrappers
}
1233
/// Append a plugin's reported errors to the ledger error stream.
///
/// Every entry is tagged with code `"PLUGIN"` and `phase: "plugin"`,
/// and keeps the plugin's own severity (error vs. warning).
///
/// When the plugin supplied both `source_file` and `line_number`, an
/// `ErrorLocation` is attached so downstream renderers (CLI, LSP, JSON
/// output) can pinpoint where the plugin objected:
/// - if `source_file` matches a file in `source_map`, that file's
///   stored path is used;
/// - otherwise (e.g. a plugin-synthesized name such as
///   `"<auto_accounts>"`) the name is passed through verbatim as a
///   `PathBuf`, so the error is still attributed to its origin rather
///   than losing the field.
///
/// The column is always 1: the wrapper protocol carries no column.
#[cfg(feature = "plugins")]
fn record_plugin_errors(
    errors: &mut Vec<LedgerError>,
    plugin_errors: Vec<rustledger_plugin::PluginError>,
    source_map: &SourceMap,
) {
    use rustledger_plugin::PluginErrorSeverity;

    for err in plugin_errors {
        let unlocated = match err.severity {
            PluginErrorSeverity::Error => LedgerError::error("PLUGIN", err.message),
            PluginErrorSeverity::Warning => LedgerError::warning("PLUGIN", err.message),
        };
        let tagged = unlocated.with_phase("plugin");

        let finished = match (&err.source_file, err.line_number) {
            (Some(file), Some(line)) => {
                let path = match source_map.get_by_path(std::path::Path::new(file)) {
                    Some(known) => known.path.clone(),
                    None => std::path::PathBuf::from(file),
                };
                tagged.with_location(ErrorLocation {
                    file: path,
                    line: line as usize,
                    column: 1,
                })
            }
            // Without both pieces there is nothing useful to attach.
            _ => tagged,
        };
        errors.push(finished);
    }
}
1278
1279/// Apply a plugin's `Vec<PluginOp>` to `directives` in place.
1280///
1281/// Validates that the op set forms a complete partition of the input
1282/// indices (each input index appears in exactly one `Keep` / `Modify` /
1283/// `Delete` op). Protocol violations produce a `PLUGIN` error in
1284/// `errors` and leave `directives` untouched.
1285///
1286/// For `Keep(i)` / `Modify(i, w)`, the resulting `Spanned<Directive>`
1287/// inherits `directives[i]`'s span and `file_id` — this is the core of
1288/// the ops protocol's correctness guarantee (plugin-transformed
1289/// directives keep their original source identity for error reporting).
1290/// `Insert(w)` directives get `(Span::ZERO, SYNTHESIZED_FILE_ID)`.
1291///
1292/// Inner posting spans returned by plugins are sanitized against the
1293/// host's `SourceMap` (see [`sanitize_inner_posting_spans`]) so a
1294/// misbehaving plugin cannot smuggle out-of-bounds spans into the LSP.
1295#[cfg(feature = "plugins")]
1296fn apply_plugin_ops(
1297 directives: &mut Vec<Spanned<Directive>>,
1298 ops: Vec<rustledger_plugin::PluginOp>,
1299 errors: &mut Vec<LedgerError>,
1300 source_map: &SourceMap,
1301) -> Result<(), ProcessError> {
1302 use rustledger_plugin::PluginOp;
1303 use rustledger_plugin::wrapper_to_directive;
1304
1305 let n = directives.len();
1306
1307 // Validate: every input index in {Keep, Modify, Delete} exactly once.
1308 let mut seen = vec![false; n];
1309 for op in &ops {
1310 let idx = match op {
1311 PluginOp::Keep(i) | PluginOp::Modify(i, _) | PluginOp::Delete(i) => Some(*i),
1312 PluginOp::Insert(_) => None,
1313 };
1314 if let Some(i) = idx {
1315 if i >= n {
1316 errors.push(
1317 LedgerError::error(
1318 "PLUGIN",
1319 format!(
1320 "plugin op references out-of-bounds input index {i} (input has {n} directives)"
1321 ),
1322 )
1323 .with_phase("plugin"),
1324 );
1325 return Ok(());
1326 }
1327 if seen[i] {
1328 errors.push(
1329 LedgerError::error(
1330 "PLUGIN",
1331 format!("plugin op references input index {i} more than once"),
1332 )
1333 .with_phase("plugin"),
1334 );
1335 return Ok(());
1336 }
1337 seen[i] = true;
1338 }
1339 }
1340 for (i, was_seen) in seen.iter().enumerate() {
1341 if !was_seen {
1342 errors.push(
1343 LedgerError::error(
1344 "PLUGIN",
1345 format!(
1346 "plugin omitted input directive {i} (must appear in exactly one of Keep/Modify/Delete)"
1347 ),
1348 )
1349 .with_phase("plugin"),
1350 );
1351 return Ok(());
1352 }
1353 }
1354
1355 // Materialize new directives, preserving spans for Keep/Modify.
1356 let mut new_directives = Vec::with_capacity(ops.len());
1357 for op in ops {
1358 match op {
1359 PluginOp::Keep(i) => {
1360 new_directives.push(directives[i].clone());
1361 }
1362 PluginOp::Modify(i, wrapper) => {
1363 let mut directive = wrapper_to_directive(&wrapper)
1364 .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
1365 // Plugins are not trusted to return well-formed inner
1366 // posting spans — a misbehaving plugin can synthesize a
1367 // file_id pointing at a nonexistent source or a span
1368 // that runs past EOF. The LSP later builds TextEdits
1369 // from these spans, so an out-of-bounds posting span
1370 // would produce a corrupt edit. Reset any inner posting
1371 // span that doesn't refer to a real loaded file or that
1372 // exceeds the file's length to `Spanned::synthesized`.
1373 sanitize_inner_posting_spans(&mut directive, source_map);
1374 new_directives.push(Spanned {
1375 value: directive,
1376 span: directives[i].span,
1377 file_id: directives[i].file_id,
1378 });
1379 }
1380 PluginOp::Insert(wrapper) => {
1381 // Same trust caveat as Modify: don't let an Insert smuggle
1382 // bogus inner-posting spans through.
1383 // (Wrapper-derived outer span is validated below.)
1384 // Resolve the wrapper's filename + line number, if set,
1385 // into a real (file_id, span) when the filename
1386 // corresponds to a loaded source file. Falls back to
1387 // SYNTHESIZED_FILE_ID + zero span otherwise — including
1388 // for plugin-only attribution like `"<auto_accounts>"`
1389 // (which never matches a loaded file).
1390 let (span, file_id) = match (&wrapper.filename, wrapper.lineno) {
1391 (Some(filename), Some(lineno)) => {
1392 if let Some(file) = source_map.get_by_path(std::path::Path::new(filename)) {
1393 let span_start = file.line_start(lineno as usize).unwrap_or(0);
1394 (
1395 rustledger_parser::Span::new(span_start, span_start),
1396 file.id as u16,
1397 )
1398 } else {
1399 (
1400 rustledger_parser::Span::ZERO,
1401 rustledger_parser::SYNTHESIZED_FILE_ID,
1402 )
1403 }
1404 }
1405 _ => (
1406 rustledger_parser::Span::ZERO,
1407 rustledger_parser::SYNTHESIZED_FILE_ID,
1408 ),
1409 };
1410 let mut directive = wrapper_to_directive(&wrapper)
1411 .map_err(|e| ProcessError::PluginConversion(e.to_string()))?;
1412 sanitize_inner_posting_spans(&mut directive, source_map);
1413 new_directives.push(Spanned::new(directive, span).with_file_id(file_id as usize));
1414 }
1415 PluginOp::Delete(_) => {}
1416 }
1417 }
1418
1419 *directives = new_directives;
1420 Ok(())
1421}
1422
/// Force every posting location inside a plugin-returned transaction to
/// be either a valid range in a loaded file or a canonical synthesized
/// marker. Directives other than transactions are left as they are.
///
/// Plugins are not trusted to return well-formed `file_id` + byte
/// ranges; without this, a misbehaving plugin could induce
/// out-of-bounds LSP text edits.
///
/// Per posting:
/// - `file_id == SYNTHESIZED_FILE_ID`: kept, but a non-zero span is
///   normalized to `Span::ZERO` so the state matches what
///   [`Spanned::synthesized`] would have produced.
/// - `file_id` resolves in `source_map` and
///   `start <= end <= source.len()`: kept unchanged.
/// - anything else: the posting value is re-wrapped with
///   `Spanned::synthesized`, discarding the bogus location.
#[cfg(feature = "plugins")]
fn sanitize_inner_posting_spans(directive: &mut Directive, source_map: &SourceMap) {
    use rustledger_core::Span;
    use rustledger_parser::SYNTHESIZED_FILE_ID;

    let Directive::Transaction(txn) = directive else {
        return;
    };

    for posting in txn.postings.iter_mut() {
        if posting.file_id == SYNTHESIZED_FILE_ID {
            // Genuine synthesis: the span carries no meaning, so make
            // sure it is the canonical zero span.
            if posting.span != Span::ZERO {
                posting.span = Span::ZERO;
            }
            continue;
        }

        let refers_to_real_range = source_map
            .get(posting.file_id as usize)
            .is_some_and(|file| {
                posting.span.start <= posting.span.end
                    && posting.span.end <= file.source.len()
            });
        if refers_to_real_range {
            continue;
        }

        // Swap a throwaway posting in so the real value can be moved
        // out and re-wrapped with a synthesized location.
        let inner = std::mem::replace(
            &mut posting.value,
            rustledger_core::Posting::auto(rustledger_core::InternedStr::from("")),
        );
        *posting = rustledger_core::Spanned::synthesized(inner);
    }
}
1465
/// Translate loader-level file options into a
/// [`rustledger_validate::ValidationOptions`].
///
/// Shared by the early and late validation phases in `process()` so
/// both run with the same configuration.
///
/// Document directories are resolved against the parent directory of
/// the source map's first file (or `"."` when there is none): absolute
/// entries pass through, relative entries are joined onto that base.
#[cfg(feature = "validation")]
fn build_validation_options(
    file_options: &Options,
    source_map: &SourceMap,
    default_booking_method: BookingMethod,
) -> rustledger_validate::ValidationOptions {
    use rustledger_validate::ValidationOptions;

    // Base for relative document directories: the main file's parent.
    let main_file_dir = source_map.files().first().and_then(|f| f.path.parent());
    let base_dir = match main_file_dir {
        Some(dir) => dir,
        None => std::path::Path::new("."),
    };

    let mut resolved_document_dirs: Vec<std::path::PathBuf> =
        Vec::with_capacity(file_options.documents.len());
    for dir in &file_options.documents {
        let candidate = std::path::Path::new(dir);
        let resolved = if candidate.is_absolute() {
            candidate.to_path_buf()
        } else {
            base_dir.join(candidate)
        };
        resolved_document_dirs.push(resolved);
    }

    let mut account_types: Vec<String> = Vec::new();
    for name in file_options.account_types().iter() {
        account_types.push((*name).to_string());
    }

    ValidationOptions::default()
        .with_account_types(account_types)
        .with_document_dirs(resolved_document_dirs)
        .with_infer_tolerance_from_cost(file_options.infer_tolerance_from_cost)
        .with_tolerance_multiplier(file_options.inferred_tolerance_multiplier)
        .with_inferred_tolerance_default(file_options.inferred_tolerance_default.clone())
        .with_default_booking_method(default_booking_method)
}
1517
/// Append a batch of [`rustledger_validate::ValidationError`]s to
/// `errors`, converted into loader-level [`LedgerError`]s.
///
/// Shared by both validation phases in `process()`. For each error:
/// - `phase` is `"parse"` when the code is a parse-phase code,
///   otherwise `"validate"`;
/// - severity is `Warning` when the code is a warning code, otherwise
///   `Error`;
/// - an advisory note, if present, is folded into the message so it
///   survives every downstream format without a dedicated field;
/// - when both a span and a `file_id` known to `source_map` are
///   present, a `file:line:column` location is resolved so consumers
///   need not do the lookup themselves (issue #901).
#[cfg(feature = "validation")]
fn ledger_errors_extend(
    errors: &mut Vec<LedgerError>,
    validation_errors: Vec<rustledger_validate::ValidationError>,
    source_map: &SourceMap,
) {
    errors.extend(validation_errors.into_iter().map(|err| {
        let phase = if err.code.is_parse_phase() {
            "parse"
        } else {
            "validate"
        };
        let severity = if err.code.is_warning() {
            ErrorSeverity::Warning
        } else {
            ErrorSeverity::Error
        };

        let message = err
            .note
            .as_ref()
            .map_or_else(|| err.to_string(), |note| format!("{err}\n note: {note}"));

        // A location needs the span, the file id, and a source-map hit.
        let location = match (err.span, err.file_id) {
            (Some(span), Some(fid)) => source_map.get(fid as usize).map(|file| {
                let (line, column) = file.line_col(span.start);
                ErrorLocation {
                    file: file.path.clone(),
                    line,
                    column,
                }
            }),
            _ => None,
        };

        LedgerError {
            severity,
            code: err.code.code().to_string(),
            message,
            location,
            source_span: err.span.map(|s| (s.start, s.end)),
            file_id: err.file_id,
            phase: phase.to_string(),
        }
    }));
}
1572
1573/// Load and fully process a beancount file.
1574///
1575/// This is the main entry point, equivalent to Python's `loader.load_file()`.
1576/// It performs: parse → sort → synth-plugins → Early → book → regular-plugins → Late → finalize.
1577///
1578/// # Example
1579///
1580/// ```ignore
1581/// use rustledger_loader::{load, LoadOptions};
1582/// use std::path::Path;
1583///
1584/// let ledger = load(Path::new("ledger.beancount"), LoadOptions::default())?;
1585/// for error in &ledger.errors {
1586/// eprintln!("{}: {}", error.code, error.message);
1587/// }
1588/// ```
1589pub fn load(path: &Path, options: &LoadOptions) -> Result<Ledger, ProcessError> {
1590 let mut loader = crate::Loader::new();
1591
1592 if options.path_security {
1593 loader = loader.with_path_security(true);
1594 }
1595
1596 let raw = loader.load(path)?;
1597 process(raw, options)
1598}
1599
/// Load a beancount file without processing.
///
/// This returns raw directives without sorting, booking, or plugins.
/// Use this when you need the original parse output. A default
/// `Loader` is used, so no loader options (such as path security) are
/// applied.
///
/// # Errors
///
/// Returns the [`LoadError`] produced by `Loader::load`.
pub fn load_raw(path: &Path) -> Result<LoadResult, LoadError> {
    crate::Loader::new().load(path)
}
1607
/// Load a WASM plugin from `wasm_path`, execute it once over
/// `directives`, and return its ops plus its reported errors.
///
/// A fresh `PluginManager` is created for every call. Plugin-reported
/// errors are converted to `LedgerError`s with code `"PLUGIN"` and
/// `phase: "plugin"`, keeping the plugin's severity; any source
/// location the plugin attached is not carried over.
///
/// # Errors
///
/// Returns a message string when the module fails to load
/// (`"failed to load: …"`) or to execute (`"execution failed: …"`).
#[cfg(feature = "wasm-plugins")]
fn run_wasm_plugin(
    wasm_path: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, PluginManager};

    let mut manager = PluginManager::new();
    let handle = match manager.load(wasm_path) {
        Ok(handle) => handle,
        Err(e) => return Err(format!("failed to load: {e}")),
    };

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    let output = match manager.execute(handle, &input) {
        Ok(output) => output,
        Err(e) => return Err(format!("execution failed: {e}")),
    };

    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| match err.severity {
            PluginErrorSeverity::Error => {
                LedgerError::error("PLUGIN", err.message).with_phase("plugin")
            }
            PluginErrorSeverity::Warning => {
                LedgerError::warning("PLUGIN", err.message).with_phase("plugin")
            }
        })
        .collect();

    Ok((output.ops, errors))
}
1648
/// Run a Python plugin through `PythonRuntime` and return its ops plus
/// its reported errors.
///
/// `module_name` is handed to `PythonRuntime::execute_module` together
/// with `base_dir`. `resolved_path` is consulted only to decide whether
/// the plugin looks file-based, which affects nothing but the wording
/// of the failure message: both the file-based and the module-based
/// case go through the same `execute_module` call.
///
/// Plugin-reported errors are converted to `LedgerError`s with code
/// `"PLUGIN"` and `phase: "plugin"`, keeping the plugin's severity.
///
/// # Errors
///
/// Returns a message string when the Python runtime cannot be created
/// or when execution fails.
#[cfg(feature = "python-plugins")]
fn run_python_plugin(
    module_name: &str,
    resolved_path: &std::path::Path,
    base_dir: &std::path::Path,
    directives: &[rustledger_plugin::DirectiveWrapper],
    options: &rustledger_plugin::PluginOptions,
    config: &Option<String>,
) -> Result<(Vec<rustledger_plugin::PluginOp>, Vec<LedgerError>), String> {
    use rustledger_plugin::{PluginErrorSeverity, PluginInput, python::PythonRuntime};

    let runtime = match PythonRuntime::new() {
        Ok(runtime) => runtime,
        Err(e) => return Err(format!("Python runtime unavailable: {e}")),
    };

    let input = PluginInput {
        directives: directives.to_vec(),
        options: options.clone(),
        config: config.clone(),
    };

    // "File-like" means: the resolved path exists, the name ends in
    // `.py` (case-insensitive), or the name contains a path separator.
    let has_py_extension = || {
        std::path::Path::new(module_name)
            .extension()
            .is_some_and(|ext| ext.eq_ignore_ascii_case("py"))
    };
    let is_file = resolved_path.exists()
        || has_py_extension()
        || module_name.contains(std::path::MAIN_SEPARATOR);

    let output = runtime
        .execute_module(module_name, &input, Some(base_dir))
        .map_err(|e| {
            if is_file {
                format!("Python plugin execution failed: {e}")
            } else {
                format!("Python plugin '{module_name}' execution failed: {e}")
            }
        })?;

    let errors: Vec<LedgerError> = output
        .errors
        .into_iter()
        .map(|err| match err.severity {
            PluginErrorSeverity::Error => {
                LedgerError::error("PLUGIN", err.message).with_phase("plugin")
            }
            PluginErrorSeverity::Warning => {
                LedgerError::warning("PLUGIN", err.message).with_phase("plugin")
            }
        })
        .collect();

    Ok((output.ops, errors))
}
1701
// Unit tests for `sanitize_inner_posting_spans`.
//
// Each test builds a one-posting transaction whose posting claims some
// `(file_id, span)` location, runs the sanitizer against a `SourceMap`
// holding a single small file, and checks whether the claimed location
// was kept or reset to `(SYNTHESIZED_FILE_ID, Span::ZERO)`.
#[cfg(all(test, feature = "plugins"))]
mod sanitize_tests {
    use super::sanitize_inner_posting_spans;
    use crate::source_map::SourceMap;
    use rust_decimal_macros::dec;
    use rustledger_core::{
        Amount, Directive, IncompleteAmount, Posting, SYNTHESIZED_FILE_ID, Span, Spanned,
        Transaction,
    };
    use std::path::PathBuf;
    use std::sync::Arc;

    /// Wraps `postings` in a transaction directive dated 2024-01-15.
    fn txn_with_postings(postings: Vec<Spanned<Posting>>) -> Directive {
        let date = rustledger_core::naive_date(2024, 1, 15).unwrap();
        let mut txn = Transaction::new(date, "x");
        txn.postings = postings;
        Directive::Transaction(txn)
    }

    /// Builds a `1 USD` posting on `Assets:Cash` that claims to be located
    /// at `span` inside the file identified by `file_id`.
    fn posting_at(file_id: u16, span: Span) -> Spanned<Posting> {
        let posting = Posting::with_incomplete(
            "Assets:Cash",
            IncompleteAmount::Complete(Amount::new(dec!(1), "USD")),
        );
        // `From` instead of `as`: the widening is guaranteed lossless by the
        // type system rather than by convention.
        Spanned::new(posting, span).with_file_id(usize::from(file_id))
    }

    /// Creates a `SourceMap` containing exactly one file (`test.bean`) with
    /// the given contents and returns it together with that file's id.
    fn source_map_with_one_file(source: &str) -> (SourceMap, u16) {
        let mut sm = SourceMap::new();
        let id = sm.add_file(PathBuf::from("test.bean"), Arc::from(source));
        // Checked narrowing: an `as u16` cast would silently wrap and make
        // the tests compare against the wrong file id.
        let id = u16::try_from(id).expect("file id of the only registered file fits in u16");
        (sm, id)
    }

    /// Runs the sanitizer on a transaction holding only `posting` and asserts
    /// that the posting ends up at `(want_file_id, want_span)`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test instead of
    /// at this helper.
    #[track_caller]
    fn assert_sanitized(
        sm: &SourceMap,
        posting: Spanned<Posting>,
        want_file_id: u16,
        want_span: Span,
    ) {
        let mut directive = txn_with_postings(vec![posting]);
        sanitize_inner_posting_spans(&mut directive, sm);
        let Directive::Transaction(txn) = &directive else {
            panic!("sanitizer must leave a transaction directive as a transaction");
        };
        assert_eq!(
            txn.postings[0].file_id, want_file_id,
            "posting file_id after sanitizing"
        );
        assert_eq!(
            txn.postings[0].span, want_span,
            "posting span after sanitizing"
        );
    }

    /// Asserts that sanitizing `posting` resets it to the synthesized
    /// location `(SYNTHESIZED_FILE_ID, Span::ZERO)`.
    #[track_caller]
    fn assert_reset_to_synthesized(sm: &SourceMap, posting: Spanned<Posting>) {
        assert_sanitized(sm, posting, SYNTHESIZED_FILE_ID, Span::ZERO);
    }

    #[test]
    fn span_within_real_file_is_preserved() {
        let (sm, fid) = source_map_with_one_file("0123456789");
        assert_sanitized(&sm, posting_at(fid, Span::new(2, 6)), fid, Span::new(2, 6));
    }

    #[test]
    fn span_past_eof_is_reset_to_synthesized() {
        // Bug case: a misbehaving plugin claims the posting extends past
        // the file's actual length. The sanitizer must reject it so the
        // LSP can't be tricked into producing an out-of-bounds TextEdit.
        let (sm, fid) = source_map_with_one_file("0123456789"); // 10 bytes
        assert_reset_to_synthesized(&sm, posting_at(fid, Span::new(0, 9999)));
    }

    #[test]
    fn unknown_file_id_is_reset_to_synthesized() {
        // Plugin claims a file_id that the host's SourceMap doesn't know.
        let (sm, _real) = source_map_with_one_file("hello");
        assert_reset_to_synthesized(&sm, posting_at(123, Span::new(0, 5)));
    }

    #[test]
    fn start_after_end_is_reset_to_synthesized() {
        // Inverted span (start > end) inside an otherwise valid file.
        let (sm, fid) = source_map_with_one_file("abcdef");
        assert_reset_to_synthesized(&sm, posting_at(fid, Span::new(5, 2)));
    }

    #[test]
    fn synthesized_file_id_is_left_alone_but_span_normalized() {
        // file_id == SYNTHESIZED_FILE_ID with a non-zero span: the
        // sanitizer leaves it synthesized (span is meaningless for
        // synth postings) but normalizes to Span::ZERO for tidy state.
        let (sm, _fid) = source_map_with_one_file("x");
        assert_reset_to_synthesized(&sm, posting_at(SYNTHESIZED_FILE_ID, Span::new(100, 200)));
    }

    #[test]
    fn boundary_span_eq_source_len_is_valid() {
        // end == source.len() is the canonical "to-end-of-file" span;
        // must not be rejected.
        let (sm, fid) = source_map_with_one_file("abcd");
        assert_sanitized(&sm, posting_at(fid, Span::new(0, 4)), fid, Span::new(0, 4));
    }

    #[test]
    fn non_transaction_directive_is_left_alone() {
        // Sanitizer only walks transactions; other directive types have
        // no inner posting spans.
        let (sm, _fid) = source_map_with_one_file("x");
        let mut d = Directive::Open(rustledger_core::Open {
            date: rustledger_core::naive_date(2024, 1, 1).unwrap(),
            account: "Assets:Bank".into(),
            currencies: vec![],
            booking: None,
            meta: Default::default(),
        });
        sanitize_inner_posting_spans(&mut d, &sm); // no panic, no change
        assert!(matches!(d, Directive::Open(_)));
    }
}